summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@beefcake.parabola.nu>2018-05-19 01:38:13 -0400
committerLuke Shumaker <lukeshu@beefcake.parabola.nu>2018-05-19 01:38:13 -0400
commit86201baa1bf72f9717fc4643cd0a00554a23bd72 (patch)
tree9f53f1024d37a90be1b6ff2f2f37d69a68185ece
parent1fd8e694c26a05069da7c660f1c4b95395bfea59 (diff)
eh
-rw-r--r--go/src/cow-dedupe/dedupe.go119
-rw-r--r--go/src/lib/pipeline/cmd.go16
-rw-r--r--go/src/lib/pipeline/pipeline.go80
-rw-r--r--go/src/lib/statusline/statuslinue.go24
4 files changed, 159 insertions, 80 deletions
diff --git a/go/src/cow-dedupe/dedupe.go b/go/src/cow-dedupe/dedupe.go
index f28d2f4..9fee970 100644
--- a/go/src/cow-dedupe/dedupe.go
+++ b/go/src/cow-dedupe/dedupe.go
@@ -10,6 +10,8 @@ import (
"runtime"
"strconv"
"strings"
+
+ "lib/statusline"
)
//#include <unistd.h>
@@ -23,100 +25,62 @@ func errhandle(err error) {
}
}
-func findLikelyDups(paths []string) map[string][]string {
- ret := map[string][]string{}
+func getFiemaps(paths []string) map[string][]string {
var err error
for i := range paths {
paths[i], err = filepath.Abs(paths[i])
errhandle(err)
}
- cmd := exec.Command("find", append(paths, "-type", "f", "-printf", "%s %p\\0")...)
+
+ ret := map[string][]string{}
+
+ sl := statusline.NewStatusLine(os.Stderr)
+ cnt := 0
+ sl.Put("Mapping extents...")
+
+ cmd := exec.Command("find", append(paths, "-type", "f", "-exec", "./cow-extent-map", "-m", "--", "{}", "+")...)
stdout, err := cmd.StdoutPipe()
errhandle(err)
cmd.Stderr = os.Stderr
+
errhandle(cmd.Start())
rd := bufio.NewReader(stdout)
for {
- line, err := rd.ReadString('\x00')
- if line == "" && err == io.EOF {
+ filename, err := rd.ReadString('\x00')
+ if filename == "" && err == io.EOF {
break
}
- errhandle(err)
- parts := strings.SplitN(strings.TrimSuffix(line, "\x00"), " ", 2)
- if len(parts) != 2 {
- panic("wut")
- }
- size := parts[0]
- filename := parts[1]
- basename := filepath.Base(filename)
- key := size + " " + basename
- ret[key] = append(ret[key], filename)
- }
- errhandle(cmd.Wait())
- for key := range ret {
- if len(ret[key]) < 2 {
- delete(ret, key)
+ filename = strings.TrimSuffix(filename, "\x00")
+ if !strings.HasPrefix(filename, "/") {
+ panic("ugly filename")
}
- }
- return ret
-}
-
-func getFiemaps(paths []string) map[string][]string {
- ret := map[string][]string{}
- fmt.Fprintf(os.Stderr, "Getting fiemaps for %d files...\n", len(paths))
-
- cnt := 0
- for len(paths) > 0 {
- _paths := paths
- arg_len := 0
- for i := range _paths {
- arg_len += len(_paths[i]) + 1
- if arg_len > arg_max/2 {
- _paths = _paths[:i-1]
- break
- }
+ errhandle(err)
+ fiemap, err := rd.ReadString('\x00')
+ fiemap = strings.TrimSuffix(fiemap, "\x00")
+ if !(strings.HasPrefix(fiemap, "logical=") || fiemap == "") {
+ panic("ugly fiemap")
}
- paths = paths[len(_paths):]
-
- cmd := exec.Command("./cow-extent-map", append([]string{"-m", "--"}, _paths...)...)
- stdout, err := cmd.StdoutPipe()
errhandle(err)
- cmd.Stderr = os.Stderr
- errhandle(cmd.Start())
- rd := bufio.NewReader(stdout)
- for {
- filename, err := rd.ReadString('\x00')
- if filename == "" && err == io.EOF {
- break
- }
- filename = strings.TrimSuffix(filename, "\x00")
- if !strings.HasPrefix(filename, "/") {
- panic("ugly filename")
- }
- errhandle(err)
- fiemap, err := rd.ReadString('\x00')
- fiemap = strings.TrimSuffix(fiemap, "\x00")
- if !(strings.HasPrefix(fiemap, "logical=") || fiemap == "") {
- panic("ugly fiemap")
- }
- errhandle(err)
- ret[fiemap] = append(ret[fiemap], filename)
- cnt++
- fmt.Fprintf(os.Stderr, "\r%d ", cnt)
- }
- errhandle(cmd.Wait())
+ ret[fiemap] = append(ret[fiemap], filename)
+ cnt++
+ sl.Put(fmt.Sprintf("Mapping extents... %d", cnt))
}
+ errhandle(cmd.Wait())
- fmt.Fprintf(os.Stderr, "\r...done \n")
+ sl.Put(fmt.Sprintf("Mapping extents... done; mapped %d files", cnt))
+ sl.End()
+ io.WriteString(os.Stderr, "\n")
return ret
}
func getChecksums(paths []string) map[string][]string {
ret := map[string][]string{}
- fmt.Fprintf(os.Stderr, "Generating checksums for %d files...\n", len(paths))
+ sl := statusline.NewStatusLine(os.Stderr)
cnt := 0
+ sl.Put(fmt.Sprintf("Generating checksums for files... %d/%d\n", cnt, len(paths)))
+
for len(paths) > 0 {
_paths := paths
arg_len := 0
@@ -150,28 +114,23 @@ func getChecksums(paths []string) map[string][]string {
ret[checksum] = append(ret[checksum], filename)
cnt++
- fmt.Fprintf(os.Stderr, "\r%d ", cnt)
+ sl.Put(fmt.Sprintf("Generating checksums for files... %d/%d\n", cnt, len(paths)))
}
errhandle(cmd.Wait())
}
- fmt.Fprintf(os.Stderr, "\r...done \n")
+ sl.Put(fmt.Sprintf("Generating checksums for files... done; summed %d files\n", cnt))
+ sl.End()
+ io.WriteString(os.Stderr, "\n")
return ret
}
func main() {
- // we have no parallelism, don't let syscalls fan-out weird on
- // many-core systems
+ // we have low parallelism, don't let syscalls fan-out weird
+ // on many-core systems
runtime.GOMAXPROCS(1)
- likely := findLikelyDups(os.Args[1:])
-
- var flatLikely []string
- for _, filenames := range likely {
- flatLikely = append(flatLikely, filenames...)
- }
-
- fiemap2filenames := getFiemaps(flatLikely)
+ fiemap2filenames := getFiemaps(os.Args[1:])
filename2fiemap := map[string]string{}
for fiemap, filenames := range fiemap2filenames {
diff --git a/go/src/lib/pipeline/cmd.go b/go/src/lib/pipeline/cmd.go
new file mode 100644
index 0000000..563becf
--- /dev/null
+++ b/go/src/lib/pipeline/cmd.go
@@ -0,0 +1,16 @@
+package pipeline
+
+import (
+ "io"
+)
+
+type Cmd interface {
+ CombinedOutput() ([]byte, error)
+ Output() ([]byte, error)
+ Run() error
+ Start() error
+ StderrPipe() (io.ReadCloser, error)
+ StdinPipe() (io.WriteCloser, error)
+ StdoutPipe() (io.ReadCloser, error)
+ Wait() error
+}
diff --git a/go/src/lib/pipeline/pipeline.go b/go/src/lib/pipeline/pipeline.go
new file mode 100644
index 0000000..a74e626
--- /dev/null
+++ b/go/src/lib/pipeline/pipeline.go
@@ -0,0 +1,80 @@
+package pipeline
+
+import (
+ "os/exec"
+ "io"
+)
+
+type Pipe struct {
+ Cmds []exec.Cmd
+}
+
+func (pl *Pipe) preStart() error {
+ for _, cmd := range pl.Cmds[:len(pl.Cmds)-2] {
+ if err := cmd.Start(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (pl *Pipe) Start() error {
+ for _, cmd := range pl.Cmds {
+ if err := cmd.Start(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (pl *Pipe) Wait() error {
+ for _, cmd := range pl.Cmds {
+ if err := cmd.Wait(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (pl *Pipe) Run() error {
+ if err := pl.Start(); err != nil {
+ return err
+ }
+ return pl.Wait()
+}
+
+func (pl *Pipe) CombinedOutput() ([]byte, error) {
+ if err := pl.preStart(); err != nil {
+ return nil, err
+ }
+ return pl.Cmds[len(pl.Cmds)-1].CombinedOutput()
+}
+
+func (pl *Pipe) Output() ([]byte, error) {
+ if err := pl.preStart(); err != nil {
+ return nil, err
+ }
+ return pl.Cmds[len(pl.Cmds)-1].Output()
+}
+
+func (pl *Pipe) StdinPipe() (io.WriteCloser, error) {
+ return pl.Cmds[0].StdinPipe()
+}
+
+func (pl *Pipe) StdoutPipe() (io.ReadCloser, error) {
+ return pl.Cmds[len(pl.Cmds)-1].StdoutPipe()
+}
+
+var _ = &Pipe{}
+
+func Pipeline(cmds ...exec.Cmd) (*Pipe, error) {
+ pl := &Pipe{cmds}
+ for i := range pl.Cmds[:len(pl.Cmds)-2] {
+ out, err := pl.Cmds[i].StdoutPipe()
+ if err != nil {
+ return nil, err
+ }
+ pl.Cmds[i+1].Stdin = out
+ }
+ return pl, nil
+}
diff --git a/go/src/lib/statusline/statuslinue.go b/go/src/lib/statusline/statuslinue.go
new file mode 100644
index 0000000..90ec1af
--- /dev/null
+++ b/go/src/lib/statusline/statuslinue.go
@@ -0,0 +1,24 @@
+package statusline
+
+import (
+ "fmt"
+ "io"
+)
+
+type StatusLine struct {
+ out io.Writer
+ prevLen int
+}
+
+func NewStatusLine(out io.Writer) *StatusLine {
+ return &StatusLine{out: out}
+}
+
+func (sl *StatusLine) Put(line string) {
+ fmt.Fprintf(sl.out, "\r%-[1]*[2]s", sl.prevLen, line)
+ sl.prevLen = len(line)
+}
+
+func (sl *StatusLine) End() {
+ fmt.Fprintf(sl.out, "\r%-[1]*[2]s\r", sl.prevLen, "")
+}