From 4af19e8a4c5c1363872ca4c37f181a19e67da4c0 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 3 Jun 2018 13:20:19 -0400 Subject: cow-dedupe: Shorter status line; better statistics --- go/src/cow-dedupe/dedupe.go | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/go/src/cow-dedupe/dedupe.go b/go/src/cow-dedupe/dedupe.go index 5717e30..3fc2254 100644 --- a/go/src/cow-dedupe/dedupe.go +++ b/go/src/cow-dedupe/dedupe.go @@ -176,19 +176,17 @@ func dedupe(srcFile string, dupFiles []string) error { return err } -// [ 0s ] size-set[n/d]->c :: sha256-set[t+n/d]->c :: (summed=n deduped=n) :: verb[m/n] +// [ 0s ] set[n/(d+e)]->c :: (summed=n deduped=n) :: verb[m/n] type fancyStatus struct { - sizeN, sizeD, sizeC int - sumT, sumN, sumD, sumC int - summed, errs int - deduped struct{ srcs, dstMaps, dstFiles int } - verb string + set struct{ n, d, p, m, size int } + summed, errs int + deduped struct{ srcs, dstMaps, dstFiles int } + verb string } func (s fancyStatus) String() string { - return fmt.Sprintf("size-set[%d/%d]->%d :: sha256-set[%d+%d/%d]->%d :: (summed=%d deduped={%dmaps:%dfiles->%d} errs=%d) :: %s", - s.sizeN, s.sizeD, s.sizeC, - s.sumT, s.sumN, s.sumD, s.sumC, + return fmt.Sprintf("set[%d/(%d+%d-%d)]->%d :: summed=%d deduped={%dmaps:%dfiles->%d} errs=%d :: %s", + s.set.n, s.set.d, s.set.p, s.set.m, s.set.size, s.summed, s.deduped.dstMaps+s.deduped.srcs, s.deduped.dstFiles+s.deduped.srcs, s.deduped.srcs, s.errs, @@ -236,9 +234,9 @@ func main() { sl = myStatusLine() var status fancyStatus - status.sizeD = len(size2filenames) + status.set.d = len(size2filenames) for _, filenames := range size2filenames { - status.sizeC = len(filenames) + status.set.size = len(filenames) // Now do strict hashing, instead of the incredibly // sloppy (but fast) size-bucketing. status.verb = "sha256sum[%v/%v]" @@ -248,9 +246,13 @@ func main() { sl.Put(status.String()) pruneSingles(checksum2filenames) // And loop over the smaller, precise buckets - status.sumD = len(checksum2filenames) + if len(checksum2filenames) == 0 { + status.set.m++ + } else { + status.set.p += len(checksum2filenames) - 1 + } for _, filenames := range checksum2filenames { - status.sumC = len(filenames) + status.set.size = len(filenames) status.verb = "prep" sl.Put(status.String()) var fiemaps []string @@ -285,7 +287,7 @@ func main() { if len(name) > 21 { name = name[:20] + "…" } - status.verb = fmt.Sprintf("dedupe %q (and %d more)", name, len(dupFiles)) + status.verb = fmt.Sprintf("dedupe %q<-[%d]file{…}", name, len(dupFiles)) sl.Put(status.String()) err := dedupe(srcFile, dupFiles) if err != nil { @@ -295,13 +297,8 @@ func main() { status.deduped.dstMaps += len(fiemaps) - 1 status.deduped.dstFiles += len(dupFiles) } - status.sumN++ + status.set.n++ } - status.sumT += len(checksum2filenames) - status.sumN = 0 - status.sumD = 0 - status.sumC = 0 - status.sizeN++ } status.verb = "done" sl.Put(status.String()) -- cgit v1.2.3