From a4587d50708a73b65089c87e17523fac92d0249a Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 26 May 2018 00:51:34 -0400 Subject: cow-dedupe: Don't pass too many arguments to cow-dedupe-range at once --- go/src/cow-dedupe/dedupe.go | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/go/src/cow-dedupe/dedupe.go b/go/src/cow-dedupe/dedupe.go index dc37491..bc543f1 100644 --- a/go/src/cow-dedupe/dedupe.go +++ b/go/src/cow-dedupe/dedupe.go @@ -19,6 +19,7 @@ import ( import "C" var arg_max = int(C.sysconf(C._SC_ARG_MAX)) +var open_max = int(C.sysconf(C._SC_OPEN_MAX)) func errhandle(err error) { if err != nil { @@ -139,21 +140,40 @@ func pruneSingles(key2vals map[string][]string) { func dedupe(srcFile string, dupFiles []string) error { stat, err := os.Stat(srcFile) - errhandle(err) - args := []string{ + if err != nil { + return err + } + baseArgs := []string{ "-r", "--", strconv.FormatInt(stat.Size(), 10), srcFile, "0", } - for _, dupFile := range dupFiles { - args = append(args, dupFile, "0") - } - cmd := exec.Command("./cow-dedupe-range", args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - //fmt.Println("#", checksum) - //fmt.Println(cmd.Args) - return cmd.Run() + dupFilesTodo := dupFiles + for len(dupFilesTodo) > 0 { + dupFilesDoing := dupFilesTodo + arg_len := 0 + for i := range dupFilesDoing { + arg_len += len(dupFilesDoing[i]) + 3 + if arg_len > arg_max/2 || (i+15) > open_max { + dupFilesDoing = dupFilesDoing[:i-1] + break + } + } + dupFilesTodo = dupFilesTodo[len(dupFilesDoing):] + + args := baseArgs + for _, dupFile := range dupFilesDoing { + args = append(args, dupFile, "0") + } + + cmd := exec.Command("./cow-dedupe-range", args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if _err := cmd.Run(); _err != nil { + err = _err + } + } + return err } // [ 0s ] size-set[n/d]->c :: sha256-set[t+n/d]->c :: (summed=n deduped=n) :: verb[m/n] -- cgit v1.2.3