summary refs log tree commit diff
path: root/go/src/cow-dedupe/dedupe.go
diff options
context:
space:
mode:
Diffstat (limited to 'go/src/cow-dedupe/dedupe.go')
-rw-r--r-- go/src/cow-dedupe/dedupe.go 42
1 files changed, 31 insertions, 11 deletions
diff --git a/go/src/cow-dedupe/dedupe.go b/go/src/cow-dedupe/dedupe.go
index dc37491..bc543f1 100644
--- a/go/src/cow-dedupe/dedupe.go
+++ b/go/src/cow-dedupe/dedupe.go
@@ -19,6 +19,7 @@ import (
import "C"
var arg_max = int(C.sysconf(C._SC_ARG_MAX))
+var open_max = int(C.sysconf(C._SC_OPEN_MAX)) // sysconf(_SC_OPEN_MAX): the open-file limit; used to cap how many files go into one command
func errhandle(err error) {
if err != nil {
@@ -139,21 +140,40 @@ func pruneSingles(key2vals map[string][]string) {
func dedupe(srcFile string, dupFiles []string) error {
stat, err := os.Stat(srcFile)
- errhandle(err)
- args := []string{
+ if err != nil {
+ return err // report the stat failure to the caller
+ }
+ baseArgs := []string{ // arguments common to every ./cow-dedupe-range invocation
"-r", "--", strconv.FormatInt(stat.Size(), 10),
srcFile, "0",
}
- for _, dupFile := range dupFiles {
- args = append(args, dupFile, "0")
- }
- cmd := exec.Command("./cow-dedupe-range", args...)
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- //fmt.Println("#", checksum)
- //fmt.Println(cmd.Args)
- return cmd.Run()
+ dupFilesTodo := dupFiles // duplicates not yet handed to a command
+ for len(dupFilesTodo) > 0 { // one command per batch; every batch holds at least one file, so this terminates
+ dupFilesDoing := dupFilesTodo // candidate batch; truncated below if it exceeds a limit
+ arg_len := 0 // running byte count of the per-duplicate arguments
+ for i := range dupFilesDoing {
+ arg_len += len(dupFilesDoing[i]) + 3 // file name and its NUL, plus the "0" argument and its NUL
+ if i > 0 && (arg_len > arg_max/2 || (i+15) > open_max) { // i > 0: always accept the first file so the batch is never empty
+ dupFilesDoing = dupFilesDoing[:i] // keep entries [0, i); entry i is the one that crossed the limit
+ break
+ }
+ }
+ dupFilesTodo = dupFilesTodo[len(dupFilesDoing):] // advance past the batch just selected
+
+ args := baseArgs // the appends below copy, because the literal's capacity equals its length
+ for _, dupFile := range dupFilesDoing {
+ args = append(args, dupFile, "0")
+ }
+
+ cmd := exec.Command("./cow-dedupe-range", args...)
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if _err := cmd.Run(); _err != nil {
+ err = _err // remember the failure but keep processing the remaining batches
+ }
+ }
+ return err // nil, or the error from the last batch that failed
}
// [ 0s ] size-set[n/d]->c :: sha256-set[t+n/d]->c :: (summed=n deduped=n) :: verb[m/n]