From 0468c150fea1a3f0941b786c67bc27e797dade40 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 18 May 2018 18:27:16 -0400 Subject: fix dedupe-range --- lib/dedupe-range.c | 58 +++++++++++++++++++++++++++++++++----------------- lib/dedupe-range.h | 5 ++--- src/cow-dedupe-range.c | 26 +++++++++++----------- 3 files changed, 54 insertions(+), 35 deletions(-) diff --git a/lib/dedupe-range.c b/lib/dedupe-range.c index cdb5044..c9e9bb0 100644 --- a/lib/dedupe-range.c +++ b/lib/dedupe-range.c @@ -13,17 +13,23 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) -void dedupe_range(struct range src, struct range *dsts) { +void dedupe_range(uint64_t src_length, struct filepos src, struct filepos *dsts) { + // Count how many destination ranges we have size_t dst_count; for (dst_count = 0; dsts[dst_count].filename; dst_count++); + // Figure the maximum number of destination ranges we can fit + // in 1 ioctl const size_t max_dst_count = (sysconf(_SC_PAGESIZE) - sizeof(struct file_dedupe_range)) / sizeof(struct file_dedupe_range_info); + assert(max_dst_count > 0); + // Open the source file int src_fd = open(src.filename, src.flags); if (src_fd < 0) error(EXIT_FAILURE, errno, "open src: %s", src.filename); + // Open the destination files struct file_dedupe_range_info *range_info = calloc(dst_count, sizeof(struct file_dedupe_range_info)); if (!range_info) @@ -36,27 +42,36 @@ void dedupe_range(struct range src, struct range *dsts) { range_info[i].dest_offset = dsts[i].offset; } + // Do the work + struct file_dedupe_range *range = malloc(sysconf(_SC_PAGESIZE)); + if (!range) + error(EXIT_FAILURE, errno, "malloc"); for (size_t files_deduped = 0; files_deduped < dst_count; ) { - uint16_t dest_count = MIN(dst_count - files_deduped, max_dst_count); - struct file_dedupe_range *range = malloc(sizeof(struct file_dedupe_range) + dest_count * sizeof(struct file_dedupe_range_info)); - if (!range) - error(EXIT_FAILURE, errno, "malloc"); - *range = (struct file_dedupe_range){ - .src_offset = src.offset, - .src_length = src.length, - .dest_count = dest_count, - .reserved1 = 0, - .reserved2 = 0, - }; - for (size_t i = 0; i < dest_count; i++) + // initialize the range structure + *range = (struct file_dedupe_range){0}; + range->dest_count = MIN(dst_count - files_deduped, max_dst_count); + assert(range->dest_count > 0); + for (size_t i = 0;i < range->dest_count; i++) range->info[i] = range_info[files_deduped+i]; - bool erred = false; - while (range->src_length > 0) { - if (ioctl(src_fd, FIDEDUPERANGE, &range) < 0) - error(EXIT_FAILURE, errno, "ioctl (FIDEDUPERANGE)"); + // call FIDEDUPERANGE repeatedly to dedupe all of src_length + uint64_t bytes_todo = src_length; + uint64_t bytes_done = 0; + while (bytes_todo > 0) { + range->src_offset = src.offset + bytes_done; + range->src_length = MIN(bytes_todo, 16*1024*1024/*16MiB*/); // XXX "silently ignore"? + + if (ioctl(src_fd, FIDEDUPERANGE, range) < 0) + error(EXIT_FAILURE, errno, "FIDEDUPERANGE"); + + // how much actually got done? uint64_t bytes_deduped = range->info[0].bytes_deduped; assert(bytes_deduped <= range->src_length); + bytes_done += bytes_deduped; + bytes_todo -= bytes_deduped; + + // verify that dedupe for each dst went ok + bool erred = false; for (size_t i = 0; i < range->dest_count; i ++) { if (range->info[i].bytes_deduped != bytes_deduped) { error(0, errno, "dedupe: %"PRIu64" != %"PRIu64": %s", @@ -79,11 +94,14 @@ void dedupe_range(struct range src, struct range *dsts) { } if (erred == true) exit(EXIT_FAILURE); - range->src_offset += bytes_deduped; - range->src_length -= bytes_deduped; } files_deduped += range->dest_count; - free(range); } + free(range); + + for (size_t i = 0; i < dst_count; i++) + close(range_info[i].dest_fd); free(range_info); + + close(src_fd); } diff --git a/lib/dedupe-range.h b/lib/dedupe-range.h index 028fe08..142eafc 100644 --- a/lib/dedupe-range.h +++ b/lib/dedupe-range.h @@ -1,10 +1,9 @@ #include -struct range { +struct filepos { char *filename; int flags; /* to pass to open(2) */ uint64_t offset; - uint64_t length; }; -void dedupe_range(struct range src, struct range *dsts); +void dedupe_range(uint64_t length, struct filepos src, struct filepos *dsts); diff --git a/src/cow-dedupe-range.c b/src/cow-dedupe-range.c index bffa8f1..b96ead1 100644 --- a/src/cow-dedupe-range.c +++ b/src/cow-dedupe-range.c @@ -34,9 +34,9 @@ bool atou64(const char *str, uint64_t *ret) { } void usage() { - printf("Usage: %2$*1$s [OPTIONS] SRC_FILENAME SRC_OFFSET SRC_LENGTH \\\n" - " %3$*1$s DST_FILENAME DST_OFFSET \\\n" - " %3$*1$s [DST_FILENAME DST_OFFSET]...\n" + printf("Usage: %2$*1$s [OPTIONS] LENGTH SRC_FILENAME SRC_OFFSET \\\n" + " %3$*1$s DST_FILENAME DST_OFFSET \\\n" + " %3$*1$s [DST_FILENAME DST_OFFSET]...\n" "Submit a file deduplication request to the kernel.\n" "If the file ranges are not duplicates, the kernel will ignore this request.\n" "\n" @@ -76,19 +76,21 @@ int main(int argc, char *argv[]) { } if (argc - optind < 5) errusage("too few arguments"); - if ((argc - optind - 3) % 2 != 0) + if ((argc - optind - 1) % 2 != 0) errusage("wrong number of arguments"); - struct range src; - src.filename = argv[optind]; + uint64_t src_length; + if (!atou64(argv[optind], &src_length)) + error(2, errno, "invalid length '%s'", argv[optind]); + + struct filepos src; + src.filename = argv[optind+1]; src.flags = O_RDONLY; - if (!atou64(argv[optind+1], &src.offset)) - error(2, errno, "invalid offset '%s'", argv[optind+1]); - if (!atou64(argv[optind+2], &src.length)) - error(2, errno, "invalid length '%s'", argv[optind+2]); + if (!atou64(argv[optind+2], &src.offset)) + error(2, errno, "invalid offset '%s'", argv[optind+2]); const size_t dst_count = (argc - optind - 3) / 2; - struct range *dsts = calloc(dst_count + 1, sizeof(struct range)); + struct filepos *dsts = calloc(dst_count + 1, sizeof(struct filepos)); for (size_t i = 0; i < dst_count; i++) { dsts[i].filename = argv[optind+3+(i*2)]; dsts[i].flags = ro ? O_RDONLY : O_RDWR; @@ -96,7 +98,7 @@ int main(int argc, char *argv[]) { error(2, errno, "invalid offset '%s'", argv[optind+3+(i*2)+1]); } - dedupe_range(src, dsts); + dedupe_range(src_length, src, dsts); free(dsts); return EXIT_SUCCESS; } -- cgit v1.2.3