summaryrefslogtreecommitdiff
path: root/lib/dedupe-range.c
blob: b102b8f4886a4af8f9c7156dc1d24fad782a186e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#include <assert.h>    /* for assert(3p) */
#include <errno.h>     /* for errno */
#include <error.h>     /* for error(3gnu) */
#include <fcntl.h>     /* for open(2) */
#include <inttypes.h>  /* for uint64_t, PRIu64 */
#include <linux/fs.h>  /* for FIDEDUPRANGE and related */
#include <stdbool.h>   /* for bool, true, false */
#include <stdlib.h>    /* for exit(3p), EXIT_SUCCESS, EXIT_FAILURE, malloc(3p), calloc(3p), free(3p) */
#include <sys/ioctl.h> /* for ioctl(2) */
#include <unistd.h>    /* for sysconf(3p), _SC_PAGESIZE */

#include "dedupe-range.h"

// Evaluates to the smaller of a and b (b when the two compare equal).
// This is a function-like macro: the selected argument is evaluated
// twice, so do not pass expressions that have side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Deduplicate one source byte range against a list of destination
 * ranges using the FIDEDUPERANGE ioctl.
 *
 * src_length: number of bytes to dedupe, starting at src.offset.
 * src:        source file; opened with src.flags.
 * dsts:       destination list, terminated by an entry whose filename
 *             is NULL; each entry is opened with its own flags and
 *             deduped starting at its own offset.
 *
 * Destinations are submitted in batches small enough that one request
 * structure fits in a single page.  For each batch the ioctl is
 * reissued until all of src_length has been processed.
 *
 * Returns nothing.  Every failure (open, allocation, ioctl, a range
 * that differs, a per-destination error status, or a call that makes no
 * progress) is reported with error(3) and terminates the process with
 * EXIT_FAILURE.
 */
void dedupe_range(uint64_t src_length, struct filepos src, struct filepos *dsts) {
	// Count how many destination ranges we have (the list ends at
	// the first entry whose filename is NULL)
	size_t dst_count;
	for (dst_count = 0; dsts[dst_count].filename; dst_count++);

	// Figure the maximum number of destination ranges we can fit
	// in 1 ioctl; the request buffer allocated below is one page
	errno = 0;
	const long page_size = sysconf(_SC_PAGESIZE);
	if (page_size < 0)
		error(EXIT_FAILURE, errno, "sysconf(_SC_PAGESIZE)");
	assert((size_t)page_size > sizeof(struct file_dedupe_range));
	const size_t max_dst_count = ((size_t)page_size - sizeof(struct file_dedupe_range))
		/ sizeof(struct file_dedupe_range_info);
	assert(max_dst_count > 0);

	// Open the source file
	int src_fd = open(src.filename, src.flags);
	if (src_fd < 0)
		error(EXIT_FAILURE, errno, "open src: %s", src.filename);

	// Open the destination files
	struct file_dedupe_range_info *range_info =
		calloc(dst_count, sizeof(struct file_dedupe_range_info));
	// calloc(0, ...) is allowed to return NULL; that is not a failure
	if (!range_info && dst_count > 0)
		error(EXIT_FAILURE, errno, "malloc");
	for (size_t i = 0; i < dst_count; i++) {
		int dst_fd = open(dsts[i].filename, dsts[i].flags);
		if (dst_fd < 0)
			error(EXIT_FAILURE, errno, "open dst[%zu]: %s", i, dsts[i].filename);
		range_info[i].dest_fd = dst_fd;
		range_info[i].dest_offset = dsts[i].offset;
	}

	// Do the work
	struct file_dedupe_range *range = malloc((size_t)page_size);
	if (!range)
		error(EXIT_FAILURE, errno, "malloc");
	for (size_t files_deduped = 0; files_deduped < dst_count; ) {
		// initialize the range structure
		*range = (struct file_dedupe_range){0};
		range->dest_count = MIN(dst_count - files_deduped, max_dst_count);
		assert(range->dest_count > 0);
		for (size_t i = 0; i < range->dest_count; i++)
			range->info[i] = range_info[files_deduped+i];

		// call FIDEDUPERANGE repeatedly to dedupe all of src_length
		uint64_t bytes_todo = src_length;
		uint64_t bytes_done = 0;
		while (bytes_todo > 0) {
			// allegedly, these need to be aligned to the
			// fundamental block size (statfs->f_frsize,
			// fall back to ->f_bsize)
			range->src_offset = src.offset + bytes_done;
			range->src_length = bytes_todo;

			if (ioctl(src_fd, FIDEDUPERANGE, range) < 0)
				error(EXIT_FAILURE, errno, "FIDEDUPERANGE");

			// how much actually got done?  The first destination
			// is the reference; every other one must agree.
			uint64_t bytes_deduped = range->info[0].bytes_deduped;
			assert(bytes_deduped <= range->src_length);
			bytes_done += bytes_deduped;
			bytes_todo -= bytes_deduped;

			// verify that dedupe for each dst went ok
			bool erred = false;
			for (size_t i = 0; i < range->dest_count; i++) {
				if (range->info[i].bytes_deduped != bytes_deduped) {
					// the ioctl itself succeeded, so errno holds
					// nothing relevant here; pass 0
					error(0, 0, "dedupe: %"PRIu64" != %"PRIu64": %s",
					      bytes_deduped,
					      (uint64_t)range->info[i].bytes_deduped,
					      dsts[files_deduped+i].filename);
					erred = true;
				}
				switch (range->info[i].status) {
				case FILE_DEDUPE_RANGE_DIFFERS:
					error(0, 0, "dedupe: range differs: %s", dsts[files_deduped+i].filename);
					erred = true;
					break;
				case FILE_DEDUPE_RANGE_SAME:
					// advance this destination for the next call
					range->info[i].dest_offset += range->info[i].bytes_deduped;
					break;
				default:
					// any other status is a negated errno value
					assert(range->info[i].status < 0);
					error(0, -range->info[i].status, "dedupe: %s", dsts[files_deduped+i].filename);
					erred = true;
					break;
				}
			}
			if (erred)
				exit(EXIT_FAILURE);

			// A successful call that deduped nothing would be
			// reissued with identical arguments forever; bail out.
			if (bytes_deduped == 0)
				error(EXIT_FAILURE, 0,
				      "dedupe: no progress at offset %"PRIu64": %s",
				      (uint64_t)(src.offset + bytes_done), src.filename);
		}
		files_deduped += range->dest_count;
	}
	free(range);

	for (size_t i = 0; i < dst_count; i++)
		close(range_info[i].dest_fd);
	free(range_info);

	close(src_fd);
}