summaryrefslogtreecommitdiff
path: root/lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go
blob: 4724c12162de5f18212e6f60da54ec66184b6779 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// Copyright (C) 2022-2023  Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package rebuildmappings

import (
	"context"
	"sort"

	"github.com/datawire/dlib/dlog"
	"golang.org/x/text/number"

	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
	"git.lukeshu.com/btrfs-progs-ng/lib/containers"
	"git.lukeshu.com/btrfs-progs-ng/lib/maps"
	"git.lukeshu.com/btrfs-progs-ng/lib/textui"
)

var minFuzzyPct = textui.Tunable(0.5)

type fuzzyRecord struct {
	PAddr btrfsvol.QualifiedPhysicalAddr
	N     int
}

func (a fuzzyRecord) Compare(b fuzzyRecord) int {
	switch {
	case a.N < b.N:
		return -1
	case a.N > b.N:
		return 1
	default:
		return 0
	}
}

func fuzzyMatchBlockGroupSums(ctx context.Context,
	fs *btrfs.FS,
	blockgroups map[btrfsvol.LogicalAddr]BlockGroup,
	physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr],
	logicalSums SumRunWithGaps[btrfsvol.LogicalAddr],
) error {
	_ctx := ctx

	ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.substep", "indexing")
	dlog.Info(ctx, "Indexing physical regions...") // O(m)
	regions := ListUnmappedPhysicalRegions(fs)
	physicalIndex := make(map[btrfssum.ShortSum][]btrfsvol.QualifiedPhysicalAddr)
	if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error {
		return region.Walk(ctx, func(paddr btrfsvol.PhysicalAddr, sum btrfssum.ShortSum) error {
			physicalIndex[sum] = append(physicalIndex[sum], btrfsvol.QualifiedPhysicalAddr{
				Dev:  devID,
				Addr: paddr,
			})
			return nil
		})
	}); err != nil {
		return err
	}
	dlog.Info(ctx, "... done indexing")

	ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.substep", "searching")
	dlog.Info(ctx, "Searching...")
	numBlockgroups := len(blockgroups)
	for i, bgLAddr := range maps.SortedKeys(blockgroups) {
		blockgroup := blockgroups[bgLAddr]
		bgRun := SumsForLogicalRegion(logicalSums, blockgroup.LAddr, blockgroup.Size)

		d := bgRun.PatLen()
		matches := make(map[btrfsvol.QualifiedPhysicalAddr]int)
		if err := bgRun.Walk(ctx, func(laddr btrfsvol.LogicalAddr, sum btrfssum.ShortSum) error { // O(n*…
			off := laddr.Sub(bgRun.Addr)
			for _, paddr := range physicalIndex[sum] { // …log(m)) expected but …m) worst
				key := btrfsvol.QualifiedPhysicalAddr{
					Dev:  paddr.Dev,
					Addr: paddr.Addr.Add(-off),
				}
				matches[key]++
			}
			return nil
		}); err != nil {
			return err
		}

		best := lowestN[fuzzyRecord]{N: 2}
		for paddr, n := range matches { // O(m)
			best.Insert(fuzzyRecord{
				PAddr: paddr,
				N:     d - n,
			})
		}

		var apply bool
		var matchesStr string
		switch len(best.Dat) {
		case 0: // can happen if there are no sums in the run
			matchesStr = ""
		case 1: // not sure how this can happen, but whatev
			pct := float64(d-best.Dat[0].N) / float64(d)
			matchesStr = textui.Sprintf("%v", number.Percent(pct))
			apply = pct > minFuzzyPct
		case 2:
			pct := float64(d-best.Dat[0].N) / float64(d)
			pct2 := float64(d-best.Dat[1].N) / float64(d)
			matchesStr = textui.Sprintf("best=%v secondbest=%v", number.Percent(pct), number.Percent(pct2))
			apply = pct > minFuzzyPct && pct2 < minFuzzyPct
		}
		lvl := dlog.LogLevelError
		if apply {
			lvl = dlog.LogLevelInfo
		}
		dlog.Logf(ctx, lvl, "(%v/%v) blockgroup[laddr=%v] matches=[%s]; bestpossible=%v (based on %v runs)",
			i+1, numBlockgroups, bgLAddr, matchesStr, number.Percent(bgRun.PctFull()), len(bgRun.Runs))
		if !apply {
			continue
		}

		mapping := btrfsvol.Mapping{
			LAddr:      blockgroup.LAddr,
			PAddr:      best.Dat[0].PAddr,
			Size:       blockgroup.Size,
			SizeLocked: true,
			Flags: containers.Optional[btrfsvol.BlockGroupFlags]{
				OK:  true,
				Val: blockgroup.Flags,
			},
		}
		if err := fs.LV.AddMapping(mapping); err != nil {
			dlog.Errorf(ctx, "error: %v", err)
			continue
		}
		delete(blockgroups, bgLAddr)
	}
	dlog.Info(ctx, "done searching")

	return nil
}

type lowestN[T containers.Ordered[T]] struct {
	N   int
	Dat []T
}

func (l *lowestN[T]) Insert(v T) {
	switch {
	case len(l.Dat) < l.N:
		l.Dat = append(l.Dat, v)
	case v.Compare(l.Dat[0]) < 0:
		l.Dat[0] = v
	default:
		return
	}
	sort.Slice(l.Dat, func(i, j int) bool {
		return l.Dat[i].Compare(l.Dat[j]) < 0
	})
}