diff options
-rw-r--r-- | lib/btrfs/btrfsitem/item_uuid.go | 10 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go | 17 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go | 2 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go | 491 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/graph/graph.go | 118 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio/keyio.go | 130 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/orphans.go | 166 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild.go | 696 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild_graph.go | 16 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/scan.go | 41 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap/scan.go | 73 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap/uuidmap.go | 55 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/scandevices.go | 2 | ||||
-rw-r--r-- | lib/btrfsprogs/btrfsutil/broken_btree.go | 2 | ||||
-rw-r--r-- | lib/containers/sortedmap.go | 30 | ||||
-rwxr-xr-x | scripts/main.sh | 12 |
16 files changed, 1159 insertions, 702 deletions
diff --git a/lib/btrfs/btrfsitem/item_uuid.go b/lib/btrfs/btrfsitem/item_uuid.go index 451ccae..e1a6cf9 100644 --- a/lib/btrfs/btrfsitem/item_uuid.go +++ b/lib/btrfs/btrfsitem/item_uuid.go @@ -24,6 +24,14 @@ type UUIDMap struct { // UUID_SUBVOL=251 UUID_RECEIVED_SUBVOL=252 func KeyToUUID(key btrfsprim.Key) btrfsprim.UUID { var uuid btrfsprim.UUID binary.LittleEndian.PutUint64(uuid[:8], uint64(key.ObjectID)) - binary.LittleEndian.PutUint64(uuid[8:], uint64(key.Offset)) + binary.LittleEndian.PutUint64(uuid[8:], key.Offset) return uuid } + +func UUIDToKey(uuid btrfsprim.UUID) btrfsprim.Key { + return btrfsprim.Key{ + ObjectID: btrfsprim.ObjID(binary.LittleEndian.Uint64(uuid[:8])), + ItemType: UUID_SUBVOL_KEY, + Offset: binary.LittleEndian.Uint64(uuid[8:]), + } +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go index 9b2da93..537a970 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go @@ -20,7 +20,7 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/slices" ) -func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevicesResult) (addrspace *containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], btrfsinspect.SysExtentCSum], sumSize int) { +func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevicesResult) btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr] { var records []btrfsinspect.SysExtentCSum for _, devResults := range scanResults { records = append(records, devResults.FoundExtentCSums...) @@ -38,9 +38,9 @@ func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevice } }) if len(records) == 0 { - return nil, 0 + return btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr]{} } - sumSize = records[0].Sums.ChecksumSize + sumSize := records[0].Sums.ChecksumSize // Now build them in to a coherent address space. // @@ -53,7 +53,7 @@ func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevice // "AAAAAAA" shouldn't be present, and if we just discard "BBBBBBBB" // because it conflicts with "CCCCCCC", then we would erroneously // include "AAAAAAA". - addrspace = &containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], btrfsinspect.SysExtentCSum]{ + addrspace := &containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], btrfsinspect.SysExtentCSum]{ KeyFn: func(item btrfsinspect.SysExtentCSum) containers.NativeOrdered[btrfsvol.LogicalAddr] { return containers.NativeOrdered[btrfsvol.LogicalAddr]{Val: item.Sums.Addr} }, @@ -148,15 +148,6 @@ func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevice } } - return addrspace, sumSize -} - -func ExtractAndFlattenLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevicesResult) btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr] { - addrspace, sumSize := ExtractLogicalSums(ctx, scanResults) - if addrspace == nil { - return btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr]{} - } - // Now flatten that RBTree in to a SumRunWithGaps. var flattened btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr] var curAddr btrfsvol.LogicalAddr diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go index c391053..f1959b4 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go @@ -145,7 +145,7 @@ func RebuildMappings(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect dlog.Infof(ctx, "5/6: Searching for %d block groups in checksum map (exact)...", len(bgs)) physicalSums := ExtractPhysicalSums(scanResults) - logicalSums := ExtractAndFlattenLogicalSums(ctx, scanResults) + logicalSums := ExtractLogicalSums(ctx, scanResults) if err := matchBlockGroupSums(ctx, fs, bgs, physicalSums, logicalSums); err != nil { return err } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go new file mode 100644 index 0000000..50c75a4 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -0,0 +1,491 @@ +// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package btrees + +import ( + "context" + "fmt" + "time" + + "github.com/datawire/dlib/dlog" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + pkggraph "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" + "git.lukeshu.com/btrfs-progs-ng/lib/slices" + "git.lukeshu.com/btrfs-progs-ng/lib/textui" +) + +type rebuiltTree struct { + // static + ID btrfsprim.ObjID + UUID btrfsprim.UUID + Parent *rebuiltTree + ParentGen btrfsprim.Generation // offset of this tree's root item + + // all leafs (lvl=0) that pass .isOwnerOK, even if not in the tree + leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] + keys containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] + + // mutable + Roots containers.Set[btrfsvol.LogicalAddr] + Leafs containers.Set[btrfsvol.LogicalAddr] + Items containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] +} + +// isOwnerOK returns whether it is permissible for a node with +// .Head.Owner=owner to be in this tree. +func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID, gen btrfsprim.Generation) bool { + for { + if owner == tree.ID { + return true + } + if tree.Parent == nil || gen >= tree.ParentGen { + return false + } + tree = tree.Parent + } +} + +// cowDistance returns how many COW-snapshots down the 'tree' is from +// the 'parent'. +func (tree *rebuiltTree) cowDistance(parentID btrfsprim.ObjID) (dist int, ok bool) { + for { + if parentID == tree.ID { + return dist, true + } + if tree.Parent == nil { + return 0, false + } + tree = tree.Parent + dist++ + } +} + +func (tree *rebuiltTree) shouldReplace(graph pkggraph.Graph, oldNode, newNode btrfsvol.LogicalAddr) bool { + oldDist, _ := tree.cowDistance(graph.Nodes[oldNode].Owner) + newDist, _ := tree.cowDistance(graph.Nodes[newNode].Owner) + switch { + case newDist < oldDist: + // Replace the old one with the new lower-dist one. + return true + case newDist > oldDist: + // Retain the old lower-dist one. + return false + default: + oldGen := graph.Nodes[oldNode].Generation + newGen := graph.Nodes[newNode].Generation + switch { + case newGen > oldGen: + // Replace the old one with the new higher-gen one. + return true + case newGen < oldGen: + // Retain the old higher-gen one. + return false + default: + // This is a panic because I'm not really sure what the best way to + // handle this is, and so if this happens I want the program to crash + // and force me to figure out how to handle it. + panic(fmt.Errorf("dup nodes in tree=%v: old=%v=%v ; new=%v=%v", + tree.ID, + oldNode, graph.Nodes[oldNode], + newNode, graph.Nodes[newNode])) + } + } +} + +// RebuiltTrees is an abstraction for rebuilding and accessing +// potentially broken btrees. +// +// It is conceptually a btrfstree.TreeOperator, and adds similar +// broken-tree handling to btrfsutil.BrokenTrees. However, the API is +// different thant btrfstree.TreeOperator, and is much more efficient +// than btrfsutil.BrokenTrees. +// +// The efficiency improvements are possible because of the API +// differences, which are necessary for how it is used in +// rebuildnodes: +// +// - it consumes an already-read graph.Graph instead of reading the +// graph itself +// +// - it does not use `btrfstree.TreePath` +// +// - it does not keep track of errors encountered in a tree +// +// Additionally, it provides some functionality that +// btrfsutil.BrokenTrees does not: +// +// - it provides a .LeafToRoots() method to advise on what +// additional roots should be added +// +// - it provides a .COWDistance() method to compare how related two +// trees are +// +// A zero RebuiltTrees is invalid; it must be initialized with +// NewRebuiltTrees(). +type RebuiltTrees struct { + // static + sb btrfstree.Superblock + graph pkggraph.Graph + keyIO *keyio.Handle + + // static callbacks + cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) + cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool) + cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) + + // mutable + trees map[btrfsprim.ObjID]*rebuiltTree +} + +// NewRebuiltTrees returns a new RebuiltTrees instance. All of the +// callbacks must be non-nil. +func NewRebuiltTrees( + sb btrfstree.Superblock, graph pkggraph.Graph, keyIO *keyio.Handle, + cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key), + cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool), + cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool), +) *RebuiltTrees { + return &RebuiltTrees{ + sb: sb, + graph: graph, + keyIO: keyIO, + + cbAddedItem: cbAddedItem, + cbLookupRoot: cbLookupRoot, + cbLookupUUID: cbLookupUUID, + + trees: make(map[btrfsprim.ObjID]*rebuiltTree), + } +} + +type rootStats struct { + TreeID btrfsprim.ObjID + RootNode btrfsvol.LogicalAddr + + DoneLeafs int + TotalLeafs int + AddedItems int + ReplacedItems int +} + +func (s rootStats) String() string { + return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items, replaced %v items)", + s.TreeID, s.RootNode, + int(100*float64(s.DoneLeafs)/float64(s.TotalLeafs)), + s.DoneLeafs, s.TotalLeafs, + s.AddedItems, s.ReplacedItems) +} + +// AddRoot adds an additional root node to an existing tree. It is +// useful to call .AddRoot() to re-attach part of the tree that has +// been broken off. +// +// It is invalid (panic) to call AddRoot for a tree without having +// called AddTree first. +func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, rootNode btrfsvol.LogicalAddr) { + tree := ts.trees[treeID] + tree.Roots.Insert(rootNode) + + progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, 1*time.Second) + numAdded := 0 + numReplaced := 0 + progress := func(done int) { + progressWriter.Set(rootStats{ + TreeID: treeID, + RootNode: rootNode, + DoneLeafs: done, + TotalLeafs: len(tree.leafToRoots), + AddedItems: numAdded, + ReplacedItems: numReplaced, + }) + } + for i, leaf := range maps.SortedKeys(tree.leafToRoots) { + progress(i) + if tree.Leafs.Has(leaf) || !tree.leafToRoots[leaf].Has(rootNode) { + continue + } + tree.Leafs.Insert(leaf) + for j, itemKey := range ts.graph.Nodes[leaf].Items { + newPtr := keyio.ItemPtr{ + Node: leaf, + Idx: j, + } + if oldPtr, exists := tree.Items.Load(itemKey); !exists { + tree.Items.Store(itemKey, newPtr) + numAdded++ + } else if tree.shouldReplace(ts.graph, oldPtr.Node, newPtr.Node) { + tree.Items.Store(itemKey, newPtr) + numReplaced++ + } + ts.cbAddedItem(ctx, treeID, itemKey) + progress(i) + } + } + progress(len(tree.leafToRoots)) + progressWriter.Done() +} + +// AddTree initializes the given tree, returning true if it was able +// to do so, or false if there was a problem and nothing was done. +// The tree is initialized with the normal root node of the tree. +// +// Subsequent calls to AddTree for the same tree are no-ops. +func (ts *RebuiltTrees) AddTree(ctx context.Context, treeID btrfsprim.ObjID) (ok bool) { + return ts.addTree(ctx, treeID, nil) +} + +func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, stack []btrfsprim.ObjID) (ok bool) { + if _, ok := ts.trees[treeID]; ok { + return true + } + if slices.Contains(treeID, stack) { + return false + } + + tree := &rebuiltTree{ + ID: treeID, + Roots: make(containers.Set[btrfsvol.LogicalAddr]), + Leafs: make(containers.Set[btrfsvol.LogicalAddr]), + } + var root btrfsvol.LogicalAddr + switch treeID { + case btrfsprim.ROOT_TREE_OBJECTID: + root = ts.sb.RootTree + case btrfsprim.CHUNK_TREE_OBJECTID: + root = ts.sb.ChunkTree + case btrfsprim.TREE_LOG_OBJECTID: + root = ts.sb.LogTree + case btrfsprim.BLOCK_GROUP_TREE_OBJECTID: + root = ts.sb.BlockGroupRoot + default: + stack := append(stack, treeID) + if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { + return false + } + rootOff, rootItem, ok := ts.cbLookupRoot(ctx, treeID) + if !ok { + return false + } + root = rootItem.ByteNr + tree.UUID = rootItem.UUID + if rootItem.ParentUUID != (btrfsprim.UUID{}) { + tree.ParentGen = rootOff + if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { + return false + } + parentID, ok := ts.cbLookupUUID(ctx, rootItem.ParentUUID) + if !ok { + return false + } + if !ts.addTree(ctx, parentID, append(stack, treeID)) { + return false + } + tree.Parent = ts.trees[parentID] + } + } + tree.indexLeafs(ctx, ts.graph) + + ts.trees[treeID] = tree + if root != 0 { + ts.AddRoot(ctx, treeID, root) + } + + return true +} + +type indexStats struct { + TreeID btrfsprim.ObjID + DoneNodes int + TotalNodes int +} + +func (s indexStats) String() string { + return fmt.Sprintf("tree %v: indexing leaf nodes: %v%% (%v/%v)", + s.TreeID, + int(100*float64(s.DoneNodes)/float64(s.TotalNodes)), + s.DoneNodes, s.TotalNodes) +} + +func (tree *rebuiltTree) indexLeafs(ctx context.Context, graph pkggraph.Graph) { + nodeToRoots := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) + progressWriter := textui.NewProgress[indexStats](ctx, dlog.LogLevelInfo, 1*time.Second) + progress := func() { + progressWriter.Set(indexStats{ + TreeID: tree.ID, + DoneNodes: len(nodeToRoots), + TotalNodes: len(graph.Nodes), + }) + } + progress() + for _, node := range maps.SortedKeys(graph.Nodes) { + tree.indexNode(graph, node, nodeToRoots, progress, nil) + } + progressWriter.Done() + + tree.leafToRoots = make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) + for node, roots := range nodeToRoots { + if graph.Nodes[node].Level == 0 && len(roots) > 0 { + tree.leafToRoots[node] = roots + } + } +} + +func (tree *rebuiltTree) indexNode(graph pkggraph.Graph, node btrfsvol.LogicalAddr, index map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr], progress func(), stack []btrfsvol.LogicalAddr) { + defer progress() + if _, done := index[node]; done { + return + } + if slices.Contains(node, stack) { + panic("loop") + } + if !tree.isOwnerOK(graph.Nodes[node].Owner, graph.Nodes[node].Generation) { + index[node] = nil + return + } + + // tree.leafToRoots + stack = append(stack, node) + var roots containers.Set[btrfsvol.LogicalAddr] + kps := slices.RemoveAllFunc(graph.EdgesTo[node], func(kp *pkggraph.Edge) bool { + return !tree.isOwnerOK(graph.Nodes[kp.FromNode].Owner, graph.Nodes[kp.FromNode].Generation) + }) + for _, kp := range kps { + tree.indexNode(graph, kp.FromNode, index, progress, stack) + if len(index[kp.FromNode]) > 0 { + if roots == nil { + roots = make(containers.Set[btrfsvol.LogicalAddr]) + } + roots.InsertFrom(index[kp.FromNode]) + } + } + if roots == nil { + roots = containers.NewSet[btrfsvol.LogicalAddr](node) + } + index[node] = roots + + // tree.keys + for i, key := range graph.Nodes[node].Items { + if oldPtr, ok := tree.keys.Load(key); !ok || tree.shouldReplace(graph, oldPtr.Node, node) { + tree.keys.Store(key, keyio.ItemPtr{ + Node: node, + Idx: i, + }) + } + } +} + +// Load reads an item from a tree. +// +// It is not nescessary to call AddTree for that tree first; Load will +// call it for you. +func (ts *RebuiltTrees) Load(ctx context.Context, treeID btrfsprim.ObjID, key btrfsprim.Key) (item btrfsitem.Item, ok bool) { + if !ts.AddTree(ctx, treeID) { + return nil, false + } + ptr, ok := ts.trees[treeID].Items.Load(key) + if !ok { + return nil, false + } + return ts.keyIO.ReadItem(ptr) +} + +// Search searches for an item from a tree. +// +// It is not nescessary to call AddTree for that tree first; Search +// will call it for you. +func (ts *RebuiltTrees) Search(ctx context.Context, treeID btrfsprim.ObjID, fn func(btrfsprim.Key) int) (key btrfsprim.Key, ok bool) { + if !ts.AddTree(ctx, treeID) { + return btrfsprim.Key{}, false + } + k, _, ok := ts.trees[treeID].Items.Search(func(k btrfsprim.Key, _ keyio.ItemPtr) int { + return fn(k) + }) + return k, ok +} + +// Search searches for a range of items from a tree. +// +// It is not nescessary to call AddTree for that tree first; SearchAll +// will call it for you. +func (ts *RebuiltTrees) SearchAll(ctx context.Context, treeID btrfsprim.ObjID, fn func(btrfsprim.Key) int) []btrfsprim.Key { + if !ts.AddTree(ctx, treeID) { + return nil + } + kvs := ts.trees[treeID].Items.SearchAll(func(k btrfsprim.Key, _ keyio.ItemPtr) int { + return fn(k) + }) + if len(kvs) == 0 { + return nil + } + ret := make([]btrfsprim.Key, len(kvs)) + for i := range kvs { + ret[i] = kvs[i].K + } + return ret +} + +// LeafToRoots returns the list of potential roots (to pass to +// .AddRoot) that include a given leaf-node. +// +// It is not nescessary to call AddTree for the tree first; +// LeafToRoots will call it for you. +func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID, leaf btrfsvol.LogicalAddr) containers.Set[btrfsvol.LogicalAddr] { + if !ts.AddTree(ctx, treeID) { + return nil + } + if ts.graph.Nodes[leaf].Level != 0 { + panic(fmt.Errorf("should not happen: NodeToRoots(tree=%v, leaf=%v): not a leaf", + treeID, leaf)) + } + ret := make(containers.Set[btrfsvol.LogicalAddr]) + for root := range ts.trees[treeID].leafToRoots[leaf] { + if ts.trees[treeID].Roots.Has(root) { + panic(fmt.Errorf("should not happen: NodeToRoots(tree=%v, leaf=%v): tree contains root=%v but not leaf", + treeID, leaf, root)) + } + ret.Insert(root) + } + if len(ret) == 0 { + return nil + } + return ret +} + +// Keys returns a map of all keys in node that would be valid in this tree. +// +// It is invalid (panic) to call Keys for a tree without having called +// AddTree first. +func (ts *RebuiltTrees) Keys(treeID btrfsprim.ObjID) *containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] { + return &ts.trees[treeID].keys +} + +// COWDistance returns how many COW-snapshots down from the 'child' +// tree is from the 'parent' tree. +// +// It is invalid (panic) to call COWDistance for a tree without having +// called AddTree for the child first. +func (ts *RebuiltTrees) COWDistance(ctx context.Context, childID, parentID btrfsprim.ObjID) (dist int, ok bool) { + return ts.trees[childID].cowDistance(parentID) +} + +// ListRoots returns a listing of all initialized trees and their root +// nodes. +// +// Do not mutate the set of roots for a tree; it is a pointer to the +// RebuiltTrees' internal set! +func (ts *RebuiltTrees) ListRoots() map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr] { + ret := make(map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr], len(ts.trees)) + for treeID := range ts.trees { + ret[treeID] = ts.trees[treeID].Roots + } + return ret +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph/graph.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph/graph.go index 7ae3b89..1180b0d 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph/graph.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph/graph.go @@ -6,6 +6,7 @@ package graph import ( "fmt" + "reflect" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" @@ -22,12 +23,28 @@ type Node struct { NumItems uint32 MinItem btrfsprim.Key MaxItem btrfsprim.Key + Items []btrfsprim.Key +} + +func (n Node) String() string { + if reflect.ValueOf(n).IsZero() { + return "{}" + } + return fmt.Sprintf(`{lvl:%v, gen:%v, tree:%v, cnt:%v, min:(%v,%v,%v), max:(%v,%v,%v)}`, + n.Level, n.Generation, n.Owner, n.NumItems, + n.MinItem.ObjectID, n.MinItem.ItemType, n.MinItem.Offset, + n.MaxItem.ObjectID, n.MaxItem.ItemType, n.MaxItem.Offset) } type Edge struct { - FromTree btrfsprim.ObjID + // It is invalid both 'FromRoot' and 'FromNode' to be + // non-zero. If both are zero, then the Edge is from the + // superblock. + FromRoot btrfsvol.LogicalAddr FromNode btrfsvol.LogicalAddr - FromItem int + FromItem int // only valid if one of FromRoot or FromNode is non-zero + + FromTree btrfsprim.ObjID ToNode btrfsvol.LogicalAddr ToLevel uint8 @@ -36,8 +53,19 @@ type Edge struct { } func (kp Edge) String() string { - return fmt.Sprintf(`{t:%v,n:%v}[%d]->{n:%v,l:%v,g:%v,k:(%d,%v,%d)}`, - kp.FromTree, kp.FromNode, kp.FromItem, + var from string + switch { + case kp.FromRoot != 0: + from = fmt.Sprintf("root@%v[%d]:%v", + kp.FromRoot, kp.FromItem, kp.FromTree) + case kp.FromNode != 0: + from = fmt.Sprintf("{node:%v, tree:%v}[%d]", + kp.FromNode, kp.FromTree, kp.FromItem) + default: + from = fmt.Sprintf("superblock:%v", kp.FromTree) + } + return fmt.Sprintf(`%s -> {n:%v,l:%v,g:%v,k:(%v,%v,%v)}`, + from, kp.ToNode, kp.ToLevel, kp.ToGeneration, kp.ToKey.ObjectID, kp.ToKey.ItemType, @@ -51,13 +79,18 @@ type Graph struct { EdgesTo map[btrfsvol.LogicalAddr][]*Edge } -func (g Graph) insertEdge(kp Edge) { - ptr := &kp - if kp.ToNode == 0 { +func (g Graph) insertEdge(ptr *Edge) { + if ptr.ToNode == 0 { panic("kp.ToNode should not be zero") } - g.EdgesFrom[kp.FromNode] = append(g.EdgesFrom[kp.FromNode], ptr) - g.EdgesTo[kp.ToNode] = append(g.EdgesTo[kp.ToNode], ptr) + if ptr.FromRoot != 0 && ptr.FromNode != 0 { + panic("kp.FromRoot and kp.FromNode should not both be set") + } + if (ptr.FromRoot == 0 && ptr.FromNode == 0) && ptr.FromItem != 0 { + panic("kp.FromItem should only be set if either kp.FromRoot or kp.FromItem is set") + } + g.EdgesFrom[ptr.FromNode] = append(g.EdgesFrom[ptr.FromNode], ptr) + g.EdgesTo[ptr.ToNode] = append(g.EdgesTo[ptr.ToNode], ptr) } func (g Graph) insertTreeRoot(sb btrfstree.Superblock, treeID btrfsprim.ObjID) { @@ -72,7 +105,7 @@ func (g Graph) insertTreeRoot(sb btrfstree.Superblock, treeID btrfsprim.ObjID) { if treeInfo.RootNode == 0 { return } - g.insertEdge(Edge{ + g.insertEdge(&Edge{ FromTree: treeID, ToNode: treeInfo.RootNode, ToLevel: treeInfo.Level, @@ -99,19 +132,7 @@ func New(sb btrfstree.Superblock) *Graph { } func (g *Graph) InsertNode(nodeRef *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]) { - for _, item := range nodeRef.Data.BodyLeaf { - switch itemBody := item.Body.(type) { - case btrfsitem.Root: - g.insertEdge(Edge{ - FromTree: item.Key.ObjectID, - ToNode: itemBody.ByteNr, - ToLevel: itemBody.Level, - ToGeneration: itemBody.Generation, - }) - } - } - - g.Nodes[nodeRef.Addr] = Node{ + nodeData := Node{ Level: nodeRef.Data.Head.Level, Generation: nodeRef.Data.Head.Generation, Owner: nodeRef.Data.Head.Owner, @@ -119,16 +140,47 @@ func (g *Graph) InsertNode(nodeRef *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.N MinItem: discardOK(nodeRef.Data.MinItem()), MaxItem: discardOK(nodeRef.Data.MaxItem()), } - for i, kp := range nodeRef.Data.BodyInternal { - g.insertEdge(Edge{ - FromTree: nodeRef.Data.Head.Owner, - FromNode: nodeRef.Addr, - FromItem: i, - ToNode: kp.BlockPtr, - ToLevel: nodeRef.Data.Head.Level - 1, - ToKey: kp.Key, - ToGeneration: kp.Generation, - }) + + if nodeRef.Data.Head.Level == 0 { + cnt := 0 + for _, item := range nodeRef.Data.BodyLeaf { + if _, ok := item.Body.(btrfsitem.Root); ok { + cnt++ + } + } + kps := make([]Edge, 0, cnt) + keys := make([]btrfsprim.Key, len(nodeRef.Data.BodyLeaf)) + nodeData.Items = keys + g.Nodes[nodeRef.Addr] = nodeData + for i, item := range nodeRef.Data.BodyLeaf { + keys[i] = item.Key + if itemBody, ok := item.Body.(btrfsitem.Root); ok { + kps = append(kps, Edge{ + FromRoot: nodeRef.Addr, + FromItem: i, + FromTree: item.Key.ObjectID, + ToNode: itemBody.ByteNr, + ToLevel: itemBody.Level, + ToGeneration: itemBody.Generation, + }) + g.insertEdge(&kps[len(kps)-1]) + } + } + } else { + g.Nodes[nodeRef.Addr] = nodeData + kps := make([]Edge, len(nodeRef.Data.BodyInternal)) + for i, kp := range nodeRef.Data.BodyInternal { + kps[i] = Edge{ + FromNode: nodeRef.Addr, + FromItem: i, + FromTree: nodeRef.Data.Head.Owner, + ToNode: kp.BlockPtr, + ToLevel: nodeRef.Data.Head.Level - 1, + ToKey: kp.Key, + ToGeneration: kp.Generation, + } + g.insertEdge(&kps[i]) + } } } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio/keyio.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio/keyio.go new file mode 100644 index 0000000..d92495e --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio/keyio.go @@ -0,0 +1,130 @@ +// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package keyio + +import ( + "fmt" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" +) + +type ItemPtr struct { + Node btrfsvol.LogicalAddr + Idx int +} + +func (ptr ItemPtr) String() string { + return fmt.Sprintf("node@%v[%v]", ptr.Node, ptr.Idx) +} + +type SizeAndErr struct { + Size uint64 + Err error +} + +type Handle struct { + rawFile diskio.File[btrfsvol.LogicalAddr] + sb btrfstree.Superblock + graph graph.Graph + + Sizes map[ItemPtr]SizeAndErr + + cache *containers.LRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]] +} + +func NewHandle(file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock) *Handle { + return &Handle{ + rawFile: file, + sb: sb, + + Sizes: make(map[ItemPtr]SizeAndErr), + + cache: containers.NewLRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]](8), + } +} + +func (o *Handle) InsertNode(nodeRef *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]) { + for i, item := range nodeRef.Data.BodyLeaf { + ptr := ItemPtr{ + Node: nodeRef.Addr, + Idx: i, + } + switch itemBody := item.Body.(type) { + case btrfsitem.ExtentCSum: + o.Sizes[ptr] = SizeAndErr{ + Size: uint64(itemBody.Size()), + Err: nil, + } + case btrfsitem.FileExtent: + size, err := itemBody.Size() + o.Sizes[ptr] = SizeAndErr{ + Size: uint64(size), + Err: err, + } + case btrfsitem.Error: + switch item.Key.ItemType { + case btrfsprim.EXTENT_CSUM_KEY, btrfsprim.EXTENT_DATA_KEY: + o.Sizes[ptr] = SizeAndErr{ + Err: fmt.Errorf("error decoding item: ptr=%v (tree=%v key=%v): %w", + ptr, nodeRef.Data.Head.Owner, item.Key, itemBody.Err), + } + } + } + } +} + +func (o *Handle) SetGraph(graph graph.Graph) { + o.graph = graph +} + +func (o *Handle) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node] { + if cached, ok := o.cache.Get(laddr); ok { + return cached + } + + graphInfo, ok := o.graph.Nodes[laddr] + if !ok { + panic(fmt.Errorf("should not happen: node@%v is not mentioned in the in-memory graph", laddr)) + } + + ref, err := btrfstree.ReadNode(o.rawFile, o.sb, laddr, btrfstree.NodeExpectations{ + LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr}, + Level: containers.Optional[uint8]{OK: true, Val: graphInfo.Level}, + Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: graphInfo.Generation}, + Owner: func(treeID btrfsprim.ObjID) error { + if treeID != graphInfo.Owner { + return fmt.Errorf("expected owner=%v but claims to have owner=%v", + graphInfo.Owner, treeID) + } + return nil + }, + MinItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MinItem}, + MaxItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MaxItem}, + }) + if err != nil { + panic(fmt.Errorf("should not happen: i/o error: %w", err)) + } + + o.cache.Add(laddr, ref) + + return ref +} + +func (o *Handle) ReadItem(ptr ItemPtr) (item btrfsitem.Item, ok bool) { + if o.graph.Nodes[ptr.Node].Level != 0 || ptr.Idx < 0 { + return nil, false + } + items := o.readNode(ptr.Node).Data.BodyLeaf + if ptr.Idx >= len(items) { + return nil, false + } + return items[ptr.Idx].Body, true +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/orphans.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/orphans.go deleted file mode 100644 index 51313a9..0000000 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/orphans.go +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package rebuildnodes - -import ( - "context" - "fmt" - "time" - - "github.com/datawire/dlib/dlog" - - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" - "git.lukeshu.com/btrfs-progs-ng/lib/containers" - "git.lukeshu.com/btrfs-progs-ng/lib/diskio" - "git.lukeshu.com/btrfs-progs-ng/lib/maps" - "git.lukeshu.com/btrfs-progs-ng/lib/textui" -) - -func listRoots(graph graph.Graph, leaf btrfsvol.LogicalAddr) containers.Set[btrfsvol.LogicalAddr] { - kps := graph.EdgesTo[leaf] - if len(kps) == 0 { - return containers.NewSet(leaf) - } - ret := make(containers.Set[btrfsvol.LogicalAddr]) - for _, kp := range kps { - ret.InsertFrom(listRoots(graph, kp.FromNode)) - } - return ret -} - -func walk(graph graph.Graph, root btrfsvol.LogicalAddr, fn func(btrfsvol.LogicalAddr) bool) { - if _, ok := graph.Nodes[root]; !ok { - return - } - if !fn(root) { - return - } - for _, kp := range graph.EdgesFrom[root] { - walk(graph, kp.ToNode, fn) - } -} - -type keyAndTree struct { - btrfsprim.Key - TreeID btrfsprim.ObjID -} - -func (a keyAndTree) Cmp(b keyAndTree) int { - if d := a.Key.Cmp(b.Key); d != 0 { - return d - } - return containers.NativeCmp(a.TreeID, b.TreeID) -} - -type crawlStats struct { - DoneOrphans int - TotalOrphans int - - VisitedNodes int - FoundLeafs int -} - -func (s crawlStats) String() string { - return fmt.Sprintf("... crawling orphans %v%% (%v/%v); visited %d nodes, found %d leaf nodes", - int(100*float64(s.DoneOrphans)/float64(s.TotalOrphans)), - s.DoneOrphans, s.TotalOrphans, s.VisitedNodes, s.FoundLeafs) -} - -type readStats struct { - DoneLeafNodes int - TotalLeafNodes int -} - -func (s readStats) String() string { - return fmt.Sprintf("... reading leafs %v%% (%v/%v)", - int(100*float64(s.DoneLeafNodes)/float64(s.TotalLeafNodes)), - s.DoneLeafNodes, s.TotalLeafNodes) -} - -func indexOrphans(ctx context.Context, fs diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph graph.Graph) ( - orphans containers.Set[btrfsvol.LogicalAddr], - leaf2orphans map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr], - key2leaf *containers.SortedMap[keyAndTree, btrfsvol.LogicalAddr], - err error, -) { - dlog.Info(ctx, "... counting orphans") - orphans = make(containers.Set[btrfsvol.LogicalAddr]) - for node := range graph.Nodes { - if len(graph.EdgesTo[node]) == 0 { - orphans.Insert(node) - } - } - - leaf2orphans = make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) - visited := make(containers.Set[btrfsvol.LogicalAddr]) - - done := 0 - crawlProgressWriter := textui.NewProgress[crawlStats](ctx, dlog.LogLevelInfo, 1*time.Second) - progress := func() { - crawlProgressWriter.Set(crawlStats{ - DoneOrphans: done, - TotalOrphans: len(orphans), - VisitedNodes: len(visited), - FoundLeafs: len(leaf2orphans), - }) - } - progress() - for _, orphan := range maps.SortedKeys(orphans) { - walk(graph, orphan, func(node btrfsvol.LogicalAddr) bool { - if visited.Has(node) { - return false - } - visited.Insert(node) - if graph.Nodes[node].Level == 0 { - if roots := listRoots(graph, node); !roots.Has(0) { - leaf2orphans[node] = roots - } - } - progress() - return true - }) - done++ - progress() - } - crawlProgressWriter.Done() - - key2leaf = new(containers.SortedMap[keyAndTree, btrfsvol.LogicalAddr]) - done = 0 - readProgressWriter := textui.NewProgress[readStats](ctx, dlog.LogLevelInfo, 1*time.Second) - progress = func() { - readProgressWriter.Set(readStats{ - DoneLeafNodes: done, - TotalLeafNodes: len(leaf2orphans), - }) - } - progress() - for _, laddr := range maps.SortedKeys(leaf2orphans) { - nodeRef, err := btrfstree.ReadNode[btrfsvol.LogicalAddr](fs, sb, laddr, btrfstree.NodeExpectations{ - LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr}, - Level: containers.Optional[uint8]{OK: true, Val: 0}, - }) - if err != nil { - return nil, nil, nil, err - } - - for _, item := range nodeRef.Data.BodyLeaf { - k := keyAndTree{ - Key: item.Key, - TreeID: nodeRef.Data.Head.Owner, - } - if cur, ok := key2leaf.Load(k); !ok || graph.Nodes[cur].Generation < nodeRef.Data.Head.Generation { - key2leaf.Store(k, laddr) - } - } - done++ - progress() - } - readProgressWriter.Done() - - return orphans, leaf2orphans, key2leaf, nil -} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild.go index ef50653..66eaf1a 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild.go @@ -8,46 +8,50 @@ import ( "context" "fmt" "sort" + "time" "github.com/datawire/dlib/dlog" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildmappings" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees" "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsutil" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio" "git.lukeshu.com/btrfs-progs-ng/lib/containers" "git.lukeshu.com/btrfs-progs-ng/lib/maps" + "git.lukeshu.com/btrfs-progs-ng/lib/textui" ) -type Rebuilder struct { - raw *btrfs.FS - inner interface { - btrfstree.TreeOperator - Augment(treeID btrfsprim.ObjID, nodeAddr btrfsvol.LogicalAddr) ([]btrfsprim.Key, error) +type keyAndTree struct { + btrfsprim.Key + TreeID btrfsprim.ObjID +} + +func (a keyAndTree) Cmp(b keyAndTree) int { + if d := a.Key.Cmp(b.Key); d != 0 { + return d } - sb btrfstree.Superblock + return containers.NativeCmp(a.TreeID, b.TreeID) +} - graph graph.Graph - uuidMap uuidmap.UUIDMap - csums containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], btrfsinspect.SysExtentCSum] - orphans containers.Set[btrfsvol.LogicalAddr] - leaf2orphans map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] - key2leaf containers.SortedMap[keyAndTree, btrfsvol.LogicalAddr] +type rebuilder struct { + sb btrfstree.Superblock + rebuilt *btrees.RebuiltTrees - augments map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr] + graph graph.Graph + keyIO *keyio.Handle + curKey keyAndTree + queue []keyAndTree pendingAugments map[btrfsprim.ObjID][]map[btrfsvol.LogicalAddr]int } func RebuildNodes(ctx context.Context, fs *btrfs.FS, nodeScanResults btrfsinspect.ScanDevicesResult) (map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr], error) { - scanData, err := ScanDevices(ctx, fs, nodeScanResults) // ScanDevices does its own logging + nodeGraph, keyIO, err := ScanDevices(ctx, fs, nodeScanResults) // ScanDevices does its own logging if err != nil { return nil, err } @@ -58,102 +62,177 @@ func RebuildNodes(ctx context.Context, fs *btrfs.FS, nodeScanResults btrfsinspec return nil, err } - dlog.Info(ctx, "Indexing checksums...") - csums, _ := rebuildmappings.ExtractLogicalSums(ctx, nodeScanResults) - if csums == nil { - csums = new(containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], btrfsinspect.SysExtentCSum]) - } - - dlog.Info(ctx, "Indexing orphans...") - orphans, leaf2orphans, key2leaf, err := indexOrphans(ctx, fs, *sb, *scanData.nodeGraph) - if err != nil { - return nil, err - } - dlog.Info(ctx, "Rebuilding node tree...") - o := &Rebuilder{ - raw: fs, - inner: btrfsutil.NewBrokenTrees(ctx, fs), - sb: *sb, - - graph: *scanData.nodeGraph, - uuidMap: *scanData.uuidMap, - csums: *csums, - orphans: orphans, - leaf2orphans: leaf2orphans, - key2leaf: *key2leaf, + o := &rebuilder{ + sb: *sb, - augments: make(map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr]), + graph: nodeGraph, + keyIO: keyIO, } + o.rebuilt = btrees.NewRebuiltTrees(*sb, nodeGraph, keyIO, + o.cbAddedItem, o.cbLookupRoot, o.cbLookupUUID) if err := o.rebuild(ctx); err != nil { return nil, err } - return o.augments, nil + return o.rebuilt.ListRoots(), nil } -func (o *Rebuilder) ioErr(ctx context.Context, err error) { +func (o *rebuilder) ioErr(ctx context.Context, err error) { err = fmt.Errorf("should not happen: i/o error: %w", err) dlog.Error(ctx, err) panic(err) } -func (o *Rebuilder) rebuild(ctx context.Context) error { +type rebuildStats struct { + PassNum int + Task string + N, D int +} + +func (s rebuildStats) String() string { + pct := 100 + if s.D > 0 { + pct = int(100 * float64(s.N) / float64(s.D)) + } + return fmt.Sprintf("... pass %d: %s %v%% (%v/%v)", + s.PassNum, s.Task, pct, s.N, s.D) +} + +func (o *rebuilder) rebuild(ctx context.Context) error { passNum := 0 dlog.Infof(ctx, "... pass %d: scanning for implied items", passNum) + // Seed the queue o.pendingAugments = make(map[btrfsprim.ObjID][]map[btrfsvol.LogicalAddr]int) - btrfsutil.WalkAllTrees(ctx, o.inner, btrfsutil.WalkAllTreesHandler{ - Err: func(*btrfsutil.WalkError) {}, - TreeWalkHandler: btrfstree.TreeWalkHandler{ - Item: func(path btrfstree.TreePath, item btrfstree.Item) error { - handleItem(o, ctx, path[0].FromTree, item) - return nil - }, - }, - }) - for len(o.pendingAugments) > 0 { - // Apply the augments, keeping track of what keys are added to what tree. - dlog.Infof(ctx, "... pass %d: augmenting trees to add implied items", passNum) - newKeys := make(map[btrfsprim.ObjID][]btrfsprim.Key) - for _, treeID := range maps.SortedKeys(o.pendingAugments) { - dlog.Infof(ctx, "... ... augmenting tree %v:", treeID) - treeAugments := o.resolveTreeAugments(ctx, o.pendingAugments[treeID]) - for _, nodeAddr := range maps.SortedKeys(treeAugments) { - added, err := o.inner.Augment(treeID, nodeAddr) - if err != nil { - dlog.Errorf(ctx, "error augmenting: %v", err) - continue - } - newKeys[treeID] = append(newKeys[treeID], added...) - - set := o.augments[treeID] - if set == nil { - set = make(containers.Set[btrfsvol.LogicalAddr]) - o.augments[treeID] = set - } - set.Insert(nodeAddr) + o.rebuilt.AddTree(ctx, btrfsprim.ROOT_TREE_OBJECTID) + o.rebuilt.AddTree(ctx, btrfsprim.CHUNK_TREE_OBJECTID) + //o.rebuilt.AddTree(ctx, btrfsprim.TREE_LOG_OBJECTID) // TODO(lukeshu): Special LOG_TREE handling + o.rebuilt.AddTree(ctx, btrfsprim.BLOCK_GROUP_TREE_OBJECTID) + for { + // Handle items in the queue + queue := o.queue + o.queue = nil + progressWriter := textui.NewProgress[rebuildStats](ctx, dlog.LogLevelInfo, 1*time.Second) + queueProgress := func(done int) { + progressWriter.Set(rebuildStats{ + PassNum: passNum, + Task: "processing item queue", + N: done, + D: len(queue), + }) + } + for i, key := range queue { + queueProgress(i) + o.curKey = key + itemBody, ok := o.rebuilt.Load(ctx, key.TreeID, key.Key) + if !ok { + o.ioErr(ctx, fmt.Errorf("could not read previously read item: %v", key)) + } + handleItem(o, ctx, key.TreeID, btrfstree.Item{ + Key: key.Key, + Body: itemBody, + }) + if key.ItemType == btrfsitem.ROOT_ITEM_KEY { + o.rebuilt.AddTree(ctx, key.ObjectID) } } - // Clear the list of pending augments. + queueProgress(len(queue)) + progressWriter.Done() + + // Check if we can bail + if len(o.queue) == 0 && len(o.pendingAugments) == 0 { + break + } + + // Apply augments that were requested while handling items from the queue + resolvedAugments := make(map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr], len(o.pendingAugments)) + numAugments := 0 + for _, treeID := range maps.SortedKeys(o.pendingAugments) { + dlog.Infof(ctx, "... ... augments for tree %v:", treeID) + resolvedAugments[treeID] = o.resolveTreeAugments(ctx, o.pendingAugments[treeID]) + numAugments += len(resolvedAugments[treeID]) + } o.pendingAugments = make(map[btrfsprim.ObjID][]map[btrfsvol.LogicalAddr]int) - passNum++ - // Call handleItem() for each of the added keys. - dlog.Infof(ctx, "... pass %d: scanning for implied items", passNum) - for _, treeID := range maps.SortedKeys(newKeys) { - for _, key := range newKeys[treeID] { - item, err := o.inner.TreeLookup(treeID, key) - if err != nil { - o.ioErr(ctx, fmt.Errorf("error looking up already-inserted item: tree=%v key=%v: %w", - treeID, key, err)) - } - handleItem(o, ctx, treeID, item) + progressWriter = textui.NewProgress[rebuildStats](ctx, dlog.LogLevelInfo, 1*time.Second) + numAugmented := 0 + augmentProgress := func() { + progressWriter.Set(rebuildStats{ + PassNum: passNum, + Task: "applying augments", + N: numAugmented, + D: numAugments, + }) + } + for _, treeID := range maps.SortedKeys(resolvedAugments) { + for _, nodeAddr := range maps.SortedKeys(resolvedAugments[treeID]) { + augmentProgress() + o.rebuilt.AddRoot(ctx, treeID, nodeAddr) + numAugmented++ } } + augmentProgress() + progressWriter.Done() + + passNum++ + dlog.Infof(ctx, "... pass %d: scanning for implied items", passNum) } return nil } -func (o *Rebuilder) resolveTreeAugments(ctx context.Context, listsWithDistances []map[btrfsvol.LogicalAddr]int) containers.Set[btrfsvol.LogicalAddr] { +func (o *rebuilder) cbAddedItem(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) { + o.queue = append(o.queue, keyAndTree{ + TreeID: tree, + Key: key, + }) +} + +func (o *rebuilder) cbLookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool) { + key, ok := o._want(ctx, btrfsprim.ROOT_TREE_OBJECTID, tree, btrfsitem.ROOT_ITEM_KEY) + if !ok { + o.queue = append(o.queue, o.curKey) + return 0, btrfsitem.Root{}, false + } + itemBody, ok := o.rebuilt.Load(ctx, btrfsprim.ROOT_TREE_OBJECTID, key) + if !ok { + o.ioErr(ctx, fmt.Errorf("could not read previously read item: %v", key)) + } + switch itemBody := itemBody.(type) { + case btrfsitem.Root: + return btrfsprim.Generation(key.Offset), itemBody, true + case btrfsitem.Error: + o.fsErr(ctx, fmt.Errorf("error decoding item: tree=%v key=%v: %w", btrfsprim.ROOT_TREE_OBJECTID, key, itemBody.Err)) + return 0, btrfsitem.Root{}, false + default: + // This is a panic because the item decoder should not emit ROOT_ITEM items as anything but + // btrfsitem.Root or btrfsitem.Error without this code also being updated. + panic(fmt.Errorf("should not happen: ROOT_ITEM item has unexpected type: %T", itemBody)) + } +} + +func (o *rebuilder) cbLookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) { + key := btrfsitem.UUIDToKey(uuid) + if ok := o._wantOff(ctx, btrfsprim.UUID_TREE_OBJECTID, key.ObjectID, key.ItemType, key.Offset); !ok { + o.queue = append(o.queue, o.curKey) + return 0, false + } + itemBody, ok := o.rebuilt.Load(ctx, btrfsprim.UUID_TREE_OBJECTID, key) + if !ok { + o.ioErr(ctx, fmt.Errorf("could not read previously read item: %v", key)) + } + switch itemBody := itemBody.(type) { + case btrfsitem.UUIDMap: + return itemBody.ObjID, true + case btrfsitem.Error: + o.fsErr(ctx, fmt.Errorf("error decoding item: tree=%v key=%v: %w", btrfsprim.UUID_TREE_OBJECTID, key, itemBody.Err)) + return 0, false + default: + // This is a panic because the item decoder should not emit UUID_SUBVOL items as anything but + // btrfsitem.UUIDMap or btrfsitem.Error without this code also being updated. + panic(fmt.Errorf("should not happen: UUID_SUBVOL item has unexpected type: %T", itemBody)) + } +} + +func (o *rebuilder) resolveTreeAugments(ctx context.Context, listsWithDistances []map[btrfsvol.LogicalAddr]int) containers.Set[btrfsvol.LogicalAddr] { distances := make(map[btrfsvol.LogicalAddr]int) generations := make(map[btrfsvol.LogicalAddr]btrfsprim.Generation) lists := make([]containers.Set[btrfsvol.LogicalAddr], len(listsWithDistances)) @@ -261,7 +340,12 @@ func (o *Rebuilder) resolveTreeAugments(ctx context.Context, listsWithDistances } for i, list := range lists { - dlog.Infof(ctx, "... ... ... %d: %v: %v", i, list.Intersection(ret).TakeOne(), maps.SortedKeys(list)) + chose := list.Intersection(ret) + if len(chose) == 0 { + dlog.Infof(ctx, "... ... ... lists[%d]: chose (none) from %v", i, maps.SortedKeys(list)) + } else { + dlog.Infof(ctx, "... ... ... lists[%d]: chose %v from %v", i, chose.TakeOne(), maps.SortedKeys(list)) + } } return ret @@ -269,43 +353,18 @@ func (o *Rebuilder) resolveTreeAugments(ctx context.Context, listsWithDistances //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// _NodeFile is a subset of btrfstree.NodeFile. -type _NodeFile interface { - ParentTree(btrfsprim.ObjID) (btrfsprim.ObjID, bool) -} - -func treeDistance(fs _NodeFile, tree, leaf btrfsprim.ObjID) (int, bool) { - dist := 0 - for { - if tree == leaf { - return dist, true - } - - parentTree, ok := fs.ParentTree(tree) - if !ok { - // Failed to look up parent info. - return 0, false - } - if parentTree == 0 { - // End of the line. - return 0, false - } - - tree = parentTree - dist++ +func (o *rebuilder) wantAugment(ctx context.Context, treeID btrfsprim.ObjID, choices containers.Set[btrfsvol.LogicalAddr]) { + if len(choices) == 0 { + dlog.Errorf(ctx, "augment(tree=%v): could not find wanted item", treeID) + return } -} - -func (o *Rebuilder) wantAugment(ctx context.Context, treeID btrfsprim.ObjID, choices containers.Set[btrfsvol.LogicalAddr]) { - choicesWithDist := make(map[btrfsvol.LogicalAddr]int) + choicesWithDist := make(map[btrfsvol.LogicalAddr]int, len(choices)) for choice := range choices { - if dist, ok := treeDistance(o.uuidMap, treeID, o.graph.Nodes[choice].Owner); ok { - choicesWithDist[choice] = dist + dist, ok := o.rebuilt.COWDistance(ctx, treeID, o.graph.Nodes[choice].Owner) + if !ok { + panic(fmt.Errorf("should not happen: .wantAugment called for tree=%v with invalid choice=%v", treeID, choice)) } - } - if len(choicesWithDist) == 0 { - dlog.Errorf(ctx, "augment(tree=%v): could not find wanted item", treeID) - return + choicesWithDist[choice] = dist } dlog.Infof(ctx, "augment(tree=%v): %v", treeID, maps.SortedKeys(choicesWithDist)) o.pendingAugments[treeID] = append(o.pendingAugments[treeID], choicesWithDist) @@ -314,40 +373,57 @@ func (o *Rebuilder) wantAugment(ctx context.Context, treeID btrfsprim.ObjID, cho //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // fsErr implements rebuildCallbacks. -func (o *Rebuilder) fsErr(ctx context.Context, e error) { +func (o *rebuilder) fsErr(ctx context.Context, e error) { dlog.Errorf(ctx, "filesystem error: %v", e) } // want implements rebuildCallbacks. -func (o *Rebuilder) want(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType) { +func (o *rebuilder) want(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType) { + o._want(ctx, treeID, objID, typ) +} +func (o *rebuilder) _want(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType) (key btrfsprim.Key, ok bool) { + if !o.rebuilt.AddTree(ctx, treeID) { + o.queue = append(o.queue, o.curKey) + return btrfsprim.Key{}, false + } + // check if we already have it tgt := btrfsprim.Key{ ObjectID: objID, ItemType: typ, } - if _, err := o.inner.TreeSearch(treeID, func(key btrfsprim.Key, _ uint32) int { + if key, ok := o.rebuilt.Search(ctx, treeID, func(key btrfsprim.Key) int { key.Offset = 0 return tgt.Cmp(key) - }); err == nil { - return + }); ok { + return key, true } // OK, we need to insert it ctx = dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key={%v %v ?}", treeID, objID, typ)) wants := make(containers.Set[btrfsvol.LogicalAddr]) - o.key2leaf.Subrange( - func(k keyAndTree, _ btrfsvol.LogicalAddr) int { k.Key.Offset = 0; return tgt.Cmp(k.Key) }, - func(_ keyAndTree, v btrfsvol.LogicalAddr) bool { - wants.InsertFrom(o.leaf2orphans[v]) + o.rebuilt.Keys(treeID).Subrange( + func(k btrfsprim.Key, _ keyio.ItemPtr) int { k.Offset = 0; return tgt.Cmp(k) }, + func(_ btrfsprim.Key, v keyio.ItemPtr) bool { + wants.InsertFrom(o.rebuilt.LeafToRoots(ctx, treeID, v.Node)) return true }) o.wantAugment(ctx, treeID, wants) + return btrfsprim.Key{}, false } // wantOff implements rebuildCallbacks. -func (o *Rebuilder) wantOff(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType, off uint64) { +func (o *rebuilder) wantOff(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType, off uint64) { + o._wantOff(ctx, treeID, objID, typ, off) +} +func (o *rebuilder) _wantOff(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType, off uint64) (ok bool) { + if !o.rebuilt.AddTree(ctx, treeID) { + o.queue = append(o.queue, o.curKey) + return false + } + // check if we already have it tgt := btrfsprim.Key{ @@ -355,37 +431,47 @@ func (o *Rebuilder) wantOff(ctx context.Context, treeID btrfsprim.ObjID, objID b ItemType: typ, Offset: off, } - if _, err := o.inner.TreeLookup(treeID, tgt); err == nil { - return + if _, ok := o.rebuilt.Load(ctx, treeID, tgt); ok { + return true } // OK, we need to insert it ctx = dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key=%v", treeID, tgt)) wants := make(containers.Set[btrfsvol.LogicalAddr]) - o.key2leaf.Subrange( - func(k keyAndTree, _ btrfsvol.LogicalAddr) int { return tgt.Cmp(k.Key) }, - func(_ keyAndTree, v btrfsvol.LogicalAddr) bool { - wants.InsertFrom(o.leaf2orphans[v]) + o.rebuilt.Keys(treeID).Subrange( + func(k btrfsprim.Key, _ keyio.ItemPtr) int { return tgt.Cmp(k) }, + func(_ btrfsprim.Key, v keyio.ItemPtr) bool { + wants.InsertFrom(o.rebuilt.LeafToRoots(ctx, treeID, v.Node)) return true }) o.wantAugment(ctx, treeID, wants) + return false } // wantFunc implements rebuildCallbacks. -func (o *Rebuilder) wantFunc(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType, fn func(btrfsitem.Item) bool) { +func (o *rebuilder) wantFunc(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType, fn func(btrfsitem.Item) bool) { + if !o.rebuilt.AddTree(ctx, treeID) { + o.queue = append(o.queue, o.curKey) + return + } + // check if we already have it tgt := btrfsprim.Key{ ObjectID: objID, ItemType: typ, } - items, _ := o.inner.TreeSearchAll(treeID, func(key btrfsprim.Key, _ uint32) int { + keys := o.rebuilt.SearchAll(ctx, treeID, func(key btrfsprim.Key) int { key.Offset = 0 return tgt.Cmp(key) }) - for _, item := range items { - if fn(item.Body) { + for _, itemKey := range keys { + itemBody, ok := o.rebuilt.Load(ctx, treeID, itemKey) + if !ok { + o.ioErr(ctx, fmt.Errorf("could not read previously read item: %v", itemKey)) + } + if fn(itemBody) { return } } @@ -394,226 +480,202 @@ func (o *Rebuilder) wantFunc(ctx context.Context, treeID btrfsprim.ObjID, objID ctx = dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key=%v +func", treeID, tgt)) wants := make(containers.Set[btrfsvol.LogicalAddr]) - o.key2leaf.Subrange( - func(k keyAndTree, _ btrfsvol.LogicalAddr) int { k.Key.Offset = 0; return tgt.Cmp(k.Key) }, - func(k keyAndTree, v btrfsvol.LogicalAddr) bool { - nodeRef, err := btrfstree.ReadNode[btrfsvol.LogicalAddr](o.raw, o.sb, v, btrfstree.NodeExpectations{ - LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: v}, - Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: o.graph.Nodes[v].Generation}, - }) - if err != nil { - o.ioErr(ctx, err) + o.rebuilt.Keys(treeID).Subrange( + func(k btrfsprim.Key, _ keyio.ItemPtr) int { k.Offset = 0; return tgt.Cmp(k) }, + func(k btrfsprim.Key, v keyio.ItemPtr) bool { + itemBody, ok := o.keyIO.ReadItem(v) + if !ok { + o.ioErr(ctx, fmt.Errorf("could not read previously read item: %v at %v", k, v)) } - for _, item := range nodeRef.Data.BodyLeaf { - if k.Key == item.Key && fn(item.Body) { - wants.InsertFrom(o.leaf2orphans[v]) - } + if fn(itemBody) { + wants.InsertFrom(o.rebuilt.LeafToRoots(ctx, treeID, v.Node)) } return true }) o.wantAugment(ctx, treeID, wants) } -// func implements rebuildCallbacks. -// -// interval is [beg, end) -func (o *Rebuilder) wantCSum(ctx context.Context, beg, end btrfsvol.LogicalAddr) { - for beg < end { - // check if we already have it - if run, err := btrfs.LookupCSum(o.inner, o.sb.ChecksumType, beg); err == nil { - // we already have it - beg = run.Addr.Add(run.Size()) - } else { - // we need to insert it - ctx := dlog.WithField(ctx, "want_key", fmt.Sprintf("csum for laddr=%v", beg)) - rbNode := o.csums.Search(func(item btrfsinspect.SysExtentCSum) int { - switch { - case item.Sums.Addr > beg: - return -1 - case item.Sums.Addr.Add(item.Sums.Size()) <= beg: - return 1 - default: - return 0 - } - - }) - if rbNode == nil { - o.wantAugment(ctx, btrfsprim.CSUM_TREE_OBJECTID, nil) // log an error - beg += btrfssum.BlockSize - continue - } - run := rbNode.Value.Sums - key := keyAndTree{ - Key: rbNode.Value.Key, - TreeID: btrfsprim.CSUM_TREE_OBJECTID, - } - leaf, ok := o.key2leaf.Load(key) - if !ok { - // This is a panic because if we found it in `o.csums` then it has - // to be in some Node, and if we didn't find it from - // btrfs.LookupCSum(), then that Node must be an orphan. - panic(fmt.Errorf("should not happen: no orphan contains %v", key.Key)) - } - o.wantAugment(ctx, key.TreeID, o.leaf2orphans[leaf]) +func (o *rebuilder) _wantRange( + ctx context.Context, + treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType, + beg, end uint64, +) { + if !o.rebuilt.AddTree(ctx, treeID) { + o.queue = append(o.queue, o.curKey) + return + } - beg = run.Addr.Add(run.Size()) + sizeFn := func(key btrfsprim.Key) (uint64, error) { + ptr, ok := o.rebuilt.Keys(treeID).Load(key) + if !ok { + panic(fmt.Errorf("should not happen: could not load key: %v", key)) + } + sizeAndErr, ok := o.keyIO.Sizes[ptr] + if !ok { + panic(fmt.Errorf("should not happen: %v item did not have a size recorded", typ)) } + return sizeAndErr.Size, sizeAndErr.Err } -} -// wantFileExt implements rebuildCallbacks. -func (o *Rebuilder) wantFileExt(ctx context.Context, treeID btrfsprim.ObjID, ino btrfsprim.ObjID, size int64) { - min := btrfsprim.Key{ - ObjectID: ino, - ItemType: btrfsitem.EXTENT_DATA_KEY, - Offset: 0, - } - max := btrfsprim.Key{ - ObjectID: ino, - ItemType: btrfsitem.EXTENT_DATA_KEY, - Offset: uint64(size - 1), - } - exts, _ := o.inner.TreeSearchAll(treeID, func(key btrfsprim.Key, _ uint32) int { + // Step 1: Build a listing of the runs that we do have. + runMin := btrfsprim.Key{ + ObjectID: objID, + ItemType: typ, + Offset: 0, // *NOT* `beg` + } + runMax := btrfsprim.Key{ + ObjectID: objID, + ItemType: typ, + Offset: end - 1, + } + runKeys := o.rebuilt.SearchAll(ctx, treeID, func(key btrfsprim.Key) int { switch { - case min.Cmp(key) < 0: + case runMin.Cmp(key) < 0: return 1 - case max.Cmp(key) > 0: + case runMax.Cmp(key) > 0: return -1 default: return 0 } }) + // Step 2: Build a listing of the gaps. + // + // Start with a gap of the whole range, then subtract each run + // from it. type gap struct { // range is [Beg,End) - Beg, End int64 + Beg, End uint64 } - gaps := &containers.RBTree[containers.NativeOrdered[int64], gap]{ - KeyFn: func(gap gap) containers.NativeOrdered[int64] { - return containers.NativeOrdered[int64]{Val: gap.Beg} + gaps := &containers.RBTree[containers.NativeOrdered[uint64], gap]{ + KeyFn: func(gap gap) containers.NativeOrdered[uint64] { + return containers.NativeOrdered[uint64]{Val: gap.Beg} }, } gaps.Insert(gap{ - Beg: 0, - End: size, + Beg: beg, + End: end, }) - for _, ext := range exts { - switch extBody := ext.Body.(type) { - case btrfsitem.FileExtent: - extBeg := int64(ext.Key.Offset) - extSize, err := extBody.Size() - if err != nil { - o.fsErr(ctx, fmt.Errorf("FileExtent: tree=%v key=%v: %w", treeID, ext.Key, err)) - continue + for _, runKey := range runKeys { + runSize, err := sizeFn(runKey) + if err != nil { + o.fsErr(ctx, fmt.Errorf("tree=%v key=%v: %w", treeID, runKey, err)) + } + if runSize == 0 { + continue + } + runBeg := runKey.Offset + runEnd := runBeg + runSize + if runEnd <= beg { + continue + } + overlappingGaps := gaps.SearchRange(func(gap gap) int { + switch { + case gap.End <= runBeg: + return 1 + case runEnd <= gap.Beg: + return -1 + default: + return 0 } - extEnd := extBeg + extSize - overlappingGaps := gaps.SearchRange(func(gap gap) int { - switch { - case gap.End <= extBeg: - return 1 - case extEnd <= gap.Beg: - return -1 - default: - return 0 - } + }) + if len(overlappingGaps) == 0 { + continue + } + gapsBeg := overlappingGaps[0].Beg + gapsEnd := overlappingGaps[len(overlappingGaps)-1].End + for _, gap := range overlappingGaps { + gaps.Delete(containers.NativeOrdered[uint64]{Val: gap.Beg}) + } + if gapsBeg < runBeg { + gaps.Insert(gap{ + Beg: gapsBeg, + End: runBeg, + }) + } + if gapsEnd > runEnd { + gaps.Insert(gap{ + Beg: runEnd, + End: gapsEnd, }) - if len(overlappingGaps) == 0 { - continue - } - beg := overlappingGaps[0].Beg - end := overlappingGaps[len(overlappingGaps)-1].End - for _, gap := range overlappingGaps { - gaps.Delete(containers.NativeOrdered[int64]{Val: gap.Beg}) - } - if beg < extBeg { - gaps.Insert(gap{ - Beg: beg, - End: extBeg, - }) - } - if end > extEnd { - gaps.Insert(gap{ - Beg: extEnd, - End: end, - }) - } - case btrfsitem.Error: - o.fsErr(ctx, fmt.Errorf("error decoding item: tree=%v key=%v: %w", treeID, ext.Key, extBody.Err)) - default: - // This is a panic because the item decoder should not emit EXTENT_DATA - // items as anything but btrfsitem.FileExtent or btrfsitem.Error without - // this code also being updated. - panic(fmt.Errorf("should not happen: EXTENT_DATA item has unexpected type: %T", extBody)) } } + + // Step 2: Fill each gap. _ = gaps.Walk(func(rbNode *containers.RBNode[gap]) error { gap := rbNode.Value - min := btrfsprim.Key{ - ObjectID: ino, - ItemType: btrfsitem.EXTENT_DATA_KEY, - Offset: 0, + last := gap.Beg + runMin := btrfsprim.Key{ + ObjectID: objID, + ItemType: typ, + Offset: 0, // *NOT* `gap.Beg` } - max := btrfsprim.Key{ - ObjectID: ino, - ItemType: btrfsitem.EXTENT_DATA_KEY, - Offset: uint64(gap.End - 1), + runMax := btrfsprim.Key{ + ObjectID: objID, + ItemType: typ, + Offset: gap.End - 1, } - ctx := dlog.WithField(ctx, "want_key", fmt.Sprintf("file extent for tree=%v inode=%v bytes [%v, %v)", treeID, ino, gap.Beg, gap.End)) - wants := make(containers.Set[btrfsvol.LogicalAddr]) - o.key2leaf.Subrange( - func(k keyAndTree, _ btrfsvol.LogicalAddr) int { + o.rebuilt.Keys(treeID).Subrange( + func(key btrfsprim.Key, _ keyio.ItemPtr) int { switch { - case min.Cmp(k.Key) < 0: + case runMin.Cmp(key) < 0: return 1 - case max.Cmp(k.Key) > 0: + case runMax.Cmp(key) > 0: return -1 default: return 0 } }, - func(k keyAndTree, v btrfsvol.LogicalAddr) bool { - nodeRef, err := btrfstree.ReadNode[btrfsvol.LogicalAddr](o.raw, o.sb, v, btrfstree.NodeExpectations{ - LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: v}, - Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: o.graph.Nodes[v].Generation}, - }) + func(k btrfsprim.Key, v keyio.ItemPtr) bool { + runSize, err := sizeFn(k) if err != nil { - o.ioErr(ctx, fmt.Errorf("error reading previously read node@%v: %w", v, err)) + o.fsErr(ctx, fmt.Errorf("tree=%v key=%v: %w", treeID, k, err)) + return true + } + if runSize == 0 { + return true } - for _, item := range nodeRef.Data.BodyLeaf { - if k.Key != item.Key { - continue - } - switch itemBody := item.Body.(type) { - case btrfsitem.FileExtent: - itemBeg := int64(item.Key.Offset) - itemSize, err := itemBody.Size() - if err != nil { - o.fsErr(ctx, fmt.Errorf("FileExtent: tree=%v key=%v: %w", treeID, item.Key, err)) - continue - } - itemEnd := itemBeg + itemSize - // We're being and "wanting" any extent that has any overlap with the - // gap. But maybe instead we sould only want extents that are - // *entirely* within the gap. I'll have to run it on real filesystems - // to see what works better. - // - // TODO(lukeshu): Re-evaluate whether being greedy here is the right - // thing. - if itemEnd > gap.Beg && itemBeg < gap.End { - wants.InsertFrom(o.leaf2orphans[v]) - } - case btrfsitem.Error: - o.fsErr(ctx, fmt.Errorf("error decoding item: tree=%v key=%v: %w", treeID, item.Key, itemBody.Err)) - default: - // This is a panic because the item decoder should not emit EXTENT_DATA - // items as anything but btrfsitem.FileExtent or btrfsitem.Error without - // this code also being updated. - panic(fmt.Errorf("should not happen: EXTENT_DATA item has unexpected type: %T", itemBody)) - } + runBeg := k.Offset + runEnd := runBeg + runSize + if runEnd <= gap.Beg { + return true } + + // TODO: This is dumb and greedy. + if last < runBeg { + // log an error + o.wantAugment(dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key={%v, %v, %v-%v}", + treeID, objID, typ, last, runBeg)), + treeID, nil) + } + o.wantAugment(dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key={%v, %v, %v-%v}", + treeID, objID, typ, gap.Beg, gap.End)), + treeID, o.rebuilt.LeafToRoots(ctx, treeID, v.Node)) + last = runEnd + return true }) - o.wantAugment(ctx, treeID, wants) + if last < gap.End { + // log an error + o.wantAugment(dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key={%v, %v, %v-%v}", + treeID, objID, typ, last, gap.End)), + treeID, nil) + } return nil }) } + +// func implements rebuildCallbacks. +// +// interval is [beg, end) +func (o *rebuilder) wantCSum(ctx context.Context, beg, end btrfsvol.LogicalAddr) { + const treeID = btrfsprim.CSUM_TREE_OBJECTID + o._wantRange(ctx, treeID, btrfsprim.EXTENT_CSUM_OBJECTID, btrfsprim.EXTENT_CSUM_KEY, + uint64(beg), uint64(end)) +} + +// wantFileExt implements rebuildCallbacks. +func (o *rebuilder) wantFileExt(ctx context.Context, treeID btrfsprim.ObjID, ino btrfsprim.ObjID, size int64) { + o._wantRange(ctx, treeID, ino, btrfsprim.EXTENT_DATA_KEY, + 0, uint64(size)) +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild_graph.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild_graph.go index 976716d..db7a7a5 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild_graph.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuild_graph.go @@ -269,6 +269,22 @@ func handleItem(o rebuildCallbacks, ctx context.Context, treeID btrfsprim.ObjID, btrfsitem.INODE_ITEM_KEY, 0) } + if body.UUID != (btrfsprim.UUID{}) { + key := btrfsitem.UUIDToKey(body.UUID) + o.wantOff(dlog.WithField(ctx, "wants", "uuid"), + btrfsprim.UUID_TREE_OBJECTID, + key.ObjectID, + key.ItemType, + key.Offset) + } + if body.ParentUUID != (btrfsprim.UUID{}) { + key := btrfsitem.UUIDToKey(body.ParentUUID) + o.wantOff(dlog.WithField(ctx, "wants", "parent uuid"), + btrfsprim.UUID_TREE_OBJECTID, + key.ObjectID, + key.ItemType, + key.Offset) + } case btrfsitem.RootRef: var otherType btrfsprim.ItemType var parent, child btrfsprim.ObjID diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/scan.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/scan.go index 3575534..5c2d0fd 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/scan.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/scan.go @@ -16,17 +16,11 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect" "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio" "git.lukeshu.com/btrfs-progs-ng/lib/containers" - "git.lukeshu.com/btrfs-progs-ng/lib/maps" "git.lukeshu.com/btrfs-progs-ng/lib/textui" ) -type scanResult struct { - uuidMap *uuidmap.UUIDMap - nodeGraph *graph.Graph -} - type scanStats struct { N, D int } @@ -37,12 +31,12 @@ func (s scanStats) String() string { s.N, s.D) } -func ScanDevices(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect.ScanDevicesResult) (*scanResult, error) { +func ScanDevices(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect.ScanDevicesResult) (graph.Graph, *keyio.Handle, error) { dlog.Infof(ctx, "Reading node data from FS...") sb, err := fs.Superblock() if err != nil { - return nil, err + return graph.Graph{}, nil, err } total := countNodes(scanResults) @@ -52,10 +46,8 @@ func ScanDevices(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect.Sca progressWriter.Set(scanStats{N: done, D: total}) } - ret := &scanResult{ - uuidMap: uuidmap.New(), - nodeGraph: graph.New(*sb), - } + nodeGraph := graph.New(*sb) + keyIO := keyio.NewHandle(fs, *sb) progress(done, total) for _, devResults := range scanResults { @@ -64,14 +56,11 @@ func ScanDevices(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect.Sca LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr}, }) if err != nil { - return nil, err - } - - if err := ret.uuidMap.InsertNode(nodeRef); err != nil { - return nil, err + return graph.Graph{}, nil, err } - ret.nodeGraph.InsertNode(nodeRef) + nodeGraph.InsertNode(nodeRef) + keyIO.InsertNode(nodeRef) done++ progress(done, total) @@ -81,21 +70,17 @@ func ScanDevices(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect.Sca panic("should not happen") } progressWriter.Done() - - missing := ret.uuidMap.MissingRootItems() - if len(missing) > 0 { - dlog.Errorf(ctx, "... could not find root items for %d trees: %v", len(missing), maps.SortedKeys(missing)) - } - dlog.Info(ctx, "... done reading node data") progressWriter = textui.NewProgress[scanStats](ctx, dlog.LogLevelInfo, 1*time.Second) dlog.Infof(ctx, "Checking keypointers for dead-ends...") - if err := ret.nodeGraph.FinalCheck(fs, *sb, progress); err != nil { - return nil, err + if err := nodeGraph.FinalCheck(fs, *sb, progress); err != nil { + return graph.Graph{}, nil, err } progressWriter.Done() dlog.Info(ctx, "... done checking keypointers") - return ret, nil + keyIO.SetGraph(*nodeGraph) + + return *nodeGraph, keyIO, nil } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap/scan.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap/scan.go deleted file mode 100644 index 98ed097..0000000 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap/scan.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package uuidmap - -import ( - "fmt" - - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" - "git.lukeshu.com/btrfs-progs-ng/lib/containers" - "git.lukeshu.com/btrfs-progs-ng/lib/diskio" -) - -func maybeSet[K, V comparable](name string, m map[K]V, k K, v V) error { - if other, conflict := m[k]; conflict && other != v { - return fmt.Errorf("conflict: %s %v can't have both %v and %v", name, k, other, v) - } - m[k] = v - return nil -} - -func New() *UUIDMap { - ret := &UUIDMap{ - ObjID2UUID: make(map[btrfsprim.ObjID]btrfsprim.UUID), - UUID2ObjID: make(map[btrfsprim.UUID]btrfsprim.ObjID), - TreeParent: make(map[btrfsprim.ObjID]btrfsprim.UUID), - - SeenTrees: make(containers.Set[btrfsprim.ObjID]), - } - - // These 4 trees are mentioned directly in the superblock, so - // they are always seen. - ret.SeenTrees.Insert(btrfsprim.ROOT_TREE_OBJECTID) - ret.SeenTrees.Insert(btrfsprim.CHUNK_TREE_OBJECTID) - ret.SeenTrees.Insert(btrfsprim.TREE_LOG_OBJECTID) - ret.SeenTrees.Insert(btrfsprim.BLOCK_GROUP_TREE_OBJECTID) - - return ret -} - -func (m *UUIDMap) InsertNode(nodeRef *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]) error { - for _, item := range nodeRef.Data.BodyLeaf { - switch itemBody := item.Body.(type) { - case btrfsitem.Root: - if err := maybeSet("ObjID2UUID", m.ObjID2UUID, item.Key.ObjectID, itemBody.UUID); err != nil { - return err - } - if itemBody.UUID != (btrfsprim.UUID{}) { - if err := maybeSet("UUID2ObjID", m.UUID2ObjID, itemBody.UUID, item.Key.ObjectID); err != nil { - return err - } - } - if err := maybeSet("ParentUUID", m.TreeParent, item.Key.ObjectID, itemBody.ParentUUID); err != nil { - return err - } - m.SeenTrees.Insert(item.Key.ObjectID) - case btrfsitem.UUIDMap: - uuid := btrfsitem.KeyToUUID(item.Key) - if err := maybeSet("ObjID2UUID", m.ObjID2UUID, itemBody.ObjID, uuid); err != nil { - return err - } - if err := maybeSet("UUID2ObjID", m.UUID2ObjID, uuid, itemBody.ObjID); err != nil { - return err - } - } - } - m.SeenTrees.Insert(nodeRef.Data.Head.Owner) - return nil -} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap/uuidmap.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap/uuidmap.go deleted file mode 100644 index 32a34d1..0000000 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap/uuidmap.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package uuidmap - -import ( - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" - "git.lukeshu.com/btrfs-progs-ng/lib/containers" -) - -type UUIDMap struct { - ObjID2UUID map[btrfsprim.ObjID]btrfsprim.UUID - UUID2ObjID map[btrfsprim.UUID]btrfsprim.ObjID - TreeParent map[btrfsprim.ObjID]btrfsprim.UUID - - SeenTrees containers.Set[btrfsprim.ObjID] -} - -func (m UUIDMap) MissingRootItems() containers.Set[btrfsprim.ObjID] { - missing := make(containers.Set[btrfsprim.ObjID]) - for treeID := range m.SeenTrees { - if _, ok := m.ObjID2UUID[treeID]; !ok && treeID != btrfsprim.ROOT_TREE_OBJECTID { - missing.Insert(treeID) - continue - } - if _, ok := m.TreeParent[treeID]; !ok && treeID >= btrfsprim.FIRST_FREE_OBJECTID && treeID <= btrfsprim.LAST_FREE_OBJECTID { - missing.Insert(treeID) - } - } - return missing -} - -// ParentTree implements btrfstree.NodeFile. -func (m UUIDMap) ParentTree(tree btrfsprim.ObjID) (btrfsprim.ObjID, bool) { - if tree < btrfsprim.FIRST_FREE_OBJECTID || tree > btrfsprim.LAST_FREE_OBJECTID { - // no parent - return 0, true - } - parentUUID, ok := m.TreeParent[tree] - if !ok { - // could not look up parent info - return 0, false - } - if parentUUID == (btrfsprim.UUID{}) { - // no parent - return 0, true - } - parentObjID, ok := m.UUID2ObjID[parentUUID] - if !ok { - // could not look up parent info - return 0, false - } - return parentObjID, true -} diff --git a/lib/btrfsprogs/btrfsinspect/scandevices.go b/lib/btrfsprogs/btrfsinspect/scandevices.go index 4834826..b6c929b 100644 --- a/lib/btrfsprogs/btrfsinspect/scandevices.go +++ b/lib/btrfsprogs/btrfsinspect/scandevices.go @@ -75,7 +75,6 @@ type SysDevExtent struct { } type SysExtentCSum struct { - Key btrfsprim.Key Generation btrfsprim.Generation Sums btrfsitem.ExtentCSum } @@ -227,7 +226,6 @@ func ScanOneDevice(ctx context.Context, dev *btrfs.Device, sb btrfstree.Superblo //dlog.Tracef(ctx, "... dev[%q] node@%v: item %v: found csums", // dev.Name(), nodeRef.Addr, i) result.FoundExtentCSums = append(result.FoundExtentCSums, SysExtentCSum{ - Key: item.Key, Generation: nodeRef.Data.Head.Generation, Sums: sums, }) diff --git a/lib/btrfsprogs/btrfsutil/broken_btree.go b/lib/btrfsprogs/btrfsutil/broken_btree.go index e44c9e1..28e8b1d 100644 --- a/lib/btrfsprogs/btrfsutil/broken_btree.go +++ b/lib/btrfsprogs/btrfsutil/broken_btree.go @@ -75,7 +75,7 @@ var _ btrfstree.TreeOperator = (*brokenTrees)(nil) // NewBrokenTrees wraps a *btrfs.FS to support looking up information // from broken trees. // -// Of the btrfs.FS.Tree{Verb} methods: +// Of the btrfstree.TreeOperator methods: // // - TreeWalk works on broken trees // - TreeLookup relies on the tree being properly ordered (which a diff --git a/lib/containers/sortedmap.go b/lib/containers/sortedmap.go index b759fa3..d6d2f9e 100644 --- a/lib/containers/sortedmap.go +++ b/lib/containers/sortedmap.go @@ -8,13 +8,13 @@ import ( "errors" ) -type orderedKV[K Ordered[K], V any] struct { +type OrderedKV[K Ordered[K], V any] struct { K K V V } type SortedMap[K Ordered[K], V any] struct { - inner RBTree[K, orderedKV[K, V]] + inner RBTree[K, OrderedKV[K, V]] } func (m *SortedMap[K, V]) init() { @@ -23,7 +23,7 @@ func (m *SortedMap[K, V]) init() { } } -func (m *SortedMap[K, V]) keyFn(kv orderedKV[K, V]) K { +func (m *SortedMap[K, V]) keyFn(kv OrderedKV[K, V]) K { return kv.K } @@ -46,7 +46,7 @@ var errStop = errors.New("stop") func (m *SortedMap[K, V]) Range(f func(key K, value V) bool) { m.init() - _ = m.inner.Walk(func(node *RBNode[orderedKV[K, V]]) error { + _ = m.inner.Walk(func(node *RBNode[OrderedKV[K, V]]) error { if f(node.Value.K, node.Value.V) { return nil } else { @@ -57,7 +57,7 @@ func (m *SortedMap[K, V]) Range(f func(key K, value V) bool) { func (m *SortedMap[K, V]) Subrange(rangeFn func(K, V) int, handleFn func(K, V) bool) { m.init() - kvs := m.inner.SearchRange(func(kv orderedKV[K, V]) int { + kvs := m.inner.SearchRange(func(kv OrderedKV[K, V]) int { return rangeFn(kv.K, kv.V) }) for _, kv := range kvs { @@ -69,8 +69,26 @@ func (m *SortedMap[K, V]) Subrange(rangeFn func(K, V) int, handleFn func(K, V) b func (m *SortedMap[K, V]) Store(key K, value V) { m.init() - m.inner.Insert(orderedKV[K, V]{ + m.inner.Insert(OrderedKV[K, V]{ K: key, V: value, }) } + +func (m *SortedMap[K, V]) Search(fn func(K, V) int) (K, V, bool) { + node := m.inner.Search(func(kv OrderedKV[K, V]) int { + return fn(kv.K, kv.V) + }) + if node == nil { + var zeroK K + var zeroV V + return zeroK, zeroV, false + } + return node.Value.K, node.Value.V, true +} + +func (m *SortedMap[K, V]) SearchAll(fn func(K, V) int) []OrderedKV[K, V] { + return m.inner.SearchRange(func(kv OrderedKV[K, V]) int { + return fn(kv.K, kv.V) + }) +} diff --git a/scripts/main.sh b/scripts/main.sh index 19f3eea..e44ae7f 100755 --- a/scripts/main.sh +++ b/scripts/main.sh @@ -52,9 +52,9 @@ gen $b.gen/3.nodes.json \ ./btrfs-rec --pv=$b.img --mappings=$b.gen/2.mappings.json \ inspect rebuild-nodes $b.gen/0.scandevices.json -gen $b.gen/4.ls-files.txt \ - ./btrfs-rec --pv=$b.img --mappings=$b.gen/2.mappings.json \ - inspect ls-files -gen $b.gen/4.ls-trees.txt \ - ./btrfs-rec --pv=$b.img --mappings=$b.gen/2.mappings.json \ - inspect ls-trees --scandevices=$b.gen/0.scandevices.json +# gen $b.gen/4.ls-files.txt \ +# ./btrfs-rec --pv=$b.img --mappings=$b.gen/2.mappings.json \ +# inspect ls-files +# gen $b.gen/4.ls-trees.txt \ +# ./btrfs-rec --pv=$b.img --mappings=$b.gen/2.mappings.json \ +# inspect ls-trees --scandevices=$b.gen/0.scandevices.json |