summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-04-13 13:18:47 -0600
committerLuke Shumaker <lukeshu@lukeshu.com>2023-04-13 13:18:47 -0600
commit163e8a157ab812a8eafa3a1d2d2b8c0e45431559 (patch)
tree29a58434fd79f2c91a3603ed8e05cd22cf09d2af
parentf37ad3c90dab9b77e7c2796e963f5992458ad4a4 (diff)
parent1879a68976052d06cc62473a0712df9ce89b5013 (diff)
Merge branch 'lukeshu/rebuilt-misc'
-rw-r--r--cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go183
-rw-r--r--cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go5
-rw-r--r--cmd/btrfs-rec/inspect/rebuildtrees/scan.go43
-rw-r--r--lib/btrfsutil/rebuilt_callbacks.go88
-rw-r--r--lib/btrfsutil/rebuilt_forrest.go102
-rw-r--r--lib/btrfsutil/rebuilt_readitem.go4
-rw-r--r--lib/btrfsutil/rebuilt_tree.go241
-rw-r--r--lib/textui/log.go2
8 files changed, 489 insertions, 179 deletions
diff --git a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go
index e6345ae..d1ccfac 100644
--- a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go
+++ b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go
@@ -85,7 +85,7 @@ func NewRebuilder(ctx context.Context, fs *btrfs.FS, nodeList []btrfsvol.Logical
o := &rebuilder{
scan: scanData,
}
- o.rebuilt = btrfsutil.NewRebuiltForrest(fs, scanData.Superblock, scanData.Graph, forrestCallbacks{o})
+ o.rebuilt = btrfsutil.NewRebuiltForrest(fs, scanData.Graph, forrestCallbacks{o})
return o, nil
}
@@ -224,6 +224,153 @@ func (o *rebuilder) processAddedItemQueue(ctx context.Context) error {
return nil
}
+type itemToVisit struct {
+ SortTreeID btrfsprim.ObjID // Use this tree ID for sorting, but not lookups
+ keyAndTree
+ RefNum int // Only for EXTENT_ITEM and METADATA_ITEM
+}
+
+func (k itemToVisit) String() string {
+ if k.TreeID == btrfsprim.EXTENT_TREE_OBJECTID &&
+ (k.ItemType == btrfsprim.EXTENT_ITEM_KEY || k.ItemType == btrfsprim.METADATA_ITEM_KEY) {
+ return textui.Sprintf("%v#%d", k.keyAndTree, k.RefNum)
+ }
+ return textui.Sprintf("%v", k.keyAndTree)
+}
+
+func (a itemToVisit) Compare(b itemToVisit) int {
+ if d := containers.NativeCompare(a.SortTreeID, b.SortTreeID); d != 0 {
+ return d
+ }
+ if d := a.keyAndTree.Compare(b.keyAndTree); d != 0 {
+ return d
+ }
+ return containers.NativeCompare(a.RefNum, b.RefNum)
+}
+
+// sortSettledItemQueue is like the usual simple by-key sort; but
+// applies a different sort-order to members of the EXTENT_TREE. It
+// sorts those members by the FS trees of the referencing inodes,
+// rather than by the laddr of the extent being referenced. This
+// greatly reduces the number of .RebuiltAcquireItems() cache-misses.
+func (o *rebuilder) sortSettledItemQueue(ctx context.Context, unorderedQueue containers.Set[keyAndTree]) []itemToVisit {
+ // Like many problems, the trick isn't answering the question,
+ // it's asking the right question.
+ //
+ // "Naively", the problem might be stated as:
+ //
+ // Definitions:
+ //
+ // An "item" contains a set of 0 or more (`uint64`) "tree
+ // IDs". "Processing" an item does a cache-load operation
+ // (from a replacement cache) for each tree ID.
+ //
+ // Problem:
+ //
+	//     Given a list of items, sort the list in a manner that
+ // minimizes cache-misses when processing the items in the
+ // list in order. Does the cache size or cache
+ // replacement policy affect what the optimal order is?
+ //
+ // Discussion:
+ //
+ // Put another way, sort the list such that items
+	//     containing the same tree IDs are near to each other.  If
+ // each item only contained 1 tree ID, this would be
+ // simple: sort by that tree ID. The difficulty of the
+ // question is how to weight each tree ID when items
+ // contain multiple; if an item contains tree IDs 'A' and
+ // 'B', and putting it near other items with 'A' if that
+ // means putting it farther from other items with 'B',
+ // when is it worth it to do so?
+ //
+ // The most obvious approach that is independent of the cache
+ // size/policy is to minimize the total distance between items
+ // within the same set. It turns out that this is the
+ // "Minimum Linear Arrangement" problem ("MinLA"), which is
+ // NP-hard. But, if you were paying attention, it's not quite
+	// MinLA; in our case, once two items are far enough apart that a
+ // cache eviction happens between them, there's no cost to
+ // moving them farther apart. And continuing to try to keep
+ // them close (instead of giving up on them) results in
+ // sub-optimal arrangements. So not only is MinLA
+ // computationally expensive for us to try to approximate a
+ // solution for, it won't actually give us a very good
+ // solution!
+ //
+ // So you might think "Ah, the trick is to not ask MinLA, the
+ // trick is to ask this MinLA-with-capped-cost question!" But
+ // we can find an even better question!
+ //
+ // Some concrete numbers to help with thinking about this: In
+ // my test trees, the maximum number of trees per item is 33,
+ // and slowdown from areas of the queue with few cache misses
+ // to areas where the MinLA approximation does poorly is
+ // around ~300×. And I don't think it's possible to come up
+ // with a better solution without going O(n^2), which is
+ // prohibitive when there are 4 million items in the
+ // EXTENT_TREE.
+ //
+ // The *right* question involves backing up and revisiting the
+ // assumption that it's *items* that we're sorting.
+ //
+ // Instead, let's allow items in the EXTENT_TREE to be visited
+ // more than once; have an entry in the queue for each
+ // ExtentDataRef within an item. Sure, that'll cause some
+ // inefficiency because EXTENT_ITEMs and METADATA_ITEMs will
+ // need to be read more than once. But that's a ~30×
+ // slowdown, and allows us to just sort those queue-entries
+ // near the trees being back-referenced. A ~30× slowdown is a
+ // heck of a lot better than a ~300× slowdown. And we don't
+ // have to try to solve a problem that's NP-hard.
+
+ ctx = dlog.WithField(ctx, "btrfs.inspect.rebuild-trees.rebuild.process.substep", "sort")
+ dlog.Info(ctx, "building ordered queue...")
+
+ dlog.Infof(ctx, "... walking %d items...", len(unorderedQueue))
+
+ // Don't worry about bailing if there is a failure to get the
+ // EXTENT_TREE; if that fails, then there can't be any items
+ // in the EXTENT_TREE for us to have to handle special, and
+ // all of the following code will fall through common-path.
+ extentTree := o.rebuilt.RebuiltTree(ctx, btrfsprim.EXTENT_TREE_OBJECTID)
+ var extentItems *containers.SortedMap[btrfsprim.Key, btrfsutil.ItemPtr]
+ if extentTree != nil {
+ extentItems = extentTree.RebuiltAcquireItems(ctx)
+ defer extentTree.RebuiltReleaseItems()
+ }
+
+ orderedQueue := make([]itemToVisit, 0, len(unorderedQueue))
+ for itemKey := range unorderedQueue {
+ if itemKey.TreeID == btrfsprim.EXTENT_TREE_OBJECTID && (itemKey.ItemType == btrfsprim.EXTENT_ITEM_KEY ||
+ itemKey.ItemType == btrfsprim.METADATA_ITEM_KEY ||
+ itemKey.ItemType == btrfsprim.EXTENT_DATA_REF_KEY) {
+ ptr, _ := extentItems.Load(itemKey.Key)
+ for i, treeID := range o.scan.DataBackrefs[ptr] {
+ orderedQueue = append(orderedQueue, itemToVisit{
+ keyAndTree: itemKey,
+ SortTreeID: treeID,
+ RefNum: i,
+ })
+ }
+ } else {
+ orderedQueue = append(orderedQueue, itemToVisit{
+ keyAndTree: itemKey,
+ SortTreeID: itemKey.TreeID,
+ })
+ }
+ }
+
+ dlog.Infof(ctx, "... sorting %d queue entries...", len(orderedQueue))
+ sort.Slice(orderedQueue, func(i, j int) bool {
+ return orderedQueue[i].Compare(orderedQueue[j]) < 0
+ })
+
+ dlog.Info(ctx, "... done")
+
+ return orderedQueue
+}
+
type processItemStats struct {
textui.Portion[int]
NumAugments int
@@ -241,11 +388,8 @@ func (s processItemStats) String() string {
func (o *rebuilder) processSettledItemQueue(ctx context.Context) error {
ctx = dlog.WithField(ctx, "btrfs.inspect.rebuild-trees.rebuild.substep", "process-items")
- queue := maps.Keys(o.settledItemQueue)
+ queue := o.sortSettledItemQueue(ctx, o.settledItemQueue)
o.settledItemQueue = make(containers.Set[keyAndTree])
- sort.Slice(queue, func(i, j int) bool {
- return queue[i].Compare(queue[j]) < 0
- })
var progress processItemStats
progress.D = len(queue)
@@ -254,23 +398,46 @@ func (o *rebuilder) processSettledItemQueue(ctx context.Context) error {
defer progressWriter.Done()
ctx = dlog.WithField(ctx, "btrfs.inspect.rebuild-trees.rebuild.substep.progress", &progress)
+ progressWriter.Set(progress)
type keyAndBody struct {
- keyAndTree
+ itemToVisit
Body btrfsitem.Item
}
itemChan := make(chan keyAndBody, textui.Tunable(300)) // average items-per-node≈100; let's have a buffer of ~3 nodes
grp := dgroup.NewGroup(ctx, dgroup.GroupConfig{})
grp.Go("io", func(ctx context.Context) error {
defer close(itemChan)
+ nextKey:
for _, key := range queue {
if err := ctx.Err(); err != nil {
return err
}
ctx := dlog.WithField(ctx, "btrfs.inspect.rebuild-trees.rebuild.process.item", key)
item := keyAndBody{
- keyAndTree: key,
- Body: o.rebuilt.RebuiltTree(ctx, key.TreeID).ReadItem(ctx, key.Key),
+ itemToVisit: key,
+ Body: o.rebuilt.RebuiltTree(ctx, key.TreeID).ReadItem(ctx, key.Key),
+ }
+ if key.TreeID == btrfsprim.EXTENT_TREE_OBJECTID &&
+ (key.ItemType == btrfsprim.EXTENT_ITEM_KEY || key.ItemType == btrfsprim.METADATA_ITEM_KEY) {
+ switch itemBody := item.Body.(type) {
+ case *btrfsitem.Extent:
+ item.Body = itemBody.Refs[key.RefNum].Body
+ if item.Body == nil {
+ continue nextKey
+ }
+ case *btrfsitem.Metadata:
+ item.Body = itemBody.Refs[key.RefNum].Body
+ if item.Body == nil {
+ continue nextKey
+ }
+ case *btrfsitem.Error:
+ // do nothing
+ default:
+ // This is a panic because the item decoder should not emit a new
+ // type to ref.Body without this code also being updated.
+ panic(fmt.Errorf("should not happen: unexpected type %T for %v", itemBody, key.ItemType))
+ }
}
select {
case itemChan <- item:
diff --git a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go
index 92b5ee5..0c52556 100644
--- a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go
+++ b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go
@@ -11,13 +11,16 @@ import (
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem"
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim"
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfsutil"
)
type forrestCallbacks struct {
*rebuilder
}
-// AddedItem implements btrfsutil.RebuiltForrestCallbacks.
+var _ btrfsutil.RebuiltForrestExtendedCallbacks = forrestCallbacks{}
+
+// AddedItem implements btrfsutil.RebuiltForrestExtendedCallbacks.
func (o forrestCallbacks) AddedItem(_ context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) {
o.addedItemQueue.Insert(keyAndTree{
TreeID: tree,
diff --git a/cmd/btrfs-rec/inspect/rebuildtrees/scan.go b/cmd/btrfs-rec/inspect/rebuildtrees/scan.go
index b1469ff..72557ea 100644
--- a/cmd/btrfs-rec/inspect/rebuildtrees/scan.go
+++ b/cmd/btrfs-rec/inspect/rebuildtrees/scan.go
@@ -31,14 +31,21 @@ type FlagsAndErr struct {
Err error
}
-type ScanDevicesResult struct {
- Superblock btrfstree.Superblock
+// ExtentDataRefPtr is a pointer to a *btrfsitem.ExtentDataRef,
+// whether it be to an EXTENT_DATA_REF item proper, or to an inline
+// ref inside of another EXTENT_ITEM or METADATA_ITEM.
+type ExtentDataRefPtr struct {
+ btrfsutil.ItemPtr
+ RefNum int // Only for EXTENT_ITEM and METADATA_ITEM
+}
+type ScanDevicesResult struct {
Graph btrfsutil.Graph
- Flags map[btrfsutil.ItemPtr]FlagsAndErr // INODE_ITEM
- Names map[btrfsutil.ItemPtr][]byte // DIR_INDEX
- Sizes map[btrfsutil.ItemPtr]SizeAndErr // EXTENT_CSUM and EXTENT_DATA
+ Flags map[btrfsutil.ItemPtr]FlagsAndErr // INODE_ITEM
+ Names map[btrfsutil.ItemPtr][]byte // DIR_INDEX
+ Sizes map[btrfsutil.ItemPtr]SizeAndErr // EXTENT_CSUM and EXTENT_DATA
+ DataBackrefs map[btrfsutil.ItemPtr][]btrfsprim.ObjID // EXTENT_DATA_REF, EXTENT_ITEM, and METADATA_ITEM
}
func ScanDevices(_ctx context.Context, fs *btrfs.FS, nodeList []btrfsvol.LogicalAddr) (ScanDevicesResult, error) {
@@ -53,12 +60,12 @@ func ScanDevices(_ctx context.Context, fs *btrfs.FS, nodeList []btrfsvol.Logical
// read-roots //////////////////////////////////////////////////////////////////
ctx = dlog.WithField(_ctx, "btrfs.inspect.rebuild-trees.read.substep", "read-roots")
ret := ScanDevicesResult{
- Superblock: *sb,
-
Graph: btrfsutil.NewGraph(ctx, *sb),
- Flags: make(map[btrfsutil.ItemPtr]FlagsAndErr),
- Names: make(map[btrfsutil.ItemPtr][]byte),
- Sizes: make(map[btrfsutil.ItemPtr]SizeAndErr),
+
+ Flags: make(map[btrfsutil.ItemPtr]FlagsAndErr),
+ Names: make(map[btrfsutil.ItemPtr][]byte),
+ Sizes: make(map[btrfsutil.ItemPtr]SizeAndErr),
+ DataBackrefs: make(map[btrfsutil.ItemPtr][]btrfsprim.ObjID),
}
// read-nodes //////////////////////////////////////////////////////////////////
@@ -132,6 +139,22 @@ func (o *ScanDevicesResult) insertNode(node *btrfstree.Node) {
Size: uint64(size),
Err: err,
}
+ case *btrfsitem.Extent:
+ o.DataBackrefs[ptr] = make([]btrfsprim.ObjID, len(itemBody.Refs))
+ for i, ref := range itemBody.Refs {
+ if refBody, ok := ref.Body.(*btrfsitem.ExtentDataRef); ok {
+ o.DataBackrefs[ptr][i] = refBody.Root
+ }
+ }
+ case *btrfsitem.Metadata:
+ o.DataBackrefs[ptr] = make([]btrfsprim.ObjID, len(itemBody.Refs))
+ for i, ref := range itemBody.Refs {
+ if refBody, ok := ref.Body.(*btrfsitem.ExtentDataRef); ok {
+ o.DataBackrefs[ptr][i] = refBody.Root
+ }
+ }
+ case *btrfsitem.ExtentDataRef:
+ o.DataBackrefs[ptr] = []btrfsprim.ObjID{itemBody.Root}
case *btrfsitem.Error:
switch item.Key.ItemType {
case btrfsprim.INODE_ITEM_KEY:
diff --git a/lib/btrfsutil/rebuilt_callbacks.go b/lib/btrfsutil/rebuilt_callbacks.go
new file mode 100644
index 0000000..3a7e6e6
--- /dev/null
+++ b/lib/btrfsutil/rebuilt_callbacks.go
@@ -0,0 +1,88 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package btrfsutil
+
+import (
+ "context"
+ "fmt"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+)
+
+type RebuiltForrestCallbacks interface {
+ AddedRoot(ctx context.Context, tree btrfsprim.ObjID, root btrfsvol.LogicalAddr)
+ LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool)
+ LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool)
+}
+
+type RebuiltForrestExtendedCallbacks interface {
+ RebuiltForrestCallbacks
+ AddedItem(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key)
+}
+
+type noopRebuiltForrestCallbacks struct {
+ forrest *RebuiltForrest
+}
+
+func (noopRebuiltForrestCallbacks) AddedRoot(context.Context, btrfsprim.ObjID, btrfsvol.LogicalAddr) {
+}
+
+func (cb noopRebuiltForrestCallbacks) LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, _item btrfsitem.Root, ok bool) {
+ rootTree := cb.forrest.RebuiltTree(ctx, btrfsprim.ROOT_TREE_OBJECTID)
+ if rootTree == nil {
+ return 0, btrfsitem.Root{}, false
+ }
+ tgt := btrfsprim.Key{
+ ObjectID: tree,
+ ItemType: btrfsprim.ROOT_ITEM_KEY,
+ }
+ itemKey, itemPtr, ok := rootTree.RebuiltAcquireItems(ctx).Search(func(key btrfsprim.Key, _ ItemPtr) int {
+ key.Offset = 0
+ return tgt.Compare(key)
+ })
+ rootTree.RebuiltReleaseItems()
+ if !ok {
+ return 0, btrfsitem.Root{}, false
+ }
+ itemBody := cb.forrest.readItem(ctx, itemPtr)
+ defer itemBody.Free()
+ switch itemBody := itemBody.(type) {
+ case *btrfsitem.Root:
+ return btrfsprim.Generation(itemKey.Offset), *itemBody, true
+ case *btrfsitem.Error:
+ return 0, btrfsitem.Root{}, false
+ default:
+ // This is a panic because the item decoder should not emit ROOT_ITEM items as anything but
+ // btrfsitem.Root or btrfsitem.Error without this code also being updated.
+ panic(fmt.Errorf("should not happen: ROOT_ITEM item has unexpected type: %T", itemBody))
+ }
+}
+
+func (cb noopRebuiltForrestCallbacks) LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) {
+ uuidTree := cb.forrest.RebuiltTree(ctx, btrfsprim.UUID_TREE_OBJECTID)
+ if uuidTree == nil {
+ return 0, false
+ }
+ tgt := btrfsitem.UUIDToKey(uuid)
+ itemPtr, ok := uuidTree.RebuiltAcquireItems(ctx).Load(tgt)
+ uuidTree.RebuiltReleaseItems()
+ if !ok {
+ return 0, false
+ }
+ itemBody := cb.forrest.readItem(ctx, itemPtr)
+ defer itemBody.Free()
+ switch itemBody := itemBody.(type) {
+ case *btrfsitem.UUIDMap:
+ return itemBody.ObjID, true
+ case *btrfsitem.Error:
+ return 0, false
+ default:
+ // This is a panic because the item decoder should not emit UUID_SUBVOL items as anything but
+ // btrfsitem.UUIDMap or btrfsitem.Error without this code also being updated.
+ panic(fmt.Errorf("should not happen: UUID_SUBVOL item has unexpected type: %T", itemBody))
+ }
+}
diff --git a/lib/btrfsutil/rebuilt_forrest.go b/lib/btrfsutil/rebuilt_forrest.go
index 6231563..4249396 100644
--- a/lib/btrfsutil/rebuilt_forrest.go
+++ b/lib/btrfsutil/rebuilt_forrest.go
@@ -6,89 +6,16 @@ package btrfsutil
import (
"context"
- "fmt"
"github.com/datawire/dlib/dlog"
- "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim"
- "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree"
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
"git.lukeshu.com/btrfs-progs-ng/lib/containers"
"git.lukeshu.com/btrfs-progs-ng/lib/slices"
)
-type RebuiltForrestCallbacks interface {
- AddedItem(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key)
- AddedRoot(ctx context.Context, tree btrfsprim.ObjID, root btrfsvol.LogicalAddr)
- LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool)
- LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool)
-}
-
-type noopRebuiltForrestCallbacks struct {
- forrest *RebuiltForrest
-}
-
-func (noopRebuiltForrestCallbacks) AddedItem(context.Context, btrfsprim.ObjID, btrfsprim.Key) {}
-func (noopRebuiltForrestCallbacks) AddedRoot(context.Context, btrfsprim.ObjID, btrfsvol.LogicalAddr) {
-}
-
-func (cb noopRebuiltForrestCallbacks) LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, _item btrfsitem.Root, ok bool) {
- rootTree := cb.forrest.RebuiltTree(ctx, btrfsprim.ROOT_TREE_OBJECTID)
- if rootTree == nil {
- return 0, btrfsitem.Root{}, false
- }
- tgt := btrfsprim.Key{
- ObjectID: tree,
- ItemType: btrfsprim.ROOT_ITEM_KEY,
- }
- itemKey, itemPtr, ok := rootTree.RebuiltAcquireItems(ctx).Search(func(key btrfsprim.Key, _ ItemPtr) int {
- key.Offset = 0
- return tgt.Compare(key)
- })
- rootTree.RebuiltReleaseItems()
- if !ok {
- return 0, btrfsitem.Root{}, false
- }
- itemBody := cb.forrest.readItem(ctx, itemPtr)
- defer itemBody.Free()
- switch itemBody := itemBody.(type) {
- case *btrfsitem.Root:
- return btrfsprim.Generation(itemKey.Offset), *itemBody, true
- case *btrfsitem.Error:
- return 0, btrfsitem.Root{}, false
- default:
- // This is a panic because the item decoder should not emit ROOT_ITEM items as anything but
- // btrfsitem.Root or btrfsitem.Error without this code also being updated.
- panic(fmt.Errorf("should not happen: ROOT_ITEM item has unexpected type: %T", itemBody))
- }
-}
-
-func (cb noopRebuiltForrestCallbacks) LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) {
- uuidTree := cb.forrest.RebuiltTree(ctx, btrfsprim.UUID_TREE_OBJECTID)
- if uuidTree == nil {
- return 0, false
- }
- tgt := btrfsitem.UUIDToKey(uuid)
- itemPtr, ok := uuidTree.RebuiltAcquireItems(ctx).Load(tgt)
- uuidTree.RebuiltReleaseItems()
- if !ok {
- return 0, false
- }
- itemBody := cb.forrest.readItem(ctx, itemPtr)
- defer itemBody.Free()
- switch itemBody := itemBody.(type) {
- case *btrfsitem.UUIDMap:
- return itemBody.ObjID, true
- case *btrfsitem.Error:
- return 0, false
- default:
- // This is a panic because the item decoder should not emit UUID_SUBVOL items as anything but
- // btrfsitem.UUIDMap or btrfsitem.Error without this code also being updated.
- panic(fmt.Errorf("should not happen: UUID_SUBVOL item has unexpected type: %T", itemBody))
- }
-}
-
// RebuiltForrest is an abstraction for rebuilding and accessing
// potentially broken btrees.
//
@@ -134,8 +61,7 @@ func (cb noopRebuiltForrestCallbacks) LookupUUID(ctx context.Context, uuid btrfs
// NewRebuiltForrest().
type RebuiltForrest struct {
// static
- file btrfstree.NodeSource
- sb btrfstree.Superblock
+ inner btrfs.ReadableFS
graph Graph
cb RebuiltForrestCallbacks
@@ -147,12 +73,14 @@ type RebuiltForrest struct {
rebuiltSharedCache
}
-// NewRebuiltForrest returns a new RebuiltForrest instance. The
-// RebuiltForrestCallbacks may be nil.
-func NewRebuiltForrest(file btrfstree.NodeSource, sb btrfstree.Superblock, graph Graph, cb RebuiltForrestCallbacks) *RebuiltForrest {
+// NewRebuiltForrest returns a new RebuiltForrest instance.
+//
+// The `cb` RebuiltForrestCallbacks may be nil. If `cb` also
+// implements RebuiltForrestExtendedCallbacks, then a series of
+// .AddedItem() calls will be made before each call to .AddedRoot().
+func NewRebuiltForrest(fs btrfs.ReadableFS, graph Graph, cb RebuiltForrestCallbacks) *RebuiltForrest {
ret := &RebuiltForrest{
- file: file,
- sb: sb,
+ inner: fs,
graph: graph,
cb: cb,
@@ -213,13 +141,17 @@ func (ts *RebuiltForrest) addTree(ctx context.Context, treeID btrfsprim.ObjID, s
var root btrfsvol.LogicalAddr
switch treeID {
case btrfsprim.ROOT_TREE_OBJECTID:
- root = ts.sb.RootTree
+ sb, _ := ts.inner.Superblock()
+ root = sb.RootTree
case btrfsprim.CHUNK_TREE_OBJECTID:
- root = ts.sb.ChunkTree
+ sb, _ := ts.inner.Superblock()
+ root = sb.ChunkTree
case btrfsprim.TREE_LOG_OBJECTID:
- root = ts.sb.LogTree
+ sb, _ := ts.inner.Superblock()
+ root = sb.LogTree
case btrfsprim.BLOCK_GROUP_TREE_OBJECTID:
- root = ts.sb.BlockGroupRoot
+ sb, _ := ts.inner.Superblock()
+ root = sb.BlockGroupRoot
default:
if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) {
dlog.Error(ctx, "failed to add tree: add ROOT_TREE")
diff --git a/lib/btrfsutil/rebuilt_readitem.go b/lib/btrfsutil/rebuilt_readitem.go
index 80a9e4b..d1d1319 100644
--- a/lib/btrfsutil/rebuilt_readitem.go
+++ b/lib/btrfsutil/rebuilt_readitem.go
@@ -36,7 +36,7 @@ func (ts *RebuiltForrest) readItem(ctx context.Context, ptr ItemPtr) btrfsitem.I
panic(fmt.Errorf("should not happen: btrfsutil.RebuiltForrest.readItem called for negative item slot: %v", ptr.Slot))
}
- node, err := ts.file.AcquireNode(ctx, ptr.Node, btrfstree.NodeExpectations{
+ node, err := ts.inner.AcquireNode(ctx, ptr.Node, btrfstree.NodeExpectations{
LAddr: containers.OptionalValue(ptr.Node),
Level: containers.OptionalValue(graphInfo.Level),
Generation: containers.OptionalValue(graphInfo.Generation),
@@ -51,7 +51,7 @@ func (ts *RebuiltForrest) readItem(ctx context.Context, ptr ItemPtr) btrfsitem.I
MinItem: containers.OptionalValue(graphInfo.MinItem(ts.graph)),
MaxItem: containers.OptionalValue(graphInfo.MaxItem(ts.graph)),
})
- defer ts.file.ReleaseNode(node)
+ defer ts.inner.ReleaseNode(node)
if err != nil {
panic(fmt.Errorf("should not happen: i/o error: %w", err))
}
diff --git a/lib/btrfsutil/rebuilt_tree.go b/lib/btrfsutil/rebuilt_tree.go
index 31a31be..177b859 100644
--- a/lib/btrfsutil/rebuilt_tree.go
+++ b/lib/btrfsutil/rebuilt_tree.go
@@ -37,24 +37,24 @@ type RebuiltTree struct {
// derived from tree.Roots, which is why it's OK if they get
// evicted.
//
- // 1. tree.acquireLeafToRoots() = tree.forrest.leafs.Acquire(tree.ID)
+ // 1. tree.acquireNodeIndex() = tree.forrest.nodeIndex.Acquire(tree.ID)
// 2. tree.RebuiltAcquireItems() = tree.forrest.incItems.Acquire(tree.ID)
// 3. tree.RebuiltAcquirePotentialItems() = tree.forrest.excItems.Acquire(tree.ID)
}
type rebuiltSharedCache struct {
- leafs containers.Cache[btrfsprim.ObjID, map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]]
- incItems containers.Cache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]]
- excItems containers.Cache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]]
+ nodeIndex containers.Cache[btrfsprim.ObjID, rebuiltNodeIndex]
+ incItems containers.Cache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]]
+ excItems containers.Cache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]]
}
func makeRebuiltSharedCache(forrest *RebuiltForrest) rebuiltSharedCache {
var ret rebuiltSharedCache
- ret.leafs = containers.NewARCache[btrfsprim.ObjID, map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]](
+ ret.nodeIndex = containers.NewARCache[btrfsprim.ObjID, rebuiltNodeIndex](
textui.Tunable(8),
- containers.SourceFunc[btrfsprim.ObjID, map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]](
- func(ctx context.Context, treeID btrfsprim.ObjID, leafs *map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) {
- *leafs = forrest.trees[treeID].uncachedLeafToRoots(ctx)
+ containers.SourceFunc[btrfsprim.ObjID, rebuiltNodeIndex](
+ func(ctx context.Context, treeID btrfsprim.ObjID, index *rebuiltNodeIndex) {
+ *index = forrest.trees[treeID].uncachedNodeIndex(ctx)
}))
ret.incItems = containers.NewARCache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]](
textui.Tunable(8),
@@ -71,52 +71,88 @@ func makeRebuiltSharedCache(forrest *RebuiltForrest) rebuiltSharedCache {
return ret
}
-// evictable member 1: .acquireLeafToRoots() ///////////////////////////////////////////////////////////////////////////
+// evictable member 1: .acquireNodeIndex() /////////////////////////////////////////////////////////////////////////////
-// acquireLeafToRoots returns all leafs (lvl=0) in the filesystem that
-// pass .isOwnerOK, whether or not they're in the tree.
-func (tree *RebuiltTree) acquireLeafToRoots(ctx context.Context) map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] {
- return *tree.forrest.leafs.Acquire(ctx, tree.ID)
+type rebuiltRoots = map[btrfsvol.LogicalAddr]rebuiltPathInfo
+
+type rebuiltPathInfo struct {
+ loMaxItem btrfsprim.Key
+ hiMaxItem btrfsprim.Key
+}
+
+type rebuiltNodeIndex struct {
+ // leafToRoots contains all leafs (lvl=0) in the filesystem
+ // that pass .isOwnerOK, whether or not they're in the tree.
+ leafToRoots map[btrfsvol.LogicalAddr]rebuiltRoots
+}
+
+func (tree *RebuiltTree) acquireNodeIndex(ctx context.Context) rebuiltNodeIndex {
+ return *tree.forrest.nodeIndex.Acquire(ctx, tree.ID)
}
-func (tree *RebuiltTree) releaseLeafToRoots() {
- tree.forrest.leafs.Release(tree.ID)
+func (tree *RebuiltTree) releaseNodeIndex() {
+ tree.forrest.nodeIndex.Release(tree.ID)
}
-func (tree *RebuiltTree) uncachedLeafToRoots(ctx context.Context) map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] {
+func (tree *RebuiltTree) uncachedNodeIndex(ctx context.Context) rebuiltNodeIndex {
ctx = dlog.WithField(ctx, "btrfs.util.rebuilt-tree.index-nodes", fmt.Sprintf("tree=%v", tree.ID))
- nodeToRoots := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr])
+ indexer := &rebuiltNodeIndexer{
+ tree: tree,
+ idToTree: make(map[btrfsprim.ObjID]*RebuiltTree),
- var stats textui.Portion[int]
- stats.D = len(tree.forrest.graph.Nodes)
- progressWriter := textui.NewProgress[textui.Portion[int]](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second))
- progress := func() {
- stats.N = len(nodeToRoots)
- progressWriter.Set(stats)
+ nodeToRoots: make(map[btrfsvol.LogicalAddr]rebuiltRoots),
}
-
- progress()
- for _, node := range maps.SortedKeys(tree.forrest.graph.Nodes) {
- tree.indexNode(ctx, node, nodeToRoots, progress, nil)
+ for ancestor := tree; ancestor != nil; ancestor = ancestor.Parent {
+ indexer.idToTree[ancestor.ID] = ancestor
}
- progressWriter.Done()
- ret := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr])
- for node, roots := range nodeToRoots {
+ ret := rebuiltNodeIndex{
+ leafToRoots: make(map[btrfsvol.LogicalAddr]rebuiltRoots),
+ }
+ for node, roots := range indexer.run(ctx) {
if tree.forrest.graph.Nodes[node].Level == 0 && len(roots) > 0 {
- ret[node] = roots
+ ret.leafToRoots[node] = roots
}
}
return ret
}
-func (tree *RebuiltTree) indexNode(ctx context.Context, node btrfsvol.LogicalAddr, index map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr], progress func(), stack []btrfsvol.LogicalAddr) {
- defer progress()
+type rebuiltNodeIndexer struct {
+ // Input
+ tree *RebuiltTree
+ idToTree map[btrfsprim.ObjID]*RebuiltTree
+
+ // Output
+ nodeToRoots map[btrfsvol.LogicalAddr]rebuiltRoots
+
+ // State
+ stats textui.Portion[int]
+ progressWriter *textui.Progress[textui.Portion[int]]
+}
+
+func (indexer *rebuiltNodeIndexer) run(ctx context.Context) map[btrfsvol.LogicalAddr]rebuiltRoots {
+ indexer.stats.D = len(indexer.tree.forrest.graph.Nodes)
+ indexer.progressWriter = textui.NewProgress[textui.Portion[int]](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second))
+ indexer.updateProgress()
+ for _, node := range maps.SortedKeys(indexer.tree.forrest.graph.Nodes) {
+ indexer.node(ctx, node, nil)
+ }
+ indexer.progressWriter.Done()
+ return indexer.nodeToRoots
+}
+
+func (indexer *rebuiltNodeIndexer) updateProgress() {
+ indexer.stats.N = len(indexer.nodeToRoots)
+ indexer.progressWriter.Set(indexer.stats)
+}
+
+func (indexer *rebuiltNodeIndexer) node(ctx context.Context, node btrfsvol.LogicalAddr, stack []btrfsvol.LogicalAddr) {
+ defer indexer.updateProgress()
if err := ctx.Err(); err != nil {
return
}
- if maps.HasKey(index, node) {
+ if maps.HasKey(indexer.nodeToRoots, node) {
return
}
if slices.Contains(node, stack) {
@@ -124,35 +160,86 @@ func (tree *RebuiltTree) indexNode(ctx context.Context, node btrfsvol.LogicalAdd
// have already checked for loops.
panic("loop")
}
- if !tree.isOwnerOK(tree.forrest.graph.Nodes[node].Owner, tree.forrest.graph.Nodes[node].Generation) {
- index[node] = nil
+ nodeInfo := indexer.tree.forrest.graph.Nodes[node]
+ if !indexer.tree.isOwnerOK(nodeInfo.Owner, nodeInfo.Generation) {
+ indexer.nodeToRoots[node] = nil
return
}
- // tree.leafToRoots
stack = append(stack, node)
- var roots containers.Set[btrfsvol.LogicalAddr]
- for _, kp := range tree.forrest.graph.EdgesTo[node] {
- if !tree.isOwnerOK(tree.forrest.graph.Nodes[kp.FromNode].Owner, tree.forrest.graph.Nodes[kp.FromNode].Generation) {
- continue
+ var roots rebuiltRoots
+nextKP:
+ for _, kp := range indexer.tree.forrest.graph.EdgesTo[node] {
+ // The cheap expectations to check.
+ if kp.ToLevel != nodeInfo.Level ||
+ kp.ToKey.Compare(nodeInfo.MinItem(indexer.tree.forrest.graph)) > 0 ||
+ kp.ToGeneration != nodeInfo.Generation {
+ continue nextKP
+ }
+ // The MaxItem expectation is only cheap to check if
+ // the KP isn't in the last slot. If it isn't in the
+ // last slot, we'll wait to check it until after we've
+ // indexed kp.FromNode.
+ if kp.FromSlot+1 < len(indexer.tree.forrest.graph.EdgesFrom[kp.FromNode]) {
+ kpMaxItem := indexer.tree.forrest.graph.EdgesFrom[kp.FromNode][kp.FromSlot+1].ToKey.Mm()
+ if kpMaxItem.Compare(nodeInfo.MaxItem(indexer.tree.forrest.graph)) < 0 {
+ continue nextKP
+ }
}
- tree.indexNode(ctx, kp.FromNode, index, progress, stack)
- if len(index[kp.FromNode]) > 0 {
- if roots == nil {
- roots = make(containers.Set[btrfsvol.LogicalAddr])
+ // isOwnerOK is O(n) on the number of parents, so
+ // avoid doing it if possible.
+ if fromTree := indexer.idToTree[kp.FromTree]; fromTree == nil || !fromTree.isOwnerOK(nodeInfo.Owner, nodeInfo.Generation) {
+ continue nextKP
+ }
+
+ indexer.node(ctx, kp.FromNode, stack)
+ if len(indexer.nodeToRoots[kp.FromNode]) > 0 {
+ for root, rootInfo := range indexer.nodeToRoots[kp.FromNode] {
+ if kp.FromSlot+1 < len(indexer.tree.forrest.graph.EdgesFrom[kp.FromNode]) {
+ rootInfo.loMaxItem = indexer.tree.forrest.graph.EdgesFrom[kp.FromNode][kp.FromSlot+1].ToKey.Mm()
+ rootInfo.hiMaxItem = rootInfo.loMaxItem
+ } else {
+ // OK, now check the MaxItem expectation.
+ //
+ // We'll use the hiMaxItem, because it only needs to be
+ // valid in *one* of the paths to this node.
+ kpMaxItem := rootInfo.hiMaxItem
+ if kpMaxItem.Compare(nodeInfo.MaxItem(indexer.tree.forrest.graph)) < 0 {
+ continue nextKP
+ }
+ }
+ oldRootInfo, ok := roots[root]
+ if ok && rootInfo.loMaxItem.Compare(oldRootInfo.loMaxItem) > 0 {
+ rootInfo.loMaxItem = oldRootInfo.loMaxItem
+ }
+ if ok && rootInfo.hiMaxItem.Compare(oldRootInfo.hiMaxItem) < 0 {
+ rootInfo.hiMaxItem = oldRootInfo.hiMaxItem
+ }
+ if roots == nil {
+ roots = make(rebuiltRoots)
+ }
+ roots[root] = rootInfo
}
- roots.InsertFrom(index[kp.FromNode])
}
}
if roots == nil {
- roots = containers.NewSet[btrfsvol.LogicalAddr](node)
+ roots = rebuiltRoots{
+ node: rebuiltPathInfo{
+ loMaxItem: btrfsprim.MaxKey,
+ hiMaxItem: btrfsprim.MaxKey,
+ },
+ }
}
- index[node] = roots
+ indexer.nodeToRoots[node] = roots
}
// isOwnerOK returns whether it is permissible for a node with
// .Head.Owner=owner and .Head.Generation=gen to be in this tree.
func (tree *RebuiltTree) isOwnerOK(owner btrfsprim.ObjID, gen btrfsprim.Generation) bool {
+ // It is important that this not perform allocations, even in
+ // the "false"/failure case. It will be called lots of times
+ // in a tight loop for both values that pass and values that
+ // fail.
for {
if owner == tree.ID {
return true
@@ -226,12 +313,12 @@ func (tree *RebuiltTree) items(ctx context.Context, inc bool) containers.SortedM
defer tree.mu.RUnlock()
var leafs []btrfsvol.LogicalAddr
- for leaf, roots := range tree.acquireLeafToRoots(ctx) {
- if tree.Roots.HasAny(roots) == inc {
+ for leaf, roots := range tree.acquireNodeIndex(ctx).leafToRoots {
+ if maps.HaveAnyKeysInCommon(tree.Roots, roots) == inc {
leafs = append(leafs, leaf)
}
}
- tree.releaseLeafToRoots()
+ tree.releaseNodeIndex()
slices.Sort(leafs)
var stats rebuiltItemStats
@@ -320,37 +407,45 @@ func (tree *RebuiltTree) RebuiltAddRoot(ctx context.Context, rootNode btrfsvol.L
ctx = dlog.WithField(ctx, "btrfs.util.rebuilt-tree.add-root", fmt.Sprintf("tree=%v rootNode=%v", tree.ID, rootNode))
dlog.Info(ctx, "adding root...")
- var stats rebuiltRootStats
- leafToRoots := tree.acquireLeafToRoots(ctx)
- stats.Leafs.D = len(leafToRoots)
- progressWriter := textui.NewProgress[rebuiltRootStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second))
- for i, leaf := range maps.SortedKeys(leafToRoots) {
- stats.Leafs.N = i
- progressWriter.Set(stats)
+ shouldFlush := tree.ID == btrfsprim.ROOT_TREE_OBJECTID || tree.ID == btrfsprim.UUID_TREE_OBJECTID
- if tree.Roots.HasAny(leafToRoots[leaf]) || !leafToRoots[leaf].Has(rootNode) {
- continue
- }
+ if extCB, ok := tree.forrest.cb.(RebuiltForrestExtendedCallbacks); ok {
+ var stats rebuiltRootStats
+ leafToRoots := tree.acquireNodeIndex(ctx).leafToRoots
+ stats.Leafs.D = len(leafToRoots)
+ progressWriter := textui.NewProgress[rebuiltRootStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second))
+ for i, leaf := range maps.SortedKeys(leafToRoots) {
+ stats.Leafs.N = i
+ progressWriter.Set(stats)
- stats.AddedLeafs++
- progressWriter.Set(stats)
+ if maps.HaveAnyKeysInCommon(tree.Roots, leafToRoots[leaf]) || !maps.HasKey(leafToRoots[leaf], rootNode) {
+ continue
+ }
- for _, itemKey := range tree.forrest.graph.Nodes[leaf].Items {
- tree.forrest.cb.AddedItem(ctx, tree.ID, itemKey)
- stats.AddedItems++
+ stats.AddedLeafs++
progressWriter.Set(stats)
+
+ for _, itemKey := range tree.forrest.graph.Nodes[leaf].Items {
+ extCB.AddedItem(ctx, tree.ID, itemKey)
+ stats.AddedItems++
+ progressWriter.Set(stats)
+ }
+ }
+ stats.Leafs.N = len(leafToRoots)
+ tree.releaseNodeIndex()
+ progressWriter.Set(stats)
+ progressWriter.Done()
+
+ if stats.AddedItems == 0 {
+ shouldFlush = false
}
}
- stats.Leafs.N = len(leafToRoots)
- tree.releaseLeafToRoots()
- progressWriter.Set(stats)
- progressWriter.Done()
tree.Roots.Insert(rootNode)
tree.forrest.incItems.Delete(tree.ID) // force re-gen
tree.forrest.excItems.Delete(tree.ID) // force re-gen
- if (tree.ID == btrfsprim.ROOT_TREE_OBJECTID || tree.ID == btrfsprim.UUID_TREE_OBJECTID) && stats.AddedItems > 0 {
+ if shouldFlush {
tree.forrest.flushNegativeCache(ctx)
}
tree.forrest.cb.AddedRoot(ctx, tree.ID, rootNode)
@@ -391,14 +486,14 @@ func (tree *RebuiltTree) RebuiltLeafToRoots(ctx context.Context, leaf btrfsvol.L
tree.mu.RLock()
defer tree.mu.RUnlock()
ret := make(containers.Set[btrfsvol.LogicalAddr])
- for root := range tree.acquireLeafToRoots(ctx)[leaf] {
+ for root := range tree.acquireNodeIndex(ctx).leafToRoots[leaf] {
if tree.Roots.Has(root) {
panic(fmt.Errorf("should not happen: (tree=%v).RebuiltLeafToRoots(leaf=%v): tree contains root=%v but not leaf",
tree.ID, leaf, root))
}
ret.Insert(root)
}
- tree.releaseLeafToRoots()
+ tree.releaseNodeIndex()
if len(ret) == 0 {
return nil
}
diff --git a/lib/textui/log.go b/lib/textui/log.go
index e5d3f60..25bb4be 100644
--- a/lib/textui/log.go
+++ b/lib/textui/log.go
@@ -319,6 +319,8 @@ func fieldOrd(key string) int {
case "btrfs.inspect.rebuild-trees.rebuild.settle.item":
return -25
// step=rebuild, substep=process-items (2b/3)
+ case "btrfs.inspect.rebuild-trees.rebuild.process.substep":
+ return -26
case "btrfs.inspect.rebuild-trees.rebuild.process.item":
return -25
// step=rebuild, substep=apply-augments (3/3)