summaryrefslogtreecommitdiff
path: root/lib/diskio
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2022-07-13 21:31:23 -0600
committerLuke Shumaker <lukeshu@lukeshu.com>2022-07-13 21:39:43 -0600
commit436e1681c9fcda246c6d84526fc79c87adc7b28d (patch)
tree75e52fb976c1666e50b32ab61043704f127a91f7 /lib/diskio
parent72a458520fccafe4df8c02c811cb6f64a310616e (diff)
Move lib/kmp to lib/diskio
Diffstat (limited to 'lib/diskio')
-rw-r--r--lib/diskio/kmp.go85
-rw-r--r--lib/diskio/kmp_test.go62
-rw-r--r--lib/diskio/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d3
-rw-r--r--lib/diskio/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e6003
-rw-r--r--lib/diskio/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd603
-rw-r--r--lib/diskio/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e419537383
-rw-r--r--lib/diskio/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d3
7 files changed, 162 insertions, 0 deletions
diff --git a/lib/diskio/kmp.go b/lib/diskio/kmp.go
new file mode 100644
index 0000000..4c0f531
--- /dev/null
+++ b/lib/diskio/kmp.go
@@ -0,0 +1,85 @@
+// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package diskio
+
+import (
+ "errors"
+ "io"
+)
+
+// buildKMPTable takes the string 'substr', and returns a table such
+// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and
+// 'substr[:val] == substr[matchLen-val:matchLen]'.
+func buildKMPTable(substr []byte) []int {
+ table := make([]int, len(substr))
+ for j := range table {
+ if j == 0 {
+ // First entry must always be 0 (in order to
+ // satisfy 'val < matchLen').
+ continue
+ }
+ val := table[j-1]
+ // not a match; go back
+ for val > 0 && substr[j] != substr[val] {
+ val = table[val-1]
+ }
+ // is a match; go forward
+ if substr[val] == substr[j] {
+ val++
+ }
+ table[j] = val
+ }
+ return table
+}
+
+// FindAll returns the starting-position of all possibly-overlapping
+// occurances of 'substr' in the 'r' stream.
+//
+// Will panic if len(substr)==0.
+//
+// Uses the Knuth-Morris-Pratt algorithm.
+func FindAll(r io.ByteReader, substr []byte) ([]int64, error) {
+ if len(substr) == 0 {
+ panic(errors.New("diskio.FindAll: empty substring"))
+ }
+ table := buildKMPTable(substr)
+
+ var matches []int64
+ var curMatchBeg int64
+ var curMatchLen int
+
+ pos := int64(-1) // if 'r' were a slice; define 'pos' such that 'chr=r[pos]'
+ for {
+ // I/O
+ var chr byte
+ chr, err := r.ReadByte()
+ if err != nil {
+ if errors.Is(err, io.EOF) {
+ err = nil
+ }
+ return matches, err
+ }
+ pos++
+
+ // Consider 'chr'
+ for curMatchLen > 0 && chr != substr[curMatchLen] { // shorten the match
+ overlap := table[curMatchLen-1]
+ curMatchBeg += int64(curMatchLen - overlap)
+ curMatchLen = overlap
+ }
+ if chr == substr[curMatchLen] { // lengthen the match
+ if curMatchLen == 0 {
+ curMatchBeg = pos
+ }
+ curMatchLen++
+ if curMatchLen == len(substr) {
+ matches = append(matches, curMatchBeg)
+ overlap := table[curMatchLen-1]
+ curMatchBeg += int64(curMatchLen - overlap)
+ curMatchLen = overlap
+ }
+ }
+ }
+}
diff --git a/lib/diskio/kmp_test.go b/lib/diskio/kmp_test.go
new file mode 100644
index 0000000..6c6ef78
--- /dev/null
+++ b/lib/diskio/kmp_test.go
@@ -0,0 +1,62 @@
+// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package diskio
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestBuildKMPTable(t *testing.T) {
+ substr := []byte("ababaa")
+ table := buildKMPTable(substr)
+ assert.Equal(t,
+ []int{0, 0, 1, 2, 3, 1},
+ table)
+ for j, val := range table {
+ matchLen := j + 1
+ assert.Equalf(t, substr[:val], substr[matchLen-val:matchLen],
+ "for table[%d]=%d", j, val)
+ }
+}
+
+func FuzzBuildKMPTable(f *testing.F) {
+ f.Add([]byte("ababaa"))
+ f.Fuzz(func(t *testing.T, substr []byte) {
+ table := buildKMPTable(substr)
+ assert.Equal(t, len(substr), len(table), "length")
+ for j, val := range table {
+ matchLen := j + 1
+ assert.Equalf(t, substr[:val], substr[matchLen-val:matchLen],
+ "for table[%d]=%d", j, val)
+ }
+ })
+}
+
+func NaiveFindAll(str, substr []byte) []int64 {
+ var matches []int64
+ for i := range str {
+ if bytes.HasPrefix(str[i:], substr) {
+ matches = append(matches, int64(i))
+ }
+ }
+ return matches
+}
+
+func FuzzFindAll(f *testing.F) {
+ f.Fuzz(func(t *testing.T, str, substr []byte) {
+ if len(substr) == 0 {
+ t.Skip()
+ }
+ t.Logf("str =%q", str)
+ t.Logf("substr=%q", substr)
+ exp := NaiveFindAll(str, substr)
+ act, err := FindAll(bytes.NewReader(str), substr)
+ assert.NoError(t, err)
+ assert.Equal(t, exp, act)
+ })
+}
diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/lib/diskio/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d
new file mode 100644
index 0000000..9d14adf
--- /dev/null
+++ b/lib/diskio/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("1")
diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/lib/diskio/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600
new file mode 100644
index 0000000..269f061
--- /dev/null
+++ b/lib/diskio/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("0")
diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/lib/diskio/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60
new file mode 100644
index 0000000..b8f1562
--- /dev/null
+++ b/lib/diskio/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("")
diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/lib/diskio/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738
new file mode 100644
index 0000000..be67506
--- /dev/null
+++ b/lib/diskio/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("\xde\xdb!")
+[]byte("\xde\xdb")
diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/lib/diskio/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d
new file mode 100644
index 0000000..c3bfa37
--- /dev/null
+++ b/lib/diskio/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("\x10\x10\x15")
+[]byte("\x10\x15")