diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2022-07-13 19:01:22 -0600 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2022-07-13 19:01:22 -0600 |
commit | a0fc940be0ea888c7b380cd95723da65bbd32bcb (patch) | |
tree | c332cd95de9c882d23728e9fe040e63c97ff007f /lib/kmp/kmp.go | |
parent | ae84737c1bc0a5e137e1a25d7344f2ab8e420489 (diff) |
Implement KMP string search
Diffstat (limited to 'lib/kmp/kmp.go')
-rw-r--r-- | lib/kmp/kmp.go | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/lib/kmp/kmp.go b/lib/kmp/kmp.go new file mode 100644 index 0000000..66b5516 --- /dev/null +++ b/lib/kmp/kmp.go @@ -0,0 +1,83 @@ +// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package kmp + +import ( + "errors" + "io" +) + +// buildTable takes the string 'substr', and returns a table such +// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and +// 'substr[:val] == substr[matchLen-val:matchLen]'. +func buildTable(substr []byte) []int { + table := make([]int, len(substr)) + for j := range table { + if j == 0 { + // First entry must always be 0 (in order to + // satisfy 'val < matchLen'). + continue + } + val := table[j-1] + // not a match; go back + for val > 0 && substr[j] != substr[val] { + val = table[val-1] + } + // is a match; go forward + if substr[val] == substr[j] { + val++ + } + table[j] = val + } + return table +} + +// FindAll returns the starting-position of all possibly-overlapping +// occurances of 'substr' in the 'r' stream. +// +// Will panic if len(substr)==0. +func FindAll(r io.ByteReader, substr []byte) ([]int64, error) { + if len(substr) == 0 { + panic(errors.New("kmp.FindAll: empty substring")) + } + table := buildTable(substr) + + var matches []int64 + var curMatchBeg int64 + var curMatchLen int + + pos := int64(-1) // if 'r' were a slice; define 'pos' such that 'chr=r[pos]' + for { + // I/O + var chr byte + chr, err := r.ReadByte() + if err != nil { + if errors.Is(err, io.EOF) { + err = nil + } + return matches, err + } + pos++ + + // Consider 'chr' + for curMatchLen > 0 && chr != substr[curMatchLen] { // shorten the match + overlap := table[curMatchLen-1] + curMatchBeg += int64(curMatchLen - overlap) + curMatchLen = overlap + } + if chr == substr[curMatchLen] { // lengthen the match + if curMatchLen == 0 { + curMatchBeg = pos + } + curMatchLen++ + if curMatchLen == len(substr) { + matches = append(matches, curMatchBeg) + overlap := table[curMatchLen-1] + curMatchBeg += int64(curMatchLen - overlap) + curMatchLen = overlap + } + } + } +} |