summaryrefslogtreecommitdiff
path: root/cmd/generate
diff options
context:
space:
mode:
authorLuke T. Shumaker <lukeshu@lukeshu.com>2025-05-20 15:58:18 -0400
committerLuke T. Shumaker <lukeshu@lukeshu.com>2025-05-21 00:04:19 -0400
commit731bde4f68014caeca82d72e43c04a7b755148f8 (patch)
tree725d0851550e15eb83290308fec14d837e40409f /cmd/generate
parent6fc6b0f6b440a92334a668da1af468dd84531a4b (diff)
mv cmd/generate/mailstuff/ lib/mailstuff/ # and supporting
Diffstat (limited to 'cmd/generate')
-rw-r--r--cmd/generate/forge_part_pipermail.go2
-rw-r--r--cmd/generate/mailstuff/jwz.md47
-rw-r--r--cmd/generate/mailstuff/mbox.go38
-rw-r--r--cmd/generate/mailstuff/thread.go114
-rw-r--r--cmd/generate/mailstuff/thread_alg.go226
5 files changed, 1 insertions, 426 deletions
diff --git a/cmd/generate/forge_part_pipermail.go b/cmd/generate/forge_part_pipermail.go
index af6a009..e7c9e4c 100644
--- a/cmd/generate/forge_part_pipermail.go
+++ b/cmd/generate/forge_part_pipermail.go
@@ -12,7 +12,7 @@ import (
"strings"
"time"
- "git.lukeshu.com/www/cmd/generate/mailstuff"
+ "git.lukeshu.com/www/lib/mailstuff"
)
var (
diff --git a/cmd/generate/mailstuff/jwz.md b/cmd/generate/mailstuff/jwz.md
deleted file mode 100644
index 91e03f5..0000000
--- a/cmd/generate/mailstuff/jwz.md
+++ /dev/null
@@ -1,47 +0,0 @@
-To: Jamie Zawinski <jwz@jwz.org>
-Subject: message threading
-
-Hi,
-
-I'm implementing message threading, and have been referencing both
-your document <https://www.jwz.org/doc/threading.html> and RFC 5256.
-I'm not sure whether you're interested in updating a document that's
-more than 25 years old, but if you are: I hope you find the following
-feedback valuable.
-
-You write that the algorithm in RFC 5256 is merely a "restating" of
-your algorithm, but I noticed 3 (minor) differences:
-
-1. In your step 1.C, the RFC says to check whether this would create a
- loop, and if it would to skip creating the link; your version only
- says to perform this check in step 1.B.
-
-2. The RFC says to sort the messages by date between your steps 4 and
- 5; that is: when grouping by subject, containers in the root set
- should be processed in date-order (you do not specify an order),
- and that if a container in the root set is empty then the subject
- should be taken from the earliest-date child (you say to use an
- arbitrary child).
-
-3. The RFC precisely states how to trim a subject down to a "base
- subject," rather than simply saying "Strip ``Re:'', ``RE:'',
- ``RE[5]:'', ``Re: Re[4]: Re:'' and so on."
-
-Additionally, there are two minor points on which I found their
-version to be clearer:
-
-1. The RFC specifies how to handle messages without a Message-Id or
- with a duplicate Message-Id (on page 9), as well as how to
- normalize a Message-Id (by referring to RFC 2822). This is perhaps
- out-of-scope of your algorithm document, but I feel that it would
- be worth mentioning in your background or definitions section.
-
-2. In your step 1.B, I did not understand what "If they are already
- linked, don't change the existing links" meant until I read the
- RFC, which words it as "If a message already has a parent, don't
- change the existing link." It was unclear to me what "they" was
- referring to in your version.
-
---
-Happy hacking,
-~ Luke T. Shumaker
diff --git a/cmd/generate/mailstuff/mbox.go b/cmd/generate/mailstuff/mbox.go
deleted file mode 100644
index 8700c24..0000000
--- a/cmd/generate/mailstuff/mbox.go
+++ /dev/null
@@ -1,38 +0,0 @@
-package mailstuff
-
-import (
- "bytes"
- "io"
- "net/mail"
-)
-
// ReadMBox reads all of r, treats it as an mbox file, and returns the
// messages it contains in file order.
//
// The input is split wherever a line begins with "From " (the mbox
// message separator).  The separator line itself is not an RFC 5322
// header, so it is dropped before the remainder of each chunk is
// handed to mail.ReadMessage.  Chunks that contain nothing but
// whitespace are skipped, so an empty input yields an empty (non-nil)
// slice rather than an error.
//
// Any error from reading r or from mail.ReadMessage is returned as-is,
// together with a nil slice.
func ReadMBox(r io.Reader) ([]*mail.Message, error) {
	rest, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}

	var (
		terminator = []byte("\nFrom ")
		fromLine   = []byte("From ")
		newline    = []byte("\n")
	)

	// Cut the input just after each newline that precedes a "From "
	// line, so that every chunk after the first starts with its own
	// separator line.
	var parts [][]byte
	for {
		pos := bytes.Index(rest, terminator)
		if pos < 0 {
			parts = append(parts, rest)
			break
		}
		parts = append(parts, rest[:pos+1])
		rest = rest[pos+1:]
	}

	ret := make([]*mail.Message, 0, len(parts))
	for _, part := range parts {
		// Drop the "From sender date" separator line; if the chunk
		// has no newline at all there is nothing after it to parse.
		if bytes.HasPrefix(part, fromLine) {
			_, part, _ = bytes.Cut(part, newline)
		}
		if len(bytes.TrimSpace(part)) == 0 {
			continue
		}
		msg, err := mail.ReadMessage(bytes.NewReader(part))
		if err != nil {
			return nil, err
		}
		ret = append(ret, msg)
	}

	return ret, nil
}
diff --git a/cmd/generate/mailstuff/thread.go b/cmd/generate/mailstuff/thread.go
deleted file mode 100644
index 2cdf9a4..0000000
--- a/cmd/generate/mailstuff/thread.go
+++ /dev/null
@@ -1,114 +0,0 @@
-package mailstuff
-
-import (
- "fmt"
- "net/mail"
- "regexp"
- "strings"
-)
-
// Set is an unordered collection of distinct values, stored as the
// keys of a map whose values carry no data.
type Set[T comparable] map[T]struct{}

// mapHas reports whether k is present as a key in m.
func mapHas[K comparable, V any](m map[K]V, k K) bool {
	if _, present := m[k]; present {
		return true
	}
	return false
}

// Insert adds val to the set; inserting a value that is already
// present leaves the set unchanged.
func (s Set[T]) Insert(val T) {
	var marker struct{}
	s[val] = marker
}

// Has reports whether val is a member of the set.
func (s Set[T]) Has(val T) bool {
	_, present := s[val]
	return present
}

// PickOne returns some member of the set, chosen by map iteration
// order, or the zero value of T if the set is empty.
func (s Set[T]) PickOne() T {
	var picked T
	for member := range s {
		picked = member
		break
	}
	return picked
}
-
// MessageID is the identifier that messages are keyed by when they are
// threaded.
type MessageID string

// ThreadedMessage is one node of a thread tree.
type ThreadedMessage struct {
	// Message is the underlying mail message.  ThreadMessages leaves
	// it nil on the placeholder nodes it creates for containers that
	// have no message of their own.
	*mail.Message
	// Parent is the node this one hangs under; ThreadMessages only
	// sets it on nodes that appear as somebody's child.
	Parent *ThreadedMessage
	// Children is the set of nodes directly beneath this one.
	Children Set[*ThreadedMessage]
}
-
-var reReplyID = regexp.MustCompile("<[^> \t\r\n]+>")
-
-func rfc2822parse(msg *mail.Message) *jwzMessage {
- // TODO: This is bad, and needs a real implementation.
- ret := &jwzMessage{
- Subject: msg.Header.Get("Subject"),
- ID: jwzID(msg.Header.Get("Message-ID")),
- }
- refIDs := strings.Fields(msg.Header.Get("References"))
- strings.Fields(msg.Header.Get("References"))
- if replyID := reReplyID.FindString(msg.Header.Get("In-Reply-To")); replyID != "" {
- refIDs = append(refIDs, replyID)
- }
- ret.References = make([]jwzID, len(refIDs))
- for i := range refIDs {
- ret.References[i] = jwzID(refIDs[i])
- }
- return ret
-}
-
-func ThreadMessages(msgs []*mail.Message) (Set[*ThreadedMessage], map[MessageID]*ThreadedMessage) {
- jwzMsgs := make(map[jwzID]*jwzMessage, len(msgs))
- retMsgs := make(map[jwzID]*ThreadedMessage, len(msgs))
- bogusCnt := 0
- for _, msg := range msgs {
- jwzMsg := rfc2822parse(msg)
-
- // RFC 5256:
- //
- // If a message does not contain a Message-ID header
- // line, or the Message-ID header line does not
- // contain a valid Message ID, then assign a unique
- // Message ID to this message.
- //
- // If two or more messages have the same Message ID,
- // then only use that Message ID in the first (lowest
- // sequence number) message, and assign a unique
- // Message ID to each of the subsequent messages with
- // a duplicate of that Message ID.
- for jwzMsg.ID == "" || mapHas(jwzMsgs, jwzMsg.ID) {
- jwzMsg.ID = jwzID(fmt.Sprintf("bogus.%d", bogusCnt))
- bogusCnt++
- }
-
- jwzMsgs[jwzMsg.ID] = jwzMsg
- retMsgs[jwzMsg.ID] = &ThreadedMessage{
- Message: msg,
- }
- }
-
- jwzThreads := jwzThreadMessages(jwzMsgs)
-
- var convertMessage func(*jwzContainer) *ThreadedMessage
- convertMessage = func(in *jwzContainer) *ThreadedMessage {
- var out *ThreadedMessage
- if in.Message == nil {
- out = new(ThreadedMessage)
- } else {
- out = retMsgs[in.Message.ID]
- }
- out.Children = make(Set[*ThreadedMessage], len(in.Children))
- for inChild := range in.Children {
- outChild := convertMessage(inChild)
- out.Children.Insert(outChild)
- outChild.Parent = out
- }
- return out
- }
- retThreads := make(Set[*ThreadedMessage], len(jwzThreads))
- for inThread := range jwzThreads {
- retThreads.Insert(convertMessage(inThread))
- }
- return retThreads, retMsgs
-}
diff --git a/cmd/generate/mailstuff/thread_alg.go b/cmd/generate/mailstuff/thread_alg.go
deleted file mode 100644
index 1b351e9..0000000
--- a/cmd/generate/mailstuff/thread_alg.go
+++ /dev/null
@@ -1,226 +0,0 @@
-package mailstuff
-
-import (
- "regexp"
- "strings"
-)
-
-// https://www.jwz.org/doc/threading.html
-
-// TODO: See ./jwz.md for RFC 5256 changes we might want to bring in.
-
-// Definitions /////////////////////////////////////////////////////////////////
-
// jwzContainer is a node in the tree built by jwzThreadMessages.
type jwzContainer struct {
	// Message is nil for a container that was created only because
	// its ID was referenced, or as a grouping parent.
	Message *jwzMessage
	// Parent is nil for a container that has not been linked under
	// another one.
	Parent *jwzContainer
	// Children is the set of containers directly beneath this one.
	Children Set[*jwzContainer]
}

// jwzMessage holds the parts of a message that the threading
// algorithm looks at.
type jwzMessage struct {
	Subject string
	ID      jwzID
	// References lists referenced IDs in order; jwzThreadMessages
	// links each entry as the parent of the next one and uses the
	// last entry as this message's parent.
	References []jwzID
}

// jwzID is an alias of MessageID.
type jwzID = MessageID //string
-
-func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool {
- if ancestor == descendent {
- return true
- }
- for child := range ancestor.Children {
- if child.IsAncestorOf(descendent) {
- return true
- }
- }
- return false
-}
-
-// The Algorithm ///////////////////////////////////////////////////////////////
-
-var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`)
-
-func jwzThreadMessages(msgs map[jwzID]*jwzMessage) Set[*jwzContainer] {
- idTable := make(map[jwzID]*jwzContainer, len(msgs))
-
- // 1. For each message
- for _, msg := range msgs {
- // A.
- msgContainer := idTable[msg.ID]
- if msgContainer != nil && msgContainer.Message == nil {
- msgContainer.Message = msg
- } else {
- msgContainer = &jwzContainer{
- Message: msg,
- Children: make(Set[*jwzContainer]),
- }
- idTable[msg.ID] = msgContainer
- }
- // B.
- for _, refID := range msg.References {
- refContainer := idTable[refID]
- if refContainer == nil {
- refContainer = &jwzContainer{
- Children: make(Set[*jwzContainer]),
- }
- idTable[refID] = refContainer
- }
- }
- for i := 0; i+1 < len(msg.References); i++ {
- parent := idTable[msg.References[i]]
- child := idTable[msg.References[i+1]]
- if child.Parent == nil && !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) {
- parent.Children.Insert(child)
- child.Parent = parent
- }
- }
- // C.
- if len(msg.References) == 0 {
- if msgContainer.Parent != nil {
- delete(msgContainer.Parent.Children, msgContainer)
- }
- msgContainer.Parent = nil
- } else {
- msgContainer.Parent = idTable[msg.References[len(msg.References)-1]]
- msgContainer.Parent.Children.Insert(msgContainer)
- }
- }
-
- // 2. Find the root Set
- root := &jwzContainer{
- Children: make(Set[*jwzContainer]),
- }
- for _, container := range idTable {
- if container.Parent == nil {
- container.Parent = root
- root.Children.Insert(container)
- }
- }
-
- // 3. Discard id_table
- idTable = nil
-
- // 4. Prune empty containers
- var recurse func(*jwzContainer)
- recurse = func(container *jwzContainer) {
- // Recurse. This is a touch complicated because
- // `recurse(child)` might insert into
- // `container.Children`, and those insertions might
- // not be emitted by the range loop
- for visited := make(Set[*jwzContainer]); ; {
- beforeSize := len(visited)
- for child := range container.Children {
- if visited.Has(child) {
- continue
- }
- recurse(child)
- visited.Insert(child)
- }
- if len(visited) == beforeSize {
- break
- }
- }
- if container.Parent == nil {
- return
- }
- // Main.
- if container.Message == nil {
- if len(container.Children) == 0 { // A.
- delete(container.Parent.Children, container)
- } else { // B.
- if len(container.Children) == 1 || container.Parent != root {
- for child := range container.Children {
- container.Parent.Children.Insert(child)
- child.Parent = container.Parent
- }
- delete(container.Parent.Children, container)
- }
- }
- }
- }
- recurse(root)
-
- // 5. Group root Set by subject
- // A.
- subjectTable := make(map[string]*jwzContainer)
- // B.
- for this := range root.Children {
- var subject string
- if this.Message != nil {
- subject = this.Message.Subject
- } else {
- subject = this.Children.PickOne().Message.Subject
- }
- prefix := jwzSubjectRE.FindString(subject)
- subject = strings.TrimSpace(subject[len(prefix):])
- if subject == "" {
- continue
- }
- if other := subjectTable[subject]; other == nil {
- subjectTable[subject] = this
- } else if other.Message == nil {
- subjectTable[subject] = this
- } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" {
- subjectTable[subject] = this
- }
- }
- // C.
- for this := range root.Children {
- var subject string
- if this.Message != nil {
- subject = this.Message.Subject
- } else {
- subject = this.Children.PickOne().Message.Subject
- }
- prefix := jwzSubjectRE.FindString(subject)
- subject = strings.TrimSpace(subject[len(prefix):])
-
- other := subjectTable[subject]
- if other == nil || other == this {
- continue
- }
-
- switch {
- case this.Message == nil && other.Message == nil:
- for child := range this.Children {
- other.Children.Insert(child)
- child.Parent = other
- }
- delete(root.Children, this)
- case (this.Message == nil) != (other.Message == nil):
- var empty, nonEmpty *jwzContainer
- if this.Message == nil {
- empty = this
- nonEmpty = other
- } else {
- empty = other
- nonEmpty = this
- }
- empty.Children.Insert(nonEmpty)
- nonEmpty.Parent = empty
- case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "":
- other.Children.Insert(this)
- this.Parent = other
- // skip the reverse of the above case--it happened implicitly
- default:
- newParent := &jwzContainer{
- Children: make(Set[*jwzContainer], 2),
- }
- newParent.Children.Insert(this)
- this.Parent = newParent
- newParent.Children.Insert(other)
- other.Parent = newParent
- subjectTable[subject] = newParent
- root.Children.Insert(newParent)
- delete(root.Children, this)
- delete(root.Children, other)
- }
- }
-
- // 6. Now you're done threading
- for child := range root.Children {
- child.Parent = nil
- }
- return root.Children
-}