From 731bde4f68014caeca82d72e43c04a7b755148f8 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Tue, 20 May 2025 15:58:18 -0400 Subject: mv cmd/generate/mailstuff/ lib/mailstuff/ # and supporting --- cmd/generate/forge_part_pipermail.go | 2 +- cmd/generate/mailstuff/jwz.md | 47 -------- cmd/generate/mailstuff/mbox.go | 38 ------ cmd/generate/mailstuff/thread.go | 114 ------------------ cmd/generate/mailstuff/thread_alg.go | 226 ----------------------------------- lib/mailstuff/jwz.md | 47 ++++++++ lib/mailstuff/mbox.go | 38 ++++++ lib/mailstuff/thread.go | 114 ++++++++++++++++++ lib/mailstuff/thread_alg.go | 226 +++++++++++++++++++++++++++++++++++ 9 files changed, 426 insertions(+), 426 deletions(-) delete mode 100644 cmd/generate/mailstuff/jwz.md delete mode 100644 cmd/generate/mailstuff/mbox.go delete mode 100644 cmd/generate/mailstuff/thread.go delete mode 100644 cmd/generate/mailstuff/thread_alg.go create mode 100644 lib/mailstuff/jwz.md create mode 100644 lib/mailstuff/mbox.go create mode 100644 lib/mailstuff/thread.go create mode 100644 lib/mailstuff/thread_alg.go diff --git a/cmd/generate/forge_part_pipermail.go b/cmd/generate/forge_part_pipermail.go index af6a009..e7c9e4c 100644 --- a/cmd/generate/forge_part_pipermail.go +++ b/cmd/generate/forge_part_pipermail.go @@ -12,7 +12,7 @@ import ( "strings" "time" - "git.lukeshu.com/www/cmd/generate/mailstuff" + "git.lukeshu.com/www/lib/mailstuff" ) var ( diff --git a/cmd/generate/mailstuff/jwz.md b/cmd/generate/mailstuff/jwz.md deleted file mode 100644 index 91e03f5..0000000 --- a/cmd/generate/mailstuff/jwz.md +++ /dev/null @@ -1,47 +0,0 @@ -To: Jamie Zawinski -Subject: message threading - -Hi, - -I'm implementing message threading, and have been referencing both -your document and RFC 5256. -I'm not sure whether you're interested in updating a document that's -more than 25 years old, but if you are: I hope you find the following -feedback valuable. - -You write that the algorithm in RFC 5256 is merely a "restating" of -your algorithm, but I noticed 3 (minor) differences: - -1. In your step 1.C, the RFC says to check whether this would create a - loop, and if it would to skip creating the link; your version only - says to perform this check in step 1.B. - -2. The RFC says to sort the messages by date between your steps 4 and - 5; that is: when grouping by subject, containers in the root set - should be processed in date-order (you do not specify an order), - and that if container in the root set is empty then the subject - should be taken from the earliest-date child (you say to use an - arbitrary child). - -3. The RFC precisely states how to trim a subject down to a "base - subject," rather than simply saying "Strip ``Re:'', ``RE:'', - ``RE[5]:'', ``Re: Re[4]: Re:'' and so on." - -Additionally, there are two minor points on which I found their -version to be clearer: - -1. The RFC specifies how to handle messages without a Message-Id or - with a duplicate Message-Id (on page 9), as well as how to - normalize a Message-Id (by referring to RFC 2822). This is perhaps - out-of-scope of your algorithm document, but I feel that it would - be worth mentioning in your background or definitions section. - -2. In your step 1.B, I did not understand what "If they are already - linked, don't change the existing links" meant until I read the - RFC, which words it as "If a message already has a parent, don't - change the existing link." It was unclear to me what "they" was - referring to in your version. - --- -Happy hacking, -~ Luke T. Shumaker diff --git a/cmd/generate/mailstuff/mbox.go b/cmd/generate/mailstuff/mbox.go deleted file mode 100644 index 8700c24..0000000 --- a/cmd/generate/mailstuff/mbox.go +++ /dev/null @@ -1,38 +0,0 @@ -package mailstuff - -import ( - "bytes" - "io" - "net/mail" -) - -func ReadMBox(r io.Reader) ([]*mail.Message, error) { - rest, err := io.ReadAll(r) - if err != nil { - return nil, err - } - - const terminator = "\nFrom " - - var parts [][]byte - for { - pos := bytes.Index(rest, []byte(terminator)) - if pos < 0 { - parts = append(parts, rest) - break - } - parts = append(parts, rest[:pos+1]) - rest = rest[pos+1:] - } - - ret := make([]*mail.Message, len(parts)) - for i := range len(parts) { - msg, err := mail.ReadMessage(bytes.NewReader(parts[i])) - if err != nil { - return nil, err - } - ret[i] = msg - } - - return ret, nil -} diff --git a/cmd/generate/mailstuff/thread.go b/cmd/generate/mailstuff/thread.go deleted file mode 100644 index 2cdf9a4..0000000 --- a/cmd/generate/mailstuff/thread.go +++ /dev/null @@ -1,114 +0,0 @@ -package mailstuff - -import ( - "fmt" - "net/mail" - "regexp" - "strings" -) - -type Set[T comparable] map[T]struct{} - -func (s Set[T]) Insert(val T) { - s[val] = struct{}{} -} - -func mapHas[K comparable, V any](m map[K]V, k K) bool { - _, ok := m[k] - return ok -} - -func (s Set[T]) Has(val T) bool { - return mapHas(s, val) -} - -func (s Set[T]) PickOne() T { - for v := range s { - return v - } - var zero T - return zero -} - -type MessageID string - -type ThreadedMessage struct { - *mail.Message - Parent *ThreadedMessage - Children Set[*ThreadedMessage] -} - -var reReplyID = regexp.MustCompile("<[^> \t\r\n]+>") - -func rfc2822parse(msg *mail.Message) *jwzMessage { - // TODO: This is bad, and needs a real implementation. - ret := &jwzMessage{ - Subject: msg.Header.Get("Subject"), - ID: jwzID(msg.Header.Get("Message-ID")), - } - refIDs := strings.Fields(msg.Header.Get("References")) - strings.Fields(msg.Header.Get("References")) - if replyID := reReplyID.FindString(msg.Header.Get("In-Reply-To")); replyID != "" { - refIDs = append(refIDs, replyID) - } - ret.References = make([]jwzID, len(refIDs)) - for i := range refIDs { - ret.References[i] = jwzID(refIDs[i]) - } - return ret -} - -func ThreadMessages(msgs []*mail.Message) (Set[*ThreadedMessage], map[MessageID]*ThreadedMessage) { - jwzMsgs := make(map[jwzID]*jwzMessage, len(msgs)) - retMsgs := make(map[jwzID]*ThreadedMessage, len(msgs)) - bogusCnt := 0 - for _, msg := range msgs { - jwzMsg := rfc2822parse(msg) - - // RFC 5256: - // - // If a message does not contain a Message-ID header - // line, or the Message-ID header line does not - // contain a valid Message ID, then assign a unique - // Message ID to this message. - // - // If two or more messages have the same Message ID, - // then only use that Message ID in the first (lowest - // sequence number) message, and assign a unique - // Message ID to each of the subsequent messages with - // a duplicate of that Message ID. - for jwzMsg.ID == "" || mapHas(jwzMsgs, jwzMsg.ID) { - jwzMsg.ID = jwzID(fmt.Sprintf("bogus.%d", bogusCnt)) - bogusCnt++ - } - - jwzMsgs[jwzMsg.ID] = jwzMsg - retMsgs[jwzMsg.ID] = &ThreadedMessage{ - Message: msg, - } - } - - jwzThreads := jwzThreadMessages(jwzMsgs) - - var convertMessage func(*jwzContainer) *ThreadedMessage - convertMessage = func(in *jwzContainer) *ThreadedMessage { - var out *ThreadedMessage - if in.Message == nil { - out = new(ThreadedMessage) - } else { - out = retMsgs[in.Message.ID] - } - out.Children = make(Set[*ThreadedMessage], len(in.Children)) - for inChild := range in.Children { - outChild := convertMessage(inChild) - out.Children.Insert(outChild) - outChild.Parent = out - } - return out - } - retThreads := make(Set[*ThreadedMessage], len(jwzThreads)) - for inThread := range jwzThreads { - retThreads.Insert(convertMessage(inThread)) - } - return retThreads, retMsgs -} diff --git a/cmd/generate/mailstuff/thread_alg.go b/cmd/generate/mailstuff/thread_alg.go deleted file mode 100644 index 1b351e9..0000000 --- a/cmd/generate/mailstuff/thread_alg.go +++ /dev/null @@ -1,226 +0,0 @@ -package mailstuff - -import ( - "regexp" - "strings" -) - -// https://www.jwz.org/doc/threading.html - -// TODO: See ./jwz.md for RFC 5256 changes we might want to bring in. - -// Definitions ///////////////////////////////////////////////////////////////// - -type jwzContainer struct { - Message *jwzMessage - Parent *jwzContainer - Children Set[*jwzContainer] -} - -type jwzMessage struct { - Subject string - ID jwzID - References []jwzID -} - -type jwzID = MessageID //string - -func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool { - if ancestor == descendent { - return true - } - for child := range ancestor.Children { - if child.IsAncestorOf(descendent) { - return true - } - } - return false -} - -// The Algorithm /////////////////////////////////////////////////////////////// - -var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`) - -func jwzThreadMessages(msgs map[jwzID]*jwzMessage) Set[*jwzContainer] { - idTable := make(map[jwzID]*jwzContainer, len(msgs)) - - // 1. For each message - for _, msg := range msgs { - // A. - msgContainer := idTable[msg.ID] - if msgContainer != nil && msgContainer.Message == nil { - msgContainer.Message = msg - } else { - msgContainer = &jwzContainer{ - Message: msg, - Children: make(Set[*jwzContainer]), - } - idTable[msg.ID] = msgContainer - } - // B. - for _, refID := range msg.References { - refContainer := idTable[refID] - if refContainer == nil { - refContainer = &jwzContainer{ - Children: make(Set[*jwzContainer]), - } - idTable[refID] = refContainer - } - } - for i := 0; i+1 < len(msg.References); i++ { - parent := idTable[msg.References[i]] - child := idTable[msg.References[i+1]] - if child.Parent == nil && !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) { - parent.Children.Insert(child) - child.Parent = parent - } - } - // C. - if len(msg.References) == 0 { - if msgContainer.Parent != nil { - delete(msgContainer.Parent.Children, msgContainer) - } - msgContainer.Parent = nil - } else { - msgContainer.Parent = idTable[msg.References[len(msg.References)-1]] - msgContainer.Parent.Children.Insert(msgContainer) - } - } - - // 2. Find the root Set - root := &jwzContainer{ - Children: make(Set[*jwzContainer]), - } - for _, container := range idTable { - if container.Parent == nil { - container.Parent = root - root.Children.Insert(container) - } - } - - // 3. Discard id_table - idTable = nil - - // 4. Prune empty containers - var recurse func(*jwzContainer) - recurse = func(container *jwzContainer) { - // Recurse. This is a touch complicated because - // `recurse(child)` might insert into - // `container.Children`, and those insertions might - // not be emitted by the range loop - for visited := make(Set[*jwzContainer]); ; { - beforeSize := len(visited) - for child := range container.Children { - if visited.Has(child) { - continue - } - recurse(child) - visited.Insert(child) - } - if len(visited) == beforeSize { - break - } - } - if container.Parent == nil { - return - } - // Main. - if container.Message == nil { - if len(container.Children) == 0 { // A. - delete(container.Parent.Children, container) - } else { // B. - if len(container.Children) == 1 || container.Parent != root { - for child := range container.Children { - container.Parent.Children.Insert(child) - child.Parent = container.Parent - } - delete(container.Parent.Children, container) - } - } - } - } - recurse(root) - - // 5. Group root Set by subject - // A. - subjectTable := make(map[string]*jwzContainer) - // B. - for this := range root.Children { - var subject string - if this.Message != nil { - subject = this.Message.Subject - } else { - subject = this.Children.PickOne().Message.Subject - } - prefix := jwzSubjectRE.FindString(subject) - subject = strings.TrimSpace(subject[len(prefix):]) - if subject == "" { - continue - } - if other := subjectTable[subject]; other == nil { - subjectTable[subject] = this - } else if other.Message == nil { - subjectTable[subject] = this - } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" { - subjectTable[subject] = this - } - } - // C. - for this := range root.Children { - var subject string - if this.Message != nil { - subject = this.Message.Subject - } else { - subject = this.Children.PickOne().Message.Subject - } - prefix := jwzSubjectRE.FindString(subject) - subject = strings.TrimSpace(subject[len(prefix):]) - - other := subjectTable[subject] - if other == nil || other == this { - continue - } - - switch { - case this.Message == nil && other.Message == nil: - for child := range this.Children { - other.Children.Insert(child) - child.Parent = other - } - delete(root.Children, this) - case (this.Message == nil) != (other.Message == nil): - var empty, nonEmpty *jwzContainer - if this.Message == nil { - empty = this - nonEmpty = other - } else { - empty = other - nonEmpty = this - } - empty.Children.Insert(nonEmpty) - nonEmpty.Parent = empty - case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "": - other.Children.Insert(this) - this.Parent = other - // skip the reverse of the above case--it happened implicitly - default: - newParent := &jwzContainer{ - Children: make(Set[*jwzContainer], 2), - } - newParent.Children.Insert(this) - this.Parent = newParent - newParent.Children.Insert(other) - other.Parent = newParent - subjectTable[subject] = newParent - root.Children.Insert(newParent) - delete(root.Children, this) - delete(root.Children, other) - } - } - - // 6. Now you're done threading - for child := range root.Children { - child.Parent = nil - } - return root.Children -} diff --git a/lib/mailstuff/jwz.md b/lib/mailstuff/jwz.md new file mode 100644 index 0000000..91e03f5 --- /dev/null +++ b/lib/mailstuff/jwz.md @@ -0,0 +1,47 @@ +To: Jamie Zawinski +Subject: message threading + +Hi, + +I'm implementing message threading, and have been referencing both +your document and RFC 5256. +I'm not sure whether you're interested in updating a document that's +more than 25 years old, but if you are: I hope you find the following +feedback valuable. + +You write that the algorithm in RFC 5256 is merely a "restating" of +your algorithm, but I noticed 3 (minor) differences: + +1. In your step 1.C, the RFC says to check whether this would create a + loop, and if it would to skip creating the link; your version only + says to perform this check in step 1.B. + +2. The RFC says to sort the messages by date between your steps 4 and + 5; that is: when grouping by subject, containers in the root set + should be processed in date-order (you do not specify an order), + and that if container in the root set is empty then the subject + should be taken from the earliest-date child (you say to use an + arbitrary child). + +3. The RFC precisely states how to trim a subject down to a "base + subject," rather than simply saying "Strip ``Re:'', ``RE:'', + ``RE[5]:'', ``Re: Re[4]: Re:'' and so on." + +Additionally, there are two minor points on which I found their +version to be clearer: + +1. The RFC specifies how to handle messages without a Message-Id or + with a duplicate Message-Id (on page 9), as well as how to + normalize a Message-Id (by referring to RFC 2822). This is perhaps + out-of-scope of your algorithm document, but I feel that it would + be worth mentioning in your background or definitions section. + +2. In your step 1.B, I did not understand what "If they are already + linked, don't change the existing links" meant until I read the + RFC, which words it as "If a message already has a parent, don't + change the existing link." It was unclear to me what "they" was + referring to in your version. + +-- +Happy hacking, +~ Luke T. Shumaker diff --git a/lib/mailstuff/mbox.go b/lib/mailstuff/mbox.go new file mode 100644 index 0000000..8700c24 --- /dev/null +++ b/lib/mailstuff/mbox.go @@ -0,0 +1,38 @@ +package mailstuff + +import ( + "bytes" + "io" + "net/mail" +) + +func ReadMBox(r io.Reader) ([]*mail.Message, error) { + rest, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + const terminator = "\nFrom " + + var parts [][]byte + for { + pos := bytes.Index(rest, []byte(terminator)) + if pos < 0 { + parts = append(parts, rest) + break + } + parts = append(parts, rest[:pos+1]) + rest = rest[pos+1:] + } + + ret := make([]*mail.Message, len(parts)) + for i := range len(parts) { + msg, err := mail.ReadMessage(bytes.NewReader(parts[i])) + if err != nil { + return nil, err + } + ret[i] = msg + } + + return ret, nil +} diff --git a/lib/mailstuff/thread.go b/lib/mailstuff/thread.go new file mode 100644 index 0000000..2cdf9a4 --- /dev/null +++ b/lib/mailstuff/thread.go @@ -0,0 +1,114 @@ +package mailstuff + +import ( + "fmt" + "net/mail" + "regexp" + "strings" +) + +type Set[T comparable] map[T]struct{} + +func (s Set[T]) Insert(val T) { + s[val] = struct{}{} +} + +func mapHas[K comparable, V any](m map[K]V, k K) bool { + _, ok := m[k] + return ok +} + +func (s Set[T]) Has(val T) bool { + return mapHas(s, val) +} + +func (s Set[T]) PickOne() T { + for v := range s { + return v + } + var zero T + return zero +} + +type MessageID string + +type ThreadedMessage struct { + *mail.Message + Parent *ThreadedMessage + Children Set[*ThreadedMessage] +} + +var reReplyID = regexp.MustCompile("<[^> \t\r\n]+>") + +func rfc2822parse(msg *mail.Message) *jwzMessage { + // TODO: This is bad, and needs a real implementation. + ret := &jwzMessage{ + Subject: msg.Header.Get("Subject"), + ID: jwzID(msg.Header.Get("Message-ID")), + } + refIDs := strings.Fields(msg.Header.Get("References")) + strings.Fields(msg.Header.Get("References")) + if replyID := reReplyID.FindString(msg.Header.Get("In-Reply-To")); replyID != "" { + refIDs = append(refIDs, replyID) + } + ret.References = make([]jwzID, len(refIDs)) + for i := range refIDs { + ret.References[i] = jwzID(refIDs[i]) + } + return ret +} + +func ThreadMessages(msgs []*mail.Message) (Set[*ThreadedMessage], map[MessageID]*ThreadedMessage) { + jwzMsgs := make(map[jwzID]*jwzMessage, len(msgs)) + retMsgs := make(map[jwzID]*ThreadedMessage, len(msgs)) + bogusCnt := 0 + for _, msg := range msgs { + jwzMsg := rfc2822parse(msg) + + // RFC 5256: + // + // If a message does not contain a Message-ID header + // line, or the Message-ID header line does not + // contain a valid Message ID, then assign a unique + // Message ID to this message. + // + // If two or more messages have the same Message ID, + // then only use that Message ID in the first (lowest + // sequence number) message, and assign a unique + // Message ID to each of the subsequent messages with + // a duplicate of that Message ID. + for jwzMsg.ID == "" || mapHas(jwzMsgs, jwzMsg.ID) { + jwzMsg.ID = jwzID(fmt.Sprintf("bogus.%d", bogusCnt)) + bogusCnt++ + } + + jwzMsgs[jwzMsg.ID] = jwzMsg + retMsgs[jwzMsg.ID] = &ThreadedMessage{ + Message: msg, + } + } + + jwzThreads := jwzThreadMessages(jwzMsgs) + + var convertMessage func(*jwzContainer) *ThreadedMessage + convertMessage = func(in *jwzContainer) *ThreadedMessage { + var out *ThreadedMessage + if in.Message == nil { + out = new(ThreadedMessage) + } else { + out = retMsgs[in.Message.ID] + } + out.Children = make(Set[*ThreadedMessage], len(in.Children)) + for inChild := range in.Children { + outChild := convertMessage(inChild) + out.Children.Insert(outChild) + outChild.Parent = out + } + return out + } + retThreads := make(Set[*ThreadedMessage], len(jwzThreads)) + for inThread := range jwzThreads { + retThreads.Insert(convertMessage(inThread)) + } + return retThreads, retMsgs +} diff --git a/lib/mailstuff/thread_alg.go b/lib/mailstuff/thread_alg.go new file mode 100644 index 0000000..1b351e9 --- /dev/null +++ b/lib/mailstuff/thread_alg.go @@ -0,0 +1,226 @@ +package mailstuff + +import ( + "regexp" + "strings" +) + +// https://www.jwz.org/doc/threading.html + +// TODO: See ./jwz.md for RFC 5256 changes we might want to bring in. + +// Definitions ///////////////////////////////////////////////////////////////// + +type jwzContainer struct { + Message *jwzMessage + Parent *jwzContainer + Children Set[*jwzContainer] +} + +type jwzMessage struct { + Subject string + ID jwzID + References []jwzID +} + +type jwzID = MessageID //string + +func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool { + if ancestor == descendent { + return true + } + for child := range ancestor.Children { + if child.IsAncestorOf(descendent) { + return true + } + } + return false +} + +// The Algorithm /////////////////////////////////////////////////////////////// + +var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`) + +func jwzThreadMessages(msgs map[jwzID]*jwzMessage) Set[*jwzContainer] { + idTable := make(map[jwzID]*jwzContainer, len(msgs)) + + // 1. For each message + for _, msg := range msgs { + // A. + msgContainer := idTable[msg.ID] + if msgContainer != nil && msgContainer.Message == nil { + msgContainer.Message = msg + } else { + msgContainer = &jwzContainer{ + Message: msg, + Children: make(Set[*jwzContainer]), + } + idTable[msg.ID] = msgContainer + } + // B. + for _, refID := range msg.References { + refContainer := idTable[refID] + if refContainer == nil { + refContainer = &jwzContainer{ + Children: make(Set[*jwzContainer]), + } + idTable[refID] = refContainer + } + } + for i := 0; i+1 < len(msg.References); i++ { + parent := idTable[msg.References[i]] + child := idTable[msg.References[i+1]] + if child.Parent == nil && !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) { + parent.Children.Insert(child) + child.Parent = parent + } + } + // C. + if len(msg.References) == 0 { + if msgContainer.Parent != nil { + delete(msgContainer.Parent.Children, msgContainer) + } + msgContainer.Parent = nil + } else { + msgContainer.Parent = idTable[msg.References[len(msg.References)-1]] + msgContainer.Parent.Children.Insert(msgContainer) + } + } + + // 2. Find the root Set + root := &jwzContainer{ + Children: make(Set[*jwzContainer]), + } + for _, container := range idTable { + if container.Parent == nil { + container.Parent = root + root.Children.Insert(container) + } + } + + // 3. Discard id_table + idTable = nil + + // 4. Prune empty containers + var recurse func(*jwzContainer) + recurse = func(container *jwzContainer) { + // Recurse. This is a touch complicated because + // `recurse(child)` might insert into + // `container.Children`, and those insertions might + // not be emitted by the range loop + for visited := make(Set[*jwzContainer]); ; { + beforeSize := len(visited) + for child := range container.Children { + if visited.Has(child) { + continue + } + recurse(child) + visited.Insert(child) + } + if len(visited) == beforeSize { + break + } + } + if container.Parent == nil { + return + } + // Main. + if container.Message == nil { + if len(container.Children) == 0 { // A. + delete(container.Parent.Children, container) + } else { // B. + if len(container.Children) == 1 || container.Parent != root { + for child := range container.Children { + container.Parent.Children.Insert(child) + child.Parent = container.Parent + } + delete(container.Parent.Children, container) + } + } + } + } + recurse(root) + + // 5. Group root Set by subject + // A. + subjectTable := make(map[string]*jwzContainer) + // B. + for this := range root.Children { + var subject string + if this.Message != nil { + subject = this.Message.Subject + } else { + subject = this.Children.PickOne().Message.Subject + } + prefix := jwzSubjectRE.FindString(subject) + subject = strings.TrimSpace(subject[len(prefix):]) + if subject == "" { + continue + } + if other := subjectTable[subject]; other == nil { + subjectTable[subject] = this + } else if other.Message == nil { + subjectTable[subject] = this + } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" { + subjectTable[subject] = this + } + } + // C. + for this := range root.Children { + var subject string + if this.Message != nil { + subject = this.Message.Subject + } else { + subject = this.Children.PickOne().Message.Subject + } + prefix := jwzSubjectRE.FindString(subject) + subject = strings.TrimSpace(subject[len(prefix):]) + + other := subjectTable[subject] + if other == nil || other == this { + continue + } + + switch { + case this.Message == nil && other.Message == nil: + for child := range this.Children { + other.Children.Insert(child) + child.Parent = other + } + delete(root.Children, this) + case (this.Message == nil) != (other.Message == nil): + var empty, nonEmpty *jwzContainer + if this.Message == nil { + empty = this + nonEmpty = other + } else { + empty = other + nonEmpty = this + } + empty.Children.Insert(nonEmpty) + nonEmpty.Parent = empty + case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "": + other.Children.Insert(this) + this.Parent = other + // skip the reverse of the above case--it happened implicitly + default: + newParent := &jwzContainer{ + Children: make(Set[*jwzContainer], 2), + } + newParent.Children.Insert(this) + this.Parent = newParent + newParent.Children.Insert(other) + other.Parent = newParent + subjectTable[subject] = newParent + root.Children.Insert(newParent) + delete(root.Children, this) + delete(root.Children, other) + } + } + + // 6. Now you're done threading + for child := range root.Children { + child.Parent = nil + } + return root.Children +} -- cgit v1.2.3-54-g00ecf