summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke T. Shumaker <lukeshu@lukeshu.com>2024-06-08 21:29:36 -0600
committerLuke T. Shumaker <lukeshu@lukeshu.com>2024-06-08 21:29:36 -0600
commit8cc4eb82796727f20accfce8d049f677e6210824 (patch)
treef6d1986b3cf0898a5a1aae24e1b9e7658853abcc
parent5dc2e9533a111d75ff91a56dd50af8e03ebf5f5f (diff)
wip pipermail threading
-rw-r--r--cmd/generate/forge_forgejo.go1
-rw-r--r--cmd/generate/forge_pipermail.go69
-rw-r--r--cmd/generate/httpcache.go45
-rw-r--r--cmd/generate/mailstuff/jwz.md47
-rw-r--r--cmd/generate/mailstuff/thread.go278
-rw-r--r--cmd/generate/mailstuff/thread_alg.go226
6 files changed, 445 insertions, 221 deletions
diff --git a/cmd/generate/forge_forgejo.go b/cmd/generate/forge_forgejo.go
index 7d7c8ae..62234b9 100644
--- a/cmd/generate/forge_forgejo.go
+++ b/cmd/generate/forge_forgejo.go
@@ -56,7 +56,6 @@ func (f Forgejo) FetchStatus(urls []string) (string, error) {
func (f Forgejo) FetchSubmittedAt(urls []string) (time.Time, error) {
for _, u := range urls {
m := reForgejoPR.FindStringSubmatch(u)
- fmt.Printf("u=%q m=%#v\n", u, m)
if m == nil || m[1] != f.Authority {
continue
}
diff --git a/cmd/generate/forge_pipermail.go b/cmd/generate/forge_pipermail.go
index e015bb5..56e7ef2 100644
--- a/cmd/generate/forge_pipermail.go
+++ b/cmd/generate/forge_pipermail.go
@@ -1,12 +1,17 @@
package main
import (
+ "errors"
"fmt"
+ "net/mail"
"net/url"
+ "os"
"regexp"
"strconv"
"strings"
"time"
+
+ "git.lukeshu.com/www/cmd/generate/mailstuff"
)
var (
@@ -77,9 +82,20 @@ func (PiperMail) nextMonth(ym string) string {
}
}
-func (PiperMail) messageID(u string) (string, error) {
-}
+func (p PiperMail) threadLen(thread *mailstuff.ThreadedMessage) int {
+ if thread == nil {
+ return 0
+ }
+ ret := 0
+ if thread.Message != nil {
+ ret++
+ }
+ for child := range thread.Children {
+ ret += p.threadLen(child)
+ }
+ return ret
+}
func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
for _, u := range urls {
@@ -95,14 +111,14 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
if err != nil {
return time.Time{}, User{}, err
}
- var msgid string
+ var msgid mailstuff.MessageID
for _, line := range strings.Split(htmlStr, "\n") {
if m := rePiperMailReply.FindStringSubmatch(line); m != nil {
ru, err := url.Parse(m[1])
if err != nil {
continue
}
- if msgid = ru.Query().Get("In-Reply-To"); msgid != "" {
+ if msgid = mailstuff.MessageID(ru.Query().Get("In-Reply-To")); msgid != "" {
break
}
}
@@ -110,9 +126,48 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
if msgid == "" {
continue
}
- mboxStr, err := httpGet(uBase+uYM+".txt.gz", nil)
- if err != nil {
- return time.Time{}, User{}, err
+
+ var thread *mailstuff.ThreadedMessage
+ for ym, mbox := uYM, []*mail.Message(nil); true; ym = p.nextMonth(ym) {
+ lenBefore := p.threadLen(thread)
+
+ mboxStr, err := httpGet(uBase+ym+".txt.gz", nil)
+ if err != nil && (ym == uYM || !errors.Is(err, os.ErrNotExist)) {
+ return time.Time{}, User{}, err
+ }
+ _mbox, err := mailstuff.ReadMBox(strings.NewReader(mboxStr))
+ if err != nil {
+ return time.Time{}, User{}, err
+ }
+ mbox = append(mbox, _mbox...)
+ _, messages := mailstuff.ThreadMessages(mbox)
+ thread = messages[msgid]
+
+ if p.threadLen(thread) == lenBefore {
+ break
+ }
+ }
+
+ var retTime time.Time
+ var retUser User
+
+ var walk func(*mailstuff.ThreadedMessage)
+ walk = func(msg *mailstuff.ThreadedMessage) {
+ date, dateErr := msg.Header.Date()
+ froms, fromErr := msg.Header.AddressList("From")
+ if dateErr == nil && fromErr == nil && len(froms) > 0 && (retTime.IsZero() || date.After(retTime)) {
+ retTime = date
+ retUser.Name = froms[0].Name
+ if retUser.Name == "" {
+ retUser.Name = froms[0].Address
+ }
+ retUser.URL = "mailto:" + froms[0].Address
+ }
+ }
+ walk(thread)
+
+ if !retTime.IsZero() {
+ return retTime, retUser, nil
}
}
return time.Time{}, User{}, nil
diff --git a/cmd/generate/httpcache.go b/cmd/generate/httpcache.go
index 1fb0429..2192737 100644
--- a/cmd/generate/httpcache.go
+++ b/cmd/generate/httpcache.go
@@ -11,7 +11,32 @@ import (
"sort"
)
-var httpCache = map[string]string{}
+type httpCacheEntry struct {
+ Body string
+ Err error
+}
+
+var httpCache = map[string]httpCacheEntry{}
+
+type httpStatusError struct {
+ StatusCode int
+ Status string
+}
+
+// Is implements the interface for [errors.Is].
+func (e *httpStatusError) Is(target error) bool {
+ switch target {
+ case os.ErrNotExist:
+ return e.StatusCode == http.StatusNotFound
+ default:
+ return false
+ }
+}
+
+// Error implements [error].
+func (e *httpStatusError) Error() string {
+ return fmt.Sprintf("unexpected HTTP status: %v", e.Status)
+}
func httpGet(u string, hdr map[string]string) (string, error) {
cacheKey := url.QueryEscape(u)
@@ -26,23 +51,25 @@ func httpGet(u string, hdr map[string]string) (string, error) {
if cache, ok := httpCache[cacheKey]; ok {
fmt.Printf("CACHE-GET %q\n", u)
- return cache, nil
+ return cache.Body, cache.Err
}
if err := os.Mkdir(".http-cache", 0777); err != nil && !os.IsExist(err) {
return "", err
}
cacheFile := filepath.Join(".http-cache", cacheKey)
if bs, err := os.ReadFile(cacheFile); err == nil {
- httpCache[cacheKey] = string(bs)
fmt.Printf("CACHE-GET %q\n", u)
- return httpCache[cacheKey], nil
+ httpCache[cacheKey] = httpCacheEntry{Body: string(bs)}
+ return httpCache[cacheKey].Body, nil
} else if !os.IsNotExist(err) {
+ httpCache[cacheKey] = httpCacheEntry{Err: err}
return "", err
}
fmt.Printf("GET %q...", u)
req, err := http.NewRequest(http.MethodGet, u, nil)
if err != nil {
+ httpCache[cacheKey] = httpCacheEntry{Err: err}
return "", err
}
for k, v := range hdr {
@@ -51,23 +78,27 @@ func httpGet(u string, hdr map[string]string) (string, error) {
resp, err := http.DefaultClient.Do(req)
if err != nil {
fmt.Printf(" err\n")
+ httpCache[cacheKey] = httpCacheEntry{Err: err}
return "", err
}
if resp.StatusCode != http.StatusOK {
fmt.Printf(" err\n")
- return "", fmt.Errorf("unexpected HTTP status: %v", resp.Status)
+ httpCache[cacheKey] = httpCacheEntry{Err: err}
+ return "", &httpStatusError{StatusCode: resp.StatusCode, Status: resp.Status}
}
bs, err := io.ReadAll(resp.Body)
if err != nil {
fmt.Printf(" err\n")
+ httpCache[cacheKey] = httpCacheEntry{Err: err}
+ httpCache[cacheKey] = httpCacheEntry{Err: err}
return "", err
}
fmt.Printf(" ok\n")
if err := os.WriteFile(cacheFile, bs, 0666); err != nil {
return "", err
}
- httpCache[cacheKey] = string(bs)
- return httpCache[cacheKey], nil
+ httpCache[cacheKey] = httpCacheEntry{Body: string(bs)}
+ return httpCache[cacheKey].Body, nil
}
func httpGetJSON(u string, hdr map[string]string, out any) error {
diff --git a/cmd/generate/mailstuff/jwz.md b/cmd/generate/mailstuff/jwz.md
new file mode 100644
index 0000000..54f0a45
--- /dev/null
+++ b/cmd/generate/mailstuff/jwz.md
@@ -0,0 +1,47 @@
+To: Jamie Zawinski <jwz@jwz.org>
+Subject: message threading
+
+Hi,
+
+I'm implementing message threading, and have been referencing both
+your document <https://www.jwz.org/doc/threading.html> and RFC 5256.
+I'm not sure whether you're interested in updating a document that's
+more than 25 years old, but if you are: I hope you find the following
+feedback valuable.
+
+You write that the algorithm in RFC 5256 is merely a "restating" of
+your algorithm, but I noticed 3 (minor) differences:
+
+1. In your step 1.C, the RFC says to check whether this would create a
+ loop, and if it would to skip creating the link; your version only
+ says to perform this check in step 1.B.
+
+2. The RFC says to sort the messages by date between your steps 4 and
+ 5; that is: when grouping by subject, containers in the root set
+ should be processed in date-order (you do not specify an order),
+ and that if container in the root set is empty then the subject
+ should be taken from the earliest-date child (you say to use an
+ arbitrary child).
+
+3. The RFC precisely states how to trim a subject down to a "base
+ subject," rather than simply saying "Strip ``Re:'', ``RE:'',
+ ``RE[5]:'', ``Re: Re[4]: Re:'' and so on."
+
+Additionally, there are two minor points on which I found their
+version to be clearer:
+
+1. The RFC specifies how to handle messages without a Message-Id or
+ with a duplicate Message-Id (on page 9), as well as how to
+ normalize a Message-Id (by referring to RFC 2822). This is perhaps
+ out-of-scope of your algorithm document, but I feel that it would
+ be worth mentioning in your background or definitions section.
+
+2. In your step 1.B, I did not understand what "If they are already
+ linked, don't change the existing links" meant until I read the
+ RFC, which words it as "If a message already has a parent, don't
+ change the existing link." It was unclear to me what "they" was
+ referring to in your version.
+
+--
+Happy hacking,
+~ Luke T. Shumaker
diff --git a/cmd/generate/mailstuff/thread.go b/cmd/generate/mailstuff/thread.go
index c6fa181..2cdf9a4 100644
--- a/cmd/generate/mailstuff/thread.go
+++ b/cmd/generate/mailstuff/thread.go
@@ -1,22 +1,28 @@
package mailstuff
import (
+ "fmt"
+ "net/mail"
"regexp"
"strings"
)
-type set[T comparable] map[T]struct{}
+type Set[T comparable] map[T]struct{}
-func (s set[T]) Insert(val T) {
+func (s Set[T]) Insert(val T) {
s[val] = struct{}{}
}
-func (s set[T]) Has(val T) bool {
- _, ok := s[val]
+func mapHas[K comparable, V any](m map[K]V, k K) bool {
+ _, ok := m[k]
return ok
}
-func (s set[T]) PickOne() T {
+func (s Set[T]) Has(val T) bool {
+ return mapHas(s, val)
+}
+
+func (s Set[T]) PickOne() T {
for v := range s {
return v
}
@@ -24,225 +30,85 @@ func (s set[T]) PickOne() T {
return zero
}
-////////////////////////////////////////////////////////////////////////////////
-
-// https://www.jwz.org/doc/threading.html
+type MessageID string
-// Definitions /////////////////////////////////////////////////////////////////
-
-type jwzContainer struct {
- Message *jwzMessage
- Parent *jwzContainer
- Children set[*jwzContainer]
+type ThreadedMessage struct {
+ *mail.Message
+ Parent *ThreadedMessage
+ Children Set[*ThreadedMessage]
}
-type jwzMessage struct {
- Subject string
- ID jwzID
- References []jwzID
-}
+var reReplyID = regexp.MustCompile("<[^> \t\r\n]+>")
-type jwzID string
-
-func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool {
- if ancestor == descendent {
- return true
+func rfc2822parse(msg *mail.Message) *jwzMessage {
+ // TODO: This is bad, and needs a real implementation.
+ ret := &jwzMessage{
+ Subject: msg.Header.Get("Subject"),
+ ID: jwzID(msg.Header.Get("Message-ID")),
}
- for child := range ancestor.Children {
- if child.IsAncestorOf(descendent) {
- return true
- }
+ refIDs := strings.Fields(msg.Header.Get("References"))
+ strings.Fields(msg.Header.Get("References"))
+ if replyID := reReplyID.FindString(msg.Header.Get("In-Reply-To")); replyID != "" {
+ refIDs = append(refIDs, replyID)
}
- return false
+ ret.References = make([]jwzID, len(refIDs))
+ for i := range refIDs {
+ ret.References[i] = jwzID(refIDs[i])
+ }
+ return ret
}
-// The Algorithm ///////////////////////////////////////////////////////////////
-
-var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`)
-
-func jwzThreadMessages(msgs map[jwzID]*jwzMessage) set[*jwzContainer] {
- idTable := make(map[jwzID]*jwzContainer, len(msgs))
-
- // 1. For each message
+func ThreadMessages(msgs []*mail.Message) (Set[*ThreadedMessage], map[MessageID]*ThreadedMessage) {
+ jwzMsgs := make(map[jwzID]*jwzMessage, len(msgs))
+ retMsgs := make(map[jwzID]*ThreadedMessage, len(msgs))
+ bogusCnt := 0
for _, msg := range msgs {
- // A.
- msgContainer := idTable[msg.ID]
- if msgContainer != nil && msgContainer.Message == nil {
- msgContainer.Message = msg
- } else {
- msgContainer = &jwzContainer{
- Message: msg,
- Children: make(set[*jwzContainer]),
- }
- idTable[msg.ID] = msgContainer
- }
- // B.
- for _, refID := range msg.References {
- refContainer := idTable[refID]
- if refContainer == nil {
- refContainer = &jwzContainer{
- Children: make(set[*jwzContainer]),
- }
- idTable[refID] = refContainer
- }
+ jwzMsg := rfc2822parse(msg)
+
+ // RFC 5256:
+ //
+ // If a message does not contain a Message-ID header
+ // line, or the Message-ID header line does not
+ // contain a valid Message ID, then assign a unique
+ // Message ID to this message.
+ //
+ // If two or more messages have the same Message ID,
+ // then only use that Message ID in the first (lowest
+ // sequence number) message, and assign a unique
+ // Message ID to each of the subsequent messages with
+ // a duplicate of that Message ID.
+ for jwzMsg.ID == "" || mapHas(jwzMsgs, jwzMsg.ID) {
+ jwzMsg.ID = jwzID(fmt.Sprintf("bogus.%d", bogusCnt))
+ bogusCnt++
}
- for i := 0; i+1 < len(msg.References); i++ {
- parent := idTable[msg.References[i]]
- child := idTable[msg.References[i+1]]
- if !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) {
- parent.Children.Insert(child)
- child.Parent = parent
- }
- }
- // C.
- if len(msg.References) == 0 {
- if msgContainer.Parent != nil {
- delete(msgContainer.Parent.Children, msgContainer)
- }
- msgContainer.Parent = nil
- } else {
- msgContainer.Parent = idTable[msg.References[len(msg.References)-1]]
- msgContainer.Parent.Children.Insert(msgContainer)
- }
- }
- // 2. Find the root set
- roots := make(set[*jwzContainer])
- for _, container := range idTable {
- if container.Parent == nil {
- roots.Insert(container)
+ jwzMsgs[jwzMsg.ID] = jwzMsg
+ retMsgs[jwzMsg.ID] = &ThreadedMessage{
+ Message: msg,
}
}
- // 3. Discard id_table
- idTable = nil
+ jwzThreads := jwzThreadMessages(jwzMsgs)
- // 4. Prune empty containers
- pseudoRoot := &jwzContainer{
- Children: roots,
- }
- for root := range roots {
- root.Parent = pseudoRoot
- }
- var recurse func(*jwzContainer)
- recurse = func(container *jwzContainer) {
- // Recurse. This is a touch complicated because
- // `recurse(child)` might insert into
- // `container.Children`, and those insertions might
- // not be emitted by the range loop
- for visited := make(set[*jwzContainer]); ; {
- beforeSize := len(visited)
- for child := range container.Children {
- if visited.Has(child) {
- continue
- }
- recurse(child)
- visited.Insert(child)
- }
- if len(visited) == beforeSize {
- break
- }
- }
- // Main.
- if container.Message == nil {
- if len(container.Children) == 0 { // A.
- delete(container.Parent.Children, container)
- } else { // B.
- if len(container.Children) == 1 || container.Parent != pseudoRoot {
- for child := range container.Children {
- container.Parent.Children.Insert(child)
- child.Parent = container.Parent
- }
- delete(container.Parent.Children, container)
- }
- }
- }
- }
- for root := range roots {
- recurse(root)
- }
- for root := range roots {
- root.Parent = nil
- }
- pseudoRoot = nil
-
- // 5. Group root set by subject
- // A.
- subjectTable := make(map[string]*jwzContainer)
- // B.
- for root := range roots {
- var subject string
- if root.Message != nil {
- subject = root.Message.Subject
+ var convertMessage func(*jwzContainer) *ThreadedMessage
+ convertMessage = func(in *jwzContainer) *ThreadedMessage {
+ var out *ThreadedMessage
+ if in.Message == nil {
+ out = new(ThreadedMessage)
} else {
- subject = root.Children.PickOne().Message.Subject
+ out = retMsgs[in.Message.ID]
}
- prefix := jwzSubjectRE.FindString(subject)
- subject = strings.TrimSpace(subject[len(prefix):])
- if subject == "" {
- continue
- }
- if other := subjectTable[subject]; other == nil {
- subjectTable[subject] = root
- } else if other.Message == nil {
- subjectTable[subject] = root
- } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" {
- subjectTable[subject] = root
+ out.Children = make(Set[*ThreadedMessage], len(in.Children))
+ for inChild := range in.Children {
+ outChild := convertMessage(inChild)
+ out.Children.Insert(outChild)
+ outChild.Parent = out
}
+ return out
}
- // C.
- for root := range roots {
- var subject string
- if root.Message != nil {
- subject = root.Message.Subject
- } else {
- subject = root.Children.PickOne().Message.Subject
- }
- prefix := jwzSubjectRE.FindString(subject)
- subject = strings.TrimSpace(subject[len(prefix):])
-
- other := subjectTable[subject]
- if other == nil || other == root {
- continue
- }
-
- switch {
- case root.Message == nil && other.Message == nil:
- for child := range root.Children {
- other.Children.Insert(child)
- child.Parent = other
- }
- delete(roots, root)
- case (root.Message == nil) != (other.Message == nil):
- var empty, nonEmpty *jwzContainer
- if root.Message == nil {
- empty = root
- nonEmpty = other
- } else {
- empty = other
- nonEmpty = root
- }
- empty.Children.Insert(nonEmpty)
- nonEmpty.Parent = empty
- case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "":
- other.Children.Insert(root)
- root.Parent = other
- // skip the reverse of the above case--it happened implicitly
- default:
- newRoot := &jwzContainer{
- Children: make(set[*jwzContainer], 2),
- }
- newRoot.Children.Insert(root)
- root.Parent = newRoot
- newRoot.Children.Insert(other)
- other.Parent = newRoot
- subjectTable[subject] = newRoot
- roots.Insert(newRoot)
- delete(roots, root)
- delete(roots, other)
- }
+ retThreads := make(Set[*ThreadedMessage], len(jwzThreads))
+ for inThread := range jwzThreads {
+ retThreads.Insert(convertMessage(inThread))
}
-
- // 6. Now you're done threading
- return roots
+ return retThreads, retMsgs
}
diff --git a/cmd/generate/mailstuff/thread_alg.go b/cmd/generate/mailstuff/thread_alg.go
new file mode 100644
index 0000000..1b351e9
--- /dev/null
+++ b/cmd/generate/mailstuff/thread_alg.go
@@ -0,0 +1,226 @@
+package mailstuff
+
+import (
+ "regexp"
+ "strings"
+)
+
+// https://www.jwz.org/doc/threading.html
+
+// TODO: See ./jwz.md for RFC 5256 changes we might want to bring in.
+
+// Definitions /////////////////////////////////////////////////////////////////
+
+type jwzContainer struct {
+ Message *jwzMessage
+ Parent *jwzContainer
+ Children Set[*jwzContainer]
+}
+
+type jwzMessage struct {
+ Subject string
+ ID jwzID
+ References []jwzID
+}
+
+type jwzID = MessageID //string
+
+func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool {
+ if ancestor == descendent {
+ return true
+ }
+ for child := range ancestor.Children {
+ if child.IsAncestorOf(descendent) {
+ return true
+ }
+ }
+ return false
+}
+
+// The Algorithm ///////////////////////////////////////////////////////////////
+
+var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`)
+
+func jwzThreadMessages(msgs map[jwzID]*jwzMessage) Set[*jwzContainer] {
+ idTable := make(map[jwzID]*jwzContainer, len(msgs))
+
+ // 1. For each message
+ for _, msg := range msgs {
+ // A.
+ msgContainer := idTable[msg.ID]
+ if msgContainer != nil && msgContainer.Message == nil {
+ msgContainer.Message = msg
+ } else {
+ msgContainer = &jwzContainer{
+ Message: msg,
+ Children: make(Set[*jwzContainer]),
+ }
+ idTable[msg.ID] = msgContainer
+ }
+ // B.
+ for _, refID := range msg.References {
+ refContainer := idTable[refID]
+ if refContainer == nil {
+ refContainer = &jwzContainer{
+ Children: make(Set[*jwzContainer]),
+ }
+ idTable[refID] = refContainer
+ }
+ }
+ for i := 0; i+1 < len(msg.References); i++ {
+ parent := idTable[msg.References[i]]
+ child := idTable[msg.References[i+1]]
+ if child.Parent == nil && !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) {
+ parent.Children.Insert(child)
+ child.Parent = parent
+ }
+ }
+ // C.
+ if len(msg.References) == 0 {
+ if msgContainer.Parent != nil {
+ delete(msgContainer.Parent.Children, msgContainer)
+ }
+ msgContainer.Parent = nil
+ } else {
+ msgContainer.Parent = idTable[msg.References[len(msg.References)-1]]
+ msgContainer.Parent.Children.Insert(msgContainer)
+ }
+ }
+
+ // 2. Find the root Set
+ root := &jwzContainer{
+ Children: make(Set[*jwzContainer]),
+ }
+ for _, container := range idTable {
+ if container.Parent == nil {
+ container.Parent = root
+ root.Children.Insert(container)
+ }
+ }
+
+ // 3. Discard id_table
+ idTable = nil
+
+ // 4. Prune empty containers
+ var recurse func(*jwzContainer)
+ recurse = func(container *jwzContainer) {
+ // Recurse. This is a touch complicated because
+ // `recurse(child)` might insert into
+ // `container.Children`, and those insertions might
+ // not be emitted by the range loop
+ for visited := make(Set[*jwzContainer]); ; {
+ beforeSize := len(visited)
+ for child := range container.Children {
+ if visited.Has(child) {
+ continue
+ }
+ recurse(child)
+ visited.Insert(child)
+ }
+ if len(visited) == beforeSize {
+ break
+ }
+ }
+ if container.Parent == nil {
+ return
+ }
+ // Main.
+ if container.Message == nil {
+ if len(container.Children) == 0 { // A.
+ delete(container.Parent.Children, container)
+ } else { // B.
+ if len(container.Children) == 1 || container.Parent != root {
+ for child := range container.Children {
+ container.Parent.Children.Insert(child)
+ child.Parent = container.Parent
+ }
+ delete(container.Parent.Children, container)
+ }
+ }
+ }
+ }
+ recurse(root)
+
+ // 5. Group root Set by subject
+ // A.
+ subjectTable := make(map[string]*jwzContainer)
+ // B.
+ for this := range root.Children {
+ var subject string
+ if this.Message != nil {
+ subject = this.Message.Subject
+ } else {
+ subject = this.Children.PickOne().Message.Subject
+ }
+ prefix := jwzSubjectRE.FindString(subject)
+ subject = strings.TrimSpace(subject[len(prefix):])
+ if subject == "" {
+ continue
+ }
+ if other := subjectTable[subject]; other == nil {
+ subjectTable[subject] = this
+ } else if other.Message == nil {
+ subjectTable[subject] = this
+ } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" {
+ subjectTable[subject] = this
+ }
+ }
+ // C.
+ for this := range root.Children {
+ var subject string
+ if this.Message != nil {
+ subject = this.Message.Subject
+ } else {
+ subject = this.Children.PickOne().Message.Subject
+ }
+ prefix := jwzSubjectRE.FindString(subject)
+ subject = strings.TrimSpace(subject[len(prefix):])
+
+ other := subjectTable[subject]
+ if other == nil || other == this {
+ continue
+ }
+
+ switch {
+ case this.Message == nil && other.Message == nil:
+ for child := range this.Children {
+ other.Children.Insert(child)
+ child.Parent = other
+ }
+ delete(root.Children, this)
+ case (this.Message == nil) != (other.Message == nil):
+ var empty, nonEmpty *jwzContainer
+ if this.Message == nil {
+ empty = this
+ nonEmpty = other
+ } else {
+ empty = other
+ nonEmpty = this
+ }
+ empty.Children.Insert(nonEmpty)
+ nonEmpty.Parent = empty
+ case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "":
+ other.Children.Insert(this)
+ this.Parent = other
+ // skip the reverse of the above case--it happened implicitly
+ default:
+ newParent := &jwzContainer{
+ Children: make(Set[*jwzContainer], 2),
+ }
+ newParent.Children.Insert(this)
+ this.Parent = newParent
+ newParent.Children.Insert(other)
+ other.Parent = newParent
+ subjectTable[subject] = newParent
+ root.Children.Insert(newParent)
+ delete(root.Children, this)
+ delete(root.Children, other)
+ }
+ }
+
+ // 6. Now you're done threading
+ for child := range root.Children {
+ child.Parent = nil
+ }
+ return root.Children
+}