diff options
author | Luke T. Shumaker <lukeshu@lukeshu.com> | 2024-06-08 21:45:35 -0600 |
---|---|---|
committer | Luke T. Shumaker <lukeshu@lukeshu.com> | 2024-06-08 21:47:34 -0600 |
commit | 0bd9be7a0ba08fdecb065790f48d2704c88d22c0 (patch) | |
tree | 8460db4aef7edbb8ae30d17227fa468a4d08d811 /cmd | |
parent | 8cc4eb82796727f20accfce8d049f677e6210824 (diff) |
fix pipermail threading
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/generate/forge_pipermail.go | 32 |
1 file changed, 25 insertions, 7 deletions
diff --git a/cmd/generate/forge_pipermail.go b/cmd/generate/forge_pipermail.go index 56e7ef2..ccc58f0 100644 --- a/cmd/generate/forge_pipermail.go +++ b/cmd/generate/forge_pipermail.go @@ -1,6 +1,7 @@ package main import ( + "compress/gzip" "errors" "fmt" "net/mail" @@ -17,7 +18,7 @@ import ( var ( rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-4]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`) rePiperMailDate = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`) - rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s$`) + rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s*$`) ) type PiperMail struct{} @@ -109,7 +110,7 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { htmlStr, err := httpGet(u, nil) if err != nil { - return time.Time{}, User{}, err + return time.Time{}, User{}, fmt.Errorf("could not fetch message: %w", err) } var msgid mailstuff.MessageID for _, line := range strings.Split(htmlStr, "\n") { @@ -131,13 +132,24 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { for ym, mbox := uYM, []*mail.Message(nil); true; ym = p.nextMonth(ym) { lenBefore := p.threadLen(thread) - mboxStr, err := httpGet(uBase+ym+".txt.gz", nil) - if err != nil && (ym == uYM || !errors.Is(err, os.ErrNotExist)) { - return time.Time{}, User{}, err + mboxGzStr, err := httpGet(uBase+ym+".txt.gz", nil) + if err != nil { + if (ym == uYM || !errors.Is(err, os.ErrNotExist)) { + return time.Time{}, User{}, fmt.Errorf("could not fetch mbox for %s: %w", ym, err) + } + break + } + gzReader, err := gzip.NewReader(strings.NewReader(mboxGzStr)) + if err != nil { + return time.Time{}, User{}, fmt.Errorf("could not read mbox gz: %w", err) } - _mbox, err := mailstuff.ReadMBox(strings.NewReader(mboxStr)) + _mbox, err := mailstuff.ReadMBox(gzReader) if err != nil { - return time.Time{}, User{}, err + gzReader.Close() 
+ return time.Time{}, User{}, fmt.Errorf("could not parse mbox: %w", err) + } + if err := gzReader.Close(); err != nil { + return time.Time{}, User{}, fmt.Errorf("close gz: %w", err) } mbox = append(mbox, _mbox...) _, messages := mailstuff.ThreadMessages(mbox) @@ -147,6 +159,9 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { break } } + if thread == nil { + continue + } var retTime time.Time var retUser User @@ -163,6 +178,9 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { } retUser.URL = "mailto:" + froms[0].Address } + for child := range msg.Children { + walk(child) + } } walk(thread) |