package main

import (
	"compress/gzip"
	"errors"
	"fmt"
	"net/mail"
	"net/url"
	"os"
	"regexp"
	"strconv"
	"strings"
	"time"

	"git.lukeshu.com/www/cmd/generate/mailstuff"
)

var (
	// rePiperMailMessage matches the URL of a single archived message.
	// Submatches: 1 = archive base URL (including the trailing slash),
	// 2 = the "YYYY-Month" period, 3 = the numeric message index.
	rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-9]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`)

	// rePiperMailDate matches the line of a message page that holds the
	// message date; submatch 1 is the date text.
	//
	// NOTE(review): the <I>...</I> wrapper follows Mailman's stock
	// Pipermail article template; confirm against the archives being
	// scraped.
	rePiperMailDate = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`)

	// rePiperMailReply matches the line of a message page that holds the
	// "reply" link; submatch 1 is the link target, whose query string is
	// searched for an In-Reply-To parameter.
	//
	// NOTE(review): the <LINK REL="made" ...> form follows Mailman's stock
	// Pipermail article template; confirm against the archives being
	// scraped.
	rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s*$`)
)

// PartPiperMail is a Forge implementation that extracts information from
// Pipermail mailing-list archive pages.
type PartPiperMail struct{}

var _ Forge = PartPiperMail{}

// FetchStatus always returns an empty status and a nil error.
func (PartPiperMail) FetchStatus(urls []string) (string, error) {
	return "", nil
}

// FetchSubmittedAt returns the date shown on the first URL in urls that
// is a Pipermail message URL and whose page contains a date line.
//
// URLs that do not match rePiperMailMessage are skipped.  A fetch error
// or a date that does not parse as time.UnixDate is returned as an
// error.  If no URL yields a date, the zero time and a nil error are
// returned.
func (PartPiperMail) FetchSubmittedAt(urls []string) (time.Time, error) {
	for _, u := range urls {
		if !rePiperMailMessage.MatchString(u) {
			continue
		}
		htmlStr, err := httpGet(u, nil)
		if err != nil {
			return time.Time{}, err
		}
		for _, line := range strings.Split(htmlStr, "\n") {
			if m := rePiperMailDate.FindStringSubmatch(line); m != nil {
				return time.Parse(time.UnixDate, m[1])
			}
		}
	}
	return time.Time{}, nil
}

// nextMonth returns the "YYYY-Month" period that follows ym, rolling
// over from December to January of the next year.
//
// It panics if ym has no "-" separator, if the month is not a full
// English month name, or (for December) if the year is not an integer.
func (PartPiperMail) nextMonth(ym string) string {
	yStr, mStr, ok := strings.Cut(ym, "-")
	if !ok {
		panic(fmt.Errorf("invalid year-month: %q", ym))
	}
	months := [...]string{
		"January", "February", "March", "April", "May", "June",
		"July", "August", "September", "October", "November", "December",
	}
	for i, name := range months {
		if name != mStr {
			continue
		}
		if i+1 < len(months) {
			return yStr + "-" + months[i+1]
		}
		// December: the year must be numeric so that it can be incremented.
		y, err := strconv.Atoi(yStr)
		if err != nil {
			panic(fmt.Errorf("invalid year-month: %q", ym))
		}
		return fmt.Sprintf("%d-January", y+1)
	}
	panic(fmt.Errorf("invalid year-month: %q", ym))
}

// threadLen counts the nodes in thread (the node itself and all of its
// descendants) that have a non-nil Message.  A nil thread counts as 0.
func (p PartPiperMail) threadLen(thread *mailstuff.ThreadedMessage) int {
	if thread == nil {
		return 0
	}
	ret := 0
	if thread.Message != nil {
		ret++
	}
	for child := range thread.Children {
		ret += p.threadLen(child)
	}
	return ret
}

// FetchLastUpdated returns the date and author of the most recent
// message in the thread rooted at the first usable Pipermail message URL
// in urls.
//
// For each URL matching rePiperMailMessage it:
//
//  1. fetches the message page and takes the In-Reply-To query parameter
//     of the reply link as the message ID;
//  2. downloads the monthly "<base><YYYY-Month>.txt.gz" mbox archives,
//     starting with the month in the URL, and re-threads the accumulated
//     messages after each month, stopping once a month adds nothing to
//     the thread or a later month's archive does not exist;
//  3. walks the thread and picks the message with the latest Date header
//     that also has a parsable, non-empty From header.
//
// URLs that do not match, carry no message ID, or produce no thread or
// no dated message are skipped.  If no URL yields a result, zero values
// and a nil error are returned.
func (p PartPiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
	for _, u := range urls {
		m := rePiperMailMessage.FindStringSubmatch(u)
		if m == nil {
			continue
		}
		uBase := m[1]
		uYM := m[2]
		//uInt := m[3]

		htmlStr, err := httpGet(u, nil)
		if err != nil {
			return time.Time{}, User{}, fmt.Errorf("could not fetch message: %w", err)
		}

		// Find the message ID advertised by the page's reply link.
		var msgid mailstuff.MessageID
		for _, line := range strings.Split(htmlStr, "\n") {
			if m := rePiperMailReply.FindStringSubmatch(line); m != nil {
				ru, err := url.Parse(m[1])
				if err != nil {
					continue
				}
				if msgid = mailstuff.MessageID(ru.Query().Get("In-Reply-To")); msgid != "" {
					break
				}
			}
		}
		if msgid == "" {
			continue
		}

		// Accumulate monthly archives in mbox and re-thread after each
		// one, so that replies posted in later months are picked up.
		var thread *mailstuff.ThreadedMessage
		for ym, mbox := uYM, []*mail.Message(nil); ; ym = p.nextMonth(ym) {
			lenBefore := p.threadLen(thread)

			mboxGzStr, err := httpGet(uBase+ym+".txt.gz", nil)
			if err != nil {
				// The message's own month must be fetchable; for a later
				// month, a not-exist error just ends the scan.
				if ym == uYM || !errors.Is(err, os.ErrNotExist) {
					return time.Time{}, User{}, fmt.Errorf("could not fetch mbox for %s: %w", ym, err)
				}
				break
			}
			gzReader, err := gzip.NewReader(strings.NewReader(mboxGzStr))
			if err != nil {
				return time.Time{}, User{}, fmt.Errorf("could not read mbox gz: %w", err)
			}
			_mbox, err := mailstuff.ReadMBox(gzReader)
			if err != nil {
				gzReader.Close()
				return time.Time{}, User{}, fmt.Errorf("could not parse mbox: %w", err)
			}
			if err := gzReader.Close(); err != nil {
				return time.Time{}, User{}, fmt.Errorf("close gz: %w", err)
			}
			mbox = append(mbox, _mbox...)

			_, messages := mailstuff.ThreadMessages(mbox)
			thread = messages[msgid]

			// This month contributed nothing to the thread: stop looking.
			if p.threadLen(thread) == lenBefore {
				break
			}
		}
		if thread == nil {
			continue
		}

		var retTime time.Time
		var retUser User
		var walk func(*mailstuff.ThreadedMessage)
		walk = func(msg *mailstuff.ThreadedMessage) {
			if msg == nil {
				return
			}
			// threadLen treats a nil Message as possible, so do not touch
			// the headers of such a node; its children are still visited.
			if msg.Message != nil {
				date, dateErr := msg.Header.Date()
				froms, fromErr := msg.Header.AddressList("From")
				if dateErr == nil && fromErr == nil && len(froms) > 0 && (retTime.IsZero() || date.After(retTime)) {
					retTime = date
					retUser.Name = froms[0].Name
					if retUser.Name == "" {
						retUser.Name = froms[0].Address
					}
					retUser.URL = "mailto:" + froms[0].Address
				}
			}
			for child := range msg.Children {
				walk(child)
			}
		}
		walk(thread)

		if !retTime.IsZero() {
			return retTime, retUser, nil
		}
	}
	return time.Time{}, User{}, nil
}