35 files changed, 2737 insertions, 535 deletions
@@ -5,3 +5,5 @@ /public/imworkingon/index.html /public/imworkingon/imworkingon.css /public/imworkingon/imworkingon.css.map + +/public/posix/index.html @@ -1,12 +1,15 @@ +# build = not checked in to Git build/files = public/imworkingon/index.html build/files += public/imworkingon/imworkingon.css +build/files += public/posix/index.html +# generate = checked in to Git generate/files = public/sponsor/liberapay-icon.svg generate/files += public/sponsor/kofi-icon.png generate/files += public/sponsor/patreon-icon.svg generate/files += public/sponsor/github-icon.svg -################################################################################ +# boilerplate ################################################################## all: build .PHONY: all @@ -25,14 +28,20 @@ generate-clean: rm -f -- $(generate/files) .PHONY: generate generate-clean -################################################################################ +# build = not checked in to Git ################################################ public/imworkingon/index.html: FORCE - TZ=America/Denver go run ./cmd/generate + TZ=America/Denver go run ./cmd/gen-imworkingon .PRECIOUS: public/imworkingon/index.html public/imworkingon/imworkingon.css: public/imworkingon/imworkingon.scss sass $< $@ +public/posix/index.html: FORCE + mkdir -p $(@D) + go run ./cmd/gen-posix + +# generate = checked in to Git ################################################# + public/sponsor/liberapay-icon.svg: curl -L https://liberapay.com/assets/liberapay/icon-v2_black.svg >$@ public/sponsor/kofi-icon.png: diff --git a/cmd/generate/calendar.go b/cmd/gen-imworkingon/calendar.go index 29c3318..29c3318 100644 --- a/cmd/generate/calendar.go +++ b/cmd/gen-imworkingon/calendar.go diff --git a/cmd/gen-imworkingon/forge_forgejo.go b/cmd/gen-imworkingon/forge_forgejo.go new file mode 100644 index 0000000..34ec767 --- /dev/null +++ b/cmd/gen-imworkingon/forge_forgejo.go @@ -0,0 +1,185 @@ +package main + +import ( + "fmt" + "regexp" + "time" + + "git.lukeshu.com/www/lib/httpcache" +) + +var reForgejoPR = regexp.MustCompile(`^https://([^/]+)/([^/?#]+)/([^/?#]+)/pulls/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`) + +type Forgejo struct { + Authority string +} + +var _ Forge = Forgejo{} + +func (f Forgejo) FetchStatus(urls []string) (string, error) { + return fetchPerURLStatus(urls, func(u string) (string, error) { + m := reForgejoPR.FindStringSubmatch(u) + if m == nil || m[1] != f.Authority { + return "", nil + } + authority := m[1] + user := m[2] + repo := m[3] + prnum := m[4] + + urlStr := "https://" + authority + "/api/v1/repos/" + user + "/" + repo + "/pulls/" + prnum + + var obj struct { + // State values are "open" and "closed". 
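+			// (Forgejo's Gitea-lineage API mirrors GitHub's shape for
+			// these fields; the struct decodes only what is used below.)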
+			State          string `json:"state"`
+			Merged         bool   `json:"merged"`
+			MergeCommitSha string `json:"merge_commit_sha"`
+		}
+		if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil {
+			return "", err
+		}
+		ret := obj.State
+		if obj.Merged {
+			ret = statusMerged
+			tag, err := getGitTagThatContainsAll("https://"+authority+"/"+user+"/"+repo, obj.MergeCommitSha)
+			if err != nil {
+				return "", err
+			}
+			if tag != "" {
+				ret = fmt.Sprintf(statusReleasedFmt, tag)
+			}
+		}
+
+		return ret, nil
+	})
+}
+
+func (f Forgejo) FetchSubmittedAt(urls []string) (time.Time, error) {
+	return fetchPerURLSubmittedAt(urls, func(u string) (time.Time, error) {
+		m := reForgejoPR.FindStringSubmatch(u)
+		if m == nil || m[1] != f.Authority {
+			return time.Time{}, nil
+		}
+		authority := m[1]
+		user := m[2]
+		repo := m[3]
+		prnum := m[4]
+
+		urlStr := "https://" + authority + "/api/v1/repos/" + user + "/" + repo + "/pulls/" + prnum
+
+		var obj struct {
+			CreatedAt time.Time `json:"created_at"`
+		}
+		if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil {
+			return time.Time{}, err
+		}
+		return obj.CreatedAt, nil
+	})
+}
+
+func (f Forgejo) FetchLastUpdated(urls []string) (time.Time, User, error) {
+	return fetchPerURLLastUpdated(urls, func(u string) (time.Time, User, error) {
+		m := reForgejoPR.FindStringSubmatch(u)
+		if m == nil || m[1] != f.Authority {
+			return time.Time{}, User{}, nil
+		}
+		authority := m[1]
+		user := m[2]
+		repo := m[3]
+		prnum := m[4]
+
+		urlStr := "https://" + authority + "/api/v1/repos/" + user + "/" + repo + "/pulls/" + prnum
+
+		var obj struct {
+			UpdatedAt time.Time `json:"updated_at"`
+
+			CreatedAt time.Time `json:"created_at"`
+			CreatedBy struct {
+				Login   string `json:"login"`
+				HTMLURL string `json:"html_url"`
+			} `json:"user"`
+
+			MergedAt time.Time `json:"merged_at"`
+			MergedBy struct {
+				Login   string `json:"login"`
+				HTMLURL string `json:"html_url"`
+			} `json:"merged_by"`
+		}
+		if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil {
+			return time.Time{}, User{}, err
+		}
+
+		retUpdatedAt := obj.UpdatedAt
+		var retUser User
+
+		if retUser == (User{}) && withinOneSecond(obj.CreatedAt, retUpdatedAt) {
+			retUser.Name = obj.CreatedBy.Login
+			retUser.URL = obj.CreatedBy.HTMLURL
+		}
+		if retUser == (User{}) && withinOneSecond(obj.MergedAt, retUpdatedAt) {
+			retUser.Name = obj.MergedBy.Login
+			retUser.URL = obj.MergedBy.HTMLURL
+		}
+		if retUser == (User{}) {
+			// "normal" comments
+			//
+			// NOTE: these lookups assume the Forgejo instance's
+			// Gitea-compatible API: the issues/{n}/comments path is
+			// documented, while the pulls/{n}/comments and
+			// issues/{n}/events paths below mirror GitHub's and are
+			// assumptions.
+			var comments []struct {
+				UpdatedAt time.Time `json:"updated_at"`
+				User      struct {
+					Login   string `json:"login"`
+					HTMLURL string `json:"html_url"`
+				} `json:"user"`
+			}
+			if err := httpcache.GetPaginatedJSON("https://"+authority+"/api/v1/repos/"+user+"/"+repo+"/issues/"+prnum+"/comments", nil, &comments, githubPagination); err != nil {
+				return time.Time{}, User{}, err
+			}
+			for _, comment := range comments {
+				if withinOneSecond(comment.UpdatedAt, retUpdatedAt) {
+					retUser.Name = comment.User.Login
+					retUser.URL = comment.User.HTMLURL
+					break
+				}
+			}
+		}
+		if retUser == (User{}) {
+			// comments on a specific part of the diff
+			var reviewComments []struct {
+				UpdatedAt time.Time `json:"updated_at"`
+				User      struct {
+					Login   string `json:"login"`
+					HTMLURL string `json:"html_url"`
+				} `json:"user"`
+			}
+			if err := httpcache.GetPaginatedJSON("https://"+authority+"/api/v1/repos/"+user+"/"+repo+"/pulls/"+prnum+"/comments", nil, &reviewComments, githubPagination); err != nil {
+				return time.Time{}, User{}, err
+			}
+			for _, comment := range reviewComments {
+				if withinOneSecond(comment.UpdatedAt, retUpdatedAt) {
+					retUser.Name =
comment.User.Login
+					retUser.URL = comment.User.HTMLURL
+					break
+				}
+			}
+		}
+		if retUser == (User{}) {
+			var events []struct {
+				CreatedAt time.Time `json:"created_at"`
+				Actor     struct {
+					Login   string `json:"login"`
+					HTMLURL string `json:"html_url"`
+				} `json:"actor"`
+			}
+			if err := httpcache.GetJSON("https://"+authority+"/api/v1/repos/"+user+"/"+repo+"/issues/"+prnum+"/events", nil, &events); err != nil {
+				return time.Time{}, User{}, err
+			}
+			for _, event := range events {
+				if withinOneSecond(event.CreatedAt, retUpdatedAt) {
+					retUser.Name = event.Actor.Login
+					retUser.URL = event.Actor.HTMLURL
+					break
+				}
+			}
+		}
+
+		return retUpdatedAt, retUser, nil
+	})
+}
diff --git a/cmd/gen-imworkingon/forge_gerrit.go b/cmd/gen-imworkingon/forge_gerrit.go
new file mode 100644
index 0000000..05f0386
--- /dev/null
+++ b/cmd/gen-imworkingon/forge_gerrit.go
@@ -0,0 +1,160 @@
+package main
+
+import (
+	"encoding"
+	"encoding/json"
+	"fmt"
+	"net/url"
+	"regexp"
+	"strings"
+	"time"
+
+	"git.lukeshu.com/www/lib/httpcache"
+)
+
+// httpGetGerritJSON is like [httpcache.GetJSON], but strips the `)]}'`
+// XSSI-protection line that Gerrit prepends to its JSON responses; see
+// https://gerrit-review.googlesource.com/Documentation/rest-api.html#output
+func httpGetGerritJSON(u string, hdr map[string]string, out any) error {
+	str, err := httpcache.Get(u, hdr)
+	if err != nil {
+		return err
+	}
+	if _, body, ok := strings.Cut(str, "\n"); ok {
+		str = body
+	}
+	return json.Unmarshal([]byte(str), out)
+}
+
+const GerritTimeFormat = "2006-01-02 15:04:05.000000000"
+
+type GerritTime struct {
+	Val time.Time
+}
+
+var (
+	_ fmt.Stringer             = GerritTime{}
+	_ encoding.TextMarshaler   = GerritTime{}
+	_ encoding.TextUnmarshaler = (*GerritTime)(nil)
+)
+
+// String implements [fmt.Stringer].
+func (t GerritTime) String() string {
+	return t.Val.Format(GerritTimeFormat)
+}
+
+// MarshalText implements [encoding.TextMarshaler].
+func (t GerritTime) MarshalText() ([]byte, error) {
+	return []byte(t.String()), nil
+}
+
+// UnmarshalText implements [encoding.TextUnmarshaler].
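+//
+// A hedged usage sketch (the timestamp is illustrative, not real data):
+//
+//	var t GerritTime
+//	err := t.UnmarshalText([]byte("2012-04-24 18:01:32.000000000"))
+//	// on success, t.Val is 2012-04-24 18:01:32 +0000 UTC and err is nil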
+func (t *GerritTime) UnmarshalText(data []byte) error { + val, err := time.Parse(GerritTimeFormat, string(data)) + if err != nil { + return err + } + t.Val = val + return nil +} + +type Gerrit struct{} + +var _ Forge = Gerrit{} + +var reGoogleGerritCL = regexp.MustCompile(`https://([a-z]+-review\.googlesource\.com)/c/([^?#]+)/\+/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`) + +func (Gerrit) FetchStatus(urls []string) (string, error) { + return fetchPerURLStatus(urls, func(u string) (string, error) { + m := reGoogleGerritCL.FindStringSubmatch(u) + if m == nil { + return "", nil + } + authority := m[1] + projectID := m[2] + changeID := m[3] + + urlStr := "https://" + authority + "/changes/" + url.PathEscape(projectID) + "~" + changeID + "?o=MESSAGES&o=DETAILED_ACCOUNTS" + + var obj struct { + Status string `json:"status"` + } + if err := httpGetGerritJSON(urlStr, nil, &obj); err != nil { + return "", err + } + // https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#change-info + switch obj.Status { + case "NEW": + return "open", nil + case "MERGED": + return "merged", nil + case "ABANDONED": + return "closed", nil + } + return "", nil + }) +} + +func (Gerrit) FetchSubmittedAt(urls []string) (time.Time, error) { + return fetchPerURLSubmittedAt(urls, func(u string) (time.Time, error) { + m := reGoogleGerritCL.FindStringSubmatch(u) + if m == nil { + return time.Time{}, nil + } + authority := m[1] + projectID := m[2] + changeID := m[3] + + urlStr := "https://" + authority + "/changes/" + url.PathEscape(projectID) + "~" + changeID + "?o=MESSAGES&o=DETAILED_ACCOUNTS" + + var obj struct { + Created GerritTime `json:"created"` + } + if err := httpGetGerritJSON(urlStr, nil, &obj); err != nil { + return time.Time{}, err + } + return obj.Created.Val, nil + }) +} + +func (Gerrit) FetchLastUpdated(urls []string) (time.Time, User, error) { + return fetchPerURLLastUpdated(urls, func(u string) (time.Time, User, error) { + m := reGoogleGerritCL.FindStringSubmatch(u) + if m == nil { + return time.Time{}, User{}, nil + } + authority := m[1] + projectID := m[2] + changeID := m[3] + + urlStr := "https://" + authority + "/changes/" + url.PathEscape(projectID) + "~" + changeID + "?o=MESSAGES&o=DETAILED_ACCOUNTS" + + var obj struct { + Updated GerritTime `json:"updated"` + Messages []struct { + Author struct { + AccountID int `json:"_account_id"` + Name string `json:"name"` + DisplayName string `json:"display_name"` + } `json:"author"` + Date GerritTime `json:"date"` + } `json:"messages"` + } + if err := httpGetGerritJSON(urlStr, nil, &obj); err != nil { + return time.Time{}, User{}, err + } + retUpdatedAt := obj.Updated.Val + var retUser User + for _, message := range obj.Messages { + if withinOneSecond(message.Date.Val, retUpdatedAt) { + if message.Author.DisplayName != "" { + retUser.Name = message.Author.DisplayName + } else { + retUser.Name = message.Author.Name + } + retUser.URL = fmt.Sprintf("https://%s/dashboard/%d", authority, message.Author.AccountID) + break + } + } + return retUpdatedAt, retUser, nil + }) +} diff --git a/cmd/gen-imworkingon/forge_github.go b/cmd/gen-imworkingon/forge_github.go new file mode 100644 index 0000000..b657ad7 --- /dev/null +++ b/cmd/gen-imworkingon/forge_github.go @@ -0,0 +1,208 @@ +package main + +import ( + "fmt" + "net/url" + "regexp" + "time" + + "git.lukeshu.com/www/lib/httpcache" +) + +var reGitHubPR = regexp.MustCompile(`^https://github\.com/([^/?#]+)/([^/?#]+)/pull/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`) + +func githubPagination(i int) url.Values { + params := 
make(url.Values) + params.Set("page", fmt.Sprintf("%v", i+1)) + return params +} + +type GitHub struct{} + +var _ Forge = GitHub{} + +func (GitHub) FetchStatus(urls []string) (string, error) { + for _, u := range urls { + if reGoogleGerritCL.MatchString(u) { + return "", nil + } + } + return fetchPerURLStatus(urls, func(u string) (string, error) { + m := reGitHubPR.FindStringSubmatch(u) + if m == nil { + return "", nil + } + user := m[1] + repo := m[2] + prnum := m[3] + + urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/pulls/" + prnum + + var obj struct { + // State values are "open" and "closed". + State string `json:"state"` + Merged bool `json:"merged"` + MergeCommitSha string `json:"merge_commit_sha"` + } + if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil { + return "", err + } + ret := obj.State + if obj.Merged { + ret = statusMerged + tag, err := getGitTagThatContainsAll("https://github.com/"+user+"/"+repo, obj.MergeCommitSha) + if err != nil { + return "", err + } + if tag != "" { + ret = fmt.Sprintf(statusReleasedFmt, tag) + } + } else if obj.State == "closed" { + var mergeCommits []string + for _, u := range urls { + if m := reGitHubCommit.FindStringSubmatch(u); m != nil && m[1] == user && m[2] == repo { + mergeCommits = append(mergeCommits, m[3]) + } + } + tag, err := getGitTagThatContainsAll("https://github.com/"+user+"/"+repo, mergeCommits...) + if err == nil && tag != "" { + ret = fmt.Sprintf(statusReleasedFmt, tag) + } + } + + return ret, nil + }) +} + +func (GitHub) FetchSubmittedAt(urls []string) (time.Time, error) { + return fetchPerURLSubmittedAt(urls, func(u string) (time.Time, error) { + m := reGitHubPR.FindStringSubmatch(u) + if m == nil { + return time.Time{}, nil + } + user := m[1] + repo := m[2] + prnum := m[3] + + urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/pulls/" + prnum + + var obj struct { + CreatedAt time.Time `json:"created_at"` + } + if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil { + return time.Time{}, err + } + return obj.CreatedAt, nil + }) +} + +func (GitHub) FetchLastUpdated(urls []string) (time.Time, User, error) { + for _, u := range urls { + if reGoogleGerritCL.MatchString(u) { + return time.Time{}, User{}, nil + } + } + return fetchPerURLLastUpdated(urls, func(u string) (time.Time, User, error) { + m := reGitHubPR.FindStringSubmatch(u) + if m == nil { + return time.Time{}, User{}, nil + } + user := m[1] + repo := m[2] + prnum := m[3] + + urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/pulls/" + prnum + + var obj struct { + UpdatedAt time.Time `json:"updated_at"` + + CreatedAt time.Time `json:"created_at"` + CreatedBy struct { + Login string `json:"login"` + HTMLURL string `json:"html_url"` + } `json:"user"` + + MergedAt time.Time `json:"merged_at"` + MergedBy struct { + Login string `json:"login"` + HTMLURL string `json:"html_url"` + } `json:"merged_by"` + } + if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil { + return time.Time{}, User{}, err + } + + retUpdatedAt := obj.UpdatedAt + var retUser User + + if retUser == (User{}) && withinOneSecond(obj.CreatedAt, retUpdatedAt) { + retUser.Name = obj.CreatedBy.Login + retUser.URL = obj.CreatedBy.HTMLURL + } + if retUser == (User{}) && withinOneSecond(obj.MergedAt, retUpdatedAt) { + retUser.Name = obj.MergedBy.Login + retUser.URL = obj.MergedBy.HTMLURL + } + if retUser == (User{}) { + // "normal" comments + var comments []struct { + UpdatedAt time.Time `json:"updated_at"` + User struct { + Login string `json:"login"` 
+ HTMLURL string `json:"html_url"` + } `json:"user"` + } + if err := httpcache.GetPaginatedJSON("https://api.github.com/repos/"+user+"/"+repo+"/issues/"+prnum+"/comments", nil, &comments, githubPagination); err != nil { + return time.Time{}, User{}, err + } + for _, comment := range comments { + if withinOneSecond(comment.UpdatedAt, retUpdatedAt) { + retUser.Name = comment.User.Login + retUser.URL = comment.User.HTMLURL + break + } + } + } + if retUser == (User{}) { + // comments on a specific part of the diff + var reviewComments []struct { + UpdatedAt time.Time `json:"updated_at"` + User struct { + Login string `json:"login"` + HTMLURL string `json:"html_url"` + } `json:"user"` + } + if err := httpcache.GetPaginatedJSON("https://api.github.com/repos/"+user+"/"+repo+"/pulls/"+prnum+"/comments", nil, &reviewComments, githubPagination); err != nil { + return time.Time{}, User{}, err + } + for _, comment := range reviewComments { + if withinOneSecond(comment.UpdatedAt, retUpdatedAt) { + retUser.Name = comment.User.Login + retUser.URL = comment.User.HTMLURL + break + } + } + } + if retUser == (User{}) { + var events []struct { + CreatedAt time.Time `json:"created_at"` + Actor struct { + Login string `json:"login"` + HTMLURL string `json:"html_url"` + } `json:"actor"` + } + if err := httpcache.GetJSON("https://api.github.com/repos/"+user+"/"+repo+"/issues/"+prnum+"/events", nil, &events); err != nil { + return time.Time{}, User{}, err + } + for _, event := range events { + if withinOneSecond(event.CreatedAt, retUpdatedAt) { + retUser.Name = event.Actor.Login + retUser.URL = event.Actor.HTMLURL + break + } + } + } + + return retUpdatedAt, retUser, nil + }) +} diff --git a/cmd/gen-imworkingon/forge_gitlab.go b/cmd/gen-imworkingon/forge_gitlab.go new file mode 100644 index 0000000..84a2285 --- /dev/null +++ b/cmd/gen-imworkingon/forge_gitlab.go @@ -0,0 +1,173 @@ +package main + +import ( + "fmt" + "net/url" + "regexp" + "time" + + "git.lukeshu.com/www/lib/httpcache" +) + +var reGitLabMR = regexp.MustCompile(`^https://([^/]+)/([^?#]+)/-/merge_requests/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`) + +type GitLab struct{} + +var _ Forge = GitLab{} + +func (GitLab) FetchStatus(urls []string) (string, error) { + for _, u := range urls { + m := reGitLabMR.FindStringSubmatch(u) + if m == nil { + continue + } + authority := m[1] + projectID := m[2] + mrnum := m[3] + + urlStr := "https://" + authority + "/api/v4/projects/" + url.QueryEscape(projectID) + "/merge_requests/" + mrnum + + var obj struct { + // State values are "opened", "closed", "locked", and "merged". 
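+			// ("locked" ends up bucketed with "closed" by
+			// classifyStatus().)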
+ State string `json:"state"` + MergeCommitSha string `json:"merge_commit_sha"` + SquashCommitSha string `json:"squash_commit_sha"` + } + if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil { + return "", err + } + + ret := obj.State + if ret == "opened" { + ret = statusOpen + } + + if ret == "merged" { + ret = statusMerged + var mergeCommit string + if obj.MergeCommitSha != "" { + mergeCommit = obj.MergeCommitSha + } + if obj.SquashCommitSha != "" { + mergeCommit = obj.SquashCommitSha + } + if mergeCommit != "" { + tag, err := getGitTagThatContainsAll("https://"+authority+"/"+projectID+".git", mergeCommit) + if err != nil { + return "", err + } + if tag != "" { + ret = fmt.Sprintf(statusReleasedFmt, tag) + } + } + } + + return ret, nil + } + return "", nil +} + +func (GitLab) FetchSubmittedAt(urls []string) (time.Time, error) { + for _, u := range urls { + m := reGitLabMR.FindStringSubmatch(u) + if m == nil { + continue + } + authority := m[1] + projectID := m[2] + mrnum := m[3] + + urlStr := "https://" + authority + "/api/v4/projects/" + url.QueryEscape(projectID) + "/merge_requests/" + mrnum + + var obj struct { + CreatedAt time.Time `json:"created_at"` + } + if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil { + return time.Time{}, err + } + return obj.CreatedAt, nil + } + return time.Time{}, nil +} + +func (GitLab) FetchLastUpdated(urls []string) (time.Time, User, error) { + for _, u := range urls { + m := reGitLabMR.FindStringSubmatch(u) + if m == nil { + continue + } + authority := m[1] + projectID := m[2] + mrnum := m[3] + + urlStr := "https://" + authority + "/api/v4/projects/" + url.QueryEscape(projectID) + "/merge_requests/" + mrnum + + var obj struct { + ID int `json:"id"` + + UpdatedAt time.Time `json:"updated_at"` + + CreatedAt time.Time `json:"created_at"` + CreatedBy struct { + Username string `json:"username"` + WebURL string `json:"web_url"` + } `json:"author"` + + MergedAt time.Time `json:"merged_at"` + MergedBy struct { + Username string `json:"username"` + WebURL string `json:"web_url"` + } `json:"merged_by"` + } + if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil { + return time.Time{}, User{}, err + } + + retUpdatedAt := obj.UpdatedAt + var retUser User + + if retUser == (User{}) && withinOneSecond(obj.CreatedAt, retUpdatedAt) { + retUser.Name = obj.CreatedBy.Username + retUser.URL = obj.CreatedBy.WebURL + } + if retUser == (User{}) && withinOneSecond(obj.MergedAt, retUpdatedAt) { + retUser.Name = obj.MergedBy.Username + retUser.URL = obj.MergedBy.WebURL + } + if retUser == (User{}) { + var notes struct { + Notes []struct { + UpdatedAt time.Time `json:"updated_at"` + Author struct { + Username string `json:"username"` + Path string `json:"path"` + } `json:"author"` + ResolvedAt time.Time `json:"resolved_at"` + ResolvedBy struct { + ResolvedAt time.Time `json:"resolved_at"` + Username string `json:"username"` + Path string `json:"path"` + } `json:"resolved_by"` + } `json:"notes"` + } + if err := httpcache.GetJSON(fmt.Sprintf("https://%s/%s/noteable/merge_request/%d/notes", authority, projectID, obj.ID), map[string]string{"X-Last-Fetched-At": "0"}, ¬es); err != nil { + return time.Time{}, User{}, err + } + for _, note := range notes.Notes { + if withinOneSecond(note.UpdatedAt, retUpdatedAt) { + if withinOneSecond(note.UpdatedAt, note.ResolvedAt) { + retUser.Name = note.ResolvedBy.Username + retUser.URL = "https://" + authority + note.ResolvedBy.Path + } else { + retUser.Name = note.Author.Username + retUser.URL = "https://" + authority + 
note.Author.Path
+				}
+				break
+			}
+		}
+	}
+
+	return retUpdatedAt, retUser, nil
+	}
+	return time.Time{}, User{}, nil
+}
diff --git a/cmd/gen-imworkingon/forge_part_git.go b/cmd/gen-imworkingon/forge_part_git.go
new file mode 100644
index 0000000..5175750
--- /dev/null
+++ b/cmd/gen-imworkingon/forge_part_git.go
@@ -0,0 +1,82 @@
+package main
+
+import (
+	"fmt"
+	"regexp"
+	"time"
+
+	"git.lukeshu.com/www/lib/httpcache"
+)
+
+var reGitHubCommit = regexp.MustCompile(`^https://github\.com/([^/?#]+)/([^/?#]+)/commit/([0-9a-f]+)(?:\?[^#]*)?(?:#.*)?$`)
+
+type PartGit struct{}
+
+var _ Forge = PartGit{}
+
+func (PartGit) FetchStatus(urls []string) (string, error) {
+	var gitURL string
+	var gitCommits []string
+	for _, u := range urls {
+		if m := reGitHubCommit.FindStringSubmatch(u); m != nil {
+			user := m[1]
+			repo := m[2]
+			hash := m[3]
+
+			gitURL = "https://github.com/" + user + "/" + repo
+			gitCommits = append(gitCommits, hash)
+		}
+	}
+	if len(gitCommits) == 0 {
+		return "", nil
+	}
+	ret := statusMerged
+	tag, err := getGitTagThatContainsAll(gitURL, gitCommits...)
+	if err != nil {
+		return "", err
+	}
+	if tag != "" {
+		ret = fmt.Sprintf(statusReleasedFmt, tag)
+	}
+	return ret, nil
+}
+
+func (PartGit) FetchSubmittedAt(urls []string) (time.Time, error) {
+	return time.Time{}, nil
+}
+
+func (PartGit) FetchLastUpdated(urls []string) (time.Time, User, error) {
+	var ret time.Time
+	for _, u := range urls {
+		if m := reGitHubCommit.FindStringSubmatch(u); m != nil {
+			user := m[1]
+			repo := m[2]
+			hash := m[3]
+
+			urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/commits/" + hash
+			var obj struct {
+				Commit struct {
+					Author struct {
+						Date time.Time `json:"date"`
+					} `json:"author"`
+					Committer struct {
+						Date time.Time `json:"date"`
+					} `json:"committer"`
+				} `json:"commit"`
+			}
+			if err := httpcache.GetJSON(urlStr, nil, &obj); err != nil {
+				return time.Time{}, User{}, err
+			}
+			if obj.Commit.Author.Date.After(ret) {
+				ret = obj.Commit.Author.Date
+			}
+			if obj.Commit.Committer.Date.After(ret) {
+				ret = obj.Commit.Committer.Date
+			}
+		}
+	}
+	if ret.IsZero() {
+		return time.Time{}, User{}, nil
+	}
+	return ret, User{}, nil
+}
diff --git a/cmd/gen-imworkingon/forge_part_pipermail.go b/cmd/gen-imworkingon/forge_part_pipermail.go
new file mode 100644
index 0000000..9db498b
--- /dev/null
+++ b/cmd/gen-imworkingon/forge_part_pipermail.go
@@ -0,0 +1,193 @@
+package main
+
+import (
+	"compress/gzip"
+	"errors"
+	"fmt"
+	"net/mail"
+	"net/url"
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"git.lukeshu.com/www/lib/httpcache"
+	"git.lukeshu.com/www/lib/mailstuff"
+)
+
+var (
+	rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-9]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`)
+	rePiperMailDate    = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`)
+	rePiperMailReply   = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s*$`)
+)
+
+type PartPiperMail struct{}
+
+var _ Forge = PartPiperMail{}
+
+func (PartPiperMail) FetchStatus(urls []string) (string, error) {
+	return "", nil
+}
+
+func (PartPiperMail) FetchSubmittedAt(urls []string) (time.Time, error) {
+	for _, u := range urls {
+		if !rePiperMailMessage.MatchString(u) {
+			continue
+		}
+		htmlStr, err := httpcache.Get(u, nil)
+		if err != nil {
+			return time.Time{}, err
+		}
+		for _, line := range strings.Split(htmlStr, "\n") {
+			if m := rePiperMailDate.FindStringSubmatch(line); m != nil {
+				return time.Parse(time.UnixDate, m[1])
+			}
+		}
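+		// (The <I>…</I> date line is expected in time.UnixDate form,
+		// e.g. "Mon Jan  2 15:04:05 MST 2006"; that Pipermail pages
+		// render it this way is an assumption of this parser.)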
+ } + return time.Time{}, nil +} + +func (PartPiperMail) nextMonth(ym string) string { + yStr, mStr, ok := strings.Cut(ym, "-") + if !ok { + panic(fmt.Errorf("invalid year-month: %q", ym)) + } + switch mStr { + case "January": + return yStr + "-February" + case "February": + return yStr + "-March" + case "March": + return yStr + "-April" + case "April": + return yStr + "-May" + case "May": + return yStr + "-June" + case "June": + return yStr + "-July" + case "July": + return yStr + "-August" + case "August": + return yStr + "-September" + case "September": + return yStr + "-October" + case "October": + return yStr + "-November" + case "November": + return yStr + "-December" + case "December": + y, _ := strconv.Atoi(yStr) + return fmt.Sprintf("%d-January", y+1) + default: + panic(fmt.Errorf("invalid year-month: %q", ym)) + } +} + +func (p PartPiperMail) threadLen(thread *mailstuff.ThreadedMessage) int { + if thread == nil { + return 0 + } + + ret := 0 + if thread.Message != nil { + ret++ + } + for child := range thread.Children { + ret += p.threadLen(child) + } + return ret +} + +func (p PartPiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { + for _, u := range urls { + m := rePiperMailMessage.FindStringSubmatch(u) + if m == nil { + continue + } + uBase := m[1] + uYM := m[2] + //uInt := m[3] + + htmlStr, err := httpcache.Get(u, nil) + if err != nil { + return time.Time{}, User{}, fmt.Errorf("could not fetch message: %w", err) + } + var msgid mailstuff.MessageID + for _, line := range strings.Split(htmlStr, "\n") { + if m := rePiperMailReply.FindStringSubmatch(line); m != nil { + ru, err := url.Parse(m[1]) + if err != nil { + continue + } + if msgid = mailstuff.MessageID(ru.Query().Get("In-Reply-To")); msgid != "" { + break + } + } + } + if msgid == "" { + continue + } + + var thread *mailstuff.ThreadedMessage + for ym, mbox := uYM, []*mail.Message(nil); true; ym = p.nextMonth(ym) { + lenBefore := p.threadLen(thread) + + mboxGzStr, err := httpcache.Get(uBase+ym+".txt.gz", nil) + if err != nil { + if ym == uYM || !errors.Is(err, os.ErrNotExist) { + return time.Time{}, User{}, fmt.Errorf("could not fetch mbox for %s: %w", ym, err) + } + break + } + gzReader, err := gzip.NewReader(strings.NewReader(mboxGzStr)) + if err != nil { + return time.Time{}, User{}, fmt.Errorf("could not read mbox gz: %w", err) + } + _mbox, err := mailstuff.ReadMBox(gzReader) + if err != nil { + gzReader.Close() + return time.Time{}, User{}, fmt.Errorf("could not parse mbox: %w", err) + } + if err := gzReader.Close(); err != nil { + return time.Time{}, User{}, fmt.Errorf("close gz: %w", err) + } + mbox = append(mbox, _mbox...) 
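+			// Re-thread everything fetched so far; the enclosing
+			// loop keeps pulling later months until the thread
+			// stops growing.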
+ _, messages := mailstuff.ThreadMessages(mbox) + thread = messages[msgid] + + if p.threadLen(thread) == lenBefore { + break + } + } + if thread == nil { + continue + } + + var retTime time.Time + var retUser User + + var walk func(*mailstuff.ThreadedMessage) + walk = func(msg *mailstuff.ThreadedMessage) { + date, dateErr := msg.Header.Date() + froms, fromErr := msg.Header.AddressList("From") + if dateErr == nil && fromErr == nil && len(froms) > 0 && (retTime.IsZero() || date.After(retTime)) { + retTime = date + retUser.Name = froms[0].Name + if retUser.Name == "" { + retUser.Name = froms[0].Address + } + retUser.URL = "mailto:" + froms[0].Address + } + for child := range msg.Children { + walk(child) + } + } + walk(thread) + + if !retTime.IsZero() { + return retTime, retUser, nil + } + } + return time.Time{}, User{}, nil +} diff --git a/cmd/generate/gitcache.go b/cmd/gen-imworkingon/gitcache.go index 7caf024..844408d 100644 --- a/cmd/generate/gitcache.go +++ b/cmd/gen-imworkingon/gitcache.go @@ -5,6 +5,7 @@ import ( "os" "os/exec" "strings" + "time" "git.mothstuff.lol/lukeshu/eclipse/lib/gitcache" ) @@ -12,7 +13,8 @@ import ( var gitFetched = map[string]struct{}{} var gitCache = &gitcache.Cache{ - Dir: ".git-cache", + Dir: ".git-cache", + MinPeriod: 1 * time.Hour, } func withGit(u string, fn func(dir string) error) error { diff --git a/cmd/generate/imworkingon.html.tmpl b/cmd/gen-imworkingon/imworkingon.html.tmpl index 85a56e1..415a252 100644 --- a/cmd/generate/imworkingon.html.tmpl +++ b/cmd/gen-imworkingon/imworkingon.html.tmpl @@ -15,15 +15,19 @@ <nav> <p>This page provides several views into what I'm doing to improve the ecosystem:</p> <ol> - <li><a href="#tags">Top themes</a></li> + <!--<li><a href="#tags">Top themes</a></li>--> <li><a href="#contribs-pending">In-progress work</a></li> <li><a href="#contribs-completed">Completed work</a></li> <li><a href="#standups">Daily statuses</a></li> </ol> </nav> + + <p>The "In-progress work" and "Completed work" sections do <em>not</em> include routine maintenance on <a href="https://parabola.nu">Parabola GNU/Linux-libre</a>, which is also a solid chunk of what I do.</p> + <p>If you find this work valuable, please consider <a class="em" href="../sponsor/">sponsoring me</a>.</p> </section> + <!-- <section id="tags"> <h2>Top themes <a href="#tags">🔗</a></h2> {{- range $tagName, $tagInfo := .Tags }} @@ -33,11 +37,12 @@ </article> {{- end }} </section> + --> {{- define "contrib" }} {{ $contrib := . 
}} {{ $upstream := $contrib | getUpstream }} - <article class="contrib {{ $contrib.StatusClass }}-contrib"> + <article class="contrib {{ $contrib.StatusClass }}-contrib" {{- if $contrib.ID }}id="contrib-{{ $contrib.ID }}"{{ end }}> <div class="contrib-upstream-name"><a class="em" href="{{ index $upstream.URLs 0 }}">{{ $upstream.Name }}</a></div> <div class="contrib-upstream-desc">{{ $upstream.Desc | md2html }}</div> <div class="contrib-urls"> diff --git a/cmd/generate/main.go b/cmd/gen-imworkingon/main.go index e322e5c..c0c9723 100644 --- a/cmd/generate/main.go +++ b/cmd/gen-imworkingon/main.go @@ -4,15 +4,17 @@ import ( "bytes" _ "embed" "fmt" + "html/template" "os" "reflect" + "slices" "sort" "strings" "time" - "html/template" - "github.com/yuin/goldmark" + + "git.lukeshu.com/www/lib/httpcache" ) func MarkdownToHTML(md string) (template.HTML, error) { @@ -23,10 +25,6 @@ func MarkdownToHTML(md string) (template.HTML, error) { return template.HTML(html.String()), nil } -var githubProjects = map[string]string{ - "flori/json": "ruby-json", -} - func main() { if err := mainWithError(); err != nil { fmt.Fprintf(os.Stderr, "%s: error: %v\n", os.Args[0], err) @@ -41,10 +39,25 @@ var timeTagTmpl = template.Must(template.New("time.tag.tmpl"). Parse(`<time datetime="{{ .Machine }}" title="{{ .HumanVerbose }}">{{ .HumanPretty }}</time>`)) func mainWithError() error { - standups, err := ReadStandups("https://fosstodon.org", "lukeshu") + httpcache.UserAgent = "https://git.lukeshu.com/www/tree/cmd/gen-imworkingon" + + standups, err := ReadStandups("https://social.coop", "lukeshu") + if err != nil { + return err + } + _standups, err := ReadStandups("https://fosstodon.org", "lukeshu") if err != nil { return err } + standups = append(standups, _standups...) + standupIgnoreList := []string{ + "https://fosstodon.org/@lukeshu/112198267818432116", + "https://fosstodon.org/@lukeshu/112198241414760456", + } + standups = slices.DeleteFunc(standups, func(status *MastodonStatus) bool { + return slices.Contains(standupIgnoreList, status.URL) + }) + contribs, err := ReadContribs("imworkingon/contribs.yml") if err != nil { return err @@ -125,12 +138,34 @@ func mainWithError() error { } } } + // Now try to synthesize an upstream. if m := reGitHubPR.FindStringSubmatch(c.URLs[0]); m != nil { user := m[1] repo := m[2] - return Upstream{URLs: []string{c.URLs[0]}, Name: user + "/" + repo} + return Upstream{ + URLs: []string{"https://github.com/" + user + "/" + repo}, + Name: user + "/" + repo, + } + } + if m := reGitLabMR.FindStringSubmatch(c.URLs[0]); m != nil { + authority := m[1] + projectID := m[2] + if authority == "gitlab.archlinux.org" && strings.HasPrefix(projectID, "archlinux/packaging/packages/") { + return Upstream{ + URLs: []string{"https://" + authority + "/" + projectID}, + Name: strings.Replace(projectID, "/packages/", "/", 1), + } + } + return Upstream{ + URLs: []string{"https://" + authority + "/" + projectID}, + Name: projectID, + } + } + // :( + return Upstream{ + URLs: []string{c.URLs[0]}, + Name: "???", } - return Upstream{URLs: []string{c.URLs[0]}, Name: "???"} }, }). 
Parse(htmlTmplStr)) diff --git a/cmd/gen-imworkingon/src_contribs.go b/cmd/gen-imworkingon/src_contribs.go new file mode 100644 index 0000000..5694156 --- /dev/null +++ b/cmd/gen-imworkingon/src_contribs.go @@ -0,0 +1,223 @@ +package main + +import ( + "fmt" + "os" + "strings" + "time" + + "sigs.k8s.io/yaml" +) + +type User struct { + Name string `json:"name"` + URL string `json:"url"` +} + +type Contribution struct { + ID string + URLs []string `json:"urls"` + Tags []string `json:"tags"` + SponsoredBy string `json:"sponsored-by"` + Desc string `json:"desc"` + + SubmittedAt time.Time `json:"submitted-at"` + LastUpdatedAt time.Time `json:"last-updated-at"` + LastUpdatedBy User `json:"last-updated-by"` + Status string `json:"status"` + + StatusClass string `json:"-"` +} + +func ReadContribs(filename string) ([]Contribution, error) { + bs, err := os.ReadFile(filename) + if err != nil { + return nil, fmt.Errorf("contribs: %q: %w", filename, err) + } + var ret []Contribution + if err := yaml.UnmarshalStrict(bs, &ret); err != nil { + return nil, fmt.Errorf("contribs: %q: %w", filename, err) + } + for i := range ret { + contrib := ret[i] + if err := contrib.Fill(); err != nil { + return nil, fmt.Errorf("contribs: %q: %w", filename, err) + } + ret[i] = contrib + } + return ret, nil +} + +func (c *Contribution) Fill() error { + var err error + if c.SubmittedAt.IsZero() { + c.SubmittedAt, err = c.fetchSubmittedAt() + if err != nil { + return err + } + } + if c.LastUpdatedAt.IsZero() { + c.LastUpdatedAt, c.LastUpdatedBy, err = c.fetchLastUpdated() + if err != nil { + return err + } + } + if c.Status == "" { + c.Status, err = c.fetchStatus() + if err != nil { + return err + } + } + c.StatusClass, err = classifyStatus(c.Status) + if err != nil { + return err + } + for _, u := range c.URLs { + if m := reGoogleGerritCL.FindStringSubmatch(u); m != nil && m[1] == "go-review.googlesource.com" { + c.URLs = append(c.URLs, "https://golang.org/cl/"+m[3]) + } + } + return nil +} + +func classifyStatus(status string) (string, error) { + switch { + case strings.Contains(status, "released") || strings.Contains(status, "deployed"): + return "released", nil + case strings.Contains(status, "merged"): + return "merged", nil + case strings.Contains(status, "open"): + return "open", nil + case strings.Contains(status, "closed") || strings.Contains(status, "locked"): + return "closed", nil + default: + return "", fmt.Errorf("unrecognized status string: %q", status) + } +} + +const ( + statusOpen = "open" + statusMerged = "merged, not yet in a release" + statusReleasedFmt = "merged, released in %s" +) + +type Forge interface { + FetchStatus(urls []string) (string, error) + FetchSubmittedAt(urls []string) (time.Time, error) + FetchLastUpdated(urls []string) (time.Time, User, error) +} + +var forges = []Forge{ + // precedence only matters for .FetchStatus. 
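+	// (.FetchSubmittedAt and .FetchLastUpdated are aggregated across
+	// all forges: the earliest submission and the latest update win,
+	// so ordering does not matter for them.)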
+ + // highest precedence + Gerrit{}, + GitHub{}, + GitLab{}, + Forgejo{"codeberg.org"}, + PartPiperMail{}, + PartGit{}, + // lowest precedence +} + +func fetchPerURLStatus(urls []string, perURL func(string) (string, error)) (string, error) { + for _, u := range urls { + status, err := perURL(u) + if err != nil { + return "", err + } + if status != "" { + return status, nil + } + } + return "", nil +} + +func (c Contribution) fetchStatus() (string, error) { + for _, forge := range forges { + status, err := forge.FetchStatus(c.URLs) + if err != nil { + return "", err + } + if status != "" { + return status, nil + } + } + return "", fmt.Errorf("idk how to get status for %q", c.URLs[0]) +} + +func fetchPerURLSubmittedAt(urls []string, perURL func(string) (time.Time, error)) (time.Time, error) { + var ret time.Time + for _, u := range urls { + submittedAt, err := perURL(u) + if err != nil { + return time.Time{}, err + } + if !submittedAt.IsZero() && (ret.IsZero() || submittedAt.Before(ret)) { + ret = submittedAt + } + } + return ret, nil +} + +func (c Contribution) fetchSubmittedAt() (time.Time, error) { + var ret time.Time + for _, forge := range forges { + submittedAt, err := forge.FetchSubmittedAt(c.URLs) + if err != nil { + return time.Time{}, err + } + if !submittedAt.IsZero() && (ret.IsZero() || submittedAt.Before(ret)) { + ret = submittedAt + } + } + if !ret.IsZero() { + return ret, nil + } + return time.Time{}, fmt.Errorf("idk how to get created timestamp for %q", c.URLs[0]) +} + +func withinOneSecond(a, b time.Time) bool { + d := a.Sub(b) + if d < 0 { + d = -d + } + return d <= time.Second +} + +func fetchPerURLLastUpdated(urls []string, perURL func(string) (time.Time, User, error)) (time.Time, User, error) { + var ret struct { + time.Time + User + } + for _, u := range urls { + updatedAt, updatedBy, err := perURL(u) + if err != nil { + return time.Time{}, User{}, err + } + if !updatedAt.IsZero() && (ret.Time.IsZero() || updatedAt.After(ret.Time)) { + ret.Time, ret.User = updatedAt, updatedBy + } + } + return ret.Time, ret.User, nil +} + +func (c Contribution) fetchLastUpdated() (time.Time, User, error) { + var ret struct { + time.Time + User + } + for _, forge := range forges { + updatedAt, updatedBy, err := forge.FetchLastUpdated(c.URLs) + if err != nil { + return time.Time{}, User{}, err + } + if !updatedAt.IsZero() && (ret.Time.IsZero() || updatedAt.After(ret.Time)) { + ret.Time, ret.User = updatedAt, updatedBy + } + } + if !ret.Time.IsZero() { + return ret.Time, ret.User, nil + } + return time.Time{}, User{}, nil //fmt.Errorf("idk how to get updated timestamp for %q", c.URLs[0]) +} diff --git a/cmd/generate/src_contribs_test.go b/cmd/gen-imworkingon/src_contribs_test.go index 57ffc0f..57ffc0f 100644 --- a/cmd/generate/src_contribs_test.go +++ b/cmd/gen-imworkingon/src_contribs_test.go diff --git a/cmd/generate/src_mastodon.go b/cmd/gen-imworkingon/src_mastodon.go index b4b54a8..a3b9617 100644 --- a/cmd/generate/src_mastodon.go +++ b/cmd/gen-imworkingon/src_mastodon.go @@ -3,8 +3,9 @@ package main import ( "html/template" "net/url" - "slices" "time" + + "git.lukeshu.com/www/lib/httpcache" ) type MastodonStatus struct { @@ -19,12 +20,12 @@ func ReadStandups(server, username string) ([]*MastodonStatus, error) { var account struct { ID string `json:"id"` } - if err := httpGetJSON(server+"/api/v1/accounts/lookup?acct="+username, &account); err != nil { + if err := httpcache.GetJSON(server+"/api/v1/accounts/lookup?acct="+username, nil, &account); err != nil { return nil, err } var 
statuses []*MastodonStatus - if err := httpGetPaginatedJSON(server+"/api/v1/accounts/"+account.ID+"/statuses", &statuses, func(_ int) url.Values { + if err := httpcache.GetPaginatedJSON(server+"/api/v1/accounts/"+account.ID+"/statuses", nil, &statuses, func(_ int) url.Values { params := make(url.Values) params.Set("tagged", "DailyStandUp") params.Set("exclude_reblogs", "true") @@ -36,13 +37,5 @@ func ReadStandups(server, username string) ([]*MastodonStatus, error) { return nil, err } - ignoreList := []string{ - "https://fosstodon.org/@lukeshu/112198267818432116", - "https://fosstodon.org/@lukeshu/112198241414760456", - } - statuses = slices.DeleteFunc(statuses, func(status *MastodonStatus) bool { - return slices.Contains(ignoreList, status.URL) - }) - return statuses, nil } diff --git a/cmd/generate/src_tags.go b/cmd/gen-imworkingon/src_tags.go index 8dcf554..8dcf554 100644 --- a/cmd/generate/src_tags.go +++ b/cmd/gen-imworkingon/src_tags.go diff --git a/cmd/generate/src_upstreams.go b/cmd/gen-imworkingon/src_upstreams.go index 03f72ec..03f72ec 100644 --- a/cmd/generate/src_upstreams.go +++ b/cmd/gen-imworkingon/src_upstreams.go diff --git a/cmd/gen-posix/data.go b/cmd/gen-posix/data.go new file mode 100644 index 0000000..165ecbd --- /dev/null +++ b/cmd/gen-posix/data.go @@ -0,0 +1,211 @@ +package main + +import ( + "fmt" + "os" + "os/exec" + "regexp" + "strings" + + "git.lukeshu.com/www/lib/httpcache" +) + +var IEEESA = Vendor{ + Name: "IEEE-SA", + GetURL: func(id string) string { return fmt.Sprintf("http://standards.ieee.org/findstds/standard/%s.html", id) }, + GetName: func(id string, url string) string { + html, err := httpcache.Get(url, nil) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + cmd := exec.Command("nokogiri", "-e", `puts $_.css("meta[name=\"des\"], meta[name=\"designation\"]").first["content"]`) + cmd.Stderr = os.Stderr + cmd.Stdin = strings.NewReader(html) + d, err := cmd.Output() + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + return strings.TrimSuffix(string(d), "\n") + }, +} + +var reIEEE = regexp.MustCompile(`standardNumber":"([^"]*)"`) + +var IEEEXplore = Vendor{ + Name: "IEEE Xplore", + GetURL: func(id string) string { return fmt.Sprintf("http://ieeexplore.ieee.org/servlet/opac?punumber=%s", id) }, + GetName: func(id string, url string) string { + if strings.HasSuffix(url, "ERROR") { + return "ERROR" + } + html, err := httpcache.Get(url, nil) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + m := reIEEE.FindStringSubmatch(html) + if m == nil { + panic(fmt.Errorf("URL=%q did not contain expected JSON", url)) + } + return m[1] + }, +} + +var TOG = Vendor{ + Name: "The Open Group", + GetURL: func(id string) string { return fmt.Sprintf("https://www2.opengroup.org/ogsys/catalog/%s", id) }, + GetName: func(id string, url string) string { return id }, +} + +var TOGOnline = Vendor{ + Name: "online", + GetURL: func(id string) string { return fmt.Sprintf("http://pubs.opengroup.org/onlinepubs/%s/", id) }, + GetName: func(id string, url string) string { return url }, +} + +var ISO = Vendor{ + Name: "ISO", + GetURL: func(id string) string { + return fmt.Sprintf("http://www.iso.org/iso/home/store/catalogue_tc/catalogue_detail.htm?csnumber=%s", id) + }, + GetName: func(id string, url string) string { + html, err := httpcache.Get(url, nil) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + cmd := exec.Command("nokogiri", "-e", `puts $_.css("[itemprop=\"name\"]").first.text`) + cmd.Stderr = os.Stderr + 
cmd.Stdin = strings.NewReader(html) + d, err := cmd.Output() + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + return strings.TrimSuffix(string(d), "\n") + }, +} + +var Vendors = []Vendor{IEEESA, TOG, ISO} + +var Editions = []Edition{ + {Name: "POSIX-2001 (Issue 6)", Docs: []Document{ + {Vendor: IEEESA, Type: Full, ID: "1003.1-2001", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Full, ID: "7683"}, + }}, + }}, + {Name: "----->XBD-2001", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C950"}, + {Vendor: ISO, Type: Full, ID: "37312"}, + }}, + {Name: "----->XSH-2001", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C951"}, + {Vendor: ISO, Type: Full, ID: "37313"}, + }}, + {Name: "----->XCU-2001", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C952"}, + {Vendor: ISO, Type: Full, ID: "37314"}, + }}, + {Name: "----->XRAT-2001", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C953"}, + {Vendor: ISO, Type: Full, ID: "37315"}, + }}, + + {Name: "POSIX-2001, 2002 Edition", Docs: []Document{ + {Vendor: IEEESA, Type: Patch, ID: "1003.1-2001-Cor_1-2002", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Patch, ID: "9507"}, + }}, + {Vendor: TOG, Type: Patch, ID: "U057", Resellers: []Document{ + {Vendor: TOG, Type: Full, ID: "T031"}, + }}, + }}, + {Name: "----->XBD-2001, 2002 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C031"}, + {Vendor: ISO, Type: Full, ID: "38789", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-1-2003"}, + }}, + }}, + {Name: "----->XSH-2001, 2002 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C032"}, + {Vendor: ISO, Type: Full, ID: "38790", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-2-2003"}, + }}, + }}, + {Name: "----->XCU-2001, 2002 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C033"}, + {Vendor: ISO, Type: Full, ID: "38791", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-3-2003"}, + }}, + }}, + {Name: "----->XRAT-2001, 2002 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C034"}, + {Vendor: ISO, Type: Full, ID: "38792", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-4-2003"}, + }}, + }}, + + {Name: "POSIX-2001, 2004 Edition", Docs: []Document{ + {Vendor: IEEESA, Type: Patch, ID: "1003.1-2001-Cor_2-2004", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Patch, ID: "9022"}, + {Vendor: IEEEXplore, Type: Full, ID: "9156"}, + }}, + {Vendor: TOG, Type: Patch, ID: "U059", Resellers: []Document{ + {Vendor: TOG, Type: Full, ID: "T041"}, + {Vendor: TOGOnline, Type: Full, ID: "009695399"}, + }}, + }}, + {Name: "----->XBD-2001, 2004 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C046"}, + {Vendor: ISO, Type: Patch, ID: "40687"}, + }}, + {Name: "----->XSH-2001, 2004 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C047"}, + {Vendor: ISO, Type: Patch, ID: "40688"}, + }}, + {Name: "----->XCU-2001, 2004 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C048"}, + {Vendor: ISO, Type: Patch, ID: "40690"}, + }}, + {Name: "----->XRAT-2001, 2004 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C049"}, + {Vendor: ISO, Type: Patch, ID: "40691"}, + }}, + + {Name: "POSIX-2008 (Issue 7)", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C082", Resellers: []Document{ + {Vendor: TOGOnline, Type: Full, ID: "9699919799.2008edition"}, + }}, + + {Vendor: IEEESA, Type: Full, ID: "1003.1-2008", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Full, ID: "4694974"}, + }}, + + {Vendor: ISO, Type: 
Full, ID: "50516", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-2009"}, + {Vendor: IEEEXplore, Type: Full, ID: "5393777"}, + }}, + }}, + {Name: "POSIX-2008, 2013 Edition", Docs: []Document{ + {Vendor: TOG, Type: Patch, ID: "U130", Resellers: []Document{ + {Vendor: TOG, Type: Full, ID: "C138"}, + {Vendor: TOGOnline, Type: Full, ID: "9699919799.2013edition"}, + }}, + + {Vendor: IEEESA, Type: Patch, ID: "1003.1-2008-Cor_1-2013", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Patch, ID: "6482152"}, + {Vendor: IEEEXplore, Type: Full, ID: "6506089"}, + }}, + + {Vendor: ISO, Type: Patch, ID: "62005"}, + }}, + {Name: "POSIX-2008, 2016 Edition", Docs: []Document{ + {Vendor: TOG, Type: Patch, ID: "U160", Resellers: []Document{ + {Vendor: TOG, Type: Full, ID: "C165"}, + {Vendor: TOGOnline, Type: Full, ID: "9699919799.2016edition"}, + }}, + + {Vendor: IEEESA, Type: Patch, ID: "1003.1-2008-Cor_2-2016", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Patch, ID: "7542096"}, + {Vendor: IEEEXplore, Type: Full, ID: "7582336"}, + }}, + }}, +} + +// SUSv2 http://pubs.opengroup.org/onlinepubs/007908799/ diff --git a/cmd/gen-posix/http_hacks.go b/cmd/gen-posix/http_hacks.go new file mode 100644 index 0000000..16b8a8d --- /dev/null +++ b/cmd/gen-posix/http_hacks.go @@ -0,0 +1,156 @@ +package main + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "strings" + + "git.lukeshu.com/www/lib/httpcache" +) + +func _checkURL(url string) (string, error) { + switch { + case strings.HasPrefix(url, "https://web.archive.org/"): + _, err := httpcache.Get(url, nil) + return url, err + case strings.HasPrefix(url, "https://www2.opengroup.org/ogsys/catalog/"): + _, err := httpcache.Get(url, nil) + if err == nil { + return url, nil + } + if !errors.Is(err, os.ErrNotExist) { // don't hide non-404 errors + return "", err + } + suffix := strings.TrimPrefix(url, "https://www2.opengroup.org/ogsys/catalog/") + url2 := "https://publications.opengroup.org/" + strings.ToLower(suffix) + _, err = httpcache.Get(url2, nil) + if err == nil { + return url2, nil + } + if !errors.Is(err, os.ErrNotExist) { // don't hide non-404 errors + return "", err + } + url3, err := _checkURL("https://web.archive.org/web/20170102/" + url) + if err == nil { + return url3, nil + } + return url+"#ERROR", nil + case url == "http://ieeexplore.ieee.org/servlet/opac?punumber=7394900": + return url+"#ERROR", nil + default: + _, err := httpcache.Get(url, nil) + if err != nil && errors.Is(err, os.ErrNotExist) { + return _checkURL("https://web.archive.org/web/20170102/" + url) + } + return url, err + } +} + +func checkURL(url string) string { + url2, err := _checkURL(url) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + return url2 +} + +func nokogiriIgnoreFailure(htmlBytes []byte, expr string) string { + cmd := exec.Command("nokogiri", "-e", "puts "+expr) + cmd.Stderr = io.Discard + cmd.Stdin = bytes.NewReader(htmlBytes) + outBytes, _ := cmd.Output() + return strings.TrimSpace(string(outBytes)) +} + +func mockRedirect(url string) *http.Response { + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(""+ + "HTTP/1.1 302 Found\r\n"+ + "Location: "+url+"\r\n"+ + "\r\n")), nil) + if err != nil { + panic(err) + } + return resp +} + +func mockForbidden() *http.Response { + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(""+ + "HTTP/1.1 403 Forbidden\r\n"+ + "\r\n")), nil) + if err != nil { + panic(err) + } + return resp +} + +func modifyResponse(url string, 
entry httpcache.CacheEntry, resp *http.Response) *http.Response { + switch { + case strings.HasPrefix(url, "https://web.archive.org/"): + htmlBytes, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + // native Wayback Machine redirect + redirect := nokogiriIgnoreFailure(htmlBytes, `$_.css("p.impatient a").first["href"]`) + if strings.HasPrefix(redirect, "https://web.archive.org/web/") { + return mockRedirect(redirect) + } + + // silly TOG SSO + if strings.Contains(url, "sso.opengroup.org") { + if bytes.Contains(htmlBytes, []byte("document.forms.postbinding.submit()")) { + redirect := nokogiriIgnoreFailure(htmlBytes, `$_.css("#postbinding").first["action"]`) + if redirect != "" { + return mockRedirect(redirect) + } + } + if bytes.Contains(htmlBytes, []byte("General Authorization Error")) { + return mockForbidden() + } + } + + // We drained resp.Body, so re-create it. + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(string(entry))), nil) + if err != nil { + panic(err) + } + return resp + default: + return resp + } +} + +type mock404 struct { + Msg string +} + +// Is implements the interface for [errors.Is]. +func (e *mock404) Is(target error) bool { + return target == os.ErrNotExist +} + +// Error implements [error]. +func (e *mock404) Error() string { + return e.Msg +} + +func checkRedirect(req *http.Request, via []*http.Request) error { + // net/http.defaultCheckRedirect + if len(via) >= 10 { + return errors.New("stopped after 10 redirects") + } + + // detect redirects that should be 404s + oldURL := via[len(via)-1].URL + newURL := req.URL + if (newURL.Path == "/" || newURL.Path == "") && !(oldURL.Path == "/" || oldURL.Path == "") { + return &mock404{Msg: fmt.Sprintf("should have been a 404: %q redirected to %q", oldURL.String(), newURL.String())} + } + + return nil +} diff --git a/cmd/gen-posix/main.go b/cmd/gen-posix/main.go new file mode 100644 index 0000000..6da598b --- /dev/null +++ b/cmd/gen-posix/main.go @@ -0,0 +1,214 @@ +package main + +import ( + "bytes" + "fmt" + "html/template" + "os" + + "git.lukeshu.com/www/lib/httpcache" +) + +var urls = map[string]string{} +var names = map[string]string{} + +func (doc Document) URL() string { + if doc.ID == "" { + return "" + } + key := doc.Vendor.Name + "\000" + doc.ID + if _, have := urls[key]; !have { + urls[key] = checkURL(doc.Vendor.GetURL(doc.ID)) + } + return urls[key] +} + +func (doc Document) Name() string { + if doc.ID == "" { + var names []string + for _, full := range doc.Fulls() { + names = append(names, full.Name()) + } + switch len(names) { + case 0: + return "???" + case 1: + return names[0] + default: + // BUG(lukeshu): Handle un-IDed Documents with + // multiple IDed resellers. 
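+			// (i.e. an un-IDed Document whose resellers carry two
+			// or more distinct names)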
+ panic("TODO") + } + } + key := doc.Vendor.Name + "\000" + doc.ID + if _, have := names[key]; !have { + names[key] = doc.Vendor.GetName(doc.ID, doc.URL()) + } + return names[key] +} + +func (doc Document) Fulls() []Document { + var ret []Document + for _, reseller := range doc.Resellers { + if doc.ID != "" && reseller.Vendor.Name == doc.Vendor.Name && reseller.ID == doc.ID { + continue + } + if reseller.Type == Full { + ret = append(ret, reseller) + } + } + return ret +} + +func (doc Document) Patches() []Document { + var ret []Document + for _, reseller := range doc.Resellers { + if doc.ID != "" && reseller.Vendor.Name == doc.Vendor.Name && reseller.ID == doc.ID { + continue + } + if reseller.Type == Patch { + ret = append(ret, reseller) + } + } + return ret +} + +func (doc Document) AsFull() *Document { + if doc.Vendor.Name == "" && doc.ID == "" { + return nil + } + ret := doc + ret.Resellers = ret.Fulls() + switch doc.Type { + case Full: + // Nothing to do + case Patch: + if len(ret.Resellers) == 0 { + return nil + } + ret.Type = Full + ret.ID = func() string { + var ids []string + for _, reseller := range ret.Resellers { + if reseller.Vendor.Name == doc.Vendor.Name { + ids = append(ids, reseller.ID) + } + } + switch len(ids) { + case 0: + return "" + case 1: + return ids[0] + default: + panic("wut") + } + }() + if ret.ID != "" { + ret.Resellers = ret.Fulls() + } + default: + panic("uhh") + } + return &ret +} + +func (doc Document) AsPatch() *Document { + if doc.Vendor.Name == "" && doc.ID == "" { + return nil + } + ret := doc + switch doc.Type { + case Full: + return nil + case Patch: + ret.Resellers = doc.Patches() + default: + panic("no") + } + return &ret +} + +func (ed Edition) DocsOrdered() []Document { + // This chould be O(n), but this niaeve implementation is + // O(n^2). It's OK, n is small. + s := make([]Document, len(Vendors)) + for i, vnd := range Vendors { + for _, doc := range ed.Docs { + if doc.Vendor.Name == vnd.Name { + s[i] = doc + } + } + } + return s +} + +var tmpl = `{{define "document"}}{{if .}} + {{if .URL}}<a href="{{.URL}}" title="{{.Name}}">{{.Name}}</a>{{else}}{{.Name}}{{end}} + {{range .Resellers}} + <a href="{{.URL}}" title="{{.Name}}">({{.Vendor.Name}})</a> + {{end}} +{{end}}{{end}} +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>POSIX Editions</title> + <style> + body { font-size: 8px; } + table { border-collapse: collapse; } + th, td { border: solid 1px black; } + tr:not(:first-child):not(:nth-child(2)) th { + font-family: monospace; + text-align: left; + } + </style> + </head> + <body> + <p>There's a typo in the "standardNumber" in IEEE + Xplore's records forfor the 2004 edition of 1003.1; + it says 2014 instead or 2004. 
The actual document + says 2004 though.</p> + <table> + <caption><p>POSIX: C & Shell (1997-present)</p></caption> + <tr><td rowspan=2></td>{{range .Vendors}}<th colspan=2>{{.Name}}</th>{{end}}</tr> + <tr>{{range .Vendors}}<th>Full</th><th>Patch</th>{{end}}</tr> + {{range .Editions}}<tr> + <th>{{.Name}}</th> + {{range .DocsOrdered}} + <td>{{template "document" .AsFull}}</td><td>{{template "document" .AsPatch}}</td> + {{end}} + </tr>{{end}} + </table> + </body> +</html> +` + +func mainWithError() error { + httpcache.UserAgent = "https://git.lukeshu.com/www/tree/cmd/gen-posix" + httpcache.ModifyResponse = modifyResponse + httpcache.CheckRedirect = checkRedirect + + tmpl := template.Must(template.New("page").Parse(tmpl)) + + var out bytes.Buffer + if err := tmpl.Execute(&out, map[string]interface{}{ + "Vendors": Vendors, + "Editions": Editions, + }); err != nil { + return err + } + if err := os.WriteFile("public/posix/index.new.html", out.Bytes(), 0666); err != nil { + return err + } + if err := os.Rename("public/posix/index.new.html", "public/posix/index.html"); err != nil { + return err + } + return nil +} + +func main() { + if err := mainWithError(); err != nil { + fmt.Fprintf(os.Stderr, "%s: error: %v\n", os.Args[0], err) + os.Exit(1) + } +} diff --git a/cmd/gen-posix/types.go b/cmd/gen-posix/types.go new file mode 100644 index 0000000..9bb4c2d --- /dev/null +++ b/cmd/gen-posix/types.go @@ -0,0 +1,26 @@ +package main + +type Vendor struct { + Name string + GetURL func(id string) string + GetName func(id string, url string) string +} + +type Type int + +const ( + Full Type = 0 + Patch Type = 1 +) + +type Edition struct { + Name string + Docs []Document +} + +type Document struct { + Vendor Vendor + Type Type + ID string + Resellers []Document +} diff --git a/cmd/generate/httpcache.go b/cmd/generate/httpcache.go deleted file mode 100644 index 04762e3..0000000 --- a/cmd/generate/httpcache.go +++ /dev/null @@ -1,95 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "io" - "net/http" - "net/url" - "os" - "path/filepath" -) - -var httpCache = map[string]string{} - -func httpGet(u string) (string, error) { - if cache, ok := httpCache[u]; ok { - fmt.Printf("CACHE-GET %q\n", u) - return cache, nil - } - if err := os.Mkdir(".http-cache", 0777); err != nil && !os.IsExist(err) { - return "", err - } - cacheFile := filepath.Join(".http-cache", url.QueryEscape(u)) - if bs, err := os.ReadFile(cacheFile); err == nil { - httpCache[u] = string(bs) - fmt.Printf("CACHE-GET %q\n", u) - return httpCache[u], nil - } else if !os.IsNotExist(err) { - return "", err - } - - fmt.Printf("GET %q...", u) - resp, err := http.Get(u) - if err != nil { - fmt.Printf(" err\n") - return "", err - } - if resp.StatusCode != http.StatusOK { - fmt.Printf(" err\n") - return "", fmt.Errorf("unexpected HTTP status: %v", resp.Status) - } - bs, err := io.ReadAll(resp.Body) - if err != nil { - fmt.Printf(" err\n") - return "", err - } - fmt.Printf(" ok\n") - if err := os.WriteFile(cacheFile, bs, 0666); err != nil { - return "", err - } - httpCache[u] = string(bs) - return httpCache[u], nil -} - -func httpGetJSON(u string, out any) error { - str, err := httpGet(u) - if err != nil { - return err - } - return json.Unmarshal([]byte(str), out) -} - -func httpGetPaginatedJSON[T any](uStr string, out *[]T, pageFn func(i int) url.Values) error { - u, err := url.Parse(uStr) - if err != nil { - return err - } - query := u.Query() - - for i := 0; true; i++ { - pageParams := pageFn(i) - for k, v := range pageParams { - query[k] = v - } - 
- u.RawQuery = query.Encode() - var resp []T - if err := httpGetJSON(u.String(), &resp); err != nil { - return err - } - fmt.Printf(" -> %d records\n", len(resp)) - if len(resp) == 0 { - break - } - *out = append(*out, resp...) - } - - return nil -} - -func githubPagination(i int) url.Values { - params := make(url.Values) - params.Set("page", fmt.Sprintf("%v", i+1)) - return params -} diff --git a/cmd/generate/src_contribs.go b/cmd/generate/src_contribs.go deleted file mode 100644 index 6db6764..0000000 --- a/cmd/generate/src_contribs.go +++ /dev/null @@ -1,400 +0,0 @@ -package main - -import ( - "fmt" - "net/url" - "os" - "regexp" - "strings" - "time" - - "sigs.k8s.io/yaml" -) - -type User struct { - Name string `json:"name"` - URL string `json:"url"` -} - -type Contribution struct { - URLs []string `json:"urls"` - Tags []string `json:"tags"` - SponsoredBy string `json:"sponsored-by"` - Desc string `json:"desc"` - - SubmittedAt time.Time `json:"submitted-at"` - LastUpdatedAt time.Time `json:"last-updated-at"` - LastUpdatedBy User `json:"last-updated-by"` - Status string `json:"status"` - - StatusClass string `json:"-"` -} - -func ReadContribs(filename string) ([]Contribution, error) { - bs, err := os.ReadFile(filename) - if err != nil { - return nil, fmt.Errorf("contribs: %q: %w", filename, err) - } - var ret []Contribution - if err := yaml.UnmarshalStrict(bs, &ret); err != nil { - return nil, fmt.Errorf("contribs: %q: %w", filename, err) - } - for i := range ret { - contrib := ret[i] - if err := contrib.Fill(); err != nil { - return nil, fmt.Errorf("contribs: %q: %w", filename, err) - } - ret[i] = contrib - } - return ret, nil -} - -func (c *Contribution) Fill() error { - var err error - if c.SubmittedAt.IsZero() { - c.SubmittedAt, err = c.fetchSubmittedAt() - if err != nil { - return err - } - } - if c.LastUpdatedAt.IsZero() { - c.LastUpdatedAt, c.LastUpdatedBy, err = c.fetchLastUpdated() - if err != nil { - return err - } - } - if c.Status == "" { - c.Status, err = c.fetchStatus() - if err != nil { - return err - } - } - c.StatusClass, err = classifyStatus(c.Status) - if err != nil { - return err - } - for _, u := range c.URLs { - if m := reGoLangGerritCL.FindStringSubmatch(u); m != nil { - c.URLs = append(c.URLs, "https://golang.org/cl/"+m[1]) - } - } - return nil -} - -func classifyStatus(status string) (string, error) { - switch { - case strings.Contains(status, "released") || strings.Contains(status, "deployed"): - return "released", nil - case strings.Contains(status, "merged"): - return "merged", nil - case strings.Contains(status, "open"): - return "open", nil - case strings.Contains(status, "closed") || strings.Contains(status, "locked"): - return "closed", nil - default: - return "", fmt.Errorf("unrecognized status string: %q", status) - } -} - -var ( - reGoLangGerritCL = regexp.MustCompile(`https://go-review\.googlesource\.com/c/[^/?#]+/\+/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`) - reGitHubPR = regexp.MustCompile(`^https://github\.com/([^/?#]+)/([^/?#]+)/pull/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`) - reGitHubCommit = regexp.MustCompile(`^https://github\.com/([^/?#]+)/([^/?#]+)/commit/([0-9a-f]+)(?:\?[^#]*)?(?:#.*)?$`) - reGitLabMR = regexp.MustCompile(`^https://([^/]+)/([^?#]+)/-/merge_requests/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`) - rePiperMailDate = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`) -) - -const ( - statusOpen = "open" - statusMerged = "merged, not yet in a release" - statusReleasedFmt = "merged, released in %s" -) - -func (c Contribution) fetchStatus() (string, error) { - if m := 
reGitHubPR.FindStringSubmatch(c.URLs[0]); m != nil { - user := m[1] - repo := m[2] - prnum := m[3] - - urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/pulls/" + prnum - - var obj struct { - // State values are "open" and "closed". - State string `json:"state"` - Merged bool `json:"merged"` - MergeCommitSha string `json:"merge_commit_sha"` - } - if err := httpGetJSON(urlStr, &obj); err != nil { - return "", err - } - ret := obj.State - if obj.Merged { - ret = statusMerged - tag, err := getGitTagThatContainsAll("https://github.com/"+user+"/"+repo, obj.MergeCommitSha) - if err != nil { - return "", err - } - if tag != "" { - ret = fmt.Sprintf(statusReleasedFmt, tag) - } - } - - return ret, nil - } - if m := reGitLabMR.FindStringSubmatch(c.URLs[0]); m != nil { - authority := m[1] - projectID := m[2] - mrnum := m[3] - - urlStr := "https://" + authority + "/api/v4/projects/" + url.QueryEscape(projectID) + "/merge_requests/" + mrnum - - var obj struct { - // State values are "opened", "closed", "locked", and "merged". - State string `json:"state"` - MergeCommitSha string `json:"merge_commit_sha"` - SquashCommitSha string `json:"squash_commit_sha"` - } - if err := httpGetJSON(urlStr, &obj); err != nil { - return "", err - } - - ret := obj.State - if ret == "opened" { - ret = statusOpen - } - - if ret == "merged" { - ret = statusMerged - var mergeCommit string - if obj.MergeCommitSha != "" { - mergeCommit = obj.MergeCommitSha - } - if obj.SquashCommitSha != "" { - mergeCommit = obj.SquashCommitSha - } - if mergeCommit != "" { - tag, err := getGitTagThatContainsAll("https://"+authority+"/"+projectID+".git", mergeCommit) - if err != nil { - return "", err - } - if tag != "" { - ret = fmt.Sprintf(statusReleasedFmt, tag) - } - } - } - - return ret, nil - } - if len(c.URLs) > 1 { - var gitURL string - var gitCommits []string - for _, u := range c.URLs[1:] { - if m := reGitHubCommit.FindStringSubmatch(u); m != nil { - user := m[1] - repo := m[2] - hash := m[3] - - gitURL = "https://github.com/" + user + "/" + repo - gitCommits = append(gitCommits, hash) - } - } - if len(gitCommits) > 0 { - ret := statusMerged - tag, err := getGitTagThatContainsAll(gitURL, gitCommits...) 
- if err != nil { - return "", err - } - if tag != "" { - ret = fmt.Sprintf(statusReleasedFmt, tag) - } - return ret, nil - } - } - return "", fmt.Errorf("idk how to get status for %q", c.URLs[0]) -} - -func (c Contribution) fetchSubmittedAt() (time.Time, error) { - if m := reGitHubPR.FindStringSubmatch(c.URLs[0]); m != nil { - user := m[1] - repo := m[2] - prnum := m[3] - - urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/pulls/" + prnum - - var obj struct { - CreatedAt time.Time `json:"created_at"` - } - if err := httpGetJSON(urlStr, &obj); err != nil { - return time.Time{}, err - } - return obj.CreatedAt, nil - } - if m := reGitLabMR.FindStringSubmatch(c.URLs[0]); m != nil { - authority := m[1] - projectID := m[2] - mrnum := m[3] - - urlStr := "https://" + authority + "/api/v4/projects/" + url.QueryEscape(projectID) + "/merge_requests/" + mrnum - - var obj struct { - CreatedAt time.Time `json:"created_at"` - } - if err := httpGetJSON(urlStr, &obj); err != nil { - return time.Time{}, err - } - return obj.CreatedAt, nil - } - if strings.Contains(c.URLs[0], "/pipermail/") { - htmlStr, err := httpGet(c.URLs[0]) - if err != nil { - return time.Time{}, err - } - for _, line := range strings.Split(htmlStr, "\n") { - if m := rePiperMailDate.FindStringSubmatch(line); m != nil { - return time.Parse(time.UnixDate, m[1]) - } - } - } - return time.Time{}, fmt.Errorf("idk how to get created timestamp for %q", c.URLs[0]) -} - -func (c Contribution) fetchLastUpdated() (time.Time, User, error) { - if m := reGitHubPR.FindStringSubmatch(c.URLs[0]); m != nil { - user := m[1] - repo := m[2] - prnum := m[3] - - var obj struct { - UpdatedAt time.Time `json:"updated_at"` - MergedAt time.Time `json:"merged_at"` - MergedBy struct { - Login string `json:"login"` - HTMLURL string `json:"html_url"` - } `json:"merged_by"` - } - if err := httpGetJSON("https://api.github.com/repos/"+user+"/"+repo+"/pulls/"+prnum, &obj); err != nil { - return time.Time{}, User{}, err - } - - retUpdatedAt := obj.UpdatedAt - var retUser User - - if obj.MergedAt == retUpdatedAt { - retUser.Name = obj.MergedBy.Login - retUser.URL = obj.MergedBy.HTMLURL - } - if retUser == (User{}) { - // "normal" comments - var comments []struct { - UpdatedAt time.Time `json:"updated_at"` - User struct { - Login string `json:"login"` - HTMLURL string `json:"html_url"` - } `json:"user"` - } - if err := httpGetPaginatedJSON("https://api.github.com/repos/"+user+"/"+repo+"/issues/"+prnum+"/comments", &comments, githubPagination); err != nil { - return time.Time{}, User{}, err - } - for _, comment := range comments { - if comment.UpdatedAt == retUpdatedAt || comment.UpdatedAt.Add(1*time.Second) == retUpdatedAt { - retUser.Name = comment.User.Login - retUser.URL = comment.User.HTMLURL - break - } - } - } - if retUser == (User{}) { - // comments on a specific part of the diff - var reviewComments []struct { - UpdatedAt time.Time `json:"updated_at"` - User struct { - Login string `json:"login"` - HTMLURL string `json:"html_url"` - } `json:"user"` - } - if err := httpGetPaginatedJSON("https://api.github.com/repos/"+user+"/"+repo+"/pulls/"+prnum+"/comments", &reviewComments, githubPagination); err != nil { - return time.Time{}, User{}, err - } - for _, comment := range reviewComments { - if comment.UpdatedAt == retUpdatedAt { - retUser.Name = comment.User.Login - retUser.URL = comment.User.HTMLURL - break - } - } - } - if retUser == (User{}) { - var events []struct { - CreatedAt time.Time `json:"created_at"` - Actor struct { - Login string 
`json:"login"` - HTMLURL string `json:"html_url"` - } `json:"actor"` - } - if err := httpGetJSON("https://api.github.com/repos/"+user+"/"+repo+"/issues/"+prnum+"/events", &events); err != nil { - return time.Time{}, User{}, err - } - for _, event := range events { - if event.CreatedAt == retUpdatedAt { - retUser.Name = event.Actor.Login - retUser.URL = event.Actor.HTMLURL - break - } - } - } - - return retUpdatedAt, retUser, nil - } - if m := reGitLabMR.FindStringSubmatch(c.URLs[0]); m != nil { - authority := m[1] - projectID := m[2] - mrnum := m[3] - - urlStr := "https://" + authority + "/api/v4/projects/" + url.QueryEscape(projectID) + "/merge_requests/" + mrnum - - var obj struct { - UpdatedAt time.Time `json:"updated_at"` - } - if err := httpGetJSON(urlStr, &obj); err != nil { - return time.Time{}, User{}, err - } - return obj.UpdatedAt, User{}, nil - } - - var ret time.Time - if len(c.URLs) > 1 { - for _, u := range c.URLs[1:] { - if m := reGitHubCommit.FindStringSubmatch(u); m != nil { - user := m[1] - repo := m[2] - hash := m[3] - - urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/commits/" + hash - var obj struct { - Commit struct { - Author struct { - Date time.Time `json:"date"` - } `json:"author"` - Committer struct { - Date time.Time `json:"date"` - } `json:"committer"` - } `json:"commit"` - } - if err := httpGetJSON(urlStr, &obj); err != nil { - return time.Time{}, User{}, err - } - if obj.Commit.Author.Date.After(ret) { - ret = obj.Commit.Author.Date - } - if obj.Commit.Committer.Date.After(ret) { - ret = obj.Commit.Committer.Date - } - } - } - } - if !ret.IsZero() { - return ret, User{}, nil - } - - return time.Time{}, User{}, nil //fmt.Errorf("idk how to get updated timestamp for %q", c.URLs[0]) -} diff --git a/imworkingon/contribs.yml b/imworkingon/contribs.yml index 6394421..05c7ef4 100644 --- a/imworkingon/contribs.yml +++ b/imworkingon/contribs.yml @@ -1,15 +1,19 @@ -- urls: [https://github.com/flori/json/pull/567] +- urls: + - https://github.com/flori/json/pull/567 + - https://github.com/flori/json/commit/c57d33ec39344f7a6ae2786b8ac36892a51b03fe tags: [Ruby, JSON, SoftwareFreedom] + id: ruby-json desc: | ruby-json contains code that is not Free under the FSF's definition, not Open Source under the OSI's definition, and not - GPL-compatible. This has coused much consternation among folks + GPL-compatible. This has caused much consternation among folks who care about any of those 3 things. This PR replaces that non-Free code with Free code, removing friction for Ruby users on GNU/Linux distros that care about those 3 things. - urls: [https://gitlab.archlinux.org/archlinux/mkinitcpio/mkinitcpio/-/merge_requests/328] + id: mkinitcpio-arm-zimage tags: [ARM, boot] sponsored-by: Umorpha Systems desc: | @@ -22,6 +26,8 @@ This PR removes that friction by teaching mkinitcpio to understand ARM zImage files. + + See also: [mkinitcpio#362](#contrib-mkinitcpio-arm-zimage-tests) - urls: [https://gitlab.archlinux.org/archlinux/mkinitcpio/mkinitcpio/-/merge_requests/277] tags: [boot] sponsored-by: Umorpha Systems @@ -71,7 +77,7 @@ implementions for other filesystems take. - urls: [https://github.com/liberapay/liberapay.com/pull/2334] tags: [federated] - status: merged+deployed + status: merged + deployed desc: | When managing your profile, Liberapay nominally supports using your [Libravatar federated avatar](https://www.libravatar.org/) as @@ -85,13 +91,15 @@ This PR makes it easier to contribute to gotk4 by improving developer documentation and automated checks. 
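(An aside on the `merged+deployed` to `merged + deployed` tweak above: the spacing only matters for display, not classification. A minimal sketch of why, reusing the `classifyStatus` logic from the deleted `src_contribs.go` and assuming it carried over unchanged into `cmd/gen-imworkingon`:)

```go
package main

import (
	"fmt"
	"strings"
)

// classifyStatus maps a free-form status string to one of the four
// CSS classes used by the page, keying off of substrings.
func classifyStatus(status string) (string, error) {
	switch {
	case strings.Contains(status, "released") || strings.Contains(status, "deployed"):
		return "released", nil
	case strings.Contains(status, "merged"):
		return "merged", nil
	case strings.Contains(status, "open"):
		return "open", nil
	case strings.Contains(status, "closed") || strings.Contains(status, "locked"):
		return "closed", nil
	default:
		return "", fmt.Errorf("unrecognized status string: %q", status)
	}
}

func main() {
	// Both spellings contain "deployed", so both classify as "released".
	for _, s := range []string{"merged+deployed", "merged + deployed"} {
		class, _ := classifyStatus(s)
		fmt.Printf("%q -> %s\n", s, class)
	}
}
```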
- urls: [https://gitlab.archlinux.org/archlinux/mkinitcpio/mkinitcpio/-/merge_requests/362] + id: mkinitcpio-arm-zimage-tests tags: [ARM, boot, testing] desc: | - This PR adds tests for the earlier ARM zImage work. This was - split off into a separate PR from the main ARM zImage PR because - the maintainers had concerns about merging binary test files (very - understandable, especially given the recent XZ issue!), but didn't - want to hold up the main work. + This PR adds tests for the [earlier ARM zImage + work](#contrib-mkinitcpio-arm-zimage). This was split off into a + separate PR from the main ARM zImage PR because the maintainers + had concerns about merging binary test files (very understandable, + especially given the recent XZ issue!), but didn't want to hold up + the main work. - urls: - https://github.com/golang/net/pull/208 - https://go-review.googlesource.com/c/net/+/580855 @@ -119,8 +127,145 @@ `html.UnescapeString` that were found when working on the documentation parser in gotk4. - urls: [https://github.com/luigifab/awf-extended/pull/9] + status: "merged, released in v2.9.0" tags: [Parabola, GTK] desc: | Just a minor touch-up to `configure.ac` that I noticed could be made when updating Parabola's `pcr/awf` package. Parabola makes other software better! +- urls: [https://gitlab.archlinux.org/archlinux/packaging/packages/systemd/-/merge_requests/12] + tags: [Parabola, init-freedom] + desc: | + Some changes to the way that Arch Linux packages systemd that + should make it easier for distros downstream of Arch (certainly + Parabola, hopefully Artix) to provide init-freedom and support + other init systems. +- urls: [https://chromium-review.googlesource.com/c/chromiumos/platform/vboot_reference/+/5586382] + id: vboot-32 + tags: [boot] + desc: | + This fixes a bug in the code that both (1) may allow a + specially-crafted partition to bypass a bounds check, and (2) + makes it so that the code does not compile when `sizeof(size_t)=4` + (that is: x86-32). + + See also: [libreboot#218](#contrib-libreboot-32) +- urls: [https://codeberg.org/libreboot/lbmk/pulls/218] + id: libreboot-32 + tags: [boot] + desc: | + This has the Libreboot build-system apply the [fix I submitted to + vboot](#contrib-vboot-32), so that Libreboot can be compiled on + x86-32. Libreboot does not use the affected vboot functionality, + but the bug was preventing things from compiling. +- urls: + - https://sourceware.org/pipermail/binutils/2024-June/134608.html + - https://sourceware.org/pipermail/gdb-patches/2024-June/209720.html + tags: [GNU, supply-chain-security] + status: open + desc: | + The binutils-gdb sources bundle a number of files from other + sources (including the autotools, libtool, readline, texinfo, + gnulib, zlib, and GDB). I audited the binutils-gdb sources to + pin-point exactly which versions were being bundled and what + patches were being applied, then wrote a `./bootstrap` script to + automate that bundling. + + As the recent XZ issue taught us, this kind of audit is an + important part of supply-chain security. The `./bootstrap` script + will greatly ease this type of audit in the future, and can even + enable enforcing up-to-date-ness of the audit in CI. + + Also, hopefully this will make it easier to keep binutils' and + GDB's bundled dependencies more up-to-date in the future, as many + are quite out-of-date right now. 
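(The entry above alludes to enforcing the audit in CI. A hedged sketch, in Go to match the rest of the repo, of what such a check could look like; the `bundled.sha256` manifest name and its `<sha256> <path>` line format are hypothetical, not part of the actual patches:)

```go
package main

import (
	"bufio"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"os"
	"strings"
)

// verifyBundled checks each "sha256 path" line of a (hypothetical)
// bundled-files manifest against the working tree, so that CI can
// fail when a vendored file drifts from its audited version.
func verifyBundled(manifest string) error {
	f, err := os.Open(manifest)
	if err != nil {
		return err
	}
	defer f.Close()
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) != 2 {
			continue // skip blank/comment lines
		}
		wantSum, path := fields[0], fields[1]
		data, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		sum := sha256.Sum256(data)
		if hex.EncodeToString(sum[:]) != wantSum {
			return fmt.Errorf("%s: drifted from audited version", path)
		}
	}
	return scanner.Err()
}

func main() {
	if err := verifyBundled("bundled.sha256"); err != nil {
		fmt.Fprintln(os.Stderr, "error:", err)
		os.Exit(1)
	}
}
```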
+- urls: + - https://gitlab.gnome.org/GNOME/glib/-/merge_requests/4187 + tags: [GTK, docs] + desc: | + While GI-DocGen markup is largely backward-compatible with GTK-Doc + markup, it isn't completely backward-compatible. This fixes some + mistakes from when GLib migrated from GTK-Doc to GI-DocGen. I + scanned for places where GI-DocGen was emitting unknown HTML tags, + which indicate such a mistake. Notably, some of the rendered + gregex docs were unreadable. +- urls: [https://github.com/systemd/systemd/pull/34067] + desc: | + `systemd-nspawn` is a container runtime (like Docker or runc or + whathaveyou). Notably, nspawn is what Parabola's build-system + uses for hermetic builds. + + Currently nspawn does not support FUSE filesystems inside of the + container. This PR enhances nspawn to support FUSE. + + This is of particular utility for build systems, because it will + allow build scripts to mount a FUSE overlayfs/unionfs of the root + filesystem, which is useful for building software that does not + support a `DESTDIR`-type setting to install to an alternate root. + (Recent versions of the Linux kernel support unprivileged + in-kernel overlayfs, but at this time it is still too restrictive + to support this use-case.) +- urls: [https://github.com/mailprocessing/mailprocessing/pull/16] + desc: | + mailprocessing's `maildirproc`(1) program is a mail-filter daemon + that can sort emails into folders and such. Somewhere between + 1.0.1 and 1.2.7 the daemon lost the ability to gracefully + shut down. This can slow down shutdown of the entire system as the + service manager waits for maildirproc to respond to the SIGINT or + SIGTERM that it was sent... but it never would. This PR fixes + that. +- urls: [https://gitlab.archlinux.org/archlinux/packaging/packages/ruby/-/merge_requests/6] + tags: [Parabola, Ruby] + desc: | + Ruby's standard library has been going through a process of + "gemification" where it is converted into a set of Gems that are + simply vendored into the main Ruby distribution. + + GNU/Linux distros tend to like to de-vendor things. This + de-vendoring process is usually inherently a little messy. This + MR tries to make it a little less messy in the case of Ruby on + Arch Linux and downstream distros. + + Besides hopefully making things easier on the Arch devs in the + future, it should also make it easier for downstream distros that + patch Ruby, such as applying [flori/json#567](#contrib-ruby-json). +- urls: [https://gitlab.archlinux.org/archlinux/packaging/packages/ruby/-/merge_requests/7] + tags: [Parabola, Ruby, docs] + desc: | + Arch Linux's `ruby-docs` package (version 3.2.5-1) is incomplete; + the `/usr/share/doc/ruby/capi/html/` directory is empty except for + `doxygen_crawl.html`. This fixes that, so that it includes the + full Doxygen output. +- urls: [https://github.com/flori/json/pull/599] + tags: [Ruby, JSON] + desc: | + The benchmark numbers given for the Ruby stdlib JSON + encoder/decoder are quite outdated, and the benchmark code has + been removed. This PR restores and fixes the benchmark code. + + This is helpful for justifying that + [flori/json#567](#contrib-ruby-json) actually improves performance + rather than hurting it. While I believe Software Freedom would be + worth hurting performance, it doesn't have to! 
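(The mailprocessing fix itself is Python, but the failure mode described above is generic: a daemon that no longer reacts to SIGINT/SIGTERM makes the service manager hang at shutdown. A minimal Go sketch of the signal handling a daemon needs in order to exit cleanly:)

```go
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	sigs := make(chan os.Signal, 1)
	// Without this registration the process never sees the service
	// manager's SIGINT/SIGTERM; with it, the signal wakes the main
	// loop so the daemon can exit cleanly instead of being killed
	// (or stalling shutdown until a timeout).
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			// ... poll the maildir and filter messages ...
		case sig := <-sigs:
			fmt.Printf("got %v; shutting down cleanly\n", sig)
			return
		}
	}
}
```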
+- urls: [https://github.com/python/typeshed/pull/13169] + tags: [GNU, GDB] + status: "merged, released in types-gdb 15.0.0.20241204" + desc: | + GDB has an internal Python module to support using Python to write + GDB extensions or using Python interactively in a GDB session. + Unfortunately, GDB does not provide type information for the + Python module; instead folks writing GDB extensions in Python must + rely on the Typeshed if they want to type-check their code. + + This PR fixes several mistakes in the type-information. +# - urls: ["https://sourceware.org/bugzilla/show_bug.cgi?id=32428"] +# tags: [GNU, GDB] +# - urls: ["https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118212"] +# tags: [GNU, GCC] +- urls: [https://github.com/9fans/plan9port/pull/692] + tags: [Plan9, 9P, docs] + desc: | + Plan 9 from Userspace's `9pserve`/lib9pclient implement a + non-standard `openfd` extension to the 9P2000 protocol. However, + the documentation for the extension is inaccurate. Fix that, as + nowhere else but the source code documents how it actually works. diff --git a/imworkingon/upstreams.yml b/imworkingon/upstreams.yml index cc96eb6..7257c6d 100644 --- a/imworkingon/upstreams.yml +++ b/imworkingon/upstreams.yml @@ -1,4 +1,6 @@ -- urls: [https://github.com/flori/json] +- urls: + - https://github.com/ruby/json + - https://github.com/flori/json name: ruby-json desc: | Ruby's standard JSON gem (which comes bundled with the core Ruby @@ -51,3 +53,40 @@ name: Go desc: | The Go programming language. +- urls: + - https://libreboot.org + - https://codeberg.org/libreboot + name: Libreboot + desc: | + Libreboot is a distribution of coreboot, a Free Software + motherboard-firmware platform. +- urls: + - https://www.gnu.org/software/binutils + - https://sourceware.org/binutils + - https://sourceware.org/pipermail/binutils + + - https://www.gnu.org/software/gdb + - https://sourceware.org/gdb + - https://sourceware.org/pipermail/gdb-patches + name: GNU Binutils / GDB + desc: | + The GNU Binutils are the GNU utilities for working with binary + object files: the main ones are the `ld` linker that combines + compiler outputs into a complete program, and the `as` assembler. + + GDB is the GNU Project Debugger for debugging compiled programs. +- urls: + - https://chromium-review.googlesource.com/c/chromiumos/platform/vboot_reference/ # v1 & v2 + - https://chromium-review.googlesource.com/c/chromiumos/platform/vboot/ # v3 rewrite + name: vboot + desc: | + vboot is Google's Verified Boot reference implementation, and is + used by the coreboot userspace tools. +- urls: [https://github.com/python/typeshed] + desc: | + The Python Typeshed is a collection of type-annotations for + popular Python libraries whose upstreams don't provide + type-annotations. + + This allows using `mypy` or other type-checkers to validate code + that uses such libraries. 
diff --git a/lib/httpcache/httpcache.go b/lib/httpcache/httpcache.go new file mode 100644 index 0000000..b2cc7fe --- /dev/null +++ b/lib/httpcache/httpcache.go @@ -0,0 +1,211 @@ +package httpcache + +import ( + "bufio" + hash "crypto/md5" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "os" + "path/filepath" + "sort" + "strings" +) + +var ( + UserAgent string + ModifyResponse func(url string, entry CacheEntry, resp *http.Response) *http.Response + CheckRedirect func(req *http.Request, via []*http.Request) error +) + +type CacheEntry string + +var memCache = map[string]CacheEntry{} + +type httpStatusError struct { + StatusCode int + Status string +} + +// Is implements the interface for [errors.Is]. +func (e *httpStatusError) Is(target error) bool { + switch target { + case os.ErrNotExist: + return e.StatusCode == http.StatusNotFound + default: + return false + } +} + +// Error implements [error]. +func (e *httpStatusError) Error() string { + return fmt.Sprintf("unexpected HTTP status: %v", e.Status) +} + +type transport struct{} + +func (t *transport) RoundTrip(req *http.Request) (*http.Response, error) { + // Return an error for things that are the fault of things + // not-on-this-box. Panic for things that are the fault of + // this box. + + // Initialize. + if err := os.Mkdir(".http-cache", 0777); err != nil && !os.IsExist(err) { + panic(err) + } + + // Calculate cache-key. + u := req.URL.String() + cacheKey := url.QueryEscape(u) + hdrKeys := make([]string, 0, len(req.Header)) + for k := range req.Header { + switch k { + case "User-Agent": + case "Referer": + default: + hdrKeys = append(hdrKeys, http.CanonicalHeaderKey(k)) + } + } + sort.Strings(hdrKeys) + for _, k := range hdrKeys { + cacheKey += "|" + url.QueryEscape(k) + ":" + url.QueryEscape(req.Header[k][0]) + } + if len(cacheKey) >= 255 { + prefix := cacheKey[:255-(hash.Size*2)] + csum := hash.Sum([]byte(cacheKey)) + suffix := hex.EncodeToString(csum[:]) + cacheKey = prefix + suffix + } + cacheFile := filepath.Join(".http-cache", cacheKey) + + // Check the mem cache. + if _, ok := memCache[cacheKey]; ok { + fmt.Printf("GET|CACHE|MEM %q...", u) + goto end + } + // Check the file cache. + if bs, err := os.ReadFile(cacheFile); err == nil { + str := string(bs) + if strings.HasPrefix(str, "HTTP/") || strings.HasPrefix(str, "CLIENT/") { + fmt.Printf("GET|CACHE|FILE %q...", u) + memCache[cacheKey] = CacheEntry(str) + goto end + } + } + + // Do the request for real. + fmt.Printf("GET|NET %q...", u) + if resp, err := http.DefaultTransport.RoundTrip(req); err == nil { + var buf strings.Builder + if err := resp.Write(&buf); err != nil { + panic(err) + } + memCache[cacheKey] = CacheEntry(buf.String()) + } else { + memCache[cacheKey] = CacheEntry("CLIENT/" + err.Error()) + } + + // Record the response to the file cache. 
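+	// The entry is stored verbatim as written by http.Response.Write + // (status line, headers, and body), so that http.ReadResponse can + // replay it from the cache below.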
+ if err := os.WriteFile(cacheFile, []byte(memCache[cacheKey]), 0666); err != nil { + panic(err) + } + +end: + // Turn the cache entry into an http.Response (or error) + var ret_resp *http.Response + var ret_err error + entry := memCache[cacheKey] + switch { + case strings.HasPrefix(string(entry), "HTTP/"): + var err error + ret_resp, err = http.ReadResponse(bufio.NewReader(strings.NewReader(string(entry))), nil) + if err != nil { + panic(fmt.Errorf("invalid cache entry: %v", err)) + } + if ModifyResponse != nil { + ret_resp = ModifyResponse(u, entry, ret_resp) + } + case strings.HasPrefix(string(entry), "CLIENT/"): + ret_err = errors.New(string(entry)[len("CLIENT/"):]) + default: + panic("invalid cache entry: invalid prefix") + } + + // Return. + if ret_err != nil { + fmt.Printf(" err\n") + } else { + fmt.Printf(" http %v\n", ret_resp.StatusCode) + } + return ret_resp, ret_err +} + +func Get(u string, hdr map[string]string) (string, error) { + if UserAgent == "" { + panic("main() must set the user agent string") + } + req, err := http.NewRequest(http.MethodGet, u, nil) + if err != nil { + panic(fmt.Errorf("should not happen: http.NewRequest: %v", err)) + } + req.Header.Set("User-Agent", UserAgent) + for k, v := range hdr { + req.Header.Add(k, v) + } + client := &http.Client{ + Transport: &transport{}, + CheckRedirect: CheckRedirect, + } + resp, err := client.Do(req) + if err != nil { + return "", err + } + if resp.StatusCode != http.StatusOK { + return "", &httpStatusError{StatusCode: resp.StatusCode, Status: resp.Status} + } + bs, err := io.ReadAll(resp.Body) + if err != nil { + panic(fmt.Errorf("should not happen: strings.Reader.Read: %v", err)) + } + return string(bs), nil +} + +func GetJSON(u string, hdr map[string]string, out any) error { + str, err := Get(u, hdr) + if err != nil { + return err + } + return json.Unmarshal([]byte(str), out) +} + +func GetPaginatedJSON[T any](uStr string, hdr map[string]string, out *[]T, pageFn func(i int) url.Values) error { + u, err := url.Parse(uStr) + if err != nil { + return err + } + query := u.Query() + + for i := 0; true; i++ { + pageParams := pageFn(i) + for k, v := range pageParams { + query[k] = v + } + + u.RawQuery = query.Encode() + var resp []T + if err := GetJSON(u.String(), hdr, &resp); err != nil { + return err + } + fmt.Printf(" -> %d records\n", len(resp)) + if len(resp) == 0 { + break + } + *out = append(*out, resp...) + } + + return nil +} diff --git a/lib/mailstuff/jwz.md b/lib/mailstuff/jwz.md new file mode 100644 index 0000000..91e03f5 --- /dev/null +++ b/lib/mailstuff/jwz.md @@ -0,0 +1,47 @@ +To: Jamie Zawinski <jwz@jwz.org> +Subject: message threading + +Hi, + +I'm implementing message threading, and have been referencing both +your document <https://www.jwz.org/doc/threading.html> and RFC 5256. +I'm not sure whether you're interested in updating a document that's +more than 25 years old, but if you are: I hope you find the following +feedback valuable. + +You write that the algorithm in RFC 5256 is merely a "restating" of +your algorithm, but I noticed 3 (minor) differences: + +1. In your step 1.C, the RFC says to check whether this would create a + loop, and if it would to skip creating the link; your version only + says to perform this check in step 1.B. + +2. 
The RFC says to sort the messages by date between your steps 4 and + 5; that is: when grouping by subject, containers in the root set + should be processed in date-order (you do not specify an order), + and that if a container in the root set is empty then the subject + should be taken from the earliest-date child (you say to use an + arbitrary child). + +3. The RFC precisely states how to trim a subject down to a "base + subject," rather than simply saying "Strip ``Re:'', ``RE:'', + ``RE[5]:'', ``Re: Re[4]: Re:'' and so on." + +Additionally, there are two minor points on which I found their +version to be clearer: + +1. The RFC specifies how to handle messages without a Message-Id or + with a duplicate Message-Id (on page 9), as well as how to + normalize a Message-Id (by referring to RFC 2822). This is perhaps + out-of-scope of your algorithm document, but I feel that it would + be worth mentioning in your background or definitions section. + +2. In your step 1.B, I did not understand what "If they are already + linked, don't change the existing links" meant until I read the + RFC, which words it as "If a message already has a parent, don't + change the existing link." It was unclear to me what "they" was + referring to in your version. + +-- +Happy hacking, +~ Luke T. Shumaker diff --git a/lib/mailstuff/mbox.go b/lib/mailstuff/mbox.go new file mode 100644 index 0000000..8700c24 --- /dev/null +++ b/lib/mailstuff/mbox.go @@ -0,0 +1,38 @@ +package mailstuff + +import ( + "bytes" + "io" + "net/mail" +) + +func ReadMBox(r io.Reader) ([]*mail.Message, error) { + rest, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + const terminator = "\nFrom " + + var parts [][]byte + for { + pos := bytes.Index(rest, []byte(terminator)) + if pos < 0 { + parts = append(parts, rest) + break + } + parts = append(parts, rest[:pos+1]) + rest = rest[pos+1:] + } + + ret := make([]*mail.Message, len(parts)) + for i := range len(parts) { + msg, err := mail.ReadMessage(bytes.NewReader(parts[i])) + if err != nil { + return nil, err + } + ret[i] = msg + } + + return ret, nil +} diff --git a/lib/mailstuff/thread.go b/lib/mailstuff/thread.go new file mode 100644 index 0000000..2cdf9a4 --- /dev/null +++ b/lib/mailstuff/thread.go @@ -0,0 +1,114 @@ +package mailstuff + +import ( + "fmt" + "net/mail" + "regexp" + "strings" +) + +type Set[T comparable] map[T]struct{} + +func (s Set[T]) Insert(val T) { + s[val] = struct{}{} +} + +func mapHas[K comparable, V any](m map[K]V, k K) bool { + _, ok := m[k] + return ok +} + +func (s Set[T]) Has(val T) bool { + return mapHas(s, val) +} + +func (s Set[T]) PickOne() T { + for v := range s { + return v + } + var zero T + return zero +} + +type MessageID string + +type ThreadedMessage struct { + *mail.Message + Parent *ThreadedMessage + Children Set[*ThreadedMessage] +} + +var reReplyID = regexp.MustCompile("<[^> \t\r\n]+>") + +func rfc2822parse(msg *mail.Message) *jwzMessage { + // TODO: This is bad, and needs a real implementation. 
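+	// In particular, References and In-Reply-To can contain CFWS + // comments and (obsolete-syntax) quoted strings around msg-ids + // (RFC 5322 §3.6.4), which naive whitespace-splitting and the + // reReplyID regexp mis-handle.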
+	ret := &jwzMessage{ + Subject: msg.Header.Get("Subject"), + ID: jwzID(msg.Header.Get("Message-ID")), + } + refIDs := strings.Fields(msg.Header.Get("References")) + if replyID := reReplyID.FindString(msg.Header.Get("In-Reply-To")); replyID != "" { + refIDs = append(refIDs, replyID) + } + ret.References = make([]jwzID, len(refIDs)) + for i := range refIDs { + ret.References[i] = jwzID(refIDs[i]) + } + return ret +} + +func ThreadMessages(msgs []*mail.Message) (Set[*ThreadedMessage], map[MessageID]*ThreadedMessage) { + jwzMsgs := make(map[jwzID]*jwzMessage, len(msgs)) + retMsgs := make(map[jwzID]*ThreadedMessage, len(msgs)) + bogusCnt := 0 + for _, msg := range msgs { + jwzMsg := rfc2822parse(msg) + + // RFC 5256: + // + // If a message does not contain a Message-ID header + // line, or the Message-ID header line does not + // contain a valid Message ID, then assign a unique + // Message ID to this message. + // + // If two or more messages have the same Message ID, + // then only use that Message ID in the first (lowest + // sequence number) message, and assign a unique + // Message ID to each of the subsequent messages with + // a duplicate of that Message ID. + for jwzMsg.ID == "" || mapHas(jwzMsgs, jwzMsg.ID) { + jwzMsg.ID = jwzID(fmt.Sprintf("bogus.%d", bogusCnt)) + bogusCnt++ + } + + jwzMsgs[jwzMsg.ID] = jwzMsg + retMsgs[jwzMsg.ID] = &ThreadedMessage{ + Message: msg, + } + } + + jwzThreads := jwzThreadMessages(jwzMsgs) + + var convertMessage func(*jwzContainer) *ThreadedMessage + convertMessage = func(in *jwzContainer) *ThreadedMessage { + var out *ThreadedMessage + if in.Message == nil { + out = new(ThreadedMessage) + } else { + out = retMsgs[in.Message.ID] + } + out.Children = make(Set[*ThreadedMessage], len(in.Children)) + for inChild := range in.Children { + outChild := convertMessage(inChild) + out.Children.Insert(outChild) + outChild.Parent = out + } + return out + } + retThreads := make(Set[*ThreadedMessage], len(jwzThreads)) + for inThread := range jwzThreads { + retThreads.Insert(convertMessage(inThread)) + } + return retThreads, retMsgs +} diff --git a/lib/mailstuff/thread_alg.go b/lib/mailstuff/thread_alg.go new file mode 100644 index 0000000..1b351e9 --- /dev/null +++ b/lib/mailstuff/thread_alg.go @@ -0,0 +1,226 @@ +package mailstuff + +import ( + "regexp" + "strings" +) + +// https://www.jwz.org/doc/threading.html + +// TODO: See ./jwz.md for RFC 5256 changes we might want to bring in. + +// Definitions ///////////////////////////////////////////////////////////////// + +type jwzContainer struct { + Message *jwzMessage + Parent *jwzContainer + Children Set[*jwzContainer] +} + +type jwzMessage struct { + Subject string + ID jwzID + References []jwzID +} + +type jwzID = MessageID //string + +func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool { + if ancestor == descendent { + return true + } + for child := range ancestor.Children { + if child.IsAncestorOf(descendent) { + return true + } + } + return false +} + +// The Algorithm /////////////////////////////////////////////////////////////// + +var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`) + +func jwzThreadMessages(msgs map[jwzID]*jwzMessage) Set[*jwzContainer] { + idTable := make(map[jwzID]*jwzContainer, len(msgs)) + + // 1. For each message + for _, msg := range msgs { + // A. 
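+		// If id_table has an empty container for this ID, claim it; + // otherwise (no container yet, or a duplicate ID) create a + // fresh container and index it by Message-ID.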
+ msgContainer := idTable[msg.ID] + if msgContainer != nil && msgContainer.Message == nil { + msgContainer.Message = msg + } else { + msgContainer = &jwzContainer{ + Message: msg, + Children: make(Set[*jwzContainer]), + } + idTable[msg.ID] = msgContainer + } + // B. + for _, refID := range msg.References { + refContainer := idTable[refID] + if refContainer == nil { + refContainer = &jwzContainer{ + Children: make(Set[*jwzContainer]), + } + idTable[refID] = refContainer + } + } + for i := 0; i+1 < len(msg.References); i++ { + parent := idTable[msg.References[i]] + child := idTable[msg.References[i+1]] + if child.Parent == nil && !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) { + parent.Children.Insert(child) + child.Parent = parent + } + } + // C. + if len(msg.References) == 0 { + if msgContainer.Parent != nil { + delete(msgContainer.Parent.Children, msgContainer) + } + msgContainer.Parent = nil + } else { + msgContainer.Parent = idTable[msg.References[len(msg.References)-1]] + msgContainer.Parent.Children.Insert(msgContainer) + } + } + + // 2. Find the root Set + root := &jwzContainer{ + Children: make(Set[*jwzContainer]), + } + for _, container := range idTable { + if container.Parent == nil { + container.Parent = root + root.Children.Insert(container) + } + } + + // 3. Discard id_table + idTable = nil + + // 4. Prune empty containers + var recurse func(*jwzContainer) + recurse = func(container *jwzContainer) { + // Recurse. This is a touch complicated because + // `recurse(child)` might insert into + // `container.Children`, and those insertions might + // not be emitted by the range loop + for visited := make(Set[*jwzContainer]); ; { + beforeSize := len(visited) + for child := range container.Children { + if visited.Has(child) { + continue + } + recurse(child) + visited.Insert(child) + } + if len(visited) == beforeSize { + break + } + } + if container.Parent == nil { + return + } + // Main. + if container.Message == nil { + if len(container.Children) == 0 { // A. + delete(container.Parent.Children, container) + } else { // B. + if len(container.Children) == 1 || container.Parent != root { + for child := range container.Children { + container.Parent.Children.Insert(child) + child.Parent = container.Parent + } + delete(container.Parent.Children, container) + } + } + } + } + recurse(root) + + // 5. Group root Set by subject + // A. + subjectTable := make(map[string]*jwzContainer) + // B. + for this := range root.Children { + var subject string + if this.Message != nil { + subject = this.Message.Subject + } else { + subject = this.Children.PickOne().Message.Subject + } + prefix := jwzSubjectRE.FindString(subject) + subject = strings.TrimSpace(subject[len(prefix):]) + if subject == "" { + continue + } + if other := subjectTable[subject]; other == nil { + subjectTable[subject] = this + } else if other.Message == nil { + subjectTable[subject] = this + } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" { + subjectTable[subject] = this + } + } + // C. 
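+	// Walk the root set again: whenever two root containers share a + // base subject, merge one into the other (or group both under a + // new empty container), depending on which of the two is empty + // and which is a reply.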
+ for this := range root.Children { + var subject string + if this.Message != nil { + subject = this.Message.Subject + } else { + subject = this.Children.PickOne().Message.Subject + } + prefix := jwzSubjectRE.FindString(subject) + subject = strings.TrimSpace(subject[len(prefix):]) + + other := subjectTable[subject] + if other == nil || other == this { + continue + } + + switch { + case this.Message == nil && other.Message == nil: + for child := range this.Children { + other.Children.Insert(child) + child.Parent = other + } + delete(root.Children, this) + case (this.Message == nil) != (other.Message == nil): + var empty, nonEmpty *jwzContainer + if this.Message == nil { + empty = this + nonEmpty = other + } else { + empty = other + nonEmpty = this + } + empty.Children.Insert(nonEmpty) + nonEmpty.Parent = empty + case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "": + other.Children.Insert(this) + this.Parent = other + // skip the reverse of the above case--it happened implicitly + default: + newParent := &jwzContainer{ + Children: make(Set[*jwzContainer], 2), + } + newParent.Children.Insert(this) + this.Parent = newParent + newParent.Children.Insert(other) + other.Parent = newParent + subjectTable[subject] = newParent + root.Children.Insert(newParent) + delete(root.Children, this) + delete(root.Children, other) + } + } + + // 6. Now you're done threading + for child := range root.Children { + child.Parent = nil + } + return root.Children +} diff --git a/public/dump b/public/dump index 271a203..507d527 120000 --- a/public/dump +++ b/public/dump @@ -1 +1 @@ -/home/lukeshu/blog/dump/
\ No newline at end of file +/home/lukeshu/dump/
\ No newline at end of file diff --git a/public/imworkingon/imworkingon.scss b/public/imworkingon/imworkingon.scss index c5adece..5405fef 100644 --- a/public/imworkingon/imworkingon.scss +++ b/public/imworkingon/imworkingon.scss @@ -7,7 +7,7 @@ article { border: solid 1px #333333; - border-radius: 1em; + border-radius: 8px; margin: 0.5em; } diff --git a/public/index.html b/public/index.html index 28d5477..35e75bc 100644 --- a/public/index.html +++ b/public/index.html @@ -117,7 +117,7 @@ <li>Social web:<ul> - <li>I am <a rel="me" href="https://fosstodon.org/@lukeshu">@lukeshu@fosstodon.org</a> on Mastodon.</li> + <li>I am <s><a href="https://fosstodon.org/@lukeshu">@lukeshu@fosstodon.org</a></s><a rel="me" href="https://social.coop/@lukeshu">@lukeshu@social.coop</a> on Mastodon.</li> <li>I am <a href="https://news.ycombinator.com/user?id=LukeShu">LukeShu</a> on Hacker News.</li> diff --git a/public/resume b/public/resume index 1201e40..a3e6b53 120000 --- a/public/resume +++ b/public/resume @@ -1 +1 @@ -/home/lukeshu/blog/resume/
\ No newline at end of file +/home/lukeshu/resume/
\ No newline at end of file diff --git a/public/sponsor/index.html b/public/sponsor/index.html index 339794c..04f565b 100644 --- a/public/sponsor/index.html +++ b/public/sponsor/index.html @@ -59,7 +59,7 @@ <li><a class="donate-btn kofi" href="https://ko-fi.com/lukeshu"> <img src="kofi-icon.png" alt="Ko-fi icon" /> - Ko-fi<!-- (0% fee, requires non-free JS) --> + Ko-fi<!-- (0% fee for one-offs, 5% fee for recurring; requires non-free JS) --> </a></li> <li><a class="donate-btn patreon" href="https://patreon.com/lukeshu"> <img src="patreon-icon.svg" |