diff options
Diffstat (limited to 'cmd/gen-posix')
-rw-r--r-- | cmd/gen-posix/data.go | 211 | ||||
-rw-r--r-- | cmd/gen-posix/http_hacks.go | 156 | ||||
-rw-r--r-- | cmd/gen-posix/main.go | 214 | ||||
-rw-r--r-- | cmd/gen-posix/types.go | 26 |
4 files changed, 607 insertions, 0 deletions
diff --git a/cmd/gen-posix/data.go b/cmd/gen-posix/data.go new file mode 100644 index 0000000..165ecbd --- /dev/null +++ b/cmd/gen-posix/data.go @@ -0,0 +1,211 @@ +package main + +import ( + "fmt" + "os" + "os/exec" + "regexp" + "strings" + + "git.lukeshu.com/www/lib/httpcache" +) + +var IEEESA = Vendor{ + Name: "IEEE-SA", + GetURL: func(id string) string { return fmt.Sprintf("http://standards.ieee.org/findstds/standard/%s.html", id) }, + GetName: func(id string, url string) string { + html, err := httpcache.Get(url, nil) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + cmd := exec.Command("nokogiri", "-e", `puts $_.css("meta[name=\"des\"], meta[name=\"designation\"]").first["content"]`) + cmd.Stderr = os.Stderr + cmd.Stdin = strings.NewReader(html) + d, err := cmd.Output() + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + return strings.TrimSuffix(string(d), "\n") + }, +} + +var reIEEE = regexp.MustCompile(`standardNumber":"([^"]*)"`) + +var IEEEXplore = Vendor{ + Name: "IEEE Xplore", + GetURL: func(id string) string { return fmt.Sprintf("http://ieeexplore.ieee.org/servlet/opac?punumber=%s", id) }, + GetName: func(id string, url string) string { + if strings.HasSuffix(url, "ERROR") { + return "ERROR" + } + html, err := httpcache.Get(url, nil) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + m := reIEEE.FindStringSubmatch(html) + if m == nil { + panic(fmt.Errorf("URL=%q did not contain expected JSON", url)) + } + return m[1] + }, +} + +var TOG = Vendor{ + Name: "The Open Group", + GetURL: func(id string) string { return fmt.Sprintf("https://www2.opengroup.org/ogsys/catalog/%s", id) }, + GetName: func(id string, url string) string { return id }, +} + +var TOGOnline = Vendor{ + Name: "online", + GetURL: func(id string) string { return fmt.Sprintf("http://pubs.opengroup.org/onlinepubs/%s/", id) }, + GetName: func(id string, url string) string { return url }, +} + +var ISO = Vendor{ + Name: "ISO", + GetURL: 
func(id string) string { + return fmt.Sprintf("http://www.iso.org/iso/home/store/catalogue_tc/catalogue_detail.htm?csnumber=%s", id) + }, + GetName: func(id string, url string) string { + html, err := httpcache.Get(url, nil) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + cmd := exec.Command("nokogiri", "-e", `puts $_.css("[itemprop=\"name\"]").first.text`) + cmd.Stderr = os.Stderr + cmd.Stdin = strings.NewReader(html) + d, err := cmd.Output() + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + return strings.TrimSuffix(string(d), "\n") + }, +} + +var Vendors = []Vendor{IEEESA, TOG, ISO} + +var Editions = []Edition{ + {Name: "POSIX-2001 (Issue 6)", Docs: []Document{ + {Vendor: IEEESA, Type: Full, ID: "1003.1-2001", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Full, ID: "7683"}, + }}, + }}, + {Name: "----->XBD-2001", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C950"}, + {Vendor: ISO, Type: Full, ID: "37312"}, + }}, + {Name: "----->XSH-2001", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C951"}, + {Vendor: ISO, Type: Full, ID: "37313"}, + }}, + {Name: "----->XCU-2001", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C952"}, + {Vendor: ISO, Type: Full, ID: "37314"}, + }}, + {Name: "----->XRAT-2001", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C953"}, + {Vendor: ISO, Type: Full, ID: "37315"}, + }}, + + {Name: "POSIX-2001, 2002 Edition", Docs: []Document{ + {Vendor: IEEESA, Type: Patch, ID: "1003.1-2001-Cor_1-2002", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Patch, ID: "9507"}, + }}, + {Vendor: TOG, Type: Patch, ID: "U057", Resellers: []Document{ + {Vendor: TOG, Type: Full, ID: "T031"}, + }}, + }}, + {Name: "----->XBD-2001, 2002 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C031"}, + {Vendor: ISO, Type: Full, ID: "38789", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-1-2003"}, + }}, + }}, + {Name: "----->XSH-2001, 2002 Edition", Docs: []Document{ + {Vendor: TOG, 
Type: Full, ID: "C032"}, + {Vendor: ISO, Type: Full, ID: "38790", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-2-2003"}, + }}, + }}, + {Name: "----->XCU-2001, 2002 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C033"}, + {Vendor: ISO, Type: Full, ID: "38791", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-3-2003"}, + }}, + }}, + {Name: "----->XRAT-2001, 2002 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C034"}, + {Vendor: ISO, Type: Full, ID: "38792", Resellers: []Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-4-2003"}, + }}, + }}, + + {Name: "POSIX-2001, 2004 Edition", Docs: []Document{ + {Vendor: IEEESA, Type: Patch, ID: "1003.1-2001-Cor_2-2004", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Patch, ID: "9022"}, + {Vendor: IEEEXplore, Type: Full, ID: "9156"}, + }}, + {Vendor: TOG, Type: Patch, ID: "U059", Resellers: []Document{ + {Vendor: TOG, Type: Full, ID: "T041"}, + {Vendor: TOGOnline, Type: Full, ID: "009695399"}, + }}, + }}, + {Name: "----->XBD-2001, 2004 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C046"}, + {Vendor: ISO, Type: Patch, ID: "40687"}, + }}, + {Name: "----->XSH-2001, 2004 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C047"}, + {Vendor: ISO, Type: Patch, ID: "40688"}, + }}, + {Name: "----->XCU-2001, 2004 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C048"}, + {Vendor: ISO, Type: Patch, ID: "40690"}, + }}, + {Name: "----->XRAT-2001, 2004 Edition", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C049"}, + {Vendor: ISO, Type: Patch, ID: "40691"}, + }}, + + {Name: "POSIX-2008 (Issue 7)", Docs: []Document{ + {Vendor: TOG, Type: Full, ID: "C082", Resellers: []Document{ + {Vendor: TOGOnline, Type: Full, ID: "9699919799.2008edition"}, + }}, + + {Vendor: IEEESA, Type: Full, ID: "1003.1-2008", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Full, ID: "4694974"}, + }}, + + {Vendor: ISO, Type: Full, ID: "50516", Resellers: 
[]Document{ + {Vendor: IEEESA, Type: Full, ID: "9945-2009"}, + {Vendor: IEEEXplore, Type: Full, ID: "5393777"}, + }}, + }}, + {Name: "POSIX-2008, 2013 Edition", Docs: []Document{ + {Vendor: TOG, Type: Patch, ID: "U130", Resellers: []Document{ + {Vendor: TOG, Type: Full, ID: "C138"}, + {Vendor: TOGOnline, Type: Full, ID: "9699919799.2013edition"}, + }}, + + {Vendor: IEEESA, Type: Patch, ID: "1003.1-2008-Cor_1-2013", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Patch, ID: "6482152"}, + {Vendor: IEEEXplore, Type: Full, ID: "6506089"}, + }}, + + {Vendor: ISO, Type: Patch, ID: "62005"}, + }}, + {Name: "POSIX-2008, 2016 Edition", Docs: []Document{ + {Vendor: TOG, Type: Patch, ID: "U160", Resellers: []Document{ + {Vendor: TOG, Type: Full, ID: "C165"}, + {Vendor: TOGOnline, Type: Full, ID: "9699919799.2016edition"}, + }}, + + {Vendor: IEEESA, Type: Patch, ID: "1003.1-2008-Cor_2-2016", Resellers: []Document{ + {Vendor: IEEEXplore, Type: Patch, ID: "7542096"}, + {Vendor: IEEEXplore, Type: Full, ID: "7582336"}, + }}, + }}, +} + +// SUSv2 http://pubs.opengroup.org/onlinepubs/007908799/ diff --git a/cmd/gen-posix/http_hacks.go b/cmd/gen-posix/http_hacks.go new file mode 100644 index 0000000..16b8a8d --- /dev/null +++ b/cmd/gen-posix/http_hacks.go @@ -0,0 +1,156 @@ +package main + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "strings" + + "git.lukeshu.com/www/lib/httpcache" +) + +func _checkURL(url string) (string, error) { + switch { + case strings.HasPrefix(url, "https://web.archive.org/"): + _, err := httpcache.Get(url, nil) + return url, err + case strings.HasPrefix(url, "https://www2.opengroup.org/ogsys/catalog/"): + _, err := httpcache.Get(url, nil) + if err == nil { + return url, nil + } + if !errors.Is(err, os.ErrNotExist) { // don't hide non-404 errors + return "", err + } + suffix := strings.TrimPrefix(url, "https://www2.opengroup.org/ogsys/catalog/") + url2 := "https://publications.opengroup.org/" + 
strings.ToLower(suffix) + _, err = httpcache.Get(url2, nil) + if err == nil { + return url2, nil + } + if !errors.Is(err, os.ErrNotExist) { // don't hide non-404 errors + return "", err + } + url3, err := _checkURL("https://web.archive.org/web/20170102/" + url) + if err == nil { + return url3, nil + } + return url+"#ERROR", nil + case url == "http://ieeexplore.ieee.org/servlet/opac?punumber=7394900": + return url+"#ERROR", nil + default: + _, err := httpcache.Get(url, nil) + if err != nil && errors.Is(err, os.ErrNotExist) { + return _checkURL("https://web.archive.org/web/20170102/" + url) + } + return url, err + } +} + +func checkURL(url string) string { + url2, err := _checkURL(url) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + return url2 +} + +func nokogiriIgnoreFailure(htmlBytes []byte, expr string) string { + cmd := exec.Command("nokogiri", "-e", "puts "+expr) + cmd.Stderr = io.Discard + cmd.Stdin = bytes.NewReader(htmlBytes) + outBytes, _ := cmd.Output() + return strings.TrimSpace(string(outBytes)) +} + +func mockRedirect(url string) *http.Response { + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(""+ + "HTTP/1.1 302 Found\r\n"+ + "Location: "+url+"\r\n"+ + "\r\n")), nil) + if err != nil { + panic(err) + } + return resp +} + +func mockForbidden() *http.Response { + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(""+ + "HTTP/1.1 403 Forbidden\r\n"+ + "\r\n")), nil) + if err != nil { + panic(err) + } + return resp +} + +func modifyResponse(url string, entry httpcache.CacheEntry, resp *http.Response) *http.Response { + switch { + case strings.HasPrefix(url, "https://web.archive.org/"): + htmlBytes, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + // native Wayback Machine redirect + redirect := nokogiriIgnoreFailure(htmlBytes, `$_.css("p.impatient a").first["href"]`) + if strings.HasPrefix(redirect, "https://web.archive.org/web/") { + return mockRedirect(redirect) + } + + // silly TOG SSO + if 
strings.Contains(url, "sso.opengroup.org") { + if bytes.Contains(htmlBytes, []byte("document.forms.postbinding.submit()")) { + redirect := nokogiriIgnoreFailure(htmlBytes, `$_.css("#postbinding").first["action"]`) + if redirect != "" { + return mockRedirect(redirect) + } + } + if bytes.Contains(htmlBytes, []byte("General Authorization Error")) { + return mockForbidden() + } + } + + // We drained resp.Body, so re-create it. + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(string(entry))), nil) + if err != nil { + panic(err) + } + return resp + default: + return resp + } +} + +type mock404 struct { + Msg string +} + +// Is implements the interface for [errors.Is]. +func (e *mock404) Is(target error) bool { + return target == os.ErrNotExist +} + +// Error implements [error]. +func (e *mock404) Error() string { + return e.Msg +} + +func checkRedirect(req *http.Request, via []*http.Request) error { + // net/http.defaultCheckRedirect + if len(via) >= 10 { + return errors.New("stopped after 10 redirects") + } + + // detect redirects that should be 404s + oldURL := via[len(via)-1].URL + newURL := req.URL + if (newURL.Path == "/" || newURL.Path == "") && !(oldURL.Path == "/" || oldURL.Path == "") { + return &mock404{Msg: fmt.Sprintf("should have been a 404: %q redirected to %q", oldURL.String(), newURL.String())} + } + + return nil +} diff --git a/cmd/gen-posix/main.go b/cmd/gen-posix/main.go new file mode 100644 index 0000000..6da598b --- /dev/null +++ b/cmd/gen-posix/main.go @@ -0,0 +1,214 @@ +package main + +import ( + "bytes" + "fmt" + "html/template" + "os" + + "git.lukeshu.com/www/lib/httpcache" +) + +var urls = map[string]string{} +var names = map[string]string{} + +func (doc Document) URL() string { + if doc.ID == "" { + return "" + } + key := doc.Vendor.Name + "\000" + doc.ID + if _, have := urls[key]; !have { + urls[key] = checkURL(doc.Vendor.GetURL(doc.ID)) + } + return urls[key] +} + +func (doc Document) Name() string { + if doc.ID == "" { + 
var names []string + for _, full := range doc.Fulls() { + names = append(names, full.Name()) + } + switch len(names) { + case 0: + return "???" + case 1: + return names[0] + default: + // BUG(lukeshu): Handle un-IDed Documents with + // multiple IDed resellers. + panic("TODO") + } + } + key := doc.Vendor.Name + "\000" + doc.ID + if _, have := names[key]; !have { + names[key] = doc.Vendor.GetName(doc.ID, doc.URL()) + } + return names[key] +} + +func (doc Document) Fulls() []Document { + var ret []Document + for _, reseller := range doc.Resellers { + if doc.ID != "" && reseller.Vendor.Name == doc.Vendor.Name && reseller.ID == doc.ID { + continue + } + if reseller.Type == Full { + ret = append(ret, reseller) + } + } + return ret +} + +func (doc Document) Patches() []Document { + var ret []Document + for _, reseller := range doc.Resellers { + if doc.ID != "" && reseller.Vendor.Name == doc.Vendor.Name && reseller.ID == doc.ID { + continue + } + if reseller.Type == Patch { + ret = append(ret, reseller) + } + } + return ret +} + +func (doc Document) AsFull() *Document { + if doc.Vendor.Name == "" && doc.ID == "" { + return nil + } + ret := doc + ret.Resellers = ret.Fulls() + switch doc.Type { + case Full: + // Nothing to do + case Patch: + if len(ret.Resellers) == 0 { + return nil + } + ret.Type = Full + ret.ID = func() string { + var ids []string + for _, reseller := range ret.Resellers { + if reseller.Vendor.Name == doc.Vendor.Name { + ids = append(ids, reseller.ID) + } + } + switch len(ids) { + case 0: + return "" + case 1: + return ids[0] + default: + panic("wut") + } + }() + if ret.ID != "" { + ret.Resellers = ret.Fulls() + } + default: + panic("uhh") + } + return &ret +} + +func (doc Document) AsPatch() *Document { + if doc.Vendor.Name == "" && doc.ID == "" { + return nil + } + ret := doc + switch doc.Type { + case Full: + return nil + case Patch: + ret.Resellers = doc.Patches() + default: + panic("no") + } + return &ret +} + +func (ed Edition) DocsOrdered() 
[]Document { + // This could be O(n), but this naive implementation is + // O(n^2). It's OK, n is small. + s := make([]Document, len(Vendors)) + for i, vnd := range Vendors { + for _, doc := range ed.Docs { + if doc.Vendor.Name == vnd.Name { + s[i] = doc + } + } + } + return s +} + +var tmpl = `{{define "document"}}{{if .}} + {{if .URL}}<a href="{{.URL}}" title="{{.Name}}">{{.Name}}</a>{{else}}{{.Name}}{{end}} + {{range .Resellers}} + <a href="{{.URL}}" title="{{.Name}}">({{.Vendor.Name}})</a> + {{end}} +{{end}}{{end}} +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>POSIX Editions</title> + <style> + body { font-size: 8px; } + table { border-collapse: collapse; } + th, td { border: solid 1px black; } + tr:not(:first-child):not(:nth-child(2)) th { + font-family: monospace; + text-align: left; + } + </style> + </head> + <body> + <p>There's a typo in the "standardNumber" in IEEE + Xplore's records for the 2004 edition of 1003.1; + it says 2014 instead of 2004. 
The actual document + says 2004 though.</p> + <table> + <caption><p>POSIX: C & Shell (1997-present)</p></caption> + <tr><td rowspan=2></td>{{range .Vendors}}<th colspan=2>{{.Name}}</th>{{end}}</tr> + <tr>{{range .Vendors}}<th>Full</th><th>Patch</th>{{end}}</tr> + {{range .Editions}}<tr> + <th>{{.Name}}</th> + {{range .DocsOrdered}} + <td>{{template "document" .AsFull}}</td><td>{{template "document" .AsPatch}}</td> + {{end}} + </tr>{{end}} + </table> + </body> +</html> +` + +func mainWithError() error { + httpcache.UserAgent = "https://git.lukeshu.com/www/tree/cmd/gen-posix" + httpcache.ModifyResponse = modifyResponse + httpcache.CheckRedirect = checkRedirect + + tmpl := template.Must(template.New("page").Parse(tmpl)) + + var out bytes.Buffer + if err := tmpl.Execute(&out, map[string]interface{}{ + "Vendors": Vendors, + "Editions": Editions, + }); err != nil { + return err + } + if err := os.WriteFile("public/posix/index.new.html", out.Bytes(), 0666); err != nil { + return err + } + if err := os.Rename("public/posix/index.new.html", "public/posix/index.html"); err != nil { + return err + } + return nil +} + +func main() { + if err := mainWithError(); err != nil { + fmt.Fprintf(os.Stderr, "%s: error: %v\n", os.Args[0], err) + os.Exit(1) + } +} diff --git a/cmd/gen-posix/types.go b/cmd/gen-posix/types.go new file mode 100644 index 0000000..9bb4c2d --- /dev/null +++ b/cmd/gen-posix/types.go @@ -0,0 +1,26 @@ +package main + +type Vendor struct { + Name string + GetURL func(id string) string + GetName func(id string, url string) string +} + +type Type int + +const ( + Full Type = 0 + Patch Type = 1 +) + +type Edition struct { + Name string + Docs []Document +} + +type Document struct { + Vendor Vendor + Type Type + ID string + Resellers []Document +} |