-rw-r--r--  .gitignore                                  2
-rw-r--r--  index.atom.erb                              4
-rw-r--r--  page.html.erb                               7
-rwxr-xr-x  post-commit                                 2
-rw-r--r--  public/arch-systemd.html                   83
-rw-r--r--  public/assets/style.css                    16
-rw-r--r--  public/bash-arrays.html                   409
-rw-r--r--  public/bash-redirection.html               60
-rw-r--r--  public/btrfs-rec.html                    1176
-rw-r--r--  public/btrfs-rec.md                      1272
-rw-r--r--  public/build-bash-1.html                  109
-rw-r--r--  public/crt-sh-architecture.html            86
-rw-r--r--  public/emacs-as-an-os.html                 55
-rw-r--r--  public/emacs-shells.html                   86
-rw-r--r--  public/fd_printf.html                      61
-rw-r--r--  public/fs-licensing-explanation.html       79
-rw-r--r--  public/git-go-pre-commit.html              70
-rw-r--r--  public/http-notes.html                    131
-rw-r--r--  public/index.atom                        4036
-rw-r--r--  public/index.html                          96
-rw-r--r--  public/index.md                            39
-rw-r--r--  public/java-segfault-redux.html           218
-rw-r--r--  public/java-segfault.html                 120
-rw-r--r--  public/kbd-xmodmap.html                   240
-rw-r--r--  public/lp2015-videos.html                  38
-rw-r--r--  public/make-memoize.html                   93
-rw-r--r--  public/nginx-mediawiki.html                87
-rw-r--r--  public/pacman-overview.html                62
-rw-r--r--  public/poor-system-documentation.html      57
-rw-r--r--  public/posix-pricing.html                  48
-rw-r--r--  public/purdue-cs-login.html               195
-rw-r--r--  public/rails-improvements.html            103
-rw-r--r--  public/ryf-routers.html                    54
-rw-r--r--  public/term-colors.html                    56
-rw-r--r--  public/what-im-working-on-fall-2014.html  157
-rw-r--r--  public/x11-systemd.html                   387
-rw-r--r--  util.rb                                    12
-rwxr-xr-x  write-atomic                                2
-rwxr-xr-x  write-ifchanged                             2
39 files changed, 9796 insertions, 14 deletions
diff --git a/.gitignore b/.gitignore
index 2d65ef7..6db2345 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,2 @@
-/public/*.html
-/public/index.*
/.var*
.tmp*
diff --git a/index.atom.erb b/index.atom.erb
index a5e1586..ca47602 100644
--- a/index.atom.erb
+++ b/index.atom.erb
@@ -1,12 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
- <title>Luke Shumaker's Web Log</title>
+ <title>Luke T. Shumaker's Web Log</title>
<link rel="self" type="application/atom+xml" href="./index.atom"/>
<link rel="alternate" type="text/html" href="./"/>
<link rel="alternate" type="text/markdown" href="./index.md"/>
<updated><%= @pages.map{|p|p.date}.sort.last.rfc3339 %></updated>
- <author><%= Person.new("Luke Shumaker").atom %></author>
+ <author><%= Person.new("Luke T. Shumaker").atom %></author>
<id>https://lukeshu.com/blog/</id>
<% @pages.sort_by{|p| p.date}.reverse.each do |page| %>
diff --git a/page.html.erb b/page.html.erb
index 75c576f..93007de 100644
--- a/page.html.erb
+++ b/page.html.erb
@@ -2,7 +2,8 @@
<html lang="en">
<head>
<meta charset="utf-8">
- <title><%= @page.title %> — Luke Shumaker</title>
+ <title><%= @page.title %> — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="assets/style.css">
<link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
</head>
@@ -12,6 +13,10 @@
<%= @page.content %>
</article>
<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
<%= @page.rights %>
</footer>
</body>
diff --git a/post-commit b/post-commit
index b8f816a..6aa2e4a 100755
--- a/post-commit
+++ b/post-commit
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-# Copyright 2016-2017 Luke Shumaker
+# Copyright 2016-2017 Luke T. Shumaker
set -e
branch=$(git name-rev --name-only HEAD)
diff --git a/public/arch-systemd.html b/public/arch-systemd.html
new file mode 100644
index 0000000..6fa0b42
--- /dev/null
+++ b/public/arch-systemd.html
@@ -0,0 +1,83 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>What Arch Linux's switch to systemd means for users — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » arch-systemd</header>
+<article>
+<h1 id="what-arch-linuxs-switch-to-systemd-means-for-users">What Arch
+Linux’s switch to systemd means for users</h1>
+<p>This is based on a post on <a
+href="http://www.reddit.com/r/archlinux/comments/zoffo/systemd_we_will_keep_making_it_the_distro_we_like/c66nrcb">reddit</a>,
+published on 2012-09-11.</p>
+<p>systemd is a replacement for UNIX System V-style init; instead of
+having <code>/etc/init.d/*</code> or <code>/etc/rc.d/*</code> scripts,
+systemd runs in the background to manage them.</p>
+<p>This has the <strong>advantages</strong> that there is proper
+dependency tracking, easing the life of the administrator and allowing
+for things to be run in parallel safely. It also uses “targets” instead
+of “init levels”, which just makes more sense. It also means that a
+target can be started or stopped on the fly, such as mounting or
+unmounting a drive, which has in the past only been done at boot up and
+shut down.</p>
+<p>The <strong>downside</strong> is that it is (allegedly) big,
+bloated<a href="#fn1" class="footnote-ref" id="fnref1"
+role="doc-noteref"><sup>1</sup></a>, and does (arguably) more than it
+should. Why is there a dedicated systemd-fsck? Why does systemd
+encapsulate the functionality of syslog? That, and it means somebody is
+standing on my lawn.</p>
+<p>The <strong>changes</strong> an Arch user needs to worry about are
+that everything is being moved out of <code>/etc/rc.conf</code>. Arch
+users will still have the choice between systemd and SysV-init, but
+rc.conf is becoming the SysV-init configuration file, rather than the
+general system configuration file. If you will still be using SysV-init,
+basically the only thing in rc.conf will be <code>DAEMONS</code>.<a
+href="#fn2" class="footnote-ref" id="fnref2"
+role="doc-noteref"><sup>2</sup></a> For now there is compatibility for
+the variables that used to be there, but that is going away.</p>
+<aside id="footnotes" class="footnotes footnotes-end-of-document"
+role="doc-endnotes">
+<hr />
+<ol>
+<li id="fn1"><p><em>I</em> don’t think it’s bloated, but that is the
+criticism. Basically, I discount any argument that uses “bloated”
+without backing it up. I was trying to say that it takes a lot of heat
+for being bloated, and that there may be some truth to that (the
+systemd-fsck and syslog comments), but that these claims are largely
+unsubstantiated, and more along the lines of “I would have done it
+differently”. Maybe your ideas are better, but you haven’t written the
+code.</p>
+<p>I personally don’t have an opinion either way about SysV-init vs
+systemd. I recently migrated my boxes to systemd, but that was because
+the SysV init scripts for NFSv4 in Arch are problematic. I suppose this
+is another <strong>advantage</strong> I missed: <em>people generally
+consider systemd “units” to be more robust and easier to write than SysV
+“scripts”.</em></p>
+<p>I’m actually not a fan of either. If I had more time on my hands, I’d
+be running a <code>make</code>-based init system based on a research
+project IBM did a while ago. So I consider myself fairly objective; my
+horse isn’t in this race.<a href="#fnref1" class="footnote-back"
+role="doc-backlink">↩︎</a></p></li>
+<li id="fn2"><p>You can still have <code>USEDMRAID</code>,
+<code>USELVM</code>, <code>interface</code>, <code>address</code>,
+<code>netmask</code>, and <code>gateway</code>. But those are minor.<a
+href="#fnref2" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
+</ol>
+</aside>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2012 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/assets/style.css b/public/assets/style.css
index e653c21..1033f57 100644
--- a/public/assets/style.css
+++ b/public/assets/style.css
@@ -147,3 +147,19 @@ a {
a:hover, a:focus {
text-decoration: underline;
}
+
+a.em, a.em:visited {
+ color: #0000ee;
+ font-weight: bold;
+}
+
+/* beg banner */
+
+aside.sponsor {
+ margin: 0 auto;
+ width: 80%;
+ font-size: 142%;
+ padding: 1em 1.5em;
+ background: #DDDDFF;
+ font-weight: bold;
+}
diff --git a/public/bash-arrays.html b/public/bash-arrays.html
new file mode 100644
index 0000000..4bcaf3c
--- /dev/null
+++ b/public/bash-arrays.html
@@ -0,0 +1,409 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Bash arrays — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » bash-arrays</header>
+<article>
+<h1 id="bash-arrays">Bash arrays</h1>
+<p>Way too many people don’t understand Bash arrays. Many of them argue
+that if you need arrays, you shouldn’t be using Bash. If we reject the
+notion that one should never use Bash for scripting, then thinking you
+don’t need Bash arrays is what I like to call “wrong”. I don’t even mean
+real scripting; even these little stubs in <code>/usr/bin</code>:</p>
+<pre><code>#!/bin/sh
+java -jar /…/something.jar $* # WRONG!</code></pre>
+<p>Command line arguments are exposed as an array; that little
+<code>$*</code> is accessing it, and is doing the wrong thing (for the
+lazy, the correct thing is <code>-- "$@"</code>). Arrays in Bash offer a
+safe way to preserve field separation.</p>
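+<p>Applying that fix, the stub becomes:</p>
+<pre><code>#!/bin/sh
+java -jar /…/something.jar -- &quot;$@&quot;</code></pre>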
+<p>One of the main sources of bugs (and security holes) in shell scripts
+is field separation. That’s what arrays are about.</p>
+<h2 id="what-field-separation">What? Field separation?</h2>
+<p>Field separation is just splitting a larger unit into a list of
+“fields”. The most common case is when Bash splits a “simple command”
+(in the Bash manual’s terminology) into a list of arguments.
+Understanding how this works is an important prerequisite to
+understanding arrays, and even why they are important.</p>
+<p>Dealing with lists is something that is very common in Bash scripts;
+from dealing with lists of arguments, to lists of files; they pop up a
+lot, and each time, you need to think about how the list is separated.
+In the case of <code>$PATH</code>, the list is separated by colons. In
+the case of <code>$CFLAGS</code>, the list is separated by whitespace.
+In the case of actual arrays, it’s easy, there’s no special character to
+worry about, just quote it, and you’re good to go.</p>
+<h2 id="bash-word-splitting">Bash word splitting</h2>
+<p>When Bash reads a “simple command”, it splits the whole thing into a
+list of “words”. “The first word specifies the command to be executed,
+and is passed as argument zero. The remaining words are passed as
+arguments to the invoked command.” (to quote <code>bash(1)</code>)</p>
+<p>It is often hard for those unfamiliar with Bash to understand when
+something is multiple words, and when it is a single word that just
+contains a space or newline. To help gain an intuitive understanding, I
+recommend using the following command to print a bullet list of words,
+to see how Bash splits them up:</p>
+<pre><code>printf ' -> %s\n' <var>words…</var><hr> -&gt; word one
+ -&gt; multiline
+word
+ -&gt; third word
+</code></pre>
+<p>In a simple command, in absence of quoting, Bash separates the “raw”
+input into words by splitting on spaces and tabs. In other places, such
+as when expanding a variable, it uses the same process, but splits on
+the characters in the <code>$IFS</code> variable (which has the default
+value of space/tab/newline). This process is, creatively enough, called
+“word splitting”.</p>
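+<p>A quick demonstration of <code>$IFS</code>-based splitting when
+expanding a variable (using the <code>printf</code> trick from above;
+note that assigning to <code>$IFS</code> like this affects the rest of
+the shell session):</p>
+<pre><code>$ var=&#39;one:two three&#39;
+$ IFS=:
+$ printf &#39; -&gt; %s\n&#39; $var
+ -&gt; one
+ -&gt; two three</code></pre>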
+<p>In most discussions of Bash arrays, one of the frequent criticisms is
+all the footnotes and “gotchas” about when to quote things. That’s
+because they usually don’t set the context of word splitting.
+<strong>Double quotes (<code>"</code>) inhibit Bash from doing word
+splitting.</strong> That’s it, that’s all they do. Arrays are already
+split into words; without wrapping them in double quotes Bash re-word
+splits them, which is almost <em>never</em> what you want; otherwise,
+you wouldn’t be working with an array.</p>
+<h2 id="normal-array-syntax">Normal array syntax</h2>
+<table>
+ <caption>
+ <h1>Setting an array</h1>
+ <p><var>words…</var> is expanded and subject to word splitting
+ based on <code>$IFS</code>.</p>
+ </caption>
+ <tbody>
+ <tr>
+ <td><code>array=(<var>words…</var>)</code></td>
+ <td>Set the contents of the entire array.</td>
+ </tr><tr>
+ <td><code>array+=(<var>words…</var>)</code></td>
+ <td>Appends <var>words…</var> to the end of the array.</td>
+ </tr><tr>
+ <td><code>array[<var>n</var>]=<var>word</var></code></td>
+ <td>Sets an individual entry in the array, the first entry is at
+ <var>n</var>=0.</td>
+ </tr>
+ </tbody>
+</table>
+<p>Now, for accessing the array. The most important things for
+understanding arrays are to quote them, and to understand the
+difference between <code>@</code> and <code>*</code>.</p>
+<table>
+ <caption>
+ <h1>Getting an entire array</h1>
+ <p>Unless these are wrapped in double quotes, they are subject to
+ word splitting, which defeats the purpose of arrays.</p>
+ <p>I guess it's worth mentioning that if you don't quote them, and
+ word splitting is applied, <code>@</code> and <code>*</code>
+ end up being equivalent.</p>
+ <p>With <code>*</code>, when joining the elements into a single
+ string, the elements are separated by the first character in
+ <code>$IFS</code>, which is, by default, a space.</p>
+ </caption>
+ <tbody>
+ <tr>
+ <td><code>"${array[@]}"</code></td>
+      <td>Evaluates to every element of the array, as separate
+ words.</td>
+ </tr><tr>
+ <td><code>"${array[*]}"</code></td>
+ <td>Evaluates to every element of the array, as a single
+ word.</td>
+ </tr>
+ </tbody>
+</table>
+<p>It’s really that simple—that covers most usages of arrays, and most
+of the mistakes made with them.</p>
+<p>To help you understand the difference between <code>@</code> and
+<code>*</code>, here is a sample of each:</p>
+<table>
+ <tbody>
+ <tr><th><code>@</code></th><th><code>*</code></th></tr>
+ <tr>
+ <td>Input:<pre><code>#!/bin/bash
+array=(foo bar baz)
+for item in "${array[@]}"; do
+ echo " - &lt;${item}&gt;"
+done</code></pre></td>
+ <td>Input:<pre><code>#!/bin/bash
+array=(foo bar baz)
+for item in "${array[*]}"; do
+ echo " - &lt;${item}&gt;"
+done</code></pre></td>
+ </tr>
+ <tr>
+ <td>Output:<pre><code> - &lt;foo&gt;
+ - &lt;bar&gt;
+ - &lt;baz&gt;</code></pre></td>
+ <td>Output:<pre><code> - &lt;foo bar baz&gt;<br><br><br></code></pre></td>
+ </tr>
+ </tbody>
+</table>
+<p>In most cases, <code>@</code> is what you want, but <code>*</code>
+comes up often enough too.</p>
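+<p>For example, a common use of <code>*</code> is joining an array into
+a delimited string, setting <code>$IFS</code> in a subshell so that the
+change doesn’t leak:</p>
+<pre><code>$ array=(a b c)
+$ (IFS=,; echo &quot;${array[*]}&quot;)
+a,b,c</code></pre>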
+<p>To get individual entries, the syntax is
+<code>${array[<var>n</var>]}</code>, where <var>n</var> starts at 0.</p>
+<table>
+ <caption>
+ <h1>Getting a single entry from an array</h1>
+ <p>Also subject to word splitting if you don't wrap it in
+ quotes.</p>
+ </caption>
+ <tbody>
+ <tr>
+ <td><code>"${array[<var>n</var>]}"</code></td>
+ <td>Evaluates to the <var>n</var><sup>th</sup> entry of the
+ array, where the first entry is at <var>n</var>=0.</td>
+ </tr>
+ </tbody>
+</table>
+<p>To get a subset of the array, there are a few options:</p>
+<table>
+ <caption>
+ <h1>Getting subsets of an array</h1>
+ <p>Substitute <code>*</code> for <code>@</code> to get the subset
+ as a <code>$IFS</code>-separated string instead of separate
+ words, as described above.</p>
+ <p>Again, if you don't wrap these in double quotes, they are
+ subject to word splitting, which defeats the purpose of
+ arrays.</p>
+ </caption>
+ <tbody>
+ <tr>
+ <td><code>"${array[@]:<var>start</var>}"</code></td>
+ <td>Evaluates to the entries from <var>n</var>=<var>start</var> to the end
+ of the array.</td>
+ </tr><tr>
+ <td><code>"${array[@]:<var>start</var>:<var>count</var>}"</code></td>
+ <td>Evaluates to <var>count</var> entries, starting at
+ <var>n</var>=<var>start</var>.</td>
+ </tr><tr>
+ <td><code>"${array[@]::<var>count</var>}"</code></td>
+ <td>Evaluates to <var>count</var> entries from the beginning of
+ the array.</td>
+ </tr>
+ </tbody>
+</table>
+<p>Notice that <code>"${array[@]}"</code> is equivalent to
+<code>"${array[@]:0}"</code>.</p>
+<table>
+ <caption>
+ <h1>Getting the length of an array</h1>
+    <p>This is the only situation with arrays where quoting doesn't
+ make a difference.</p>
+ <p>True to my earlier statement, when unquoted, there is no
+ difference between <code>@</code> and <code>*</code>.</p>
+ </caption>
+ <tbody>
+ <tr>
+ <td>
+ <code>${#array[@]}</code>
+ <br>or<br>
+ <code>${#array[*]}</code>
+ </td>
+ <td>
+ Evaluates to the length of the array
+ </td>
+ </tr>
+ </tbody>
+</table>
+<h2 id="argument-array-syntax">Argument array syntax</h2>
+<p>Accessing the arguments is mostly that simple, but that array doesn’t
+actually have a variable name. It’s special. Instead, it is exposed
+through a series of special variables (normal variables can only start
+with letters and underscore), that <em>mostly</em> match up with the
+normal array syntax.</p>
+<p>Setting the arguments array, on the other hand, is pretty different.
+That’s fine, because setting the arguments array is less useful
+anyway.</p>
+<table>
+ <caption>
+ <h1>Accessing the arguments array</h1>
+ <aside>Note that for values of <var>n</var> with more than 1
+ digit, you need to wrap it in <code>{}</code>.
+ Otherwise, <code>"$10"</code> would be parsed
+ as <code>"${1}0"</code>.</aside>
+ </caption>
+ <tbody>
+ <tr><th colspan=2>Individual entries</th></tr>
+ <tr><td><code>${array[0]}</code></td><td><code>$0</code></td></tr>
+ <tr><td><code>${array[1]}</code></td><td><code>$1</code></td></tr>
+ <tr><td colspan=2 style="text-align:center">…</td></tr>
+ <tr><td><code>${array[9]}</code></td><td><code>$9</code></td></tr>
+ <tr><td><code>${array[10]}</code></td><td><code>${10}</code></td></tr>
+ <tr><td colspan=2 style="text-align:center">…</td></tr>
+ <tr><td><code>${array[<var>n</var>]}</code></td><td><code>${<var>n</var>}</code></td></tr>
+ <tr><th colspan=2>Subset arrays (array)</th></tr>
+ <tr><td><code>"${array[@]}"</code></td><td><code>"${@:0}"</code></td></tr>
+ <tr><td><code>"${array[@]:1}"</code></td><td><code>"$@"</code></td></tr>
+ <tr><td><code>"${array[@]:<var>pos</var>}"</code></td><td><code>"${@:<var>pos</var>}"</code></td></tr>
+ <tr><td><code>"${array[@]:<var>pos</var>:<var>len</var>}"</code></td><td><code>"${@:<var>pos</var>:<var>len</var>}"</code></td></tr>
+ <tr><td><code>"${array[@]::<var>len</var>}"</code></td><td><code>"${@::<var>len</var>}"</code></td></tr>
+ <tr><th colspan=2>Subset arrays (string)</th></tr>
+ <tr><td><code>"${array[*]}"</code></td><td><code>"${*:0}"</code></td></tr>
+ <tr><td><code>"${array[*]:1}"</code></td><td><code>"$*"</code></td></tr>
+ <tr><td><code>"${array[*]:<var>pos</var>}"</code></td><td><code>"${*:<var>pos</var>}"</code></td></tr>
+ <tr><td><code>"${array[*]:<var>pos</var>:<var>len</var>}"</code></td><td><code>"${*:<var>pos</var>:<var>len</var>}"</code></td></tr>
+ <tr><td><code>"${array[*]::<var>len</var>}"</code></td><td><code>"${*::<var>len</var>}"</code></td></tr>
+ <tr><th colspan=2>Array length</th></tr>
+ <tr><td><code>${#array[@]}</code></td><td><code>$#</code> + 1</td></tr>
+ <tr><th colspan=2>Setting the array</th></tr>
+ <tr><td><code>array=("${array[0]}" <var>words…</var>)</code></td><td><code>set -- <var>words…</var></code></td></tr>
+ <tr><td><code>array=("${array[0]}" "${array[@]:2}")</code></td><td><code>shift</code></td></tr>
+ <tr><td><code>array=("${array[0]}" "${array[@]:<var>n+1</var>}")</code></td><td><code>shift <var>n</var></code></td></tr>
+ </tbody>
+</table>
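+<p>To make the caption’s note about <code>${10}</code> concrete:</p>
+<pre><code>$ set -- a b c d e f g h i j k
+$ echo &quot;$10&quot; &quot;${10}&quot;
+a0 j</code></pre>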
+<p>Did you notice what was inconsistent? The variables <code>$*</code>,
+<code>$@</code>, and <code>$#</code> behave like the <var>n</var>=0
+entry doesn’t exist.</p>
+<table>
+ <caption>
+ <h1>Inconsistencies</h1>
+ </caption>
+ <tbody>
+ <tr>
+ <th colspan=3><code>@</code> or <code>*</code></th>
+ </tr><tr>
+ <td><code>"${array[@]}"</code></td>
+ <td>→</td>
+ <td><code>"${array[@]:0}"</code></td>
+ </tr><tr>
+ <td><code>"${@}"</code></td>
+ <td>→</td>
+ <td><code>"${@:1}"</code></td>
+ </tr><tr>
+ <th colspan=3><code>#</code></th>
+ </tr><tr>
+ <td><code>"${#array[@]}"</code></td>
+ <td>→</td>
+ <td>length</td>
+ </tr><tr>
+ <td><code>"${#}"</code></td>
+ <td>→</td>
+ <td>length-1</td>
+ </tr>
+ </tbody>
+</table>
+<p>These make sense because argument 0 is the name of the script—we
+almost never want that when parsing arguments. If they behaved
+consistently, you’d have to spend more code to get the values that they
+currently give you.</p>
+<p>Now, for an explanation of setting the arguments array. You cannot
+set argument <var>n</var>=0. The <code>set</code> command is used to
+manipulate the arguments passed to Bash after the fact—similarly, you
+could use <code>set -x</code> to make Bash behave like you ran it as
+<code>bash -x</code>; like most GNU programs, the <code>--</code> tells
+it to not parse any of the options as flags. The <code>shift</code>
+command shifts each entry <var>n</var> spots to the left, using
+<var>n</var>=1 if no value is specified; and leaving argument 0
+alone.</p>
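+<p>A short demonstration of <code>set --</code> and
+<code>shift</code>:</p>
+<pre><code>$ set -- one two three
+$ echo &quot;$#: $1&quot;
+3: one
+$ shift
+$ echo &quot;$#: $1&quot;
+2: two
+$ shift 2
+$ echo &quot;$#&quot;
+0</code></pre>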
+<h2 id="but-you-mentioned-gotchas-about-quoting">But you mentioned
+“gotchas” about quoting!</h2>
+<p>But I explained that quoting simply inhibits word splitting, which
+you pretty much never want when working with arrays. If, for some odd
+reason, you do want word splitting, then that’s when you don’t quote.
+Simple, easy to understand.</p>
+<p>I think possibly the only case where you do want word splitting with
+an array is when you didn’t want an array, but it’s what you get
+(arguments are, by necessity, an array). For example:</p>
+<pre><code># Usage: path_ls PATH1 PATH2…
+# Description:
+# Takes any number of PATH-style values; that is,
+# colon-separated lists of directories, and prints a
+# newline-separated list of executables found in them.
+# Bugs:
+# Does not correctly handle programs with a newline in the name,
+# as the output is newline-separated.
+path_ls() {
+ local IFS dirs
+ IFS=:
+ dirs=($@) # The odd-ball time that it needs to be unquoted
+ find -L &quot;${dirs[@]}&quot; -maxdepth 1 -type f -executable \
+ -printf &#39;%f\n&#39; 2&gt;/dev/null | sort -u
+}</code></pre>
+<p>Logically, there shouldn’t be multiple arguments, just a single
+<code>$PATH</code> value; but, we can’t enforce that, as the array can
+have any size. So, we do the robust thing, and just act on the entire
+array, not really caring about the fact that it is an array. Alas, there
+is still a field-separation bug in the program, with the output.</p>
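+<p>One possible fix for that output bug (a sketch that assumes GNU
+<code>find</code>’s <code>-printf</code> and GNU <code>sort</code>’s
+<code>-z</code> flag) is a NUL-separated variant:</p>
+<pre><code># Usage: path_ls0 PATH1 PATH2…
+# Like path_ls, but NUL-separated, so it correctly handles programs
+# with a newline in the name.
+path_ls0() {
+    local IFS dirs
+    IFS=:
+    dirs=($@) # Unquoted, as in path_ls
+    find -L &quot;${dirs[@]}&quot; -maxdepth 1 -type f -executable \
+         -printf &#39;%f\0&#39; 2&gt;/dev/null | sort -zu
+}</code></pre>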
+<h2 id="i-still-dont-think-i-need-arrays-in-my-scripts">I still don’t
+think I need arrays in my scripts</h2>
+<p>Consider the common code:</p>
+<pre><code>ARGS=&#39; -f -q&#39;
+…
+command $ARGS # unquoted variables are a bad code-smell anyway</code></pre>
+<p>Here, <code>$ARGS</code> is field-separated by <code>$IFS</code>,
+which we are assuming has the default value. This is fine, as long as
+<code>$ARGS</code> is known to never need an embedded space; which you
+do as long as it isn’t based on anything outside of the program. But
+wait until you want to do this:</p>
+<pre><code>ARGS=&#39; -f -q&#39;
+…
+if [[ -f &quot;$filename&quot; ]]; then
+ ARGS+=&quot; -F $filename&quot;
+fi
+…
+command $ARGS</code></pre>
+<p>Now you’re hosed if <code>$filename</code> contains a space! More
+than just breaking, it could have unwanted side effects, such as when
+someone figures out how to make
+<code>filename='foo --dangerous-flag'</code>.</p>
+<p>Compare that with the array version:</p>
+<pre><code>ARGS=(-f -q)
+…
+if [[ -f &quot;$filename&quot; ]]; then
+ ARGS+=(-F &quot;$filename&quot;)
+fi
+…
+command &quot;${ARGS[@]}&quot;</code></pre>
+<h2 id="what-about-portability">What about portability?</h2>
+<p>Except for the little stubs that call another program with
+<code>"$@"</code> at the end, trying to write for multiple shells
+(including the ambiguous <code>/bin/sh</code>) is not a task for mere
+mortals. If you do try that, your best bet is probably sticking to
+POSIX. Arrays are not POSIX; except for the arguments array, which is;
+though getting subset arrays from <code>$@</code> and <code>$*</code> is
+not (tip: use <code>set --</code> to re-purpose the arguments
+array).</p>
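+<p>For example, a sketch of that <code>set --</code> trick in POSIX
+<code>sh</code>, building up a list one element at a time:</p>
+<pre><code>#!/bin/sh
+# Re-purpose the arguments array as the only array POSIX sh has.
+set --
+for f in /etc/*.conf; do
+    set -- &quot;$@&quot; &quot;$f&quot;
+done
+printf &#39; -&gt; %s\n&#39; &quot;$@&quot;</code></pre>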
+<p>Writing for various versions of Bash, though, is pretty do-able.
+Everything here works all the way back to bash-2.0 (December 1996), with
+the following exceptions:</p>
+<ul>
+<li><p>The <code>+=</code> operator wasn’t added until Bash 3.1.</p>
+<ul>
+<li>As a work-around, use
+<code>array[${#array[*]}]=<var>word</var></code> to append a single
+element.</li>
+</ul></li>
+<li><p>Accessing subset arrays of the arguments array is inconsistent if
+<var>pos</var>=0 in <code>${@:<var>pos</var>:<var>len</var>}</code>.</p>
+<ul>
+<li>In Bash 2.x and 3.x, it works as expected, except that argument 0 is
+silently missing. For example <code>${@:0:3}</code> gives arguments 1
+and 2; where <code>${@:1:3}</code> gives arguments 1, 2, and 3. This
+means that if <var>pos</var>=0, then only <var>len</var>-1 arguments are
+given back.</li>
+<li>In Bash 4.0, argument 0 can be accessed, but if <var>pos</var>=0,
+then it only gives back <var>len</var>-1 arguments. So,
+<code>${@:0:3}</code> gives arguments 0 and 1.</li>
+<li>In Bash 4.1 and higher, it works in the way described in the main
+part of this document.</li>
+</ul></li>
+</ul>
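+<p>To make the <code>+=</code> work-around above concrete:</p>
+<pre><code>array=(a b)
+array[${#array[*]}]=c # same as array+=(c), but works before bash 3.1</code></pre>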
+<p>Now, Bash 1.x doesn’t have arrays at all. <code>$@</code> and
+<code>$*</code> work, but using <code>:</code> to select a range of
+elements from them doesn’t. Good thing most boxes have been updated
+since 1996!</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2013 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/bash-redirection.html b/public/bash-redirection.html
new file mode 100644
index 0000000..6f5af47
--- /dev/null
+++ b/public/bash-redirection.html
@@ -0,0 +1,60 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Bash redirection — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » bash-redirection</header>
+<article>
+<h1 id="bash-redirection">Bash redirection</h1>
+<p>Apparently, too many people don’t understand Bash redirection. They
+might get the basic syntax, but they think of the process as
+declarative; in Bourne-ish shells, it is procedural.</p>
+<p>In Bash, streams are handled in terms of “file descriptors”, or “FDs”.
+FD 0 is stdin, FD 1 is stdout, and FD 2 is stderr. The equivalence (or
+lack thereof) between using a numeric file descriptor, and using the
+associated file in <code>/dev/*</code> and <code>/proc/*</code> is
+interesting, but beyond the scope of this article.</p>
+<h2 id="step-1-pipes">Step 1: Pipes</h2>
+<p>To quote the Bash manual:</p>
+<pre><code>A &#39;pipeline&#39; is a sequence of simple commands separated by one of the
+control operators &#39;|&#39; or &#39;|&amp;&#39;.
+
+ The format for a pipeline is
+ [time [-p]] [!] COMMAND1 [ [| or |&amp;] COMMAND2 ...]</code></pre>
+<p>Now, <code>|&amp;</code> is just shorthand for
+<code>2&gt;&amp;1 |</code>; the pipe part happens here, but the
+<code>2&gt;&amp;1</code> part doesn’t happen until step 2.</p>
+<p>First, if the command is part of a pipeline, the pipes are set up.
+For every instance of the <code>|</code> metacharacter, Bash creates a
+pipe (<code>pipe(3)</code>), and duplicates (<code>dup2(3)</code>) the
+write end of the pipe to FD 1 of the process on the left side of the
+<code>|</code>, and duplicates the read end of the pipe to FD 0 of the
+process on the right side.</p>
+<h2 id="step-2-redirections">Step 2: Redirections</h2>
+<p><em>After</em> the initial FD 0 and FD 1 fiddling by pipes is done,
+Bash looks at the redirections. <strong>This means that redirections can
+override pipes.</strong></p>
+<p>Redirections are read left-to-right, and are executed as they are
+read, using <code>dup2(right-side, left-side)</code>. This is where most
+of the confusion comes from: people think of them as declarative, which
+leads to them doing the first of these, when they mean to do the
+second:</p>
+<pre><code>cmd 2&gt;&amp;1 &gt;file # stdout goes to file, stderr goes to stdout
+cmd &gt;file 2&gt;&amp;1 # both stdout and stderr go to file</code></pre>
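+<p>A quick way to see this order-dependence (assuming your
+<code>ls</code> prints a one-line error message):</p>
+<pre><code>$ ls /nonexistent 2&gt;&amp;1 &gt;/dev/null | wc -l
+1
+$ ls /nonexistent &gt;/dev/null 2&gt;&amp;1 | wc -l
+0</code></pre>
+<p>In the first command the pipe is set up first; then
+<code>2&gt;&amp;1</code> points stderr at the pipe, and only then is
+stdout pointed at <code>/dev/null</code>, so the error message still
+reaches <code>wc</code>. In the second, stderr follows stdout into
+<code>/dev/null</code>.</p>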
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2014 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/btrfs-rec.html b/public/btrfs-rec.html
new file mode 100644
index 0000000..beaf652
--- /dev/null
+++ b/public/btrfs-rec.html
@@ -0,0 +1,1176 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Announcing: btrfs-rec: Recover (data from) a broken btrfs filesystem — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » btrfs-rec</header>
+<article>
+<h1
+id="announcing-btrfs-rec-recover-data-from-a-broken-btrfs-filesystem">Announcing:
+btrfs-rec: Recover (data from) a broken btrfs filesystem</h1>
+<blockquote>
+<p>I originally sent this email on 2023-07-10, but it has been caught by
+their bogofilter. Yes, it was <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/README.md?id=18e6066c241cf3d252b6521150843ffc858d8434">plaintext</a>.
+No, I didn't use GMail. Yes, I've successfully participated in vger
+lists in the past. Yes, I've reached out to postmaster; no, I haven't
+received a reply yet (as of 2023-07-14).</p>
+</blockquote>
+<div style="font-family: monospace">
+<p>To: linux-btrfs@vger.kernel.org<br/> From: Luke T. Shumaker
+&lt;lukeshu@lukeshu.com&gt;<br/> Subject: btrfs-rec: Recover (data from)
+a broken btrfs filesystem<br/> Date: Mon, 10 Jul 2023 21:23:41
+-0600<br/> Message-ID:
+&lt;87jzv7uo5e.wl-lukeshu@lukeshu.com&gt;<br/></p>
+</div>
+<p>Inspired by a mis-typed <code>dd</code> command, for the last year
+I've been working on a tool for recovering corrupt btrfs filesystems; at
+first idly here and there, but more actively in the last few months. I
+hope to get it incorporated into btrfs-progs, though perhaps that is
+problematic for a few reasons I'll get to. If the code can't be
+incorporated into btrfs-progs, at least the ideas and algorithms should
+be.</p>
+<p><a
+href="https://git.lukeshu.com/btrfs-progs-ng/">https://git.lukeshu.com/btrfs-progs-ng/</a></p>
+<p>Highlights:</p>
+<ul>
+<li><p>In general, it's more tolerant of corrupt filesystems than
+<code>btrfs check --repair</code>, <code>btrfs rescue</code> or
+<code>btrfs restore</code>.</p></li>
+<li><p><code>btrfs-rec inspect rebuild-mappings</code> is a better
+<code>btrfs rescue chunk-recover</code>.</p></li>
+<li><p><code>btrfs-rec inspect rebuild-trees</code> can re-attach lost
+branches to broken B+ trees.</p></li>
+<li><p><code>btrfs-rec inspect mount</code> is a read-only FUSE
+implementation of btrfs. This is conceptually a replacement for
+<code>btrfs restore</code>.</p></li>
+<li><p>It's entirely written in Go. I'm not saying that's a good thing,
+but it's an interesting thing.</p></li>
+</ul>
+<p>Hopefully some folks will find it useful, or at least neat!</p>
+<ul>
+<li><a href="#motivation">1. Motivation</a></li>
+<li><a href="#overview-of-use">2. Overview of use</a></li>
+<li><a href="#prior-art">3. Prior art</a></li>
+<li><a href="#internalsdesign">4. Internals/Design</a></li>
+<li><a href="#overview-of-the-source-tree-layout">4.1. Overview of the
+source tree layout</a></li>
+<li><a href="#base-decisions-cli-structure-go-json">4.2. Base decisions:
+CLI structure, Go, JSON</a></li>
+<li><a href="#algorithms">4.3. Algorithms</a></li>
+<li><a href="#the-rebuild-mappings-algorithm">4.3.1. The
+<code>rebuild-mappings</code> algorithm</a></li>
+<li><a href="#the---rebuild-algorithm">4.3.2. The <code>--rebuild</code>
+algorithm</a></li>
+<li><a href="#rebuilt-forrest-behavior-looking-up-trees">4.3.2.1.
+rebuilt forrest behavior</a></li>
+<li><a href="#rebuilt-individual-tree-behavior">4.3.2.2. rebuilt
+individual tree behavior</a></li>
+<li><a href="#the-rebuild-trees-algorithm">4.3.3. The
+<code>rebuild-trees</code> algorithm</a></li>
+<li><a href="#initialization">4.3.3.1. initialization</a></li>
+<li><a href="#the-main-loop">4.3.3.2. the main loop</a></li>
+<li><a href="#graph-callbacks">4.3.3.3. graph callbacks</a></li>
+<li><a href="#future-work">5. Future work</a></li>
+<li><a href="#problems-with-merging-this-code-into-btrfs">6. Problems
+for merging this code into btrfs-progs</a></li>
+</ul>
+<h1 id="motivation">1. Motivation</h1>
+<p>Have you ever ended up with a corrupt btrfs filesystem (through no
+fault of btrfs itself, but perhaps a failing drive, or a mistaken
+<code>dd</code> invocation)? Surely losing less than 100MB of data from
+a drive should not render hundreds of GB of perfectly intact data
+unreadable! And yet, the existing tools are unable to even attempt to
+read that data:</p>
+<pre><code>$ btrfs check --repair --force dump-zero.1.img
+enabling repair mode
+Opening filesystem to check...
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+ERROR: cannot open file system</code></pre>
+<p>or</p>
+<pre><code>$ btrfs check --init-extent-tree --force dump-zero.1.img
+Opening filesystem to check...
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+ERROR: cannot open file system</code></pre>
+<p>or</p>
+<pre><code>$ btrfs check --init-csum-tree --force dump-zero.1.img
+Creating a new CRC tree
+Opening filesystem to check...
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+ERROR: cannot open file system</code></pre>
+<p>or</p>
+<pre><code>$ btrfs rescue chunk-recover dump-zero.1.img
+Scanning: DONE in dev0
+corrupt node: root=1 block=160410271744 slot=0, corrupt node: root=1 block=160410271744, nritems too large, have 39 expect range [1,0]
+Couldn&#39;t read tree root
+open with broken chunk error</code></pre>
+<p>or</p>
+<pre><code>$ btrfs rescue zero-log dump-zero.1.img
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ERROR: cannot read chunk root
+ERROR: could not open ctree</code></pre>
+<p>or</p>
+<pre><code>$ mkdir out
+$ btrfs restore dump-zero.1.img out
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+Could not open root, trying backup super
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+Could not open root, trying backup super
+ERROR: superblock bytenr 274877906944 is larger than device size 256060514304
+Could not open root, trying backup super</code></pre>
+<p>or</p>
+<pre><code>$ btrfs restore --list-roots dump-zero.1.img
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+Could not open root, trying backup super
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+Could not open root, trying backup super
+ERROR: superblock bytenr 274877906944 is larger than device size 256060514304
+Could not open root, trying backup super</code></pre>
+<p>or</p>
+<pre><code>$ btrfs-find-root dump-zero.1.img
+WARNING: cannot read chunk root, continue anyway
+Superblock thinks the generation is 6596071
+Superblock thinks the level is 1</code></pre>
+<p>Well, have I got a tool for you!</p>
+<p>(FWIW, I also tried manipulating the filesystem and patching the tools
+to try to get past those errors, only to get a different set of errors.
+Some of these patches I am separately submitting to btrfs-progs.)</p>
+<h1 id="overview-of-use">2. Overview of use</h1>
+<p>There are two <code>btrfs-rec</code> sub-command groups:
+<code>btrfs-rec inspect <var>SUBCMD</var></code> and <code>btrfs-rec
+repair <var>SUBCMD</var></code>, and you can find out about various
+sub-commands with <code>btrfs-rec help</code>. These are both told about
+devices or images with the <code>--pv</code> flag.</p>
+<p><code>btrfs-rec inspect <var>SUBCMD</var></code> commands open the
+filesystem read-only, and (generally speaking) write extracted or
+rebuilt information to stdout. <code>btrfs-rec repair
+<var>SUBCMD</var></code> commands open the filesystem read+write, and
+consume information from <code>btrfs-rec inspect
+<var>SUBCMD</var></code> commands to actually repair the filesystem
+(except I haven't actually implemented any <code>repair</code> commands
+yet... despite the lack of <code>repair</code> commands, I believe that
+<code>btrfs-rec</code> is already useful because of the
+<code>btrfs-rec inspect mount</code> command to get data out of the
+broken filesystem). This split allows you to try things without being
+scared by WARNINGs about not using these tools unless you're an expert
+or have been told to by a developer.</p>
+<p>In the broken <code>dump-zero.1.img</code> example above (which has a
+perfectly intact superblock, but a totally broken
+<code>CHUNK_TREE</code>), to "repair" it I'd:</p>
+<ol type="1">
+<li><p>Start by using <code>btrfs-rec inspect rebuild-mappings</code> to
+rebuild the broken chunk/dev/blockgroup trees:</p>
+<pre><code>$ btrfs-rec inspect rebuild-mappings \
+ --pv=dump-zero.1.img \
+ &gt; mappings-1.json</code></pre></li>
+<li><p>It may only mostly succeed, telling us on stderr about a few
+regions of the image that it wasn't able to figure out the chunks for.
+Using some human-level knowledge, you can write those mappings yourself,
+inserting them into the generated <code>mappings.json</code>, and ask
+<code>rebuild-mappings</code> to normalize what you wrote:</p>
+<pre><code>$ btrfs-rec inspect rebuild-mappings \
+ --pv=dump-zero.1.img \
+ --mappings=&lt;(sed &lt;mappings-1.json \
+ -e &#39;2a{&quot;LAddr&quot;:5242880,&quot;PAddr&quot;:{&quot;Dev&quot;:1,&quot;Addr&quot;:5242880},&quot;Size&quot;:1},&#39; \
+ -e &#39;2a{&quot;LAddr&quot;:13631488,&quot;PAddr&quot;:{&quot;Dev&quot;:1,&quot;Addr&quot;:13631488},&quot;Size&quot;:1},&#39;) \
+ &gt; mappings-2.json</code></pre></li>
+<li><p>Now that it has functioning chunk/dev/blockgroup trees, we can
+use <code>btrfs-rec inspect rebuild-trees</code> to rebuild other trees
+that rely on those:</p>
+<pre><code>$ btrfs-rec inspect rebuild-trees \
+ --pv=dump-zero.1.img \
+ --mappings=mappings-2.json \
+ &gt; trees.json</code></pre></li>
+<li><p>Now that (hopefully) everything that was damaged has been
+reconstructed, we can use <code>btrfs-rec inspect mount</code> to mount
+the filesystem read-only and copy out our data:</p>
+<pre><code>$ mkdir mnt
+$ sudo btrfs-rec inspect mount \
+ --pv=dump-zero.1.img \
+ --mappings=mappings-2.json \
+ --trees=trees.json \
+ ./mnt</code></pre></li>
+</ol>
+<p>This example is fleshed out more (and the manual edits to
+<code>mappings.json</code> explained more) in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/examples/main.sh?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./examples/main.sh</code></a>.</p>
+<h1 id="prior-art">3. Prior art</h1>
+<p>Comparing <code>btrfs-rec inspect mount</code> with the existing <a
+href="https://github.com/adam900710/btrfs-fuse">https://github.com/adam900710/btrfs-fuse</a>
+project:</p>
+<ul>
+<li>Again, mine has better fault tolerance</li>
+<li>Mine is read-only</li>
+<li>Mine supports xattrs ("TODO" in Adam's)</li>
+<li>Mine supports separate inode address spaces for subvolumes; Adam's
+doesn't due to limitations in FUSE; mine works around this by lazily
+setting up separate mountpoints for each subvolume (though this does
+mean that the process needs to run as root, which is a bummer).</li>
+</ul>
+<h1 id="internalsdesign">4. Internals/Design</h1>
+<h2 id="overview-of-the-source-tree-layout">4.1. Overview of the source
+tree layout</h2>
+<ul>
+<li><p><a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/examples?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>examples/</code></a>
+has example scripts showing how to use <code>btrfs-rec</code>.</p></li>
+<li><p><a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfs?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/btrfs/</code></a>
+is the core btrfs implementation.</p></li>
+<li><p><a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfscheck?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/btrfscheck/</code></a>
+and <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/btrfsutil/</code></a>
+are libraries for "btrfs-progs" type programs, that are userland-y
+things that I thought should be separate from the core implementation;
+something that frustrated me about libbtrfs was having to figure out "is
+this thing here in support of btrfs bits-on-disk, or in support of a
+higher-level 'how btrfs-progs wants to think about things'?"</p></li>
+<li><p><a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>cmd/btrfs-rec/</code></a>
+is where the command implementations live. If a sub-command fits in a
+single file, it's
+<code>cmd/btrfs-rec/inspect_<var>SUBCMD</var>.go</code>, otherwise, it's
+in a separate <code>cmd/btrfs-rec/inspect/<var>SUBCMD</var>/</code>
+package.</p></li>
+<li><p><a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/textui?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/textui/</code></a>
+is reasonably central to how the commands implement a text/CLI
+user-interface.</p></li>
+<li><p><a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/binstruct?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/binstruct/</code></a>,
+<a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/diskio?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/diskio/</code></a>,
+and <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/streamio?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/streamio/</code></a>
+are non-btrfs-specific libraries related to the problem domain.</p></li>
+<li><p><a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/containers?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/containers/</code></a>,
+<a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/fmtutil?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/fmtutil/</code></a>,
+<a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/maps?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/maps/</code></a>,
+<a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/slices?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/slices/</code></a>,
+and <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/profile?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>lib/profile/</code></a>
+are all generic Go libraries that have nothing to do with btrfs or the
+problem domain, but weren't in the Go standard library and I didn't
+find/know-of existing implementations that I liked. Of these, all but
+<code>containers</code> are pretty simple utility libraries. Also, some
+of these things have been added to the standard library since I started
+the project.</p></li>
+</ul>
+<h2 id="base-decisions-cli-structure-go-json">4.2. Base decisions: CLI
+structure, Go, JSON</h2>
+<p>I started with trying to enhance btrfs-progs, but ended up writing a
+wholly new program in Go, for several reasons:
+<ul>
+<li><p>writing a new thing: I was having to learn both the btrfs-progs
+codebase and how btrfs-bits-on-disk work, and it got to the point that I
+decided I should just focus on learning btrfs-bits-on-disk.</p></li>
+<li><p>writing a new thing: It was becoming increasingly apparent to me
+that it was going to be an uphill-fight of having recovery-tools share
+the same code as the main-tools, as the routines used by the main-tools
+rightly have validity checks, where recovery-tools want to say "yes, I
+know it's invalid, can you give it to me anyway?".</p></li>
+<li><p>writing it in not-C: I love me some C, but higher level languages
+are good for productivity. And since I was trying to write a whole lot of code
+at once, I needed a productivity boost.</p></li>
+<li><p>writing it in not-C: This forced me to learn btrfs-bits-on-disk
+better, instead of just cribbing from btrfs-progs. That knowledge is
+particularly important for having ideas on how to deal with corrupt
+bits-on-disk.</p></li>
+<li><p>writing it in Go: At the time I started, my day job was writing
+Go, so I had Go swapped into my brain. And Go still feels close to C but
+provides <em>a lot</em> of niceness and safety over C.</p></li>
+</ul>
+<p>It turned out that Go was perhaps not the best choice, but we'll come
+back to that.</p>
+<p>I wanted to separate things into a pipeline. For instance: Instead of
+<code>btrfs rescue chunk-recover</code> trying to do everything to
+rebuild a broken chunk tree, I wanted to separate I/O from computation
+from repairs. So I have
+<code>btrfs-rec inspect rebuild-mappings scan</code> that reads all the
+info necessary to rebuild the chunk tree, then dump that as a 2GB glob
+of JSON. Then I can feed that JSON to
+<code>btrfs-rec inspect rebuild-mappings process</code> which actually
+rebuilds the mappings in the chunk tree, and dumps them as JSON. And
+then other commands can consume that <code>mappings.json</code> to use
+that instead of trying to read the chunk tree from the actual FS, so
+that you don't have to make potentially destructive writes to inspect an
+FS with a broken chunk tree, and can inspect it more forensically. Or
+then use <code>btrfs-rec repair
+<var>SOME_SUBCMD_I_HAVENT_WRITTEN_YET</var></code> to write that chunk
+tree in <code>mappings.json</code> back to the filesystem.</p>
+<p>(But also, the separate steps thing was useful just so I could
+iterate on the algorithms of <code>rebuild-mappings process</code>
+separately from having to scan the entire FS)</p>
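+<p>To illustrate the shape of that pipeline (note: the
+<code>--scan=</code> flag here is hypothetical, just to sketch the
+data-flow between the steps):</p>
+<pre><code># (--scan= is a hypothetical flag, for illustration only)
+$ btrfs-rec inspect rebuild-mappings scan \
+    --pv=dump-zero.1.img \
+    &gt; scan.json
+$ btrfs-rec inspect rebuild-mappings process \
+    --scan=scan.json \
+    &gt; mappings.json</code></pre>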
+<p>So, I made the decision that <code>btrfs-rec inspect
+<var>SUBCMD</var></code> commands should all only open the FS read-only,
+and output their work to a separate file; that writing that info back to
+the FS should be separate in <code>btrfs-rec repair
+<var>SUBCMD</var></code>.</p>
+<p>For connecting those parts of the pipeline, I chose JSON, for a few
+reasons:</p>
+<ul>
+<li><p>I wanted something reasonably human-readable, so that I could
+debug it easier.</p></li>
+<li><p>I wanted something reasonably human-readable, so that human
+end-users could make manual edits; for example, in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/examples/main.sh?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>examples/main.sh</code></a>
+I have an example of manually editing <code>mappings.json</code> to
+resolve a region that the algorithm couldn't figure out, but with
+knowledge of what caused the corruption a human can.</p></li>
+<li><p>I didn't want to invent my own DSL and have to handle writing a
+parser. (This part didn't pay off! See below.)</p></li>
+<li><p>I wanted something that I thought would have good support in a
+variety of languages, so that if Go is problematic for getting things
+merged upstream it could be rewritten in C (or maybe Rust?) piece-meal
+where each subcommand can be rewritten one at a time.</p></li>
+</ul>
+<p>It turned out that JSON was perhaps not the best choice.</p>
+<p>OK, so: Go and/or JSON maybe being mistakes:</p>
+<ul>
+<li><p>I spent a lot of time getting the garbage collector to not just
+kill performance.</p></li>
+<li><p>The <code>btrfs-rec inspect rebuild-mappings
+<var>SUBCMD</var></code> subcommands all throw a lot of data through the
+JSON encoder/decoder, and I learned that the Go stdlib
+<code>encoding/json</code> package has memory use that grows O(n^2)
+(-ish? I didn't study the implementation, but that's what the curve
+looks like just observing it) on the size of the data being shoved
+through it, so I had to go take a break and go write
+https://pkg.go.dev/git.lukeshu.com/go/lowmemjson which is a
+mostly-drop-in-replacement that tries to be as close-as possible to O(1)
+memory use. So I did end up having to write my own parser anyway
+:(</p></li>
+</ul>
+<h2 id="algorithms">4.3. Algorithms</h2>
+<p>There are 3 algorithms of note in <code>btrfs-rec</code>, that I
+think are worth getting into mainline btrfs-progs even if the code of
+<code>btrfs-rec</code> doesn't get in:</p>
+<ol type="1">
+<li><p>The <code>btrfs-rec inspect rebuild-mappings</code> algorithm
+to rebuild information from the <code>CHUNK_TREE</code>,
+<code>DEV_TREE</code>, and <code>BLOCK_GROUP_TREE</code>.</p></li>
+<li><p>The <code>btrfs-rec --rebuild</code> algorithm to cope with
+reading broken B+ trees.</p></li>
+<li><p>The <code>btrfs-rec inspect rebuild-trees</code> algorithm to
+re-attach lost branches to broken B+ trees.</p></li>
+</ol>
+<h3 id="the-rebuild-mappings-algorithm">4.3.1. The
+<code>rebuild-mappings</code> algorithm</h3>
+<p>(This step-zero scan is
+<code>btrfs-rec inspect rebuild-mappings scan</code>, and principally
+lives in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./lib/btrfsutil/scan.go</code></a>
+and <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./cmd/btrfs-rec/inspect/rebuildmappings/scan.go</code></a>)</p>
+<ol start="0" type="1">
+<li>Similar to <code>btrfs rescue chunk-recover</code>, scan each device
+for things that look like nodes; keep track of:
+<ul>
+<li>Checksums of every block on the device</li>
+<li>Which physical addresses contain nodes that claim to be at a given
+logical address.</li>
+<li>Any found Chunk items, BlockGroup items, DevExtent, and CSum items.
+Keep track of the key for each of these, and for CSum items also track
+the generation.</li>
+</ul></li>
+</ol>
+<p>Create a bucket of the data from Chunks, DevExtents, and BlockGroups;
+these are mostly redundant: a Chunk and a DevExtent+BlockGroup store pretty
+much the same information, so we can use one to reconstruct the other. How
+we "merge" these and handle conflicts is in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfs/btrfsvol/lvm.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n121"><code>./lib/btrfs/btrfsvol/lvm.go:addMapping()</code></a>,
+I don't think this part is particularly clever, but given that
+<code>btrfs rescue chunk-recover</code> crashes if it encounters two
+overlapping chunks, I suppose I should spell it out:</p>
+<ul>
+<li><p>A "mapping" is represented as a group of 4 things:</p>
+<ul>
+<li>logical address</li>
+<li>a list of 1 or more physical addresses (device ID and offset)</li>
+<li>size, and a Boolean indicator of whether the size is "locked"</li>
+<li>block group flags, and a Boolean presence-indicator</li>
+</ul></li>
+<li><p>Mappings must be merged if their logical or physical regions
+overlap.</p></li>
+<li><p>If a mapping has a "locked" size, then when merging it may
+subsume smaller mappings with unlocked sizes, but its size cannot be
+changed; trying to merge a locked-size mapping with another mapping that
+is not for a subset region should return an error.</p></li>
+<li><p>If a mapping has block group flags present, then those flags may
+not be changed; it may only be merged with another mapping that does not
+have flags present, or has identical flags.</p></li>
+<li><p>When returning an error because of overlapping non-mergeable
+mappings, just log an error on stderr and keep going. That's an
+important design thing that is different than normal filesystem code; if
+there's an error, yeah, detect and notify about it, <strong>but don't
+bail out of the whole routine</strong>. Just skip that one item or
+whatever.</p></li>
+</ul>
+<p>Now that we know how to "add a mapping", let's do that:</p>
+<p>(The following main-steps are
+<code>btrfs-rec inspect rebuild-mappings process</code>, and principally
+live in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/process.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./cmd/btrfs-rec/inspect/rebuildmappings/process.go</code></a>)</p>
+<ol type="1">
+<li><p>Add all found Chunks.</p></li>
+<li><p>Add all found DevExtents.</p></li>
+<li><p>Add a physical:logical mapping of length nodesize for each node
+that was found.</p></li>
+<li><p>Any mappings from steps 2 or 3 that are missing blockgroup flags
+(that is: they weren't able to be merged with a mapping from step 1),
+use the found BlockGroups to fill in those flags.</p></li>
+<li><p>Now we'll merge all found CSum items into a map of the sums of
+the logical address space. Sort all of the csum items by generation,
+then by address. Loop over them in that order, inserting their sums into
+the map. If two csum items overlap, but agree about the sums of the
+overlapping region, that's fine, just take their union. For overlaps
+that disagree, items with a newer generation kick out items with an
+older generation. If disagreeing items have the same generation... I
+don't think that can happen except by a filesystem bug (i.e. not by a
+failing drive or other external corruption), so I wasn't too concerned
+about it; I just log an error on stderr and skip the later-processed
+item. See <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go</code></a>.</p>
+<p>Look at regions of the logical address space that meet all 3
+criteria:</p>
+<ul>
+<li>we have CSum items for them</li>
+<li>we have a BlockGroup for them</li>
+<li>we don't have a Chunk/DevExtent mapping them to the physical address
+space.</li>
+</ul>
+<p>Pair those CSums up with BlockGroups, and for each BlockGroup, search
+the list of checksums of physical blocks to try to find a physical
+region that matches the logical csums (and isn't already mapped to a
+different logical region). I used a Knuth-Morris-Pratt search, modified
+to handle holes in the logical csum list as wildcards.</p>
+<p>Insert any found mappings into our bucket of mappings.</p></li>
+<li><p>Do the same again, but with a fuzzy search (we can re-use the
+csum map of the logical address space). My implementation of this is
+comparatively time and space intensive; I just walk over the entire
+unmapped physical address space, noting what percent match each
+BlockGroup has if placed at that location. I keep track of the best 2
+matches for
+each BlockGroup. If the best match is better than a 50% match, and the
+second best is less than a 50% match, then I add the best match. In my
+experience, the best match is &gt;90% (or at whatever the maximum
+percent is for how much of the BlockGroup has logical sums), and the
+second best is 0% or 1%. The point of tracking both is that if there
+isn't a clear-cut winner, I don't want it to commit to a potentially
+wrong choice.</p></li>
+</ol>
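+<p>As an illustration, here is a sketch of the exact-match check that
+such a wildcard-tolerant search is built around, assuming crc32c's
+4-byte sums (the real implementation differs; this is just the core
+idea):</p>
+<pre><code>package sketch
+
+// matchesAt reports whether a block group&#39;s logical csums (holes in
+// the logical csum list represented as nil entries) match the
+// physical-block csums starting at the candidate offset; holes act as
+// wildcards that match anything.
+func matchesAt(logical []*[4]byte, physical [][4]byte, off int) bool {
+    if off+len(logical) &gt; len(physical) {
+        return false
+    }
+    for i, want := range logical {
+        if want == nil {
+            continue // hole: wildcard
+        }
+        if *want != physical[off+i] {
+            return false
+        }
+    }
+    return true
+}</code></pre>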
+<h3 id="the---rebuild-algorithm">4.3.2. The <code>--rebuild</code>
+algorithm</h3>
+<p>The <code>--rebuild</code> flag is implied by the
+<code>--trees=trees.json</code> flag, and triggers an algorithm that
+allows "safely" reading from a broken B+ tree, rather than the usual B+
+tree lookup and search functions. I probably should have tried to
+understand the <code>btrfs restore</code> algorithm; maybe I reinvented
+the wheel...</p>
+<p>This algorithm requires a list of all nodes on the filesystem; we
+find these using the same scan as above (<a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./lib/btrfsutil/scan.go</code></a>),
+the same procedure as <code>btrfs rescue chunk-recover</code>.</p>
+<p>We walk all of those nodes, and build a reasonably lightweight
+in-memory graph of all nodes (<a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/graph.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./lib/btrfsutil/graph.go</code></a>),
+tracking</p>
+<ul>
+<li>each node's
+<ul>
+<li>logical address</li>
+<li>level</li>
+<li>generation</li>
+<li>tree</li>
+<li>each item's key and size</li>
+</ul></li>
+<li>each keypointer's
+<ul>
+<li>source node</li>
+<li>source slot within the node</li>
+<li>tree of the source node</li>
+<li>destination node</li>
+<li>destination level implied by the level of the source node</li>
+<li>destination key</li>
+<li>destination generation</li>
+</ul></li>
+<li>logical addresses and error messages for nodes that are pointed to
+by a keypointer or the superblock, but can't be read (because that
+logical address isn't mapped, or it doesn't look like a node,
+or...)</li>
+<li>an index such that for a given node we can quickly list both the
+keypointers originating at that node and those pointing to that
+node.</li>
+</ul>
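+<p>Roughly, the shape of that graph (a sketch with illustrative names;
+the real types in <code>./lib/btrfsutil/graph.go</code> differ in
+details):</p>
+<pre><code>package sketch
+
+type NodeID int64 // a node&#39;s logical address
+
+type Key struct {
+    ObjectID uint64
+    ItemType uint8
+    Offset   uint64
+}
+
+// Node records the per-node facts listed above.
+type Node struct {
+    Level      uint8
+    Generation uint64
+    Owner      int64 // tree ID
+    ItemKeys   []Key
+    ItemSizes  []uint32
+}
+
+// KP records the per-keypointer facts listed above.
+type KP struct {
+    FromNode NodeID
+    FromSlot int
+    FromTree int64
+    ToNode   NodeID
+    ToLevel  uint8 // implied by the level of FromNode
+    ToKey    Key
+    ToGen    uint64
+}
+
+// Graph ties it together, including the bad-node errors and the
+// per-node keypointer index.
+type Graph struct {
+    Nodes     map[NodeID]Node
+    BadNodes  map[NodeID]error // pointed-to but unreadable, and why
+    EdgesFrom map[NodeID][]*KP // keypointers originating at a node
+    EdgesTo   map[NodeID][]*KP // keypointers pointing to a node
+}</code></pre>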
+<h4 id="rebuilt-forrest-behavior-looking-up-trees">4.3.2.1. rebuilt
+forrest behavior (looking up trees)</h4>
+<p>(see: <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/rebuilt_forrest.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./lib/btrfsutil/rebuilt_forrest.go</code></a>)</p>
+<ul>
+<li>The <code>ROOT_TREE</code>, <code>CHUNK_TREE</code>,
+<code>TREE_LOG</code>, and <code>BLOCK_GROUP_TREE</code> (the trees
+pointed to directly by the superblock) work as you'd expect.</li>
+<li>For other trees, we (as you'd expect) look up the root item in the
+rebuilt <code>ROOT_TREE</code>, and then (if rootitem.ParentUUID is
+non-zero) eagerly also look up the parent tree (recursing on ourself;
+see the sketch after this list).
+We try to use the <code>UUID_TREE</code> tree to help with this, but
+fall back to just doing a linear scan over the <code>ROOT_TREE</code>.
+If we fail to look up the parent tree (or its parent, or a more distant
+ancestor), then (depending on a flag) we either make a note of that, or
+error out and fail to look up the child tree. For <code>--rebuild</code>
+and <code>--trees=trees.json</code> we are permissive of this error, and
+just make note of it; but we'll re-use this algorithm in the
+<code>rebuild-trees</code> algorithm below, and it needs the more strict
+handling.</li>
+<li>When creating the rebuilt individual tree, we start by adding the
+root node specified by the superblock/root-item. But we may also add
+additional root nodes grafted on to the tree by the
+<code>--trees=trees.json</code> flag or by the
+<code>rebuild-trees</code> algorithm below. So a tree may have more than
+1 root node.</li>
+</ul>
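+<p>A sketch of that tree-lookup behavior, with the lookup hooks
+abstracted to function fields (illustrative; the real logic in
+<code>rebuilt_forrest.go</code> differs in details):</p>
+<pre><code>package sketch
+
+type UUID [16]byte
+
+type RootItem struct {
+    ParentUUID UUID
+    // ... root node pointer, generation, etc.
+}
+
+type Tree struct {
+    Roots  []int64 // root node IDs; may grow beyond 1 (grafted roots)
+    Parent *Tree
+}
+
+type Forrest struct {
+    strict         bool // rebuild-trees: fail if an ancestor is missing
+    lookupRootItem func(treeID int64) (RootItem, error)
+    resolveUUID    func(UUID) (int64, error) // UUID_TREE, else ROOT_TREE scan
+    noteErr        func(treeID int64, err error)
+}
+
+func (f *Forrest) LookupTree(treeID int64) (*Tree, error) {
+    rootItem, err := f.lookupRootItem(treeID)
+    if err != nil {
+        return nil, err
+    }
+    tree := &amp;Tree{ /* seed Roots from rootItem */ }
+    if rootItem.ParentUUID != (UUID{}) {
+        parentID, err := f.resolveUUID(rootItem.ParentUUID)
+        if err == nil {
+            tree.Parent, err = f.LookupTree(parentID) // recurse on ourself
+        }
+        if err != nil {
+            if f.strict {
+                return nil, err // refuse to look up the child tree
+            }
+            f.noteErr(treeID, err) // --rebuild/--trees: just make a note
+        }
+    }
+    return tree, nil
+}</code></pre>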
+<h4 id="rebuilt-individual-tree-behavior">4.3.2.2. rebuilt individual
+tree behavior</h4>
+<p>(see: <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/rebuilt_tree.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./lib/btrfsutil/rebuilt_tree.go</code></a>)</p>
+<p>In order to read from a tree, we first have to build a few indexes.
+We store these indexes in an Adaptive Replacement Cache; they are all
+re-buildable based on the tree's list of roots and the above graph, so
+if we have a bunch of trees we don't need to keep all of this in memory
+at once. Note that this is done 100% with the in-memory graph; we don't
+need to read anything from the filesystem during these procedures.</p>
+<ul>
+<li><p>The first index we build is the "node index". This is an index
+that for every node tells us what root(s) the tree would need to have in
+order for the tree to include that node, and also what the highest
+acceptable item key in the node would be if the tree includes that
+root. We track both a <code>loMaxItem</code> and a
+<code>hiMaxItem</code>, in case the tree is really broken and there are
+multiple paths from the root to the node, as these different paths may
+imply different max-item constraints. Put more concretely, the type of
+the index is:</p>
+<pre><code>map[ nodeID → map[ rootNodeID → {loMaxItem, hiMaxItem} ] ]</code></pre>
+<p>We'll do a loop over the graph, using dynamic-programming memoization
+to figure out ordering and avoid processing the same node twice; for
+each node we'll</p>
+<ul>
+<li><p>Check whether the owner-tree is this tree or one of this tree's
+ancestors (and if it's an ancestor, that the node's generation isn't
+after the point that the child tree was forked from the parent tree). If
+not, we are done processing that node (record an empty/nil set of roots
+for it).</p></li>
+<li><p>Create an empty map of <code>rootID</code> →
+{<code>loMaxItem</code>, <code>hiMaxItem</code>}.</p></li>
+<li><p>Look at each keypointer that points at the node and:</p>
+<ul>
+<li><p>Skip the keypointer if its expectations of the node aren't met:
+if the level, generation, and min-key constraints don't match up. If the
+keypointer isn't in the last slot in the source node, we also go ahead
+and include checking that the destination node's max-key is under the
+min-key of the keypointer in the next slot, since that's cheap to do
+now.</p></li>
+<li><p>Skip the keypointer if its source node's owner-tree isn't this
+tree or one of this tree's ancestors (and if it's an ancestor, that the
+node's generation isn't after the point that the child tree was forked
+from the parent tree).</p></li>
+<li><p>Recurse (with the dynamic-programming memoization) to index the
+keypointer's source node.</p></li>
+<li><p>For every root that would result in the keypointer's source
+node being included in the tree:</p>
+<ol type="a">
+<li><p>If the keypointer is in the last slot, look at what the source
+node's last-item constraints would be if that root is included, so we
+can now check the max-item of our destination node. We check against
+the <code>hiMaxItem</code>, since if there is any valid path from the
+root to this node, then we want to be permissive and include it. If
+that check fails, then we're done with this keypointer. Also, make
+note of those <code>loMaxItem</code> and <code>hiMaxItem</code>
+values; we'll use them again in just a moment.</p></li>
+<li><p>Otherwise, set both <code>loMaxItem</code> and
+<code>hiMaxItem</code> to 1-under the min-item of the keypointer in the
+next slot.</p></li>
+<li><p>Insert that <code>loMaxItem</code> and <code>hiMaxItem</code>
+pair into the <code>rootID</code> → {<code>loMaxItem</code>,
+<code>hiMaxItem</code>} map we created above. If an entry already
+exists for this root (since a broken tree might have multiple paths
+from the root to our node), then set <code>loMaxItem</code> to the min
+of the existing entry and our value, and <code>hiMaxItem</code> to the
+max.</p></li>
+</ol></li>
+</ul></li>
+<li><p>If that <code>rootID</code> → {<code>loMaxItem</code>,
+<code>hiMaxItem</code>} map is still empty, then consider this node to
+be a (potential) root, and insert <code>rootID=thisNode</code> →
+{<code>loMaxItem=maxKey</code>, <code>hiMaxItem=maxKey</code>} (where
+<code>maxKey</code> is the maximum value of the key datatype).</p></li>
+<li><p>Take that <code>rootID</code> → {<code>loMaxItem</code>,
+<code>hiMaxItem</code>} map and insert it into the index as the entry
+for this node.</p></li>
+</ul></li>
+<li><p>The next index we build is the "item index". This is a "sorted
+map" (implemented as a red-black tree, supporting sub-range iteration)
+of <code>key</code> → {<code>nodeID</code>, <code>slotNumber</code>}; a
+map that for each key tells us where to find the item with that key.</p>
+<ul>
+<li><p>Loop over the node index, and for each node check if both (a) it
+has <code>level==0</code> (is a leaf node containing items), and (b) its
+set of roots that would include it has any overlap with the tree's set
+of roots.</p></li>
+<li><p>Loop over each of those included leaf nodes, and loop over the
+items in each node. Insert the <code>key</code> → {<code>nodeID</code>,
+<code>slot</code>} into our sorted map. If there is already an entry for
+that key, decide which one wins by:</p>
+<ul>
+<li><p>Use the one from the node with the owner-tree that is closer to
+this tree; node with owner=thisTree wins over a node with
+owner=thisTree.parent, which would win over a node with
+owner=thisTree.parent.parent. If that's a tie, then...</p></li>
+<li><p>Use the one from the node with the higher generation. If that's a
+tie, then...</p></li>
+<li><p>I don't know, I have the code <code>panic</code>:</p>
+<pre><code>// TODO: This is a panic because I&#39;m not really sure what the
+// best way to handle this is, and so if this happens I want the
+// program to crash and force me to figure out how to handle it.
+panic(fmt.Errorf(&quot;dup nodes in tree=%v: old=%v=%v ; new=%v=%v&quot;,
+ tree.ID,
+ oldNode, tree.forrest.graph.Nodes[oldNode],
+ newNode, tree.forrest.graph.Nodes[newNode]))</code></pre></li>
+</ul></li>
+</ul>
+<p>Note that this algorithm means that for a given node we may use a few
+items from that node, while having other items from that same node be
+overridden by another node.</p></li>
+<li><p>The final index we build is the "error index". This is an index
+of what errors correspond to which range of keys, so that we can report
+them, and give an idea of "there may be entries missing from this
+directory" and similar.</p>
+<p>For each error, we'll track the min-key and max-key of the range it
+applies to, the node it came from, and what the error string is. We'll
+store these into an interval tree keyed on that min-key/max-key
+range.</p>
+<ul>
+<li><p>Create an empty set <code>nodesToProcess</code>. Now populate
+it:</p>
+<ul>
+<li><p>Once again, we'll loop over the node index, but this time we'll
+only check that there's overlap between the set of roots that would
+include the node and the tree's set of roots. For each node that is
+included in this tree, insert both that node itself and all node IDs
+that its keypointers point to into the <code>nodesToProcess</code>
+set.</p></li>
+<li><p>Also insert all of the tree's roots into
+<code>nodesToProcess</code>; this is in case the superblock/root-item
+points to an invalid node that we couldn't read.</p></li>
+</ul></li>
+<li><p>Now loop over <code>nodesToProcess</code>. For each node, create
+an empty list of errors. Use the keypointers pointing to it and the min
+<code>loMaxItem</code> from the node index to construct a set of
+expectations for the node; this should be reasonably straightforward,
+given:</p>
+<ul>
+<li><p>If different keypointers have disagreeing levels, insert an error
+into the list, and don't bother with checking the node's
+level.</p></li>
+<li><p>If different keypointers have disagreeing generations, insert an
+error into the list, and don't bother with checking the node's
+generation.</p></li>
+<li><p>If different keypointers have different min-item expectations,
+use the max of them.</p></li>
+</ul>
+<p>Then:</p>
+<ul>
+<li>If the node is a "bad node" in the graph, insert the error message
+associated with it. Otherwise, check those expectations against the node
+in the graph.</li>
+</ul>
+<p>If the list of error messages is non-empty, then insert their
+concatenation into the interval tree, with the range set to the min of
+the min-item expectations from the keypointers through the max of the
+<code>hiMaxItem</code>s from the node index. If the min min-item
+expectation turns out to be higher than the max <code>hiMaxItem</code>,
+then set the range to the zero-key through the max-key.</p></li>
+</ul></li>
+</ul>
+<p>From there, it should be trivial to implement the usual B+ tree
+operations using those indexes; exact-lookup using the item index, and
+range-lookups and walks using the item index together with the error
+index. Efficiently searching the <code>CSUM_TREE</code> requires knowing
+item sizes, so that's why we recorded the item sizes into the graph.</p>
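+<p>For instance, exact-lookup is just an item-index query plus one read
+from disk (a sketch; the real item index is a red-black tree rather
+than a plain map, so that range lookups work too):</p>
+<pre><code>package sketch
+
+import &quot;errors&quot;
+
+type NodeID int64
+
+type Key struct {
+    ObjectID uint64
+    ItemType uint8
+    Offset   uint64
+}
+
+type ItemPtr struct {
+    Node NodeID
+    Slot int
+}
+
+type RebuiltTree struct {
+    itemIndex map[Key]ItemPtr
+}
+
+var ErrNotFound = errors.New(&quot;item not found&quot;)
+
+// Lookup resolves a key to the {node, slot} holding the winning copy
+// of that item; the caller then reads the item body from that node.
+func (t *RebuiltTree) Lookup(k Key) (ItemPtr, error) {
+    ptr, ok := t.itemIndex[k]
+    if !ok {
+        return ItemPtr{}, ErrNotFound
+    }
+    return ptr, nil
+}</code></pre>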
+<h3 id="the-rebuild-trees-algorithm">4.3.3. The
+<code>rebuild-trees</code> algorithm</h3>
+<p>The <code>btrfs-rec inspect rebuild-trees</code> algorithm finds
+nodes to attach as extra roots to trees. I think that conceptually it's
+the simplest of the 3 algorithms, but it turned out to be the hardest
+to get right. So... maybe more than for the others, reference the
+source code (<a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./cmd/btrfs-rec/inspect/rebuildtrees/</code></a>),
+because I might forget some small but important detail.</p>
+<p>The core idea here is that we're just going to walk each tree,
+inspecting each item in the tree, and checking for any items that are
+implied by other items (e.g.: a dir entry item implies the existence of
+an inode item for the inode that it points at). If an implied item is not
+in the tree, but is in some other node, then we look at which potential
+roots we could add to the tree that would add that other node. Then,
+after we've processed all of the items in the filesystem, we go add
+those various roots to the various trees, keeping track of which items
+are added or updated. If any of those added/updated items have a version
+with a newer generation on a different node, see what roots we could add
+to get that newer version. Then add those roots, keeping track of items
+that are added/updated. Once we reach a steady state in which the
+newest version of each item has been added, loop back and inspect all
+added/updated items for implied items, keeping track of roots we could
+add. Repeat until a steady state is reached.</p>
+<p>There are lots of little details in that process, some of which are
+for correctness, and some of which are for "it should run in hours
+instead of weeks."</p>
+<h4 id="initialization">4.3.3.1. initialization</h4>
+<p>First up, we're going to build an in-memory graph, same as above.
+But this time, while we're reading the nodes to do that, we're also
+going to watch for some specific items and record a few things about
+them.</p>
+<p>(see: <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./cmd/btrfs-rec/inspect/rebuildtrees/scan.go</code></a>)</p>
+<p>For each {<code>nodeID</code>, <code>slotNumber</code>} pair that
+matches one of these item types, we're going to record:</p>
+<ul>
+<li>flags:
+<ul>
+<li><code>INODE_ITEM</code>s: whether it has the
+<code>INODE_NODATASUM</code> flag set</li>
+</ul></li>
+<li>names:
+<ul>
+<li><code>DIR_INDEX</code> items: the file's name</li>
+</ul></li>
+<li>sizes:
+<ul>
+<li><code>EXTENT_CSUM</code> items: the number of bytes that this is a
+sum for (i.e. the item size over the checksum size, times the block
+size)</li>
+<li><code>EXTENT_DATA</code> items: the number of bytes in this extent
+(i.e. either the item size minus
+<code>offsetof(btrfs_file_extent_item.disk_bytenr)</code> if
+<code>FILE_EXTENT_INLINE</code>, or else the item's
+<code>num_bytes</code>).</li>
+</ul></li>
+<li>data backrefs:
+<ul>
+<li><code>EXTENT_ITEM</code>s and <code>METADATA_ITEM</code>s: a list of
+the same length as the number of refs embedded in the item; for embedded
+ExtentDataRefs, the list entry is the subvolume tree ID that the
+ExtentDataRef points at, otherwise it is zero.</li>
+<li><code>EXTENT_DATA_REF</code> items: a list of length 1, with the
+sole member being the subvolume tree ID that the ExtentDataRef points
+at.</li>
+</ul></li>
+</ul>
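+<p>A sketch of the recorded scan data, keyed by {<code>nodeID</code>,
+<code>slotNumber</code>} (illustrative names, not the real
+<code>scan.go</code> types):</p>
+<pre><code>package sketch
+
+type ItemPtr struct {
+    Node int64 // node ID (logical address)
+    Slot int
+}
+
+// ScanData sketches the four per-item tables described above.
+type ScanData struct {
+    Flags map[ItemPtr]bool   // INODE_ITEMs: is INODE_NODATASUM set?
+    Names map[ItemPtr][]byte // DIR_INDEX items: the file&#39;s name
+    Sizes map[ItemPtr]uint64 // EXTENT_CSUM/EXTENT_DATA items: byte count
+    // EXTENT_ITEMs and METADATA_ITEMs: one entry per embedded ref,
+    // the subvolume tree ID an ExtentDataRef points at (0 for other
+    // ref types); EXTENT_DATA_REF items: a 1-element list.
+    DataBackrefs map[ItemPtr][]int64
+}</code></pre>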
+<h4 id="the-main-loop">4.3.3.2. the main loop</h4>
+<p>(see: <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go</code></a>)</p>
+<p>Start with that scan data (graph + info about items), and also a
+rebuilt forrest from the above algorithm, but with:</p>
+<ul>
+<li><p>the flag set so that it refuses to look up a tree if it can't
+look up all of that tree's ancestors</p></li>
+<li><p>an additional "potential-item index" that is similar to the item
+index. It is generated the same way and can cache/evict the same way;
+the difference is that we invert the check for if the set of roots for a
+node has overlap with the tree's set of roots; we're looking for
+<em>potential</em> nodes that we could add to this tree.</p></li>
+<li><p>some callbacks; we'll get to what we do in these callbacks in a
+bit, but for now, what the callbacks are:</p>
+<ul>
+<li><p>a callback that is called for each added/updated item when we add
+a root.</p></li>
+<li><p>a callback that is called whenever we add a root</p></li>
+<li><p>a callback that intercepts looking up a root item</p></li>
+<li><p>a callback that intercepts resolving an UUID to an object
+ID.</p></li>
+</ul></li>
+</ul>
+<p>(The callbacks are in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go</code></a>)</p>
+<p>We have 5 unordered queues ("work lists"?); these are sets; when
+it's time to drain them, we sort the members and process them in that
+order.</p>
+<ol type="1">
+<li>the tree queue: a list of tree IDs that we need to crawl</li>
+<li>the retry-item queue: for each tree ID, a set of items that we
+should re-process if we add a root to that tree</li>
+<li>the added-item queue: a set of key/tree pairs identifying items that
+have been added by adding a root to a tree</li>
+<li>the settled-item queue: a set of key/tree pairs that have not
+just been added by adding a root, but we've also verified that they are
+the newest-generation item with that key that we could add to the
+tree.</li>
+<li>the augment queue: for each item that we want to add to a tree, the
+list of roots that we could add to get that item.</li>
+</ol>
+<p>The queues all start out empty, except for the tree queue, which we
+seed with the <code>ROOT_TREE</code>, the <code>CHUNK_TREE</code>, and
+the <code>BLOCK_GROUP_TREE</code> (It is a "TODO" task that it should
+probably also be seeded with the <code>TREE_LOG</code>, but as I will
+say below in the "future work" section, I don't actually understand the
+<code>TREE_LOG</code>, so I couldn't implement it).</p>
+<p>Now we're going to loop until the tree queue, added-item queue,
+settled-item queue, and augment queue are all empty (all queues except
+for the retry-item queue). Each loop "pass" has 3 substeps:</p>
+<ol type="1">
+<li><p>Crawl the trees (drain the tree queue, fill the added-item
+queue).</p></li>
+<li><p>Either:</p>
+<ol type="a">
+<li><p>if the added-item queue is non-empty: "settle" those items (drain
+the added-item queue, fill the augment queue and the settled-item
+queue).</p></li>
+<li><p>otherwise: process items (drain the settled-item queue, fill the
+augment queue and the tree queue)</p></li>
+</ol></li>
+<li><p>Apply augments (drain the augment queue and maybe the retry-item
+queue, fill the added-item queue).</p></li>
+</ol>
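+<p>In skeleton form (a sketch of the pass structure; the queue type and
+method names here are my own, not the real <code>rebuild.go</code>
+identifiers):</p>
+<pre><code>package sketch
+
+type queue[T comparable] map[T]struct{}
+
+func (q queue[T]) empty() bool { return len(q) == 0 }
+
+type rebuilder struct {
+    treeQueue, addedItemQueue, settledItemQueue, augmentQueue queue[string]
+}
+
+// Stubs standing in for the substeps described above.
+func (o *rebuilder) crawlTrees()          {} // 1: drain tree queue, fill added-item queue
+func (o *rebuilder) settleItems()         {} // 2a: drain added-item queue, fill augment + settled-item queues
+func (o *rebuilder) processSettledItems() {} // 2b: drain settled-item queue, fill augment + tree queues
+func (o *rebuilder) applyAugments()       {} // 3: drain augment queue, fill added-item queue
+
+func (o *rebuilder) rebuild() {
+    for !o.treeQueue.empty() || !o.addedItemQueue.empty() ||
+        !o.settledItemQueue.empty() || !o.augmentQueue.empty() {
+        o.crawlTrees()
+        if !o.addedItemQueue.empty() {
+            o.settleItems()
+        } else {
+            o.processSettledItems()
+        }
+        o.applyAugments()
+    }
+}</code></pre>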
+<p>OK, let's look at those 3 substeps in more detail:</p>
+<ol type="1">
+<li><p>Crawl the trees; drain the tree queue, fill the added-item
+queue.</p>
+<p>We just look up the tree in the rebuilt forrest, which (per the
+above <code>--rebuild</code> algorithm) will either fail to look up the
+tree, or succeed, and add to that tree the root node from the
+superblock/root-item. Because we set an item-added callback, when adding
+that root it will loop over the nodes added by that root, and call our
+callback for each item in one of the added nodes. Our callback inserts
+each item into the added-item queue. The forrest also calls our
+root-added callback, but because of the way this algorithm works, that
+turns out to be a no-op at this step.</p>
+<p>I mentioned that we added callbacks to intercept the forrest's
+looking up of root items and resolving UUIDs; we override the forrest's
+"lookup root item" routine and "resolve UUID" routine so that, instead
+of doing normal lookups on the <code>ROOT_TREE</code> and
+<code>UUID_TREE</code>, they use the <code>Want<var>XXX</var></code>
+routines that we'll define below in the "graph callbacks" section.</p>
+<p>It shouldn't matter what order this queue is processed in, but I sort
+tree IDs numerically.</p>
+<p>The crawling is fairly fast because it's just in-memory; the only
+accesses to disk are looking up root items and resolving UUIDs.</p></li>
+<li><p>Either:</p>
+<ol type="a">
+<li><p>Settle items from the added-item queue to the settled-item queue
+(and fill the augment queue).</p>
+<p>For each item in the queue, we look in the tree's item index to get
+the {node, slot} pair for it, then we do the same in the tree's
+potential-item index. If the potential-item index contains an entry for
+the item's key, then we check if the potential-item's node should "win"
+over the queue item's node, deciding the "winner" using the same routine
+as when building the item index. If the potential-item's node wins, then
+we add the potential node's set of roots to the augment queue. If the
+queue-item's node wins, then we add the item to the settled-item queue
+(except, as an optimization, if the item is of a type that cannot
+possibly imply the existence of another item, then we just drop it and
+don't add it to the settled-item queue).</p>
+<p>It shouldn't matter what order this queue is processed in, but I sort
+it numerically by treeID and then by item key.</p>
+<p>This step is fairly fast because it's entirely in-memory, making no
+accesses to disk.</p></li>
+<li><p>Process items from the settled-item queue (drain the settled-item
+queue, fill the augment queue and the tree queue).</p>
+<p>This step accesses disk, and so the order we process the queue in
+turns out to be pretty important in order to keep our disk access
+patterns cache-friendly. For the most part, we just sort each queue item
+by tree, then by key. But, we have special handling for
+<code>EXTENT_ITEM</code>s, <code>METADATA_ITEM</code>s, and
+<code>EXTENT_DATA_REF</code> items: We break <code>EXTENT_ITEM</code>s
+and <code>METADATA_ITEM</code>s into "sub-items", treating each ref
+embedded in them as a separate item. For those embedded items that are
+<code>EXTENT_DATA_REF</code>s, and for stand-alone
+<code>EXTENT_DATA_REF</code> items, we sort them not with the
+<code>EXTENT_TREE</code> items, but with the items of the tree that the
+extent data ref points at. Recall that during the initial scan step, we
+took note of which tree every extent data ref points at, so we can
+perform this sort without accessing disk yet. This splitting does mean
+that we may visit/read an <code>EXTENT_ITEM</code> or
+<code>METADATA_ITEM</code> multiple times as we process the queue, but
+to do otherwise would mean solving MinLA (minimum linear arrangement),
+which is NP-hard, and I suspect that even an optimal MinLA solution
+would perform worse than this; there is a
+reasonably lengthy discussion of this in a comment in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n251"><code>./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:sortSettledItemQueue()</code></a>.</p>
+<p>Now we loop over that sorted queue. In the code, this loop is
+deceptively simple. Read the item, then pass it to a function that tells
+us what other items are implied by it. That function is large, but
+simple; it's just a giant table. The trick is how it tells us about
+implied items; we give it a set of callbacks that it calls to tell us
+these things; the real complexity is in the callbacks. These "graph
+callbacks" will be discussed in detail below, but as an illustrative
+example: It may call <code>.WantOff()</code> with a tree ID, object ID,
+item type, and offset to specify a precise item that it believes should
+exist.</p>
+<p>If we encounter a <code>ROOT_ITEM</code>, add the tree described by
+that item to the tree queue.</p></li>
+</ol>
+<p>(Both the "can this item even imply the existence of another item"
+check and the "what items are implied by this item" routine are in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfscheck/graph.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./lib/btrfscheck/graph.go</code></a>)</p></li>
+<li><p>Apply augments; drain the augment queue (and maybe the retry-item
+queue), fill the added-item queue.</p>
+<p>It is at this point that I call out that the augment queue isn't
+implemented as a simple map/set like the others; the
+<code>treeAugmentQueue struct</code> has special handling for sets of
+different sizes, optimizing the space for empty and len()==1 sized sets,
+and falling back to the usual implementation for larger sets;
+this is important because those small sets are the overwhelming
+majority, and otherwise there's no way the program would be able to run
+on my 32GB RAM laptop. Now that I think about it, I bet it would even be
+worth it to add optimized storage for len()==2 sized sets.</p>
+<p>The reason is that each "want" from above is tracked in the queue
+separately; if we were OK merging them, then this optimized storage
+wouldn't be necessary. But we keep them separate, so that:</p>
+<ul>
+<li><p>For all "wants", including ones with empty sets, graph callbacks
+can check if a want has already been processed; avoiding re-doing any
+work (see the description of the graph callbacks below).</p></li>
+<li><p>For "wants" with non-empty sets, we can see how many different
+"wants" could be satisfied with a given root, in order to decide which
+root to choose.</p></li>
+</ul>
+<p>Anyway, we loop over the trees in the augment queue. For each tree,
+we look at all of that tree's choices of root nodes to add (below), and
+decide on a list to add. Then we add each of
+those roots to the tree; the adding of each root triggers several calls
+to our item-added callback (filling the added-item queue), and our
+root-added callback. The root-added callback moves any items from the
+retry-item queue for this tree to the added-item queue.</p>
+<p>How do we decide between choices of root nodes to add? <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n528"><code>./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:resolveTreeAugments()</code></a>
+has a good comment explaining the criteria we'd like to optimize for,
+and then code that does an OK-ish job of actually optimizing for
+that:</p>
+<ul>
+<li><p>It loops over the augment queue for that tree, building a list of
+possible roots, for each possible root making note of 3 things:</p>
+<ol type="a">
+<li><p>how many "wants" that root satisfies,</p></li>
+<li><p>how far from the tree the root's owner is (owner=tree is a distance
+of 0, owner=tree.parent is a distance of 1, owner=tree.parent.parent is
+a distance of 2, and so on), and</p></li>
+<li><p>what the generation of that root is.</p></li>
+</ol></li>
+<li><p>We sort that list first by highest-count-first, then by
+lowest-distance-first, then by highest-generation-first.</p></li>
+<li><p>We create a "return" set and an "illegal" set. We loop over the
+sorted list; for each possible root: if it is in the illegal set, we
+skip it; otherwise, we insert it into the return set, and for each
+"want" that includes this root we add all roots that satisfy that want
+to the illegal set. (A sketch of this selection follows this
+list.)</p></li>
+</ul></li>
+</ol>
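+<p>A sketch of that selection, assuming each "want" is keyed by a
+string and maps to the roots that could satisfy it (illustrative; the
+real <code>resolveTreeAugments()</code> differs in details):</p>
+<pre><code>package sketch
+
+import &quot;sort&quot;
+
+type rootChoice struct {
+    Root     int64  // candidate root node
+    Count    int    // how many &quot;wants&quot; this root satisfies
+    Distance int    // owner distance: 0=tree, 1=tree.parent, ...
+    Gen      uint64 // generation of the root
+}
+
+// pickRoots sorts by count (desc), then distance (asc), then
+// generation (desc); then greedily takes winners, marking every
+// alternative root for each satisfied want as illegal.
+func pickRoots(choices []rootChoice, wants map[string][]int64) map[int64]bool {
+    sort.Slice(choices, func(i, j int) bool {
+        a, b := choices[i], choices[j]
+        if a.Count != b.Count {
+            return a.Count &gt; b.Count
+        }
+        if a.Distance != b.Distance {
+            return a.Distance &lt; b.Distance
+        }
+        return a.Gen &gt; b.Gen
+    })
+    ret := make(map[int64]bool)
+    illegal := make(map[int64]bool)
+    for _, c := range choices {
+        if illegal[c.Root] {
+            continue
+        }
+        ret[c.Root] = true
+        for _, roots := range wants {
+            if !contains(roots, c.Root) {
+                continue
+            }
+            for _, r := range roots {
+                illegal[r] = true
+            }
+        }
+    }
+    return ret
+}
+
+func contains(roots []int64, r int64) bool {
+    for _, x := range roots {
+        if x == r {
+            return true
+        }
+    }
+    return false
+}</code></pre>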
+<p>It is important that the rebuilt forrest have the flag set so that it
+refuses to look up a tree if it can't look up all of that tree's
+ancestors; otherwise the potential-items index would be garbage as we
+wouldn't have a good idea of which nodes are OK to consider; but this
+does have the downside that it won't even attempt to improve a tree with
+a missing parent. Perhaps the algorithm should flip the flag once the
+loop terminates, and then re-seed the tree queue with each
+<code>ROOT_ITEM</code> from the <code>ROOT_TREE</code>?</p>
+<h4 id="graph-callbacks">4.3.3.3. graph callbacks</h4>
+<p>(see: <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go</code></a>)</p>
+<p>The graph callbacks are what tie the above together.</p>
+<p>For each of these callbacks, whenever I say that it looks up
+something in a tree's item index or potential-item index, that implies
+looking the tree up from the forrest; if the forrest cannot look up that
+tree, then the callback returns early, after either:</p>
+<ul>
+<li><p>if we are in substep 1 and are processing a tree: we add the tree
+that is being processed to the tree queue. (TODO: Wait, this assumes
+that an augment will be applied to the <code>ROOT_TREE</code> before the
+next pass... if that isn't the case, this will result in the loop never
+terminating... I guess I need to add a separate retry-tree
+queue?)</p></li>
+<li><p>if we are in substep 2 and are processing an item: we add the
+item that is being processed to the retry-item queue for the tree that
+cannot be looked up</p></li>
+</ul>
+<p>The 6 methods in the <code>btrfscheck.GraphCallbacks</code>
+interface are (a sketch of the interface's shape follows this
+list):</p>
+<ol type="1">
+<li><p><code>FSErr()</code>: There's an error with the filesystem; this
+callback just spits it out on stderr. I mention such a trivial matter
+because, again, for a recovery tool I think it's worth putting care
+into how you handle errors and where you expect them: We expect them here,
+so we have to check for them to avoid reading invalid data or whatever,
+but we don't actually need to do anything other than watch our
+step.</p></li>
+<li><p><code>Want()</code>: We want an item in a given tree with a given
+object ID and item type, but we don't care about what the item's offset
+is.</p>
+<p>The callback works by searching the item index to see if it can find
+such an item; if so, it has nothing else to do and returns. Otherwise,
+it searches the potential-item index; for each matching item it finds,
+it looks in the node index for the node containing that item, and adds
+the roots that would add that node to a set. Once it
+has finished searching the potential-item index, it adds that set to the
+augment queue (even if that set is still empty).</p></li>
+<li><p><code>WantOff()</code>: The same, but we want a specific
+offset.</p></li>
+<li><p><code>WantDirIndex()</code>: We want a <code>DIR_INDEX</code>
+item for a given inode and filename, but we don't know what the offset
+of that item is.</p>
+<p>First we scan over the item index, looking at all
+<code>DIR_INDEX</code> items for that inode number. For each item, we
+can check the scan data to see what the filename in that
+<code>DIR_INDEX</code> is, so we can see if the item satisfies this want
+without accessing the disk. If there's a match, then there is nothing
+else to do, so we return. Otherwise, we do that same search over the
+potential-item index; if we find any matches, then we build the set of
+roots to add to the augment queue the same as in
+<code>Want</code>.</p></li>
+<li><p><code>WantFileExt()</code>: We want 1 or more
+<code>EXTENT_DATA</code> items in the given tree for the given inode,
+and we want them to cover bytes 0 through a given size of that
+file.</p>
+<p>First we walk that range in the item index, to build a list of the
+gaps that we need to fill ("Step 1" in <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n260"><code>rebuild_wantcb.go:_wantRange()</code></a>).
+This walk (<a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n195"><code>rebuild_wantcb.go:_walkRange()</code></a>)
+requires knowing the size of each file extent; so doing this quickly
+without hitting disk is why we recorded the size of each file extent in
+our initialization step.</p>
+<p>Then ("Step 2" in <code>_wantRange()</code>) we iterate over each of
+the gaps, and for each gap do a very similar walk (again, by calling
+<code>_walkRange()</code>, but this time over the potential-item
+index). For each file extent we find that is entirely within the gap,
+we "want" that extent, and move the beginning of the gap forward to the
+end of that extent. This algorithm is dumb and greedy, potentially
+making sub-optimal selections; and so could probably stand to be
+improved; but in my real-world use, it seems to be "good
+enough".</p></li>
+<li><p><code>WantCSum()</code>: We want 1 or more
+<code>EXTENT_CSUM</code> items to cover the half-open interval
+[<code>lo_logical_addr</code>, <code>hi_logical_addr</code>). Well,
+maybe. It also takes a subvolume ID and an inode number; and looks up in
+the scan data whether that inode has the <code>INODE_NODATASUM</code>
+flag set; if it does have the flag set, then it returns early without
+looking for any <code>EXTENT_CSUM</code> items. If it doesn't return
+early, then it performs the same want-range routine as
+<code>WantFileExt</code>, but with the appropriate tree, object ID, and
+item types for csums as opposed to data extents.</p></li>
+</ol>
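+<p>Reconstructed from the descriptions above, the interface's shape is
+roughly the following (argument lists here are approximations; the real
+interface in <code>./lib/btrfscheck/graph.go</code> differs in
+details):</p>
+<pre><code>package sketch
+
+// GraphCallbacks sketches the 6 methods described above; the types
+// and argument lists are illustrative, not the real interface.
+type GraphCallbacks interface {
+    FSErr(err error)
+    Want(treeID int64, objID uint64, typ uint8)
+    WantOff(treeID int64, objID uint64, typ uint8, off uint64)
+    WantDirIndex(treeID int64, inode uint64, name []byte)
+    WantFileExt(treeID int64, inode uint64, size int64)
+    // Logical-address range [beg,end); skipped early if the inode
+    // has INODE_NODATASUM set.
+    WantCSum(subvolID int64, inode uint64, beg, end int64)
+}</code></pre>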
+<p>For each of these callbacks, we generate a "wantKey", a tuple
+representing the function and its arguments; we check the augment-queue
+to see if we've already enqueued a set of roots for that want, and if
+so, that callback can return early without checking the potential-item
+index.</p>
+<h1 id="future-work">5. Future work</h1>
+<p>It's in a reasonably useful place, I think; and so now I'm going to
+take a break from it for a while. But there's still lots of work to
+do:</p>
+<ul>
+<li><p>RAID almost certainly doesn't work.</p></li>
+<li><p>Encryption is not implemented.</p></li>
+<li><p>It doesn't understand (ignores) the <code>TREE_LOG</code>
+(because I don't understand the <code>TREE_LOG</code>).</p></li>
+<li><p><code>btrfs-rec inspect mount</code> should add "lost+found"
+directories for inodes that are included in the subvolume's tree but
+aren't reachable from the tree's root inode.</p></li>
+<li><p>I still need to implement <code>btrfs-rec repair
+<var>SUBCMD</var></code> subcommands to write rebuilt-information from
+<code>btrfs-rec inspect</code> back to the filesystem.</p></li>
+<li><p>I need to figure out the error handling/reporting story for
+<code>mount</code>.</p></li>
+<li><p>It needs a lot more tests.</p>
+<ul>
+<li>I'd like to get the existing btrfs-progs fsck tests to run on
+it.</li>
+</ul></li>
+<li><p>In the process of writing this email, I realized that I probably
+need to add a retry-tree queue; see the "graph callbacks" section in the
+description of the <code>rebuild-trees</code> algorithm above.</p></li>
+<li><p>There are a number of "TODO" comments or panics in the code:</p>
+<ul>
+<li><p>Some of them definitely need to be done.</p></li>
+<li><p>Some of them are <code>panic("TODO")</code> on the basis that if
+it's seeing something on the filesystem that it doesn't recognize, it's
+probably that I didn't get to implementing that thing/situation, but
+it's possible that the thing is just corrupt. This should only be for
+situations where the node passed the checksum test, so its being corrupt
+would have to be caused by a bug in btrfs rather than a failing drive or
+other corruption; I wasn't too worried about btrfs bugs.</p></li>
+</ul></li>
+<li><p><code>btrfs-rec inspect rebuild-trees</code> is slow, and can
+probably be made a lot faster.</p>
+<p>Just to give you an idea of the speeds, the run-times for the various
+steps on my ThinkPad E15 for a 256GB disk image are as follows:</p>
+<pre><code> btrfs-rec inspect rebuild-mappings scan : 7m 31s
+ btrfs-rec inspect rebuild-mappings list-nodes : 47s
+ btrfs-rec inspect rebuild-mappings process : 8m 22s
+ btrfs-rec inspect rebuild-trees : 1h 4m 55s
+ btrfs-rec inspect ls-files : 29m 55s
+ btrfs-rec inspect ls-trees : 8m 40s</code></pre>
+<p>For the most part, it's all single-threaded (with the main exception
+that in several places I/O has been moved to a separate thread from the
+main CPU-heavy thread), but a lot of the algorithms could be
+parallelized.</p></li>
+<li><p>There are a lot of "tunable" values that I haven't really spent
+time tuning. These are all annotated with <a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/textui/tunable.go?id=18e6066c241cf3d252b6521150843ffc858d8434"><code>textui.Tunable()</code></a>.
+I sort-of intended for them to be adjustable on the CLI.</p></li>
+<li><p>Perhaps the <code>btrfs-rec inspect rebuild-trees</code> algorithm
+could be adjusted to also try to rebuild trees with missing parents; see
+the above discussion of the algorithm.</p></li>
+</ul>
+<h1 id="problems-for-merging-this-code-into-btrfs-progs">6. Problems for
+merging this code into btrfs-progs</h1>
+<ul>
+<li><p>It's written in Go, not C.</p></li>
+<li><p>It's effectively GPLv3+ (not GPLv2-only or GPLv2+) because of use
+of some code under the Apache 2.0 license (2 files in the codebase
+itself that are based off of Apache-licensed code, and use of unmodified
+3rd-party libraries).</p></li>
+<li><p>It uses ARC (Adaptive Replacement Cache), which is patented by
+IBM, and the patent doesn't expire for another 7 months. An important
+property of ARC over LRU is that it is scan-resistant; the above
+algorithms do a lot of scanning. On that note, now that Red Hat is owned
+by IBM: who in the company do we need to get to talk to each other so
+that we can get ARC into the Linux kernel before then?</p></li>
+</ul>
+<div style="font-family: monospace">
+<p>-- <br/> Happy hacking,<br/> ~ Luke Shumaker<br/></p>
+</div>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2023 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/btrfs-rec.md b/public/btrfs-rec.md
new file mode 100644
index 0000000..7c1bf39
--- /dev/null
+++ b/public/btrfs-rec.md
@@ -0,0 +1,1272 @@
+Announcing: btrfs-rec: Recover (data from) a broken btrfs filesystem
+====================================================================
+---
+date: "2023-07-10"
+markdown_options: "-smart"
+---
+
+> I originally sent this email on 2023-07-10, but it has been caught
+> by vger's bogofilter. Yes, it was
+> [plaintext](https://git.lukeshu.com/btrfs-progs-ng/tree/README.md?id=18e6066c241cf3d252b6521150843ffc858d8434).
+> No, I didn't use GMail. Yes, I've successfully participated in vger
+> lists in the past. Yes, I've reached out to postmaster; no, I
+> haven't received a reply yet (as of 2023-07-14).
+
+<div style="font-family: monospace">
+To: linux-btrfs@vger.kernel.org<br/>
+From: Luke T. Shumaker &lt;lukeshu@lukeshu.com&gt;<br/>
+Subject: btrfs-rec: Recover (data from) a broken btrfs filesystem<br/>
+Date: Mon, 10 Jul 2023 21:23:41 -0600<br/>
+Message-ID: &lt;87jzv7uo5e.wl-lukeshu@lukeshu.com&gt;<br/>
+</div>
+
+Inspired by a mis-typed `dd` command, for the last year I've been
+working on a tool for recovering corrupt btrfs filesystems; at first
+idly here and there, but more actively in the last few months. I hope
+to get it incorporated into btrfs-progs, though perhaps that is
+problematic for a few reasons I'll get to. If the code can't be
+incorporated into btrfs-progs, at least the ideas and algorithms
+should be.
+
+[https://git.lukeshu.com/btrfs-progs-ng/](https://git.lukeshu.com/btrfs-progs-ng/)
+
+Highlights:
+
+ - In general, it's more tolerant of corrupt filesystems than
+ `btrfs check --repair`, `btrfs rescue` or `btrfs restore`.
+
+ - `btrfs-rec inspect rebuild-mappings` is a better
+ `btrfs rescue chunk-recover`.
+
+ - `btrfs-rec inspect rebuild-trees` can re-attach lost branches to
+ broken B+ trees.
+
+ - `btrfs-rec inspect mount` is a read-only FUSE implementation of
+ btrfs. This is conceptually a replacement for `btrfs restore`.
+
+ - It's entirely written in Go. I'm not saying that's a good thing,
+ but it's an interesting thing.
+
+Hopefully some folks will find it useful, or at least neat!
+
+ - [1. Motivation](#motivation)
+ - [2. Overview of use](#overview-of-use)
+ - [3. Prior art](#prior-art)
+ - [4. Internals/Design](#internalsdesign)
+ - [4.1. Overview of the source tree layout](#overview-of-the-source-tree-layout)
+ - [4.2. Base decisions: CLI structure, Go, JSON](#base-decisions-cli-structure-go-json)
+ - [4.3. Algorithms](#algorithms)
+ - [4.3.1. The `rebuild-mappings` algorithm](#the-rebuild-mappings-algorithm)
+ - [4.3.2. The `--rebuild` algorithm](#the---rebuild-algorithm)
+ - [4.3.2.1. rebuilt forrest behavior](#rebuilt-forrest-behavior-looking-up-trees)
+ - [4.3.2.2. rebuilt individual tree behavior](#rebuilt-individual-tree-behavior)
+ - [4.3.3. The `rebuild-trees` algorithm](#the-rebuild-trees-algorithm)
+ - [4.3.3.1. initialization](#initialization)
+ - [4.3.3.2. the main loop](#the-main-loop)
+ - [4.3.3.3. graph callbacks](#graph-callbacks)
+ - [5. Future work](#future-work)
+ - [6. Problems for merging this code into btrfs-progs](#problems-for-merging-this-code-into-btrfs-progs)
+
+# 1. Motivation
+
+Have you ever ended up with a corrupt btrfs filesystem (through no
+fault of btrfs itself, but perhaps a failing drive, or a mistaken `dd`
+invocation)? Surely losing less than 100MB of data from a drive
+should not render hundreds of GB of perfectly intact data unreadable!
+And yet, the existing tools are unable to even attempt to read that
+data:
+
+ $ btrfs check --repair --force dump-zero.1.img
+ enabling repair mode
+ Opening filesystem to check...
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ ERROR: cannot read chunk root
+ ERROR: cannot open file system
+
+or
+
+ $ btrfs check --init-extent-tree --force dump-zero.1.img
+ Opening filesystem to check...
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ ERROR: cannot read chunk root
+ ERROR: cannot open file system
+
+or
+
+ $ btrfs check --init-csum-tree --force dump-zero.1.img
+ Creating a new CRC tree
+ Opening filesystem to check...
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ ERROR: cannot read chunk root
+ ERROR: cannot open file system
+
+or
+
+ $ btrfs rescue chunk-recover dump-zero.1.img
+ Scanning: DONE in dev0
+ corrupt node: root=1 block=160410271744 slot=0, corrupt node: root=1 block=160410271744, nritems too large, have 39 expect range [1,0]
+ Couldn't read tree root
+ open with broken chunk error
+
+or
+
+ $ btrfs rescue zero-log dump-zero.1.img
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ ERROR: cannot read chunk root
+ ERROR: could not open ctree
+
+or
+
+ $ mkdir out
+ $ btrfs restore dump-zero.1.img out
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ ERROR: cannot read chunk root
+ Could not open root, trying backup super
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ ERROR: cannot read chunk root
+ Could not open root, trying backup super
+ ERROR: superblock bytenr 274877906944 is larger than device size 256060514304
+ Could not open root, trying backup super
+
+or
+
+ $ btrfs restore --list-roots dump-zero.1.img
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ ERROR: cannot read chunk root
+ Could not open root, trying backup super
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ ERROR: cannot read chunk root
+ Could not open root, trying backup super
+ ERROR: superblock bytenr 274877906944 is larger than device size 256060514304
+ Could not open root, trying backup super
+
+or
+
+ $ btrfs-find-root dump-zero.1.img
+ WARNING: cannot read chunk root, continue anyway
+ Superblock thinks the generation is 6596071
+ Superblock thinks the level is 1
+
+Well, have I got a tool for you!
+
+(FWIW, I also tried manipulating the filesystem and patching the tools
+to try to get past those errors, only to get a different set of
+errors. Some of these patches I am separately submitting to
+btrfs-progs.)
+
+# 2. Overview of use
+
+There are two `btrfs-rec` sub-command groups:
+<code>btrfs-rec inspect <var>SUBCMD</var></code> and <code>btrfs-rec repair <var>SUBCMD</var></code>, and you can
+find out about various sub-commands with `btrfs-rec help`. These are
+both told about devices or images with the `--pv` flag.
+
+<code>btrfs-rec inspect <var>SUBCMD</var></code> commands open the filesystem read-only, and
+(generally speaking) write extracted or rebuilt information to stdout.
+<code>btrfs-rec repair <var>SUBCMD</var></code> commands open the filesystem read+write, and
+consume information from <code>btrfs-rec inspect <var>SUBCMD</var></code> commands to
+actually repair the filesystem (except I haven't actually implemented
+any `repair` commands yet... despite the lack of `repair` commands, I
+believe that `btrfs-rec` is already useful because of the `btrfs-rec
+inspect mount` command to get data out of the broken filesystem).
+This split allows you to try things without being scared by WARNINGs
+about not using these tools unless you're an expert or have been told
+to by a developer.
+
+In the broken `dump-zero.1.img` example above (which has a perfectly
+intact superblock, but a totally broken `CHUNK_TREE`), to "repair" it
+I'd:
+
+ 1. Start by using `btrfs-rec inspect rebuild-mappings` to rebuild the
+ broken chunk/dev/blockgroup trees:
+
+ $ btrfs-rec inspect rebuild-mappings \
+ --pv=dump-zero.1.img \
+ > mappings-1.json
+
+ 2. It may only mostly succeed, telling us on stderr about a few
+    regions of the image that it wasn't able to figure out the chunks
+    for. Using some human-level knowledge, you can write those
+    mappings yourself, insert them into the generated `mappings.json`,
+    and ask `rebuild-mappings` to normalize what you wrote:
+
+ $ btrfs-rec inspect rebuild-mappings \
+ --pv=dump-zero.1.img \
+ --mappings=<(sed <mappings-1.json \
+ -e '2a{"LAddr":5242880,"PAddr":{"Dev":1,"Addr":5242880},"Size":1},' \
+ -e '2a{"LAddr":13631488,"PAddr":{"Dev":1,"Addr":13631488},"Size":1},') \
+ > mappings-2.json
+
+ 3. Now that it has functioning chunk/dev/blockgroup trees, we can use
+ `btrfs-rec inspect rebuild-trees` to rebuild other trees that rely
+ on those:
+
+        $ btrfs-rec inspect rebuild-trees \
+ --pv=dump-zero.1.img \
+ --mappings=mappings-2.json \
+ > trees.json
+
+ 4. Now that (hopefully) everything that was damaged has been
+ reconstructed, we can use `btrfs-rec inspect mount` to mount the
+ filesystem read-only and copy out our data:
+
+ $ mkdir mnt
+ $ sudo btrfs-rec inspect mount \
+ --pv=dump-zero.1.img \
+ --mappings=mappings-2.json \
+ --trees=trees.json \
+ ./mnt
+
+This example is fleshed out more (and the manual edits to
+`mappings.json` explained more) in [`./examples/main.sh`].
+
+# 3. Prior art
+
+Comparing `btrfs-rec inspect mount` with the existing
+[https://github.com/adam900710/btrfs-fuse](https://github.com/adam900710/btrfs-fuse) project:
+
+ - Again, mine has better fault tolerance
+ - Mine is read-only
+ - Mine supports xattrs ("TODO" in Adam's)
+ - Mine supports separate inode address spaces for subvolumes; Adam's
+ doesn't due to limitations in FUSE, mine works around this by
+ lazily setting up separate mountpoints for each subvolume (though
+ this does mean that the process needs to run as root, which is a
+ bummer).
+
+# 4. Internals/Design
+
+## 4.1. Overview of the source tree layout
+
+ - [`examples/`] has example scripts showing how to use `btrfs-rec`.
+
+ - [`lib/btrfs/`] is the core btrfs implementation.
+
+ - [`lib/btrfscheck/`] and [`lib/btrfsutil/`] are libraries for
+   "btrfs-progs"-type programs; they are userland-y things that I
+   thought should be separate from the core implementation; something
+ that frustrated me about libbtrfs was having to figure out "is this
+ thing here in support of btrfs bits-on-disk, or in support of a
+ higher-level 'how btrfs-progs wants to think about things'?"
+
+ - [`cmd/btrfs-rec/`] is where the command implementations live. If a
+ sub-command fits in a single file, it's
+ <code>cmd/btrfs-rec/inspect_<var>SUBCMD</var>.go</code>, otherwise, it's in a separate
+ <code>cmd/btrfs-rec/inspect/<var>SUBCMD</var>/</code> package.
+
+ - [`lib/textui/`] is reasonably central to how the commands implement a
+ text/CLI user-interface.
+
+ - [`lib/binstruct/`], [`lib/diskio/`], and [`lib/streamio/`] are
+ non-btrfs-specific libraries related to the problem domain.
+
+ - [`lib/containers/`], [`lib/fmtutil/`], [`lib/maps/`], [`lib/slices/`], and
+ [`lib/profile/`] are all generic Go libraries that have nothing to do
+ with btrfs or the problem domain, but weren't in the Go standard
+   library and I didn't find/know-of existing implementations that I
+ liked. Of these, all but `containers` are pretty simple utility
+ libraries. Also, some of these things have been added to the
+ standard library since I started the project.
+
+## 4.2. Base decisions: CLI structure, Go, JSON
+
+I started with trying to enhance btrfs-progs, but ended up writing a
+wholly new program in Go, for several reasons:
+
+ - writing a new thing: I was having to learn both the btrfs-progs
+ codebase and how btrfs-bits-on-disk work, and it got to the point
+ that I decided I should just focus on learning btrfs-bits-on-disk.
+
+ - writing a new thing: It was becoming increasingly apparent to me
+   that it was going to be an uphill fight to have recovery-tools
+ share the same code as the main-tools, as the routines used by the
+ main-tools rightly have validity checks, where recovery-tools want
+ to say "yes, I know it's invalid, can you give it to me anyway?".
+
+ - writing it in not-C: I love me some C, but higher level languages
+   are good for productivity. And since I was trying to write a whole
+   lot of code at once, I needed a productivity boost.
+
+ - writing it in not-C: This forced me to learn btrfs-bits-on-disk
+ better, instead of just cribbing from btrfs-progs. That knowledge
+ is particularly important for having ideas on how to deal with
+ corrupt bits-on-disk.
+
+ - writing it in Go: At the time I started, my day job was writing Go,
+ so I had Go swapped into my brain. And Go still feels close to C
+ but provides *a lot* of niceness and safety over C.
+
+It turned out that Go was perhaps not the best choice, but we'll come
+back to that.
+
+I wanted to separate things into a pipeline. For instance: Instead of
+`btrfs rescue chunk-recover` trying to do everything to rebuild a
+broken chunk tree, I wanted to separate I/O from computation from
+repairs. So I have `btrfs-rec inspect rebuild-mappings scan` that
+reads all the info necessary to rebuild the chunk tree, then dump that
+as a 2GB glob of JSON. Then I can feed that JSON to `btrfs-rec
+inspect rebuild-mappings process` which actually rebuilds the mappings
+in the chunk tree, and dumps them as JSON. And then other commands
+can consume that `mappings.json` to use that instead of trying to read
+the chunk tree from the actual FS, so that you don't have to make
+potentially destructive writes to inspect an FS with a broken chunk
+tree, and can inspect it more forensically. Or then use
+<code>btrfs-rec repair <var>SOME_SUBCMD_I_HAVENT_WRITTEN_YET</var></code> to write that
+chunk tree in `mappings.json` back to the filesystem.
+
+(But also, the separate steps thing was useful just so I could iterate
+on the algorithms of `rebuild-mappings process` separately from having
+to scan the entire FS)
+
+So, I made the decision that <code>btrfs-rec inspect <var>SUBCMD</var></code> commands
+should all only open the FS read-only, and output their work to a
+separate file; that writing that info back to the FS should be
+separate in <code>btrfs-rec repair <var>SUBCMD</var></code>.
+
+For connecting those parts of the pipeline, I chose JSON, for a few
+reasons:
+
+ - I wanted something reasonably human-readable, so that I could debug
+ it easier.
+
+ - I wanted something reasonably human-readable, so that human
+ end-users could make manual edits; for example, in
+ [`examples/main.sh`] I have an example of manually editing
+ `mappings.json` to resolve a region that the algorithm couldn't
+ figure out, but with knowledge of what caused the corruption a
+ human can.
+
+ - I didn't want to invent my own DSL and have to handle writing a
+ parser. (This part didn't pay off! See below.)
+
+ - I wanted something that I thought would have good support in a
+ variety of languages, so that if Go is problematic for getting
+ things merged upstream it could be rewritten in C (or maybe Rust?)
+ piece-meal where each subcommand can be rewritten one at a time.
+
+It turned out that JSON was perhaps not the best choice.
+
+OK, so: Go and/or JSON maybe being mistakes:
+
+ - I spent a lot of time getting the garbage collector to not just
+ kill performance.
+
+ - The <code>btrfs-rec inspect rebuild-mappings <var>SUBCMD</var></code> subcommands all
+ throw a lot of data through the JSON encoder/decoder, and I learned
+ that the Go stdlib `encoding/json` package has memory use that
+ grows O(n^2) (-ish? I didn't study the implementation, but that's
+ what the curve looks like just observing it) on the size of the
+ data being shoved through it, so I had to go take a break and go
+ write https://pkg.go.dev/git.lukeshu.com/go/lowmemjson which is a
+   mostly-drop-in replacement that tries to be as close as possible to
+ O(1) memory use. So I did end up having to write my own parser
+ anyway :(
+
+## 4.3. Algorithms
+
+There are 3 algorithms of note in `btrfs-rec`, that I think are worth
+getting into mainline btrfs-progs even if the code of `btrfs-rec`
+doesn't get in:
+
+ 1. The `btrfs-rec inspect rebuild-mappings` algorithm to rebuild
+ information from the `CHUNK_TREE`, `DEV_TREE`, and
+ `BLOCK_GROUP_TREE`.
+
+ 2. The `btrfs-rec --rebuild` algorithm to cope with reading broken B+
+ trees.
+
+ 3. The `btrfs-rec inspect rebuild-trees` algorithm to re-attach lost
+ branches to broken B+ trees.
+
+### 4.3.1. The `rebuild-mappings` algorithm
+
+(This step-zero scan is `btrfs-rec inspect rebuild-mappings scan`, and
+principally lives in [`./lib/btrfsutil/scan.go`] and
+[`./cmd/btrfs-rec/inspect/rebuildmappings/scan.go`])
+
+ 0. Similar to `btrfs rescue chunk-recover`, scan each device for
+ things that look like nodes; keep track of:
+ - Checksums of every block on the device
+ - Which physical addresses contain nodes that claim to be at a
+      given logical address.
+ - Any found Chunk items, BlockGroup items, DevExtent, and CSum
+ items. Keep track of the key for each of these, and for CSum
+ items also track the generation.
+
+Create a bucket of the data from Chunks, DevExtents, and BlockGroups;
+a Chunk and a DevExtent+BlockGroup store pretty much the same
+information, so we can use one to reconstruct the other.  How we
+"merge" these and handle conflicts is in
+[`./lib/btrfs/btrfsvol/lvm.go:addMapping()`]; I don't think this
+part is particularly clever, but given that `btrfs rescue
+chunk-recover` crashes if it encounters two overlapping chunks, I
+suppose I should spell it out (there's also a sketch of it in code
+after this list):
+
+ - A "mapping" is represented as a group of 4 things:
+
+ + logical address
+ + a list of 1 or more physical addresses (device ID and offset)
+ + size, and a Boolean indicator of whether the size is "locked"
+ + block group flags, and a Boolean presence-indicator
+
+ - Mappings must be merged if their logical or physical regions
+ overlap.
+
+ - If a mapping has a "locked" size, then when merging it may subsume
+ smaller mappings with unlocked sizes, but its size cannot be
+ changed; trying to merge a locked-size mapping with another mapping
+ that is not for a subset region should return an error.
+
+ - If a mapping has block group flags present, then those flags may
+ not be changed; it may only be merged with another mapping that
+ does not have flags present, or has identical flags.
+
+ - When returning an error because of overlapping non-mergeable
+ mappings, just log an error on stderr and keep going. That's an
+ important design thing that is different than normal filesystem
+ code; if there's an error, yeah, detect and notify about it, **but
+ don't bail out of the whole routine**. Just skip that one item or
+ whatever.
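+
+Here's a minimal sketch of those merge rules in Go; all of the type
+and field names here are made up for illustration (the real logic, in
+terms of the codebase's own types, is in
+[`./lib/btrfs/btrfsvol/lvm.go:addMapping()`]):
+
+    package rebuildmappings
+
+    import "fmt"
+
+    type physAddr struct {
+        dev  uint64 // device ID
+        addr int64  // byte offset on that device
+    }
+
+    type mapping struct {
+        lAddr      int64      // logical address
+        pAddrs     []physAddr // a list of 1 or more physical addresses
+        size       int64
+        sizeLocked bool
+        flags      *uint64 // block group flags; nil = "not present"
+    }
+
+    // mergeInto folds src into dst per the rules above, or returns an
+    // error that the caller merely logs to stderr before moving on.
+    func mergeInto(dst, src *mapping) error {
+        // A locked-size mapping may subsume smaller unlocked
+        // mappings, but its own size may not change.
+        if dst.sizeLocked &&
+            !(src.lAddr >= dst.lAddr && src.lAddr+src.size <= dst.lAddr+dst.size) {
+            return fmt.Errorf("laddr=%v: would resize a locked-size mapping", src.lAddr)
+        }
+        // Present flags may not be changed.
+        if dst.flags != nil && src.flags != nil && *dst.flags != *src.flags {
+            return fmt.Errorf("laddr=%v: conflicting block group flags", src.lAddr)
+        }
+        if dst.flags == nil {
+            dst.flags = src.flags
+        }
+        dst.pAddrs = append(dst.pAddrs, src.pAddrs...)
+        // (The real code also grows dst to the union of the two
+        // regions, de-duplicates pAddrs, and re-checks for new
+        // overlaps.)
+        return nil
+    }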
+
+Now that we know how to "add a mapping", let's do that:
+
+(The following main-steps are `btrfs-rec inspect rebuild-mappings
+process`, and principally live in
+[`./cmd/btrfs-rec/inspect/rebuildmappings/process.go`])
+
+ 1. Add all found Chunks.
+
+ 2. Add all found DevExtents.
+
+ 3. Add a physical:logical mapping of length nodesize for each node
+ that was found.
+
+ 4. For any mappings from steps 2 or 3 that are missing blockgroup flags
+ (that is: they weren't able to be merged with a mapping from step
+ 1), use the found BlockGroups to fill in those flags.
+
+ 5. Now we'll merge all found CSum items into a map of the sums of the
+ logical address space. Sort all of the csum items by generation,
+ then by address. Loop over them in that order, inserting their
+ sums into the map. If two csum items overlap, but agree about the
+ sums of the overlapping region, that's fine, just take their
+ union. For overlaps that disagree, items with a newer generation
+ kick out items with an older generation. If disagreeing items
+ have the same generation... I don't think that can happen except
+ by a filesystem bug (i.e. not by a failing drive or other external
+    corruption), so I wasn't too concerned about it, and just log an
+ error on stderr and skip the later-processed item. See
+ [`./cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go`].
+
+    Look at regions of the logical address space that meet all 3 of
+    these criteria:
+
+ - we have CSum items for them
+ - we have a BlockGroup for them
+    - we don't have a Chunk/DevExtent mapping them to the physical
+ address space.
+
+ Pair those CSums up with BlockGroups, and for each BlockGroup,
+ search the list of checksums of physical blocks to try to find a
+ physical region that matches the logical csums (and isn't already
+ mapped to a different logical region). I used a
+ Knuth-Morris-Pratt search, modified to handle holes in the logical
+ csum list as wildcards.
+
+ Insert any found mappings into our bucket of mappings.
+
+ 6. Do the same again, but with a fuzzy search (we can re-use the csum
+ map of the logical address space). My implementation of this is
+ comparatively time and space intensive; I just walk over the
+ entire unmapped physical address space, noting what % of match
+ each BlockGroup has if placed at that location. I keep track of
+ the best 2 matches for each BlockGroup. If the best match is
+ better than a 50% match, and the second best is less than a 50%
+ match, then I add the best match. In my experience, the best
+ match is >90% (or at whatever the maximum percent is for how much
+ of the BlockGroup has logical sums), and the second best is 0% or
+ 1%. The point of tracking both is that if there isn't a clear-cut
+ winner, I don't want it to commit to a potentially wrong choice.
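+
+In Go, that "best two matches" bookkeeping might look something like
+this sketch (the names here are made up, not the real code's):
+
+    package rebuildmappings
+
+    type candidate struct {
+        paddr int64   // physical address of the candidate placement
+        pct   float64 // fraction of the BlockGroup's csums matching here
+    }
+
+    // best2 scans all candidate placements, keeping the two highest
+    // match percentages.
+    func best2(cands []candidate) (best, second candidate) {
+        for _, c := range cands {
+            switch {
+            case c.pct > best.pct:
+                best, second = c, best
+            case c.pct > second.pct:
+                second = c
+            }
+        }
+        return best, second
+    }
+
+    // accept implements the 50% rule: only commit to the best match
+    // if there is a clear-cut winner.
+    func accept(best, second candidate) bool {
+        return best.pct > 0.50 && second.pct < 0.50
+    }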
+
+### 4.3.2. The `--rebuild` algorithm
+
+The `--rebuild` flag is implied by the `--trees=trees.json` flag, and
+triggers an algorithm that allows "safely" reading from a broken B+
+tree, rather than the usual B+ tree lookup and search functions. I
+probably should have tried to understand the `btrfs restore`
+algorithm; maybe I reinvented the wheel...
+
+This algorithm requires a list of all nodes on the filesystem; we find
+these using the same scan as above ([`./lib/btrfsutil/scan.go`]), the
+same procedure as `btrfs rescue chunk-recover`.
+
+We walk all of those nodes, and build a reasonably lightweight
+in-memory graph of all nodes ([`./lib/btrfsutil/graph.go`]), tracking
+
+ - each node's
+ + logical address
+ + level
+ + generation
+ + tree
+ + each item's key and size
+ - each keypointer's
+ + source node
+ + source slot within the node
+ + tree of the source node
+ + destination node
+ + destination level implied by the level of the source node
+ + destination key
+ + destination generation
+ - logical addresses and error messages for nodes that are pointed to
+ by a keypointer or the superblock, but can't be read (because that
+ logical address isn't mapped, or it doesn't look like a node,
+ or...)
+ - an index such that for a given node we can quickly list both
+ keypointers both originating at that node and pointing to that
+ node.
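+
+As a sketch of that graph's shape in Go (all hypothetical names; the
+real structures, in terms of the codebase's own types, are in
+[`./lib/btrfsutil/graph.go`]):
+
+    package btrfsutil
+
+    type key struct {
+        objectID uint64
+        itemType uint8
+        offset   uint64
+    }
+
+    type graphNode struct {
+        laddr      int64 // logical address
+        level      uint8
+        generation uint64
+        owner      uint64 // tree ID
+        itemKeys   []key
+        itemSizes  []uint32
+    }
+
+    type graphEdge struct { // a keypointer
+        fromNode int64  // logical address of the source node
+        fromSlot int    // slot within the source node
+        fromTree uint64 // owner-tree of the source node
+        toNode   int64  // logical address of the destination node
+        toLevel  uint8  // implied by the level of the source node
+        toKey    key
+        toGen    uint64
+    }
+
+    type graph struct {
+        nodes    map[int64]graphNode
+        badNodes map[int64]error // logical address → why it can't be read
+        // the index letting us quickly list keypointers both
+        // originating at and pointing to a given node:
+        edgesFrom map[int64][]*graphEdge
+        edgesTo   map[int64][]*graphEdge
+    }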
+
+#### 4.3.2.1. rebuilt forrest behavior (looking up trees)
+
+(see: [`./lib/btrfsutil/rebuilt_forrest.go`])
+
+ - The `ROOT_TREE`, `CHUNK_TREE`, `TREE_LOG`, and `BLOCK_GROUP_TREE`
+    (the trees pointed to directly by the superblock) work as you'd
+ expect.
+ - For other trees, we (as you'd expect) look up the root item in the
+ rebuilt `ROOT_TREE`, and then (if rootitem.ParentUUID is non-zero)
+ eagerly also look up the parent tree (recursing on ourself). We
+ try to use the `UUID_TREE` tree to help with this, but fall back to
+ just doing a linear scan over the `ROOT_TREE`. If we fail to look
+ up the parent tree (or its parent, or a more distant ancestor),
+ then (depending on a flag) we either make a note of that, or error
+ out and fail to look up the child tree. For `--rebuild` and
+ `--trees=trees.json` we are permissive of this error, and just make
+ note of it; but we'll re-use this algorithm in the `rebuild-trees`
+ algorithm below, and it needs the more strict handling.
+ - When creating the rebuilt individual tree, we start by adding the
+ root node specified by the superblock/root-item. But we may also
+ add additional root nodes grafted on to the tree by the
+ `--trees=trees.json` flag or by the `rebuild-trees` algorithm
+ below. So a tree may have more than 1 root node.
+
+#### 4.3.2.2. rebuilt individual tree behavior
+
+(see: [`./lib/btrfsutil/rebuilt_tree.go`])
+
+In order to read from a tree, we first have to build a few indexes.
+We store these indexes in an Adaptive Replacement Cache; they are all
+re-buildable based on the tree's list of roots and the above graph; if
+we have a bunch of trees we don't need to keep all of this in memory
+at once.  Note that this is done 100% with the in-memory graph; we
+don't need to read anything from the filesystem during these
+procedures.
+
+ - The first index we build is the "node index". This is an index
+ that for every node tells us what root(s) the tree would need to
+ have in order for the tree to include that node, and also what the
+    have in order for the tree to include that node, and also what
+    the highest acceptable item key in the node would be if the tree
+ in case the tree is real broken and there are multiple paths from
+    in case the tree is really broken and there are multiple paths from
+    the root to the node, as these different paths may imply different
+ is:
+
+ map[ nodeID → map[ rootNodeID → {loMaxItem, hiMaxItem} ] ]
+
+ We'll do a loop over the graph, using dynamic-programming
+ memoization to figure out ordering and avoid processing the same
+ node twice; for each node we'll
+
+ + Check whether the owner-tree is this tree or one of this tree's
+ ancestors (and if it's an ancestor, that the node's generation
+ isn't after the point that the child tree was forked from the
+ parent tree). If not, we are done processing that node (record
+ an empty/nil set of roots for it).
+
+ + Create an empty map of `rootID` → {`loMaxItem`, `hiMaxItem`}.
+
+    + Look at each keypointer that points at the node and:
+
+ * Skip the keypointer if its expectations of the node aren't met:
+ if the level, generation, and min-key constraints don't match
+ up. If the keypointer isn't in the last slot in the source
+ node, we also go ahead and include checking that the
+ destination node's max-key is under the min-key of the
+ keypointer in the next slot, since that's cheap to do now.
+
+ * Skip the keypointer if its source node's owner-tree isn't this
+ tree or one of this tree's ancestors (and if it's an ancestor,
+ that the node's generation isn't after the point that the child
+ tree was forked from the parent tree).
+
+     * Recurse (with the dynamic-programming memoization) and index
+       the keypointer's source node.
+
+     * For every root that would result in the keypointer's source
+ node being included in the tree:
+
+      . If the keypointer is in the last slot, look at what the
+        source node's last-item constraints would be if that root
+ is included, and can now check the max-item of our
+        destination node.  We check against the `hiMaxItem`, since if
+ there is any valid path from the root to this node, then we
+ want to be permissive and include it. If that check fails,
+        then we're done with this keypointer.  Also, make note of
+        those `loMaxItem` and `hiMaxItem` values; we'll use them
+        again in just a moment.
+
+ . Otherwise, set both `loMaxItem` and `hiMaxItem` to 1-under
+ the min-item of the keypointer in the next slot.
+
+ . Insert that `loMaxItem` and `hiMaxItem` pair into the
+ `rootID` → {`loMaxItem`, `hiMaxItem`} map we created above.
+ If an entry already exists for this root (since a broken tree
+ might have multiple paths from the root to our node), then
+ set `loMaxItem` to the min of the existing entry and our
+ value, and `hiMaxItem` to the max.
+
+ + If that `rootID` → {`loMaxItem`, `hiMaxItem`} map is still empty,
+ then consider this node to be a (potential) root, and insert
+      `rootID=thisNode` → {`loMaxItem=maxKey`, `hiMaxItem=maxKey`}
+ (where `maxKey` is the maximum value of the key datatype).
+
+ + Take that `rootID` → {`loMaxItem`, `hiMaxItem`} map and insert it
+ into the index as the entry for this node.
+
+ - The next index we build is the "item index". This is a "sorted
+ map" (implemented as a red-black tree, supporting sub-range
+ iteration) of `key` → {`nodeID`, `slotNumber`}; a map that for each
+ key tells us where to find the item with that key.
+
+ + Loop over the node index, and for each node check if both (a) it
+ has `level==0` (is a leaf node containing items), and (b) its set
+ of roots that would include it has any overlap with the tree's
+ set of roots.
+
+ + Loop over each of those included leaf nodes, and loop over the
+      items in each node.  Insert the `key` → {`nodeID`,
+      `slotNumber`} into our sorted map.  If there is already an
+      entry for that key, decide which one wins by the following
+      rules (sketched in code after this list):
+
+     * Use the one from the node with the owner-tree that is closer
+       to this tree; a node with owner=thisTree wins over a node
+       with owner=thisTree.parent, which would win over a node with
+       owner=thisTree.parent.parent.  If that's a tie, then...
+
+ * Use the one from the node with the higher generation. If
+ that's a tie, then...
+
+ * I don't know, I have the code `panic`:
+
+ // TODO: This is a panic because I'm not really sure what the
+ // best way to handle this is, and so if this happens I want the
+ // program to crash and force me to figure out how to handle it.
+ panic(fmt.Errorf("dup nodes in tree=%v: old=%v=%v ; new=%v=%v",
+ tree.ID,
+ oldNode, tree.forrest.graph.Nodes[oldNode],
+ newNode, tree.forrest.graph.Nodes[newNode]))
+
+ Note that this algorithm means that for a given node we may use a
+ few items from that node, while having other items from that same
+ node be overridden by another node.
+
+ - The final index we build is the "error index". This is an index of
+ what errors correspond to which range of keys, so that we can
+ report them, and give an idea of "there may be entries missing from
+ this directory" and similar.
+
+ For each error, we'll track the min-key and max-key of the range it
+ applies to, the node it came from, and what the error string is.
+ We'll store these into an interval tree keyed on that
+ min-key/max-key range.
+
+ + Create an empty set `nodesToProcess`. Now populate it:
+
+ * Once again, we'll loop over the node index, but this time we'll
+ only check that there's overlap between the set of roots that
+       would include the node and the tree's set of roots.  For the
+       nodes that are included in this tree, insert both the node
+       itself and all node IDs that it has keypointers pointing to
+       into the `nodesToProcess` set.
+
+ * Also insert all of the tree's roots into `nodesToProcess`; this
+ is in case the superblock/root-item points to an invalid node
+ that we couldn't read.
+
+    + Now loop over `nodesToProcess`.  For each node, create an empty
+      list of errors.  Use the keypointers pointing to it and the min
+      `loMaxItem` from the node index to construct a set of
+      expectations for the node; this should be reasonably
+      straightforward, given:
+
+ * If different keypointers have disagreeing levels, insert an
+       error into the list, and don't bother with checking the node's
+ level.
+
+ * If different keypointers have disagreeing generations, insert
+       an error into the list, and don't bother with checking the
+ node's generation.
+
+ * If different keypointers have different min-item expectations,
+ use the max of them.
+
+ Then:
+
+ * If the node is a "bad node" in the graph, insert the error
+ message associated with it. Otherwise, check those
+ expectations against the node in the graph.
+
+ If the list of error messages is non-empty, then insert their
+ concatenation into the interval tree, with the range set to the
+ min of the min-item expectations from the keypointers through the
+ max of the `hiMaxItem`s from the node index. If the min min-item
+ expectation turns out to be higher than the max `hiMaxItem`, then
+ set the range to the zero-key through the max-key.
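+
+Since that same "which one wins" rule gets re-used later (the
+`rebuild-trees` algorithm below settles items with it), here's a small
+sketch of it in Go, with made-up names:
+
+    package btrfsutil
+
+    type itemCandidate struct {
+        ownerDistance int    // 0 = owner is this tree, 1 = its parent, ...
+        generation    uint64
+    }
+
+    // wins reports whether a beats b; panicking on a full tie
+    // mirrors the real code's TODO.
+    func wins(a, b itemCandidate) bool {
+        if a.ownerDistance != b.ownerDistance {
+            return a.ownerDistance < b.ownerDistance // closer owner wins
+        }
+        if a.generation != b.generation {
+            return a.generation > b.generation // higher generation wins
+        }
+        panic("dup nodes: not sure yet how to handle a full tie")
+    }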
+
+From there, it should be trivial to implement the usual B+ tree
+operations using those indexes; exact-lookup using the item index, and
+range-lookups and walks using the item index together with the error
+index. Efficiently searching the `CSUM_TREE` requires knowing item
+sizes, so that's why we recorded the item sizes into the graph.
+
+### 4.3.3. The `rebuild-trees` algorithm
+
+The `btrfs-rec inspect rebuild-trees` algorithm finds nodes to attach
+as extra roots to trees.  I think that conceptually it's the simplest
+of the 3 algorithms, but it turned out to be the hardest to get
+right.  So... for this algorithm, maybe more than for the others,
+reference the source code ([`./cmd/btrfs-rec/inspect/rebuildtrees/`]),
+because I might forget some small but important detail.
+
+The core idea here is that we're just going to walk each tree,
+inspecting each item in the tree, and checking for any items that are
+implied by other items (e.g.: a dir entry item implies the existence
+of an inode item for the inode that it points at).  If an implied item is
+not in the tree, but is in some other node, then we look at which
+potential roots we could add to the tree that would add that other
+node. Then, after we've processed all of the items in the filesystem,
+we go add those various roots to the various trees, keeping track of
+which items are added or updated. If any of those added/updated items
+have a version with a newer generation on a different node, see what
+roots we could add to get that newer version. Then add those roots,
+keeping track of items that are added/updated.  Once we reach a
+steady-state where the newest version of each item has been added, loop
+back and inspect all added/updated items for implied items, keeping
+track of roots we could add. Repeat until a steady-state is reached.
+
+There are lots of little details in that process, some of which are
+for correctness, and some of which are for "it should run in hours
+instead of weeks."
+
+#### 4.3.3.1. initialization
+
+First up, we're going to build an in-memory graph, same as above.
+But this time, while we're reading the nodes to do that, we're also
+going to watch for some specific items and record a few things about
+them.
+
+(see: [`./cmd/btrfs-rec/inspect/rebuildtrees/scan.go`])
+
+For each {`nodeID`, `slotNumber`} pair that matches one of these item
+types, we're going to record:
+
+ - flags:
+ + `INODE_ITEM`s: whether it has the `INODE_NODATASUM` flag set
+ - names:
+ + `DIR_INDEX` items: the file's name
+ - sizes:
+ + `EXTENT_CSUM` items: the number of bytes that this is a sum for
+ (i.e. the item size over the checksum size, times the block size)
+ + `EXTENT_DATA` items: the number of bytes in this extent
+ (i.e. either the item size minus
+ `offsetof(btrfs_file_extent_item.disk_bytenr)` if
+ `FILE_EXTENT_INLINE`, or else the item's `num_bytes`).
+ - data backrefs:
+ - `EXTENT_ITEM`s and `METADATA_ITEM`s: a list of the same length as
+      the number of refs embedded in the item; for embedded
+ ExtentDataRefs, the list entry is the subvolume tree ID that the
+ ExtentDataRef points at, otherwise it is zero.
+ - `EXTENT_DATA_REF` items: a list of length 1, with the sole member
+ being the subvolume tree ID that the ExtentDataRef points at.
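+
+As a sketch, the recorded scan data might look something like these
+Go types (hypothetical names; the real code is in
+[`./cmd/btrfs-rec/inspect/rebuildtrees/scan.go`]):
+
+    package rebuildtrees
+
+    type itemPtr struct {
+        node int64 // logical address of the node
+        slot int
+    }
+
+    type scanData struct {
+        flags map[itemPtr]bool   // INODE_ITEM: has INODE_NODATASUM?
+        names map[itemPtr][]byte // DIR_INDEX: the file's name
+        sizes map[itemPtr]uint64 // EXTENT_CSUM / EXTENT_DATA: byte length
+        // EXTENT_ITEM / METADATA_ITEM / EXTENT_DATA_REF: for each
+        // (embedded or stand-alone) ref, the subvolume tree ID it
+        // points at, or 0 if the ref isn't an ExtentDataRef:
+        dataBackrefs map[itemPtr][]uint64
+    }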
+
+#### 4.3.3.2. the main loop
+
+(see: [`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go`])
+
+Start with that scan data (graph + info about items), and also a
+rebuilt forrest from the above algorithm, but with:
+
+ - the flag set so that it refuses to look up a tree if it can't look
+ up all of that tree's ancestors
+
+ - an additional "potential-item index" that is similar to the item
+ index. It is generated the same way and can cache/evict the same
+    way; the difference is that we invert the check for whether the set of
+ roots for a node has overlap with the tree's set of roots; we're
+ looking for *potential* nodes that we could add to this tree.
+
+ - some callbacks; we'll get to what we do in these callbacks in a
+    bit, but for now, here's what they are:
+
+ + a callback that is called for each added/updated item when we add
+ a root.
+
+ + a callback that is called whenever we add a root
+
+ + a callback that intercepts looking up a root item
+
+ + a callback that intercepts resolving an UUID to an object ID.
+
+ (The callbacks are in
+ [`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go`])
+
+We have 5 unordered queues ("work lists"?); these are sets, and when
+it's time to drain them we'll sort the members and process them in
+that order:
+
+ 1. the tree queue: a list of tree IDs that we need to crawl
+ 2. the retry-item queue: for each tree ID, a set of items that we
+ should re-process if we add a root to that tree
+ 3. the added-item queue: a set of key/tree pairs identifying items
+ that have been added by adding a root to a tree
+ 4. the settled-item queue: a set of key/tree pairs that have not
+ just been added by adding a root, but we've also verified that
+ they are the newest-generation item with that key that we could
+ add to the tree.
+ 5. the augment queue: for each item that we want to add to a tree,
+ the list of roots that we could add to get that item.
+
+The queues all start out empty, except for the tree queue, which we
+seed with the `ROOT_TREE`, the `CHUNK_TREE`, and the
+`BLOCK_GROUP_TREE`.  (It is a "TODO" task that it should probably also
+be seeded with the `TREE_LOG`, but as I will say below in the "future
+work" section, I don't actually understand the `TREE_LOG`, so I
+couldn't implement it.)
+
+Now we're going to loop until the tree queue, added-item queue,
+settled-item queue, and augment queue are all empty (all queues except
+for the retry-item queue). Each loop "pass" has 3 substeps:
+
+ 1. Crawl the trees (drain the tree queue, fill the added-item queue).
+
+ 2. Either:
+
+ a. if the added-item queue is non-empty: "settle" those items
+ (drain the added-item queue, fill the augment queue and the
+ settled-item queue).
+
+ b. otherwise: process items (drain the settled-item queue, fill
+ the augment queue and the tree queue)
+
+ 3. Apply augments (drain the augment queue and maybe the retry-item
+ queue, fill the added-item queue).
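+
+In other words, the shape of the loop is roughly this sketch (with
+made-up names and `string`-keyed sets standing in for the real key
+types; the real loop is in
+[`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go`]):
+
+    package rebuildtrees
+
+    type queues struct {
+        tree, added, settled, augment, retry map[string]struct{}
+    }
+
+    // done deliberately ignores the retry-item queue.
+    func (q *queues) done() bool {
+        return len(q.tree) == 0 && len(q.added) == 0 &&
+            len(q.settled) == 0 && len(q.augment) == 0
+    }
+
+    func run(q *queues, crawlTrees, settleItems, processItems, applyAugments func()) {
+        for !q.done() {
+            crawlTrees() // drain q.tree, fill q.added
+            if len(q.added) > 0 {
+                settleItems() // drain q.added, fill q.augment and q.settled
+            } else {
+                processItems() // drain q.settled, fill q.augment and q.tree
+            }
+            applyAugments() // drain q.augment (and maybe q.retry), fill q.added
+        }
+    }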
+
+OK, let's look at those 3 substeps in more detail:
+
+ 1. Crawl the trees; drain the tree queue, fill the added-item queue.
+
+    We just look up the tree in the rebuilt forrest, which (per the
+    above `--rebuild` algorithm) will either fail to look up the
+    tree, or succeed and add to that tree the root node from the
+ superblock/root-item. Because we set an item-added callback, when
+ adding that root it will loop over the nodes added by that root,
+    and call our callback for each item in the added nodes.
+ Our callback inserts each item into the added-item queue. The
+ forrest also calls our root-added callback, but because of the way
+ this algorithm works, that turns out to be a no-op at this step.
+
+ I mentioned that we added callbacks to intercept the forrest's
+ looking up of root items and resolving UUIDs; we override the
+    forrest's "lookup root item" routine and "resolve UUID" routine
+    so that, instead of doing normal lookups on the `ROOT_TREE` and
+    `UUID_TREE`, they use the <code>Want<var>XXX</var></code> routines that we'll define
+ below in the "graph callbacks" section.
+
+ It shouldn't matter what order this queue is processed in, but I
+ sort tree IDs numerically.
+
+    The crawling is fairly fast because it's just in-memory; the only
+ accesses to disk are looking up root items and resolving UUIDs.
+
+ 2. Either:
+
+ a. Settle items from the added-item queue to the settled-item queue
+ (and fill the augment queue).
+
+ For each item in the queue, we look in the tree's item index to
+ get the {node, slot} pair for it, then we do the same in the
+ tree's potential-item index. If the potential-item index
+ contains an entry for the item's key, then we check if the
+ potential-item's node should "win" over the queue item's node,
+ deciding the "winner" using the same routine as when building
+ the item index. If the potential-item's node wins, then we add
+ the potential node's set of roots to the augment queue. If the
+ queue-item's node wins, then we add the item to the
+ settled-item queue (except, as an optimization, if the item is
+ of a type that cannot possibly imply the existence of another
+ item, then we just drop it and don't add it to the settled-item
+ queue).
+
+ It shouldn't matter what order this queue is processed in, but
+ I sort it numerically by treeID and then by item key.
+
+ This step is fairly fast because it's entirely in-memory,
+ making no accesses to disk.
+
+ b. Process items from the settled-item queue (drain the
+ settled-item queue, fill the augment queue and the tree queue).
+
+ This step accesses disk, and so the order we process the queue
+ in turns out to be pretty important in order to keep our disk
+ access patterns cache-friendly. For the most part, we just
+ sort each queue item by tree, then by key. But, we have
+ special handling for `EXTENT_ITEM`s, `METADATA_ITEM`s, and
+ `EXTENT_DATA_REF` items: We break `EXTENT_ITEM`s and
+ `METADATA_ITEM`s in to "sub-items", treating each ref embedded
+ in them as a separate item. For those embedded items that are
+ `EXTENT_DATA_REF`s, and for stand-alone `EXTENT_DATA_REF`
+ items, we sort them not with the `EXTENT_TREE` items, but with
+ the items of the tree that the extent data ref points at.
+          Recall that during the initial scan step, we took note of
+ which tree every extent data ref points at, so we can perform
+ this sort without accessing disk yet. This splitting does mean
+ that we may visit/read an `EXTENT_ITEM` or `METADATA_ITEM`
+ multiple times as we process the queue, but to do otherwise is
+          multiple times as we process the queue, but to do otherwise
+          would require solving MinLA, which is NP-hard; and I still
+          think even an optimal MinLA solution would perform worse
+          than this; there is
+ [`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:sortSettledItemQueue()`].
+
+ Now we loop over that sorted queue. In the code, this loop is
+ deceptively simple. Read the item, then pass it to a function
+ that tells us what other items are implied by it. That
+ function is large, but simple; it's just a giant table. The
+          trick is how it tells us about implied items; we give it a
+          set of callbacks that it calls to tell us these things; the
+          real complexity is in the callbacks.  These "graph callbacks" will
+ be discussed in detail below, but as an illustrative example:
+ It may call `.WantOff()` with a tree ID, object ID, item type,
+ and offset to specify a precise item that it believes should
+ exist.
+
+ If we encounter a `ROOT_ITEM`, add the tree described by that
+ item to the tree queue.
+
+ (Both the "can this item even imply the existence of another item"
+ check and the "what items are implied by this item" routine are in
+ [`./lib/btrfscheck/graph.go`])
+
+ 3. Apply augments; drain the augment queue (and maybe the retry-item
+    queue), fill the added-item queue.
+
+    It is at this point that I call out that the augment queue isn't
+    implemented as a simple map/set like the others; the
+    `treeAugmentQueue struct` has special handling for sets of
+    different sizes, optimizing the space for empty and len()==1
+    sets, and falling back to the usual implementation for larger
+    sets; this is important because those small sets are the
+    overwhelming majority, and otherwise there's no way the program
+    would be able to run on my 32GB RAM laptop.  Now that I think
+    about it, I bet it would even be worth it to add optimized
+    storage for len()==2 sets.
+
+ The reason is that each "want" from above is tracked in the queue
+ separately; if we were OK merging them, then this optimized
+    storage wouldn't be necessary.  But we keep them separate, so
+ that:
+
+ - For all "wants", including ones with empty sets, graph callbacks
+      can check if a want has already been processed, avoiding
+ re-doing any work (see the description of the graph callbacks
+ below).
+
+ - For "wants" with non-empty sets, we can see how many different
+ "wants" could be satisfied with a given root, in order to decide
+ which root to choose.
+
+ Anyway, we loop over the trees in the augment queue. For each
+ tree we look at that tree's augment queue and look at all the
+ choices of root nodes to add (below), and decide on a list to add.
+    Then we add each of those roots to the tree; the adding of each
+ root triggers several calls to our item-added callback (filling
+ the added-item queue), and our root-added callback. The
+ root-added callback moves any items from the retry-item queue for
+ this tree to the added-item queue.
+
+ How do we decide between choices of root nodes to add?
+ [`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:resolveTreeAugments()`]
+ has a good comment explaining the criteria we'd like to optimize
+ for, and then code that does an OK-ish job of actually optimizing
+ for that:
+
+ - It loops over the augment queue for that tree, building a list
+ of possible roots, for each possible root making note of 3
+ things:
+
+ a. how many "wants" that root satisfies,
+
+       b. how far from the tree the root's owner is (owner=tree is a
+ distance of 0, owner=tree.parent is a distance of 1,
+ owner=tree.parent.parent is a distance of 2, and so on), and
+
+ c. what the generation of that root is.
+
+ - We sort that list first by highest-count-first, then by
+ lowest-distance-first, then by highest-generation-first.
+
+ - We create a "return" set and an "illegal" set. We loop over the
+ sorted list; for each possible root if it is in the illegal set,
+      we skip it; otherwise we insert it into the return set, and for
+      each "want" that includes this root we add all roots that
+      satisfy that want to the illegal set.
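+
+In Go, that selection might look something like this sketch (made-up
+names; the real code is
+[`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:resolveTreeAugments()`]):
+
+    package rebuildtrees
+
+    import "sort"
+
+    type rootChoice struct {
+        root     int64  // logical address of the candidate root node
+        count    int    // how many "wants" this root satisfies
+        distance int    // owner distance from the tree (0 = this tree)
+        gen      uint64 // the candidate root's generation
+    }
+
+    // resolve picks which roots to add; wants[i] lists the candidate
+    // roots that would satisfy want i.
+    func resolve(choices []rootChoice, wants [][]int64) map[int64]struct{} {
+        sort.Slice(choices, func(i, j int) bool {
+            a, b := choices[i], choices[j]
+            if a.count != b.count {
+                return a.count > b.count // highest count first
+            }
+            if a.distance != b.distance {
+                return a.distance < b.distance // lowest distance first
+            }
+            return a.gen > b.gen // highest generation first
+        })
+        ret := make(map[int64]struct{})
+        illegal := make(map[int64]struct{})
+        for _, c := range choices {
+            if _, bad := illegal[c.root]; bad {
+                continue
+            }
+            ret[c.root] = struct{}{}
+            // every root that satisfies a want that c.root satisfies
+            // becomes illegal:
+            for _, want := range wants {
+                for _, r := range want {
+                    if r == c.root {
+                        for _, r2 := range want {
+                            illegal[r2] = struct{}{}
+                        }
+                        break
+                    }
+                }
+            }
+        }
+        return ret
+    }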
+
+It is important that the rebuilt forrest have the flag set so that it
+refuses to look up a tree if it can't look up all of that tree's
+ancestors; otherwise the potential-items index would be garbage as we
+wouldn't have a good idea of which nodes are OK to consider; but this
+does have the downside that it won't even attempt to improve a tree
+with a missing parent. Perhaps the algorithm should flip the flag
+once the loop terminates, and then re-seed the tree queue with each
+`ROOT_ITEM` from the `ROOT_TREE`?
+
+#### 4.3.3.3. graph callbacks
+
+(see: [`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go`])
+
+The graph callbacks are what tie the above together.
+
+For each of these callbacks, whenever I say that it looks up something
+in a tree's item index or potential-item index, that implies looking
+the tree up from the forrest; if the forrest cannot look up that tree,
+then the callback returns early, after either:
+
+ - if we are in substep 1 and are processing a tree: we add the tree
+ that is being processed to the tree queue. (TODO: Wait, this
+ assumes that an augment will be applied to the `ROOT_TREE` before
+ the next pass... if that isn't the case, this will result in the
+ loop never terminating... I guess I need to add a separate
+ retry-tree queue?)
+
+ - if we are in substep 2 and are processing an item: we add the item
+ that is being processed to the retry-item queue for the tree that
+    cannot be looked up.
+
+The 6 methods in the `btrfscheck.GraphCallbacks` interface are:
+
+ 1. `FSErr()`: There's an error with the filesystem; this callback
+    just spits it out on stderr.  I mention such a trivial matter
+    because, again, for a recovery tool I think it's worth putting
+    care into
+ how you handle errors and where you expect them: We expect them
+ here, so we have to check for them to avoid reading invalid data
+ or whatever, but we don't actually need to do anything other than
+ watch our step.
+
+ 2. `Want()`: We want an item in a given tree with a given object ID
+ and item type, but we don't care about what the item's offset is.
+
+ The callback works by searching the item index to see if it can
+ find such an item; if so, it has nothing else to do and returns.
+ Otherwise, it searches the potential-item index; for each matching
+    item it finds, it looks in the node index for the node containing
+    that item, finds the roots that would add that node, and adds
+    those roots to a set.  Once it has finished searching the
+ potential-item index, it adds that set to the augment queue (even
+ if that set is still empty).
+
+ 3. `WantOff()`: The same, but we want a specific offset.
+
+ 4. `WantDirIndex()`: We want a `DIR_INDEX` item for a given inode and
+ filename, but we don't know what the offset of that item is.
+
+ First we scan over the item index, looking at all `DIR_INDEX`
+ items for that inode number. For each item, we can check the scan
+ data to see what the filename in that `DIR_INDEX` is, so we can
+ see if the item satisfies this want without accessing the disk.
+ If there's a match, then there is nothing else to do, so we
+ return. Otherwise, we do that same search over the potential-item
+ index; if we find any matches, then we build the set of roots to
+ add to the augment queue the same as in `Want`.
+
+ 5. `WantFileExt()`: We want 1 or more `EXTENT_DATA` items in the
+    given tree for the given inode, and we want them to cover bytes
+    0 through a given size of that file.
+
+ First we walk that range in the item index, to build a list of the
+ gaps that we need to fill ("Step 1" in
+ [`rebuild_wantcb.go:_wantRange()`]). This walk
+ ([`rebuild_wantcb.go:_walkRange()`]) requires knowing the size of
+ each file extent; so doing this quickly without hitting disk is
+ why we recorded the size of each file extent in our initialization
+ step.
+
+ Then ("Step 2" in `_wantRange()`) we iterate over each of the
+ gaps, and for each gap do a very similar walk (again, by calling
+    `_walkRange()`, but this time over the potential-item index).
+    For each file extent we find that is entirely within the gap, we
+    "want" that extent, and move the beginning of the gap forward
+ to the end of that extent. This algorithm is dumb and greedy,
+ potentially making sub-optimal selections; and so could probably
+ stand to be improved; but in my real-world use, it seems to be
+ "good enough".
+
+ 6. `WantCSum()`: We want 1 or more `EXTENT_CSUM` items to cover the
+ half-open interval [`lo_logical_addr`, `hi_logical_addr`). Well,
+ maybe. It also takes a subvolume ID and an inode number; and
+ looks up in the scan data whether that inode has the
+ `INODE_NODATASUM` flag set; if it does have the flag set, then it
+ returns early without looking for any `EXTENT_CSUM` items. If it
+ doesn't return early, then it performs the same want-range routine
+ as `WantFileExt`, but with the appropriate tree, object ID, and
+ item types for csums as opposed to data extents.
+
+For each of these callbacks, we generate a "wantKey", a tuple
+representing the function and its arguments; we check the
+augment-queue to see if we've already enqueued a set of roots for that
+want, and if so, that callback can return early without checking the
+potential-item index.
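+
+Putting the `Want()` description and this de-duplication together, a
+heavily simplified sketch in Go (every type and name here is made up
+for illustration; the real code is in
+[`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go`]):
+
+    package rebuildtrees
+
+    type wantKey struct {
+        tree, objectID uint64
+        itemType       uint8
+    }
+
+    type itemIndex interface {
+        contains(objID uint64, typ uint8) bool
+    }
+    type potentialIndex interface {
+        find(objID uint64, typ uint8) []int64 // nodes with a matching item
+    }
+    type nodeIndex interface {
+        rootsThatInclude(node int64) []int64
+    }
+
+    func want(
+        enqueued map[wantKey]bool, // wants we've already processed
+        items itemIndex, potential potentialIndex, nodes nodeIndex,
+        augment func(wantKey, map[int64]struct{}),
+        treeID, objID uint64, typ uint8,
+    ) {
+        k := wantKey{treeID, objID, typ}
+        if enqueued[k] {
+            return // already enqueued a set of roots for this want
+        }
+        if items.contains(objID, typ) {
+            return // the tree already has the item; nothing to do
+        }
+        roots := make(map[int64]struct{})
+        for _, node := range potential.find(objID, typ) {
+            for _, root := range nodes.rootsThatInclude(node) {
+                roots[root] = struct{}{}
+            }
+        }
+        augment(k, roots) // enqueue even if roots is empty
+        enqueued[k] = true
+    }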
+
+# 5. Future work
+
+It's in a reasonably useful place, I think; and so now I'm going to
+take a break from it for a while. But there's still lots of work to
+do:
+
+ - RAID almost certainly doesn't work.
+
+ - Encryption is not implemented.
+
+ - It doesn't understand (ignores) the `TREE_LOG` (because I don't
+ understand the `TREE_LOG`).
+
+ - `btrfs-rec inspect mount` should add "lost+found" directories for
+ inodes that are included in the subvolume's tree but aren't
+    reachable from the tree's root inode.
+
+ - I still need to implement <code>btrfs-rec repair <var>SUBCMD</var></code> subcommands to
+ write rebuilt-information from `btrfs-rec inspect` back to the
+ filesystem.
+
+ - I need to figure out the error handling/reporting story for
+ `mount`.
+
+  - It needs a lot more tests.
+
+ + I'd like to get the existing btrfs-progs fsck tests to run on
+ it.
+
+ - In the process of writing this email, I realized that I probably
+ need to add a retry-tree queue; see the "graph callbacks" section
+ in the description of the `rebuild-trees` algorithm above.
+
+  - There are a number of "TODO" comments or panics in the code:
+
+    + Some of them definitely need to be done.
+
+ + Some of them are `panic("TODO")` on the basis that if it's
+ seeing something on the filesystem that it doesn't recognize,
+ it's probably that I didn't get to implementing that
+ thing/situation, but it's possible that the thing is just
+      corrupt.  This should only be for situations where the node
+ passed the checksum test, so it being corrupt would have to be
+ caused by a bug in btrfs rather than a failing drive or other
+ corruption; I wasn't too worried about btrfs bugs.
+
+ - `btrfs-rec inspect rebuild-trees` is slow, and can probably be made
+ a lot faster.
+
+ Just to give you an idea of the speeds, the run-times for the
+ various steps on my ThinkPad E15 for a 256GB disk image are as
+ follows:
+
+ btrfs-rec inspect rebuild-mappings scan : 7m 31s
+ btrfs-rec inspect rebuild-mappings list-nodes : 47s
+ btrfs-rec inspect rebuild-mappings process : 8m 22s
+ btrfs-rec inspect rebuild-trees : 1h 4m 55s
+ btrfs-rec inspect ls-files : 29m 55s
+ btrfs-rec inspect ls-trees : 8m 40s
+
+ For the most part, it's all single-threaded (with the main
+ exception that in several places I/O has been moved to a separate
+ thread from the main CPU-heavy thread), but a lot of the algorithms
+ could be parallelized.
+
+ - There are a lot of "tunable" values that I haven't really spent
+ time tuning. These are all annotated with [`textui.Tunable()`]. I
+ sort-of intended for them to be adjustable on the CLI.
+
+  - Perhaps the `btrfs-rec inspect rebuild-trees` algorithm could be
+ adjusted to also try to rebuild trees with missing parents; see the
+ above discussion of the algorithm.
+
+# 6. Problems for merging this code into btrfs-progs
+
+ - It's written in Go, not C.
+
+ - It's effectively GPLv3+ (not GPLv2-only or GPLv2+) because of use
+ of some code under the Apache 2.0 license (2 files in the codebase
+ itself that are based off of Apache-licensed code, and use of
+ unmodified 3rd-party libraries).
+
+ - It uses ARC (Adaptive Replacement Cache), which is patented by IBM,
+ and the patent doesn't expire for another 7 months. An important
+ property of ARC over LRU is that it is scan-resistant; the above
+ algorithms do a lot of scanning. On that note, now that RedHat is
+ owned by IBM: who in the company do we need to get to talk to
+    each other so that we can get ARC into the Linux kernel before then?
+
+<div style="font-family: monospace">
+-- <br/>
+Happy hacking,<br/>
+~ Luke Shumaker<br/>
+</div>
+
+[`./examples/main.sh`]: https://git.lukeshu.com/btrfs-progs-ng/tree/examples/main.sh?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`examples/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/examples?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/btrfs/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfs?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/btrfscheck/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfscheck?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/btrfsutil/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`cmd/btrfs-rec/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/textui/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/textui?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/binstruct/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/binstruct?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/diskio/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/diskio?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/streamio/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/streamio?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/containers/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/containers?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/fmtutil/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/fmtutil?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/maps/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/maps?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/slices/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/slices?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`lib/profile/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/profile?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`examples/main.sh`]: https://git.lukeshu.com/btrfs-progs-ng/tree/examples/main.sh?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./lib/btrfsutil/scan.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./cmd/btrfs-rec/inspect/rebuildmappings/scan.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./lib/btrfs/btrfsvol/lvm.go:addMapping()`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfs/btrfsvol/lvm.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n121
+[`./cmd/btrfs-rec/inspect/rebuildmappings/process.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/process.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./lib/btrfsutil/graph.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/graph.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./lib/btrfsutil/rebuilt_forrest.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/rebuilt_forrest.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./lib/btrfsutil/rebuilt_tree.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/rebuilt_tree.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./cmd/btrfs-rec/inspect/rebuildtrees/`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./cmd/btrfs-rec/inspect/rebuildtrees/scan.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:sortSettledItemQueue()`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n251
+[`./lib/btrfscheck/graph.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfscheck/graph.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:resolveTreeAugments()`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n528
+[`./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434
+[`rebuild_wantcb.go:_wantRange()`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n260
+[`rebuild_wantcb.go:_walkRange()`]: https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n195
+[`textui.Tunable()`]: https://git.lukeshu.com/btrfs-progs-ng/tree/lib/textui/tunable.go?id=18e6066c241cf3d252b6521150843ffc858d8434
diff --git a/public/build-bash-1.html b/public/build-bash-1.html
new file mode 100644
index 0000000..30b46b7
--- /dev/null
+++ b/public/build-bash-1.html
@@ -0,0 +1,109 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Building Bash 1.14.7 on a modern system — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » build-bash-1</header>
+<article>
+<h1 id="building-bash-1.14.7-on-a-modern-system">Building Bash 1.14.7 on
+a modern system</h1>
+<p>In a previous revision of my <a href="./bash-arrays.html">Bash arrays
+post</a>, I wrote:</p>
+<blockquote>
+<p>Bash 1.x won’t compile with modern GCC, so I couldn’t verify how it
+behaves.</p>
+</blockquote>
+<p>I recall spending a little time fighting with it, but apparently I
+didn’t try very hard: getting Bash 1.14.7 to build on a modern box is
+mostly just adjusting it to use <code>stdarg</code> instead of the
+no-longer-implemented <code>varargs</code>. There’s also a little
+fiddling with the pre-autoconf automatic configuration.</p>
+<h2 id="stdarg">stdarg</h2>
+<p>Converting to <code>stdarg</code> is pretty simple: For each variadic
+function (functions that take a variable number of arguments), follow
+these steps:</p>
+<ol type="1">
+<li>Replace <code>#include &lt;varargs.h&gt;</code> with
+<code>#include &lt;stdarg.h&gt;</code></li>
+<li>Replace <code>function_name (va_alist) va_dcl</code> with
+<code>function_name (char *format, ...)</code>.</li>
+<li>Remove the declaration and assignment for <code>format</code> from
+the function body.</li>
+<li>Replace <code>va_start (args);</code> with
+<code>va_start (args, format);</code> in the function bodies.</li>
+<li>Replace <code>function_name ();</code> with
+<code>function_name (char *, ...)</code> in header files and/or at the
+top of C files.</li>
+</ol>
+<p>There’s one function that uses the variable name <code>control</code>
+instead of <code>format</code>.</p>
+<p>I’ve prepared <a href="./bash-1.14.7-gcc4-stdarg.patch">a patch</a>
+that does this.</p>
+<h2 id="configuration">Configuration</h2>
+<p>Instead of using autoconf-style tests to test for compiler and
+platform features, Bash 1 used the file <code>machines.h</code> that had
+<code>#ifdefs</code> and a huge database of different operating
+systems for different platforms. It’s gross. And quite likely won’t
+handle your modern operating system.</p>
+<p>I made these two small changes to <code>machines.h</code> to get it
+to work correctly on my box:</p>
+<ol type="1">
+<li>Replace <code>#if defined (i386)</code> with
+<code>#if defined (i386) || defined (__x86_64__)</code>. The purpose of
+this is obvious.</li>
+<li>Add <code>#define USE_TERMCAP_EMULATION</code> to the section for
+Linux [sic] on i386
+(<code># if !defined (done386) &amp;&amp; (defined (__linux__) || defined (linux))</code>).
+What this does is tell it to link against libcurses to use curses
+termcap emulation, instead of linking against libtermcap (which doesn’t
+exist on modern GNU/Linux systems).</li>
+</ol>
+<p>Again, I’ve prepared <a href="./bash-1.14.7-machines-config.patch">a
+patch</a> that does this.</p>
+<h2 id="building">Building</h2>
+<p>With those adjustments, it should build, but with quite a few
+warnings. Making a couple of changes to <code>CFLAGS</code> should fix
+that:</p>
+<pre><code>make CFLAGS=&#39;-O -g -Werror -Wno-int-to-pointer-cast -Wno-pointer-to-int-cast -Wno-deprecated-declarations -include stdio.h -include stdlib.h -include string.h -Dexp2=bash_exp2&#39;</code></pre>
+<p>That’s a doozy! Let’s break it down:</p>
+<ul>
+<li><code>-O -g</code> The default value for CFLAGS (defined in
+<code>cpp-Makefile</code>)</li>
+<li><code>-Werror</code> Treat warnings as errors; force us to deal with
+any issues.</li>
+<li><code>-Wno-int-to-pointer-cast -Wno-pointer-to-int-cast</code> Allow
+casting between integers and pointers. Unfortunately, the way this
+version of Bash was designed requires this.</li>
+<li><code>-Wno-deprecated-declarations</code> The <code>getwd</code>
+function in <code>unistd.h</code> is considered deprecated (use
+<code>getcwd</code> instead). However, if <code>getcwd</code> is
+available, Bash uses its own <code>getwd</code> wrapper around
+<code>getcwd</code> (implemented in <code>general.c</code>), and only
+uses the signature from <code>unistd.h</code>, not the actual
+implementation from libc.</li>
+<li><code>-include stdio.h -include stdlib.h -include string.h</code>
+Several files are missing these header file includes. If not for
+<code>-Werror</code>, the default function signature fallbacks would
+work.</li>
+<li><code>-Dexp2=bash_exp2</code> Avoid a conflict between the parser’s
+<code>exp2</code> helper function and <code>math.h</code>’s base-2
+exponential function.</li>
+</ul>
+<p>Have fun, software archaeologists!</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2015 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/crt-sh-architecture.html b/public/crt-sh-architecture.html
new file mode 100644
index 0000000..b9d1b06
--- /dev/null
+++ b/public/crt-sh-architecture.html
@@ -0,0 +1,86 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>The interesting architecture of crt.sh — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » crt-sh-architecture</header>
+<article>
+<h1 id="the-interesting-architecture-of-crt.sh">The interesting
+architecture of crt.sh</h1>
+<p>A while back I wrote myself a little dashboard for monitoring TLS
+certificates for my domains. Right now it works by talking to <a
+href="https://crt.sh/" class="uri">https://crt.sh/</a>. Sometimes this
+works great, but sometimes crt.sh is really slow. Plus, it’s another
+thing that could be compromised.</p>
+<p>So, I started looking at how crt.sh works. It’s kinda cool.</p>
+<p>There are only 3 separate processes:</p>
+<ul>
+<li>Cron
+<ul>
+<li><a
+href="https://github.com/crtsh/ct_monitor"><code>ct_monitor</code></a>
+is a program that uses libcurl to get CT log changes and libpq to put them
+into the database.</li>
+</ul></li>
+<li>PostgreSQL
+<ul>
+<li><a
+href="https://github.com/crtsh/certwatch_db"><code>certwatch_db</code></a>
+is the core web application, written in PL/pgSQL. It even includes the
+HTML templating and query parameter handling. Of course, there are a
+couple of things not entirely done in pgSQL…</li>
+<li><a
+href="https://github.com/crtsh/libx509pq"><code>libx509pq</code></a>
+adds a set of <code>x509_*</code> functions callable from pgSQL for
+parsing X509 certificates.</li>
+<li><a
+href="https://github.com/crtsh/libcablintpq"><code>libcablintpq</code></a>
+adds the <code>cablint_embedded(bytea)</code> function to pgSQL.</li>
+<li><a
+href="https://github.com/crtsh/libx509lintpq"><code>libx509lintpq</code></a>
+adds the <code>x509lint_embedded(bytea,integer)</code> function to
+pgSQL.</li>
+</ul></li>
+<li>Apache HTTPD
+<ul>
+<li><a
+href="https://github.com/crtsh/mod_certwatch"><code>mod_certwatch</code></a>
+is a pretty thin wrapper that turns every HTTP request into an SQL
+statement sent to PostgreSQL, via…</li>
+<li><a
+href="https://github.com/crtsh/mod_pgconn"><code>mod_pgconn</code></a>,
+which manages PostgreSQL connections.</li>
+</ul></li>
+</ul>
+<p>The interface exposes HTML, ATOM, and JSON. All from code written in
+SQL.</p>
+<p>And then I guess it’s behind an nginx-based load-balancer or somesuch
+(based on the 504 Gateway Timeout messages it’s given me). But that’s not
+interesting.</p>
+<p>The actual website is <a
+href="https://groups.google.com/d/msg/mozilla.dev.security.policy/EPv_u9V06n0/gPJY5T7ILlQJ">run
+from a read-only slave</a> of the master DB that the
+<code>ct_monitor</code> cron-job updates; which makes several security
+considerations go away, and makes horizontal scaling easy.</p>
+<p>Anyway, I thought it was neat that so much of it runs inside the
+database; you don’t see that terribly often. I also thought the little
+shims to make that possible were neat. I didn’t get deep enough into it
+to end up running my own instance or clone, but I thought my notes on it
+were worth sharing.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2018 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/emacs-as-an-os.html b/public/emacs-as-an-os.html
new file mode 100644
index 0000000..da8c904
--- /dev/null
+++ b/public/emacs-as-an-os.html
@@ -0,0 +1,55 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Emacs as an operating system — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » emacs-as-an-os</header>
+<article>
+<h1 id="emacs-as-an-operating-system">Emacs as an operating system</h1>
+<p>This was originally published on <a
+href="https://news.ycombinator.com/item?id=6292742">Hacker News</a> on
+2013-08-29.</p>
+<p>Calling Emacs an OS is dubious, it certainly isn’t a general purpose
+OS, and won’t run on real hardware. But, let me make the case that Emacs
+is an OS.</p>
+<p>Emacs has two parts, the C part, and the Emacs Lisp part.</p>
+<p>The C part isn’t just a Lisp interpreter, it is a Lisp Machine
+emulator. It doesn’t particularly resemble any of the real Lisp
+machines. The TCP, Keyboard/Mouse, display support, and filesystem are
+done at the hardware level (the operations to work with these things are
+among the primitive operations provided by the hardware). Of these, the
+display being handled by the hardware isn’t particularly uncommon,
+historically; the filesystem is a little stranger.</p>
+<p>The Lisp part of Emacs is the operating system that runs on that
+emulated hardware. It’s not a particularly powerful OS; it’s not a
+multitasking system. It has many packages available for it (though not
+until recently was there an official package manager). It has reasonably
+powerful IPC mechanisms. It has shells, mail clients (MUAs and MSAs),
+web browsers, web servers and more, all written entirely in Emacs
+Lisp.</p>
+<p>You might say, “but a lot of that is being done by the host operating
+system!” Sure, some of it is, but all of it is sufficiently low level.
+If you wanted to share the filesystem with another OS running in a VM,
+you might do it by sharing it as a network filesystem; this is necessary
+when the VM OS is not designed around running in a VM. However, because
+Emacs OS will always be running in the Emacs VM, we can optimize it by
+having the Emacs VM include processor features mapping to the native OS,
+and have the Emacs OS be aware of them. It would be slower and more code
+to do that all over the network.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2013 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/emacs-shells.html b/public/emacs-shells.html
new file mode 100644
index 0000000..b0a7647
--- /dev/null
+++ b/public/emacs-shells.html
@@ -0,0 +1,86 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>A summary of Emacs' bundled shell and terminal modes — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » emacs-shells</header>
+<article>
+<h1 id="a-summary-of-emacs-bundled-shell-and-terminal-modes">A summary
+of Emacs’ bundled shell and terminal modes</h1>
+<p>This is based on a post on <a
+href="http://www.reddit.com/r/emacs/comments/1bzl8b/how_can_i_get_a_dumbersimpler_shell_in_emacs/c9blzyb">reddit</a>,
+published on 2013-04-09.</p>
+<p>Emacs comes bundled with a few different shell and terminal modes. It
+can be hard to keep them straight. What’s the difference between
+<code>M-x term</code> and <code>M-x ansi-term</code>?</p>
+<p>Here’s a good breakdown of the different bundled shells and terminals
+for Emacs, from dumbest to most Emacs-y.</p>
+<h2 id="term-mode">term-mode</h2>
+<p>Your VT100-esque terminal emulator; it does what most terminal
+programs do. Ncurses-things work OK, but dumping large amounts of text
+can be slow. By default it asks you which shell to run, defaulting to
+the environment variable <code>$SHELL</code> (<code>/bin/bash</code>
+for me). There are two modes of operation:</p>
+<ul>
+<li>char mode: Keys are sent immediately to the shell (including keys
+that are normally Emacs keystrokes), with the following exceptions:
+<ul>
+<li><code>(term-escape-char) (term-escape-char)</code> sends
+<code>(term-escape-char)</code> to the shell (see the activation
+commands below for the default value).</li>
+<li><code>(term-escape-char) &lt;anything-else&gt;</code> equates to
+<code>C-x &lt;anything-else&gt;</code> in normal
+Emacs.</li>
+<li><code>(term-escape-char) C-j</code> switches to line mode.</li>
+</ul></li>
+<li>line mode: Editing is done like in a normal Emacs buffer,
+<code>&lt;enter&gt;</code> sends the current line to the shell. This is
+useful for working with a program’s output.
+<ul>
+<li><code>C-c C-k</code> switches to char mode.</li>
+</ul></li>
+</ul>
+<p>This mode is activated with</p>
+<pre><code>; Creates or switches to an existing &quot;*terminal*&quot; buffer.
+; The default &#39;term-escape-char&#39; is &quot;C-c&quot;
+M-x term</code></pre>
+<p>or</p>
+<pre><code>; Creates a new &quot;*ansi-term*&quot; or &quot;*ansi-term*&lt;n&gt;&quot; buffer.
+; The default &#39;term-escape-char&#39; is &quot;C-c&quot; and &quot;C-x&quot;
+M-x ansi-term</code></pre>
+<h2 id="shell-mode">shell-mode</h2>
+<p>The name is a misnomer; shell-mode is a terminal emulator, not a
+shell; it’s called that because it is used for running a shell (bash,
+zsh, …). The idea of this mode is to use an external shell, but make it
+Emacs-y. History is not handled by the shell, but by Emacs;
+<code>M-p</code> and <code>M-n</code> access the history, while
+arrows/<code>C-p</code>/<code>C-n</code> move the point (which is
+consistent with other Emacs REPL-type interfaces). It ignores VT100-type
+terminal colors, and colorizes things itself (it inspects words to see
+if they are directories, in the case of <code>ls</code> output). This
+has the benefit that it does syntax highlighting on the command
+currently being typed. Ncurses programs will of course not work. This mode is
+activated with:</p>
+<pre><code>M-x shell</code></pre>
+<h2 id="eshell-mode">eshell-mode</h2>
+<p>This is a shell+terminal, written entirely in Emacs Lisp.
+(Interestingly, it doesn’t set <code>$SHELL</code>, so that will be
+whatever it was when you launched Emacs). This won’t even be running
+zsh or bash; it will be running “esh”, part of Emacs.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2013 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/fd_printf.html b/public/fd_printf.html
new file mode 100644
index 0000000..b4d6c2b
--- /dev/null
+++ b/public/fd_printf.html
@@ -0,0 +1,61 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>`dprintf`: print formatted text directly to a file descriptor — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » fd_printf</header>
+<article>
+<h1
+id="dprintf-print-formatted-text-directly-to-a-file-descriptor"><code>dprintf</code>:
+print formatted text directly to a file descriptor</h1>
+<p>This already existed as <code>dprintf(3)</code>. I now feel stupid
+for having implemented <code>fd_printf</code>.</p>
+<p>The original post is as follows:</p>
+<hr />
+<p>I wrote this while debugging some code, and thought it might be
+useful to others:</p>
+<pre><code>#define _GNU_SOURCE /* vasprintf() */
+#include &lt;stdarg.h&gt; /* va_start()/va_end() */
+#include &lt;stdio.h&gt; /* vasprintf() */
+#include &lt;stdlib.h&gt; /* free() */
+#include &lt;unistd.h&gt; /* write() */
+
+int
+fd_printf(int fd, const char *format, ...)
+{
+ va_list arg;
+ int len;
+ char *str;
+
+ va_start(arg, format);
+ len = vasprintf(&amp;str, format, arg);
+ va_end(arg);
+
+ if (len &lt; 0) /* vasprintf() failed; str is undefined */
+  return len;
+
+ write(fd, str, len);
+
+ free(str);
+ return len;
+}</code></pre>
+<p>It is a version of <code>printf</code> that prints to a file
+descriptor—where <code>fprintf</code> prints to a <code>FILE*</code>
+data structure.</p>
+<p>The appeal of this is that <code>FILE*</code> I/O is buffered—which
+means mixing it with raw file descriptor I/O is going to produce weird
+results.</p>
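+<p>(The same hazard exists in any language with buffered I/O. As a
+minimal illustration, not part of the original post, here is the same
+interleaving problem reproduced in Go, where <code>bufio.Writer</code>
+plays the role of <code>FILE*</code>:)</p>
+<pre><code>package main
+
+import (
+	&quot;bufio&quot;
+	&quot;os&quot;
+)
+
+func main() {
+	stdout := bufio.NewWriter(os.Stdout) // buffered, like FILE* stdio
+	stdout.WriteString(&quot;first &quot;)         // sits in the buffer...
+	os.Stdout.WriteString(&quot;second &quot;)     // ...this raw write goes out immediately
+	stdout.Flush()                       // only now is &quot;first &quot; written
+	// Prints &quot;second first &quot;, not &quot;first second &quot;.
+}</code></pre>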
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2013 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="http://www.wtfpl.net/txt/copying/">WTFPL-2</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/fs-licensing-explanation.html b/public/fs-licensing-explanation.html
new file mode 100644
index 0000000..13e3db7
--- /dev/null
+++ b/public/fs-licensing-explanation.html
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>An explanation of how "copyleft" licensing works — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » fs-licensing-explanation</header>
+<article>
+<h1 id="an-explanation-of-how-copyleft-licensing-works">An explanation
+of how “copyleft” licensing works</h1>
+<p>This is based on a post on <a
+href="http://www.reddit.com/r/freesoftware/comments/18xplw/can_software_be_free_gnu_and_still_be_owned_by_an/c8ixwq2">reddit</a>,
+published on 2013-02-21.</p>
+<blockquote>
+<p>While reading the man page for readline I noticed the copyright
+section said “Readline is Copyright (C) 1989-2011 Free Software
+Foundation Inc”. How can software be both licensed under GNU and
+copyrighted to a single group? It was my understanding that once code
+became free it didn’t belong to any particular group or individual.</p>
+<p>[LiveCode is GPLv3, but also sells non-free licenses] Can you really
+have the same code under two conflicting licenses? Once licensed under
+GPL3 wouldn’t they too be required to adhere to its rules?</p>
+</blockquote>
+<p>I believe that GNU/the FSF has an FAQ that addresses this, but I
+can’t find it, so here we go.</p>
+<h3 id="glossary">Glossary:</h3>
+<ul>
+<li>“<em>Copyright</em>” is the right to control how copies are made of
+something.</li>
+<li>Something for which no one holds the copyright is in the “<em>public
+domain</em>”, because anyone (“the public”) is allowed to do
+<em>anything</em> with it.</li>
+<li>A “<em>license</em>” is basically a legal document that says “I
+promise not to sue you if you make copies in these specific ways.”</li>
+<li>A “<em>non-free</em>” license basically says “There are no
+conditions under which you can make copies that I won’t sue you
+for.”</li>
+<li>A “<em>permissive</em>” (type of free) license basically says “You
+can do whatever you want, BUT have to give me credit”, and is very
+similar to the public domain. If the copyright holder didn’t have the
+copyright, they couldn’t sue you to make sure that you gave them credit,
+and nobody would have to give them credit.</li>
+<li>A “<em>copyleft</em>” (type of free) license basically says, “You
+can do whatever you want, BUT anyone who gets a copy from you has to be
+able to do whatever they want too.” If the copyright holder didn’t have
+the copyright, they couldn’t sue you to make sure that you gave the
+source to people who got it from you, and non-free versions of these
+programs would start to exist.</li>
+</ul>
+<h3 id="specific-questions">Specific questions:</h3>
+<p>Readline: The GNU GPL is a copyleft license. If you make a modified
+version of Readline, and give it to others without letting them have the
+source code, the FSF will sue you. They can do this because they have
+the copyright on Readline, and in the GNU GPL (the license they used) it
+only says that they won’t sue you if you distribute the source with the
+modified version. If they didn’t have the copyright, they couldn’t sue
+you, and the GNU GPL would be worthless.</p>
+<p>LiveCode: The copyright holder for something is not required to obey
+the license—the license is only a promise not to sue you; of course they
+won’t sue themselves. They can also offer different terms to different
+people. They can tell most people “I won’t sue you as long as you share
+the source,” but if someone gave them a little money, they might say, “I
+also promise not to sue this guy, even if he doesn’t give out the
+source.”</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2013 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/git-go-pre-commit.html b/public/git-go-pre-commit.html
new file mode 100644
index 0000000..1fbac13
--- /dev/null
+++ b/public/git-go-pre-commit.html
@@ -0,0 +1,70 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>A git pre-commit hook for automatically formatting Go code — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » git-go-pre-commit</header>
+<article>
+<h1 id="a-git-pre-commit-hook-for-automatically-formatting-go-code">A
+git pre-commit hook for automatically formatting Go code</h1>
+<p>One of the (many) wonderful things about the Go programming language
+is the <code>gofmt</code> tool, which formats your source in a canonical
+way. I thought it would be nice to integrate this in my <code>git</code>
+workflow by adding it in a pre-commit hook to automatically format my
+source code when I committed it.</p>
+<p>The Go distribution contains a git pre-commit hook that checks
+whether the source code is formatted, and aborts the commit if it isn’t.
+I don’t remember if I was aware of this at the time (or if it even
+existed at the time, or if it is new), but I wanted it to go ahead and
+format the code for me.</p>
+<p>I found a few solutions online, but they were all missing
+something—support for partial commits. I frequently use
+<code>git add -p</code>/<code>git gui</code> to commit a subset of the
+changes I’ve made to a file; the existing solutions would end up adding
+the entire set of changes to my commit.</p>
+<p>I ended up writing a solution that only formats the version of the
+file that is staged for commit; here’s my
+<code>.git/hooks/pre-commit</code>:</p>
+<pre><code>#!/bin/bash
+
+# This would only loop over files that are already staged for commit.
+# git diff --cached --numstat |
+# while read add del file; do
+# …
+# done
+
+shopt -s globstar
+for file in **/*.go; do
+ tmp=&quot;$(mktemp &quot;$file.bak.XXXXXXXXXX&quot;)&quot;
+ mv &quot;$file&quot; &quot;$tmp&quot;
+ git checkout &quot;$file&quot;
+ gofmt -w &quot;$file&quot;
+ git add &quot;$file&quot;
+ mv &quot;$tmp&quot; &quot;$file&quot;
+done</code></pre>
+<p>It’s still not perfect. It will try to operate on every
+<code>*.go</code> file—which might do weird things if you have a file
+that hasn’t been checked in at all. This also has the effect of
+formatting files that were checked in without being formatted, but
+weren’t modified in this commit.</p>
+<p>I don’t remember why I did that—as you can see from the comment, I
+knew how to only select files that were staged for commit. I haven’t
+worked on any projects in Go in a while—if I return to one of them, and
+remember why I did that, I will update this page.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2013 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="http://www.wtfpl.net/txt/copying/">WTFPL-2</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/http-notes.html b/public/http-notes.html
new file mode 100644
index 0000000..f7b3588
--- /dev/null
+++ b/public/http-notes.html
@@ -0,0 +1,131 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Notes on subtleties of HTTP implementation — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » http-notes</header>
+<article>
+<h1 id="notes-on-subtleties-of-http-implementation">Notes on subtleties
+of HTTP implementation</h1>
+<p>I may add to this as time goes on, but I’ve written up some notes on
+subtleties of HTTP/1.1 message syntax as specified in RFC 7230.</p>
+<h2 id="why-the-absolute-form-is-used-for-proxy-requests">Why the
+absolute-form is used for proxy requests</h2>
+<p><a
+href="https://tools.ietf.org/html/rfc7230#section-5.3.2">RFC7230§5.3.2</a>
+says that a (non-CONNECT) request to an HTTP proxy should look like</p>
+<pre><code>GET http://authority/path HTTP/1.1</code></pre>
+<p>rather than the usual</p>
+<pre><code>GET /path HTTP/1.1
+Host: authority</code></pre>
+<p>but it doesn’t give a hint as to why the message syntax is different
+here.</p>
+<p><a
+href="https://parsiya.net/blog/2016-07-28-thick-client-proxying---part-6-how-https-proxies-work/#3-1-1-why-not-use-the-host-header">A
+blog post by Parsia Hakimian</a> claims that the reason is that it’s a
+legacy behavior inherited from HTTP/1.0, which had proxies, but not the
+Host header field. Which is mostly true. But we can also realize that
+the usual syntax does not allow specifying a URI scheme, which means
+that we cannot specify a transport. Sure, the only two HTTP transports
+we might expect to use today are TCP (scheme: http) and TLS (scheme:
+https), and TLS requires we use a CONNECT request to the proxy, meaning
+that the only option left is a TCP transport; but that is no reason to
+avoid building generality into the protocol.</p>
+<h2 id="on-taking-short-cuts-based-on-early-header-field-values">On
+taking short-cuts based on early header field values</h2>
+<p><a
+href="https://tools.ietf.org/html/rfc7230#section-3.2.2">RFC7230§3.2.2</a>
+says:</p>
+<blockquote>
+<pre><code>The order in which header fields with differing field names are
+received is not significant. However, it is good practice to send
+header fields that contain control data first, such as Host on
+requests and Date on responses, so that implementations can decide
+when not to handle a message as early as possible.</code></pre>
+</blockquote>
+<p>Which is great! We can make an optimization!</p>
+<p>This is only a valid optimization for deciding to <em>not handle</em>
+a message. You cannot use it to decide to route to a backend
+early. Part of the reason is that <a
+href="https://tools.ietf.org/html/rfc7230#section-5.4">§5.4</a> tells us
+we must inspect the entire header field set to know if we need to
+respond with a 400 status code:</p>
+<blockquote>
+<pre><code>A server MUST respond with a 400 (Bad Request) status code to any
+HTTP/1.1 request message that lacks a Host header field and to any
+request message that contains more than one Host header field or a
+Host header field with an invalid field-value.</code></pre>
+</blockquote>
+<p>However, if I decide not to handle a request based on the Host header
+field, the correct thing to do is to send a 404 status code. Which
+implies that I have parsed the remainder of the header field set to
+validate the message syntax. We need to parse the entire field-set to
+know if we need to send a 400 or a 404. Did this just kill the
+possibility of using the optimization?</p>
+<p>Well, there are a number of “A server MUST respond with an XXX code
+if” rules that can all be triggered on the same request. So we get to
+choose which to use. And fortunately for optimizing implementations, <a
+href="https://tools.ietf.org/html/rfc7230#section-3.2.5">§3.2.5</a> gave
+us:</p>
+<blockquote>
+<pre><code>A server that receives a ... set of fields,
+larger than it wishes to process MUST respond with an appropriate 4xx
+(Client Error) status code.</code></pre>
+</blockquote>
+<p>Since the header field set is longer than we want to process (since
+we want to short-cut processing), we are free to respond with whichever
+4XX status code we like!</p>
+<h2 id="on-normalizing-target-uris">On normalizing target URIs</h2>
+<p>An implementer is tempted to normalize URIs all over the place, just
+for safety and sanitation. After all, <a
+href="https://tools.ietf.org/html/rfc3986#section-6.1">RFC3986§6.1</a>
+says it’s safe!</p>
+<p>Unfortunately, most URI normalization implementations will normalize
+an empty path to “/”. Which is not always safe; <a
+href="https://tools.ietf.org/html/rfc7230#section-2.7.3">RFC7230§2.7.3</a>,
+which defines this “equivalence”, actually says:</p>
+<blockquote>
+<pre><code> When not being used in
+absolute form as the request target of an OPTIONS request, an empty
+path component is equivalent to an absolute path of &quot;/&quot;, so the
+normal form is to provide a path of &quot;/&quot; instead.</code></pre>
+</blockquote>
+<p>Which means we can’t use the usual normalization implementation if we
+are making an OPTIONS request!</p>
+<p>Why is that? Well, if we turn to <a
+href="https://tools.ietf.org/html/rfc7230#section-5.3.4">§5.3.4</a>, we
+find the answer. One of the special cases for when the request target is
+not a URI, is that we may use “*” as the target for an OPTIONS request
+to request information about the origin server itself, rather than a
+resource on that server.</p>
+<p>However, as discussed above, the target in a request to a proxy must
+be an absolute URI (and <a
+href="https://tools.ietf.org/html/rfc7230#section-5.3.2">§5.3.2</a> says
+that the origin server must also understand this syntax). So, we must
+define a way to map “*” to an absolute URI.</p>
+<p>Naively, one might be tempted to use “/*” as the path. But that would
+make it impossible to have a resource actually named “/*”. So, we must
+define a special case in the URI syntax that doesn’t obstruct a real
+path.</p>
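+<p>(Concretely: for a hypothetical origin <code>example.org</code>, the
+special case is that the absolute-form request</p>
+<pre><code>OPTIONS http://example.org HTTP/1.1</code></pre>
+<p>with its empty path is the proxy-target spelling of</p>
+<pre><code>OPTIONS * HTTP/1.1
+Host: example.org</code></pre>
+<p>and is <em>not</em> a request for the “/” resource.)</p>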
+<p>If we didn’t have this special case in the URI normalization rules,
+and we handled the “/” path as the same as empty in the OPTIONS handler
+of the last proxy server, then it would be impossible to request OPTIONS
+for the “/” resources, as it would get translated into “*” and treated
+as OPTIONS for the entire server.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2016 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/index.atom b/public/index.atom
new file mode 100644
index 0000000..6423bca
--- /dev/null
+++ b/public/index.atom
@@ -0,0 +1,4036 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+
+ <title>Luke T. Shumaker's Web Log</title>
+ <link rel="self" type="application/atom+xml" href="./index.atom"/>
+ <link rel="alternate" type="text/html" href="./"/>
+ <link rel="alternate" type="text/markdown" href="./index.md"/>
+ <updated>2023-07-10T00:00:00+00:00</updated>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <id>https://lukeshu.com/blog/</id>
+
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./btrfs-rec.html"/>
+ <link rel="alternate" type="text/markdown" href="./btrfs-rec.md"/>
+ <id>https://lukeshu.com/blog/btrfs-rec.html</id>
+ <updated>2023-07-10T00:00:00+00:00</updated>
+ <published>2023-07-10T00:00:00+00:00</published>
+ <title>Announcing: btrfs-rec: Recover (data from) a broken btrfs filesystem</title>
+ <content type="html">&lt;h1
+id="announcing-btrfs-rec-recover-data-from-a-broken-btrfs-filesystem"&gt;Announcing:
+btrfs-rec: Recover (data from) a broken btrfs filesystem&lt;/h1&gt;
+&lt;blockquote&gt;
+&lt;p&gt;I originally sent this email on 2023-07-10, but it has been caught by
+their bogofilter. Yes, it was &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/README.md?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;plaintext&lt;/a&gt;.
+No, I didn't use GMail. Yes, I've successfully participated in vger
+lists in the past. Yes, I've reached out to postmaster; no, I haven't
+received a reply yet (as of 2023-07-14).&lt;/p&gt;
+&lt;/blockquote&gt;
+&lt;div style="font-family: monospace"&gt;
+&lt;p&gt;To: linux-btrfs@vger.kernel.org&lt;br/&gt; From: Luke T. Shumaker
+&amp;lt;lukeshu@lukeshu.com&amp;gt;&lt;br/&gt; Subject: btrfs-rec: Recover (data from)
+a broken btrfs filesystem&lt;br/&gt; Date: Mon, 10 Jul 2023 21:23:41
+-0600&lt;br/&gt; Message-ID:
+&amp;lt;87jzv7uo5e.wl-lukeshu@lukeshu.com&amp;gt;&lt;br/&gt;&lt;/p&gt;
+&lt;/div&gt;
+&lt;p&gt;Inspired by a mis-typed &lt;code&gt;dd&lt;/code&gt; command, for the last year
+I've been working on a tool for recovering corrupt btrfs filesystems; at
+first idly here and there, but more actively in the last few months. I
+hope to get it incorporated into btrfs-progs, though perhaps that is
+problematic for a few reasons I'll get to. If the code can't be
+incorporated into btrfs-progs, at least the ideas and algorithms should
+be.&lt;/p&gt;
+&lt;p&gt;&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/"&gt;https://git.lukeshu.com/btrfs-progs-ng/&lt;/a&gt;&lt;/p&gt;
+&lt;p&gt;Highlights:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;In general, it's more tolerant of corrupt filesystems than
+&lt;code&gt;btrfs check --repair&lt;/code&gt;, &lt;code&gt;btrfs rescue&lt;/code&gt; or
+&lt;code&gt;btrfs restore&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;btrfs-rec inspect rebuild-mappings&lt;/code&gt; is a better
+&lt;code&gt;btrfs rescue chunk-recover&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;btrfs-rec inspect rebuild-trees&lt;/code&gt; can re-attach lost
+branches to broken B+ trees.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;btrfs-rec inspect mount&lt;/code&gt; is a read-only FUSE
+implementation of btrfs. This is conceptually a replacement for
+&lt;code&gt;btrfs restore&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;It's entirely written in Go. I'm not saying that's a good thing,
+but it's an interesting thing.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;Hopefully some folks will find it useful, or at least neat!&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;a href="#motivation"&gt;1. Motivation&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#overview-of-use"&gt;2. Overview of use&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#prior-art"&gt;3. Prior art&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#internalsdesign"&gt;4. Internals/Design&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#overview-of-the-source-tree-layout"&gt;4.1. Overview of the
+source tree layout&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#base-decisions-cli-structure-go-json"&gt;4.2. Base decisions:
+CLI structure, Go, JSON&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#algorithms"&gt;4.3. Algorithms&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#the-rebuild-mappings-algorithm"&gt;4.3.1. The
+&lt;code&gt;rebuild-mappings&lt;/code&gt; algorithm&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#the---rebuild-algorithm"&gt;4.3.2. The &lt;code&gt;--rebuild&lt;/code&gt;
+algorithm&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#rebuilt-forrest-behavior-looking-up-trees"&gt;4.3.2.1.
+rebuilt forrest behavior&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#rebuilt-individual-tree-behavior"&gt;4.3.2.2. rebuilt
+individual tree behavior&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#the-rebuild-trees-algorithm"&gt;4.3.3. The
+&lt;code&gt;rebuild-trees&lt;/code&gt; algorithm&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#initialization"&gt;4.3.3.1. initialization&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#the-main-loop"&gt;4.3.3.2. the main loop&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#graph-callbacks"&gt;4.3.3.3. graph callbacks&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#future-work"&gt;5. Future work&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a href="#problems-with-merging-this-code-into-btrfs"&gt;6. Problems
+for merging this code into btrfs-progs&lt;/a&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h1 id="motivation"&gt;1. Motivation&lt;/h1&gt;
+&lt;p&gt;Have you ever ended up with a corrupt btrfs filesystem (through no
+fault of btrfs itself, but perhaps a failing drive, or a mistaken
+&lt;code&gt;dd&lt;/code&gt; invocation)? Surely losing less than 100MB of data from
+a drive should not render hundreds of GB of perfectly intact data
+unreadable! And yet, the existing tools are unable to even attempt to
+read that data:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs check --repair --force dump-zero.1.img
+enabling repair mode
+Opening filesystem to check...
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+ERROR: cannot open file system&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;or&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs check --init-extent-tree --force dump-zero.1.img
+Opening filesystem to check...
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+ERROR: cannot open file system&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;or&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs check --init-csum-tree --force dump-zero.1.img
+Creating a new CRC tree
+Opening filesystem to check...
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+ERROR: cannot open file system&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;or&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs rescue chunk-recover dump-zero.1.img
+Scanning: DONE in dev0
+corrupt node: root=1 block=160410271744 slot=0, corrupt node: root=1 block=160410271744, nritems too large, have 39 expect range [1,0]
+Couldn&amp;#39;t read tree root
+open with broken chunk error&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;or&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs rescue zero-log dump-zero.1.img
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+ERROR: cannot read chunk root
+ERROR: could not open ctree&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;or&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ mkdir out
+$ btrfs restore dump-zero.1.img out
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+Could not open root, trying backup super
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+Could not open root, trying backup super
+ERROR: superblock bytenr 274877906944 is larger than device size 256060514304
+Could not open root, trying backup super&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;or&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs restore --list-roots dump-zero.1.img
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+Could not open root, trying backup super
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+checksum verify failed on 1048576 wanted 0xf81c950a found 0xd66a46e0
+bad tree block 1048576, bytenr mismatch, want=1048576, have=11553381380038442733
+ERROR: cannot read chunk root
+Could not open root, trying backup super
+ERROR: superblock bytenr 274877906944 is larger than device size 256060514304
+Could not open root, trying backup super&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;or&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs-find-root dump-zero.1.img
+WARNING: cannot read chunk root, continue anyway
+Superblock thinks the generation is 6596071
+Superblock thinks the level is 1&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Well, have I got a tool for you!&lt;/p&gt;
+&lt;p&gt;(FWIW, I also tried manipulating the filesystem and patching the tools
+to try to get past those errors, only to get a different set of errors.
+Some of these patches I am separately submitting to btrfs-progs.)&lt;/p&gt;
+&lt;h1 id="overview-of-use"&gt;2. Overview of use&lt;/h1&gt;
+&lt;p&gt;There are two &lt;code&gt;btrfs-rec&lt;/code&gt; sub-command groups:
+&lt;code&gt;btrfs-rec inspect &lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt; and &lt;code&gt;btrfs-rec
+repair &lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt;, and you can find out about various
+sub-commands with &lt;code&gt;btrfs-rec help&lt;/code&gt;. These are both told about
+devices or images with the &lt;code&gt;--pv&lt;/code&gt; flag.&lt;/p&gt;
+&lt;p&gt;&lt;code&gt;btrfs-rec inspect &lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt; commands open the
+filesystem read-only, and (generally speaking) write extracted or
+rebuilt information to stdout. &lt;code&gt;btrfs-rec repair
+&lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt; commands open the filesystem read+write, and
+consume information from &lt;code&gt;btrfs-rec inspect
+&lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt; commands to actually repair the filesystem
+(except I haven't actually implemented any &lt;code&gt;repair&lt;/code&gt; commands
+yet... despite the lack of &lt;code&gt;repair&lt;/code&gt; commands, I believe that
+&lt;code&gt;btrfs-rec&lt;/code&gt; is already useful because of the
+&lt;code&gt;btrfs-rec inspect mount&lt;/code&gt; command to get data out of the
+broken filesystem). This split allows you to try things without being
+scared by WARNINGs about not using these tools unless you're an expert
+or have been told to by a developer.&lt;/p&gt;
+&lt;p&gt;In the broken &lt;code&gt;dump-zero.1.img&lt;/code&gt; example above (which has a
+perfectly intact superblock, but a totally broken
+&lt;code&gt;CHUNK_TREE&lt;/code&gt;), to "repair" it I'd:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;p&gt;Start by using &lt;code&gt;btrfs-rec inspect rebuild-mappings&lt;/code&gt; to
+rebuild the broken chunk/dev/blockgroup trees:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs-rec inspect rebuild-mappings \
+ --pv=dump-zero.1.img \
+ &amp;gt; mappings-1.json&lt;/code&gt;&lt;/pre&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;It only mostly succeeds; on stderr it tells us about a few
+regions of the image that it wasn't able to figure out the chunks for.
+Using some human-level knowledge, you can write those mappings
+yourself, inserting them into the generated
+&lt;code&gt;mappings.json&lt;/code&gt;, and ask
+&lt;code&gt;rebuild-mappings&lt;/code&gt; to normalize what you wrote:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs-rec inspect rebuild-mappings \
+ --pv=dump-zero.1.img \
+ --mappings=&amp;lt;(sed &amp;lt;mappings-1.json \
+ -e &amp;#39;2a{&amp;quot;LAddr&amp;quot;:5242880,&amp;quot;PAddr&amp;quot;:{&amp;quot;Dev&amp;quot;:1,&amp;quot;Addr&amp;quot;:5242880},&amp;quot;Size&amp;quot;:1},&amp;#39; \
+ -e &amp;#39;2a{&amp;quot;LAddr&amp;quot;:13631488,&amp;quot;PAddr&amp;quot;:{&amp;quot;Dev&amp;quot;:1,&amp;quot;Addr&amp;quot;:13631488},&amp;quot;Size&amp;quot;:1},&amp;#39;) \
+ &amp;gt; mappings-2.json&lt;/code&gt;&lt;/pre&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Now that it has functioning chunk/dev/blockgroup trees, we can
+use &lt;code&gt;btrfs-rec inspect rebuild-trees&lt;/code&gt; to rebuild other trees
+that rely on those:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ btrfs-rec inspect rebuild-trees \
+ --pv=dump-zero.1.img \
+ --mappings=mappings-2.json \
+ &amp;gt; trees.json&lt;/code&gt;&lt;/pre&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Now that (hopefully) everything that was damaged has been
+reconstructed, we can use &lt;code&gt;btrfs-rec inspect mount&lt;/code&gt; to mount
+the filesystem read-only and copy out our data:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ mkdir mnt
+$ sudo btrfs-rec inspect mount \
+ --pv=dump-zero.1.img \
+ --mappings=mappings-2.json \
+ --trees=trees.json \
+ ./mnt&lt;/code&gt;&lt;/pre&gt;&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;This example is fleshed out more (and the manual edits to
+&lt;code&gt;mappings.json&lt;/code&gt; explained more) in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/examples/main.sh?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./examples/main.sh&lt;/code&gt;&lt;/a&gt;.&lt;/p&gt;
+&lt;h1 id="prior-art"&gt;3. Prior art&lt;/h1&gt;
+&lt;p&gt;Comparing &lt;code&gt;btrfs-rec inspect mount&lt;/code&gt; with the existing &lt;a
+href="https://github.com/adam900710/btrfs-fuse"&gt;https://github.com/adam900710/btrfs-fuse&lt;/a&gt;
+project:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;Again, mine has better fault tolerance&lt;/li&gt;
+&lt;li&gt;Mine is read-only&lt;/li&gt;
+&lt;li&gt;Mine supports xattrs ("TODO" in Adam's)&lt;/li&gt;
+&lt;li&gt;Mine supports separate inode address spaces for subvolumes; Adam's
+doesn't due to limitations in FUSE, mine works around this by lazily
+setting up separate mountpoints for each subvolume (though this does
+mean that the process needs to run as root, which is a bummer).&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h1 id="internalsdesign"&gt;4. Internals/Design&lt;/h1&gt;
+&lt;h2 id="overview-of-the-source-tree-layout"&gt;4.1. Overview of the source
+tree layout&lt;/h2&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/examples?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;examples/&lt;/code&gt;&lt;/a&gt;
+has example scripts showing how to use &lt;code&gt;btrfs-rec&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfs?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/btrfs/&lt;/code&gt;&lt;/a&gt;
+is the core btrfs implementation.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfscheck?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/btrfscheck/&lt;/code&gt;&lt;/a&gt;
+and &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/btrfsutil/&lt;/code&gt;&lt;/a&gt;
+are libraries for "btrfs-progs" type programs, that are userland-y
+things that I thought should be separate from the core implementation;
+something that frustrated me about libbtrfs was having to figure out "is
+this thing here in support of btrfs bits-on-disk, or in support of a
+higher-level 'how btrfs-progs wants to think about things'?"&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;cmd/btrfs-rec/&lt;/code&gt;&lt;/a&gt;
+is where the command implementations live. If a sub-command fits in a
+single file, it's
+&lt;code&gt;cmd/btrfs-rec/inspect_&lt;var&gt;SUBCMD&lt;/var&gt;.go&lt;/code&gt;, otherwise, it's
+in a separate &lt;code&gt;cmd/btrfs-rec/inspect/&lt;var&gt;SUBCMD&lt;/var&gt;/&lt;/code&gt;
+package.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/textui?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/textui/&lt;/code&gt;&lt;/a&gt;
+is reasonably central to how the commands implement a text/CLI
+user-interface.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/binstruct?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/binstruct/&lt;/code&gt;&lt;/a&gt;,
+&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/diskio?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/diskio/&lt;/code&gt;&lt;/a&gt;,
+and &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/streamio?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/streamio/&lt;/code&gt;&lt;/a&gt;
+are non-btrfs-specific libraries related to the problem domain.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/containers?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/containers/&lt;/code&gt;&lt;/a&gt;,
+&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/fmtutil?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/fmtutil/&lt;/code&gt;&lt;/a&gt;,
+&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/maps?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/maps/&lt;/code&gt;&lt;/a&gt;,
+&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/slices?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/slices/&lt;/code&gt;&lt;/a&gt;,
+and &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/profile?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;lib/profile/&lt;/code&gt;&lt;/a&gt;
+are all generic Go libraries that have nothing to do with btrfs or the
+problem domain, but weren't in the Go standard library and I didn't
+find/know-of existing implementations that I liked. Of these, all but
+&lt;code&gt;containers&lt;/code&gt; are pretty simple utility libraries. Also, some
+of these things have been added to the standard library since I started
+the project.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h2 id="base-decisions-cli-structure-go-json"&gt;4.2. Base decisions: CLI
+structure, Go, JSON&lt;/h2&gt;
+&lt;p&gt;I started with trying to enhance btrfs-progs, but ended up writing a
+wholly new program in Go, for several reasons:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;writing a new thing: I was having to learn both the btrfs-progs
+codebase and how btrfs-bits-on-disk work, and it got to the point that I
+decided I should just focus on learning btrfs-bits-on-disk.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;writing a new thing: It was becoming increasingly apparent to me
+that it was going to be an uphill-fight of having recovery-tools share
+the same code as the main-tools, as the routines used by the main-tools
+rightly have validity checks, where recovery-tools want to say "yes, I
+know it's invalid, can you give it to me anyway?".&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;writing it in not-C: I love me some C, but higher level languages
+are good for productivity. And I was trying to write a whole lot of code
+at once, so I needed a productivity boost.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;writing it in not-C: This forced me to learn btrfs-bits-on-disk
+better, instead of just cribbing from btrfs-progs. That knowledge is
+particularly important for having ideas on how to deal with corrupt
+bits-on-disk.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;writing it in Go: At the time I started, my day job was writing
+Go, so I had Go swapped into my brain. And Go still feels close to C but
+provides &lt;em&gt;a lot&lt;/em&gt; of niceness and safety over C.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;It turned out that Go was perhaps not the best choice, but we'll come
+back to that.&lt;/p&gt;
+&lt;p&gt;I wanted to separate things into a pipeline. For instance: Instead of
+&lt;code&gt;btrfs rescue chunk-recover&lt;/code&gt; trying to do everything to
+rebuild a broken chunk tree, I wanted to separate I/O from computation
+from repairs. So I have
+&lt;code&gt;btrfs-rec inspect rebuild-mappings scan&lt;/code&gt; that reads all the
+info necessary to rebuild the chunk tree, then dump that as a 2GB blob
+of JSON. Then I can feed that JSON to
+&lt;code&gt;btrfs-rec inspect rebuild-mappings process&lt;/code&gt; which actually
+rebuilds the mappings in the chunk tree, and dumps them as JSON. And
+then other commands can consume that &lt;code&gt;mappings.json&lt;/code&gt; to use
+that instead of trying to read the chunk tree from the actual FS, so
+that you don't have to make potentially destructive writes to inspect an
+FS with a broken chunk tree, and can inspect it more forensically. Or
+then use &lt;code&gt;btrfs-rec repair
+&lt;var&gt;SOME_SUBCMD_I_HAVENT_WRITTEN_YET&lt;/var&gt;&lt;/code&gt; to write that chunk
+tree in &lt;code&gt;mappings.json&lt;/code&gt; back to the filesystem.&lt;/p&gt;
+&lt;p&gt;(But also, the separate steps thing was useful just so I could
+iterate on the algorithms of &lt;code&gt;rebuild-mappings process&lt;/code&gt;
+separately from having to scan the entire FS.)&lt;/p&gt;
+&lt;p&gt;So, I made the decision that &lt;code&gt;btrfs-rec inspect
+&lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt; commands should all only open the FS read-only,
+and output their work to a separate file; that writing that info back to
+the FS should be separate in &lt;code&gt;btrfs-rec repair
+&lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;For connecting those parts of the pipeline, I chose JSON, for a few
+reasons:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;I wanted something reasonably human-readable, so that I could
+debug it easier.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;I wanted something reasonably human-readable, so that human
+end-users could make manual edits; for example, in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/examples/main.sh?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;examples/main.sh&lt;/code&gt;&lt;/a&gt;
+I have an example of manually editing &lt;code&gt;mappings.json&lt;/code&gt; to
+resolve a region that the algorithm couldn't figure out, but with
+knowledge of what caused the corruption a human can.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;I didn't want to invent my own DSL and have to handle writing a
+parser. (This part didn't pay off! See below.)&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;I wanted something that I thought would have good support in a
+variety of languages, so that if Go is problematic for getting things
+merged upstream it could be rewritten in C (or maybe Rust?) piece-meal
+where each subcommand can be rewritten one at a time.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;It turned out that JSON was perhaps not the best choice.&lt;/p&gt;
+&lt;p&gt;OK, so: Go and/or JSON maybe being mistakes:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;I spent a lot of time getting the garbage collector to not just
+kill performance.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;The &lt;code&gt;btrfs-rec inspect rebuild-mappings
+&lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt; subcommands all throw a lot of data through the
+JSON encoder/decoder, and I learned that the Go stdlib
+&lt;code&gt;encoding/json&lt;/code&gt; package has memory use that grows O(n^2)
+(-ish? I didn't study the implementation, but that's what the curve
+looks like just observing it) on the size of the data being shoved
+through it, so I had to go take a break and go write
+https://pkg.go.dev/git.lukeshu.com/go/lowmemjson which is a
+mostly-drop-in-replacement that tries to be as close-as possible to O(1)
+memory use. So I did end up having to write my own parser anyway
+:(&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h2 id="algorithms"&gt;4.3. Algorithms&lt;/h2&gt;
+&lt;p&gt;There are 3 algorithms of note in &lt;code&gt;btrfs-rec&lt;/code&gt;, that I
+think are worth getting into mainline btrfs-progs even if the code of
+&lt;code&gt;btrfs-rec&lt;/code&gt; doesn't get in:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;p&gt;The &lt;code&gt;btrfs-rec inspect rebuild-mappings&lt;/code&gt; algorithm
+to rebuild information from the &lt;code&gt;CHUNK_TREE&lt;/code&gt;,
+&lt;code&gt;DEV_TREE&lt;/code&gt;, and &lt;code&gt;BLOCK_GROUP_TREE&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;The &lt;code&gt;btrfs-rec --rebuild&lt;/code&gt; algorithm to cope with
+reading broken B+ trees.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;The &lt;code&gt;btrfs-rec inspect rebuild-trees&lt;/code&gt; algorithm to
+re-attach lost branches to broken B+ trees.&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;
+&lt;h3 id="the-rebuild-mappings-algorithm"&gt;4.3.1. The
+&lt;code&gt;rebuild-mappings&lt;/code&gt; algorithm&lt;/h3&gt;
+&lt;p&gt;(This step-zero scan is
+&lt;code&gt;btrfs-rec inspect rebuild-mappings scan&lt;/code&gt;, and principally
+lives in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./lib/btrfsutil/scan.go&lt;/code&gt;&lt;/a&gt;
+and &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildmappings/scan.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;
+&lt;ol start="0" type="1"&gt;
+&lt;li&gt;Similar to &lt;code&gt;btrfs rescue chunk-recover&lt;/code&gt;, scan each device
+for things that look like nodes; keep track of:
+&lt;ul&gt;
+&lt;li&gt;Checksums of every block on the device&lt;/li&gt;
+&lt;li&gt;Which physical addresses contain nodes that claim to be at a given
+logical address (this bookkeeping is sketched in Go after the
+list).&lt;/li&gt;
+&lt;li&gt;Any found Chunk items, BlockGroup items, DevExtent, and CSum items.
+Keep track of the key for each of these, and for CSum items also track
+the generation.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ol&gt;
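+&lt;p&gt;(As a hypothetical Go sketch, with made-up names rather than the
+real &lt;code&gt;scan.go&lt;/code&gt; code, that bookkeeping looks like:)&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;package sketch
+
+// Hypothetical sketch of the scan&amp;#39;s per-device bookkeeping. A
+// node&amp;#39;s header records the logical address (&amp;quot;bytenr&amp;quot;) that it
+// claims to live at, so the scan indexes physical locations by that
+// claimed address.
+
+type physLoc struct {
+	Dev  int   // device ID
+	Addr int64 // physical byte offset on that device
+}
+
+type nodeHeader struct {
+	ByteNr int64 // logical address that the node claims
+	// ... csum, generation, level, etc.
+}
+
+// tryReadNodeHeader is a stand-in for &amp;quot;does this look like a node?&amp;quot;.
+func tryReadNodeHeader(dev []byte, paddr int64) (nodeHeader, bool) {
+	return nodeHeader{}, false // stub; real code checks magic and csum
+}
+
+func scanDevice(devID int, dev []byte, sectorSize, nodeSize int64) map[int64][]physLoc {
+	claimedAt := make(map[int64][]physLoc)
+	for paddr := int64(0); paddr+nodeSize &amp;lt;= int64(len(dev)); paddr += sectorSize {
+		if hdr, ok := tryReadNodeHeader(dev, paddr); ok {
+			claimedAt[hdr.ByteNr] = append(claimedAt[hdr.ByteNr], physLoc{devID, paddr})
+		}
+	}
+	return claimedAt
+}&lt;/code&gt;&lt;/pre&gt;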
+&lt;p&gt;Create a bucket of the data from Chunks, DevExtents, and BlockGroups;
+these are mostly redundant with one another: a Chunk and a
+DevExtent+BlockGroup store pretty much the same information, so we can
+use one to reconstruct the other. How
+we "merge" these and handle conflicts is in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfs/btrfsvol/lvm.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n121"&gt;&lt;code&gt;./lib/btrfs/btrfsvol/lvm.go:addMapping()&lt;/code&gt;&lt;/a&gt;.
+I don't think this part is particularly clever, but given that
+&lt;code&gt;btrfs rescue chunk-recover&lt;/code&gt; crashes if it encounters two
+overlapping chunks, I suppose I should spell it out:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;A "mapping" is represented as a group of 4 things:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;logical address&lt;/li&gt;
+&lt;li&gt;a list of 1 or more physical addresses (device ID and offset)&lt;/li&gt;
+&lt;li&gt;size, and a Boolean indicator of whether the size is "locked"&lt;/li&gt;
+&lt;li&gt;block group flags, and a Boolean presence-indicator&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Mappings must be merged if their logical or physical regions
+overlap.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;If a mapping has a "locked" size, then when merging it may
+subsume smaller mappings with unlocked sizes, but its size cannot be
+changed; trying to merge a locked-size mapping with another mapping that
+is not for a subset region should return an error.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;If a mapping has block group flags present, then those flags may
+not be changed; it may only be merged with another mapping that does not
+have flags present, or has identical flags.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;When returning an error because of overlapping non-mergeable
+mappings, just log an error on stderr and keep going. That's an
+important design thing that is different than normal filesystem code; if
+there's an error, yeah, detect and notify about it, &lt;strong&gt;but don't
+bail out of the whole routine&lt;/strong&gt;. Just skip that one item or
+whatever.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
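+&lt;p&gt;(A minimal Go sketch of those merge rules, with illustrative field
+names rather than the real &lt;code&gt;btrfsvol&lt;/code&gt; types:)&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;package sketch
+
+type physLoc struct{ Dev, Addr int64 }
+
+type mapping struct {
+	LAddr      int64
+	PAddrs     []physLoc // 1 or more (device ID, offset) pairs
+	Size       int64
+	SizeLocked bool
+	Flags      uint64 // block group flags...
+	HasFlags   bool   // ...and their presence-indicator
+}
+
+// mergeable reports whether b may be merged into a: a locked size may
+// only subsume a subset region, and flags, once present, may only be
+// merged with absent or identical flags.
+func mergeable(a, b mapping) bool {
+	if a.SizeLocked &amp;amp;&amp;amp;
+		!(b.LAddr &amp;gt;= a.LAddr &amp;amp;&amp;amp; b.LAddr+b.Size &amp;lt;= a.LAddr+a.Size) {
+		return false // would have to change a locked size
+	}
+	if a.HasFlags &amp;amp;&amp;amp; b.HasFlags &amp;amp;&amp;amp; a.Flags != b.Flags {
+		return false // flags, once present, may not change
+	}
+	return true
+}&lt;/code&gt;&lt;/pre&gt;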
+&lt;p&gt;Now that we know how to "add a mapping", let's do that:&lt;/p&gt;
+&lt;p&gt;(The following main-steps are
+&lt;code&gt;btrfs-rec inspect rebuild-mappings process&lt;/code&gt;, and principally
+live in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/process.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildmappings/process.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;p&gt;Add all found Chunks.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Add all found DevExtents.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Add a physical:logical mapping of length nodesize for each node
+that was found.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;For any mappings from steps 2 or 3 that are missing blockgroup flags
+(that is: they weren't able to be merged with a mapping from step 1),
+use the found BlockGroups to fill in those flags.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Now we'll merge all found CSum items into a map of the sums of
+the logical address space. Sort all of the csum items by generation,
+then by address. Loop over them in that order, inserting their sums into
+the map. If two csum items overlap, but agree about the sums of the
+overlapping region, that's fine, just take their union. For overlaps
+that disagree, items with a newer generation kick out items with an
+older generation. If disagreeing items have the same generation... I
+don't think that can happen except by a filesystem bug (i.e. not by a
+failing drive or other external corruption), so I wasn't too concerned
+about it, so I just log an error on stderr and skip the later-processed
+item. See &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go&lt;/code&gt;&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;Look at regions of the logical address space that meet all 3
+criteria:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;we have CSum items for them&lt;/li&gt;
+&lt;li&gt;we have a BlockGroup for them&lt;/li&gt;
+&lt;li&gt;we don't have a Chunk/DevExtent mapping them to the physical address
+space.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;Pair those CSums up with BlockGroups, and for each BlockGroup, search
+the list of checksums of physical blocks to try to find a physical
+region that matches the logical csums (and isn't already mapped to a
+different logical region). I used a Knuth-Morris-Pratt search, modified
+to handle holes in the logical csum list as wildcards (the match
+predicate is sketched in Go after this list).&lt;/p&gt;
+&lt;p&gt;Insert any found mappings into our bucket of mappings.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Do the same again, but with a fuzzy search (we can re-use the
+csum map of the logical address space). My implementation of this is
+comparatively time and space intensive; I just walk over the entire
+unmapped physical address space, noting what % of match each BlockGroup
+has if placed at that location. I keep track of the best 2 matches for
+each BlockGroup. If the best match is better than a 50% match, and the
+second best is less than a 50% match, then I add the best match. In my
+experience, the best match is &amp;gt;90% (or at whatever the maximum
+percent is for how much of the BlockGroup has logical sums), and the
+second best is 0% or 1%. The point of tracking both is that if there
+isn't a clear-cut winner, I don't want it to commit to a potentially
+wrong choice.&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;
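+&lt;p&gt;(To pin down the semantics of that search: a run of logical csums
+matches a position in the physical-block csum list if every non-hole
+position agrees. This naive Go predicate, with made-up names, is the
+specification that the modified Knuth-Morris-Pratt search
+accelerates:)&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;package sketch
+
+type csum [32]byte // btrfs checksums are at most 32 bytes
+
+// matchesAt reports whether the logical csum pattern, whose holes
+// (blocks with no known csum) act as wildcards, matches the
+// physical-block csums at offset off.
+func matchesAt(physical, pattern []csum, hole []bool, off int) bool {
+	if off &amp;lt; 0 || off+len(pattern) &amp;gt; len(physical) {
+		return false
+	}
+	for i := range pattern {
+		if hole[i] {
+			continue // wildcard
+		}
+		if physical[off+i] != pattern[i] {
+			return false
+		}
+	}
+	return true
+}&lt;/code&gt;&lt;/pre&gt;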
+&lt;h3 id="the---rebuild-algorithm"&gt;4.3.2. The &lt;code&gt;--rebuild&lt;/code&gt;
+algorithm&lt;/h3&gt;
+&lt;p&gt;The &lt;code&gt;--rebuild&lt;/code&gt; flag is implied by the
+&lt;code&gt;--trees=trees.json&lt;/code&gt; flag, and triggers an algorithm that
+allows "safely" reading from a broken B+ tree, rather than the usual B+
+tree lookup and search functions. I probably should have tried to
+understand the &lt;code&gt;btrfs restore&lt;/code&gt; algorithm; maybe I reinvented
+the wheel...&lt;/p&gt;
+&lt;p&gt;This algorithm requires a list of all nodes on the filesystem; we
+find these using the same scan as above (&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./lib/btrfsutil/scan.go&lt;/code&gt;&lt;/a&gt;),
+the same procedure as &lt;code&gt;btrfs rescue chunk-recover&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;We walk all of those nodes, and build a reasonably lightweight
+in-memory graph of all nodes (&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/graph.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./lib/btrfsutil/graph.go&lt;/code&gt;&lt;/a&gt;),
+tracking the following (condensed into a Go sketch after this
+list):&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;each node's
+&lt;ul&gt;
+&lt;li&gt;logical address&lt;/li&gt;
+&lt;li&gt;level&lt;/li&gt;
+&lt;li&gt;generation&lt;/li&gt;
+&lt;li&gt;tree&lt;/li&gt;
+&lt;li&gt;each item's key and size&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;each keypointer's
+&lt;ul&gt;
+&lt;li&gt;source node&lt;/li&gt;
+&lt;li&gt;source slot within the node&lt;/li&gt;
+&lt;li&gt;tree of the source node&lt;/li&gt;
+&lt;li&gt;destination node&lt;/li&gt;
+&lt;li&gt;destination level implied by the level of the source node&lt;/li&gt;
+&lt;li&gt;destination key&lt;/li&gt;
+&lt;li&gt;destination generation&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;logical addresses and error messages for nodes that are pointed to
+by a keypointer or the superblock, but can't be read (because that
+logical address isn't mapped, or it doesn't look like a node,
+or...)&lt;/li&gt;
+&lt;li&gt;an index such that for a given node we can quickly list both
+keypointers both originating at that node and pointing to that
+node.&lt;/li&gt;
+&lt;/ul&gt;
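+&lt;p&gt;(Condensed into a hypothetical Go sketch; the real types live in
+&lt;code&gt;./lib/btrfsutil/graph.go&lt;/code&gt;:)&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;package sketch
+
+type key struct {
+	ObjectID uint64
+	ItemType uint8
+	Offset   uint64
+}
+
+// What the graph tracks per node...
+type graphNode struct {
+	Level      uint8
+	Generation uint64
+	Owner      uint64 // tree ID
+	ItemKeys   []key  // plus each item&amp;#39;s size
+}
+
+// ...per keypointer...
+type graphEdge struct {
+	FromNode     int64 // logical addr of the source node
+	FromSlot     int   // slot within the source node
+	FromTree     uint64
+	ToNode       int64 // logical addr pointed to
+	ToLevel      uint8 // implied by the source node&amp;#39;s level
+	ToKey        key
+	ToGeneration uint64
+}
+
+// ...and for the graph as a whole.
+type graph struct {
+	Nodes     map[int64]graphNode    // keyed by logical addr
+	BadNodes  map[int64]error        // unreadable nodes: addr, and why
+	EdgesFrom map[int64][]*graphEdge // the index, by source node...
+	EdgesTo   map[int64][]*graphEdge // ...and by destination node
+}&lt;/code&gt;&lt;/pre&gt;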
+&lt;h4 id="rebuilt-forrest-behavior-looking-up-trees"&gt;4.3.2.1. rebuilt
+forrest behavior (looking up trees)&lt;/h4&gt;
+&lt;p&gt;(see: &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/rebuilt_forrest.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./lib/btrfsutil/rebuilt_forrest.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;The &lt;code&gt;ROOT_TREE&lt;/code&gt;, &lt;code&gt;CHUNK_TREE&lt;/code&gt;,
+&lt;code&gt;TREE_LOG&lt;/code&gt;, and &lt;code&gt;BLOCK_GROUP_TREE&lt;/code&gt; (the trees
+pointed to directly by the superblock) work as you'd expect.&lt;/li&gt;
+&lt;li&gt;For other trees, we (as you'd expect) look up the root item in the
+rebuilt &lt;code&gt;ROOT_TREE&lt;/code&gt;, and then (if rootitem.ParentUUID is
+non-zero) eagerly also look up the parent tree (recursing on ourself;
+see the Go sketch after this list).
+We try to use the &lt;code&gt;UUID_TREE&lt;/code&gt; tree to help with this, but
+fall back to just doing a linear scan over the &lt;code&gt;ROOT_TREE&lt;/code&gt;.
+If we fail to look up the parent tree (or its parent, or a more distant
+ancestor), then (depending on a flag) we either make a note of that, or
+error out and fail to look up the child tree. For &lt;code&gt;--rebuild&lt;/code&gt;
+and &lt;code&gt;--trees=trees.json&lt;/code&gt; we are permissive of this error, and
+just make note of it; but we'll re-use this algorithm in the
+&lt;code&gt;rebuild-trees&lt;/code&gt; algorithm below, and it needs the more strict
+handling.&lt;/li&gt;
+&lt;li&gt;When creating the rebuilt individual tree, we start by adding the
+root node specified by the superblock/root-item. But we may also add
+additional root nodes grafted on to the tree by the
+&lt;code&gt;--trees=trees.json&lt;/code&gt; flag or by the
+&lt;code&gt;rebuild-trees&lt;/code&gt; algorithm below. So a tree may have more than
+1 root node.&lt;/li&gt;
+&lt;/ul&gt;
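+&lt;p&gt;A hand-wavy Go sketch of that lookup behavior (every name here is
+mine; the real logic is in &lt;code&gt;rebuilt_forrest.go&lt;/code&gt;):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;type UUID [16]byte
+
+func (f *Forrest) Tree(id uint64) (*Tree, error) {
+    if t, ok := f.trees[id]; ok {
+        return t, nil
+    }
+    rootItem, err := f.lookupRootItem(id) // search the rebuilt ROOT_TREE
+    if err != nil {
+        return nil, err
+    }
+    var parent *Tree
+    if rootItem.ParentUUID != (UUID{}) {
+        // Try the UUID_TREE, falling back to a linear scan over
+        // the ROOT_TREE.
+        parentID, perr := f.resolveUUID(rootItem.ParentUUID)
+        if perr == nil {
+            parent, perr = f.Tree(parentID) // recurse on ourself
+        }
+        if perr != nil {
+            if f.strict { // the rebuild-trees algorithm needs this
+                return nil, perr
+            }
+            // otherwise: just make a note of the failure
+        }
+    }
+    // The tree may later get extra root nodes grafted on, by
+    // --trees=trees.json or by rebuild-trees.
+    t := f.newTree(id, parent, rootItem.RootNode)
+    f.trees[id] = t
+    return t, nil
+}&lt;/code&gt;&lt;/pre&gt;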
+&lt;h4 id="rebuilt-individual-tree-behavior"&gt;4.3.2.2. rebuilt individual
+tree behavior&lt;/h4&gt;
+&lt;p&gt;(see: &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfsutil/rebuilt_tree.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./lib/btrfsutil/rebuilt_tree.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;
+&lt;p&gt;In order to read from a tree, we first have to build a few indexes.
+We store these indexes in an Adaptive Replacement Cache; they are all
+re-buildable based on the tree's list of roots and the above graph, so
+if we have a bunch of trees we don't need to keep all of this in memory
+at once. Note that this is done 100% with the in-memory graph; we don't
+need to read anything from the filesystem during these procedures.&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;The first index we build is the "node index". This is an index
+that for every node tells us what root(s) the tree would need to have in
+order for the tree to include that node, and also what the highest
+acceptable item key in the node would be if the tree includes that
+root. We
+track both a &lt;code&gt;loMaxItem&lt;/code&gt; and a &lt;code&gt;hiMaxItem&lt;/code&gt;, in
+case the tree is really broken and there are multiple paths from the
+root to the node, as these different paths may imply different max-item
+constraints. Put more concretely, the type of the index is:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;map[ nodeID → map[ rootNodeID → {loMaxItem, hiMaxItem} ] ]&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;We'll do a loop over the graph, using dynamic-programming memoization
+to figure out ordering and avoid processing the same node twice; for
+each node we'll&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;Check whether the owner-tree is this tree or one of this tree's
+ancestors (and if it's an ancestor, that the node's generation isn't
+after the point that the child tree was forked from the parent tree). If
+not, we are done processing that node (record an empty/nil set of roots
+for it).&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Create an empty map of &lt;code&gt;rootID&lt;/code&gt; →
+{&lt;code&gt;loMaxItem&lt;/code&gt;, &lt;code&gt;hiMaxItem&lt;/code&gt;}.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Look at each keypointer that points at the node, and:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;Skip the keypointer if its expectations of the node aren't met:
+if the level, generation, and min-key constraints don't match up. If the
+keypointer isn't in the last slot in the source node, we also go ahead
+and check that the destination node's max-key is under the
+min-key of the keypointer in the next slot, since that's cheap to do
+now.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Skip the keypointer if its source node's owner-tree isn't this
+tree or one of this tree's ancestors (and if it's an ancestor, that the
+node's generation isn't after the point that the child tree was forked
+from the parent tree).&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Dynamic-programming recurse and index the keypointer's source
+node.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;For every root that would result in the keypointer's source node
+being included in the tree:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;If the keypointer is in the last slot, look at what the source
+node's last-item constraints would be if that root is included, and
+check the max-item of our destination node against them. We check
+against the &lt;code&gt;hiMaxItem&lt;/code&gt;: if there is any valid path from the
+root to this node, then we want to be permissive and include it. If
+that check fails, then we're done with this keypointer. Also, make note
+of those &lt;code&gt;loMaxItem&lt;/code&gt; and &lt;code&gt;hiMaxItem&lt;/code&gt; values;
+we'll use them again in just a moment.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Otherwise, set both &lt;code&gt;loMaxItem&lt;/code&gt; and
+&lt;code&gt;hiMaxItem&lt;/code&gt; to 1-under the min-item of the keypointer in the
+next slot.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Insert that &lt;code&gt;loMaxItem&lt;/code&gt; and &lt;code&gt;hiMaxItem&lt;/code&gt;
+pair into the &lt;code&gt;rootID&lt;/code&gt; → {&lt;code&gt;loMaxItem&lt;/code&gt;,
+&lt;code&gt;hiMaxItem&lt;/code&gt;} map we created above. If an entry already
+exists for this root (since a broken tree might have multiple paths
+from the root to our node), then set &lt;code&gt;loMaxItem&lt;/code&gt; to the min
+of the existing entry and our value, and &lt;code&gt;hiMaxItem&lt;/code&gt; to the
+max.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;If that &lt;code&gt;rootID&lt;/code&gt; → {&lt;code&gt;loMaxItem&lt;/code&gt;,
+&lt;code&gt;hiMaxItem&lt;/code&gt;} map is still empty, then consider this node to
+be a (potential) root, and insert &lt;code&gt;rootID=thisNode&lt;/code&gt; →
+{&lt;code&gt;loMaxItem=maxKey&lt;/code&gt;, &lt;code&gt;hiMaxItem=maxKey&lt;/code&gt;} (where
+&lt;code&gt;maxKey&lt;/code&gt; is the maximum value of the key datatype).&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Take that &lt;code&gt;rootID&lt;/code&gt; → {&lt;code&gt;loMaxItem&lt;/code&gt;,
+&lt;code&gt;hiMaxItem&lt;/code&gt;} map and insert it into the index as the entry
+for this node.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;The next index we build is the "item index". This is a "sorted
+map" (implemented as a red-black tree, supporting sub-range iteration)
+of &lt;code&gt;key&lt;/code&gt; → {&lt;code&gt;nodeID&lt;/code&gt;, &lt;code&gt;slotNumber&lt;/code&gt;}; a
+map that for each key tells us where to find the item with that key.&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;Loop over the node index, and for each node check if both (a) it
+has &lt;code&gt;level==0&lt;/code&gt; (is a leaf node containing items), and (b) its
+set of roots that would include it has any overlap with the tree's set
+of roots.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Loop over each of those included leaf nodes, and loop over the
+items in each node. Insert the &lt;code&gt;key&lt;/code&gt; → {&lt;code&gt;nodeID&lt;/code&gt;,
+&lt;code&gt;slot&lt;/code&gt;} into our sorted map. If there is already an entry for
+that key, decide which one wins by the following rules (sketched in Go
+below, after this list of indexes):&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;Use the one from the node with the owner-tree that is closer to
+this tree; a node with owner=thisTree wins over a node with
+owner=thisTree.parent, which in turn wins over a node with
+owner=thisTree.parent.parent. If that's a tie, then...&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Use the one from the node with the higher generation. If that's a
+tie, then...&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;I don't know, I have the code &lt;code&gt;panic&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;// TODO: This is a panic because I&amp;#39;m not really sure what the
+// best way to handle this is, and so if this happens I want the
+// program to crash and force me to figure out how to handle it.
+panic(fmt.Errorf(&amp;quot;dup nodes in tree=%v: old=%v=%v ; new=%v=%v&amp;quot;,
+ tree.ID,
+ oldNode, tree.forrest.graph.Nodes[oldNode],
+ newNode, tree.forrest.graph.Nodes[newNode]))&lt;/code&gt;&lt;/pre&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;Note that this algorithm means that for a given node we may use a few
+items from that node, while having other items from that same node be
+overridden by another node.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;The final index we build is the "error index". This is an index
+of what errors correspond to which range of keys, so that we can report
+them, and give an idea of "there may be entries missing from this
+directory" and similar.&lt;/p&gt;
+&lt;p&gt;For each error, we'll track the min-key and max-key of the range it
+applies to, the node it came from, and what the error string is. We'll
+store these into an interval tree keyed on that min-key/max-key
+range.&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;Create an empty set &lt;code&gt;nodesToProcess&lt;/code&gt;. Now populate
+it:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;Once again, we'll loop over the node index, but this time we'll
+only check that there's overlap between the set of roots that would
+include the node and the tree's set of roots. For the nodes that are
+included in this tree, insert both the node itself and all node IDs
+that it has keypointers pointing to into the &lt;code&gt;nodesToProcess&lt;/code&gt;
+set.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Also insert all of the tree's roots into
+&lt;code&gt;nodesToProcess&lt;/code&gt;; this is in case the superblock/root-item
+points to an invalid node that we couldn't read.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Now loop over &lt;code&gt;nodesToProcess&lt;/code&gt;. For each node, create
+an empty list of errors. Use the keypointers pointing to it and the min
+&lt;code&gt;loMaxItem&lt;/code&gt; from the node index to construct a set of
+expectations for the node; this should be reasonably straightforward,
+given:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;If different keypointers have disagreeing levels, insert an error
+into the list, and don't bother with checking the node's
+level.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;If different keypointers have disagreeing generations, insert an
+error into the list, and don't bother with checking the node's
+generation.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;If different keypointers have different min-item expectations,
+use the max of them.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;Then:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;If the node is a "bad node" in the graph, insert the error message
+associated with it. Otherwise, check those expectations against the node
+in the graph.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;If the list of error messages is non-empty, then insert their
+concatenation into the interval tree, with the range set to the min of
+the min-item expectations from the keypointers through the max of the
+&lt;code&gt;hiMaxItem&lt;/code&gt;s from the node index. If the min min-item
+expectation turns out to be higher than the max &lt;code&gt;hiMaxItem&lt;/code&gt;,
+then set the range to the zero-key through the max-key.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
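+&lt;p&gt;Back to the item index for a moment: the "which item wins" rule, as
+a Go sketch (assuming the &lt;code&gt;GraphNode&lt;/code&gt; type sketched above,
+and a hypothetical &lt;code&gt;ownerDistance()&lt;/code&gt; that returns 0 for
+owner=thisTree, 1 for owner=thisTree.parent, and so on). The same
+routine is re-used below, when "settling" items in the
+&lt;code&gt;rebuild-trees&lt;/code&gt; algorithm:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;func (t *Tree) betterNode(oldN, newN GraphNode) GraphNode {
+    oldD := t.ownerDistance(oldN.Owner)
+    newD := t.ownerDistance(newN.Owner)
+    switch {
+    case newD &amp;lt; oldD: // the closer owner-tree wins...
+        return newN
+    case oldD &amp;lt; newD:
+        return oldN
+    case newN.Generation &amp;gt; oldN.Generation: // ...then the higher generation
+        return newN
+    case oldN.Generation &amp;gt; newN.Generation:
+        return oldN
+    default:
+        panic(&amp;quot;dup nodes in tree&amp;quot;) // see the TODO comment above
+    }
+}&lt;/code&gt;&lt;/pre&gt;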
+&lt;p&gt;From there, it should be trivial to implement the usual B+ tree
+operations using those indexes; exact-lookup using the item index, and
+range-lookups and walks using the item index together with the error
+index. Efficiently searching the &lt;code&gt;CSUM_TREE&lt;/code&gt; requires knowing
+item sizes, so that's why we recorded the item sizes into the graph.&lt;/p&gt;
+&lt;h3 id="the-rebuild-trees-algorithm"&gt;4.3.3. The
+&lt;code&gt;rebuild-trees&lt;/code&gt; algorithm&lt;/h3&gt;
+&lt;p&gt;The &lt;code&gt;btrfs inspect rebuild-trees&lt;/code&gt; algorithm finds nodes to
+attach as extra roots to trees. I think that conceptually it's the
+simplest of the 3 algorithms, but it turned out to be the hardest to get
+right. So... perhaps more than for the others, reference the source code
+(&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildtrees/&lt;/code&gt;&lt;/a&gt;),
+because I might forget some small but important detail.&lt;/p&gt;
+&lt;p&gt;The core idea here is that we're just going to walk each tree,
+inspecting each item in the tree, and checking for any items that are
+implied by other items (e.g.: a dir entry item implies the existence of
+an inode item for the inode that it points at). If an implied item is not
+in the tree, but is in some other node, then we look at which potential
+roots we could add to the tree that would add that other node. Then,
+after we've processed all of the items in the filesystem, we go add
+those various roots to the various trees, keeping track of which items
+are added or updated. If any of those added/updated items have a version
+with a newer generation on a different node, see what roots we could add
+to get that newer version. Then add those roots, keeping track of items
+that are added/updated. Once we reach a steady state in which the newest
+version of each item has been added, loop back and inspect all
+added/updated items for implied items, keeping track of roots we could
+add. Repeat until a steady-state is reached.&lt;/p&gt;
+&lt;p&gt;There are lots of little details in that process, some of which are
+for correctness, and some of which are for "it should run in hours
+instead of weeks."&lt;/p&gt;
+&lt;h4 id="initialization"&gt;4.3.3.1. initialization&lt;/h4&gt;
+&lt;p&gt;First up, we're going to build an in-memory graph, same as above.
+But this time, while we're reading the nodes to do that, we're also
+going to watch for some specific items and record a few things about
+them.&lt;/p&gt;
+&lt;p&gt;(see: &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/scan.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildtrees/scan.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;
+&lt;p&gt;For each {&lt;code&gt;nodeID&lt;/code&gt;, &lt;code&gt;slotNumber&lt;/code&gt;} pair that
+matches one of these item types, we're going to record:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;flags:
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;INODE_ITEM&lt;/code&gt;s: whether it has the
+&lt;code&gt;INODE_NODATASUM&lt;/code&gt; flag set&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;names:
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;DIR_INDEX&lt;/code&gt; items: the file's name&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;sizes:
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;EXTENT_CSUM&lt;/code&gt; items: the number of bytes that this is a
+sum for (i.e. the item size over the checksum size, times the block
+size)&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;EXTENT_DATA&lt;/code&gt; items: the number of bytes in this extent
+(i.e. either the item size minus
+&lt;code&gt;offsetof(btrfs_file_extent_item.disk_bytenr)&lt;/code&gt; if
+&lt;code&gt;FILE_EXTENT_INLINE&lt;/code&gt;, or else the item's
+&lt;code&gt;num_bytes&lt;/code&gt;).&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;data backrefs:
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;EXTENT_ITEM&lt;/code&gt;s and &lt;code&gt;METADATA_ITEM&lt;/code&gt;s: a list of
+the same length as the number of refs embedded in the item; for embedded
+ExtentDataRefs, the list entry is the subvolume tree ID that the
+ExtentDataRef points at, otherwise it is zero.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;EXTENT_DATA_REF&lt;/code&gt; items: a list of length 1, with the
+sole member being the subvolume tree ID that the ExtentDataRef points
+at.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
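+&lt;p&gt;A rough Go sketch of the shape of that recorded scan data (again,
+the names are mine):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;type ItemPtr struct {
+    Node uint64 // logical addr of the node
+    Slot int    // slot within the node
+}
+
+type ScanData struct {
+    Graph *Graph
+
+    NoDataSum map[ItemPtr]bool     // INODE_ITEMs: is INODE_NODATASUM set?
+    Names     map[ItemPtr][]byte   // DIR_INDEX items: the file name
+    Sizes     map[ItemPtr]uint64   // EXTENT_CSUM and EXTENT_DATA items
+    Backrefs  map[ItemPtr][]uint64 // per embedded ref: the subvolume tree
+                                   // ID it points at, or 0
+}&lt;/code&gt;&lt;/pre&gt;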
+&lt;h4 id="the-main-loop"&gt;4.3.3.2. the main loop&lt;/h4&gt;
+&lt;p&gt;(see: &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;
+&lt;p&gt;Start with that scan data (graph + info about items), and also a
+rebuilt forrest from the above algorithm, but with:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;the flag set so that it refuses to look up a tree if it can't
+look up all of that tree's ancestors&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;an additional "potential-item index" that is similar to the item
+index. It is generated the same way and can cache/evict the same way;
+the difference is that we invert the check for whether the set of roots
+for a node has overlap with the tree's set of roots; we're looking for
+&lt;em&gt;potential&lt;/em&gt; nodes that we could add to this tree.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;some callbacks; we'll get to what we do in these callbacks in a
+bit, but for now, what the callbacks are:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;a callback that is called for each added/updated item when we add
+a root.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;a callback that is called whenever we add a root&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;a callback that intercepts looking up a root item&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;a callback that intercepts resolving a UUID to an object
+ID.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;(The callbacks are in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;
+&lt;p&gt;We have 5 unordered queues ("work lists"?); these are sets such that
+when it's time to drain them, we sort the members and process them in
+that order.&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;the tree queue: a list of tree IDs that we need to crawl&lt;/li&gt;
+&lt;li&gt;the retry-item queue: for each tree ID, a set of items that we
+should re-process if we add a root to that tree&lt;/li&gt;
+&lt;li&gt;the added-item queue: a set of key/tree pairs identifying items that
+have been added by adding a root to a tree&lt;/li&gt;
+&lt;li&gt;the settled-item queue: a set of key/tree pairs that have not only
+been added by adding a root, but that we've also verified are
+the newest-generation item with that key that we could add to the
+tree.&lt;/li&gt;
+&lt;li&gt;the augment queue: for each item that we want to add to a tree, the
+list of roots that we could add to get that item.&lt;/li&gt;
+&lt;/ol&gt;
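+&lt;p&gt;As rough Go shapes (the names are mine; the real augment queue is
+the &lt;code&gt;treeAugmentQueue struct&lt;/code&gt; discussed below):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;type keyAndTree struct {
+    TreeID uint64
+    Key    Key
+}
+
+// wantKey is a tuple of (callback, args); see the graph-callbacks
+// section below.
+type wantKey struct {
+    fn   string
+    args [4]uint64
+}
+
+var (
+    treeQueue        map[uint64]struct{}                // 1: tree IDs to crawl
+    retryItemQueue   map[uint64]map[keyAndTree]struct{} // 2: per-tree
+    addedItemQueue   map[keyAndTree]struct{}            // 3
+    settledItemQueue map[keyAndTree]struct{}            // 4
+    // 5: per-tree, per-want: the set of candidate root node addrs
+    augmentQueue map[uint64]map[wantKey]map[uint64]struct{}
+)&lt;/code&gt;&lt;/pre&gt;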
+&lt;p&gt;The queues all start out empty, except for the tree queue, which we
+seed with the &lt;code&gt;ROOT_TREE&lt;/code&gt;, the &lt;code&gt;CHUNK_TREE&lt;/code&gt;, and
+the &lt;code&gt;BLOCK_GROUP_TREE&lt;/code&gt; (it is a "TODO" item that it should
+probably also be seeded with the &lt;code&gt;TREE_LOG&lt;/code&gt;, but as I will
+say below in the "future work" section, I don't actually understand the
+&lt;code&gt;TREE_LOG&lt;/code&gt;, so I couldn't implement it).&lt;/p&gt;
+&lt;p&gt;Now we're going to loop until the tree queue, added-item queue,
+settled-item queue, and augment queue are all empty (all queues except
+for the retry-item queue). Each loop "pass" has 3 substeps:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;p&gt;Crawl the trees (drain the tree queue, fill the added-item
+queue).&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Either:&lt;/p&gt;
+&lt;ol type="a"&gt;
+&lt;li&gt;&lt;p&gt;if the added-item queue is non-empty: "settle" those items (drain
+the added-item queue, fill the augment queue and the settled-item
+queue).&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;otherwise: process items (drain the settled-item queue, fill the
+augment queue and the tree queue)&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Apply augments (drain the augment queue and maybe the retry-item
+queue, fill the added-item queue).&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;
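+&lt;p&gt;In Go-ish pseudocode (the function names are mine), the loop
+is:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;for len(treeQueue)+len(addedItemQueue)+
+    len(settledItemQueue)+len(augmentQueue) &amp;gt; 0 {
+    crawlTrees() // 1: drain treeQueue, fill addedItemQueue
+    if len(addedItemQueue) &amp;gt; 0 {
+        settleItems() // 2a: drain addedItemQueue, fill augmentQueue
+                      //     and settledItemQueue
+    } else {
+        processItems() // 2b: drain settledItemQueue, fill augmentQueue
+                       //     and treeQueue
+    }
+    applyAugments() // 3: drain augmentQueue (and maybe retryItemQueue),
+                    //    fill addedItemQueue
+}&lt;/code&gt;&lt;/pre&gt;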
+&lt;p&gt;OK, let's look at those 3 substeps in more detail:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;p&gt;Crawl the trees; drain the tree queue, fill the added-item
+queue.&lt;/p&gt;
+&lt;p&gt;We just look up the tree in the rebuilt forrest, which will (per the
+above &lt;code&gt;--rebuild&lt;/code&gt; algorithm) either fail to look up the
+tree, or succeed, and add to that tree the root node from the
+superblock/root-item. Because we set an item-added callback, when adding
+that root it will loop over the nodes added by that root, and call our
+callback for each item in one of the added nodes. Our callback inserts
+each item into the added-item queue. The forrest also calls our
+root-added callback, but because of the way this algorithm works, that
+turns out to be a no-op at this step.&lt;/p&gt;
+&lt;p&gt;I mentioned that we added callbacks to intercept the forrest's
+looking up of root items and resolving UUIDs; we override the forrest's
+"lookup root item" routine and "resolve UUID" routine to instead of
+doing normal lookups on the &lt;code&gt;ROOT_TREE&lt;/code&gt; and
+&lt;code&gt;UUID_TREE&lt;/code&gt;, use the above &lt;code&gt;Want&lt;var&gt;XXX&lt;/var&gt;&lt;/code&gt;
+routines that we'll define below in the "graph callbacks" section.&lt;/p&gt;
+&lt;p&gt;It shouldn't matter what order this queue is processed in, but I sort
+tree IDs numerically.&lt;/p&gt;
+&lt;p&gt;The crawling is fairly fast because it's just in-memory; the only
+accesses to disk are looking up root items and resolving UUIDs.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Either:&lt;/p&gt;
+&lt;ol type="a"&gt;
+&lt;li&gt;&lt;p&gt;Settle items from the added-item queue to the settled-item queue
+(and fill the augment queue).&lt;/p&gt;
+&lt;p&gt;For each item in the queue, we look in the tree's item index to get
+the {node, slot} pair for it, then we do the same in the tree's
+potential-item index. If the potential-item index contains an entry for
+the item's key, then we check if the potential-item's node should "win"
+over the queue item's node, deciding the "winner" using the same routine
+as when building the item index. If the potential-item's node wins, then
+we add the potential node's set of roots to the augment queue. If the
+queue-item's node wins, then we add the item to the settled-item queue
+(except, as an optimization, if the item is of a type that cannot
+possibly imply the existence of another item, then we just drop it and
+don't add it to the settled-item queue).&lt;/p&gt;
+&lt;p&gt;It shouldn't matter what order this queue is processed in, but I sort
+it numerically by treeID and then by item key.&lt;/p&gt;
+&lt;p&gt;This step is fairly fast because it's entirely in-memory, making no
+accesses to disk.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Process items from the settled-item queue (drain the settled-item
+queue, fill the augment queue and the tree queue).&lt;/p&gt;
+&lt;p&gt;This step accesses disk, and so the order we process the queue in
+turns out to be pretty important in order to keep our disk access
+patterns cache-friendly. For the most part, we just sort each queue item
+by tree, then by key. But, we have special handling for
+&lt;code&gt;EXTENT_ITEM&lt;/code&gt;s, &lt;code&gt;METADATA_ITEM&lt;/code&gt;s, and
+&lt;code&gt;EXTENT_DATA_REF&lt;/code&gt; items: We break &lt;code&gt;EXTENT_ITEM&lt;/code&gt;s
+and &lt;code&gt;METADATA_ITEM&lt;/code&gt;s into "sub-items", treating each ref
+embedded in them as a separate item. For those embedded items that are
+&lt;code&gt;EXTENT_DATA_REF&lt;/code&gt;s, and for stand-alone
+&lt;code&gt;EXTENT_DATA_REF&lt;/code&gt; items, we sort them not with the
+&lt;code&gt;EXTENT_TREE&lt;/code&gt; items, but with the items of the tree that the
+extent data ref points at. Recall that during the initial scan step, we
+took note of which tree every extent data ref points at, so we can
+perform this sort without accessing disk yet. This splitting does mean
+that we may visit/read an &lt;code&gt;EXTENT_ITEM&lt;/code&gt; or
+&lt;code&gt;METADATA_ITEM&lt;/code&gt; multiple times as we process the queue, but
+to do otherwise would require solving MinLA, which is NP-hard; and I
+think even an optimal MinLA solution would perform worse than this.
+There is a
+reasonably lengthy discussion of this in a comment in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n251"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:sortSettledItemQueue()&lt;/code&gt;&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;Now we loop over that sorted queue. In the code, this loop is
+deceptively simple. Read the item, then pass it to a function that tells
+us what other items are implied by it. That function is large, but
+simple; it's just a giant table. The trick is how it tells us about
+implied items; we give it a set of callbacks that it calls to tell us
+these things; the real complexity is in the callbacks. These "graph
+callbacks" will be discussed in detail below, but as an illustrative
+example: It may call &lt;code&gt;.WantOff()&lt;/code&gt; with a tree ID, object ID,
+item type, and offset to specify a precise item that it believes should
+exist.&lt;/p&gt;
+&lt;p&gt;If we encounter a &lt;code&gt;ROOT_ITEM&lt;/code&gt;, add the tree described by
+that item to the tree queue.&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;(Both the "can this item even imply the existence of another item"
+check and the "what items are implied by this item" routine are in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/btrfscheck/graph.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./lib/btrfscheck/graph.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Apply augments; drain the augment queue (and maybe the retry-item
+queue), fill the added-item queue.&lt;/p&gt;
+&lt;p&gt;It is at this point that I call out that the augment queue isn't
+implemented as a simple map/set like the others; the
+&lt;code&gt;treeAugmentQueue struct&lt;/code&gt; has special handling for sets of
+different sizes, optimizing the space for empty and len()==1 sized sets,
+and falling back to the usual implementation for larger sets;
+this is important because those small sets are the overwhelming
+majority, and otherwise there's no way the program would be able to run
+on my 32GB RAM laptop. Now that I think about it, I bet it would even be
+worth it to add optimized storage for len()==2 sized sets.&lt;/p&gt;
+&lt;p&gt;The reason is that each "want" from above is tracked in the queue
+separately; if we were OK merging them, then this optimized storage
+wouldn't be necessary. But we keep them separate, so that:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;For all "wants", including ones with empty sets, graph callbacks
+can check if a want has already been processed, avoiding re-doing any
+work (see the description of the graph callbacks below).&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;For "wants" with non-empty sets, we can see how many different
+"wants" could be satisfied with a given root, in order to decide which
+root to choose.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;Anyway, we loop over the trees in the augment queue. For each tree we
+look at that tree's augment queue and look at all the choices of root
+nodes to add (below), and decide on a list to add. Then we add each of
+those roots to the tree; the adding of each root triggers several calls
+to our item-added callback (filling the added-item queue), and our
+root-added callback. The root-added callback moves any items from the
+retry-item queue for this tree to the added-item queue.&lt;/p&gt;
+&lt;p&gt;How do we decide between choices of root nodes to add? &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n528"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go:resolveTreeAugments()&lt;/code&gt;&lt;/a&gt;
+has a good comment explaining the criteria we'd like to optimize for,
+and then code that does an OK-ish job of actually optimizing for
+that:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;It loops over the augment queue for that tree, building a list of
+possible roots, for each possible root making note of 3 things:&lt;/p&gt;
+&lt;ol type="a"&gt;
+&lt;li&gt;&lt;p&gt;how many "wants" that root satisfies,&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;how far from the tree the root's owner is (owner=tree is a distance
+of 0, owner=tree.parent is a distance of 1, owner=tree.parent.parent is
+a distance of 2, and so on), and&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;what the generation of that root is.&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;We sort that list first by highest-count-first, then by
+lowest-distance-first, then by highest-generation-first.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;We create a "return" set and an "illegal" set. We loop over the
+sorted list; for each possible root: if it is in the illegal set, we
+skip it; otherwise we insert it into the return set, and for each "want"
+that includes this root we add all roots that satisfy that want to the
+illegal set (sketched in Go just after this list).&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ol&gt;
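+&lt;p&gt;A Go sketch of that selection (assuming a hypothetical
+&lt;code&gt;wantsWithRoot()&lt;/code&gt; helper that returns the "wants" whose
+candidate-root sets include a given root):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;sort.Slice(cands, func(i, j int) bool {
+    a, b := cands[i], cands[j]
+    switch {
+    case a.wantCount != b.wantCount:
+        return a.wantCount &amp;gt; b.wantCount // (a) highest count first
+    case a.distance != b.distance:
+        return a.distance &amp;lt; b.distance // (b) lowest distance first
+    default:
+        return a.generation &amp;gt; b.generation // (c) highest generation first
+    }
+})
+ret := make(map[uint64]struct{})
+illegal := make(map[uint64]struct{})
+for _, cand := range cands {
+    if _, bad := illegal[cand.root]; bad {
+        continue
+    }
+    ret[cand.root] = struct{}{}
+    for _, want := range wantsWithRoot(cand.root) {
+        for root := range want.roots {
+            illegal[root] = struct{}{}
+        }
+    }
+}&lt;/code&gt;&lt;/pre&gt;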
+&lt;p&gt;It is important that the rebuilt forrest have the flag set so that it
+refuses to look up a tree if it can't look up all of that tree's
+ancestors; otherwise the potential-items index would be garbage as we
+wouldn't have a good idea of which nodes are OK to consider; but this
+does have the downside that it won't even attempt to improve a tree with
+a missing parent. Perhaps the algorithm should flip the flag once the
+loop terminates, and then re-seed the tree queue with each
+&lt;code&gt;ROOT_ITEM&lt;/code&gt; from the &lt;code&gt;ROOT_TREE&lt;/code&gt;?&lt;/p&gt;
+&lt;h4 id="graph-callbacks"&gt;4.3.3.3. graph callbacks&lt;/h4&gt;
+&lt;p&gt;(see: &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;./cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go&lt;/code&gt;&lt;/a&gt;)&lt;/p&gt;
+&lt;p&gt;The graph callbacks are what tie the above together.&lt;/p&gt;
+&lt;p&gt;For each of these callbacks, whenever I say that it looks up
+something in a tree's item index or potential-item index, that implies
+looking the tree up from the forrest; if the forrest cannot look up that
+tree, then the callback returns early, after either:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;if we are in substep 1 and are processing a tree: we add the tree
+that is being processed to the tree queue. (TODO: Wait, this assumes
+that an augment will be applied to the &lt;code&gt;ROOT_TREE&lt;/code&gt; before the
+next pass... if that isn't the case, this will result in the loop never
+terminating... I guess I need to add a separate retry-tree
+queue?)&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;if we are in substep 2 and are processing an item: we add the
+item that is being processed to the retry-item queue for the tree that
+cannot be looked up&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;The 6 methods in the &lt;code&gt;btrfscheck.GraphCallbacks&lt;/code&gt; interface
+are:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;FSErr()&lt;/code&gt;: There's an error with the filesystem; this
+callback just spits it out on stderr. I mention such a trivial matter
+because, again, for a recovery tool I think it's worth putting care into
+how you handle errors and where you expect them: We expect them here,
+so we have to check for them to avoid reading invalid data or whatever,
+but we don't actually need to do anything other than watch our
+step.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;Want()&lt;/code&gt;: We want an item in a given tree with a given
+object ID and item type, but we don't care about what the item's offset
+is.&lt;/p&gt;
+&lt;p&gt;The callback works by searching the item index to see if it can find
+such an item; if so, it has nothing else to do and returns. Otherwise,
+it searches the potential-item index; for each matching item it finds,
+it looks in the node index for the node containing that item, and adds
+the roots that would add that node to a set. Once it has finished
+searching the potential-item index, it adds that set to the augment
+queue, even if that set is still empty. (This common shape is sketched
+in Go at the end of this section.)&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;WantOff()&lt;/code&gt;: The same, but we want a specific
+offset.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;WantDirIndex()&lt;/code&gt;: We want a &lt;code&gt;DIR_INDEX&lt;/code&gt;
+item for a given inode and filename, but we don't know what the offset
+of that item is.&lt;/p&gt;
+&lt;p&gt;First we scan over the item index, looking at all
+&lt;code&gt;DIR_INDEX&lt;/code&gt; items for that inode number. For each item, we
+can check the scan data to see what the filename in that
+&lt;code&gt;DIR_INDEX&lt;/code&gt; is, so we can see if the item satisfies this want
+without accessing the disk. If there's a match, then there is nothing
+else to do, so we return. Otherwise, we do that same search over the
+potential-item index; if we find any matches, then we build the set of
+roots to add to the augment queue the same as in
+&lt;code&gt;Want&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;WantFileExt()&lt;/code&gt;: We want 1 or more
+&lt;code&gt;EXTENT_DATA&lt;/code&gt; items in the given tree for the given inode,
+and we want them to cover bytes 0 through a given size of that
+file.&lt;/p&gt;
+&lt;p&gt;First we walk that range in the item index, to build a list of the
+gaps that we need to fill ("Step 1" in &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n260"&gt;&lt;code&gt;rebuild_wantcb.go:_wantRange()&lt;/code&gt;&lt;/a&gt;).
+This walk (&lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_wantcb.go?id=18e6066c241cf3d252b6521150843ffc858d8434#n195"&gt;&lt;code&gt;rebuild_wantcb.go:_walkRange()&lt;/code&gt;&lt;/a&gt;)
+requires knowing the size of each file extent; so doing this quickly
+without hitting disk is why we recorded the size of each file extent in
+our initialization step.&lt;/p&gt;
+&lt;p&gt;Then ("Step 2" in &lt;code&gt;_wantRange()&lt;/code&gt;) we iterate over each of
+the gaps, and for each gap do a very similar walk (again, by calling
+&lt;code&gt;_walkRange()&lt;/code&gt;), but this time over the potential-item index.
+For each file extent we find that is entirely within the gap, we
+"want" that extent, and move the beginning of the gap forward to the
+end of that extent. This algorithm is dumb and greedy, potentially
+making sub-optimal selections; and so could probably stand to be
+improved; but in my real-world use, it seems to be "good
+enough".&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;WantCSum()&lt;/code&gt;: We want 1 or more
+&lt;code&gt;EXTENT_CSUM&lt;/code&gt; items to cover the half-open interval
+[&lt;code&gt;lo_logical_addr&lt;/code&gt;, &lt;code&gt;hi_logical_addr&lt;/code&gt;). Well,
+maybe. It also takes a subvolume ID and an inode number; and looks up in
+the scan data whether that inode has the &lt;code&gt;INODE_NODATASUM&lt;/code&gt;
+flag set; if it does have the flag set, then it returns early without
+looking for any &lt;code&gt;EXTENT_CSUM&lt;/code&gt; items. If it doesn't return
+early, then it performs the same want-range routine as
+&lt;code&gt;WantFileExt&lt;/code&gt;, but with the appropriate tree, object ID, and
+item types for csums as opposed to data extents.&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;For each of these callbacks, we generate a "wantKey", a tuple
+representing the function and its arguments; we check the augment-queue
+to see if we've already enqueued a set of roots for that want, and if
+so, that callback can return early without checking the potential-item
+index.&lt;/p&gt;
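+&lt;p&gt;Putting the pieces together, the common shape of these callbacks,
+as Go-ish pseudocode (every name here is mine; see
+&lt;code&gt;rebuild_wantcb.go&lt;/code&gt; for the real thing):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;func (o *rebuilder) Want(treeID, objID uint64, typ uint8) {
+    key := newWantKey(&amp;quot;Want&amp;quot;, treeID, objID, uint64(typ)) // hypothetical
+    if o.augmentQueueHas(treeID, key) {
+        return // this want has already been processed
+    }
+    tree, err := o.forrest.Tree(treeID)
+    if err != nil {
+        o.enqueueRetry(treeID) // see the early-return rules above
+        return
+    }
+    if tree.ItemIndex().Contains(objID, typ) {
+        return // the tree already has such an item
+    }
+    roots := make(map[uint64]struct{})
+    for _, ptr := range tree.PotentialItemIndex().Search(objID, typ) {
+        for root := range tree.RootsThatWouldAdd(ptr.Node) {
+            roots[root] = struct{}{}
+        }
+    }
+    o.augmentQueueAdd(treeID, key, roots) // even if roots is still empty
+}&lt;/code&gt;&lt;/pre&gt;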
+&lt;h1 id="future-work"&gt;5. Future work&lt;/h1&gt;
+&lt;p&gt;It's in a reasonably useful place, I think; and so now I'm going to
+take a break from it for a while. But there's still lots of work to
+do:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;RAID almost certainly doesn't work.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Encryption is not implemented.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;It doesn't understand (ignores) the &lt;code&gt;TREE_LOG&lt;/code&gt;
+(because I don't understand the &lt;code&gt;TREE_LOG&lt;/code&gt;).&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;btrfs-rec inspect mount&lt;/code&gt; should add "lost+found"
+directories for inodes that are included in the subvolume's tree but
+aren't reachable from the tree's root inode.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;I still need to implement &lt;code&gt;btrfs-rec repair
+&lt;var&gt;SUBCMD&lt;/var&gt;&lt;/code&gt; subcommands to write rebuilt-information from
+&lt;code&gt;btrfs-rec inspect&lt;/code&gt; back to the filesystem.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;I need to figure out the error handling/reporting story for
+&lt;code&gt;mount&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;It needs a lot more tests.&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;I'd like to get the existing btrfs-progs fsck tests to run on
+it.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;In the process of writing this email, I realized that I probably
+need to add a retry-tree queue; see the "graph callbacks" section in the
+description of the &lt;code&gt;rebuild-trees&lt;/code&gt; algorithm above.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;There are a number of "TODO" comments or panics in the code:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;Some of them definitely need to be done.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Some of them are &lt;code&gt;panic("TODO")&lt;/code&gt; on the basis that if
+it's seeing something on the filesystem that it doesn't recognize, it's
+probably that I didn't get to implementing that thing/situation, but
+it's possible that the thing is just corrupt. This should only be for
+situations where the node passed the checksum test, so it being corrupt
+would have to be caused by a bug in btrfs rather than a failing drive or
+other corruption; I wasn't too worried about btrfs bugs.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;btrfs-rec inspect rebuild-trees&lt;/code&gt; is slow, and can
+probably be made a lot faster.&lt;/p&gt;
+&lt;p&gt;Just to give you an idea of the speeds, the run-times for the various
+steps on my ThinkPad E15 for a 256GB disk image are as follows:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt; btrfs-rec inspect rebuild-mappings scan : 7m 31s
+ btrfs-rec inspect rebuild-mappings list-nodes : 47s
+ btrfs-rec inspect rebuild-mappings process : 8m 22s
+ btrfs-rec inspect rebuild-trees : 1h 4m 55s
+ btrfs-rec inspect ls-files : 29m 55s
+ btrfs-rec inspect ls-trees : 8m 40s&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;For the most part, it's all single-threaded (with the main exception
+that in several places I/O has been moved to a separate thread from the
+main CPU-heavy thread), but a lot of the algorithms could be
+parallelized.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;There are a lot of "tunable" values that I haven't really spent
+time tuning. These are all annotated with &lt;a
+href="https://git.lukeshu.com/btrfs-progs-ng/tree/lib/textui/tunable.go?id=18e6066c241cf3d252b6521150843ffc858d8434"&gt;&lt;code&gt;textui.Tunable()&lt;/code&gt;&lt;/a&gt;.
+I sort-of intended for them to be adjustable on the CLI.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Perhaps the &lt;code&gt;btrfs inspect rebuild-trees&lt;/code&gt; algorithm
+could be adjusted to also try to rebuild trees with missing parents; see
+the above discussion of the algorithm.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h1 id="problems-for-merging-this-code-into-btrfs-progs"&gt;6. Problems for
+merging this code into btrfs-progs&lt;/h1&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;It's written in Go, not C.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;It's effectively GPLv3+ (not GPLv2-only or GPLv2+) because of use
+of some code under the Apache 2.0 license (2 files in the codebase
+itself that are based off of Apache-licensed code, and use of unmodified
+3rd-party libraries).&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;It uses ARC (Adaptive Replacement Cache), which is patented by
+IBM, and the patent doesn't expire for another 7 months. An important
+property of ARC over LRU is that it is scan-resistant; the above
+algorithms do a lot of scanning. On that note, now that Red Hat is owned
+by IBM: who in the company do we need to get to talk to each other so
+that we can get ARC into the Linux kernel before then?&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;div style="font-family: monospace"&gt;
+&lt;p&gt;-- &lt;br/&gt; Happy hacking,&lt;br/&gt; ~ Luke Shumaker&lt;br/&gt;&lt;/p&gt;
+&lt;/div&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2023 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./posix-pricing.html"/>
+ <link rel="alternate" type="text/markdown" href="./posix-pricing.md"/>
+ <id>https://lukeshu.com/blog/posix-pricing.html</id>
+ <updated>2018-02-09T00:00:00+00:00</updated>
+ <published>2018-02-09T00:00:00+00:00</published>
+ <title>POSIX pricing and availability; or: Do you really need the PDF?</title>
+ <content type="html">&lt;h1
+id="posix-pricing-and-availability-or-do-you-really-need-the-pdf"&gt;POSIX
+pricing and availability; or: Do you really need the PDF?&lt;/h1&gt;
+&lt;p&gt;The Open Group and IEEE are weird about POSIX pricing. They’re
+protective of the PDF, making you pay &lt;a
+href="http://standards.ieee.org/findstds/standard/1003.1-2008.html"&gt;hundreds
+of dollars&lt;/a&gt; for it; but will happily post an HTML version for
+free both &lt;a
+href="http://pubs.opengroup.org/onlinepubs/9699919799/"&gt;online&lt;/a&gt;, and
+(with free account creation) download as &lt;a
+href="https://www2.opengroup.org/ogsys/catalog/t101"&gt;a .zip&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;They also offer a special license to the “Linux man-pages” project,
+allowing them to &lt;a
+href="https://www.kernel.org/pub/linux/docs/man-pages/man-pages-posix/"&gt;distribute&lt;/a&gt;
+the man page portions of POSIX (most of it is written as a series of man
+pages) for free; so on a GNU/Linux box, you probably have most of POSIX
+already downloaded in manual sections 0p, 1p, and 3p.&lt;/p&gt;
+&lt;p&gt;Anyway, the only thing you aren’t getting with the free HTML version
+is a line number next to every line of text. It’s generated from the
+same troff sources. So, in an article or in a discussion, I’m not
+cheating you out of specification details by citing the webpage.&lt;/p&gt;
+&lt;p&gt;If you’re concerned that you’re looking at the correct version of the
+webpage or man pages, the current version (as of February 2018) of POSIX
+is “POSIX-2008, 2016 edition.”&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2018 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./kbd-xmodmap.html"/>
+ <link rel="alternate" type="text/markdown" href="./kbd-xmodmap.md"/>
+ <id>https://lukeshu.com/blog/kbd-xmodmap.html</id>
+ <updated>2018-02-09T00:00:00+00:00</updated>
+ <published>2018-02-09T00:00:00+00:00</published>
+ <title>GNU/Linux Keyboard Maps: xmodmap</title>
+ <content type="html">&lt;h1 id="gnulinux-keyboard-maps-xmodmap"&gt;GNU/Linux Keyboard Maps:
+xmodmap&lt;/h1&gt;
+&lt;p&gt;The modmap subsystem is part of the core &lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html"&gt;X11
+protocol&lt;/a&gt;. However, it has been replaced by the &lt;a
+href="https://www.x.org/releases/current/doc/kbproto/xkbproto.html"&gt;X
+Keyboard (XKB) Extension&lt;/a&gt; to the protocol, which defines a facade
+that emulates the legacy modmap subsystem so that old programs still
+work—including those that manipulate the modmap directly!&lt;/p&gt;
+&lt;p&gt;For people who like to Keep It Stupid Simple, the XKB extension looks
+horribly complicated and gross—even ignoring protocol details, the
+configuration syntax is a monstrosity! There’s no way to say something
+like “I’d like to remap Caps-Lock to be Control”; you have to copy and
+edit the entire keyboard definition, which includes mucking with vector
+graphics of the physical keyboard layout! So it’s very tempting to
+pretend that XKB doesn’t exist, and that it’s still using modmap.&lt;/p&gt;
+&lt;p&gt;However, this is a leaky abstraction; for instance: when running the
+&lt;code&gt;xmodmap&lt;/code&gt; command to manipulate the modmap, if you have
+multiple keyboards plugged in, the result can depend on which keyboard
+you used to press “enter” after typing the command!&lt;/p&gt;
+&lt;p&gt;Despite only existing as a compatibility shim today, I think it is
+important to understand the modmap subsystem to understand modern
+XKB.&lt;/p&gt;
+&lt;h2 id="conceptual-overview"&gt;Conceptual overview&lt;/h2&gt;
+&lt;p&gt;There are 3 fundamental tasks that the modmap subsystem performs:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;code&gt;keyboard: map keycode -&amp;gt; keysym&lt;/code&gt;
+(client-side)&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;keyboard: map keycode -&amp;gt; modifier bitmask&lt;/code&gt;
+(server-side)&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;pointer: map physical button -&amp;gt; logical button&lt;/code&gt;
+(server-side)&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;You’re thinking: “Great, so the X server does these things for us!”
+Nope! Not entirely, anyway. It does the keycode-&amp;gt;modifier lookup, and
+the mouse-button lookup, but the keycode-&amp;gt;keysym lookup must be done
+client-side by querying the mapping stored on the server. Generally,
+this is done automatically inside of libX11/libxcb, and the actual
+client application code doesn’t need to worry about it.&lt;/p&gt;
+&lt;p&gt;So, what’s the difference between a keycode and a keysym, and how’s
+the modifier bitmask work?&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;keycode: A numeric ID for a hardware button; this is as close to
+the hardware as X11 modmaps let us get. These are conceptually identical
+to Linux kernel keycodes, but the numbers don’t match up. Xorg keycodes
+are typically &lt;code&gt;linux_keycode+8&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;keysym: A 29-bit integer code that is meaningful to applications.
+A mapping of these to symbolic names is defined in
+&lt;code&gt;&amp;lt;X11/keysymdef.h&amp;gt;&lt;/code&gt; and augmented by
+&lt;code&gt;/usr/share/X11/XKeysymDB&lt;/code&gt;. See:
+&lt;code&gt;XStringToKeysym()&lt;/code&gt; and &lt;code&gt;XKeysymToString()&lt;/code&gt;. We
+will generally use the symbolic name in the modmap file. The symbolic
+names are case-sensitive.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Modifier state: An 8-bit bitmask of modifier keys (names are
+case-insensitive):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;1 &amp;lt;&amp;lt; 0 : shift
+1 &amp;lt;&amp;lt; 1 : lock
+1 &amp;lt;&amp;lt; 2 : control
+1 &amp;lt;&amp;lt; 3 : mod1
+1 &amp;lt;&amp;lt; 4 : mod2
+1 &amp;lt;&amp;lt; 5 : mod3
+1 &amp;lt;&amp;lt; 6 : mod4
+1 &amp;lt;&amp;lt; 7 : mod5&lt;/code&gt;&lt;/pre&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;With that knowledge, and the libX11/libxcb API docs, you can probably
+figure out how to interact with the modmap subsystem from C, but who
+does that? Everyone just uses the &lt;code&gt;xmodmap(1)&lt;/code&gt; command.&lt;/p&gt;
+&lt;h2 id="the-x11-protocol"&gt;The X11 protocol&lt;/h2&gt;
+&lt;p&gt;As I said, the modifier and button lookup is handled server-side;
+each of the &lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#events:input"&gt;input
+events&lt;/a&gt; ({Key,Button}{Press,Release}, and MotionNotify) and &lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#events:pointer_window"&gt;pointer
+window events&lt;/a&gt; ({Enter,Leave}Notify) includes a bitmask of active
+keyboard modifiers and pointer buttons. Each is given an 8-bit
+bitmask—hence 8 key modifiers. For some reason, only up to Button5 is
+included in the bitmask; the upper 3 bits are always zero; but the
+Button{Press,Release} events will happily deliver events for up to
+Button255!&lt;/p&gt;
+&lt;p&gt;The X11 protocol has 6 request types for dealing with these 3
+mappings; an accessor and a mutator pair for each. Since 2 of the
+mappings are done server-side, most clients will only use
+GetKeyboardMapping. Anyway, let’s look at those 6 requests, grouped by
+the mappings that they work with (pardon the Java-like pseudo-code
+syntax for indicating logical argument and return types):&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;keyboard: map keycode -&amp;gt; keysym&lt;/code&gt;&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetKeyboardMapping"&gt;GetKeyboardMapping&lt;/a&gt;
+::
+&lt;code&gt;List&amp;lt;keycode&amp;gt; -&amp;gt; Map&amp;lt;keycode,List&amp;lt;keysym&amp;gt;&amp;gt;&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:ChangeKeyboardMapping"&gt;ChangeKeyboardMapping&lt;/a&gt;
+:: &lt;code&gt;Map&amp;lt;keycode,List&amp;lt;keysym&amp;gt;&amp;gt; -&amp;gt; ()&lt;/code&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;&lt;code&gt;GetKeyboardMapping&lt;/code&gt; returns the keycode-&amp;gt;keysym
+mappings for the requested keycodes; this way clients can choose to look
+up only the keycodes that they need to handle (the ones that got sent to
+them). Each keycode gets a list of keysyms; which keysym they should use
+from that list depends on which modifiers are pressed.
+&lt;code&gt;ChangeKeyboardMapping&lt;/code&gt; changes the mapping for the given
+keycodes; not all keycodes must be given, any keycodes that aren’t
+included in the request aren’t changed.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;keyboard: map keycode -&amp;gt; modifier bitmask&lt;/code&gt;&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetModifierMapping"&gt;GetModifierMapping&lt;/a&gt;
+:: &lt;code&gt;() -&amp;gt; Map&amp;lt;modifier,List&amp;lt;keycode&amp;gt;&amp;gt;&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:SetModifierMapping"&gt;SetModifierMapping&lt;/a&gt;
+:: &lt;code&gt;Map&amp;lt;modifier,List&amp;lt;keycode&amp;gt;&amp;gt; -&amp;gt; ()&lt;/code&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;The modifiers mapping is a lot smaller than the keysym mapping; you
+must operate on the entire mapping at once. For each modifier bit,
+there’s a list of keycodes that will cause that modifier bit to be
+flipped in the events that are delivered while it is pressed.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;pointer: map physical button -&amp;gt; logical button&lt;/code&gt;&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetPointerMapping"&gt;GetPointerMapping&lt;/a&gt;
+&lt;code&gt;() -&amp;gt; List&amp;lt;logicalButton&amp;gt;&lt;/code&gt; (indexed by
+&lt;code&gt;physicalButton-1&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:SetPointerMapping"&gt;SetPointerMapping&lt;/a&gt;
+&lt;code&gt;List&amp;lt;logicalButton&amp;gt; -&amp;gt; ()&lt;/code&gt; (indexed by
+&lt;code&gt;physicalButton-1&lt;/code&gt;)&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;Like the modifier mapping, the button mapping is expected to be
+small; most mice only have 5-7 buttons (left, middle, right, scroll up,
+scroll down, scroll left, scroll right—that’s right, X11 handles scroll
+events as button presses), though some fancy gaming mice have more than
+that, but not much more.&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;I mentioned earlier that the keycode-&amp;gt;keysym mapping isn’t
+actually done by the X server, and is done in the client; whenever a
+client receives a key event or pointer button event, it must do a
+&lt;code&gt;Get*Mapping&lt;/code&gt; request to see what that translates to. Of
+course, doing that for every keystroke would be crazy; but at the same
+time, each client is expected to know about changes to the mappings
+that happen at run-time. So, each of the “set”/“change” commands
+generates a &lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#events:MappingNotify"&gt;MappingNotify&lt;/a&gt;
+event that is sent to all clients, so they know when they must dump
+their cache of mappings.&lt;/p&gt;
+&lt;p&gt;For completeness, if you are looking at this as background for
+understanding XKB, I should also mention:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetKeyboardControl"&gt;GetKeyboardControl&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:ChangeKeyboardControl"&gt;ChangeKeyboardControl&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetPointerControl"&gt;GetPointerControl&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:ChangePointerControl"&gt;ChangePointerControl&lt;/a&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h2 id="the-xmodmap-command"&gt;The &lt;code&gt;xmodmap&lt;/code&gt; command&lt;/h2&gt;
+&lt;p&gt;The &lt;code&gt;xmodmap&lt;/code&gt; command reads a configuration file and
+modifies the maps in the X server to match. The &lt;code&gt;xmodmap&lt;/code&gt;
+config file has its own little quirky syntax. For one, the comment
+character is &lt;code&gt;!&lt;/code&gt; (and comments may only start at the
+&lt;em&gt;beginning&lt;/em&gt; of the line, but that’s fairly common).&lt;/p&gt;
+&lt;p&gt;There are 8 commands that &lt;code&gt;xmodmap&lt;/code&gt; recognizes. Let’s look
+at those, grouped by the 3 tasks that the modmap subsystem performs:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;keyboard: map keycode -&amp;gt; keysym&lt;/code&gt;&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;keycode KEYCODE = PLAIN [SHIFT [MODE_SWITCH [MODE_SWITCH+SHIFT ]]]&lt;/code&gt;&lt;/p&gt;
+&lt;p&gt;Actually takes a list of up to 8 keysyms, but only the first 4 have
+standard uses.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;keysym OLD_KEYSYM = NEW_KEYSYMS...&lt;/code&gt;&lt;/p&gt;
+&lt;p&gt;Takes the keycodes mapped to &lt;code&gt;OLD_KEYSYM&lt;/code&gt; and maps them to
+&lt;code&gt;NEW_KEYSYMS&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;keysym any = KEYSYMS...&lt;/code&gt;&lt;/p&gt;
+&lt;p&gt;Finds an otherwise unused keycode, and has it map to the specified
+keysyms.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;keyboard: map keycode -&amp;gt; modifier bitmask&lt;/code&gt;&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;clear MODIFIER&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;add MODIFIERNAME = KEYSYMS...&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;remove MODIFIERNAME = KEYSYMS...&lt;/code&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;Wait, the modmap subsystem maps &lt;em&gt;keycodes&lt;/em&gt; to modifiers, but
+the commands take &lt;em&gt;keysyms&lt;/em&gt;? Yup! When executing one of these
+commands, it first looks up those keysyms in the keyboard map to
+translate them in to a set of keycodes, then associates those keycodes
+with that modifier. But how does it look up keysym-&amp;gt;keycode, when the
+protocol only supports querying keycode-&amp;gt;keysym? It &lt;a
+href="https://cgit.freedesktop.org/xorg/app/xmodmap/tree/handle.c?h=xmodmap-1.0.9#n59"&gt;loops&lt;/a&gt;
+over &lt;em&gt;every&lt;/em&gt; keycode finding all the matches.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;pointer: map physical button -&amp;gt; logical button&lt;/code&gt;&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;pointer = default&lt;/code&gt;&lt;/p&gt;
+&lt;p&gt;This is equivalent to &lt;code&gt;pointer = 1 2 3 4 5 6...&lt;/code&gt; where the
+list is as long as the number of buttons there are.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;&lt;code&gt;pointer = NUMBERS...&lt;/code&gt;&lt;/p&gt;
+&lt;p&gt;&lt;code&gt;pointer = A B C D...&lt;/code&gt; sets the physical button 1 to
+logical button A, physical button 2 to logical button B, and so on.
+Setting a physical button to logical button 0 disables that
+button.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ol&gt;
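+&lt;p&gt;As a quick illustration of those commands (a hypothetical mapping,
+not a recommendation), here is how a few of them look when fed to
+&lt;code&gt;xmodmap&lt;/code&gt; one-at-a-time with &lt;code&gt;-e&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# turn Caps Lock into an extra Control key
+xmodmap -e &amp;#39;clear lock&amp;#39;
+xmodmap -e &amp;#39;keysym Caps_Lock = Control_L&amp;#39;
+xmodmap -e &amp;#39;add control = Control_L&amp;#39;
+# swap buttons 1 and 3, for a left-handed mouse
+xmodmap -e &amp;#39;pointer = 3 2 1&amp;#39;&lt;/code&gt;&lt;/pre&gt;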
+&lt;h2 id="appendix"&gt;Appendix:&lt;/h2&gt;
+&lt;p&gt;I use this snippet in my Emacs configuration to make editing xmodmap
+files nicer:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;;; http://www.emacswiki.org/emacs/XModMapMode
+(when (not (fboundp &amp;#39;xmodmap-mode))
+ (define-generic-mode &amp;#39;xmodmap-mode
+ &amp;#39;(?!)
+ &amp;#39;(&amp;quot;add&amp;quot; &amp;quot;clear&amp;quot; &amp;quot;keycode&amp;quot; &amp;quot;keysym&amp;quot; &amp;quot;pointer&amp;quot; &amp;quot;remove&amp;quot;)
+ nil
+ &amp;#39;(&amp;quot;[xX]modmap\\(rc\\)?\\&amp;#39;&amp;quot;)
+ nil
+ &amp;quot;Simple mode for xmodmap files.&amp;quot;))&lt;/code&gt;&lt;/pre&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2018 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./crt-sh-architecture.html"/>
+ <link rel="alternate" type="text/markdown" href="./crt-sh-architecture.md"/>
+ <id>https://lukeshu.com/blog/crt-sh-architecture.html</id>
+ <updated>2018-02-09T00:00:00+00:00</updated>
+ <published>2018-02-09T00:00:00+00:00</published>
+ <title>The interesting architecture of crt.sh</title>
+ <content type="html">&lt;h1 id="the-interesting-architecture-of-crt.sh"&gt;The interesting
+architecture of crt.sh&lt;/h1&gt;
+&lt;p&gt;A while back I wrote myself a little dashboard for monitoring TLS
+certificates for my domains. Right now it works by talking to &lt;a
+href="https://crt.sh/" class="uri"&gt;https://crt.sh/&lt;/a&gt;. Sometimes this
+works great, but sometimes crt.sh is really slow. Plus, it’s another
+thing that could be compromised.&lt;/p&gt;
+&lt;p&gt;So, I started looking at how crt.sh works. It’s kinda cool.&lt;/p&gt;
+&lt;p&gt;There are only 3 separate processes:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;Cron
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://github.com/crtsh/ct_monitor"&gt;&lt;code&gt;ct_monitor&lt;/code&gt;&lt;/a&gt;
+is a program that uses libcurl to get CT log changes and libpq to put them
+into the database.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;PostgreSQL
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://github.com/crtsh/certwatch_db"&gt;&lt;code&gt;certwatch_db&lt;/code&gt;&lt;/a&gt;
+is the core web application, written in PL/pgSQL. It even includes the
+HTML templating and query parameter handling. Of course, there are a
+couple of things not entirely done in pgSQL…&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://github.com/crtsh/libx509pq"&gt;&lt;code&gt;libx509pq&lt;/code&gt;&lt;/a&gt;
+adds a set of &lt;code&gt;x509_*&lt;/code&gt; functions callable from pgSQL for
+parsing X509 certificates.&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://github.com/crtsh/libcablintpq"&gt;&lt;code&gt;libcablintpq&lt;/code&gt;&lt;/a&gt;
+adds the &lt;code&gt;cablint_embedded(bytea)&lt;/code&gt; function to pgSQL.&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://github.com/crtsh/libx509lintpq"&gt;&lt;code&gt;libx509lintpq&lt;/code&gt;&lt;/a&gt;
+adds the &lt;code&gt;x509lint_embedded(bytea,integer)&lt;/code&gt; function to
+pgSQL.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;Apache HTTPD
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://github.com/crtsh/mod_certwatch"&gt;&lt;code&gt;mod_certwatch&lt;/code&gt;&lt;/a&gt;
+is a pretty thin wrapper that turns every HTTP request into an SQL
+statement sent to PostgreSQL, via…&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://github.com/crtsh/mod_pgconn"&gt;&lt;code&gt;mod_pgconn&lt;/code&gt;&lt;/a&gt;,
+which manages PostgreSQL connections.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;The interface exposes HTML, ATOM, and JSON. All from code written in
+SQL.&lt;/p&gt;
+&lt;p&gt;And then I guess it’s behind an nginx-based load-balancer or somesuch
+(based on the 504 Gateway Timeout messages it’s given me). But that’s not
+interesting.&lt;/p&gt;
+&lt;p&gt;The actual website is &lt;a
+href="https://groups.google.com/d/msg/mozilla.dev.security.policy/EPv_u9V06n0/gPJY5T7ILlQJ"&gt;run
+from a read-only slave&lt;/a&gt; of the master DB that the
+&lt;code&gt;ct_monitor&lt;/code&gt; cron-job updates; which makes several security
+considerations go away, and makes horizontal scaling easy.&lt;/p&gt;
+&lt;p&gt;Anyway, I thought it was neat that so much of it runs inside the
+database; you don’t see that terribly often. I also thought the little
+shims to make that possible were neat. I didn’t get deep enough in to it
+to end up running my own instance or clone, but I thought my notes on it
+were worth sharing.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2018 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./http-notes.html"/>
+ <link rel="alternate" type="text/markdown" href="./http-notes.md"/>
+ <id>https://lukeshu.com/blog/http-notes.html</id>
+ <updated>2016-09-30T00:00:00+00:00</updated>
+ <published>2016-09-30T00:00:00+00:00</published>
+ <title>Notes on subtleties of HTTP implementation</title>
+ <content type="html">&lt;h1 id="notes-on-subtleties-of-http-implementation"&gt;Notes on subtleties
+of HTTP implementation&lt;/h1&gt;
+&lt;p&gt;I may add to this as time goes on, but I’ve written up some notes on
+subtleties of HTTP/1.1 message syntax as specified in RFC 7230.&lt;/p&gt;
+&lt;h2 id="why-the-absolute-form-is-used-for-proxy-requests"&gt;Why the
+absolute-form is used for proxy requests&lt;/h2&gt;
+&lt;p&gt;&lt;a
+href="https://tools.ietf.org/html/rfc7230#section-5.3.2"&gt;RFC7230§5.3.2&lt;/a&gt;
+says that a (non-CONNECT) request to an HTTP proxy should look like&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;GET http://authority/path HTTP/1.1&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;rather than the usual&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;GET /path HTTP/1.1
+Host: authority&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;And it doesn’t give a hint as to why the message syntax is different
+here.&lt;/p&gt;
+&lt;p&gt;&lt;a
+href="https://parsiya.net/blog/2016-07-28-thick-client-proxying---part-6-how-https-proxies-work/#3-1-1-why-not-use-the-host-header"&gt;A
+blog post by Parsia Hakimian&lt;/a&gt; claims that the reason is that it’s a
+legacy behavior inherited from HTTP/1.0, which had proxies, but not the
+Host header field. Which is mostly true. But we can also realize that
+the usual syntax does not allow specifying a URI scheme, which means
+that we cannot specify a transport. Sure, the only two HTTP transports
+we might expect to use today are TCP (scheme: http) and TLS (scheme:
+https), and TLS requires we use a CONNECT request to the proxy, meaning
+that the only option left is a TCP transport; but that is no reason to
+avoid building generality into the protocol.&lt;/p&gt;
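+&lt;p&gt;Incidentally, you can watch the absolute-form go over the wire with
+curl, by pointing it at a proxy (assuming, hypothetically, a proxy
+listening on localhost:8080):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;$ curl -v -x http://localhost:8080 http://example.com/path
+&amp;gt; GET http://example.com/path HTTP/1.1
+&amp;gt; Host: example.com
+...&lt;/code&gt;&lt;/pre&gt;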
+&lt;h2 id="on-taking-short-cuts-based-on-early-header-field-values"&gt;On
+taking short-cuts based on early header field values&lt;/h2&gt;
+&lt;p&gt;&lt;a
+href="https://tools.ietf.org/html/rfc7230#section-3.2.2"&gt;RFC7230§3.2.2&lt;/a&gt;
+says:&lt;/p&gt;
+&lt;blockquote&gt;
+&lt;pre&gt;&lt;code&gt;The order in which header fields with differing field names are
+received is not significant. However, it is good practice to send
+header fields that contain control data first, such as Host on
+requests and Date on responses, so that implementations can decide
+when not to handle a message as early as possible.&lt;/code&gt;&lt;/pre&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;Which is great! We can make an optimization!&lt;/p&gt;
+&lt;p&gt;This is only a valid optimization for deciding to &lt;em&gt;not handle&lt;/em&gt;
+a message. You cannot use it to decide to route to a backend early based
+on this. Part of the reason is that &lt;a
+href="https://tools.ietf.org/html/rfc7230#section-5.4"&gt;§5.4&lt;/a&gt; tells us
+we must inspect the entire header field set to know if we need to
+respond with a 400 status code:&lt;/p&gt;
+&lt;blockquote&gt;
+&lt;pre&gt;&lt;code&gt;A server MUST respond with a 400 (Bad Request) status code to any
+HTTP/1.1 request message that lacks a Host header field and to any
+request message that contains more than one Host header field or a
+Host header field with an invalid field-value.&lt;/code&gt;&lt;/pre&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;However, if I decide not to handle a request based on the Host header
+field, the correct thing to do is to send a 404 status code. Which
+implies that I have parsed the remainder of the header field set to
+validate the message syntax. We need to parse the entire field-set to
+know if we need to send a 400 or a 404. Did this just kill the
+possibility of using the optimization?&lt;/p&gt;
+&lt;p&gt;Well, there are a number of “A server MUST respond with a XXX code
+if” rules that can all be triggered on the same request. So we get to
+choose which to use. And fortunately for optimizing implementations, &lt;a
+href="https://tools.ietf.org/html/rfc7230#section-3.2.5"&gt;§3.2.5&lt;/a&gt; gave
+us:&lt;/p&gt;
+&lt;blockquote&gt;
+&lt;pre&gt;&lt;code&gt;A server that receives a ... set of fields,
+larger than it wishes to process MUST respond with an appropriate 4xx
+(Client Error) status code.&lt;/code&gt;&lt;/pre&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;Since the header field set is longer than we want to process (since
+we want to short-cut processing), we are free to respond with whichever
+4XX status code we like!&lt;/p&gt;
+&lt;h2 id="on-normalizing-target-uris"&gt;On normalizing target URIs&lt;/h2&gt;
+&lt;p&gt;An implementer is tempted to normalize URIs all over the place, just
+for safety and sanitation. After all, &lt;a
+href="https://tools.ietf.org/html/rfc3986#section-6.1"&gt;RFC3986§6.1&lt;/a&gt;
+says it’s safe!&lt;/p&gt;
+&lt;p&gt;Unfortunately, most URI normalization implementations will normalize
+an empty path to “/”. Which is not always safe; &lt;a
+href="https://tools.ietf.org/html/rfc7230#section-2.7.3"&gt;RFC7230§2.7.3&lt;/a&gt;,
+which defines this “equivalence”, actually says:&lt;/p&gt;
+&lt;blockquote&gt;
+&lt;pre&gt;&lt;code&gt; When not being used in
+absolute form as the request target of an OPTIONS request, an empty
+path component is equivalent to an absolute path of &amp;quot;/&amp;quot;, so the
+normal form is to provide a path of &amp;quot;/&amp;quot; instead.&lt;/code&gt;&lt;/pre&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;Which means we can’t use the usual normalization implementation if we
+are making an OPTIONS request!&lt;/p&gt;
+&lt;p&gt;Why is that? Well, if we turn to &lt;a
+href="https://tools.ietf.org/html/rfc7230#section-5.3.4"&gt;§5.3.4&lt;/a&gt;, we
+find the answer. One of the special cases for when the request target is
+not a URI, is that we may use “*” as the target for an OPTIONS request
+to request information about the origin server itself, rather than a
+resource on that server.&lt;/p&gt;
+&lt;p&gt;However, as discussed above, the target in a request to a proxy must
+be an absolute URI (and &lt;a
+href="https://tools.ietf.org/html/rfc7230#section-5.3.2"&gt;§5.3.2&lt;/a&gt; says
+that the origin server must also understand this syntax). So, we must
+define a way to map “*” to an absolute URI.&lt;/p&gt;
+&lt;p&gt;Naively, one might be tempted to use “/*” as the path. But that would
+make it impossible to have a resource actually named “/*”. So, we must
+define a special case in the URI syntax that doesn’t obstruct a real
+path.&lt;/p&gt;
+&lt;p&gt;If we didn’t have this special case in the URI normalization rules,
+and we handled the “/” path as the same as empty in the OPTIONS handler
+of the last proxy server, then it would be impossible to request OPTIONS
+for the “/” resources, as it would get translated into “*” and treated
+as OPTIONS for the entire server.&lt;/p&gt;
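+&lt;p&gt;For what it’s worth, curl (7.55 or later) can emit both forms, which
+makes the distinction easy to poke at (example.com being a
+stand-in):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# OPTIONS for the &amp;quot;/&amp;quot; resource
+curl -X OPTIONS http://example.com/
+# OPTIONS for the server itself (&amp;quot;*&amp;quot; as the request target)
+curl -X OPTIONS --request-target &amp;#39;*&amp;#39; http://example.com&lt;/code&gt;&lt;/pre&gt;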
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2016 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./x11-systemd.html"/>
+ <link rel="alternate" type="text/markdown" href="./x11-systemd.md"/>
+ <id>https://lukeshu.com/blog/x11-systemd.html</id>
+ <updated>2016-02-28T00:00:00+00:00</updated>
+ <published>2016-02-28T00:00:00+00:00</published>
+ <title>My X11 setup with systemd</title>
+ <content type="html">&lt;h1 id="my-x11-setup-with-systemd"&gt;My X11 setup with systemd&lt;/h1&gt;
+&lt;p&gt;Somewhere along the way, I decided to use systemd user sessions to
+manage the various parts of my X11 environment would be a good idea. If
+that was a good idea or not… we’ll see.&lt;/p&gt;
+&lt;p&gt;I’ve sort-of been running this setup as my daily-driver for &lt;a
+href="https://lukeshu.com/git/dotfiles.git/commit/?id=a9935b7a12a522937d91cb44a0e138132b555e16"&gt;a
+bit over a year&lt;/a&gt;, continually tweaking it though.&lt;/p&gt;
+&lt;p&gt;My setup is substantially different than the one on &lt;a
+href="https://wiki.archlinux.org/index.php/Systemd/User"&gt;ArchWiki&lt;/a&gt;,
+because the ArchWiki solution assumes that there is only ever one X
+server for a user; I like the ability to run &lt;code&gt;Xorg&lt;/code&gt; on my
+real monitor, and also have &lt;code&gt;Xvnc&lt;/code&gt; running headless, or start
+my desktop environment on a remote X server. Though, I would like to
+figure out how to use systemd socket activation for the X server, as the
+ArchWiki solution does.&lt;/p&gt;
+&lt;p&gt;This means that all of my graphical units take &lt;code&gt;DISPLAY&lt;/code&gt;
+as an &lt;code&gt;@&lt;/code&gt; argument. To get this to all work out, this goes in
+each &lt;code&gt;.service&lt;/code&gt; file, unless otherwise noted:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;[Unit]
+After=X11@%i.target
+Requisite=X11@%i.target
+[Service]
+Environment=DISPLAY=%I&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;We’ll get to &lt;code&gt;X11@.target&lt;/code&gt; later; what it says is “I
+should only be running if X11 is running”.&lt;/p&gt;
+&lt;p&gt;I eschew complex XDMs or &lt;code&gt;startx&lt;/code&gt; wrapper scripts, opting
+for the more simple &lt;code&gt;xinit&lt;/code&gt;, which I either run on login for
+some boxes (my media station), or type &lt;code&gt;xinit&lt;/code&gt; when I want
+X11 on others (most everything else). Essentially, what
+&lt;code&gt;xinit&lt;/code&gt; does is run &lt;code&gt;~/.xserverrc&lt;/code&gt; (or
+&lt;code&gt;/etc/X11/xinit/xserverrc&lt;/code&gt;) to start the server, then once
+the server is started (which takes a substantial amount of magic to
+detect) it runs &lt;code&gt;~/.xinitrc&lt;/code&gt; (or
+&lt;code&gt;/etc/X11/xinit/xinitrc&lt;/code&gt;) to start the clients. Once
+&lt;code&gt;.xinitrc&lt;/code&gt; finishes running, it stops the X server and exits.
+Now, when I say “run”, I don’t mean execute; it passes each file to the
+system shell (&lt;code&gt;/bin/sh&lt;/code&gt;) as input.&lt;/p&gt;
+&lt;p&gt;Xorg requires a TTY to run on; if we log in to a TTY with
+&lt;code&gt;logind&lt;/code&gt;, it will give us the &lt;code&gt;XDG_VTNR&lt;/code&gt; variable
+to tell us which one we have, so I pass this to &lt;code&gt;X&lt;/code&gt; in &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/X11/serverrc"&gt;my
+&lt;code&gt;.xserverrc&lt;/code&gt;&lt;/a&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;#!/hint/sh
+if [ -z &amp;quot;$XDG_VTNR&amp;quot; ]; then
+ exec /usr/bin/X -nolisten tcp &amp;quot;$@&amp;quot;
+else
+ exec /usr/bin/X -nolisten tcp &amp;quot;$@&amp;quot; vt$XDG_VTNR
+fi&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;This was the default for &lt;a
+href="https://projects.archlinux.org/svntogit/packages.git/commit/trunk/xserverrc?h=packages/xorg-xinit&amp;amp;id=f9f5de58df03aae6c8a8c8231a83327d19b943a1"&gt;a
+while&lt;/a&gt; in Arch, to support &lt;code&gt;logind&lt;/code&gt;, but was &lt;a
+href="https://projects.archlinux.org/svntogit/packages.git/commit/trunk/xserverrc?h=packages/xorg-xinit&amp;amp;id=5a163ddd5dae300e7da4b027e28c37ad3b535804"&gt;later
+removed&lt;/a&gt; in part because &lt;code&gt;startx&lt;/code&gt; (which calls
+&lt;code&gt;xinit&lt;/code&gt;) started adding it as an argument as well, so
+&lt;code&gt;vt$XDG_VTNR&lt;/code&gt; was being listed as an argument twice, which is
+an error. IMO, that was a problem in &lt;code&gt;startx&lt;/code&gt;, and they
+shouldn’t have removed it from the default system
+&lt;code&gt;xserverrc&lt;/code&gt;, but that’s just me. So I copy/pasted it into my
+user &lt;code&gt;xserverrc&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;That’s the boring part, though. Where the magic starts happening is
+in &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/X11/clientrc"&gt;my
+&lt;code&gt;.xinitrc&lt;/code&gt;&lt;/a&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;#!/hint/sh
+
+if [ -z &amp;quot;$XDG_RUNTIME_DIR&amp;quot; ]; then
+ printf &amp;quot;XDG_RUNTIME_DIR isn&amp;#39;t set\n&amp;quot; &amp;gt;&amp;amp;2
+ exit 6
+fi
+
+_DISPLAY=&amp;quot;$(systemd-escape -- &amp;quot;$DISPLAY&amp;quot;)&amp;quot;
+trap &amp;quot;rm -f $(printf &amp;#39;%q&amp;#39; &amp;quot;${XDG_RUNTIME_DIR}/x11-wm@${_DISPLAY}&amp;quot;)&amp;quot; EXIT
+mkfifo &amp;quot;${XDG_RUNTIME_DIR}/x11-wm@${_DISPLAY}&amp;quot;
+
+cat &amp;lt; &amp;quot;${XDG_RUNTIME_DIR}/x11-wm@${_DISPLAY}&amp;quot; &amp;amp;
+systemctl --user start &amp;quot;X11@${_DISPLAY}.target&amp;quot; &amp;amp;
+wait
+systemctl --user stop &amp;quot;X11@${_DISPLAY}.target&amp;quot;&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;There are two contracts/interfaces here: the
+&lt;code&gt;X11@DISPLAY.target&lt;/code&gt; systemd target, and the
+&lt;code&gt;${XDG_RUNTIME_DIR}/x11-wm@DISPLAY&lt;/code&gt; named pipe. The systemd
+&lt;code&gt;.target&lt;/code&gt; should be pretty self explanatory; the most
+important part is that it starts the window manager. The named pipe is
+just a hacky way of blocking until the window manager exits
+(“traditional” &lt;code&gt;.xinitrc&lt;/code&gt; files end with the line
+&lt;code&gt;exec your-window-manager&lt;/code&gt;, so this mimics that behavior). It
+works by assuming that the window manager will open the pipe at startup,
+and keep it open (without necessarily writing anything to it); when the
+window manager exits, the pipe will get closed, sending EOF to the
+&lt;code&gt;wait&lt;/code&gt;ed-for &lt;code&gt;cat&lt;/code&gt;, allowing it to exit, letting
+the script resume. The window manager (WMII) is made to have the pipe
+opened by executing it this way in &lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wmii@.service"&gt;its
+&lt;code&gt;.service&lt;/code&gt; file&lt;/a&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;ExecStart=/usr/bin/env bash -c &amp;#39;exec 8&amp;gt;${XDG_RUNTIME_DIR}/x11-wm@%I; exec wmii&amp;#39;&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;which just opens the file on file descriptor 8, then launches the
+window manager normally. The only further logic required by the window
+manager with regard to the pipe is that in the window manager &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/config.sh"&gt;configuration&lt;/a&gt;,
+I should close that file descriptor after forking any process that isn’t
+“part of” the window manager:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;runcmd() (
+ ...
+ exec 8&amp;gt;&amp;amp;- # xinit/systemd handshake
+ ...
+)&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;So, back to the &lt;code&gt;X11@DISPLAY.target&lt;/code&gt;; I configure what it
+“does” with symlinks in the &lt;code&gt;.requires&lt;/code&gt; and
+&lt;code&gt;.wants&lt;/code&gt; directories:&lt;/p&gt;
+&lt;ul class="tree"&gt;
+&lt;li&gt;
+&lt;p&gt;&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user"&gt;.config/systemd/user/&lt;/a&gt;&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/X11@.target"&gt;X11@.target&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/X11@.target.requires"&gt;X11@.target.requires&lt;/a&gt;/
+&lt;ul&gt;
+&lt;li&gt;wmii@.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wmii@.service"&gt;wmii@.service&lt;/a&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/X11@.target.wants"&gt;X11@.target.wants&lt;/a&gt;/
+&lt;ul&gt;
+&lt;li&gt;xmodmap@.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xmodmap@.service"&gt;xmodmap@.service&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;xresources-dpi@.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xresources-dpi@.service"&gt;xresources-dpi@.service&lt;/a&gt;&lt;/li&gt;
+&lt;li&gt;xresources@.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xresources@.service"&gt;xresources@.service&lt;/a&gt;&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;The &lt;code&gt;.requires&lt;/code&gt; directory is how I configure which window
+manager it starts. This would allow me to configure different window
+managers on different displays, by creating a &lt;code&gt;.requires&lt;/code&gt;
+directory with the &lt;code&gt;DISPLAY&lt;/code&gt; included,
+e.g. &lt;code&gt;X11@:2.target.requires&lt;/code&gt;.&lt;/p&gt;
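+&lt;p&gt;For example (hypothetically; wmii is what I actually run
+everywhere), having display &lt;code&gt;:2&lt;/code&gt; start i3 instead would just
+be a matter of creating the more-specific directory and symlink:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;mkdir -p ~/.config/systemd/user/&amp;#39;X11@:2.target.requires&amp;#39;
+ln -s ../i3@.service ~/.config/systemd/user/&amp;#39;X11@:2.target.requires/i3@.service&amp;#39;&lt;/code&gt;&lt;/pre&gt;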
+&lt;p&gt;The &lt;code&gt;.wants&lt;/code&gt; directory is for general X display setup;
+it’s analogous to &lt;code&gt;/etc/X11/xinit/xinitrc.d/&lt;/code&gt;. All of the
+files in it are simple &lt;code&gt;Type=oneshot&lt;/code&gt; service files. The &lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xmodmap@.service"&gt;xmodmap&lt;/a&gt;
+and &lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xresources@.service"&gt;xresources&lt;/a&gt;
+files are pretty boring, they’re just systemd versions of the couple
+lines that just about every traditional &lt;code&gt;.xinitrc&lt;/code&gt; contains,
+the biggest difference being that they look at &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/X11/modmap"&gt;&lt;code&gt;~/.config/X11/modmap&lt;/code&gt;&lt;/a&gt;
+and &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/X11/resources"&gt;&lt;code&gt;~/.config/X11/resources&lt;/code&gt;&lt;/a&gt;
+instead of the traditional locations &lt;code&gt;~/.xmodmap&lt;/code&gt; and
+&lt;code&gt;~/.Xresources&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;What’s possibly of note is &lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xresources-dpi@.service"&gt;&lt;code&gt;xresources-dpi@.service&lt;/code&gt;&lt;/a&gt;.
+In X11, there are two sources of DPI information, the X display
+resolution, and the XRDB &lt;code&gt;Xft.dpi&lt;/code&gt; setting. It isn’t defined
+which takes precedence (to my knowledge), and even if it were (is),
+application authors wouldn’t be arsed to actually do the right thing.
+For years, Firefox (well, Iceweasel) happily listened to the X display
+resolution, but recently it decided to only look at
+&lt;code&gt;Xft.dpi&lt;/code&gt;, which objectively seems a little silly, since the
+X display resolution is always present, but &lt;code&gt;Xft.dpi&lt;/code&gt; isn’t.
+Anyway, Mozilla’s change drove me to create a &lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.local/bin/xrdb-set-dpi"&gt;script&lt;/a&gt;
+to make the &lt;code&gt;Xft.dpi&lt;/code&gt; setting match the X display resolution.
+Disclaimer: I have no idea if it works if the X server has multiple
+displays (with possibly varying resolution).&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;#!/usr/bin/env bash
+dpi=$(LC_ALL=C xdpyinfo|sed -rn &amp;#39;s/^\s*resolution:\s*(.*) dots per inch$/\1/p&amp;#39;)
+xrdb -merge &amp;lt;&amp;lt;&amp;lt;&amp;quot;Xft.dpi: ${dpi}&amp;quot;&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Since we want XRDB to be set up before any other programs launch, we
+give both of the &lt;code&gt;xresources&lt;/code&gt; units
+&lt;code&gt;Before=X11@%i.target&lt;/code&gt; (instead of &lt;code&gt;After=&lt;/code&gt; like
+everything else). Also, two programs writing to &lt;code&gt;xrdb&lt;/code&gt; at the
+same time has the same problem as two programs writing to the same file;
+one might trash the other’s changes. So, I stuck
+&lt;code&gt;Conflicts=xresources@%i.service&lt;/code&gt; into
+&lt;code&gt;xresources-dpi@.service&lt;/code&gt;.&lt;/p&gt;
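+&lt;p&gt;Pulling those directives together, the ordering-relevant bits of
+&lt;code&gt;xresources-dpi@.service&lt;/code&gt; amount to something like this
+sketch (assembled from the prose above; the &lt;code&gt;ExecStart&lt;/code&gt; path
+is just where I keep the script):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;[Unit]
+Before=X11@%i.target
+Requisite=X11@%i.target
+Conflicts=xresources@%i.service
+[Service]
+Type=oneshot
+Environment=DISPLAY=%I
+ExecStart=%h/.local/bin/xrdb-set-dpi&lt;/code&gt;&lt;/pre&gt;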
+&lt;p&gt;And that’s the “core” of my X11 systemd setup. But, you generally
+want more things running than just the window manager, like a desktop
+notification daemon, a system panel, and an X composition manager
+(unless your window manager is bloated and has a composition manager
+built in). Since these things are probably window-manager specific, I’ve
+stuck them in a directory &lt;code&gt;wmii@.service.wants&lt;/code&gt;:&lt;/p&gt;
+&lt;ul class="tree"&gt;
+&lt;li&gt;
+&lt;p&gt;&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user"&gt;.config/systemd/user/&lt;/a&gt;&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wmii@.service.wants"&gt;wmii@.service.wants&lt;/a&gt;/
+&lt;ul&gt;
+&lt;li&gt;dunst@.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/dunst@.service"&gt;dunst@.service&lt;/a&gt;       
+# a notification daemon&lt;/li&gt;
+&lt;li&gt;lxpanel@.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/lxpanel@.service"&gt;lxpanel@.service&lt;/a&gt;   
+# a system panel&lt;/li&gt;
+&lt;li&gt;rbar@97_acpi.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/rbar@.service"&gt;rbar@.service&lt;/a&gt;  
+# wmii stuff&lt;/li&gt;
+&lt;li&gt;rbar@99_clock.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/rbar@.service"&gt;rbar@.service&lt;/a&gt; 
+# wmii stuff&lt;/li&gt;
+&lt;li&gt;xcompmgr@.service -&amp;gt; ../&lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xcompmgr@.service"&gt;xcompmgr@.service&lt;/a&gt; 
+# an X composition manager&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;For the window manager &lt;code&gt;.service&lt;/code&gt;, I &lt;em&gt;could&lt;/em&gt; just
+say &lt;code&gt;Type=simple&lt;/code&gt; and call it a day (and I did for a while).
+But, I like to have &lt;code&gt;lxpanel&lt;/code&gt; show up on all of my WMII tags
+(desktops), so I have &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/config.sh"&gt;my
+WMII configuration&lt;/a&gt; stick this in the WMII &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/rules"&gt;&lt;code&gt;/rules&lt;/code&gt;&lt;/a&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;/panel/ tags=/.*/ floating=always&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Unfortunately, for this to work, &lt;code&gt;lxpanel&lt;/code&gt; must be started
+&lt;em&gt;after&lt;/em&gt; that gets inserted into WMII’s rules. That wasn’t a
+problem pre-systemd, because &lt;code&gt;lxpanel&lt;/code&gt; was started by my WMII
+configuration, so ordering was simple. For systemd to get this right, I
+must have a way of notifying systemd that WMII’s fully started, and it’s
+safe to start &lt;code&gt;lxpanel&lt;/code&gt;. So, I stuck this in &lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wmii@.service"&gt;my
+WMII &lt;code&gt;.service&lt;/code&gt; file&lt;/a&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# This assumes that you write READY=1 to $NOTIFY_SOCKET in wmiirc
+Type=notify
+NotifyAccess=all&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;and this in &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/wmiirc"&gt;my
+WMII configuration&lt;/a&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;systemd-notify --ready || true&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Now, this setup means that &lt;code&gt;NOTIFY_SOCKET&lt;/code&gt; is set for all
+the children of &lt;code&gt;wmii&lt;/code&gt;; I’d rather not have it leak into the
+applications that I start from the window manager, so I also stuck
+&lt;code&gt;unset NOTIFY_SOCKET&lt;/code&gt; after forking a process that isn’t part
+of the window manager:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;runcmd() (
+ ...
+ unset NOTIFY_SOCKET # systemd
+ ...
+ exec 8&amp;gt;&amp;amp;- # xinit/systemd handshake
+ ...
+)&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Unfortunately, because of a couple of &lt;a
+href="https://github.com/systemd/systemd/issues/2739"&gt;bugs&lt;/a&gt; and &lt;a
+href="https://github.com/systemd/systemd/issues/2737"&gt;race
+conditions&lt;/a&gt; in systemd, &lt;code&gt;systemd-notify&lt;/code&gt; isn’t reliable.
+If systemd can’t receive the &lt;code&gt;READY=1&lt;/code&gt; signal from my WMII
+configuration, there are two consequences:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;code&gt;lxpanel&lt;/code&gt; will never start, because it will always be
+waiting for &lt;code&gt;wmii&lt;/code&gt; to be ready, which will never happen.&lt;/li&gt;
+&lt;li&gt;After a couple of minutes, systemd will consider &lt;code&gt;wmii&lt;/code&gt;
+to be timed out, which is a failure, so then it will kill
+&lt;code&gt;wmii&lt;/code&gt;, and exit my X11 session. That’s no good!&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;Using &lt;code&gt;socat&lt;/code&gt; to send the message to systemd instead of
+&lt;code&gt;systemd-notify&lt;/code&gt; “should” always work, because it tries to
+read from both ends of the bi-directional stream, and I can’t imagine
+that getting EOF from the &lt;code&gt;UNIX-SENDTO&lt;/code&gt; end will ever be
+faster than the systemd manager from handling the datagram that got
+sent. Which is to say, “we work around the race condition by being slow
+and shitty.”&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;socat STDIO UNIX-SENDTO:&amp;quot;$NOTIFY_SOCKET&amp;quot; &amp;lt;&amp;lt;&amp;lt;READY=1 || true&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;But, I don’t like that. I’d rather write my WMII configuration to the
+world as I wish it existed, and have workarounds encapsulated elsewhere;
+&lt;a
+href="http://blog.robertelder.org/interfaces-most-important-software-engineering-concept/"&gt;“If
+you have to cut corners in your project, do it inside the
+implementation, and wrap a very good interface around it.”&lt;/a&gt;. So, I
+wrote a &lt;code&gt;systemd-notify&lt;/code&gt; compatible &lt;a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/workarounds.sh"&gt;function&lt;/a&gt;
+that ultimately calls &lt;code&gt;socat&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;##
+# Just like systemd-notify(1), but slower, which is a shitty
+# workaround for a race condition in systemd.
+##
+systemd-notify() {
+ local args
+ args=&amp;quot;$(getopt -n systemd-notify -o h -l help,version,ready,pid::,status:,booted -- &amp;quot;$@&amp;quot;)&amp;quot;
+ ret=$?; [[ $ret == 0 ]] || return $ret
+ eval set -- &amp;quot;$args&amp;quot;
+
+ local arg_ready=false
+ local arg_pid=0
+ local arg_status=
+ while [[ $# -gt 0 ]]; do
+ case &amp;quot;$1&amp;quot; in
+ -h|--help) command systemd-notify --help; return $?;;
+ --version) command systemd-notify --version; return $?;;
+ --ready) arg_ready=true; shift 1;;
+ --pid) arg_pid=${2:-$$}; shift 2;;
+ --status) arg_status=$2; shift 2;;
+ --booted) command systemd-notify --booted; return $?;;
+ --) shift 1; break;;
+ esac
+ done
+
+ local our_env=()
+ if $arg_ready; then
+ our_env+=(&amp;quot;READY=1&amp;quot;)
+ fi
+ if [[ -n &amp;quot;$arg_status&amp;quot; ]]; then
+ our_env+=(&amp;quot;STATUS=$arg_status&amp;quot;)
+ fi
+ if [[ &amp;quot;$arg_pid&amp;quot; -gt 0 ]]; then
+ our_env+=(&amp;quot;MAINPID=$arg_pid&amp;quot;)
+ fi
+ our_env+=(&amp;quot;$@&amp;quot;)
+ local n
+ printf -v n &amp;#39;%s\n&amp;#39; &amp;quot;${our_env[@]}&amp;quot;
+ socat STDIO UNIX-SENDTO:&amp;quot;$NOTIFY_SOCKET&amp;quot; &amp;lt;&amp;lt;&amp;lt;&amp;quot;$n&amp;quot;
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;So, one day when the systemd bugs have been fixed (and presumably the
+Linux kernel supports passing the cgroup of a process as part of its
+credentials), I can remove that from &lt;code&gt;workarounds.sh&lt;/code&gt;, and
+not have to touch anything else in my WMII configuration (I do use
+&lt;code&gt;systemd-notify&lt;/code&gt; in a couple of other, non-essential, places
+too; this wasn’t to avoid having to change just 1 line).&lt;/p&gt;
+&lt;p&gt;So, now that &lt;code&gt;wmii@.service&lt;/code&gt; properly has
+&lt;code&gt;Type=notify&lt;/code&gt;, I can just stick
+&lt;code&gt;After=wmii@.service&lt;/code&gt; into my &lt;code&gt;lxpanel@.service&lt;/code&gt;,
+right? Wrong! Well, I &lt;em&gt;could&lt;/em&gt;, but my &lt;code&gt;lxpanel&lt;/code&gt;
+service has nothing to do with WMII; why should I couple them? Instead,
+I create &lt;a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wm-running@.target"&gt;&lt;code&gt;wm-running@.target&lt;/code&gt;&lt;/a&gt;
+that can be used as a synchronization point:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# wmii@.service
+Before=wm-running@%i.target
+
+# lxpanel@.service
+After=X11@%i.target wm-running@%i.target
+Requires=wm-running@%i.target&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Finally, I have my desktop started and running. Now, I’d like for
+programs that aren’t part of the window manager to not dump their stdout
+and stderr into WMII’s part of the journal, like to have a record of
+which graphical programs crashed, and like to have a prettier
+cgroup/process graph. So, I use &lt;code&gt;systemd-run&lt;/code&gt; to run external
+programs from the window manager:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;runcmd() (
+ ...
+ unset NOTIFY_SOCKET # systemd
+ ...
+ exec 8&amp;gt;&amp;amp;- # xinit/systemd handshake
+ exec systemd-run --user --scope -- sh -c &amp;quot;$*&amp;quot;
+)&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;I run them as a scope instead of a service so that they inherit
+environment variables, and don’t have to mess with getting
+&lt;code&gt;DISPLAY&lt;/code&gt; or &lt;code&gt;XAUTHORITY&lt;/code&gt; into their units (as I
+&lt;em&gt;don’t&lt;/em&gt; want to make them global variables in my systemd user
+session).&lt;/p&gt;
+&lt;p&gt;I’d like to get &lt;code&gt;lxpanel&lt;/code&gt; to also use
+&lt;code&gt;systemd-run&lt;/code&gt; when launching programs, but it’s a low
+priority because I don’t really actually use &lt;code&gt;lxpanel&lt;/code&gt; to
+launch programs, I just have the menu there to make sure that I didn’t
+break the icons for programs that I package (I did that once back when I
+was Parabola’s packager for Iceweasel and IceCat).&lt;/p&gt;
+&lt;p&gt;And that’s how I use systemd with X11.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2016 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./java-segfault-redux.html"/>
+ <link rel="alternate" type="text/markdown" href="./java-segfault-redux.md"/>
+ <id>https://lukeshu.com/blog/java-segfault-redux.html</id>
+ <updated>2016-02-28T00:00:00+00:00</updated>
+ <published>2016-02-28T00:00:00+00:00</published>
+ <title>My favorite bug: segfaults in Java (redux)</title>
+ <content type="html">&lt;h1 id="my-favorite-bug-segfaults-in-java-redux"&gt;My favorite bug:
+segfaults in Java (redux)&lt;/h1&gt;
+&lt;p&gt;Two years ago, I &lt;a href="./java-segfault.html"&gt;wrote&lt;/a&gt; about one
+of my favorite bugs that I’d squashed two years before that. About a
+year after that, someone posted it &lt;a
+href="https://news.ycombinator.com/item?id=9283571"&gt;on Hacker
+News&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;There was some fun discussion about it, but also some confusion.
+After finishing a season of mentoring team 4272, I’ve decided that it
+would be fun to re-visit the article, and dig up the old actual code,
+instead of pseudo-code, hopefully improving the clarity (and providing a
+light introduction for anyone wanting to get into modifying the current
+SmartDashboard).&lt;/p&gt;
+&lt;h2 id="the-context"&gt;The context&lt;/h2&gt;
+&lt;p&gt;In 2012, I was a high school senior, and the lead programmer
+on the FIRST Robotics Competition team 1024. For the unfamiliar, the
+relevant part of the setup is that there are 2 minute and 15 second
+matches in which you have a 120 pound robot that sometimes runs
+autonomously, and sometimes is controlled over WiFi from a person at a
+laptop running stock “driver station” software and modifiable
+“dashboard” software.&lt;/p&gt;
+&lt;p&gt;That year, we mostly used the dashboard software to allow the human
+driver and operator to monitor sensors on the robot, one of them being a
+video feed from a web-cam mounted on it. This was really easy because
+the new standard dashboard program had a click-and drag interface to add
+stock widgets; you just had to make sure the code on the robot was
+actually sending the data.&lt;/p&gt;
+&lt;p&gt;That’s great, except that when debugging things, the dashboard would
+suddenly vanish. If it was run manually from a terminal (instead of
+letting the driver station software launch it), you would see a core
+dump indicating a segmentation fault.&lt;/p&gt;
+&lt;p&gt;This wasn’t just us either; I spoke with people on other teams,
+everyone who was streaming video had this issue. But, because it only
+happened every couple of minutes, and a match is only 2:15, it didn’t
+need to run very long; they just crossed their fingers and hoped it
+didn’t happen during a match.&lt;/p&gt;
+&lt;p&gt;The dashboard was written in Java, and the source was available
+(under a 3-clause BSD license) via read-only SVN at
+&lt;code&gt;http://firstforge.wpi.edu/svn/repos/smart_dashboard/trunk&lt;/code&gt;
+(which is unfortunately no longer online; fortunately I’d posted some
+snapshots on the web). So I dove in, hunting for the bug.&lt;/p&gt;
+&lt;p&gt;The repository was divided into several NetBeans projects (not
+exhaustively listed):&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;a
+href="https://gitorious.org/absfrc/sources.git/?p=absfrc:sources.git;a=blob_plain;f=smartdashboard-client-2012-1-any.src.tar.xz;hb=HEAD"&gt;&lt;code&gt;client/smartdashboard&lt;/code&gt;&lt;/a&gt;:
+The main dashboard program, has a plugin architecture.&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://gitorious.org/absfrc/sources.git/?p=absfrc:sources.git;a=blob_plain;f=wpijavacv-208-1-any.src.tar.xz;hb=HEAD"&gt;&lt;code&gt;WPIJavaCV&lt;/code&gt;&lt;/a&gt;:
+A higher-level wrapper around JavaCV, itself a Java Native Interface
+(JNI) wrapper to talk to OpenCV (C and C++).&lt;/li&gt;
+&lt;li&gt;&lt;a
+href="https://gitorious.org/absfrc/sources.git/?p=absfrc:sources.git;a=blob_plain;f=smartdashboard-extension-wpicameraextension-210-1-any.src.tar.xz;hb=HEAD"&gt;&lt;code&gt;extensions/camera/WPICameraExtension&lt;/code&gt;&lt;/a&gt;:
+The standard camera feed plugin, processes the video through
+WPIJavaCV.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;I figured that the bug must be somewhere in the C or C++ code that
+was being called by JavaCV, because that’s the language where segfaults
+happen. It was especially a pain to track down the pointers that were
+causing the issue, because it was hard with native debuggers to see
+through all of the JVM stuff to the OpenCV code, and the OpenCV stuff is
+opaque to Java debuggers.&lt;/p&gt;
+&lt;p&gt;Eventually the issue led me back into the WPICameraExtension, then
+into WPIJavaCV—there was a native pointer being stored in a Java
+variable; Java code called the native routine to &lt;code&gt;free()&lt;/code&gt; the
+structure, but then tried to feed it to another routine later. This led
+to difficulty again—tracking objects with Java debuggers was hard
+because they don’t expect the program to suddenly segfault; it’s Java
+code, Java doesn’t segfault, it throws exceptions!&lt;/p&gt;
+&lt;p&gt;With the help of &lt;code&gt;println()&lt;/code&gt; I was eventually able to see
+that some code was executing in an order that straight didn’t make
+sense.&lt;/p&gt;
+&lt;h2 id="the-bug"&gt;The bug&lt;/h2&gt;
+&lt;p&gt;The basic flow of WPIJavaCV is you have a &lt;code&gt;WPICamera&lt;/code&gt;, and
+you call &lt;code&gt;.getNewImage()&lt;/code&gt; on it, which gives you a
+&lt;code&gt;WPIImage&lt;/code&gt;, which you could do all kinds of fancy OpenCV
+things on, but then ultimately call &lt;code&gt;.getBufferedImage()&lt;/code&gt;,
+which gives you a &lt;code&gt;java.awt.image.BufferedImage&lt;/code&gt; that you can
+pass to Swing to draw on the screen. You do this for every frame. Which
+is exactly what &lt;code&gt;WPICameraExtension.java&lt;/code&gt; did, except that
+“all kinds of fancy OpenCV things” consisted only of:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;public WPIImage processImage(WPIColorImage rawImage) {
+ return rawImage;
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;The idea was that you would extend the class, overriding that one
+method, if you wanted to do anything fancy.&lt;/p&gt;
+&lt;p&gt;One of the neat things about WPIJavaCV was that every OpenCV object
+class had a &lt;code&gt;finalize()&lt;/code&gt; method (via inheriting from
+the abstract class &lt;code&gt;WPIDisposable&lt;/code&gt;) that freed the underlying
+C/C++ memory, so you didn’t have to worry about memory leaks like in
+plain JavaCV. To inherit from &lt;code&gt;WPIDisposable&lt;/code&gt;, you had to
+write a &lt;code&gt;disposed()&lt;/code&gt; method that actually freed the memory.
+This was better than writing &lt;code&gt;finalize()&lt;/code&gt; directly, because
+it did some safety with NULL pointers and idempotency if you wanted to
+manually free something early.&lt;/p&gt;
+&lt;p&gt;Now, &lt;code&gt;edu.wpi.first.WPIImage.disposed()&lt;/code&gt; called &lt;code&gt;&lt;a
+href="https://github.com/bytedeco/javacv/blob/svn/src/com/googlecode/javacv/cpp/opencv_core.java#L398"&gt;com.googlecode.javacv.cpp.opencv_core.IplImage&lt;/a&gt;.release()&lt;/code&gt;,
+which called (via JNI) &lt;code&gt;IplImage::release()&lt;/code&gt;, which called
+libc &lt;code&gt;free()&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;@Override
+protected void disposed() {
+ image.release();
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Elsewhere, the C buffer for the image was copied into a Java buffer
+via a similar chain kicked off by
+&lt;code&gt;edu.wpi.first.WPIImage.getBufferedImage()&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;/**
+ * Copies this {@link WPIImage} into a {@link BufferedImage}.
+ * This method will always generate a new image.
+ * @return a copy of the image
+ */
+public BufferedImage getBufferedImage() {
+ validateDisposed();
+
+ return image.getBufferedImage();
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;The &lt;code&gt;println()&lt;/code&gt; output I saw that didn’t make sense was
+that &lt;code&gt;someFrame.finalize()&lt;/code&gt; was running before
+&lt;code&gt;someFrame.getBufferedImage()&lt;/code&gt; had returned!&lt;/p&gt;
+&lt;p&gt;You see, if the JVM is waiting for the return value of a method
+&lt;code&gt;m()&lt;/code&gt; of object &lt;code&gt;a&lt;/code&gt;, and the code in &lt;code&gt;m()&lt;/code&gt;
+that is yet to be executed doesn’t access any other methods or
+properties of &lt;code&gt;a&lt;/code&gt;, then it will go ahead and consider
+&lt;code&gt;a&lt;/code&gt; eligible for garbage collection before &lt;code&gt;m()&lt;/code&gt;
+has finished running.&lt;/p&gt;
+&lt;p&gt;Put another way, &lt;code&gt;this&lt;/code&gt; is passed to a method just like
+any other argument. If a method is done accessing &lt;code&gt;this&lt;/code&gt;,
+then it’s “safe” for the JVM to go ahead and garbage collect it.&lt;/p&gt;
+&lt;p&gt;That is normally a safe “optimization” to make… except for when a
+destructor method (&lt;code&gt;finalize()&lt;/code&gt;) is defined for the object;
+the destructor can have side effects, and Java has no way to know
+whether it is safe for them to happen before &lt;code&gt;m()&lt;/code&gt; has
+finished running.&lt;/p&gt;
+&lt;p&gt;I’m not entirely sure if this is a “bug” in the compiler or the
+language specification, but I do believe that it’s broken behavior.&lt;/p&gt;
+&lt;p&gt;Anyway, in this case it’s unsafe with WPI’s code.&lt;/p&gt;
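+&lt;p&gt;Here is a minimal, self-contained sketch of the hazard (my own
+invention for illustration, not WPI’s code); with unlucky GC timing,
+the finalizer can release the “native” handle while
+&lt;code&gt;use()&lt;/code&gt; is still running:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;class Resource {
+    private long handle = 0xC0FFEE; // stand-in for a native pointer
+
+    long use() {
+        long h = handle; // last use of any member of `this`, so `this`
+                         // is now eligible for garbage collection
+        System.gc();     // encourage the unlucky timing, for demonstration
+        return h;        // real code would hand h to a native routine,
+                         // possibly after finalize() has already run
+    }
+
+    @Override
+    protected void finalize() {
+        handle = 0; // stand-in for free()ing the native memory
+        System.out.println(&amp;quot;finalized!&amp;quot;);
+    }
+
+    public static void main(String[] args) {
+        System.out.println(new Resource().use());
+    }
+}&lt;/code&gt;&lt;/pre&gt;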
+&lt;h2 id="my-work-around"&gt;My work-around&lt;/h2&gt;
+&lt;p&gt;My work-around was to change this function in
+&lt;code&gt;WPIImage&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;public BufferedImage getBufferedImage() {
+ validateDisposed();
+
+ return image.getBufferedImage(); // `this` may get garbage collected before it returns!
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;In the above code, &lt;code&gt;this&lt;/code&gt; is a &lt;code&gt;WPIImage&lt;/code&gt;, and
+it may get garbage collected between the time that
+&lt;code&gt;image.getBufferedImage()&lt;/code&gt; is dispatched, and the time that
+&lt;code&gt;image.getBufferedImage()&lt;/code&gt; accesses native memory. When it is
+garbage collected, it calls &lt;code&gt;image.release()&lt;/code&gt;, which
+&lt;code&gt;free()&lt;/code&gt;s that native memory. That seems pretty unlikely to
+happen; that’s a very small gap of time. However, running 30 times a
+second, eventually bad luck with the garbage collector happens, and the
+program crashes.&lt;/p&gt;
+&lt;p&gt;The work-around was to insert a bogus method call on
+&lt;code&gt;this&lt;/code&gt; to keep it around until after we were also done with
+&lt;code&gt;image&lt;/code&gt;, changing it to this:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;public BufferedImage getBufferedImage() {
+ validateDisposed();
+ BufferedImage ret = image.getBufferedImage();
+ getWidth(); // bogus call to keep `this` around
+ return ret;
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Yeah. After spending weeks wading through thousands of lines
+of Java, C, and C++, a bogus call to a method I didn’t care about was
+the fix.&lt;/p&gt;
+&lt;p&gt;TheLoneWolfling on Hacker News noted that they’d be worried about the
+JVM optimizing out the call to &lt;code&gt;getWidth()&lt;/code&gt;. I’m not, because
+&lt;code&gt;WPIImage.getWidth()&lt;/code&gt; calls &lt;code&gt;IplImage.width()&lt;/code&gt;,
+which is declared as &lt;code&gt;native&lt;/code&gt;; the JVM must run it because it
+might have side effects. On the other hand, looking back, I think I just
+shrunk the window for things to go wrong: it may be possible for the
+garbage collection to trigger in the time between
+&lt;code&gt;getWidth()&lt;/code&gt; being dispatched and &lt;code&gt;width()&lt;/code&gt;
+running. Perhaps there was something in the C/C++ code that made it
+safe, I don’t recall, and don’t care quite enough to dig into OpenCV
+internals again. Or perhaps I’m mis-remembering the fix (which I don’t
+actually have a file of), and I called some other method that
+&lt;em&gt;could&lt;/em&gt; get optimized out (though I &lt;em&gt;do&lt;/em&gt; believe that it
+was either &lt;code&gt;getWidth()&lt;/code&gt; or &lt;code&gt;getHeight()&lt;/code&gt;).&lt;/p&gt;
+&lt;h2 id="wpis-fix"&gt;WPI’s fix&lt;/h2&gt;
+&lt;p&gt;Four years later, the SmartDashboard is still being used! But it no
+longer has this bug, and it’s not using my workaround. So, how did the
+WPILib developers fix it?&lt;/p&gt;
+&lt;p&gt;Well, the code now lives &lt;a
+href="https://usfirst.collab.net/gerrit/#/admin/projects/"&gt;in git at
+collab.net&lt;/a&gt;, so I decided to take a look.&lt;/p&gt;
+&lt;p&gt;They stripped out WPIJavaCV from the main video feed widget, and now
+use a purely Java implementation of MJPEG streaming.&lt;/p&gt;
+&lt;p&gt;However, the old video feed widget is still available as an extension
+(so that you can still do cool things with &lt;code&gt;processImage&lt;/code&gt;),
+and it also no longer has this bug. Their fix was to put a mutex around
+all accesses to &lt;code&gt;image&lt;/code&gt;, which should have been the obvious
+solution to me.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2016 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./nginx-mediawiki.html"/>
+ <link rel="alternate" type="text/markdown" href="./nginx-mediawiki.md"/>
+ <id>https://lukeshu.com/blog/nginx-mediawiki.html</id>
+ <updated>2015-05-19T00:00:00+00:00</updated>
+ <published>2015-05-19T00:00:00+00:00</published>
+ <title>An Nginx configuration for MediaWiki</title>
+ <content type="html">&lt;h1 id="an-nginx-configuration-for-mediawiki"&gt;An Nginx configuration for
+MediaWiki&lt;/h1&gt;
+&lt;p&gt;There are &lt;a href="http://wiki.nginx.org/MediaWiki"&gt;several&lt;/a&gt; &lt;a
+href="https://wiki.archlinux.org/index.php/MediaWiki#Nginx"&gt;example&lt;/a&gt;
+&lt;a
+href="https://www.mediawiki.org/wiki/Manual:Short_URL/wiki/Page_title_--_nginx_rewrite--root_access"&gt;Nginx&lt;/a&gt;
+&lt;a
+href="https://www.mediawiki.org/wiki/Manual:Short_URL/Page_title_-_nginx,_Root_Access,_PHP_as_a_CGI_module"&gt;configurations&lt;/a&gt;
+&lt;a href="http://wiki.nginx.org/RHEL_5.4_%2B_Nginx_%2B_Mediawiki"&gt;for&lt;/a&gt;
+&lt;a
+href="http://stackoverflow.com/questions/11080666/mediawiki-on-nginx"&gt;MediaWiki&lt;/a&gt;
+floating around the web. Many of them don’t block the user from
+accessing things like &lt;code&gt;/serialized/&lt;/code&gt;. Many of them also &lt;a
+href="https://labs.parabola.nu/issues/725"&gt;don’t correctly handle&lt;/a&gt; a
+wiki page named &lt;code&gt;FAQ&lt;/code&gt;, since that is the name of a file in the
+MediaWiki root! In fact, the configuration used on the official Nginx
+Wiki has both of those issues!&lt;/p&gt;
+&lt;p&gt;This is because most of the configurations floating around basically
+try to pass all requests through, and blacklist certain requests, either
+denying them, or passing them through to &lt;code&gt;index.php&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;It’s my view that blacklisting is inferior to whitelisting in
+situations like this. So, I developed the following configuration that
+instead works by whitelisting certain paths.&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;root /path/to/your/mediawiki; # obviously, change this line
+
+index index.php;
+location / { try_files /var/empty @rewrite; }
+location /images/ { try_files $uri $uri/ @rewrite; }
+location /skins/ { try_files $uri $uri/ @rewrite; }
+location /api.php { try_files /var/empty @php; }
+location /api.php5 { try_files /var/empty @php; }
+location /img_auth.php { try_files /var/empty @php; }
+location /img_auth.php5 { try_files /var/empty @php; }
+location /index.php { try_files /var/empty @php; }
+location /index.php5 { try_files /var/empty @php; }
+location /load.php { try_files /var/empty @php; }
+location /load.php5 { try_files /var/empty @php; }
+location /opensearch_desc.php { try_files /var/empty @php; }
+location /opensearch_desc.php5 { try_files /var/empty @php; }
+location /profileinfo.php { try_files /var/empty @php; }
+location /thumb.php { try_files /var/empty @php; }
+location /thumb.php5 { try_files /var/empty @php; }
+location /thumb_handler.php { try_files /var/empty @php; }
+location /thumb_handler.php5 { try_files /var/empty @php; }
+location /wiki.phtml { try_files /var/empty @php; }
+
+location @rewrite {
+ rewrite ^/(.*)$ /index.php?title=$1&amp;amp;$args;
+}
+
+location @php {
+ # obviously, change this according to your PHP setup
+ include fastcgi.conf;
+ fastcgi_pass unix:/run/php-fpm/wiki.sock;
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;We are now using this configuration on &lt;a
+href="https://wiki.parabola.nu/"&gt;ParabolaWiki&lt;/a&gt;, but with an alias for
+&lt;code&gt;location = /favicon.ico&lt;/code&gt; to the correct file in the skin,
+and with FastCGI caching for PHP.&lt;/p&gt;
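+&lt;p&gt;The favicon alias looks something like this (the actual path depends
+on which skin you use):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;location = /favicon.ico {
+    alias /path/to/your/mediawiki/skins/YourSkin/favicon.ico;
+}&lt;/code&gt;&lt;/pre&gt;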
+&lt;p&gt;The only thing I don’t like about this is the
+&lt;code&gt;try_files /var/empty&lt;/code&gt; bits—surely there is a better way to
+have it go to one of the &lt;code&gt;@&lt;/code&gt; location blocks, but I couldn’t
+figure it out.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2015 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./lp2015-videos.html"/>
+ <link rel="alternate" type="text/markdown" href="./lp2015-videos.md"/>
+ <id>https://lukeshu.com/blog/lp2015-videos.html</id>
+ <updated>2015-03-22T00:00:00+00:00</updated>
+ <published>2015-03-22T00:00:00+00:00</published>
+ <title>I took some videos at LibrePlanet</title>
+ <content type="html">&lt;h1 id="i-took-some-videos-at-libreplanet"&gt;I took some videos at
+LibrePlanet&lt;/h1&gt;
+&lt;p&gt;I’m at &lt;a href="https://libreplanet.org/2015/"&gt;LibrePlanet&lt;/a&gt;, and
+have been loving the talks. For most of yesterday, there was a series of
+short “lightning” talks in room 144. I decided to hang out in that room
+for the later part of the day, because while most of the talks were live
+streamed and recorded, there were no cameras in room 144; so I couldn’t
+watch them later.&lt;/p&gt;
+&lt;p&gt;Way too late in the day, I remembered that I have the capability to
+record videos, so I caught the last two talks in 144.&lt;/p&gt;
+&lt;p&gt;I apologize for the changing orientation.&lt;/p&gt;
+&lt;p&gt;&lt;a
+href="https://lukeshu.com/dump/lp-2015-last-2-short-talks.ogg"&gt;Here’s
+the video I took&lt;/a&gt;.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2015 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./build-bash-1.html"/>
+ <link rel="alternate" type="text/markdown" href="./build-bash-1.md"/>
+ <id>https://lukeshu.com/blog/build-bash-1.html</id>
+ <updated>2015-03-18T00:00:00+00:00</updated>
+ <published>2015-03-18T00:00:00+00:00</published>
+ <title>Building Bash 1.14.7 on a modern system</title>
+ <content type="html">&lt;h1 id="building-bash-1.14.7-on-a-modern-system"&gt;Building Bash 1.14.7 on
+a modern system&lt;/h1&gt;
+&lt;p&gt;In a previous revision of my &lt;a href="./bash-arrays.html"&gt;Bash arrays
+post&lt;/a&gt;, I wrote:&lt;/p&gt;
+&lt;blockquote&gt;
+&lt;p&gt;Bash 1.x won’t compile with modern GCC, so I couldn’t verify how it
+behaves.&lt;/p&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;I recall spending a little time fighting with it, but apparently I
+didn’t try very hard: getting Bash 1.14.7 to build on a modern box is
+mostly just adjusting it to use &lt;code&gt;stdarg&lt;/code&gt; instead of the
+no-longer-implemented &lt;code&gt;varargs&lt;/code&gt;. There’s also a little
+fiddling with the pre-autoconf automatic configuration.&lt;/p&gt;
+&lt;h2 id="stdarg"&gt;stdarg&lt;/h2&gt;
+&lt;p&gt;Converting to &lt;code&gt;stdarg&lt;/code&gt; is pretty simple: For each variadic
+function (functions that take a variable number of arguments), follow
+these steps:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;Replace &lt;code&gt;#include &amp;lt;varargs.h&amp;gt;&lt;/code&gt; with
+&lt;code&gt;#include &amp;lt;stdarg.h&amp;gt;&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;Replace &lt;code&gt;function_name (va_alist) va_dcl&lt;/code&gt; with
+&lt;code&gt;function_name (char *format, ...)&lt;/code&gt;.&lt;/li&gt;
+&lt;li&gt;Remove the declaration and assignment for &lt;code&gt;format&lt;/code&gt; from
+the function body.&lt;/li&gt;
+&lt;li&gt;Replace &lt;code&gt;va_start (args);&lt;/code&gt; with
+&lt;code&gt;va_start (args, format);&lt;/code&gt; in the function bodies.&lt;/li&gt;
+&lt;li&gt;Replace &lt;code&gt;function_name ();&lt;/code&gt; with
+&lt;code&gt;function_name (char *, ...)&lt;/code&gt; in header files and/or at the
+top of C files.&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;There’s one function that uses the variable name &lt;code&gt;control&lt;/code&gt;
+instead of &lt;code&gt;format&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;I’ve prepared &lt;a href="./bash-1.14.7-gcc4-stdarg.patch"&gt;a patch&lt;/a&gt;
+that does this.&lt;/p&gt;
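+&lt;p&gt;If you’d rather do the conversion by hand than apply the patch,
+steps 1 and 4 are mechanical enough for sed; a rough sketch (steps 2, 3,
+and 5 still need manual editing):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;sed -i \
+    -e &amp;#39;s|#include &amp;lt;varargs\.h&amp;gt;|#include &amp;lt;stdarg.h&amp;gt;|&amp;#39; \
+    -e &amp;#39;s|va_start (args);|va_start (args, format);|&amp;#39; \
+    *.c *.h&lt;/code&gt;&lt;/pre&gt;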
+&lt;h2 id="configuration"&gt;Configuration&lt;/h2&gt;
+&lt;p&gt;Instead of using autoconf-style tests to test for compiler and
+platform features, Bash 1 used the file &lt;code&gt;machines.h&lt;/code&gt; that had
+&lt;code&gt;#ifdefs&lt;/code&gt; and a huge database of different operating
+systems for different platforms. It’s gross. And quite likely won’t
+handle your modern operating system.&lt;/p&gt;
+&lt;p&gt;I made these two small changes to &lt;code&gt;machines.h&lt;/code&gt; to get it
+to work correctly on my box:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;Replace &lt;code&gt;#if defined (i386)&lt;/code&gt; with
+&lt;code&gt;#if defined (i386) || defined (__x86_64__)&lt;/code&gt;. The purpose of
+this is obvious.&lt;/li&gt;
+&lt;li&gt;Add &lt;code&gt;#define USE_TERMCAP_EMULATION&lt;/code&gt; to the section for
+Linux [sic] on i386
+(&lt;code&gt;# if !defined (done386) &amp;amp;&amp;amp; (defined (__linux__) || defined (linux))&lt;/code&gt;).
+What this does is tell it to link against libcurses to use curses
+termcap emulation, instead of linking against libtermcap (which doesn’t
+exist on modern GNU/Linux systems).&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;Again, I’ve prepared &lt;a href="./bash-1.14.7-machines-config.patch"&gt;a
+patch&lt;/a&gt; that does this.&lt;/p&gt;
+&lt;h2 id="building"&gt;Building&lt;/h2&gt;
+&lt;p&gt;With those adjustments, it should build, but with quite a few
+warnings. Making a couple of changes to &lt;code&gt;CFLAGS&lt;/code&gt; should fix
+that:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;make CFLAGS=&amp;#39;-O -g -Werror -Wno-int-to-pointer-cast -Wno-pointer-to-int-cast -Wno-deprecated-declarations -include stdio.h -include stdlib.h -include string.h -Dexp2=bash_exp2&amp;#39;&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;That’s a doozy! Let’s break it down:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;-O -g&lt;/code&gt; The default value for CFLAGS (defined in
+&lt;code&gt;cpp-Makefile&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;-Werror&lt;/code&gt; Treat warnings as errors; force us to deal with
+any issues.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;-Wno-int-to-pointer-cast -Wno-pointer-to-int-cast&lt;/code&gt; Allow
+casting between integers and pointers. Unfortunately, the way this
+version of Bash was designed requires this.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;-Wno-deprecated-declarations&lt;/code&gt; The &lt;code&gt;getwd&lt;/code&gt;
+function in &lt;code&gt;unistd.h&lt;/code&gt; is considered deprecated (use
+&lt;code&gt;getcwd&lt;/code&gt; instead). However, if &lt;code&gt;getcwd&lt;/code&gt; is
+available, Bash uses its own &lt;code&gt;getwd&lt;/code&gt; wrapper around
+&lt;code&gt;getcwd&lt;/code&gt; (implemented in &lt;code&gt;general.c&lt;/code&gt;), and only
+uses the signature from &lt;code&gt;unistd.h&lt;/code&gt;, not the actual
+implementation from libc.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;-include stdio.h -include stdlib.h -include string.h&lt;/code&gt;
+Several files are missing these header file includes. If not for
+&lt;code&gt;-Werror&lt;/code&gt;, the default function signature fallbacks would
+work.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;-Dexp2=bash_exp2&lt;/code&gt; Avoid a conflict between the parser’s
+&lt;code&gt;exp2&lt;/code&gt; helper function and &lt;code&gt;math.h&lt;/code&gt;’s base-2
+exponential function.&lt;/li&gt;
+&lt;/ul&gt;
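+&lt;p&gt;Putting it all together, a sketch of the whole build (the tarball
+name and patch paths here are assumptions; adjust the
+&lt;code&gt;patch -p&lt;/code&gt; level to match):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;tar xzf bash-1.14.7.tar.gz
+cd bash-1.14.7
+patch -p1 &amp;lt;../bash-1.14.7-gcc4-stdarg.patch
+patch -p1 &amp;lt;../bash-1.14.7-machines-config.patch
+make CFLAGS=&amp;#39;-O -g -Werror -Wno-int-to-pointer-cast -Wno-pointer-to-int-cast -Wno-deprecated-declarations -include stdio.h -include stdlib.h -include string.h -Dexp2=bash_exp2&amp;#39;
+./bash -c &amp;#39;echo $BASH_VERSION&amp;#39;&lt;/code&gt;&lt;/pre&gt;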
+&lt;p&gt;Have fun, software archaeologists!&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2015 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./purdue-cs-login.html"/>
+ <link rel="alternate" type="text/markdown" href="./purdue-cs-login.md"/>
+ <id>https://lukeshu.com/blog/purdue-cs-login.html</id>
+ <updated>2015-02-06T00:00:00+00:00</updated>
+ <published>2015-02-06T00:00:00+00:00</published>
+ <title>Customizing your login on Purdue CS computers (WIP, but updated)</title>
+ <content type="html">&lt;h1
+id="customizing-your-login-on-purdue-cs-computers-wip-but-updated"&gt;Customizing
+your login on Purdue CS computers (WIP, but updated)&lt;/h1&gt;
+&lt;blockquote&gt;
+&lt;p&gt;This article is currently a Work-In-Progress. Other than the one
+place where I say “I’m not sure”, the GDM section is complete. The
+network shares section is a mess, but has some good information.&lt;/p&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;Most CS students at Purdue spend a lot of time on the lab boxes, but
+don’t know a lot about them. This document tries to fix that.&lt;/p&gt;
+&lt;p&gt;The lab boxes all run Gentoo.&lt;/p&gt;
+&lt;h2 id="gdm-the-gnome-display-manager"&gt;GDM, the Gnome Display
+Manager&lt;/h2&gt;
+&lt;p&gt;The boxes run &lt;code&gt;gdm&lt;/code&gt; (Gnome Display Manager) 2.20.11 for
+the login screen. This is an old version, and has a couple behaviors
+that are slightly different than new versions, but here are the
+important bits:&lt;/p&gt;
+&lt;p&gt;System configuration:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;/usr/share/gdm/defaults.conf&lt;/code&gt; (lower precedence)&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;/etc/X11/gdm/custom.conf&lt;/code&gt; (higher precedence)&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;User configuration:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;~/.dmrc&lt;/code&gt; (more recent versions use
+&lt;code&gt;~/.desktop&lt;/code&gt;, but Purdue boxes aren’t running more recent
+versions)&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h3 id="purdues-gdm-configuration"&gt;Purdue’s GDM configuration&lt;/h3&gt;
+&lt;p&gt;Now, &lt;code&gt;custom.conf&lt;/code&gt; sets&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;BaseXsession=/usr/local/share/xsessions/Xsession
+SessionDesktopDir=/usr/local/share/xsessions/&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;This is important, because there are &lt;em&gt;multiple&lt;/em&gt; locations that
+look like these files; I take it that they were used at some time in the
+past. Don’t get tricked into thinking that it looks at
+&lt;code&gt;/etc/X11/gdm/Xsession&lt;/code&gt; (which exists, and is where it would
+look by default).&lt;/p&gt;
+&lt;p&gt;If you look at the GDM login screen, it has a “Sessions” button that
+opens a prompt where you can select any of several sessions:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;Last session&lt;/li&gt;
+&lt;li&gt;1. MATE (&lt;code&gt;mate.desktop&lt;/code&gt;;
+&lt;code&gt;Exec=mate-session&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;2. CS Default Session (&lt;code&gt;default.desktop&lt;/code&gt;;
+&lt;code&gt;Exec=default&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;3. Custom Session (&lt;code&gt;custom.desktop&lt;/code&gt;;
+&lt;code&gt;Exec=custom&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;4. FVWM2 (&lt;code&gt;fvwm2.desktop&lt;/code&gt;; &lt;code&gt;Exec=fvwm2&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;5. gnome.desktop (&lt;code&gt;gnome.desktop&lt;/code&gt;;
+&lt;code&gt;Exec=gnome-session&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;6. KDE (&lt;code&gt;kde.desktop&lt;/code&gt;, &lt;code&gt;Exec=startkde&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;Failsafe MATE (&lt;code&gt;ShowGnomeFailsafeSession=true&lt;/code&gt;)&lt;/li&gt;
+&lt;li&gt;Failsafe Terminal (&lt;code&gt;ShowXtermFailsafeSession=true&lt;/code&gt;)&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;The main 6 are configured by the &lt;code&gt;.desktop&lt;/code&gt; files in
+&lt;code&gt;SessionDesktopDir=/usr/local/share/xsessions&lt;/code&gt;; the last 2
+are auto-generated. The reason &lt;code&gt;ShowGnomeFailsafeSession&lt;/code&gt;
+correctly generates a Mate session instead of a Gnome session is the
+patch
+&lt;code&gt;/p/portage/*/overlay/gnome-base/gdm/files/gdm-2.20.11-mate.patch&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;I’m not sure why Gnome shows up as &lt;code&gt;gnome.desktop&lt;/code&gt; instead
+of &lt;code&gt;GNOME&lt;/code&gt; as specified by &lt;code&gt;gnome.desktop:Name&lt;/code&gt;. I
+imagine it might be something related to the aforementioned patch, but I
+can’t find anything in the patch that looks like it would screw that up;
+at least not without a better understanding of GDM’s code.&lt;/p&gt;
+&lt;p&gt;Which of the main 6 is used by default (“Last Session”) is configured
+with &lt;code&gt;~/.dmrc:Session&lt;/code&gt;, which contains the basename of the
+associated &lt;code&gt;.desktop&lt;/code&gt; file (that is, without any directory
+part or file extension).&lt;/p&gt;
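+&lt;p&gt;For example, a minimal &lt;code&gt;~/.dmrc&lt;/code&gt; making FVWM2 the default
+would look like this sketch (the &lt;code&gt;[Desktop]&lt;/code&gt; group header is
+part of the standard &lt;code&gt;.dmrc&lt;/code&gt; format):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;[Desktop]
+Session=fvwm2&lt;/code&gt;&lt;/pre&gt;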
+&lt;p&gt;Every one of the &lt;code&gt;.desktop&lt;/code&gt; files sets
+&lt;code&gt;Type=XSession&lt;/code&gt;, which means that instead of running the
+argument in &lt;code&gt;Exec=&lt;/code&gt; directly, it passes it as arguments to
+the &lt;code&gt;Xsession&lt;/code&gt; program (in the location configured by
+&lt;code&gt;BaseXsession&lt;/code&gt;).&lt;/p&gt;
+&lt;h4 id="xsession"&gt;Xsession&lt;/h4&gt;
+&lt;p&gt;So, now we get to read
+&lt;code&gt;/usr/local/share/xsessions/Xsession&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;Before it does anything else, it:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;code&gt;. /etc/profile.env&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;unset ROOTPATH&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;Try to set up logging to one of &lt;code&gt;~/.xsession-errors&lt;/code&gt;,
+&lt;code&gt;$TMPDIR/xses-$USER&lt;/code&gt;, or &lt;code&gt;/tmp/xses-$USER&lt;/code&gt; (it
+tries them in that order).&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;xsetroot -default&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;Fiddles with the maximum number of processes.&lt;/li&gt;
+&lt;/ol&gt;
+&lt;p&gt;After that, it handles these 3 “special” arguments that were given to
+it by various &lt;code&gt;.desktop&lt;/code&gt; &lt;code&gt;Exec=&lt;/code&gt; lines:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;failsafe&lt;/code&gt;: Runs a single xterm window. NB: This is NOT
+run by either of the failsafe options. It is likely a vestige from a
+prior configuration.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;startkde&lt;/code&gt;: Displays a message saying KDE is no longer
+available.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;gnome-session&lt;/code&gt;: Displays a message saying GNOME has been
+replaced by MATE.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;Assuming that none of those were triggered, it then does:&lt;/p&gt;
+&lt;ol type="1"&gt;
+&lt;li&gt;&lt;code&gt;source ~/.xprofile&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;xrdb -merge ~/.Xresources&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;xmodmap ~/.xmodmaprc&lt;/code&gt;&lt;/li&gt;
+&lt;/ol&gt;
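+&lt;p&gt;Since &lt;code&gt;~/.xprofile&lt;/code&gt; is sourced for &lt;em&gt;every&lt;/em&gt; session
+type, it’s the natural home for environment variables and one-shot X
+setup; a minimal sketch (the contents are just examples):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# ~/.xprofile: sourced by Xsession before the session starts
+export PATH=&amp;quot;$HOME/bin:$PATH&amp;quot;
+xsetroot -solid &amp;#39;#404040&amp;#39;  # one-shot X setup works here too&lt;/code&gt;&lt;/pre&gt;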
+&lt;p&gt;Finally, it has a switch statement over the arguments given to it by
+the various &lt;code&gt;.desktop&lt;/code&gt; &lt;code&gt;Exec=&lt;/code&gt; lines:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;custom&lt;/code&gt;: Executes &lt;code&gt;~/.xsession&lt;/code&gt;.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;default&lt;/code&gt;: Executes &lt;code&gt;~/.Xrc.cs&lt;/code&gt;.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;mate-session&lt;/code&gt;: It has this whole script to start DBus,
+run the &lt;code&gt;mate-session&lt;/code&gt; command, then clean up when it’s
+done.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;*&lt;/code&gt; (&lt;code&gt;fvwm2&lt;/code&gt;): Runs
+&lt;code&gt;eval exec "$@"&lt;/code&gt;, which results in it executing the
+&lt;code&gt;fvwm2&lt;/code&gt; command.&lt;/li&gt;
+&lt;/ul&gt;
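+&lt;p&gt;So, picking “3. Custom Session” hands control entirely to
+&lt;code&gt;~/.xsession&lt;/code&gt;; a minimal sketch of one (the window manager is
+just an example):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;#!/bin/sh
+# ~/.xsession: executed by the `custom` branch of Xsession
+xterm &amp;amp;      # a client to start alongside the window manager
+exec fvwm2    # the session lasts as long as this process&lt;/code&gt;&lt;/pre&gt;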
+&lt;h2 id="network-shares"&gt;Network Shares&lt;/h2&gt;
+&lt;p&gt;Your data is on various hosts. I believe most undergrads have their
+data on &lt;code&gt;data.cs.purdue.edu&lt;/code&gt; (or just &lt;a
+href="https://en.wikipedia.org/wiki/Data_%28Star_Trek%29"&gt;&lt;code&gt;data&lt;/code&gt;&lt;/a&gt;).
+Others have theirs on &lt;a
+href="http://swfanon.wikia.com/wiki/Antor"&gt;&lt;code&gt;antor&lt;/code&gt;&lt;/a&gt; or &lt;a
+href="https://en.wikipedia.org/wiki/Tux"&gt;&lt;code&gt;tux&lt;/code&gt;&lt;/a&gt; (that I
+know of).&lt;/p&gt;
+&lt;p&gt;Most of the boxes with tons of storage have many network cards, each
+with a different IP; a single host’s IPs are mostly the same, but with
+varying 3rd octets. For example, &lt;code&gt;data&lt;/code&gt; is 128.10.X.13. If
+you need a particular value of X but don’t want to remember the other
+octets, the interfaces are individually addressable as
+&lt;code&gt;BASENAME-NUMBER.cs.purdue.edu&lt;/code&gt;. For example,
+&lt;code&gt;data-25.cs.purdue.edu&lt;/code&gt; is 128.10.25.13.&lt;/p&gt;
+&lt;p&gt;They use &lt;a
+href="https://www.kernel.org/pub/linux/daemons/autofs/"&gt;AutoFS&lt;/a&gt; quite
+extensively. The maps are generated dynamically by
+&lt;code&gt;/etc/autofs/*.map&lt;/code&gt;, which are all symlinks to
+&lt;code&gt;/usr/libexec/amd2autofs&lt;/code&gt;. As far as I can tell,
+&lt;code&gt;amd2autofs&lt;/code&gt; is custom to Purdue. Its source lives in
+&lt;code&gt;/p/portage/*/overlay/net-fs/autofs/files/amd2autofs.c&lt;/code&gt;. The
+name appears to be a misnomer; it seems to claim to dynamically translate
+from the configuration of &lt;a href="http://www.am-utils.org/"&gt;Auto
+Mounter Daemon (AMD)&lt;/a&gt; to AutoFS, but it actually talks to NIS. It
+does so using the &lt;code&gt;yp&lt;/code&gt; interface, which is in Glibc for
+compatibility, but is undocumented. For documentation for that
+interface, look at one of the BSDs, or Mac OS X. From the comments
+in the file, it appears that it once did look at the AMD configuration,
+but has since been changed.&lt;/p&gt;
+&lt;p&gt;There are 3 mountpoints using AutoFS: &lt;code&gt;/homes&lt;/code&gt;,
+&lt;code&gt;/p&lt;/code&gt;, and &lt;code&gt;/u&lt;/code&gt;. &lt;code&gt;/homes&lt;/code&gt; creates
+symlinks on-demand from &lt;code&gt;/homes/USERNAME&lt;/code&gt; to
+&lt;code&gt;/u/BUCKET/USERNAME&lt;/code&gt;. &lt;code&gt;/u&lt;/code&gt; mounts NFS shares to
+&lt;code&gt;/u/SERVERNAME&lt;/code&gt; on-demand, and creates symlinks from
+&lt;code&gt;/u/BUCKET&lt;/code&gt; to &lt;code&gt;/u/SERVERNAME/BUCKET&lt;/code&gt; on-demand.
+&lt;code&gt;/p&lt;/code&gt; mounts on-demand various NFS shares that are organized
+by topic; the Xinu/MIPS tools are in &lt;code&gt;/p/xinu&lt;/code&gt;, the Portage
+tree is in &lt;code&gt;/p/portage&lt;/code&gt;.&lt;/p&gt;
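+&lt;p&gt;You can watch the on-demand behavior from a shell; a quick sketch
+(the output shape is inferred from the description above):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;readlink /homes/&amp;quot;$USER&amp;quot;   # prints /u/BUCKET/USERNAME; created on first access
+mount -t nfs              # lists which shares AutoFS has mounted so far&lt;/code&gt;&lt;/pre&gt;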
+&lt;p&gt;I’m not sure how &lt;code&gt;scratch&lt;/code&gt; works; it seems to be
+heterogeneous between different servers and families of lab boxes.
+Sometimes it’s in &lt;code&gt;/u&lt;/code&gt;, sometimes it isn’t.&lt;/p&gt;
+&lt;p&gt;This 3rd-party documentation was very helpful to me: &lt;a
+href="http://www.linux-consulting.com/Amd_AutoFS/"
+class="uri"&gt;http://www.linux-consulting.com/Amd_AutoFS/&lt;/a&gt; It’s where
+Gentoo points for the AutoFS homepage, as it doesn’t have a real
+homepage. Arch just points to FreshMeat. Debian points to
+kernel.org.&lt;/p&gt;
+&lt;h3 id="lore"&gt;Lore&lt;/h3&gt;
+&lt;p&gt;&lt;a
+href="https://en.wikipedia.org/wiki/List_of_Star_Trek:_The_Next_Generation_characters#Lore"&gt;&lt;code&gt;lore&lt;/code&gt;&lt;/a&gt;&lt;/p&gt;
+&lt;p&gt;Lore is a SunOS 5.10 box running on Sun-Fire V445 (sun4u) hardware.
+SunOS is NOT GNU/Linux, and sun4u is NOT x86.&lt;/p&gt;
+&lt;p&gt;Instead of &lt;code&gt;/etc/fstab&lt;/code&gt;, it has
+&lt;code&gt;/etc/mnttab&lt;/code&gt;.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2015 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./make-memoize.html"/>
+ <link rel="alternate" type="text/markdown" href="./make-memoize.md"/>
+ <id>https://lukeshu.com/blog/make-memoize.html</id>
+ <updated>2014-11-20T00:00:00+00:00</updated>
+ <published>2014-11-20T00:00:00+00:00</published>
+ <title>A memoization routine for GNU Make functions</title>
+ <content type="html">&lt;h1 id="a-memoization-routine-for-gnu-make-functions"&gt;A memoization
+routine for GNU Make functions&lt;/h1&gt;
+&lt;p&gt;I’m a big fan of &lt;a href="https://www.gnu.org/software/make/"&gt;GNU
+Make&lt;/a&gt;. I’m pretty knowledgeable about it, and was pretty active on
+the help-make mailing list for a while. Something that many experienced
+make-ers know of is John Graham-Cumming’s “GNU Make Standard Library”,
+or &lt;a href="http://gmsl.sourceforge.net/"&gt;GMSL&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;I don’t like to use it, as I’m capable of defining macros myself as I
+need them instead of pulling in a 3rd party dependency (and generally
+like to stay away from the kind of Makefile that would lean heavily on
+something like GMSL).&lt;/p&gt;
+&lt;p&gt;However, one really neat thing that GMSL offers is a way to memoize
+expensive functions (such as those that shell out). I was considering
+pulling in GMSL for one of my projects, almost just for the
+&lt;code&gt;memoize&lt;/code&gt; function.&lt;/p&gt;
+&lt;p&gt;However, John’s &lt;code&gt;memoize&lt;/code&gt; has a couple of shortcomings that
+made it unsuitable for my needs.&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;Only allows functions that take one argument.&lt;/li&gt;
+&lt;li&gt;Considers empty values to be unset; for my needs, an empty string is
+a valid value that should be cached.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;So, I implemented my own, more flexible memoization routine for
+Make.&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# This definition of `rest` is equivalent to that in GMSL
+rest = $(wordlist 2,$(words $1),$1)
+
+# How to use: Define 2 variables (of the kind you would pass to $(call)):
+# `_&lt;var&gt;NAME&lt;/var&gt;_main` and `_&lt;var&gt;NAME&lt;/var&gt;_hash`. Now, `_&lt;var&gt;NAME&lt;/var&gt;_main` is the function getting
+# memoized, and `_&lt;var&gt;NAME&lt;/var&gt;_hash` is a function that hashes the function arguments
+# into a string suitable for a variable name.
+#
+# Then, define the final function like:
+#
+# &lt;var&gt;NAME&lt;/var&gt; = $(foreach func,&lt;var&gt;NAME&lt;/var&gt;,$(memoized))
+
+_main = $(_$(func)_main)
+_hash = __memoized_$(_$(func)_hash)
+memoized = $(if $($(_hash)),,$(eval $(_hash) := _ $(_main)))$(call rest,$($(_hash)))&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;However, I later removed it from the Makefile, as I &lt;a
+href="https://projects.parabola.nu/~lukeshu/maven-dist.git/commit/?id=fec5a7281b3824cb952aa0bb76bbbaa41eaafdf9"&gt;re-implemented&lt;/a&gt;
+the bits that it memoized in a more efficient way, such that memoization
+was no longer needed, and the whole thing was faster.&lt;/p&gt;
+&lt;p&gt;Later, I realized that my memoized routine could have been improved
+by replacing &lt;code&gt;func&lt;/code&gt; with &lt;code&gt;$0&lt;/code&gt;, which would
+simplify how the final function is declared:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# This definition of `rest` is equivalent to that in GMSL
+rest = $(wordlist 2,$(words $1),$1)
+
+# How to use:
+#
+# _&lt;var&gt;NAME&lt;/var&gt;_main = &lt;var&gt;your main function to be memoized&lt;/var&gt;
+# _&lt;var&gt;NAME&lt;/var&gt;_hash = &lt;var&gt;your hash function for parameters&lt;/var&gt;
+# &lt;var&gt;NAME&lt;/var&gt; = $(memoized)
+#
+# The output of your hash function should be a string following
+# the same rules that variable names follow.
+
+_main = $(_$0_main)
+_hash = __memoized_$(_$0_hash)
+memoized = $(if $($(_hash)),,$(eval $(_hash) := _ $(_main)))$(call rest,$($(_hash)))&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Now, I’m pretty sure that should work, but I have only actually
+tested the first version.&lt;/p&gt;
+&lt;h2 id="tldr"&gt;TL;DR&lt;/h2&gt;
+&lt;p&gt;Avoid doing things in Make that would make you lean on complex
+solutions like an external memoize function.&lt;/p&gt;
+&lt;p&gt;However, if you do end up needing a more flexible memoize routine, I
+wrote one that you can use.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2014 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="http://www.wtfpl.net/txt/copying/"&gt;WTFPL-2&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./ryf-routers.html"/>
+ <link rel="alternate" type="text/markdown" href="./ryf-routers.md"/>
+ <id>https://lukeshu.com/blog/ryf-routers.html</id>
+ <updated>2014-09-12T00:00:00+00:00</updated>
+ <published>2014-09-12T00:00:00+00:00</published>
+ <title>I'm excited about the new RYF-certified routers from ThinkPenguin</title>
+ <content type="html">&lt;h1
+id="im-excited-about-the-new-ryf-certified-routers-from-thinkpenguin"&gt;I’m
+excited about the new RYF-certified routers from ThinkPenguin&lt;/h1&gt;
+&lt;p&gt;I just learned that on Wednesday, the FSF &lt;a
+href="https://www.fsf.org/resources/hw/endorsement/thinkpenguin"&gt;awarded&lt;/a&gt;
+the &lt;abbr title="Respects Your Freedom"&gt;RYF&lt;/abbr&gt; certification to the
+&lt;a href="https://www.thinkpenguin.com/TPE-NWIFIROUTER"&gt;Think Penguin
+TPE-NWIFIROUTER&lt;/a&gt; wireless router.&lt;/p&gt;
+&lt;p&gt;I didn’t find this information published anywhere up front, but put
+simply: it is a re-branded &lt;strong&gt;TP-Link TL-841ND&lt;/strong&gt; modded to
+run &lt;a href="http://librecmc.com/"&gt;libreCMC&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;I’ve been a fan of the TL-841/740 line of routers for several years
+now. They are dirt cheap (if you go to Newegg and sort by “cheapest,”
+it’s frequently the TL-740N), are extremely reliable, and run OpenWRT
+like a champ. They are my go-to routers.&lt;/p&gt;
+&lt;p&gt;(And they sure beat the snot out of the Arris TG862 that it seems
+like everyone has in their homes now. I hate that thing; it even has
+buggy packet scheduling.)&lt;/p&gt;
+&lt;p&gt;So this announcement is &lt;del&gt;doubly&lt;/del&gt;triply exciting for me:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;I have a solid recommendation for a router that doesn’t require me
+or them to manually install an after-market firmware (buy it from
+ThinkPenguin).&lt;/li&gt;
+&lt;li&gt;If it’s for me, or someone technical, I can cut costs by getting a
+stock TP-Link from Newegg and installing libreCMC ourselves.&lt;/li&gt;
+&lt;li&gt;I can install a 100% libre distribution on my existing routers
+(until recently, they were not supported by any of the libre
+distributions, not for technical reasons, but lack of manpower).&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;I hope to get libreCMC installed on my boxes this weekend!&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2014 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./what-im-working-on-fall-2014.html"/>
+ <link rel="alternate" type="text/markdown" href="./what-im-working-on-fall-2014.md"/>
+ <id>https://lukeshu.com/blog/what-im-working-on-fall-2014.html</id>
+ <updated>2014-09-11T00:00:00+00:00</updated>
+ <published>2014-09-11T00:00:00+00:00</published>
+ <title>What I'm working on (Fall 2014)</title>
+ <content type="html">&lt;h1 id="what-im-working-on-fall-2014"&gt;What I’m working on (Fall
+2014)&lt;/h1&gt;
+&lt;p&gt;I realized today that I haven’t updated my log in a while, and I
+don’t have any “finished” stuff to show off right now, but I should just
+talk about all the cool stuff I’m working on right now.&lt;/p&gt;
+&lt;h2 id="static-parsing-of-subshells"&gt;Static parsing of subshells&lt;/h2&gt;
+&lt;p&gt;Last year I wrote a shell (for my Systems Programming class);
+however, I went above-and-beyond and added some really novel features.
+In my opinion, the most significant is that it parses arbitrarily deep
+subshells in one pass, instead of deferring them until execution. No
+shell that I know of does this.&lt;/p&gt;
+&lt;p&gt;At first this sounds like a really difficult, but minor feature.
+Until you think about scripting, and maintenance of those scripts. Being
+able to do a full syntax check of a script is &lt;em&gt;crucial&lt;/em&gt; for
+long-term maintenance, yet it’s something that is missing from every
+major shell. I’d love to get this code merged into bash. It would be
+incredibly useful for &lt;a
+href="/git/mirror/parabola/packages/libretools.git"&gt;some software I
+maintain&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;Anyway, I’m trying to publish this code, but because of a recent
+kerfuffle with a student publishing all of his projects on the web (and
+other students trying to pass it off as their own), I’m being cautious
+with this and making sure Purdue is alright with what I’m putting
+online.&lt;/p&gt;
+&lt;h2 id="stateless-user-configuration-for-pamnss"&gt;&lt;a
+href="https://lukeshu.com/git/mirror/parabola/hackers.git/log/?h=lukeshu/restructure"&gt;Stateless
+user configuration for PAM/NSS&lt;/a&gt;&lt;/h2&gt;
+&lt;p&gt;Parabola GNU/Linux-libre users know that over this summer, we had a
+&lt;em&gt;mess&lt;/em&gt; with server outages. One of the servers is still out (due
+to things out of our control), and we don’t have some of the data on it
+(because volunteer developers are terrible about back-ups,
+apparently).&lt;/p&gt;
+&lt;p&gt;This has caused us to look at how we manage our servers, back-ups,
+and several other things.&lt;/p&gt;
+&lt;p&gt;One thing that I’ve taken on as my pet project is making sure that if
+a server goes down, or we need to migrate (for example, Jon is telling
+us that he wants us to hurry up and switch to the new 64 bit hardware so
+he can turn off the 32 bit box), we can spin up a new server from
+scratch pretty easily. Part of that is making configurations stateless,
+and dynamic based on external data; having data be located in one place
+instead of duplicated across 12 config files and 3 databases… on the
+same box.&lt;/p&gt;
+&lt;p&gt;Right now, that’s looking like some custom software interfacing with
+OpenLDAP and OpenSSH via sockets (OpenLDAP being a middle-man between us
+and PAM (Linux) and NSS (libc)). However, the OpenLDAP documentation is…
+inconsistent and frustrating. I might end up hacking up the LDAP modules
+for NSS and PAM to talk to our system directly, and cut OpenLDAP out of
+the picture. We’ll see!&lt;/p&gt;
+&lt;p&gt;PS: Pablo says that tomorrow we should be getting out-of-band access
+to the drive of the server that is down, so that we can finally restore
+those services on a different server.&lt;/p&gt;
+&lt;h2 id="project-leaguer"&gt;&lt;a
+href="https://lukeshu.com/git/mirror/leaguer.git/"&gt;Project
+Leaguer&lt;/a&gt;&lt;/h2&gt;
+&lt;p&gt;Last year, some friends and I began writing some “eSports tournament
+management software”, primarily targeting League of Legends (though it
+has a module system that will allow it to support tons of different data
+sources). We mostly got it done last semester, but it had some rough
+spots and sharp edges we need to work out. Because we were all out of
+communication for the summer, we didn’t work on it very much (but we did
+a little!). It’s weird that I care about this, because I’m not a gamer.
+Huh, I guess coding with friends is just fun.&lt;/p&gt;
+&lt;p&gt;Anyway, this year, &lt;a
+href="https://github.com/AndrewMurrell"&gt;Andrew&lt;/a&gt;, &lt;a
+href="https://github.com/DavisLWebb"&gt;Davis&lt;/a&gt;, and I are planning to
+get it to a polished state by the end of the semester. We could probably
+do it faster, but we’d all also like to focus on classes and other
+projects a little more.&lt;/p&gt;
+&lt;h2 id="c1"&gt;C+=1&lt;/h2&gt;
+&lt;p&gt;People tend to lump C and C++ together, which upsets me, because I
+love C, but have a dislike for C++. That’s not to say that C++ is
+entirely bad; it has some good features. My “favorite” code is actually
+code that is basically C, but takes advantage of a couple C++ features,
+while still being idiomatic C, not C++.&lt;/p&gt;
+&lt;p&gt;Anyway, with the perspective of history (what worked and what
+didn’t), and a slightly opinionated view on language design (I’m pretty
+much a Rob Pike fan-boy), I thought I’d try to tackle “object-oriented
+C” with roughly the same design criteria as Stroustrup had when
+designing C++. I’m calling mine C+=1, for obvious reasons.&lt;/p&gt;
+&lt;p&gt;I haven’t published anything yet, because calling it “working” would
+be stretching the truth. But I am using it for my assignments in CS 334
+(Intro to Graphics), so it should move along fairly quickly, as my grade
+depends on it.&lt;/p&gt;
+&lt;p&gt;I’m not taking it too seriously; I don’t expect it to be much more
+than a toy language, but it is an excuse to dive into the GCC
+internals.&lt;/p&gt;
+&lt;h2 id="projects-that-ive-put-on-the-back-burner"&gt;Projects that I’ve put
+on the back-burner&lt;/h2&gt;
+&lt;p&gt;I’ve got several other projects that I’m putting on hold for a
+while.&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;maven-dist&lt;/code&gt; (was hosted with Parabola, apparently I
+haven’t pushed it anywhere except the server that is down): A tool to
+build Apache Maven from source. That sounds easy, it’s open source,
+right? Well, except that Maven is the build system from hell. It doesn’t
+support cyclic dependencies, yet uses them internally to build itself.
+It &lt;em&gt;loves&lt;/em&gt; to just get binaries from Maven Central to “optimize”
+the build process. It depends on code that depends on compiler bugs that
+no longer exist (which I guess means that &lt;em&gt;no one&lt;/em&gt; has tried to
+build it from source after it was originally published). I’ve been
+working on-and-off on this for more than a year. My favorite part of it
+was writing a &lt;a href="/dump/jflex2jlex.sed.txt"&gt;sed script&lt;/a&gt; that
+translates a JFlex grammar specification into a JLex grammar, which is
+used to bootstrap JFlex; it’s both gross and delightful at the same
+time.&lt;/li&gt;
+&lt;li&gt;Integration between &lt;code&gt;dbscripts&lt;/code&gt; and
+&lt;code&gt;abslibre&lt;/code&gt;. If you search IRC logs, mailing lists, and
+ParabolaWiki, you can find numerous rants by me against &lt;a
+href="/git/mirror/parabola/dbscripts.git/tree/db-sync"&gt;&lt;code&gt;dbscripts:db-sync&lt;/code&gt;&lt;/a&gt;.
+I just hate the data-flow, it is almost designed to make things get out
+of sync, and broken. I mean, does &lt;a
+href="/dump/parabola-data-flow.svg"&gt;this&lt;/a&gt; look like a simple diagram?
+For contrast, &lt;a href="/dump/parabola-data-flow-xbs.svg"&gt;here’s&lt;/a&gt; a
+rough (slightly incomplete) diagram of what I want to replace it
+with.&lt;/li&gt;
+&lt;li&gt;Git backend for MediaWiki (or, pulling out the rendering module of
+MediaWiki). I’ve made decent progress on that front, but there is
+&lt;em&gt;crazy&lt;/em&gt; de-normalization going on in the MediaWiki schema that
+makes this very difficult. I’m sure some of it is for historical
+reasons, and some of it for performance, but either way it is a mess for
+someone trying to neatly gut that part of the codebase.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h2 id="other"&gt;Other&lt;/h2&gt;
+&lt;p&gt;I should consider doing a write-up of deterministic-&lt;code&gt;tar&lt;/code&gt;
+behavior (something that I’ve been implementing in Parabola for a while,
+meanwhile the Debian people have also been working on it).&lt;/p&gt;
+&lt;p&gt;I should also consider doing a “post-mortem” of &lt;a
+href="https://lukeshu.com/git/mirror/parabola/packages/pbs-tools.git/"&gt;PBS&lt;/a&gt;,
+which never actually got used, but launched XBS (part of the
+&lt;code&gt;dbscripts&lt;/code&gt;/&lt;code&gt;abslibre&lt;/code&gt; integration mentioned
+above), as well as serving as a good test-bed for features that did get
+implemented.&lt;/p&gt;
+&lt;p&gt;I over-use the word “anyway.”&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2014 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./rails-improvements.html"/>
+ <link rel="alternate" type="text/markdown" href="./rails-improvements.md"/>
+ <id>https://lukeshu.com/blog/rails-improvements.html</id>
+ <updated>2014-05-08T00:00:00+00:00</updated>
+ <published>2014-05-08T00:00:00+00:00</published>
+ <title>Miscellaneous ways to improve your Rails experience</title>
+ <content type="html">&lt;h1
+id="miscellaneous-ways-to-improve-your-rails-experience"&gt;Miscellaneous
+ways to improve your Rails experience&lt;/h1&gt;
+&lt;p&gt;Recently, I’ve been working on &lt;a
+href="https://github.com/LukeShu/leaguer"&gt;a Rails web application&lt;/a&gt;,
+that’s really the baby of a friend of mine. Anyway, through its
+development, I’ve come up with a couple things that should make your
+interactions with Rails more pleasant.&lt;/p&gt;
+&lt;h2
+id="auto-reload-classes-from-other-directories-than-app"&gt;Auto-(re)load
+classes from other directories than &lt;code&gt;app/&lt;/code&gt;&lt;/h2&gt;
+&lt;p&gt;The development server automatically loads and reloads files from the
+&lt;code&gt;app/&lt;/code&gt; directory, which is extremely nice. However, most web
+applications are going to involve modules that aren’t in that directory;
+and editing those files requires re-starting the server for the changes
+to take effect.&lt;/p&gt;
+&lt;p&gt;Adding the following lines to your &lt;a
+href="https://github.com/LukeShu/leaguer/blob/c846cd71411ec3373a5229cacafe0df6b3673543/config/application.rb#L15"&gt;&lt;code&gt;config/application.rb&lt;/code&gt;&lt;/a&gt;
+will allow it to automatically load and reload files from the
+&lt;code&gt;lib/&lt;/code&gt; directory. You can of course change this to whichever
+directory/ies you like.&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;module YourApp
+ class Application &amp;lt; Rails::Application
+ …
+ config.autoload_paths += [&amp;quot;#{Rails.root}/lib&amp;quot;]
+ config.watchable_dirs[&amp;quot;#{Rails.root}/lib&amp;quot;] = [:rb]
+ …
+ end
+end&lt;/code&gt;&lt;/pre&gt;
+&lt;h2 id="have-submit_tag-generate-a-button-instead-of-an-input"&gt;Have
+&lt;code&gt;submit_tag&lt;/code&gt; generate a button instead of an input&lt;/h2&gt;
+&lt;p&gt;In HTML, the &lt;code&gt;&amp;lt;input type="submit"&amp;gt;&lt;/code&gt; tag styles
+slightly differently than other inputs or buttons. It is impossible to
+precisely control the height via CSS, which makes designing forms a
+pain. This is particularly noticeable if you use Bootstrap 3 and put it
+next to another button; the submit button will be slightly shorter
+vertically.&lt;/p&gt;
+&lt;p&gt;The obvious fix here is to use
+&lt;code&gt;&amp;lt;button type="submit"&amp;gt;&lt;/code&gt; instead. The following code
+will modify the default Rails form helpers to generate a button tag
+instead of an input tag. Just stick the code in &lt;a
+href="https://github.com/LukeShu/leaguer/blob/521eae01be1ca3f69b47b3170a0548c3268f4a22/config/initializers/form_improvements.rb"&gt;&lt;code&gt;config/initializers/form_improvements.rb&lt;/code&gt;&lt;/a&gt;;
+it will override
+&lt;code&gt;ActionView::Helpers::FormTagHelper#submit_tag&lt;/code&gt;. It is mostly
+the standard definition of the function, except for the last line, which
+has changed.&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# -*- ruby-indent-level: 2; indent-tabs-mode: nil -*-
+module ActionView
+ module Helpers
+ module FormTagHelper
+
+ # This is modified from actionpack-4.0.2/lib/action_view/helpers/form_tag_helper.rb#submit_tag
+ def submit_tag(value = &amp;quot;Save changes&amp;quot;, options = {})
+ options = options.stringify_keys
+
+ if disable_with = options.delete(&amp;quot;disable_with&amp;quot;)
+ message = &amp;quot;:disable_with option is deprecated and will be removed from Rails 4.1. &amp;quot; \
+ &amp;quot;Use &amp;#39;data: { disable_with: \&amp;#39;Text\&amp;#39; }&amp;#39; instead.&amp;quot;
+ ActiveSupport::Deprecation.warn message
+
+ options[&amp;quot;data-disable-with&amp;quot;] = disable_with
+ end
+
+ if confirm = options.delete(&amp;quot;confirm&amp;quot;)
+ message = &amp;quot;:confirm option is deprecated and will be removed from Rails 4.1. &amp;quot; \
+ &amp;quot;Use &amp;#39;data: { confirm: \&amp;#39;Text\&amp;#39; }&amp;#39; instead&amp;#39;.&amp;quot;
+ ActiveSupport::Deprecation.warn message
+
+ options[&amp;quot;data-confirm&amp;quot;] = confirm
+ end
+
+ content_tag(:button, value, { &amp;quot;type&amp;quot; =&amp;gt; &amp;quot;submit&amp;quot;, &amp;quot;name&amp;quot; =&amp;gt; &amp;quot;commit&amp;quot;, &amp;quot;value&amp;quot; =&amp;gt; value }.update(options))
+ end
+
+ end
+ end
+end&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;I’ll probably update this page as I tweak other things I don’t
+like.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2014 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./bash-redirection.html"/>
+ <link rel="alternate" type="text/markdown" href="./bash-redirection.md"/>
+ <id>https://lukeshu.com/blog/bash-redirection.html</id>
+ <updated>2014-02-13T00:00:00+00:00</updated>
+ <published>2014-02-13T00:00:00+00:00</published>
+ <title>Bash redirection</title>
+ <content type="html">&lt;h1 id="bash-redirection"&gt;Bash redirection&lt;/h1&gt;
+&lt;p&gt;Apparently, too many people don’t understand Bash redirection. They
+might get the basic syntax, but they think of the process as
+declarative; in Bourne-ish shells, it is procedural.&lt;/p&gt;
+&lt;p&gt;In Bash, streams are handled in terms of “file descriptors”, or “FDs”.
+FD 0 is stdin, FD 1 is stdout, and FD 2 is stderr. The equivalence (or
+lack thereof) between using a numeric file descriptor, and using the
+associated file in &lt;code&gt;/dev/*&lt;/code&gt; and &lt;code&gt;/proc/*&lt;/code&gt; is
+interesting, but beyond the scope of this article.&lt;/p&gt;
+&lt;h2 id="step-1-pipes"&gt;Step 1: Pipes&lt;/h2&gt;
+&lt;p&gt;To quote the Bash manual:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;A &amp;#39;pipeline&amp;#39; is a sequence of simple commands separated by one of the
+control operators &amp;#39;|&amp;#39; or &amp;#39;|&amp;amp;&amp;#39;.
+
+ The format for a pipeline is
+ [time [-p]] [!] COMMAND1 [ [| or |&amp;amp;] COMMAND2 ...]&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Now, &lt;code&gt;|&amp;amp;&lt;/code&gt; is just shorthand for
+&lt;code&gt;2&amp;gt;&amp;amp;1 |&lt;/code&gt;, the pipe part happens here, but the
+&lt;code&gt;2&amp;gt;&amp;amp;1&lt;/code&gt; part doesn’t happen until step 2.&lt;/p&gt;
+&lt;p&gt;First, if the command is part of a pipeline, the pipes are set up.
+For every instance of the &lt;code&gt;|&lt;/code&gt; metacharacter, Bash creates a
+pipe (&lt;code&gt;pipe(2)&lt;/code&gt;), and duplicates (&lt;code&gt;dup2(2)&lt;/code&gt;) the
+write end of the pipe to FD 1 of the process on the left side of the
+&lt;code&gt;|&lt;/code&gt;, and duplicates the read end of the pipe to FD 0 of the
+process on the right side.&lt;/p&gt;
+&lt;h2 id="step-2-redirections"&gt;Step 2: Redirections&lt;/h2&gt;
+&lt;p&gt;&lt;em&gt;After&lt;/em&gt; the initial FD 0 and FD 1 fiddling by pipes is done,
+Bash looks at the redirections. &lt;strong&gt;This means that redirections can
+override pipes.&lt;/strong&gt;&lt;/p&gt;
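+&lt;p&gt;A quick sketch you can run to watch a redirection override a
+pipe:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;echo hi | cat              # prints: hi
+echo hi &amp;gt;/dev/null | cat   # prints nothing; the redirection stole FD 1 from the pipe&lt;/code&gt;&lt;/pre&gt;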
+&lt;p&gt;Redirections are read left-to-right, and are executed as they are
+read, using &lt;code&gt;dup2(right-side, left-side)&lt;/code&gt;. This is where most
+of the confusion comes from, people think of them as declarative, which
+leads to them doing the first of these, when they mean to do the
+second:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;cmd 2&amp;gt;&amp;amp;1 &amp;gt;file # stdout goes to file, stderr goes to stdout
+cmd &amp;gt;file 2&amp;gt;&amp;amp;1 # both stdout and stderr go to file&lt;/code&gt;&lt;/pre&gt;
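+&lt;p&gt;Here’s a runnable sketch of that difference, using a helper function
+that writes one line to each stream:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;f() { echo out; echo err &amp;gt;&amp;amp;2; }
+f 2&amp;gt;&amp;amp;1 &amp;gt;/dev/null   # prints: err     (FD 2 was duplicated from FD 1 while FD 1 was still the terminal)
+f &amp;gt;/dev/null 2&amp;gt;&amp;amp;1   # prints nothing  (FD 1 moved first; FD 2 then followed it)&lt;/code&gt;&lt;/pre&gt;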
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2014 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./java-segfault.html"/>
+ <link rel="alternate" type="text/markdown" href="./java-segfault.md"/>
+ <id>https://lukeshu.com/blog/java-segfault.html</id>
+ <updated>2014-01-13T00:00:00+00:00</updated>
+ <published>2014-01-13T00:00:00+00:00</published>
+ <title>My favorite bug: segfaults in Java</title>
+ <content type="html">&lt;h1 id="my-favorite-bug-segfaults-in-java"&gt;My favorite bug: segfaults in
+Java&lt;/h1&gt;
+&lt;blockquote&gt;
+&lt;p&gt;Update: Two years later, I wrote a more detailed version of this
+article: &lt;a href="./java-segfault-redux.html"&gt;My favorite bug: segfaults
+in Java (redux)&lt;/a&gt;.&lt;/p&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;I’ve told this story orally a number of times, but realized that I
+have never written it down. This is my favorite bug story; it might not
+be my hardest bug, but it is the one I most like to tell.&lt;/p&gt;
+&lt;h2 id="the-context"&gt;The context&lt;/h2&gt;
+&lt;p&gt;In 2012, I was a Senior programmer on the FIRST Robotics Competition
+team 1024. For the unfamiliar, the relevant part of the setup is that
+there are 2-minute-15-second matches in which you have a 120-pound
+robot that sometimes runs autonomously, and sometimes is controlled over
+WiFi from a person at a laptop running stock “driver station” software
+and modifiable “dashboard” software.&lt;/p&gt;
+&lt;p&gt;That year, we mostly used the dashboard software to allow the human
+driver and operator to monitor sensors on the robot, one of them being a
+video feed from a web-cam mounted on it. This was really easy because
+the new standard dashboard program had a click-and drag interface to add
+stock widgets; you just had to make sure the code on the robot was
+actually sending the data.&lt;/p&gt;
+&lt;p&gt;That’s great, except that while debugging things, the dashboard would
+suddenly vanish. If it was run manually from a terminal (instead of
+letting the driver station software launch it), you would see a core
+dump indicating a segmentation fault.&lt;/p&gt;
+&lt;p&gt;This wasn’t just us either; I spoke with people on other teams,
+everyone who was streaming video had this issue. But, because it only
+happened every couple of minutes, and a match is only 2:15, it didn’t
+need to run very long; they just crossed their fingers and hoped it
+didn’t happen during a match.&lt;/p&gt;
+&lt;p&gt;The dashboard was written in Java, and the source was available
+(under a 3-clause BSD license), so I dove in, hunting for the bug. Now,
+the program did use Java Native Interface to talk to OpenCV, which the
+video ran through; so I figured that it must be a bug in the C/C++ code
+that was being called. It was especially a pain to track down the
+pointers that were causing the issue, because it was hard with native
+debuggers to see through all of the JVM stuff to the OpenCV code, and
+the OpenCV stuff is opaque to Java debuggers.&lt;/p&gt;
+&lt;p&gt;Eventually the issue led me back into the Java code—there was a
+native pointer being stored in a Java variable; Java code called the
+native routine to &lt;code&gt;free()&lt;/code&gt; the structure, but then tried to
+feed it to another routine later. This led to difficulty again—tracking
+objects with Java debuggers was hard because they don’t expect the
+program to suddenly segfault; it’s Java code, Java doesn’t segfault, it
+throws exceptions!&lt;/p&gt;
+&lt;p&gt;With the help of &lt;code&gt;println()&lt;/code&gt; I was eventually able to see
+that some code was executing in an order that straight didn’t make
+sense.&lt;/p&gt;
+&lt;h2 id="the-bug"&gt;The bug&lt;/h2&gt;
+&lt;p&gt;The issue was that Java was making an unsafe optimization (I never
+bothered to figure out if it is the compiler or the JVM making the
+mistake; I was satisfied once I had a work-around).&lt;/p&gt;
+&lt;p&gt;Java was doing something similar to tail-call optimization with
+regard to garbage collection. You see, if it is waiting for the return
+value of a method &lt;code&gt;m()&lt;/code&gt; of object &lt;code&gt;o&lt;/code&gt;, and code in
+&lt;code&gt;m()&lt;/code&gt; that is yet to be executed doesn’t access any other
+methods or properties of &lt;code&gt;o&lt;/code&gt;, then it will go ahead and
+consider &lt;code&gt;o&lt;/code&gt; eligible for garbage collection before
+&lt;code&gt;m()&lt;/code&gt; has finished running.&lt;/p&gt;
+&lt;p&gt;That is normally a safe optimization to make… except for when a
+destructor method (&lt;code&gt;finalize()&lt;/code&gt;) is defined for the object;
+the destructor can have side effects, and Java has no way to know
+whether it is safe for them to happen before &lt;code&gt;m()&lt;/code&gt; has
+finished running.&lt;/p&gt;
+&lt;h2 id="the-work-around"&gt;The work-around&lt;/h2&gt;
+&lt;p&gt;The routine that the segmentation fault was occurring in was
+something like:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;public type1 getFrame() {
+ type2 child = this.getChild();
+ type3 var = this.something();
+ // `this` may now be garbage collected
+ return child.somethingElse(var); // segfault comes here
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Where the destructor method of &lt;code&gt;this&lt;/code&gt; calls a method that
+will &lt;code&gt;free()&lt;/code&gt; native memory that is also accessed by
+&lt;code&gt;child&lt;/code&gt;; if &lt;code&gt;this&lt;/code&gt; is garbage collected before
+&lt;code&gt;child.somethingElse()&lt;/code&gt; runs, the backing native code will
+try to access memory that has been &lt;code&gt;free()&lt;/code&gt;ed, and receive a
+segmentation fault. That usually didn’t happen, as the routines were
+pretty fast. However, running 30 times a second, eventually bad luck
+with the garbage collector happens, and the program crashes.&lt;/p&gt;
+&lt;p&gt;The work-around was to insert a bogus call to &lt;code&gt;this&lt;/code&gt; to keep
+&lt;code&gt;this&lt;/code&gt; around until after we were also done with
+&lt;code&gt;child&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;public type1 getFrame() {
+ type2 child = this.getChild();
+ type3 var = this.something();
+ type1 ret = child.somethingElse(var);
+ this.getSize(); // bogus call to keep `this` around
+ return ret;
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Yeah. After spending weeks wading through thousands of lines
+of Java, C, and C++, a bogus call to a method I didn’t care about was
+the fix.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2014 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./bash-arrays.html"/>
+ <link rel="alternate" type="text/markdown" href="./bash-arrays.md"/>
+ <id>https://lukeshu.com/blog/bash-arrays.html</id>
+ <updated>2013-10-13T00:00:00+00:00</updated>
+ <published>2013-10-13T00:00:00+00:00</published>
+ <title>Bash arrays</title>
+ <content type="html">&lt;h1 id="bash-arrays"&gt;Bash arrays&lt;/h1&gt;
+&lt;p&gt;Way too many people don’t understand Bash arrays. Many of them argue
+that if you need arrays, you shouldn’t be using Bash. If we reject the
+notion that one should never use Bash for scripting, then thinking you
+don’t need Bash arrays is what I like to call “wrong”. I don’t even mean
+real scripting; even these little stubs in &lt;code&gt;/usr/bin&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;#!/bin/sh
+java -jar /…/something.jar $* # WRONG!&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Command line arguments are exposed as an array; that little
+&lt;code&gt;$*&lt;/code&gt; is accessing it, and is doing the wrong thing (for the
+lazy, the correct thing is &lt;code&gt;-- "$@"&lt;/code&gt;). Arrays in Bash offer a
+safe way to preserve field separation.&lt;/p&gt;
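+&lt;p&gt;A quick sketch of the breakage, using an argument that contains a
+space:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;set -- &amp;#39;one arg&amp;#39; two
+printf &amp;#39; -&amp;gt; %s\n&amp;#39; $*      # 3 words: one / arg / two
+printf &amp;#39; -&amp;gt; %s\n&amp;#39; &amp;quot;$@&amp;quot;    # 2 words: one arg / two&lt;/code&gt;&lt;/pre&gt;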
+&lt;p&gt;One of the main sources of bugs (and security holes) in shell scripts
+is field separation. That’s what arrays are about.&lt;/p&gt;
+&lt;h2 id="what-field-separation"&gt;What? Field separation?&lt;/h2&gt;
+&lt;p&gt;Field separation is just splitting a larger unit into a list of
+“fields”. The most common case is when Bash splits a “simple command”
+(in the Bash manual’s terminology) into a list of arguments.
+Understanding how this works is an important prerequisite to
+understanding arrays, and even why they are important.&lt;/p&gt;
+&lt;p&gt;Dealing with lists is something that is very common in Bash scripts;
+from dealing with lists of arguments, to lists of files; they pop up a
+lot, and each time, you need to think about how the list is separated.
+In the case of &lt;code&gt;$PATH&lt;/code&gt;, the list is separated by colons. In
+the case of &lt;code&gt;$CFLAGS&lt;/code&gt;, the list is separated by whitespace.
+In the case of actual arrays, it’s easy, there’s no special character to
+worry about, just quote it, and you’re good to go.&lt;/p&gt;
+&lt;h2 id="bash-word-splitting"&gt;Bash word splitting&lt;/h2&gt;
+&lt;p&gt;When Bash reads a “simple command”, it splits the whole thing into a
+list of “words”. “The first word specifies the command to be executed,
+and is passed as argument zero. The remaining words are passed as
+arguments to the invoked command.” (to quote &lt;code&gt;bash(1)&lt;/code&gt;)&lt;/p&gt;
+&lt;p&gt;It is often hard for those unfamiliar with Bash to understand when
+something is multiple words, and when it is a single word that just
+contains a space or newline. To help gain an intuitive understanding, I
+recommend using the following command to print a bullet list of words,
+to see how Bash splits them up:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;printf ' -&gt; %s\n' &lt;var&gt;words…&lt;/var&gt;&lt;hr&gt; -&amp;gt; word one
+ -&amp;gt; multiline
+word
+ -&amp;gt; third word
+&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;In a simple command, in the absence of quoting, Bash separates the “raw”
+input into words by splitting on spaces and tabs. In other places, such
+as when expanding a variable, it uses the same process, but splits on
+the characters in the &lt;code&gt;$IFS&lt;/code&gt; variable (which has the default
+value of space/tab/newline). This process is, creatively enough, called
+“word splitting”.&lt;/p&gt;
+&lt;p&gt;In most discussions of Bash arrays, one of the frequent criticisms is
+all the footnotes and “gotchas” about when to quote things. That’s
+because they usually don’t set the context of word splitting.
+&lt;strong&gt;Double quotes (&lt;code&gt;"&lt;/code&gt;) inhibit Bash from doing word
+splitting.&lt;/strong&gt; That’s it, that’s all they do. Arrays are already
+split into words; without wrapping them in double quotes Bash re-word
+splits them, which is almost &lt;em&gt;never&lt;/em&gt; what you want; otherwise,
+you wouldn’t be working with an array.&lt;/p&gt;
+&lt;h2 id="normal-array-syntax"&gt;Normal array syntax&lt;/h2&gt;
+&lt;table&gt;
+ &lt;caption&gt;
+ &lt;h1&gt;Setting an array&lt;/h1&gt;
+ &lt;p&gt;&lt;var&gt;words…&lt;/var&gt; is expanded and subject to word splitting
+ based on &lt;code&gt;$IFS&lt;/code&gt;.&lt;/p&gt;
+ &lt;/caption&gt;
+ &lt;tbody&gt;
+ &lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;array=(&lt;var&gt;words…&lt;/var&gt;)&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;Set the contents of the entire array.&lt;/td&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;array+=(&lt;var&gt;words…&lt;/var&gt;)&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;Appends &lt;var&gt;words…&lt;/var&gt; to the end of the array.&lt;/td&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;array[&lt;var&gt;n&lt;/var&gt;]=&lt;var&gt;word&lt;/var&gt;&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;Sets an individual entry in the array, the first entry is at
+ &lt;var&gt;n&lt;/var&gt;=0.&lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;/tbody&gt;
+&lt;/table&gt;
+&lt;p&gt;Now, for accessing the array. The most important things to
+understand about arrays are to quote them, and the difference
+between &lt;code&gt;@&lt;/code&gt; and &lt;code&gt;*&lt;/code&gt;.&lt;/p&gt;
+&lt;table&gt;
+ &lt;caption&gt;
+ &lt;h1&gt;Getting an entire array&lt;/h1&gt;
+ &lt;p&gt;Unless these are wrapped in double quotes, they are subject to
+ word splitting, which defeats the purpose of arrays.&lt;/p&gt;
+ &lt;p&gt;I guess it's worth mentioning that if you don't quote them, and
+ word splitting is applied, &lt;code&gt;@&lt;/code&gt; and &lt;code&gt;*&lt;/code&gt;
+ end up being equivalent.&lt;/p&gt;
+ &lt;p&gt;With &lt;code&gt;*&lt;/code&gt;, when joining the elements into a single
+ string, the elements are separated by the first character in
+ &lt;code&gt;$IFS&lt;/code&gt;, which is, by default, a space.&lt;/p&gt;
+ &lt;/caption&gt;
+ &lt;tbody&gt;
+ &lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${array[@]}"&lt;/code&gt;&lt;/td&gt;
+      &lt;td&gt;Evaluates to every element of the array, as separate
+ words.&lt;/td&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${array[*]}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;Evaluates to every element of the array, as a single
+ word.&lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;/tbody&gt;
+&lt;/table&gt;
+&lt;p&gt;It’s really that simple—that covers most usages of arrays, and most
+of the mistakes made with them.&lt;/p&gt;
+&lt;p&gt;To help you understand the difference between &lt;code&gt;@&lt;/code&gt; and
+&lt;code&gt;*&lt;/code&gt;, here is a sample of each:&lt;/p&gt;
+&lt;table&gt;
+ &lt;tbody&gt;
+ &lt;tr&gt;&lt;th&gt;&lt;code&gt;@&lt;/code&gt;&lt;/th&gt;&lt;th&gt;&lt;code&gt;*&lt;/code&gt;&lt;/th&gt;&lt;/tr&gt;
+ &lt;tr&gt;
+ &lt;td&gt;Input:&lt;pre&gt;&lt;code&gt;#!/bin/bash
+array=(foo bar baz)
+for item in "${array[@]}"; do
+ echo " - &amp;lt;${item}&amp;gt;"
+done&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
+ &lt;td&gt;Input:&lt;pre&gt;&lt;code&gt;#!/bin/bash
+array=(foo bar baz)
+for item in "${array[*]}"; do
+ echo " - &amp;lt;${item}&amp;gt;"
+done&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;tr&gt;
+ &lt;td&gt;Output:&lt;pre&gt;&lt;code&gt; - &amp;lt;foo&amp;gt;
+ - &amp;lt;bar&amp;gt;
+ - &amp;lt;baz&amp;gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
+ &lt;td&gt;Output:&lt;pre&gt;&lt;code&gt; - &amp;lt;foo bar baz&amp;gt;&lt;br&gt;&lt;br&gt;&lt;br&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;/tbody&gt;
+&lt;/table&gt;
+&lt;p&gt;In most cases, &lt;code&gt;@&lt;/code&gt; is what you want, but &lt;code&gt;*&lt;/code&gt;
+comes up often enough too.&lt;/p&gt;
+&lt;p&gt;To get individual entries, the syntax is
+&lt;code&gt;${array[&lt;var&gt;n&lt;/var&gt;]}&lt;/code&gt;, where &lt;var&gt;n&lt;/var&gt; starts at 0.&lt;/p&gt;
+&lt;table&gt;
+ &lt;caption&gt;
+ &lt;h1&gt;Getting a single entry from an array&lt;/h1&gt;
+ &lt;p&gt;Also subject to word splitting if you don't wrap it in
+ quotes.&lt;/p&gt;
+ &lt;/caption&gt;
+ &lt;tbody&gt;
+ &lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${array[&lt;var&gt;n&lt;/var&gt;]}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;Evaluates to the &lt;var&gt;n&lt;/var&gt;&lt;sup&gt;th&lt;/sup&gt; entry of the
+ array, where the first entry is at &lt;var&gt;n&lt;/var&gt;=0.&lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;/tbody&gt;
+&lt;/table&gt;
+&lt;p&gt;To get a subset of the array, there are a few options:&lt;/p&gt;
+&lt;table&gt;
+ &lt;caption&gt;
+ &lt;h1&gt;Getting subsets of an array&lt;/h1&gt;
+ &lt;p&gt;Substitute &lt;code&gt;*&lt;/code&gt; for &lt;code&gt;@&lt;/code&gt; to get the subset
+ as a &lt;code&gt;$IFS&lt;/code&gt;-separated string instead of separate
+ words, as described above.&lt;/p&gt;
+ &lt;p&gt;Again, if you don't wrap these in double quotes, they are
+ subject to word splitting, which defeats the purpose of
+ arrays.&lt;/p&gt;
+ &lt;/caption&gt;
+ &lt;tbody&gt;
+ &lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${array[@]:&lt;var&gt;start&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;Evaluates to the entries from &lt;var&gt;n&lt;/var&gt;=&lt;var&gt;start&lt;/var&gt; to the end
+ of the array.&lt;/td&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${array[@]:&lt;var&gt;start&lt;/var&gt;:&lt;var&gt;count&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;Evaluates to &lt;var&gt;count&lt;/var&gt; entries, starting at
+ &lt;var&gt;n&lt;/var&gt;=&lt;var&gt;start&lt;/var&gt;.&lt;/td&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${array[@]::&lt;var&gt;count&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;Evaluates to &lt;var&gt;count&lt;/var&gt; entries from the beginning of
+ the array.&lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;/tbody&gt;
+&lt;/table&gt;
+&lt;p&gt;Notice that &lt;code&gt;"${array[@]}"&lt;/code&gt; is equivalent to
+&lt;code&gt;"${array[@]:0}"&lt;/code&gt;.&lt;/p&gt;
+&lt;table&gt;
+ &lt;caption&gt;
+ &lt;h1&gt;Getting the length of an array&lt;/h1&gt;
+    &lt;p&gt;This is the only situation with arrays where quoting doesn't
+ make a difference.&lt;/p&gt;
+ &lt;p&gt;True to my earlier statement, when unquoted, there is no
+ difference between &lt;code&gt;@&lt;/code&gt; and &lt;code&gt;*&lt;/code&gt;.&lt;/p&gt;
+ &lt;/caption&gt;
+ &lt;tbody&gt;
+ &lt;tr&gt;
+ &lt;td&gt;
+ &lt;code&gt;${#array[@]}&lt;/code&gt;
+ &lt;br&gt;or&lt;br&gt;
+ &lt;code&gt;${#array[*]}&lt;/code&gt;
+ &lt;/td&gt;
+ &lt;td&gt;
+        Evaluates to the length of the array.
+ &lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;/tbody&gt;
+&lt;/table&gt;
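+&lt;p&gt;For example, the length can be used to grab the last element (a
+small sketch; it assumes the array has no gaps in its indices, and
+relies on subscripts being arithmetic expressions):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;array=(a b c)
+echo &amp;quot;${#array[@]}&amp;quot;               # =&amp;gt; 3
+echo &amp;quot;${array[${#array[@]}-1]}&amp;quot;   # =&amp;gt; c&lt;/code&gt;&lt;/pre&gt;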
+&lt;h2 id="argument-array-syntax"&gt;Argument array syntax&lt;/h2&gt;
+&lt;p&gt;Accessing the arguments is mostly that simple, but that array doesn’t
+actually have a variable name. It’s special. Instead, it is exposed
+through a series of special variables (normal variable names can only
+start with letters and underscores) that &lt;em&gt;mostly&lt;/em&gt; match up
+with the normal array syntax.&lt;/p&gt;
+&lt;p&gt;Setting the arguments array, on the other hand, is pretty different.
+That’s fine, because setting the arguments array is less useful
+anyway.&lt;/p&gt;
+&lt;table&gt;
+ &lt;caption&gt;
+ &lt;h1&gt;Accessing the arguments array&lt;/h1&gt;
+ &lt;aside&gt;Note that for values of &lt;var&gt;n&lt;/var&gt; with more than 1
+ digit, you need to wrap it in &lt;code&gt;{}&lt;/code&gt;.
+ Otherwise, &lt;code&gt;"$10"&lt;/code&gt; would be parsed
+ as &lt;code&gt;"${1}0"&lt;/code&gt;.&lt;/aside&gt;
+ &lt;/caption&gt;
+ &lt;tbody&gt;
+ &lt;tr&gt;&lt;th colspan=2&gt;Individual entries&lt;/th&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;${array[0]}&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;$0&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;${array[1]}&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;$1&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td colspan=2 style="text-align:center"&gt;…&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;${array[9]}&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;$9&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;${array[10]}&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;${10}&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td colspan=2 style="text-align:center"&gt;…&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;${array[&lt;var&gt;n&lt;/var&gt;]}&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;${&lt;var&gt;n&lt;/var&gt;}&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;th colspan=2&gt;Subset arrays (array)&lt;/th&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[@]}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"${@:0}"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[@]:1}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"$@"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[@]:&lt;var&gt;pos&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"${@:&lt;var&gt;pos&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[@]:&lt;var&gt;pos&lt;/var&gt;:&lt;var&gt;len&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"${@:&lt;var&gt;pos&lt;/var&gt;:&lt;var&gt;len&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[@]::&lt;var&gt;len&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"${@::&lt;var&gt;len&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;th colspan=2&gt;Subset arrays (string)&lt;/th&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[*]}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"${*:0}"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[*]:1}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"$*"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[*]:&lt;var&gt;pos&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"${*:&lt;var&gt;pos&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[*]:&lt;var&gt;pos&lt;/var&gt;:&lt;var&gt;len&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"${*:&lt;var&gt;pos&lt;/var&gt;:&lt;var&gt;len&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;"${array[*]::&lt;var&gt;len&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;"${*::&lt;var&gt;len&lt;/var&gt;}"&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;th colspan=2&gt;Array length&lt;/th&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;${#array[@]}&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;$#&lt;/code&gt; + 1&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;th colspan=2&gt;Setting the array&lt;/th&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;array=("${array[0]}" &lt;var&gt;words…&lt;/var&gt;)&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;set -- &lt;var&gt;words…&lt;/var&gt;&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;array=("${array[0]}" "${array[@]:2}")&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;shift&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;tr&gt;&lt;td&gt;&lt;code&gt;array=("${array[0]}" "${array[@]:&lt;var&gt;n+1&lt;/var&gt;}")&lt;/code&gt;&lt;/td&gt;&lt;td&gt;&lt;code&gt;shift &lt;var&gt;n&lt;/var&gt;&lt;/code&gt;&lt;/td&gt;&lt;/tr&gt;
+ &lt;/tbody&gt;
+&lt;/table&gt;
+&lt;p&gt;Did you notice what was inconsistent? The variables &lt;code&gt;$*&lt;/code&gt;,
+&lt;code&gt;$@&lt;/code&gt;, and &lt;code&gt;$#&lt;/code&gt; behave like the &lt;var&gt;n&lt;/var&gt;=0
+entry doesn’t exist.&lt;/p&gt;
+&lt;table&gt;
+ &lt;caption&gt;
+ &lt;h1&gt;Inconsistencies&lt;/h1&gt;
+ &lt;/caption&gt;
+ &lt;tbody&gt;
+ &lt;tr&gt;
+ &lt;th colspan=3&gt;&lt;code&gt;@&lt;/code&gt; or &lt;code&gt;*&lt;/code&gt;&lt;/th&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${array[@]}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;→&lt;/td&gt;
+ &lt;td&gt;&lt;code&gt;"${array[@]:0}"&lt;/code&gt;&lt;/td&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${@}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;→&lt;/td&gt;
+ &lt;td&gt;&lt;code&gt;"${@:1}"&lt;/code&gt;&lt;/td&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;th colspan=3&gt;&lt;code&gt;#&lt;/code&gt;&lt;/th&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${#array[@]}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;→&lt;/td&gt;
+ &lt;td&gt;length&lt;/td&gt;
+ &lt;/tr&gt;&lt;tr&gt;
+ &lt;td&gt;&lt;code&gt;"${#}"&lt;/code&gt;&lt;/td&gt;
+ &lt;td&gt;→&lt;/td&gt;
+ &lt;td&gt;length-1&lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;/tbody&gt;
+&lt;/table&gt;
+&lt;p&gt;These make sense because argument 0 is the name of the script—we
+almost never want that when parsing arguments. If these variables
+included it, you’d spend more code skipping over argument 0 than you
+currently spend recovering it in the rare cases that you want it.&lt;/p&gt;
+&lt;p&gt;Now, for an explanation of setting the arguments array. You cannot
+set argument &lt;var&gt;n&lt;/var&gt;=0. The &lt;code&gt;set&lt;/code&gt; command is used to
+manipulate the arguments passed to Bash after the fact—similarly, you
+could use &lt;code&gt;set -x&lt;/code&gt; to make Bash behave like you ran it as
+&lt;code&gt;bash -x&lt;/code&gt;; like most GNU programs, the &lt;code&gt;--&lt;/code&gt; tells
+it not to parse any of the following words as flags. The
+&lt;code&gt;shift&lt;/code&gt; command shifts each entry &lt;var&gt;n&lt;/var&gt; spots to the
+left, using &lt;var&gt;n&lt;/var&gt;=1 if no value is specified, and leaving
+argument 0 alone.&lt;/p&gt;
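+&lt;p&gt;A quick demonstration of &lt;code&gt;set&lt;/code&gt; and
+&lt;code&gt;shift&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;set -- foo bar baz   # $1=foo, $2=bar, $3=baz; $# is now 3
+shift                # $1=bar, $2=baz; $# is now 2
+shift 2              # $# is now 0; argument 0 was never touched&lt;/code&gt;&lt;/pre&gt;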
+&lt;h2 id="but-you-mentioned-gotchas-about-quoting"&gt;But you mentioned
+“gotchas” about quoting!&lt;/h2&gt;
+&lt;p&gt;But I explained that quoting simply inhibits word splitting, which
+you pretty much never want when working with arrays. If, for some odd
+reason, you do want word splitting, then that’s when you don’t quote.
+Simple, easy to understand.&lt;/p&gt;
+&lt;p&gt;I think possibly the only case where you do want word splitting with
+an array is when you didn’t want an array, but it’s what you get
+(arguments are, by necessity, an array). For example:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# Usage: path_ls PATH1 PATH2…
+# Description:
+# Takes any number of PATH-style values; that is,
+# colon-separated lists of directories, and prints a
+# newline-separated list of executables found in them.
+# Bugs:
+# Does not correctly handle programs with a newline in the name,
+# as the output is newline-separated.
+path_ls() {
+ local IFS dirs
+ IFS=:
+ dirs=($@) # The odd-ball time that it needs to be unquoted
+ find -L &amp;quot;${dirs[@]}&amp;quot; -maxdepth 1 -type f -executable \
+ -printf &amp;#39;%f\n&amp;#39; 2&amp;gt;/dev/null | sort -u
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Logically, there shouldn’t be multiple arguments, just a single
+&lt;code&gt;$PATH&lt;/code&gt; value; but, we can’t enforce that, as the array can
+have any size. So, we do the robust thing, and just act on the entire
+array, not really caring about the fact that it is an array. Alas, there
+is still a field-separation bug in the program, with the output.&lt;/p&gt;
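+&lt;p&gt;For instance, it might be called as:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;path_ls &amp;quot;$PATH&amp;quot;         # list executables in your current $PATH
+path_ls /bin:/usr/bin      # or in any explicit PATH-style value&lt;/code&gt;&lt;/pre&gt;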
+&lt;h2 id="i-still-dont-think-i-need-arrays-in-my-scripts"&gt;I still don’t
+think I need arrays in my scripts&lt;/h2&gt;
+&lt;p&gt;Consider the common code:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;ARGS=&amp;#39; -f -q&amp;#39;
+…
+command $ARGS # unquoted variables are a bad code-smell anyway&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Here, &lt;code&gt;$ARGS&lt;/code&gt; is field-separated by &lt;code&gt;$IFS&lt;/code&gt;,
+which we are assuming has the default value. This is fine, as long as
+&lt;code&gt;$ARGS&lt;/code&gt; is known to never need an embedded space, which you
+can only guarantee as long as it isn’t built from anything outside of
+the program. But
+wait until you want to do this:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;ARGS=&amp;#39; -f -q&amp;#39;
+…
+if [[ -f &amp;quot;$filename&amp;quot; ]]; then
+ ARGS+=&amp;quot; -F $filename&amp;quot;
+fi
+…
+command $ARGS&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;Now you’re hosed if &lt;code&gt;$filename&lt;/code&gt; contains a space! More
+than just breaking, it could have unwanted side effects, such as when
+someone figures out how to make
+&lt;code&gt;filename='foo --dangerous-flag'&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;Compare that with the array version:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;ARGS=(-f -q)
+…
+if [[ -f &amp;quot;$filename&amp;quot; ]]; then
+ ARGS+=(-F &amp;quot;$filename&amp;quot;)
+fi
+…
+command &amp;quot;${ARGS[@]}&amp;quot;&lt;/code&gt;&lt;/pre&gt;
+&lt;h2 id="what-about-portability"&gt;What about portability?&lt;/h2&gt;
+&lt;p&gt;Except for the little stubs that call another program with
+&lt;code&gt;"$@"&lt;/code&gt; at the end, trying to write for multiple shells
+(including the ambiguous &lt;code&gt;/bin/sh&lt;/code&gt;) is not a task for mere
+mortals. If you do try that, your best bet is probably sticking to
+POSIX. Arrays are not POSIX; except for the arguments array, which is;
+though getting subset arrays from &lt;code&gt;$@&lt;/code&gt; and &lt;code&gt;$*&lt;/code&gt; is
+not (tip: use &lt;code&gt;set --&lt;/code&gt; to re-purpose the arguments
+array).&lt;/p&gt;
+&lt;p&gt;Writing for various versions of Bash, though, is pretty do-able.
+Everything here works all the way back to bash-2.0 (December 1996), with
+the following exceptions:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;The &lt;code&gt;+=&lt;/code&gt; operator wasn’t added until Bash 3.1.&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;As a work-around, use
+&lt;code&gt;array[${#array[*]}]=&lt;var&gt;word&lt;/var&gt;&lt;/code&gt; to append a single
+element (see the sketch after this list).&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Accessing subset arrays of the arguments array is inconsistent if
+&lt;var&gt;pos&lt;/var&gt;=0 in &lt;code&gt;${@:&lt;var&gt;pos&lt;/var&gt;:&lt;var&gt;len&lt;/var&gt;}&lt;/code&gt;.&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;In Bash 2.x and 3.x, it works as expected, except that argument 0 is
+silently missing. For example &lt;code&gt;${@:0:3}&lt;/code&gt; gives arguments 1
+and 2; whereas &lt;code&gt;${@:1:3}&lt;/code&gt; gives arguments 1, 2, and 3. This
+means that if &lt;var&gt;pos&lt;/var&gt;=0, then only &lt;var&gt;len&lt;/var&gt;-1 arguments are
+given back.&lt;/li&gt;
+&lt;li&gt;In Bash 4.0, argument 0 can be accessed, but if &lt;var&gt;pos&lt;/var&gt;=0,
+then it only gives back &lt;var&gt;len&lt;/var&gt;-1 arguments. So,
+&lt;code&gt;${@:0:3}&lt;/code&gt; gives arguments 0 and 1.&lt;/li&gt;
+&lt;li&gt;In Bash 4.1 and higher, it works in the way described in the main
+part of this document.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
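+&lt;p&gt;Here is a sketch of that pre-3.1 append work-around:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;array=(foo)
+array[${#array[*]}]=bar   # like array+=(bar), assuming no gaps in the indices
+array[${#array[*]}]=baz   # array is now: foo bar baz&lt;/code&gt;&lt;/pre&gt;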
+&lt;p&gt;Now, Bash 1.x doesn’t have arrays at all. &lt;code&gt;$@&lt;/code&gt; and
+&lt;code&gt;$*&lt;/code&gt; work, but using &lt;code&gt;:&lt;/code&gt; to select a range of
+elements from them doesn’t. Good thing most boxes have been updated
+since 1996!&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2013 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./git-go-pre-commit.html"/>
+ <link rel="alternate" type="text/markdown" href="./git-go-pre-commit.md"/>
+ <id>https://lukeshu.com/blog/git-go-pre-commit.html</id>
+ <updated>2013-10-12T00:00:00+00:00</updated>
+ <published>2013-10-12T00:00:00+00:00</published>
+ <title>A git pre-commit hook for automatically formatting Go code</title>
+ <content type="html">&lt;h1 id="a-git-pre-commit-hook-for-automatically-formatting-go-code"&gt;A
+git pre-commit hook for automatically formatting Go code&lt;/h1&gt;
+&lt;p&gt;One of the (many) wonderful things about the Go programming language
+is the &lt;code&gt;gofmt&lt;/code&gt; tool, which formats your source in a canonical
+way. I thought it would be nice to integrate this in my &lt;code&gt;git&lt;/code&gt;
+workflow by adding it in a pre-commit hook to automatically format my
+source code when I committed it.&lt;/p&gt;
+&lt;p&gt;The Go distribution contains a git pre-commit hook that checks
+whether the source code is formatted, and aborts the commit if it isn’t.
+I don’t remember if I was aware of this at the time (or if it even
+existed at the time, or if it is new), but I wanted it to go ahead and
+format the code for me.&lt;/p&gt;
+&lt;p&gt;I found a few solutions online, but they were all missing
+something—support for partial commits. I frequently use
+&lt;code&gt;git add -p&lt;/code&gt;/&lt;code&gt;git gui&lt;/code&gt; to commit a subset of the
+changes I’ve made to a file; the existing solutions would end up adding
+the entire set of changes to my commit.&lt;/p&gt;
+&lt;p&gt;I ended up writing a solution that only formats the version of the
+file that is staged for commit; here’s my
+&lt;code&gt;.git/hooks/pre-commit&lt;/code&gt;:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;#!/bin/bash
+
+# This would only loop over files that are already staged for commit.
+# git diff --cached --numstat |
+# while read add del file; do
+# …
+# done
+
+shopt -s globstar
+for file in **/*.go; do
+ tmp=&amp;quot;$(mktemp &amp;quot;$file.bak.XXXXXXXXXX&amp;quot;)&amp;quot;
+ mv &amp;quot;$file&amp;quot; &amp;quot;$tmp&amp;quot;
+ git checkout &amp;quot;$file&amp;quot;
+ gofmt -w &amp;quot;$file&amp;quot;
+ git add &amp;quot;$file&amp;quot;
+ mv &amp;quot;$tmp&amp;quot; &amp;quot;$file&amp;quot;
+done&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;It’s still not perfect. It will try to operate on every
+&lt;code&gt;*.go&lt;/code&gt; file—which might do weird things if you have a file
+that hasn’t been checked in at all. This also has the effect of
+formatting files that were checked in without being formatted, but
+weren’t modified in this commit.&lt;/p&gt;
+&lt;p&gt;I don’t remember why I did that—as you can see from the comment, I
+knew how to only select files that were staged for commit. I haven’t
+worked on any projects in Go in a while—if I return to one of them, and
+remember why I did that, I will update this page.&lt;/p&gt;
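+&lt;p&gt;For reference, here is a minimal sketch of the staged-files-only
+loop from the comment above (an untested sketch; note that it formats
+the working-tree copy directly, so unlike the hook above it would not
+preserve partial commits):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;#!/bin/bash
+# Only loop over .go files that are staged for this commit.
+git diff --cached --name-only --diff-filter=ACM -- &amp;#39;*.go&amp;#39; |
+while read -r file; do
+    gofmt -w &amp;quot;$file&amp;quot;
+    git add &amp;quot;$file&amp;quot;
+done&lt;/code&gt;&lt;/pre&gt;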
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2013 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="http://www.wtfpl.net/txt/copying/"&gt;WTFPL-2&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./fd_printf.html"/>
+ <link rel="alternate" type="text/markdown" href="./fd_printf.md"/>
+ <id>https://lukeshu.com/blog/fd_printf.html</id>
+ <updated>2013-10-12T00:00:00+00:00</updated>
+ <published>2013-10-12T00:00:00+00:00</published>
+ <title>`dprintf`: print formatted text directly to a file descriptor</title>
+ <content type="html">&lt;h1
+id="dprintf-print-formatted-text-directly-to-a-file-descriptor"&gt;&lt;code&gt;dprintf&lt;/code&gt;:
+print formatted text directly to a file descriptor&lt;/h1&gt;
+&lt;p&gt;This already existed as &lt;code&gt;dprintf(3)&lt;/code&gt;. I now feel stupid
+for having implemented &lt;code&gt;fd_printf&lt;/code&gt;.&lt;/p&gt;
+&lt;p&gt;The original post is as follows:&lt;/p&gt;
+&lt;hr /&gt;
+&lt;p&gt;I wrote this while debugging some code, and thought it might be
+useful to others:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;#define _GNU_SOURCE /* vasprintf() */
+#include &amp;lt;stdarg.h&amp;gt; /* va_start()/va_end() */
+#include &amp;lt;stdio.h&amp;gt; /* vasprintf() */
+#include &amp;lt;stdlib.h&amp;gt; /* free() */
+#include &amp;lt;unistd.h&amp;gt; /* write() */
+
+int
+fd_printf(int fd, const char *format, ...)
+{
+ va_list arg;
+ int len;
+ char *str;
+
+ va_start(arg, format);
+ len = vasprintf(&amp;amp;str, format, arg);
+ va_end(arg);
+
+ write(fd, str, len);
+
+ free(str);
+ return len;
+}&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;It is a version of &lt;code&gt;printf&lt;/code&gt; that prints to a file
+descriptor—where &lt;code&gt;fprintf&lt;/code&gt; prints to a &lt;code&gt;FILE*&lt;/code&gt;
+data structure.&lt;/p&gt;
+&lt;p&gt;The appeal of this is that &lt;code&gt;FILE*&lt;/code&gt; I/O is buffered—which
+means mixing it with raw file descriptor I/O is going to produce weird
+results; writing straight to the file descriptor avoids that.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2013 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="http://www.wtfpl.net/txt/copying/"&gt;WTFPL-2&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./emacs-as-an-os.html"/>
+ <link rel="alternate" type="text/markdown" href="./emacs-as-an-os.md"/>
+ <id>https://lukeshu.com/blog/emacs-as-an-os.html</id>
+ <updated>2013-08-29T00:00:00+00:00</updated>
+ <published>2013-08-29T00:00:00+00:00</published>
+ <title>Emacs as an operating system</title>
+ <content type="html">&lt;h1 id="emacs-as-an-operating-system"&gt;Emacs as an operating system&lt;/h1&gt;
+&lt;p&gt;This was originally published on &lt;a
+href="https://news.ycombinator.com/item?id=6292742"&gt;Hacker News&lt;/a&gt; on
+2013-08-29.&lt;/p&gt;
+&lt;p&gt;Calling Emacs an OS is dubious; it certainly isn’t a general purpose
+OS, and won’t run on real hardware. But, let me make the case that Emacs
+is an OS.&lt;/p&gt;
+&lt;p&gt;Emacs has two parts, the C part, and the Emacs Lisp part.&lt;/p&gt;
+&lt;p&gt;The C part isn’t just a Lisp interpreter; it is a Lisp Machine
+emulator. It doesn’t particularly resemble any of the real Lisp
+machines. The TCP, Keyboard/Mouse, display support, and filesystem are
+done at the hardware level (the operations to work with these things are
+among the primitive operations provided by the hardware). Of these, the
+display being handled by the hardware isn’t particularly uncommon,
+historically; the filesystem is a little stranger.&lt;/p&gt;
+&lt;p&gt;The Lisp part of Emacs is the operating system that runs on that
+emulated hardware. It’s not a particularly powerful OS; it’s not a
+multitasking system. It has many packages available for it (though not
+until recently was there an official package manager). It has reasonably
+powerful IPC mechanisms. It has shells, mail clients (MUAs and MSAs),
+web browsers, web servers and more, all written entirely in Emacs
+Lisp.&lt;/p&gt;
+&lt;p&gt;You might say, “but a lot of that is being done by the host operating
+system!” Sure, some of it is, but all of it is sufficiently low level.
+If you wanted to share the filesystem with another OS running in a VM,
+you might do it by sharing it as a network filesystem; this is necessary
+when the VM OS is not designed around running in a VM. However, because
+Emacs OS will always be running in the Emacs VM, we can optimize it by
+having the Emacs VM include processor features that map to the native OS,
+and have the Emacs OS be aware of them. It would be slower and more code
+to do that all over the network.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2013 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./emacs-shells.html"/>
+ <link rel="alternate" type="text/markdown" href="./emacs-shells.md"/>
+ <id>https://lukeshu.com/blog/emacs-shells.html</id>
+ <updated>2013-04-09T00:00:00+00:00</updated>
+ <published>2013-04-09T00:00:00+00:00</published>
+ <title>A summary of Emacs' bundled shell and terminal modes</title>
+ <content type="html">&lt;h1 id="a-summary-of-emacs-bundled-shell-and-terminal-modes"&gt;A summary
+of Emacs’ bundled shell and terminal modes&lt;/h1&gt;
+&lt;p&gt;This is based on a post on &lt;a
+href="http://www.reddit.com/r/emacs/comments/1bzl8b/how_can_i_get_a_dumbersimpler_shell_in_emacs/c9blzyb"&gt;reddit&lt;/a&gt;,
+published on 2013-04-09.&lt;/p&gt;
+&lt;p&gt;Emacs comes bundled with a few different shell and terminal modes. It
+can be hard to keep them straight. What’s the difference between
+&lt;code&gt;M-x term&lt;/code&gt; and &lt;code&gt;M-x ansi-term&lt;/code&gt;?&lt;/p&gt;
+&lt;p&gt;Here’s a good breakdown of the different bundled shells and terminals
+for Emacs, from dumbest to most Emacs-y.&lt;/p&gt;
+&lt;h2 id="term-mode"&gt;term-mode&lt;/h2&gt;
+&lt;p&gt;Your VT100-esque terminal emulator; it does what most terminal
+programs do. Ncurses-things work OK, but dumping large amounts of text
+can be slow. By default it asks you which shell to run, defaulting to
+the environmental variable &lt;code&gt;$SHELL&lt;/code&gt; (&lt;code&gt;/bin/bash&lt;/code&gt;
+for me). There are two modes of operation:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;char mode: Keys are sent immediately to the shell (including keys
+that are normally Emacs keystrokes), with the following exceptions:
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;(term-escape-char) (term-escape-char)&lt;/code&gt; sends
+&lt;code&gt;(term-escape-char)&lt;/code&gt; to the shell (see below for what the
+default value is).&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;(term-escape-char) &amp;lt;anything-else&amp;gt;&lt;/code&gt; equates
+to &lt;code&gt;C-x &amp;lt;anything-else&amp;gt;&lt;/code&gt; in normal
+Emacs.&lt;/li&gt;
+&lt;li&gt;&lt;code&gt;(term-escape-char) C-j&lt;/code&gt; switches to line mode.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;li&gt;line mode: Editing is done like in a normal Emacs buffer,
+&lt;code&gt;&amp;lt;enter&amp;gt;&lt;/code&gt; sends the current line to the shell. This is
+useful for working with a program’s output.
+&lt;ul&gt;
+&lt;li&gt;&lt;code&gt;C-c C-k&lt;/code&gt; switches to char mode.&lt;/li&gt;
+&lt;/ul&gt;&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;This mode is activated with&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;; Creates or switches to an existing &amp;quot;*terminal*&amp;quot; buffer.
+; The default &amp;#39;term-escape-char&amp;#39; is &amp;quot;C-c&amp;quot;
+M-x term&lt;/code&gt;&lt;/pre&gt;
+&lt;p&gt;or&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;; Creates a new &amp;quot;*ansi-term*&amp;quot; or &amp;quot;*ansi-term*&amp;lt;n&amp;gt;&amp;quot; buffer.
+; The default &amp;#39;term-escape-char&amp;#39; is &amp;quot;C-c&amp;quot; and &amp;quot;C-x&amp;quot;
+M-x ansi-term&lt;/code&gt;&lt;/pre&gt;
+&lt;h2 id="shell-mode"&gt;shell-mode&lt;/h2&gt;
+&lt;p&gt;The name is a misnomer; shell-mode is a terminal emulator, not a
+shell; it’s called that because it is used for running a shell (bash,
+zsh, …). The idea of this mode is to use an external shell, but make it
+Emacs-y. History is not handled by the shell, but by Emacs;
+&lt;code&gt;M-p&lt;/code&gt; and &lt;code&gt;M-n&lt;/code&gt; access the history, while
+arrows/&lt;code&gt;C-p&lt;/code&gt;/&lt;code&gt;C-n&lt;/code&gt; move the point (which is
+consistent with other Emacs REPL-type interfaces). It ignores VT100-type
+terminal colors, and colorizes things itself (it inspects words to see
+if they are directories, in the case of &lt;code&gt;ls&lt;/code&gt;). This has the
+benefit that it does syntax highlighting on the currently being typed
+command. Ncurses programs will of course not work. This mode is
+activated with:&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;M-x shell&lt;/code&gt;&lt;/pre&gt;
+&lt;h2 id="eshell-mode"&gt;eshell-mode&lt;/h2&gt;
+&lt;p&gt;This is a shell+terminal, entirely written in Emacs Lisp.
+(Interestingly, it doesn’t set &lt;code&gt;$SHELL&lt;/code&gt;, so that will be
+whatever it was when you launched Emacs). This won’t even be running zsh
+or bash, it will be running “esh”, part of Emacs.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2013 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./term-colors.html"/>
+ <link rel="alternate" type="text/markdown" href="./term-colors.md"/>
+ <id>https://lukeshu.com/blog/term-colors.html</id>
+ <updated>2013-03-21T00:00:00+00:00</updated>
+ <published>2013-03-21T00:00:00+00:00</published>
+ <title>An explanation of common terminal emulator color codes</title>
+ <content type="html">&lt;h1 id="an-explanation-of-common-terminal-emulator-color-codes"&gt;An
+explanation of common terminal emulator color codes&lt;/h1&gt;
+&lt;p&gt;This is based on a post on &lt;a
+href="http://www.reddit.com/r/commandline/comments/1aotaj/solarized_is_a_sixteen_color_palette_designed_for/c8ztxpt?context=1"&gt;reddit&lt;/a&gt;,
+published on 2013-03-21.&lt;/p&gt;
+&lt;blockquote&gt;
+&lt;p&gt;So all terminals support the same 256 colors? What about 88 color
+mode: is that a subset?&lt;/p&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;TL;DR: yes&lt;/p&gt;
+&lt;p&gt;Terminal compatibility is crazy complex, because nobody actually
+reads the spec, they just write something that is compatible for their
+tests. Then things have to be compatible with that terminal’s
+quirks.&lt;/p&gt;
+&lt;p&gt;But, here’s how 8-color, 16-color, and 256 color work. IIRC, 88 color
+is a subset of the 256 color scheme, but I’m not sure.&lt;/p&gt;
+&lt;p&gt;&lt;strong&gt;8 colors: (actually 9)&lt;/strong&gt; First we had 8 colors (9 with
+“default”, which doesn’t have to be one of the 8). These are always
+roughly the same color: black, red, green, yellow/orange, blue, purple,
+cyan, and white, which are colors 0–7 respectively. Color 9 is
+default.&lt;/p&gt;
+&lt;p&gt;&lt;strong&gt;16 colors: (actually 18)&lt;/strong&gt; Later, someone wanted to
+add more colors, so they added a “bright” attribute. So when bright is
+on, you get “bright red” instead of “red”. Hence 8*2=16 (plus two more
+for “default” and “bright default”).&lt;/p&gt;
+&lt;p&gt;&lt;strong&gt;256 colors: (actually 274)&lt;/strong&gt; You may have noticed,
+colors 0–7 and 9 are used, but 8 isn’t. So, someone decided that color 8
+should put the terminal into 256 color mode. In this mode, it reads
+another byte, which is an index into a 256-entry palette (the 16 colors
+from 16-color mode, a 6×6×6 RGB color cube, and 24 shades of gray). The
+bright property has no effect on these colors. However,
+a terminal can display 256-color-mode colors and 16-color-mode colors at
+the same time, so you actually get 256+18 colors.&lt;/p&gt;
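+&lt;p&gt;If you want to poke at these from a shell, here’s a quick sketch
+using the usual xterm-style escape sequences (&lt;code&gt;31&lt;/code&gt; selects
+red, &lt;code&gt;1&lt;/code&gt; sets the bright attribute, and
+&lt;code&gt;38;5;&lt;var&gt;n&lt;/var&gt;&lt;/code&gt; selects entry &lt;var&gt;n&lt;/var&gt; of the
+256-color palette):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;printf &amp;#39;\e[31mred\e[0m \e[1;31mbright red\e[0m\n&amp;#39;   # 16-color mode
+printf &amp;#39;\e[38;5;202mpalette entry 202\e[0m\n&amp;#39;       # 256-color mode&lt;/code&gt;&lt;/pre&gt;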
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2013 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./fs-licensing-explanation.html"/>
+ <link rel="alternate" type="text/markdown" href="./fs-licensing-explanation.md"/>
+ <id>https://lukeshu.com/blog/fs-licensing-explanation.html</id>
+ <updated>2013-02-21T00:00:00+00:00</updated>
+ <published>2013-02-21T00:00:00+00:00</published>
+ <title>An explanation of how "copyleft" licensing works</title>
+ <content type="html">&lt;h1 id="an-explanation-of-how-copyleft-licensing-works"&gt;An explanation
+of how “copyleft” licensing works&lt;/h1&gt;
+&lt;p&gt;This is based on a post on &lt;a
+href="http://www.reddit.com/r/freesoftware/comments/18xplw/can_software_be_free_gnu_and_still_be_owned_by_an/c8ixwq2"&gt;reddit&lt;/a&gt;,
+published on 2013-02-21.&lt;/p&gt;
+&lt;blockquote&gt;
+&lt;p&gt;While reading the man page for readline I noticed the copyright
+section said “Readline is Copyright (C) 1989-2011 Free Software
+Foundation Inc”. How can software be both licensed under GNU and
+copyrighted to a single group? It was my understanding that once code
+became free it didn’t belong to any particular group or individual.&lt;/p&gt;
+&lt;p&gt;[LiveCode is GPLv3, but also sells non-free licenses] Can you really
+have the same code under two conflicting licenses? Once licensed under
+GPL3 wouldn’t they too be required to adhere to its rules?&lt;/p&gt;
+&lt;/blockquote&gt;
+&lt;p&gt;I believe that GNU/the FSF has an FAQ that addresses this, but I
+can’t find it, so here we go.&lt;/p&gt;
+&lt;h3 id="glossary"&gt;Glossary:&lt;/h3&gt;
+&lt;ul&gt;
+&lt;li&gt;“&lt;em&gt;Copyright&lt;/em&gt;” is the right to control how copies are made of
+something.&lt;/li&gt;
+&lt;li&gt;Something for which no one holds the copyright is in the “&lt;em&gt;public
+domain&lt;/em&gt;”, because anyone (“the public”) is allowed to do
+&lt;em&gt;anything&lt;/em&gt; with it.&lt;/li&gt;
+&lt;li&gt;A “&lt;em&gt;license&lt;/em&gt;” is basically a legal document that says “I
+promise not to sue you if you make copies in these specific ways.”&lt;/li&gt;
+&lt;li&gt;A “&lt;em&gt;non-free&lt;/em&gt;” license basically says “There are no
+conditions under which you can make copies without me suing you.”&lt;/li&gt;
+&lt;li&gt;A “&lt;em&gt;permissive&lt;/em&gt;” (type of free) license basically says “You
+can do whatever you want, BUT you have to give me credit”, and is very
+similar to the public domain. If the copyright holder didn’t have the
+copyright, they couldn’t sue you to make sure that you gave them credit,
+and nobody would have to give them credit.&lt;/li&gt;
+&lt;li&gt;A “&lt;em&gt;copyleft&lt;/em&gt;” (type of free) license basically says, “You
+can do whatever you want, BUT anyone who gets a copy from you has to be
+able to do whatever they want too.” If the copyright holder didn’t have
+the copyright, they couldn’t sue you to make sure that you gave the
+source to people who got it from you, and non-free versions of these
+programs would start to exist.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;h3 id="specific-questions"&gt;Specific questions:&lt;/h3&gt;
+&lt;p&gt;Readline: The GNU GPL is a copyleft license. If you make a modified
+version of Readline, and give it to others without letting them have the
+source code, the FSF will sue you. They can do this because they have
+the copyright on Readline, and in the GNU GPL (the license they used) it
+only says that they won’t sue you if you distribute the source with the
+modified version. If they didn’t have the copyright, they couldn’t sue
+you, and the GNU GPL would be worthless.&lt;/p&gt;
+&lt;p&gt;LiveCode: The copyright holder for something is not required to obey
+the license—the license is only a promise not to sue you; of course they
+won’t sue themselves. They can also offer different terms to different
+people. They can tell most people “I won’t sue you as long as you share
+the source,” but if someone gave them a little money, they might say, “I
+also promise not to sue this guy, even if he doesn’t give out the
+source.”&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2013 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./pacman-overview.html"/>
+ <link rel="alternate" type="text/markdown" href="./pacman-overview.md"/>
+ <id>https://lukeshu.com/blog/pacman-overview.html</id>
+ <updated>2013-01-23T00:00:00+00:00</updated>
+ <published>2013-01-23T00:00:00+00:00</published>
+ <title>A quick overview of usage of the Pacman package manager</title>
+ <content type="html">&lt;h1 id="a-quick-overview-of-usage-of-the-pacman-package-manager"&gt;A quick
+overview of usage of the Pacman package manager&lt;/h1&gt;
+&lt;p&gt;This was originally published on &lt;a
+href="https://news.ycombinator.com/item?id=5101416"&gt;Hacker News&lt;/a&gt; on
+2013-01-23.&lt;/p&gt;
+&lt;p&gt;Note: I’ve over-done quotation marks to make it clear when precise
+wording matters.&lt;/p&gt;
+&lt;p&gt;&lt;code&gt;pacman&lt;/code&gt; is a little awkward, but I prefer it to apt/dpkg,
+which have sub-commands, each with their own flags, some of which are
+undocumented. pacman, on the other hand, has ALL options documented in
+one fairly short man page.&lt;/p&gt;
+&lt;p&gt;The trick to understanding pacman is to understand how it maintains
+databases of packages, and what it means to “sync”.&lt;/p&gt;
+&lt;p&gt;There are several “databases” that pacman deals with:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;“the database”, (&lt;code&gt;/var/lib/pacman/local/&lt;/code&gt;)&lt;br&gt; The
+database of currently installed packages&lt;/li&gt;
+&lt;li&gt;“package databases”,
+(&lt;code&gt;/var/lib/pacman/sync/${repo}.db&lt;/code&gt;)&lt;br&gt; There is one of these
+for each repository. It is a file that is fetched over plain http(s)
+from the server; it is not modified locally, only updated.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;The “operation” of pacman is set with a capital flag, one of “DQRSTU”
+(plus &lt;code&gt;-V&lt;/code&gt; and &lt;code&gt;-h&lt;/code&gt; for version and help). Of
+these, “DTU” are “low-level” (analogous to dpkg) and “QRS” are
+“high-level” (analogous to apt).&lt;/p&gt;
+&lt;p&gt;To give a brief explanation of the “high-level” operations, and
+which databases they deal with:&lt;/p&gt;
+&lt;ul&gt;
+&lt;li&gt;“Q” Queries “the database” of locally installed packages.&lt;/li&gt;
+&lt;li&gt;“S” deals with “package databases”, and Syncing “the database” with
+them; meaning it installs/updates packages that are in package
+databases, but not installed on the local system.&lt;/li&gt;
+&lt;li&gt;“R” Removes packages from “the database”; removing them from the local
+system.&lt;/li&gt;
+&lt;/ul&gt;
+&lt;p&gt;The biggest “gotcha” is that “S” deals with all operations with
+“package databases”, not just syncing “the database” with them.&lt;/p&gt;
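+&lt;p&gt;To make that concrete, here are a few common invocations (a quick
+sketch, not an exhaustive list):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;pacman -Syu      # S: update the package databases (y), then upgrade the system (u)
+pacman -S vim    # S: install/upgrade vim from the package databases
+pacman -Qi vim   # Q: query the local database about vim
+pacman -R vim    # R: remove vim from the local system&lt;/code&gt;&lt;/pre&gt;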
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2013 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./poor-system-documentation.html"/>
+ <link rel="alternate" type="text/markdown" href="./poor-system-documentation.md"/>
+ <id>https://lukeshu.com/blog/poor-system-documentation.html</id>
+ <updated>2012-09-12T00:00:00+00:00</updated>
+ <published>2012-09-12T00:00:00+00:00</published>
+ <title>Why documentation on GNU/Linux sucks</title>
+ <content type="html">&lt;h1 id="why-documentation-on-gnulinux-sucks"&gt;Why documentation on
+GNU/Linux sucks&lt;/h1&gt;
+&lt;p&gt;This is based on a post on &lt;a
+href="http://www.reddit.com/r/archlinux/comments/zoffo/systemd_we_will_keep_making_it_the_distro_we_like/c66uu57"&gt;reddit&lt;/a&gt;,
+published on 2012-09-12.&lt;/p&gt;
+&lt;p&gt;The documentation situation on GNU/Linux based operating systems is
+right now a mess. In the world of documentation, there are basically 3
+camps, the “UNIX” camp, the “GNU” camp, and the “GNU/Linux” camp.&lt;/p&gt;
+&lt;p&gt;The UNIX camp is the &lt;code&gt;man&lt;/code&gt; page camp, they have quality,
+terse but informative man pages, on &lt;em&gt;everything&lt;/em&gt;, including the
+system’s design and all system files. If it was up to the UNIX camp,
+&lt;code&gt;man grub.cfg&lt;/code&gt;, &lt;code&gt;man grub.d&lt;/code&gt;, and
+&lt;code&gt;man grub-mkconfig_lib&lt;/code&gt; would exist and actually be helpful.
+The man page would either include inline examples, or point you to a
+directory. If I were to print off all of the man pages, it would
+actually be a useful manual for the system.&lt;/p&gt;
+&lt;p&gt;Then the GNU camp is the &lt;code&gt;info&lt;/code&gt; camp. They basically thought
+that each piece of software was more complex than a man page could
+handle. They essentially think that some individual pieces software
+warrant a book. So, they developed the &lt;code&gt;info&lt;/code&gt; system. The
+info pages are usually quite high quality, but are very long, and a pain
+if you just want a quick look. The &lt;code&gt;info&lt;/code&gt; system can generate
+good HTML (and PDF, etc.) documentation. But the standard
+&lt;code&gt;info&lt;/code&gt; is awkward as hell to use for non-Emacs users.&lt;/p&gt;
+&lt;p&gt;Then we have the “GNU/Linux” camp, they use GNU software, but want to
+use &lt;code&gt;man&lt;/code&gt; pages. This means that we get low-quality man pages
+for GNU software, and then we don’t have a good baseline for
+documentation; developers each try to create their own. The
+documentation that gets written is frequently either low-quality, or
+non-standard. A lot of man pages are auto-generated from
+&lt;code&gt;--help&lt;/code&gt; output or info pages, meaning they are either not
+helpful, or overly verbose with low information density. This camp gets
+the worst of both worlds, and a few problems of its own.&lt;/p&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2012 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+ <entry xmlns="http://www.w3.org/2005/Atom">
+ <link rel="alternate" type="text/html" href="./arch-systemd.html"/>
+ <link rel="alternate" type="text/markdown" href="./arch-systemd.md"/>
+ <id>https://lukeshu.com/blog/arch-systemd.html</id>
+ <updated>2012-09-11T00:00:00+00:00</updated>
+ <published>2012-09-11T00:00:00+00:00</published>
+ <title>What Arch Linux's switch to systemd means for users</title>
+ <content type="html">&lt;h1 id="what-arch-linuxs-switch-to-systemd-means-for-users"&gt;What Arch
+Linux’s switch to systemd means for users&lt;/h1&gt;
+&lt;p&gt;This is based on a post on &lt;a
+href="http://www.reddit.com/r/archlinux/comments/zoffo/systemd_we_will_keep_making_it_the_distro_we_like/c66nrcb"&gt;reddit&lt;/a&gt;,
+published on 2012-09-11.&lt;/p&gt;
+&lt;p&gt;systemd is a replacement for UNIX System V-style init; instead of
+having &lt;code&gt;/etc/init.d/*&lt;/code&gt; or &lt;code&gt;/etc/rc.d/*&lt;/code&gt; scripts,
+systemd runs in the background to manage them.&lt;/p&gt;
+&lt;p&gt;This has the &lt;strong&gt;advantages&lt;/strong&gt; that there is proper
+dependency tracking, easing the life of the administrator and allowing
+for things to be run in parallel safely. It also uses “targets” instead
+of “init levels”, which just makes more sense. It also means that a
+target can be started or stopped on the fly, such as mounting or
+unmounting a drive, which has in the past only been done at boot up and
+shut down.&lt;/p&gt;
+&lt;p&gt;The &lt;strong&gt;downside&lt;/strong&gt; is that it is (allegedly) big,
+bloated&lt;a href="#fn1" class="footnote-ref" id="fnref1"
+role="doc-noteref"&gt;&lt;sup&gt;1&lt;/sup&gt;&lt;/a&gt;, and does (arguably) more than it
+should. Why is there a dedicated systemd-fsck? Why does systemd
+encapsulate the functionality of syslog? That, and it means somebody is
+standing on my lawn.&lt;/p&gt;
+&lt;p&gt;The &lt;strong&gt;change&lt;/strong&gt; an Arch user needs to worry about is
+that everything is being moved out of &lt;code&gt;/etc/rc.conf&lt;/code&gt;. Arch
+users will still have the choice between systemd and SysV-init, but
+rc.conf is becoming the SysV-init configuration file, rather than the
+general system configuration file. If you will still be using SysV-init,
+basically the only thing in rc.conf will be &lt;code&gt;DAEMONS&lt;/code&gt;.&lt;a
+href="#fn2" class="footnote-ref" id="fnref2"
+role="doc-noteref"&gt;&lt;sup&gt;2&lt;/sup&gt;&lt;/a&gt; For now there is compatibility for
+the variables that used to be there, but that is going away.&lt;/p&gt;
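+&lt;p&gt;Concretely, an entry in the old &lt;code&gt;DAEMONS&lt;/code&gt; array maps to a
+pair of &lt;code&gt;systemctl&lt;/code&gt; calls (a sketch, assuming the package
+ships a unit file; &lt;code&gt;sshd&lt;/code&gt; is just an example):&lt;/p&gt;
+&lt;pre&gt;&lt;code&gt;# before: DAEMONS=(... sshd ...) in /etc/rc.conf
+systemctl enable sshd.service   # start at boot
+systemctl start sshd.service    # start right now&lt;/code&gt;&lt;/pre&gt;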
+&lt;aside id="footnotes" class="footnotes footnotes-end-of-document"
+role="doc-endnotes"&gt;
+&lt;hr /&gt;
+&lt;ol&gt;
+&lt;li id="fn1"&gt;&lt;p&gt;&lt;em&gt;I&lt;/em&gt; don’t think it’s bloated, but that is the
+criticism. Basically, I discount any argument that uses “bloated”
+without backing it up. I was trying to say that it takes a lot of heat
+for being bloated, and that there may be some truth to that (the
+systemd-fsck and syslog comments), but that these claims are largely
+unsubstantiated, and more along the lines of “I would have done it
+differently”. Maybe your ideas are better, but you haven’t written the
+code.&lt;/p&gt;
+&lt;p&gt;I personally don’t have an opinion either way about SysV-init vs
+systemd. I recently migrated my boxes to systemd, but that was because
+the SysV init scripts for NFSv4 in Arch are problematic. I suppose this
+is another &lt;strong&gt;advantage&lt;/strong&gt; I missed: &lt;em&gt;people generally
+consider systemd “units” to be more robust and easier to write than SysV
+“scripts”.&lt;/em&gt;&lt;/p&gt;
+&lt;p&gt;I’m actually not a fan of either. If I had more time on my hands, I’d
+be running a &lt;code&gt;make&lt;/code&gt;-based init system based on a research
+project IBM did a while ago. So I consider myself fairly objective; my
+horse isn’t in this race.&lt;a href="#fnref1" class="footnote-back"
+role="doc-backlink"&gt;↩︎&lt;/a&gt;&lt;/p&gt;&lt;/li&gt;
+&lt;li id="fn2"&gt;&lt;p&gt;You can still have &lt;code&gt;USEDMRAID&lt;/code&gt;,
+&lt;code&gt;USELVM&lt;/code&gt;, &lt;code&gt;interface&lt;/code&gt;, &lt;code&gt;address&lt;/code&gt;,
+&lt;code&gt;netmask&lt;/code&gt;, and &lt;code&gt;gateway&lt;/code&gt;. But those are minor.&lt;a
+href="#fnref2" class="footnote-back" role="doc-backlink"&gt;↩︎&lt;/a&gt;&lt;/p&gt;&lt;/li&gt;
+&lt;/ol&gt;
+&lt;/aside&gt;
+</content>
+ <author><name>Luke T. Shumaker</name><uri>https://lukeshu.com/</uri><email>lukeshu@lukeshu.com</email></author>
+ <rights type="html">&lt;p&gt;The content of this page is Copyright © 2012 &lt;a href="mailto:lukeshu@lukeshu.com"&gt;Luke T. Shumaker&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;This page is licensed under the &lt;a href="https://creativecommons.org/licenses/by-sa/4.0/"&gt;CC BY-SA 4.0&lt;/a&gt; license.&lt;/p&gt;</rights>
+ </entry>
+
+</feed>
diff --git a/public/index.html b/public/index.html
new file mode 100644
index 0000000..2c91707
--- /dev/null
+++ b/public/index.html
@@ -0,0 +1,96 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Web log entries — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » blog</header>
+<article>
+<h1 id="web-log-entries">Web log entries</h1>
+<style>
+li {
+ list-style-type: none;
+}
+time {
+ color: #AAAAAA;
+ font-family: monospace;
+}
+</style>
+<ul>
+<li><time>2023-07-10</time> - <a href="./btrfs-rec.html">Announcing:
+btrfs-rec: Recover (data from) a broken btrfs filesystem</a></li>
+<li><time>2018-02-09</time> - <a href="./posix-pricing.html">POSIX
+pricing and availability; or: Do you really need the PDF?</a></li>
+<li><time>2018-02-09</time> - <a href="./kbd-xmodmap.html">GNU/Linux
+Keyboard Maps: xmodmap</a></li>
+<li><time>2018-02-09</time> - <a href="./crt-sh-architecture.html">The
+interesting architecture of crt.sh</a></li>
+<li><time>2016-09-30</time> - <a href="./http-notes.html">Notes on
+subtleties of HTTP implementation</a></li>
+<li><time>2016-02-28</time> - <a href="./x11-systemd.html">My X11 setup
+with systemd</a></li>
+<li><time>2016-02-28</time> - <a href="./java-segfault-redux.html">My
+favorite bug: segfaults in Java (redux)</a></li>
+<li><time>2015-05-19</time> - <a href="./nginx-mediawiki.html">An Nginx
+configuration for MediaWiki</a></li>
+<li><time>2015-03-22</time> - <a href="./lp2015-videos.html">I took some
+videos at LibrePlanet</a></li>
+<li><time>2015-03-18</time> - <a href="./build-bash-1.html">Building
+Bash 1.14.7 on a modern system</a></li>
+<li><time>2015-02-06</time> - <a
+href="./purdue-cs-login.html">Customizing your login on Purdue CS
+computers (WIP, but updated)</a></li>
+<li><time>2014-11-20</time> - <a href="./make-memoize.html">A
+memoization routine for GNU Make functions</a></li>
+<li><time>2014-09-12</time> - <a href="./ryf-routers.html">I’m excited
+about the new RYF-certified routers from ThinkPenguin</a></li>
+<li><time>2014-09-11</time> - <a
+href="./what-im-working-on-fall-2014.html">What I’m working on (Fall
+2014)</a></li>
+<li><time>2014-05-08</time> - <a
+href="./rails-improvements.html">Miscellaneous ways to improve your
+Rails experience</a></li>
+<li><time>2014-02-13</time> - <a href="./bash-redirection.html">Bash
+redirection</a></li>
+<li><time>2014-01-13</time> - <a href="./java-segfault.html">My favorite
+bug: segfaults in Java</a></li>
+<li><time>2013-10-13</time> - <a href="./bash-arrays.html">Bash
+arrays</a></li>
+<li><time>2013-10-12</time> - <a href="./git-go-pre-commit.html">A git
+pre-commit hook for automatically formatting Go code</a></li>
+<li><time>2013-10-12</time> - <a
+href="./fd_printf.html"><code>dprintf</code>: print formatted text
+directly to a file descriptor</a></li>
+<li><time>2013-08-29</time> - <a href="./emacs-as-an-os.html">Emacs as
+an operating system</a></li>
+<li><time>2013-04-09</time> - <a href="./emacs-shells.html">A summary of
+Emacs’ bundled shell and terminal modes</a></li>
+<li><time>2013-03-21</time> - <a href="./term-colors.html">An
+explanation of common terminal emulator color codes</a></li>
+<li><time>2013-02-21</time> - <a
+href="./fs-licensing-explanation.html">An explanation of how “copyleft”
+licensing works</a></li>
+<li><time>2013-01-23</time> - <a href="./pacman-overview.html">A quick
+overview of usage of the Pacman package manager</a></li>
+<li><time>2012-09-12</time> - <a
+href="./poor-system-documentation.html">Why documentation on GNU/Linux
+sucks</a></li>
+<li><time>2012-09-11</time> - <a href="./arch-systemd.html">What Arch
+Linux’s switch to systemd means for users</a></li>
+</ul>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/index.md b/public/index.md
new file mode 100644
index 0000000..fda3bcc
--- /dev/null
+++ b/public/index.md
@@ -0,0 +1,39 @@
+Web log entries
+===============
+<style>
+li {
+ list-style-type: none;
+}
+time {
+ color: #AAAAAA;
+ font-family: monospace;
+}
+</style>
+
+ * <time>2023-07-10</time> - [Announcing: btrfs-rec: Recover (data from) a broken btrfs filesystem](./btrfs-rec.html)
+ * <time>2018-02-09</time> - [POSIX pricing and availability; or: Do you really need the PDF?](./posix-pricing.html)
+ * <time>2018-02-09</time> - [GNU/Linux Keyboard Maps: xmodmap](./kbd-xmodmap.html)
+ * <time>2018-02-09</time> - [The interesting architecture of crt.sh](./crt-sh-architecture.html)
+ * <time>2016-09-30</time> - [Notes on subtleties of HTTP implementation](./http-notes.html)
+ * <time>2016-02-28</time> - [My X11 setup with systemd](./x11-systemd.html)
+ * <time>2016-02-28</time> - [My favorite bug: segfaults in Java (redux)](./java-segfault-redux.html)
+ * <time>2015-05-19</time> - [An Nginx configuration for MediaWiki](./nginx-mediawiki.html)
+ * <time>2015-03-22</time> - [I took some videos at LibrePlanet](./lp2015-videos.html)
+ * <time>2015-03-18</time> - [Building Bash 1.14.7 on a modern system](./build-bash-1.html)
+ * <time>2015-02-06</time> - [Customizing your login on Purdue CS computers (WIP, but updated)](./purdue-cs-login.html)
+ * <time>2014-11-20</time> - [A memoization routine for GNU Make functions](./make-memoize.html)
+ * <time>2014-09-12</time> - [I'm excited about the new RYF-certified routers from ThinkPenguin](./ryf-routers.html)
+ * <time>2014-09-11</time> - [What I'm working on (Fall 2014)](./what-im-working-on-fall-2014.html)
+ * <time>2014-05-08</time> - [Miscellaneous ways to improve your Rails experience](./rails-improvements.html)
+ * <time>2014-02-13</time> - [Bash redirection](./bash-redirection.html)
+ * <time>2014-01-13</time> - [My favorite bug: segfaults in Java](./java-segfault.html)
+ * <time>2013-10-13</time> - [Bash arrays](./bash-arrays.html)
+ * <time>2013-10-12</time> - [A git pre-commit hook for automatically formatting Go code](./git-go-pre-commit.html)
+ * <time>2013-10-12</time> - [`dprintf`: print formatted text directly to a file descriptor](./fd_printf.html)
+ * <time>2013-08-29</time> - [Emacs as an operating system](./emacs-as-an-os.html)
+ * <time>2013-04-09</time> - [A summary of Emacs' bundled shell and terminal modes](./emacs-shells.html)
+ * <time>2013-03-21</time> - [An explanation of common terminal emulator color codes](./term-colors.html)
+ * <time>2013-02-21</time> - [An explanation of how "copyleft" licensing works](./fs-licensing-explanation.html)
+ * <time>2013-01-23</time> - [A quick overview of usage of the Pacman package manager](./pacman-overview.html)
+ * <time>2012-09-12</time> - [Why documentation on GNU/Linux sucks](./poor-system-documentation.html)
+ * <time>2012-09-11</time> - [What Arch Linux's switch to systemd means for users](./arch-systemd.html)
diff --git a/public/java-segfault-redux.html b/public/java-segfault-redux.html
new file mode 100644
index 0000000..d491dcd
--- /dev/null
+++ b/public/java-segfault-redux.html
@@ -0,0 +1,218 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>My favorite bug: segfaults in Java (redux) — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » java-segfault-redux</header>
+<article>
+<h1 id="my-favorite-bug-segfaults-in-java-redux">My favorite bug:
+segfaults in Java (redux)</h1>
+<p>Two years ago, I <a href="./java-segfault.html">wrote</a> about one
+of my favorite bugs that I’d squashed two years before that. About a
+year after that, someone posted it <a
+href="https://news.ycombinator.com/item?id=9283571">on Hacker
+News</a>.</p>
+<p>There was some fun discussion about it, but also some confusion.
+After finishing a season of mentoring team 4272, I’ve decided that it
+would be fun to re-visit the article, and dig up the actual old code,
+instead of pseudo-code, hopefully improving the clarity (and providing a
+light introduction for anyone wanting to get into modifying the current
+SmartDashboard).</p>
+<h2 id="the-context">The context</h2>
+<p>In 2012, I was a high school senior, and lead programmer
+on the FIRST Robotics Competition team 1024. For the unfamiliar, the
+relevant part of the setup is that there are 2 minute and 15 second
+matches in which you have a 120 pound robot that sometimes runs
+autonomously, and sometimes is controlled over WiFi from a person at a
+laptop running stock “driver station” software and modifiable
+“dashboard” software.</p>
+<p>That year, we mostly used the dashboard software to allow the human
+driver and operator to monitor sensors on the robot, one of them being a
+video feed from a web-cam mounted on it. This was really easy because
+the new standard dashboard program had a click-and-drag interface to add
+stock widgets; you just had to make sure the code on the robot was
+actually sending the data.</p>
+<p>That’s great, until when debugging things, the dashboard would
+suddenly vanish. If it was run manually from a terminal (instead of
+letting the driver station software launch it), you would see a core
+dump indicating a segmentation fault.</p>
+<p>This wasn’t just us either; I spoke with people on other teams,
+everyone who was streaming video had this issue. But, because it only
+happened every couple of minutes, and a match is only 2:15, it didn’t
+need to run very long, they just crossed their fingers and hoped it
+didn’t happen during a match.</p>
+<p>The dashboard was written in Java, and the source was available
+(under a 3-clause BSD license) via read-only SVN at
+<code>http://firstforge.wpi.edu/svn/repos/smart_dashboard/trunk</code>
+(which is unfortunately no longer online; fortunately I’d posted some
+snapshots on the web). So I dove in, hunting for the bug.</p>
+<p>The repository was divided into several NetBeans projects (not
+exhaustively listed):</p>
+<ul>
+<li><a
+href="https://gitorious.org/absfrc/sources.git/?p=absfrc:sources.git;a=blob_plain;f=smartdashboard-client-2012-1-any.src.tar.xz;hb=HEAD"><code>client/smartdashboard</code></a>:
+The main dashboard program, has a plugin architecture.</li>
+<li><a
+href="https://gitorious.org/absfrc/sources.git/?p=absfrc:sources.git;a=blob_plain;f=wpijavacv-208-1-any.src.tar.xz;hb=HEAD"><code>WPIJavaCV</code></a>:
+A higher-level wrapper around JavaCV, itself a Java Native Interface
+(JNI) wrapper to talk to OpenCV (C and C++).</li>
+<li><a
+href="https://gitorious.org/absfrc/sources.git/?p=absfrc:sources.git;a=blob_plain;f=smartdashboard-extension-wpicameraextension-210-1-any.src.tar.xz;hb=HEAD"><code>extensions/camera/WPICameraExtension</code></a>:
+The standard camera feed plugin, processes the video through
+WPIJavaCV.</li>
+</ul>
+<p>I figured that the bug must be somewhere in the C or C++ code that
+was being called by JavaCV, because that’s the language where segfaults
+happen. It was especially a pain to track down the pointers that were
+causing the issue, because it was hard with native debuggers to see
+through all of the JVM stuff to the OpenCV code, and the OpenCV stuff is
+opaque to Java debuggers.</p>
+<p>Eventually the issue led me back into the WPICameraExtension, then
+into WPIJavaCV—there was a native pointer being stored in a Java
+variable; Java code called the native routine to <code>free()</code> the
+structure, but then tried to feed it to another routine later. This led
+to difficulty again—tracking objects with Java debuggers was hard
+because they don’t expect the program to suddenly segfault; it’s Java
+code, Java doesn’t segfault, it throws exceptions!</p>
+<p>With the help of <code>println()</code> I was eventually able to see
+that some code was executing in an order that straight didn’t make
+sense.</p>
+<h2 id="the-bug">The bug</h2>
+<p>The basic flow of WPIJavaCV is you have a <code>WPICamera</code>, and
+you call <code>.getNewImage()</code> on it, which gives you a
+<code>WPIImage</code>, which you could do all kinds of fancy OpenCV
+things on, but then ultimately call <code>.getBufferedImage()</code>,
+which gives you a <code>java.awt.image.BufferedImage</code> that you can
+pass to Swing to draw on the screen. You do this for every frame. Which
+is exactly what <code>WPICameraExtension.java</code> did, except that
+“all kinds of fancy OpenCV things” consisted only of:</p>
+<pre><code>public WPIImage processImage(WPIColorImage rawImage) {
+ return rawImage;
+}</code></pre>
+<p>The idea was that you would extend the class, overriding that one
+method, if you wanted to do anything fancy.</p>
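+<p>That is, a hypothetical extension (the class name here is made up;
+the signature is the one above) would look like:</p>
+<pre><code>public class MyCameraExtension extends WPICameraExtension {
+    @Override
+    public WPIImage processImage(WPIColorImage rawImage) {
+        // ...do all kinds of fancy OpenCV things to rawImage here...
+        return rawImage;
+    }
+}</code></pre>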
+<p>One of the neat things about WPIJavaCV was that every OpenCV object
+class had a <code>finalize()</code> method (inherited from the abstract
+class <code>WPIDisposable</code>) that freed the underlying
+C/C++ memory, so you didn’t have to worry about memory leaks like in
+plain JavaCV. To inherit from <code>WPIDisposable</code>, you had to
+write a <code>disposed()</code> method that actually freed the memory.
+This was better than writing <code>finalize()</code> directly, because
+it did some safety with NULL pointers and idempotency if you wanted to
+manually free something early.</p>
+<p>Now, <code>edu.wpi.first.WPIImage.disposed()</code> called <code><a
+href="https://github.com/bytedeco/javacv/blob/svn/src/com/googlecode/javacv/cpp/opencv_core.java#L398">com.googlecode.javacv.cpp.opencv_core.IplImage</a>.release()</code>,
+which called (via JNI) <code>IplImage::release()</code>, which called
+libc <code>free()</code>:</p>
+<pre><code>@Override
+protected void disposed() {
+ image.release();
+}</code></pre>
+<p>Elsewhere, the C buffer for the image was copied into a Java buffer
+via a similar chain kicked off by
+<code>edu.wpi.first.WPIImage.getBufferedImage()</code>:</p>
+<pre><code>/**
+ * Copies this {@link WPIImage} into a {@link BufferedImage}.
+ * This method will always generate a new image.
+ * @return a copy of the image
+ */
+public BufferedImage getBufferedImage() {
+ validateDisposed();
+
+ return image.getBufferedImage();
+}</code></pre>
+<p>The <code>println()</code> output I saw that didn’t make sense was
+that <code>someFrame.finalize()</code> was running before
+<code>someFrame.getBufferedImage()</code> had returned!</p>
+<p>You see, if the JVM is waiting for the return value of a method
+<code>m()</code> of object <code>a</code>, and the code in
+<code>m()</code> that is yet to be executed doesn’t access any other
+methods or properties of <code>a</code>, then the JVM will go ahead and
+consider <code>a</code> eligible for garbage collection before
+<code>m()</code> has finished running.</p>
+<p>Put another way, <code>this</code> is passed to a method just like
+any other argument. If a method is done accessing <code>this</code>,
+then it’s “safe” for the JVM to go ahead and garbage collect it.</p>
+<p>That is normally a safe “optimization” to make… except for when a
+destructor method (<code>finalize()</code>) is defined for the object;
+the destructor can have side effects, and Java has no way to know
+whether it is safe for them to happen before <code>m()</code> has
+finished running.</p>
+<p>I’m not entirely sure if this is a “bug” in the compiler or the
+language specification, but I do believe that it’s broken behavior.</p>
+<p>Anyway, in this case it’s unsafe with WPI’s code.</p>
+<h2 id="my-work-around">My work-around</h2>
+<p>My work-around was to change this function in
+<code>WPIImage</code>:</p>
+<pre><code>public BufferedImage getBufferedImage() {
+ validateDisposed();
+
+ return image.getBufferedImage(); // `this` may get garbage collected before it returns!
+}</code></pre>
+<p>In the above code, <code>this</code> is a <code>WPIImage</code>, and
+it may get garbage collected between the time that
+<code>image.getBufferedImage()</code> is dispatched, and the time that
+<code>image.getBufferedImage()</code> accesses native memory. When it is
+garbage collected, it calls <code>image.release()</code>, which
+<code>free()</code>s that native memory. That seems pretty unlikely to
+happen; that’s a very small gap of time. However, running 30 times a
+second, eventually bad luck with the garbage collector happens, and the
+program crashes.</p>
+<p>The work-around was to insert a bogus method call to keep
+<code>this</code> around until after we were also done with
+<code>image</code>, changing it to this:</p>
+<pre><code>public BufferedImage getBufferedImage() {
+ validateDisposed();
+ BufferedImage ret = image.getBufferedImage();
+ getWidth(); // bogus call to keep `this` around
+ return ret;
+}</code></pre>
+<p>Yeah. After spending weeks wading through thousands of lines
+of Java, C, and C++, a bogus call to a method I didn’t care about was
+the fix.</p>
+<p>TheLoneWolfling on Hacker News noted that they’d be worried about the
+JVM optimizing out the call to <code>getWidth()</code>. I’m not, because
+<code>WPIImage.getWidth()</code> calls <code>IplImage.width()</code>,
+which is declared as <code>native</code>; the JVM must run it because it
+might have side effects. On the other hand, looking back, I think I just
+shrunk the window for things to go wrong: it may be possible for the
+garbage collection to trigger in the time between
+<code>getWidth()</code> being dispatched and <code>width()</code>
+running. Perhaps there was something in the C/C++ code that made it
+safe, I don’t recall, and don’t care quite enough to dig into OpenCV
+internals again. Or perhaps I’m mis-remembering the fix (which I don’t
+actually have a file of), and I called some other method that
+<em>could</em> get optimized out (though I <em>do</em> believe that it
+was either <code>getWidth()</code> or <code>getHeight()</code>).</p>
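+<p>If I were making this fix today, I’d instead reach for
+<code>java.lang.ref.Reference.reachabilityFence()</code>, which was
+added in Java 9 (well after this story takes place) precisely to keep
+an object alive without relying on a call that might be optimized out.
+A sketch, assuming a Java 9+ runtime:</p>
+<pre><code>public BufferedImage getBufferedImage() {
+    validateDisposed();
+    try {
+        return image.getBufferedImage();
+    } finally {
+        // guarantees `this` stays reachable (and un-finalized) until here
+        java.lang.ref.Reference.reachabilityFence(this);
+    }
+}</code></pre>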
+<h2 id="wpis-fix">WPI’s fix</h2>
+<p>Four years later, the SmartDashboard is still being used! But it no
+longer has this bug, and it’s not using my workaround. So, how did the
+WPILib developers fix it?</p>
+<p>Well, the code now lives <a
+href="https://usfirst.collab.net/gerrit/#/admin/projects/">in git at
+collab.net</a>, so I decided to take a look.</p>
+<p>They stripped out WPIJavaCV from the main video feed widget, and now
+use a purely Java implementation of MPJPEG streaming.</p>
+<p>However, the old video feed widget is still available as an extension
+(so that you can still do cool things with <code>processImage</code>),
+and it also no longer has this bug. Their fix was to put a mutex around
+all accesses to <code>image</code>, which should have been the obvious
+solution to me.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2016 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/java-segfault.html b/public/java-segfault.html
new file mode 100644
index 0000000..257b223
--- /dev/null
+++ b/public/java-segfault.html
@@ -0,0 +1,120 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>My favorite bug: segfaults in Java — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » java-segfault</header>
+<article>
+<h1 id="my-favorite-bug-segfaults-in-java">My favorite bug: segfaults in
+Java</h1>
+<blockquote>
+<p>Update: Two years later, I wrote a more detailed version of this
+article: <a href="./java-segfault-redux.html">My favorite bug: segfaults
+in Java (redux)</a>.</p>
+</blockquote>
+<p>I’ve told this story orally a number of times, but realized that I
+have never written it down. This is my favorite bug story; it might not
+be my hardest bug, but it is the one I most like to tell.</p>
+<h2 id="the-context">The context</h2>
+<p>In 2012, I was a senior, and lead programmer, on the FIRST Robotics Competition
+team 1024. For the unfamiliar, the relevant part of the setup is that
+there are 2 minute and 15 second matches in which you have a 120 pound
+robot that sometimes runs autonomously, and sometimes is controlled over
+WiFi from a person at a laptop running stock “driver station” software
+and modifiable “dashboard” software.</p>
+<p>That year, we mostly used the dashboard software to allow the human
+driver and operator to monitor sensors on the robot, one of them being a
+video feed from a web-cam mounted on it. This was really easy because
+the new standard dashboard program had a click-and-drag interface to add
+stock widgets; you just had to make sure the code on the robot was
+actually sending the data.</p>
+<p>That’s great, until when debugging things, the dashboard would
+suddenly vanish. If it was run manually from a terminal (instead of
+letting the driver station software launch it), you would see a core
+dump indicating a segmentation fault.</p>
+<p>This wasn’t just us either; I spoke with people on other teams,
+everyone who was streaming video had this issue. But, because it only
+happened every couple of minutes, and a match is only 2:15, it didn’t
+need to run very long, they just crossed their fingers and hoped it
+didn’t happen during a match.</p>
+<p>The dashboard was written in Java, and the source was available
+(under a 3-clause BSD license), so I dove in, hunting for the bug. Now,
+the program did use Java Native Interface to talk to OpenCV, which the
+video ran through; so I figured that it must be a bug in the C/C++ code
+that was being called. It was especially a pain to track down the
+pointers that were causing the issue, because it was hard with native
+debuggers to see through all of the JVM stuff to the OpenCV code, and
+the OpenCV stuff is opaque to Java debuggers.</p>
+<p>Eventually the issue led me back into the Java code—there was a
+native pointer being stored in a Java variable; Java code called the
+native routine to <code>free()</code> the structure, but then tried to
+feed it to another routine later. This led to difficulty again—tracking
+objects with Java debuggers was hard because they don’t expect the
+program to suddenly segfault; it’s Java code, Java doesn’t segfault, it
+throws exceptions!</p>
+<p>With the help of <code>println()</code> I was eventually able to see
+that some code was executing in an order that straight didn’t make
+sense.</p>
+<h2 id="the-bug">The bug</h2>
+<p>The issue was that Java was making an unsafe optimization (I never
+bothered to figure out if it is the compiler or the JVM making the
+mistake, I was satisfied once I had a work-around).</p>
+<p>Java was doing something similar to tail-call optimization with
+regard to garbage collection. You see, if the JVM is waiting for the
+return value of a method <code>m()</code> of object <code>o</code>, and
+the code in <code>m()</code> that is yet to be executed doesn’t access
+any other methods or properties of <code>o</code>, then the JVM will go
+ahead and consider <code>o</code> eligible for garbage collection before
+<code>m()</code> has finished running.</p>
+<p>That is normally a safe optimization to make… except for when a
+destructor method (<code>finalize()</code>) is defined for the object;
+the destructor can have side effects, and Java has no way to know
+whether it is safe for them to happen before <code>m()</code> has
+finished running.</p>
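+<p>Here is a self-contained sketch of the hazard (the class and its
+“native pointer” are contrived, and whether you actually observe the
+premature <code>finalize()</code> depends on the JVM and how
+aggressively it optimizes):</p>
+<pre><code>class Demo {
+    private long nativePtr = 42; // stand-in for a real native pointer
+    static volatile boolean finalized = false;
+
+    long readSlowly() throws InterruptedException {
+        long p = this.nativePtr;
+        // No further use of `this` below, so the JVM may consider it
+        // unreachable here, even though readSlowly() is still running.
+        System.gc();       // encourage a collection (demonstration only)
+        Thread.sleep(100); // a window in which finalize() can run
+        if (finalized)
+            System.out.println(&quot;finalized before the method returned!&quot;);
+        return p;
+    }
+
+    @Override
+    protected void finalize() { finalized = true; }
+
+    public static void main(String[] args) throws InterruptedException {
+        new Demo().readSlowly();
+    }
+}</code></pre>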
+<h2 id="the-work-around">The work-around</h2>
+<p>The routine that the segmentation fault was occurring in was
+something like:</p>
+<pre><code>public type1 getFrame() {
+ type2 child = this.getChild();
+ type3 var = this.something();
+ // `this` may now be garbage collected
+ return child.somethingElse(var); // segfault comes here
+}</code></pre>
+<p>Where the destructor method of <code>this</code> calls a method that
+will <code>free()</code> native memory that is also accessed by
+<code>child</code>; if <code>this</code> is garbage collected before
+<code>child.somethingElse()</code> runs, the backing native code will
+try to access memory that has been <code>free()</code>ed, and receive a
+segmentation fault. That usually didn’t happen, as the routines were
+pretty fast. However, running 30 times a second, eventually bad luck
+with the garbage collector happens, and the program crashes.</p>
+<p>The work-around was to insert a bogus method call to keep
+<code>this</code> around until after we were also done with
+<code>child</code>:</p>
+<pre><code>public type1 getFrame() {
+ type2 child = this.getChild();
+ type3 var = this.something();
+ type1 ret = child.somethingElse(var);
+ this.getSize(); // bogus call to keep `this` around
+ return ret;
+}</code></pre>
+<p>Yeah. After spending weeks wading through thousands of lines
+of Java, C, and C++, a bogus call to a method I didn’t care about was
+the fix.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2014 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/kbd-xmodmap.html b/public/kbd-xmodmap.html
new file mode 100644
index 0000000..26bb5c3
--- /dev/null
+++ b/public/kbd-xmodmap.html
@@ -0,0 +1,240 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>GNU/Linux Keyboard Maps: xmodmap — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » kbd-xmodmap</header>
+<article>
+<h1 id="gnulinux-keyboard-maps-xmodmap">GNU/Linux Keyboard Maps:
+xmodmap</h1>
+<p>The modmap subsystem is part of the core <a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html">X11
+protocol</a>. However, it has been replaced by the <a
+href="https://www.x.org/releases/current/doc/kbproto/xkbproto.html">X
+Keyboard (XKB) Extension</a> to the protocol, which defines a facade
+that emulates the legacy modmap subsystem so that old programs still
+work—including those that manipulate the modmap directly!</p>
+<p>For people who like to Keep It Stupid Simple, the XKB extension looks
+horribly complicated and gross—even ignoring protocol details, the
+configuration syntax is a monstrosity! There’s no way to say something
+like “I’d like to remap Caps-Lock to be Control”, you have to copy and
+edit the entire keyboard definition, which includes mucking with vector
+graphics of the physical keyboard layout! So it’s very tempting to
+pretend that XKB doesn’t exist, and it’s still using modmap.</p>
+<p>However, this is a leaky abstraction; for instance: when running the
+<code>xmodmap</code> command to manipulate the modmap, if you have
+multiple keyboards plugged in, the result can depend on which keyboard
+you used to press “enter” after typing the command!</p>
+<p>Despite only existing as a compatibility shim today, I think it is
+important to understand the modmap subsystem to understand modern
+XKB.</p>
+<h2 id="conceptual-overview">Conceptual overview</h2>
+<p>There are 3 fundamental tasks that the modmap subsystem performs:</p>
+<ol type="1">
+<li><code>keyboard: map keycode -&gt; keysym</code>
+(client-side)</li>
+<li><code>keyboard: map keycode -&gt; modifier bitmask</code>
+(server-side)</li>
+<li><code>pointer: map physical button -&gt; logical button</code>
+(server-side)</li>
+</ol>
+<p>You’re thinking: “Great, so the X server does these things for us!”
+Nope! Not entirely, anyway. It does the keycode-&gt;modifier lookup, and
+the mouse-button lookup, but the keycode-&gt;keysym lookup must be done
+client-side by querying the mapping stored on the server. Generally,
+this is done automatically inside of libX11/libxcb, and the actual
+client application code doesn’t need to worry about it.</p>
+<p>So, what’s the difference between a keycode and a keysym, and how’s
+the modifier bitmask work?</p>
+<ul>
+<li><p>keycode: A numeric ID for a hardware button; this is as close to
+the hardware as X11 modmaps let us get. These are conceptually identical
+to Linux kernel keycodes, but the numbers don’t match up. Xorg keycodes
+are typically <code>linux_keycode+8</code>.</p></li>
+<li><p>keysym: A 29-bit integer code that is meaningful to applications.
+A mapping of these to symbolic names is defined in
+<code>&lt;X11/keysymdef.h&gt;</code> and augmented by
+<code>/usr/share/X11/XKeysymDB</code>. See:
+<code>XStringToKeysym()</code> and <code>XKeysymToString()</code>. We
+will generally use the symbolic name in the modmap file. The symbolic
+names are case-sensitive.</p></li>
+<li><p>Modifier state: An 8-bit bitmask of modifier keys (names are
+case-insensitive):</p>
+<pre><code>1 &lt;&lt; 0 : shift
+1 &lt;&lt; 1 : lock
+1 &lt;&lt; 2 : control
+1 &lt;&lt; 3 : mod1
+1 &lt;&lt; 4 : mod2
+1 &lt;&lt; 5 : mod3
+1 &lt;&lt; 6 : mod4
+1 &lt;&lt; 7 : mod5</code></pre></li>
+</ul>
+<p>With that knowledge, and the libX11/libxcb API docs, you can probably
+figure out how to interact with the modmap subsystem from C, but who
+does that? Everyone just uses the <code>xmodmap(1)</code> command.</p>
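+<p>For the curious, here’s a sketch of doing one of those things from
+C: dumping the keycode-&gt;keysym map with plain Xlib (compile with
+<code>-lX11</code>):</p>
+<pre><code>#include &lt;stdio.h&gt;
+#include &lt;X11/Xlib.h&gt;
+
+int main(void) {
+    Display *dpy = XOpenDisplay(NULL);
+    if (!dpy)
+        return 1;
+    int min, max, per;
+    XDisplayKeycodes(dpy, &amp;min, &amp;max);
+    KeySym *syms = XGetKeyboardMapping(dpy, min, max - min + 1, &amp;per);
+    for (int kc = min; kc &lt;= max; kc++) {
+        KeySym sym = syms[(kc - min) * per]; /* column 0: unmodified */
+        const char *name = XKeysymToString(sym);
+        printf(&quot;keycode %3d = %s\n&quot;, kc, name ? name : &quot;&quot;);
+    }
+    XFree(syms);
+    XCloseDisplay(dpy);
+    return 0;
+}</code></pre>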
+<h2 id="the-x11-protocol">The X11 protocol</h2>
+<p>As I said, the modifier and button lookup is handled server-side;
+each of the <a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#events:input">input
+events</a> ({Key,Button}{Press,Release}, and MotionNotify) and <a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#events:pointer_window">pointer
+window events</a> ({Enter,Leave}Notify) include a bitmask of active
+keyboard modifiers and pointer buttons. Each are given an 8-bit
+bitmask—hence 8 key modifiers. For some reason, only up to Button5 is
+included in the bitmask; the upper 3 bits are always zero; but the
+Button{Press,Release} events will happily deliver events for up to
+Button255!</p>
+<p>The X11 protocol has 6 request types for dealing with these 3
+mappings; an accessor and a mutator pair for each. Since 2 of the 3
+mappings are done server-side, most clients will only use
+GetKeyboardMapping. Anyway, let’s look at those 6 requests, grouped by
+the mappings that they work with (pardon the Java-like pseudo-code
+syntax for indicating logical argument and return types):</p>
+<ol type="1">
+<li><p><code>keyboard: map keycode -&gt; keysym</code></p>
+<ul>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetKeyboardMapping">GetKeyboardMapping</a>
+::
+<code>List&lt;keycode&gt; -&gt; Map&lt;keycode,List&lt;keysym&gt;&gt;</code></li>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:ChangeKeyboardMapping">ChangeKeyboardMapping</a>
+:: <code>Map&lt;keycode,List&lt;keysym&gt;&gt; -&gt; ()</code></li>
+</ul>
+<p><code>GetKeyboardMapping</code> returns the keycode-&gt;keysym
+mappings for the requested keycodes; this way clients can choose to look
+up only the keycodes that they need to handle (the ones that got sent to
+them). Each keycode gets a list of keysyms; which keysym they should use
+from that list depends on which modifiers are pressed.
+<code>ChangeKeyboardMapping</code> changes the mapping for the given
+keycodes; not all keycodes must be given, any keycodes that aren’t
+included in the request aren’t changed.</p></li>
+<li><p><code>keyboard: map keycode -&gt; modifier bitmask</code></p>
+<ul>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetModifierMapping">GetModifierMapping</a>
+:: <code>() -&gt; Map&lt;modifier,List&lt;keycode&gt;&gt;</code></li>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:SetModifierMapping">SetModifierMapping</a>
+:: <code>Map&lt;modifier,List&lt;keycode&gt;&gt; -&gt; ()</code></li>
+</ul>
+<p>The modifiers mapping is a lot smaller than the keysym mapping; you
+must operate on the entire mapping at once. For each modifier bit,
+there’s a list of keycodes that will cause that modifier bit to be
+flipped in the events that are delivered while it is pressed.</p></li>
+<li><p><code>pointer: map physical button -&gt; logical button</code></p>
+<ul>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetPointerMapping">GetPointerMapping</a>
+:: <code>() -&gt; List&lt;logicalButton&gt;</code> (indexed by
+<code>physicalButton-1</code>)</li>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:SetPointerMapping">SetPointerMapping</a>
+:: <code>List&lt;logicalButton&gt; -&gt; ()</code> (indexed by
+<code>physicalButton-1</code>)</li>
+</ul>
+<p>Like the modifier mapping, the button mapping is expected to be
+small, most mice only have 5-7 buttons (left, middle, right, scroll up,
+scroll down, scroll left, scroll right—that’s right, X11 handles scroll
+events as button presses), though some fancy gaming mice have more than
+that, but not much more.</p></li>
+</ol>
+<p>I mentioned earlier that the keycode-&gt;keysym mapping isn’t
+actually done by the X server, and is done in the client; whenever a
+client receives a key event or pointer button event, it must do a
+<code>Get*Mapping</code> request to see what that translates to. Of
+course, doing a that for every keystroke would be crazy; but at the same
+time, the each client is expected to know about changes to the mappings
+that happen at run-time. So, each of the “set”/“change” commands
+generate a <a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#events:MappingNotify">MappingNotify</a>
+event that is sent to all clients, so they know when they must dump
+their cache of mappings.</p>
+<p>For completeness, if you are looking at this as background for
+understanding XKB, I should also mention:</p>
+<ul>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetKeyboardControl">GetKeyboardControl</a></li>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:ChangeKeyboardControl">ChangeKeyboardControl</a></li>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:GetPointerControl">GetPointerControl</a></li>
+<li><a
+href="https://www.x.org/releases/current/doc/xproto/x11protocol.html#requests:ChangePointerControl">ChangePointerControl</a></li>
+</ul>
+<h2 id="the-xmodmap-command">The <code>xmodmap</code> command</h2>
+<p>The <code>xmodmap</code> command reads a configuration file and
+modifies the maps in the X server to match. The <code>xmodmap</code>
+config file has its own little quirky syntax. For one, the comment
+character is <code>!</code> (and comments may only start at the
+<em>beginning</em> of the line, but that’s fairly common).</p>
+<p>There are 8 commands that <code>xmodmap</code> recognizes. Let’s look
+at those, grouped by the 3 tasks that the modmap subsystem performs:</p>
+<ol type="1">
+<li><p><code>keyboard: map keycode -&gt; keysym</code></p>
+<ul>
+<li><p><code>keycode KEYCODE = PLAIN [SHIFT [MODE_SWITCH [MODE_SWITCH+SHIFT ]]]</code></p>
+<p>Actually takes a list of up to 8 keysyms, but only the first 4 have
+standard uses.</p></li>
+<li><p><code>keysym OLD_KEYSYM = NEW_KEYSYMS...</code></p>
+<p>Takes the keycodes mapped to <code>OLD_KEYSYM</code> and maps them to
+<code>NEW_KEYSYM</code>.</p></li>
+<li><p><code>keysym any = KEYSYMS...</code></p>
+<p>Finds an otherwise unused keycode, and has it map to the specified
+keysyms.</p></li>
+</ul></li>
+<li><p><code>keyboard: map keycode -&gt; modifier bitmask</code></p>
+<ul>
+<li><code>clear MODIFIER</code></li>
+<li><code>add MODIFIERNAME = KEYSYMS...</code></li>
+<li><code>remove MODIFIERNAME = KEYSYMS...</code></li>
+</ul>
+<p>Wait, the modmap subsystem maps <em>keycodes</em> to modifiers, but
+the commands take <em>keysyms</em>? Yup! When executing one of these
+commands, it first looks up those keysyms in the keyboard map to
+translate them in to a set of keycodes, then associates those keycodes
+with that modifier. But how does it look up keysym-&gt;keycode; the
+protocol only supports querying keycode-&gt;keysym? It <a
+href="https://cgit.freedesktop.org/xorg/app/xmodmap/tree/handle.c?h=xmodmap-1.0.9#n59">loops</a>
+over <em>every</em> keycode, finding all the matches.</p></li>
+<li><p><code>pointer: map physical button -&gt; logical button</code></p>
+<ul>
+<li><p><code>pointer = default</code></p>
+<p>This is equivalent to <code>pointer = 1 2 3 4 5 6...</code> where the
+list is as long as the number of buttons there are.</p></li>
+<li><p><code>pointer = NUMBERS...</code></p>
+<p><code>pointer = A B C D...</code> sets the physical button 1 to
+logical button A, physical button 2 to logical button B, and so on.
+Setting a physical button to logical button 0 disables that
+button.</p></li>
+</ul></li>
+</ol>
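+<p>Putting those commands together: the “remap Caps-Lock to be Control”
+that XKB makes so painful is just a 3-line modmap file (a common
+snippet; run it with <code>xmodmap <var>FILENAME</var></code>):</p>
+<pre><code>! Make Caps-Lock be another Control key
+clear lock
+keysym Caps_Lock = Control_L
+add control = Control_L</code></pre>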
+<h2 id="appendix">Appendix:</h2>
+<p>I use this snippet in my Emacs configuration to make editing xmodmap
+files nicer:</p>
+<pre><code>;; http://www.emacswiki.org/emacs/XModMapMode
+(when (not (fboundp &#39;xmodmap-mode))
+ (define-generic-mode &#39;xmodmap-mode
+ &#39;(?!)
+ &#39;(&quot;add&quot; &quot;clear&quot; &quot;keycode&quot; &quot;keysym&quot; &quot;pointer&quot; &quot;remove&quot;)
+ nil
+ &#39;(&quot;[xX]modmap\\(rc\\)?\\&#39;&quot;)
+ nil
+ &quot;Simple mode for xmodmap files.&quot;))</code></pre>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2018 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/lp2015-videos.html b/public/lp2015-videos.html
new file mode 100644
index 0000000..439df0a
--- /dev/null
+++ b/public/lp2015-videos.html
@@ -0,0 +1,38 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>I took some videos at LibrePlanet — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » lp2015-videos</header>
+<article>
+<h1 id="i-took-some-videos-at-libreplanet">I took some videos at
+LibrePlanet</h1>
+<p>I’m at <a href="https://libreplanet.org/2015/">LibrePlanet</a>, and
+have been loving the talks. For most of yesterday, there was a series of
+short “lightning” talks in room 144. I decided to hang out in that room
+for the later part of the day, because while most of the talks were live
+streamed and recorded, there were no cameras in room 144; so I couldn’t
+watch them later.</p>
+<p>Way too late in the day, I remembered that I have the capability to
+record videos, so I caught the last two talks in 144.</p>
+<p>I apologize for the changing orientation.</p>
+<p><a
+href="https://lukeshu.com/dump/lp-2015-last-2-short-talks.ogg">Here’s
+the video I took</a>.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2015 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/make-memoize.html b/public/make-memoize.html
new file mode 100644
index 0000000..f99d4b4
--- /dev/null
+++ b/public/make-memoize.html
@@ -0,0 +1,93 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>A memoization routine for GNU Make functions — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » make-memoize</header>
+<article>
+<h1 id="a-memoization-routine-for-gnu-make-functions">A memoization
+routine for GNU Make functions</h1>
+<p>I’m a big fan of <a href="https://www.gnu.org/software/make/">GNU
+Make</a>. I’m pretty knowledgeable about it, and was pretty active on
+the help-make mailing list for a while. Something that many experienced
+make-ers know of is John Graham-Cumming’s “GNU Make Standard Library”,
+or <a href="http://gmsl.sourceforge.net/">GMSL</a>.</p>
+<p>I don’t like to use it, as I’m capable of defining macros myself as I
+need them instead of pulling in a 3rd party dependency (and generally
+like to stay away from the kind of Makefile that would lean heavily on
+something like GMSL).</p>
+<p>However, one really neat thing that GMSL offers is a way to memoize
+expensive functions (such as those that shell out). I was considering
+pulling in GMSL for one of my projects, almost just for the
+<code>memoize</code> function.</p>
+<p>However, John’s <code>memoize</code> has a couple short-comings that
+made it unsuitable for my needs.</p>
+<ul>
+<li>Only allows functions that take one argument.</li>
+<li>Considers empty values to be unset; for my needs, an empty string is
+a valid value that should be cached.</li>
+</ul>
+<p>So, I implemented my own, more flexible memoization routine for
+Make.</p>
+<pre><code># This definition of `rest` is equivalent to that in GMSL
+rest = $(wordlist 2,$(words $1),$1)
+
+# How to use: Define 2 variables (the type you would pass to $(call)):
+# `_<var>NAME</var>_main` and `_<var>NAME</var>_hash`. Now, `_<var>NAME</var>_main` is the function getting
+# memoized, and _<var>NAME</var>_hash is a function that hashes the function arguments
+# into a string suitable for a variable name.
+#
+# Then, define the final function like:
+#
+# <var>NAME</var> = $(foreach func,<var>NAME</var>,$(memoized))
+
+_main = $(_$(func)_main)
+_hash = __memoized_$(_$(func)_hash)
+memoized = $(if $($(_hash)),,$(eval $(_hash) := _ $(_main)))$(call rest,$($(_hash)))</code></pre>
+<p>However, I later removed it from the Makefile, as I <a
+href="https://projects.parabola.nu/~lukeshu/maven-dist.git/commit/?id=fec5a7281b3824cb952aa0bb76bbbaa41eaafdf9">re-implemented</a>
+the bits that it memoized in a more efficient way, such that memoization
+was no longer needed, and the whole thing was faster.</p>
+<p>Later, I realized that my memoized routine could have been improved
+by replacing <code>func</code> with <code>$0</code>, which would
+simplify how the final function is declared:</p>
+<pre><code># This definition of `rest` is equivalent to that in GMSL
+rest = $(wordlist 2,$(words $1),$1)
+
+# How to use:
+#
+# _<var>NAME</var>_main = <var>your main function to be memoized</var>
+# _<var>NAME</var>_hash = <var>your hash function for parameters</var>
+# <var>NAME</var> = $(memoized)
+#
+# The output of your hash function should be a string following
+# the same rules that variable names follow.
+
+_main = $(_$0_main)
+_hash = __memoized_$(_$0_hash)
+memoized = $(if $($(_hash)),,$(eval $(_hash) := _ $(_main)))$(call rest,$($(_hash)))</code></pre>
+<p>Now, I’m pretty sure that should work, but I have only actually
+tested the first version.</p>
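+<p>As a usage sketch of that second version (assuming the definitions
+above are already in the Makefile; the <code>file2md5</code> function
+and its hash are made up for illustration):</p>
+<pre><code># expensive: shells out every time it is expanded
+_file2md5_main = $(shell md5sum $1 | cut -d&#39; &#39; -f1)
+# hash the arguments into a valid variable name
+_file2md5_hash = file2md5_$(subst /,_,$1)
+file2md5 = $(memoized)
+
+$(info $(call file2md5,Makefile)) # runs md5sum
+$(info $(call file2md5,Makefile)) # served from the cache</code></pre>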
+<h2 id="tldr">TL;DR</h2>
+<p>Avoid doing things in Make that would make you lean on complex
+solutions like an external memoize function.</p>
+<p>However, if you do end up needing a more flexible memoize routine, I
+wrote one that you can use.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2014 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="http://www.wtfpl.net/txt/copying/">WTFPL-2</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/nginx-mediawiki.html b/public/nginx-mediawiki.html
new file mode 100644
index 0000000..9e7ff52
--- /dev/null
+++ b/public/nginx-mediawiki.html
@@ -0,0 +1,87 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>An Nginx configuration for MediaWiki — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » nginx-mediawiki</header>
+<article>
+<h1 id="an-nginx-configuration-for-mediawiki">An Nginx configuration for
+MediaWiki</h1>
+<p>There are <a href="http://wiki.nginx.org/MediaWiki">several</a> <a
+href="https://wiki.archlinux.org/index.php/MediaWiki#Nginx">example</a>
+<a
+href="https://www.mediawiki.org/wiki/Manual:Short_URL/wiki/Page_title_--_nginx_rewrite--root_access">Nginx</a>
+<a
+href="https://www.mediawiki.org/wiki/Manual:Short_URL/Page_title_-_nginx,_Root_Access,_PHP_as_a_CGI_module">configurations</a>
+<a href="http://wiki.nginx.org/RHEL_5.4_%2B_Nginx_%2B_Mediawiki">for</a>
+<a
+href="http://stackoverflow.com/questions/11080666/mediawiki-on-nginx">MediaWiki</a>
+floating around the web. Many of them don’t block the user from
+accessing things like <code>/serialized/</code>. Many of them also <a
+href="https://labs.parabola.nu/issues/725">don’t correctly handle</a> a
+wiki page named <code>FAQ</code>, since that is a name of a file in the
+MediaWiki root! In fact, the configuration used on the official Nginx
+Wiki has both of those issues!</p>
+<p>This is because most of the configurations floating around basically
+try to pass all requests through, and blacklist certain requests, either
+denying them, or passing them through to <code>index.php</code>.</p>
+<p>It’s my view that blacklisting is inferior to whitelisting in
+situations like this. So, I developed the following configuration that
+instead works by whitelisting certain paths.</p>
+<pre><code>root /path/to/your/mediawiki; # obviously, change this line
+
+index index.php;
+location / { try_files /var/empty @rewrite; }
+location /images/ { try_files $uri $uri/ @rewrite; }
+location /skins/ { try_files $uri $uri/ @rewrite; }
+location /api.php { try_files /var/empty @php; }
+location /api.php5 { try_files /var/empty @php; }
+location /img_auth.php { try_files /var/empty @php; }
+location /img_auth.php5 { try_files /var/empty @php; }
+location /index.php { try_files /var/empty @php; }
+location /index.php5 { try_files /var/empty @php; }
+location /load.php { try_files /var/empty @php; }
+location /load.php5 { try_files /var/empty @php; }
+location /opensearch_desc.php { try_files /var/empty @php; }
+location /opensearch_desc.php5 { try_files /var/empty @php; }
+location /profileinfo.php { try_files /var/empty @php; }
+location /thumb.php { try_files /var/empty @php; }
+location /thumb.php5 { try_files /var/empty @php; }
+location /thumb_handler.php { try_files /var/empty @php; }
+location /thumb_handler.php5 { try_files /var/empty @php; }
+location /wiki.phtml { try_files /var/empty @php; }
+
+location @rewrite {
+ rewrite ^/(.*)$ /index.php?title=$1&amp;$args;
+}
+
+location @php {
+ # obviously, change this according to your PHP setup
+ include fastcgi.conf;
+ fastcgi_pass unix:/run/php-fpm/wiki.sock;
+}</code></pre>
+<p>We are now using this configuration on <a
+href="https://wiki.parabola.nu/">ParabolaWiki</a>, but with an alias for
+<code>location = /favicon.ico</code> to the correct file in the skin,
+and with FastCGI caching for PHP.</p>
+<p>The only thing I don’t like about this is the
+<code>try_files /var/empty</code> bits—surely there is a better way to
+have it go to one of the <code>@</code> location blocks, but I couldn’t
+figure it out.</p>
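+<p>One trick I’ve seen suggested (though I haven’t tested it) is to
+abuse <code>error_page</code> with an unused status code to jump to a
+named location without touching the filesystem:</p>
+<pre><code>location /index.php { error_page 418 = @php; return 418; }</code></pre>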
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2015 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/pacman-overview.html b/public/pacman-overview.html
new file mode 100644
index 0000000..ce11166
--- /dev/null
+++ b/public/pacman-overview.html
@@ -0,0 +1,62 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>A quick overview of usage of the Pacman package manager — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » pacman-overview</header>
+<article>
+<h1 id="a-quick-overview-of-usage-of-the-pacman-package-manager">A quick
+overview of usage of the Pacman package manager</h1>
+<p>This was originally published on <a
+href="https://news.ycombinator.com/item?id=5101416">Hacker News</a> on
+2013-01-23.</p>
+<p>Note: I’ve over-done quotation marks to make it clear when precise
+wording matters.</p>
+<p><code>pacman</code> is a little awkward, but I prefer it to apt/dpkg,
+which have sub-commands, each with their own flags, some of which are
+undocumented. pacman, on the other hand, has ALL options documented in
+one fairly short man page.</p>
+<p>The trick to understanding pacman is to understand how it maintains
+databases of packages, and what it means to “sync”.</p>
+<p>There are several “databases” that pacman deals with:</p>
+<ul>
+<li>“the database”, (<code>/var/lib/pacman/local/</code>)<br> The
+database of currently installed packages</li>
+<li>“package databases”,
+(<code>/var/lib/pacman/sync/${repo}.db</code>)<br> There is one of these
+for each repository. It is a file that is fetched over plain http(s)
+from the server; it is not modified locally, only updated.</li>
+</ul>
+<p>The “operation” of pacman is set with a capital flag, one of “DQRSTU”
+(plus <code>-V</code> and <code>-h</code> for version and help). Of
+these, “DTU” are “low-level” (analogous to dpkg) and “QRS” are
+“high-level” (analogous to apt).</p>
+<p>To give a brief explanation of the “high-level” operations, and
+which databases they deal with:</p>
+<ul>
+<li>“Q” Queries “the database” of locally installed packages.</li>
+<li>“S” deals with “package databases”, and Syncing “the database” with
+them; meaning it installs/updates packages that are in package
+databases, but not installed on the local system.</li>
+<li>“R” Removes packages from “the database”; removing them from the local
+system.</li>
+</ul>
+<p>The biggest “gotcha” is that “S” deals with all operations with
+“package databases”, not just syncing “the database” with them.</p>
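+<p>Some concrete examples, annotated with the databases involved:</p>
+<pre><code>pacman -Ss regex   # Search the &quot;package databases&quot;
+pacman -Sy         # update the &quot;package databases&quot; from the mirrors
+pacman -S foo      # Sync the package foo into &quot;the database&quot; (install it)
+pacman -Su         # Sync everything in &quot;the database&quot; (upgrade the system)
+pacman -Qs regex   # Query &quot;the database&quot; (search installed packages)
+pacman -Qi foo     # Query &quot;the database&quot; for info on installed foo
+pacman -R foo      # Remove foo (from &quot;the database&quot; and the system)</code></pre>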
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2013 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/poor-system-documentation.html b/public/poor-system-documentation.html
new file mode 100644
index 0000000..d15d470
--- /dev/null
+++ b/public/poor-system-documentation.html
@@ -0,0 +1,57 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Why documentation on GNU/Linux sucks — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » poor-system-documentation</header>
+<article>
+<h1 id="why-documentation-on-gnulinux-sucks">Why documentation on
+GNU/Linux sucks</h1>
+<p>This is based on a post on <a
+href="http://www.reddit.com/r/archlinux/comments/zoffo/systemd_we_will_keep_making_it_the_distro_we_like/c66uu57">reddit</a>,
+published on 2012-09-12.</p>
+<p>The documentation situation on GNU/Linux based operating systems is
+right now a mess. In the world of documentation, there are basically 3
+camps, the “UNIX” camp, the “GNU” camp, and the “GNU/Linux” camp.</p>
+<p>The UNIX camp is the <code>man</code> page camp, they have quality,
+terse but informative man pages, on <em>everything</em>, including the
+system’s design and all system files. If it was up to the UNIX camp,
+<code>man grub.cfg</code>, <code>man grub.d</code>, and
+<code>man grub-mkconfig_lib</code> would exist and actually be helpful.
+The man page would either include inline examples, or point you to a
+directory. If I were to print off all of the man pages, it would
+actually be a useful manual for the system.</p>
+<p>The GNU camp is the <code>info</code> camp. They basically thought
+that each piece of software was more complex than a man page could
+handle. They essentially think that some individual pieces of software
+warrant a book. So, they developed the <code>info</code> system. The
+info pages are usually quite high quality, but are very long, and a pain
+if you just want a quick look. The <code>info</code> system can generate
+good HTML (and PDF, etc.) documentation. But the standard
+<code>info</code> is awkward as hell to use for non-Emacs users.</p>
+<p>Then we have the “GNU/Linux” camp, they use GNU software, but want to
+use <code>man</code> pages. This means that we get low-quality man pages
+for GNU software, and then we don’t have a good baseline for
+documentation, developers each try to create their own. The
+documentation that gets written is frequently either low-quality, or
+non-standard. A lot of man pages are auto-generated from
+<code>--help</code> output or info pages, meaning they are either not
+helpful, or overly verbose with low information density. This camp gets
+the worst of both worlds, and a few problems of its own.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2012 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/posix-pricing.html b/public/posix-pricing.html
new file mode 100644
index 0000000..dab8f2a
--- /dev/null
+++ b/public/posix-pricing.html
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>POSIX pricing and availability; or: Do you really need the PDF? — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » posix-pricing</header>
+<article>
+<h1
+id="posix-pricing-and-availability-or-do-you-really-need-the-pdf">POSIX
+pricing and availability; or: Do you really need the PDF?</h1>
+<p>The Open Group and IEEE are weird about POSIX pricing. They’re
+protective of the PDF, making you pay <a
+href="http://standards.ieee.org/findstds/standard/1003.1-2008.html">hundreds
+of dollars</a> for the PDF; but will happily post an HTML version for
+free both <a
+href="http://pubs.opengroup.org/onlinepubs/9699919799/">online</a>, and
+(with free account creation) download as a <a
+href="https://www2.opengroup.org/ogsys/catalog/t101">a .zip</a>.</p>
+<p>They also offer a special license to the “Linux man-pages” project,
+allowing them to <a
+href="https://www.kernel.org/pub/linux/docs/man-pages/man-pages-posix/">distribute</a>
+the man page portions of POSIX (most of it is written as a series of man
+pages) for free; so on a GNU/Linux box, you probably have most of POSIX
+already downloaded in manual sections 0p, 1p, and 3p.</p>
+<p>Anyway, the only thing you aren’t getting with the free HTML version
+is a line number next to every line of text. It’s generated from the
+same troff sources. So, in an article or in a discussion, I’m not
+cheating you out of specification details by citing the webpage.</p>
+<p>If you’re concerned that you’re looking at the correct version of the
+webpage or man pages, the current version (as of February 2018) of POSIX
+is “POSIX-2008, 2016 edition.”</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2018 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/purdue-cs-login.html b/public/purdue-cs-login.html
new file mode 100644
index 0000000..7deb432
--- /dev/null
+++ b/public/purdue-cs-login.html
@@ -0,0 +1,195 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Customizing your login on Purdue CS computers (WIP, but updated) — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » purdue-cs-login</header>
+<article>
+<h1
+id="customizing-your-login-on-purdue-cs-computers-wip-but-updated">Customizing
+your login on Purdue CS computers (WIP, but updated)</h1>
+<blockquote>
+<p>This article is currently a Work-In-Progress. Other than the one
+place where I say “I’m not sure”, the GDM section is complete. The
+network shares section is a mess, but has some good information.</p>
+</blockquote>
+<p>Most CS students at Purdue spend a lot of time on the lab boxes, but
+don’t know a lot about them. This document tries to fix that.</p>
+<p>The lab boxes all run Gentoo.</p>
+<h2 id="gdm-the-gnome-display-manager">GDM, the Gnome Display
+Manager</h2>
+<p>The boxes run <code>gdm</code> (Gnome Display Manager) 2.20.11 for
+the login screen. This is an old version, and has a couple behaviors
+that are slightly different than new versions, but here are the
+important bits:</p>
+<p>System configuration:</p>
+<ul>
+<li><code>/usr/share/gdm/defaults.conf</code> (lower precedence)</li>
+<li><code>/etc/X11/gdm/custom.conf</code> (higher precedence)</li>
+</ul>
+<p>User configuration:</p>
+<ul>
+<li><code>~/.dmrc</code> (more recent versions use
+<code>~/.desktop</code>, but Purdue boxes aren’t running more recent
+versions)</li>
+</ul>
+<h3 id="purdues-gdm-configuration">Purdue’s GDM configuration</h3>
+<p>Now, <code>custom.conf</code> sets</p>
+<pre><code>BaseXsession=/usr/local/share/xsessions/Xsession
+SessionDesktopDir=/usr/local/share/xsessions/</code></pre>
+<p>This is important, because there are <em>multiple</em> locations that
+look like these files; I take it that they were used at sometime in the
+past. Don’t get tricked into thinking that it looks at
+<code>/etc/X11/gdm/Xsession</code> (which exists, and is where it would
+look by default).</p>
+<p>If you look at the GDM login screen, it has a “Sessions” button that
+opens a prompt where you can select any of several sessions:</p>
+<ul>
+<li>Last session</li>
+<li>1. MATE (<code>mate.desktop</code>;
+<code>Exec=mate-session</code>)</li>
+<li>2. CS Default Session (<code>default.desktop</code>;
+<code>Exec=default</code>)</li>
+<li>3. Custom Session (<code>custom.desktop</code>;
+<code>Exec=custom</code>)</li>
+<li>4. FVWM2 (<code>fvwm2.desktop</code>; <code>Exec=fvwm2</code>)</li>
+<li>5. gnome.desktop (<code>gnome.desktop</code>;
+<code>Exec=gnome-session</code>)</li>
+<li>6. KDE (<code>kde.desktop</code>, <code>Exec=startkde</code>)</li>
+<li>Failsafe MATE (<code>ShowGnomeFailsafeSession=true</code>)</li>
+<li>Failsafe Terminal (<code>ShowXtermFailsafeSession=true</code>)</li>
+</ul>
+<p>The main 6 are configured by the <code>.desktop</code> files in
+<code>SessionDesktopDir=/usr/local/share/xsessions</code>; the last 2
+are auto-generated. The reason <code>ShowGnomeFailsafeSession</code>
+correctly generates a Mate session instead of a Gnome session is because
+of the patch
+<code>/p/portage/*/overlay/gnome-base/gdm/files/gdm-2.20.11-mate.patch</code>.</p>
+<p>I’m not sure why Gnome shows up as <code>gnome.desktop</code> instead
+of <code>GNOME</code> as specified by <code>gnome.desktop:Name</code>. I
+imagine it might be something related to the aforementioned patch, but I
+can’t find anything in the patch that looks like it would screw that up;
+at least not without a better understanding of GDM’s code.</p>
+<p>Which of the main 6 is used by default (“Last Session”) is configured
+with <code>~/.dmrc:Session</code>, which contains the basename of the
+associated <code>.desktop</code> file (that is, without any directory
+part or file extension).</p>
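+<p>For example, a <code>~/.dmrc</code> that makes “Last session” be the
+MATE session (<code>.dmrc</code> is INI-style, with the key in a
+<code>[Desktop]</code> section):</p>
+<pre><code>[Desktop]
+Session=mate</code></pre>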
+<p>Every one of the <code>.desktop</code> files sets
+<code>Type=XSession</code>, which means that instead of running the
+argument in <code>Exec=</code> directly, it passes it as arguments to
+the <code>Xsession</code> program (in the location configured by
+<code>BaseXsession</code>).</p>
+<h4 id="xsession">Xsession</h4>
+<p>So, now we get to read
+<code>/usr/local/share/xsessions/Xsession</code>.</p>
+<p>Before it does anything else, it:</p>
+<ol type="1">
+<li><code>. /etc/profile.env</code></li>
+<li><code>unset ROOTPATH</code></li>
+<li>Try to set up logging to one of <code>~/.xsession-errors</code>,
+<code>$TMPDIR/xses-$USER</code>, or <code>/tmp/xses-$USER</code> (it
+tries them in that order).</li>
+<li><code>xsetroot -default</code></li>
+<li>Fiddles with the maximum number of processes.</li>
+</ol>
+<p>After that, it handles these 3 “special” arguments that were given to
+it by various <code>.desktop</code> <code>Exec=</code> lines:</p>
+<ul>
+<li><code>failsafe</code>: Runs a single xterm window. NB: This is NOT
+run by either of the failsafe options. It is likely a vestige of a
+prior configuration.</li>
+<li><code>startkde</code>: Displays a message saying KDE is no longer
+available.</li>
+<li><code>gnome-session</code>: Displays a message saying GNOME has been
+replaced by MATE.</li>
+</ul>
+<p>Assuming that none of those were triggered, it then does:</p>
+<ol type="1">
+<li><code>source ~/.xprofile</code></li>
+<li><code>xrdb -merge ~/.Xresources</code></li>
+<li><code>xmodmap ~/.xmodmaprc</code></li>
+</ol>
+<p>Finally, it has a switch statement over the arguments given to it by
+the various <code>.desktop</code> <code>Exec=</code> lines:</p>
+<ul>
+<li><code>custom</code>: Executes <code>~/.xsession</code> (see the
+sketch after this list).</li>
+<li><code>default</code>: Executes <code>~/.Xrc.cs</code>.</li>
+<li><code>mate-session</code>: It has this whole script to start DBus,
+run the <code>mate-session</code> command, then clean up when it’s
+done.</li>
+<li><code>*</code> (<code>fvwm2</code>): Runs
+<code>eval exec "$@"</code>, which results in it executing the
+<code>fvwm2</code> command.</li>
+</ul>
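+<p>In shell terms, that final switch is morally equivalent to this
+sketch (abbreviated; the <code>mate-session</code> branch is really a
+whole script):</p>
+<pre><code>case &quot;$1&quot; in
+custom)       exec &quot;$HOME/.xsession&quot;;;
+default)      exec &quot;$HOME/.Xrc.cs&quot;;;
+mate-session) ...;; # start D-Bus, run mate-session, clean up after it
+*)            eval exec &quot;$@&quot;;;
+esac</code></pre>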
+<h2 id="network-shares">Network Shares</h2>
+<p>Your data is on various hosts. I believe most undergrads have their
+data on <code>data.cs.purdue.edu</code> (or just <a
+href="https://en.wikipedia.org/wiki/Data_%28Star_Trek%29"><code>data</code></a>).
+Others have theirs on <a
+href="http://swfanon.wikia.com/wiki/Antor"><code>antor</code></a> or <a
+href="https://en.wikipedia.org/wiki/Tux"><code>tux</code></a> (that I
+know of).</p>
+<p>Most of the boxes with tons of storage have many network cards, each
+with a different IP; a single host’s IPs are mostly the same, but with
+varying 3rd octets. For example, <code>data</code> is 128.10.X.13. If
+you need a particular value of X, but don’t want to remember the other
+octets, the cards are individually addressed as
+<code>BASENAME-NUMBER.cs.purdue.edu</code>. For example,
+<code>data-25.cs.purdue.edu</code> is 128.10.25.13.</p>
+<p>They use <a
+href="https://www.kernel.org/pub/linux/daemons/autofs/">AutoFS</a> quite
+extensively. The maps are generated dynamically by
+<code>/etc/autofs/*.map</code>, which are all symlinks to
+<code>/usr/libexec/amd2autofs</code>. As far as I can tell,
+<code>amd2autofs</code> is custom to Purdue. Its source lives in
+<code>/p/portage/*/overlay/net-fs/autofs/files/amd2autofs.c</code>. The
+name appears to be a misnomer; it seems to claim to dynamically
+translate from the configuration of <a href="http://www.am-utils.org/">Auto
+Mounter Daemon (AMD)</a> to AutoFS, but it actually talks to NIS. It
+does so using the <code>yp</code> interface, which is in Glibc for
+compatibility, but is undocumented. For documentation of that
+interface, look at one of the BSDs, or Mac OS X. From the comments
+in the file, it appears that it once did look at the AMD configuration,
+but has since been changed.</p>
+<p>There are 3 mountpoints using AutoFS: <code>/homes</code>,
+<code>/p</code>, and <code>/u</code>. <code>/homes</code> creates
+symlinks on-demand from <code>/homes/USERNAME</code> to
+<code>/u/BUCKET/USERNAME</code>. <code>/u</code> mounts NFS shares to
+<code>/u/SERVERNAME</code> on-demand, and creates symlinks from
+<code>/u/BUCKET</code> to <code>/u/SERVERNAME/BUCKET</code> on-demand.
+<code>/p</code> mounts on-demand various NFS shares that are organized
+by topic; the Xinu/MIPS tools are in <code>/p/xinu</code>, the Portage
+tree is in <code>/p/portage</code>.</p>
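+<p>To make the on-demand behavior concrete, here’s a hypothetical
+session (the username <code>jdoe</code> and bucket <code>u92</code> are
+made up for illustration):</p>
+<pre><code>$ readlink /homes/jdoe   # symlink created on-demand
+/u/u92/jdoe
+$ ls /u/u92/jdoe         # accessing it triggers the NFS mount</code></pre>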
+<p>I’m not sure how <code>scratch</code> works; it seems to be
+heterogeneous between different servers and families of lab boxes.
+Sometimes it’s in <code>/u</code>, sometimes it isn’t.</p>
+<p>This 3rd-party documentation was very helpful to me: <a
+href="http://www.linux-consulting.com/Amd_AutoFS/"
+class="uri">http://www.linux-consulting.com/Amd_AutoFS/</a> It’s where
+Gentoo points for the AutoFS homepage, as it doesn’t have a real
+homepage. Arch just points to FreshMeat. Debian points to
+kernel.org.</p>
+<h3 id="lore">Lore</h3>
+<p><a
+href="https://en.wikipedia.org/wiki/List_of_Star_Trek:_The_Next_Generation_characters#Lore"><code>lore</code></a></p>
+<p>Lore is a SunOS 5.10 box running on Sun-Fire V445 (sun4u) hardware.
+SunOS is NOT GNU/Linux, and sun4u is NOT x86.</p>
+<p>Instead of <code>/etc/fstab</code>, it has
+<code>/etc/mnttab</code>.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2015 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/rails-improvements.html b/public/rails-improvements.html
new file mode 100644
index 0000000..def615e
--- /dev/null
+++ b/public/rails-improvements.html
@@ -0,0 +1,103 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>Miscellaneous ways to improve your Rails experience — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » rails-improvements</header>
+<article>
+<h1
+id="miscellaneous-ways-to-improve-your-rails-experience">Miscellaneous
+ways to improve your Rails experience</h1>
+<p>Recently, I’ve been working on <a
+href="https://github.com/LukeShu/leaguer">a Rails web application</a>,
+that’s really the baby of a friend of mine. Anyway, through its
+development, I’ve come up with a couple things that should make your
+interactions with Rails more pleasant.</p>
+<h2
+id="auto-reload-classes-from-other-directories-than-app">Auto-(re)load
+classes from other directories than <code>app/</code></h2>
+<p>The development server automatically loads and reloads files from the
+<code>app/</code> directory, which is extremely nice. However, most web
+applications are going to involve modules that aren’t in that directory,
+and editing those files requires restarting the server for the changes
+to take effect.</p>
+<p>Adding the following lines to your <a
+href="https://github.com/LukeShu/leaguer/blob/c846cd71411ec3373a5229cacafe0df6b3673543/config/application.rb#L15"><code>config/application.rb</code></a>
+will allow it to automatically load and reload files from the
+<code>lib/</code> directory. You can of course change this to whichever
+directory/ies you like.</p>
+<pre><code>module YourApp
+ class Application &lt; Rails::Application
+ …
+ config.autoload_paths += [&quot;#{Rails.root}/lib&quot;]
+ config.watchable_dirs[&quot;#{Rails.root}/lib&quot;] = [:rb]
+ …
+ end
+end</code></pre>
+<h2 id="have-submit_tag-generate-a-button-instead-of-an-input">Have
+<code>submit_tag</code> generate a button instead of an input</h2>
+<p>In HTML, the <code>&lt;input type="submit"&gt;</code> tag is styled
+slightly differently than other inputs or buttons. It is impossible to
+precisely control its height via CSS, which makes designing forms a
+pain. This is particularly noticeable if you use Bootstrap 3 and put it
+next to another button; the submit button will be slightly shorter
+vertically.</p>
+<p>The obvious fix here is to use
+<code>&lt;button type="submit"&gt;</code> instead. The following code
+will modify the default Rails form helpers to generate a button tag
+instead of an input tag. Just stick the code in <a
+href="https://github.com/LukeShu/leaguer/blob/521eae01be1ca3f69b47b3170a0548c3268f4a22/config/initializers/form_improvements.rb"><code>config/initializers/form_improvements.rb</code></a>;
+it will override
+<code>ActionView::Helpers::FormTagHelper#submit_tag</code>. It is mostly
+the standard definition of the function, except for the last line, which
+has changed.</p>
+<pre><code># -*- ruby-indent-level: 2; indent-tabs-mode: nil -*-
+module ActionView
+ module Helpers
+ module FormTagHelper
+
+ # This is modified from actionpack-4.0.2/lib/action_view/helpers/form_tag_helper.rb#submit_tag
+ def submit_tag(value = &quot;Save changes&quot;, options = {})
+ options = options.stringify_keys
+
+ if disable_with = options.delete(&quot;disable_with&quot;)
+ message = &quot;:disable_with option is deprecated and will be removed from Rails 4.1. &quot; \
+ &quot;Use &#39;data: { disable_with: \&#39;Text\&#39; }&#39; instead.&quot;
+ ActiveSupport::Deprecation.warn message
+
+ options[&quot;data-disable-with&quot;] = disable_with
+ end
+
+ if confirm = options.delete(&quot;confirm&quot;)
+ message = &quot;:confirm option is deprecated and will be removed from Rails 4.1. &quot; \
+ &quot;Use &#39;data: { confirm: \&#39;Text\&#39; }&#39; instead&#39;.&quot;
+ ActiveSupport::Deprecation.warn message
+
+ options[&quot;data-confirm&quot;] = confirm
+ end
+
+ content_tag(:button, value, { &quot;type&quot; =&gt; &quot;submit&quot;, &quot;name&quot; =&gt; &quot;commit&quot;, &quot;value&quot; =&gt; value }.update(options))
+ end
+
+ end
+ end
+end</code></pre>
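+<p>With that initializer in place, views don’t change at all; a
+hypothetical <code>&lt;%= submit_tag "Save" %&gt;</code> now renders
+as</p>
+<pre><code>&lt;button type=&quot;submit&quot; name=&quot;commit&quot; value=&quot;Save&quot;&gt;Save&lt;/button&gt;</code></pre>
+<p>instead of the stock <code>&lt;input type="submit"&gt;</code> tag.</p>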
+<p>I’ll probably update this page as I tweak other things I don’t
+like.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2014 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/ryf-routers.html b/public/ryf-routers.html
new file mode 100644
index 0000000..5f8cf7a
--- /dev/null
+++ b/public/ryf-routers.html
@@ -0,0 +1,54 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>I'm excited about the new RYF-certified routers from ThinkPenguin — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » ryf-routers</header>
+<article>
+<h1
+id="im-excited-about-the-new-ryf-certified-routers-from-thinkpenguin">I’m
+excited about the new RYF-certified routers from ThinkPenguin</h1>
+<p>I just learned that on Wednesday, the FSF <a
+href="https://www.fsf.org/resources/hw/endorsement/thinkpenguin">awarded</a>
+the <abbr title="Respects Your Freedom">RYF</abbr> certification to the
+<a href="https://www.thinkpenguin.com/TPE-NWIFIROUTER">Think Penguin
+TPE-NWIFIROUTER</a> wireless router.</p>
+<p>I didn’t find this information directly published up front, but put
+simply: it is a re-branded <strong>TP-Link TL-841ND</strong> modded to
+run <a href="http://librecmc.com/">libreCMC</a>.</p>
+<p>I’ve been a fan of the TL-841/740 line of routers for several years
+now. They are dirt cheap (if you go to Newegg and sort by “cheapest,”
+it’s frequently the TL-740N), are extremely reliable, and run OpenWRT
+like a champ. They are my go-to routers.</p>
+<p>(And they sure beat the snot out of the Arris TG862 that it seems
+like everyone has in their homes now. I hate that thing; it even has
+buggy packet scheduling.)</p>
+<p>So this announcement is <del>doubly</del>triply exciting for me:</p>
+<ul>
+<li>I have a solid recommendation for a router that doesn’t require me
+or them to manually install an after-market firmware (buy it from
+ThinkPenguin).</li>
+<li>If it’s for me, or someone technical, I can cut costs by getting a
+stock TP-Link from Newegg and installing libreCMC ourselves.</li>
+<li>I can install a 100% libre distribution on my existing routers
+(until recently, they were not supported by any of the libre
+distributions; not for technical reasons, but for lack of manpower).</li>
+</ul>
+<p>I hope to get libreCMC installed on my boxes this weekend!</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2014 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/term-colors.html b/public/term-colors.html
new file mode 100644
index 0000000..55827b9
--- /dev/null
+++ b/public/term-colors.html
@@ -0,0 +1,56 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>An explanation of common terminal emulator color codes — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » term-colors</header>
+<article>
+<h1 id="an-explanation-of-common-terminal-emulator-color-codes">An
+explanation of common terminal emulator color codes</h1>
+<p>This is based on a post on <a
+href="http://www.reddit.com/r/commandline/comments/1aotaj/solarized_is_a_sixteen_color_palette_designed_for/c8ztxpt?context=1">reddit</a>,
+published on 2013-03-21.</p>
+<blockquote>
+<p>So all terminals support the same 256 colors? What about 88 color
+mode: is that a subset?</p>
+</blockquote>
+<p>TL;DR: yes</p>
+<p>Terminal compatibility is crazy complex, because nobody actually
+reads the spec; they just write something that passes their own
+compatibility tests. Then everything else has to be compatible with that
+terminal’s quirks.</p>
+<p>But, here’s how 8-color, 16-color, and 256-color modes work. IIRC,
+88-color is a subset of the 256-color scheme, but I’m not sure.</p>
+<p><strong>8 colors: (actually 9)</strong> First we had 8 colors (9 with
+“default”, which doesn’t have to be one of the 8). These are always
+roughly the same colors: black, red, green, yellow/orange, blue, purple,
+cyan, and white, which are colors 0–7 respectively. Color 9 is the
+default.</p>
+<p><strong>16 colors: (actually 18)</strong> Later, someone wanted to
+add more colors, so they added a “bright” attribute. So when bright is
+on, you get “bright red” instead of “red”. Hence 8*2=16 (plus two more
+for “default” and “bright default”).</p>
+<p><strong>256 colors: (actually 274)</strong> You may have noticed,
+colors 0–7 and 9 are used, but 8 isn’t. So, someone decided that color 8
+should put the terminal into 256-color mode. In this mode, it reads
+another byte, which is an index into a 256-entry palette (the 16 colors
+from 16-color mode, a 6×6×6 RGB color cube, and 24 shades of gray). The
+bright property has no effect on these colors. However, a terminal can
+display 256-color-mode colors and 16-color-mode colors at the same time,
+so you actually get 256+18 colors.</p>
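+<p>As a concrete illustration (my addition, not part of the original
+reddit post), here are the escape sequences most terminals accept for
+each mode:</p>
+<pre><code>#!/usr/bin/env bash
+printf &#39;\e[31m%s\e[0m\n&#39;       &#39;red        (8-color mode)&#39;
+printf &#39;\e[1;31m%s\e[0m\n&#39;     &#39;bright red (16-color mode)&#39;
+printf &#39;\e[38;5;208m%s\e[0m\n&#39; &#39;orange     (256-color mode; palette index 208)&#39;</code></pre>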
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2013 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/what-im-working-on-fall-2014.html b/public/what-im-working-on-fall-2014.html
new file mode 100644
index 0000000..3f58032
--- /dev/null
+++ b/public/what-im-working-on-fall-2014.html
@@ -0,0 +1,157 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>What I'm working on (Fall 2014) — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » what-im-working-on-fall-2014</header>
+<article>
+<h1 id="what-im-working-on-fall-2014">What I’m working on (Fall
+2014)</h1>
+<p>I realized today that I haven’t updated my log in a while, and I
+don’t have any “finished” stuff to show off right now, so I should just
+talk about all the cool stuff I’m working on.</p>
+<h2 id="static-parsing-of-subshells">Static parsing of subshells</h2>
+<p>Last year I wrote a shell (for my Systems Programming class);
+however, I went above-and-beyond and added some really novel features.
+In my opinion, the most significant is that it parses arbitrarily deep
+subshells in one pass, instead of deferring them until execution. No
+shell that I know of does this.</p>
+<p>At first this sounds like a really difficult, but minor, feature,
+until you think about scripting, and maintenance of those scripts. Being
+able to do a full syntax check of a script is <em>crucial</em> for
+long-term maintenance, yet it’s something that is missing from every
+major shell. I’d love to get this code merged into bash. It would be
+incredibly useful for <a
+href="/git/mirror/parabola/packages/libretools.git">some software I
+maintain</a>.</p>
+<p>Anyway, I’m trying to publish this code, but because of a recent
+kerfuffle with a student publishing all of his projects on the web (and
+other students trying to pass them off as their own), I’m being cautious
+with this and making sure Purdue is alright with what I’m putting
+online.</p>
+<h2 id="stateless-user-configuration-for-pamnss"><a
+href="https://lukeshu.com/git/mirror/parabola/hackers.git/log/?h=lukeshu/restructure">Stateless
+user configuration for PAM/NSS</a></h2>
+<p>Parabola GNU/Linux-libre users know that over this summer, we had a
+<em>mess</em> with server outages. One of the servers is still out (due
+to things out of our control), and we don’t have some of the data on it
+(because volunteer developers are terrible about back-ups,
+apparently).</p>
+<p>This has caused us to look at how we manage our servers, back-ups,
+and several other things.</p>
+<p>One thing that I’ve taken on as my pet project is making sure that if
+a server goes down, or we need to migrate (for example, Jon is telling
+us that he wants us to hurry up and switch to the new 64 bit hardware so
+he can turn off the 32 bit box), we can spin up a new server from
+scratch pretty easily. Part of that is making configurations stateless,
+and dynamic based on external data; having data be located in one place
+instead of duplicated across 12 config files and 3 databases… on the
+same box.</p>
+<p>Right now, that’s looking like some custom software interfacing with
+OpenLDAP and OpenSSH via sockets (OpenLDAP being a middle-man between us
+and PAM (Linux) and NSS (libc)). However, the OpenLDAP documentation is…
+inconsistent and frustrating. I might end up hacking up the LDAP modules
+for NSS and PAM to talk to our system directly, and cut OpenLDAP out of
+the picture. We’ll see!</p>
+<p>PS: Pablo says that tomorrow we should be getting out-of-band access
+to the drive of the server that is down, so that we can finally restore
+those services on a different server.</p>
+<h2 id="project-leaguer"><a
+href="https://lukeshu.com/git/mirror/leaguer.git/">Project
+Leaguer</a></h2>
+<p>Last year, some friends and I began writing some “eSports tournament
+management software”, primarily targeting League of Legends (though it
+has a module system that will allow it to support tons of different data
+sources). We mostly got it done last semester, but it has some rough
+spots and sharp edges we need to work out. Because we were all out of
+communication for the summer, we didn’t work on it very much (but we did
+a little!). It’s weird that I care about this, because I’m not a gamer.
+Huh, I guess coding with friends is just fun.</p>
+<p>Anyway, this year, <a
+href="https://github.com/AndrewMurrell">Andrew</a>, <a
+href="https://github.com/DavisLWebb">Davis</a>, and I are planning to
+get it to a polished state by the end of the semester. We could probably
+do it faster, but we’d all also like to focus on classes and other
+projects a little more.</p>
+<h2 id="c1">C+=1</h2>
+<p>People tend to lump C and C++ together, which upsets me, because I
+love C, but have a dislike for C++. That’s not to say that C++ is
+entirely bad; it has some good features. My “favorite” code is actually
+code that is basically C, but takes advantage of a couple of C++
+features, while still being idiomatic C, not C++.</p>
+<p>Anyway, with the perspective of history (what worked and what
+didn’t), and a slightly opinionated view on language design (I’m pretty
+much a Rob Pike fan-boy), I thought I’d try to tackle “object-oriented
+C” with roughly the same design criteria as Stroustrup had when
+designing C++. I’m calling mine C+=1, for obvious reasons.</p>
+<p>I haven’t published anything yet, because calling it “working” would
+be stretching the truth. But I am using it for my assignments in CS 334
+(Intro to Graphics), so it should move along fairly quickly, as my grade
+depends on it.</p>
+<p>I’m not taking it too seriously; I don’t expect it to be much more
+than a toy language, but it is an excuse to dive into the GCC
+internals.</p>
+<h2 id="projects-that-ive-put-on-the-back-burner">Projects that I’ve put
+on the back-burner</h2>
+<p>I’ve got several other projects that I’m putting on hold for a
+while.</p>
+<ul>
+<li><code>maven-dist</code> (was hosted with Parabola, apparently I
+haven’t pushed it anywhere except the server that is down): A tool to
+build Apache Maven from source. That sounds easy; it’s open source,
+right? Well, except that Maven is the build system from hell. It doesn’t
+support cyclic dependencies, yet uses them internally to build itself.
+It <em>loves</em> to just get binaries from Maven Central to “optimize”
+the build process. It depends on code that depends on compiler bugs that
+no longer exist (which I guess means that <em>no one</em> has tried to
+build it from source after it was originally published). I’ve been
+working on-and-off on this for more than a year. My favorite part of it
+was writing a <a href="/dump/jflex2jlex.sed.txt">sed script</a> that
+translates a JFlex grammar specification into a JLex grammar, which is
+used to bootstrap JFlex; it’s both gross and delightful at the same
+time.</li>
+<li>Integration between <code>dbscripts</code> and
+<code>abslibre</code>. If you search IRC logs, mailing lists, and
+ParabolaWiki, you can find numerous rants by me against <a
+href="/git/mirror/parabola/dbscripts.git/tree/db-sync"><code>dbscripts:db-sync</code></a>.
+I just hate the data-flow; it is almost designed to make things get out
+of sync and broken. I mean, does <a
+href="/dump/parabola-data-flow.svg">this</a> look like a simple diagram?
+For contrast, <a href="/dump/parabola-data-flow-xbs.svg">here’s</a> a
+rough (slightly incomplete) diagram of what I want to replace it
+with.</li>
+<li>Git backend for MediaWiki (or, pulling out the rendering module of
+MediaWiki). I’ve made decent progress on that front, but there is
+<em>crazy</em> de-normalization going on in the MediaWiki schema that
+makes this very difficult. I’m sure some of it is for historical
+reasons, and some of it for performance, but either way it is a mess for
+someone trying to neatly gut that part of the codebase.</li>
+</ul>
+<h2 id="other">Other</h2>
+<p>I should consider doing a write-up of deterministic-<code>tar</code>
+behavior (something that I’ve been implementing in Parabola for a while;
+meanwhile, the Debian people have also been working on it).</p>
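+<p>(The gist of deterministic-<code>tar</code> behavior, for the
+curious, is pinning every input that varies between runs; with a modern
+GNU tar, that looks something like the following.)</p>
+<pre><code>tar --sort=name --mtime=&quot;2014-01-01 UTC&quot; \
+    --owner=0 --group=0 --numeric-owner \
+    -cf package.tar ./src</code></pre>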
+<p>I should also consider doing a “post-mortem” of <a
+href="https://lukeshu.com/git/mirror/parabola/packages/pbs-tools.git/">PBS</a>,
+which never actually got used, but launched XBS (part of the
+<code>dbscripts</code>/<code>abslibre</code> integration mentioned
+above), as well as serving as a good test-bed for features that did get
+implemented.</p>
+<p>I over-use the word “anyway.”</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2014 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/public/x11-systemd.html b/public/x11-systemd.html
new file mode 100644
index 0000000..a26b00d
--- /dev/null
+++ b/public/x11-systemd.html
@@ -0,0 +1,387 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>My X11 setup with systemd — Luke T. Shumaker</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="stylesheet" href="assets/style.css">
+ <link rel="alternate" type="application/atom+xml" href="./index.atom" name="web log entries"/>
+</head>
+<body>
+<header><a href="/">Luke T. Shumaker</a> » <a href=/blog>blog</a> » x11-systemd</header>
+<article>
+<h1 id="my-x11-setup-with-systemd">My X11 setup with systemd</h1>
+<p>Somewhere along the way, I decided that using systemd user sessions
+to manage the various parts of my X11 environment would be a good idea.
+Whether or not that was a good idea… we’ll see.</p>
+<p>I’ve sort-of been running this setup as my daily-driver for <a
+href="https://lukeshu.com/git/dotfiles.git/commit/?id=a9935b7a12a522937d91cb44a0e138132b555e16">a
+bit over a year</a>, though I’m continually tweaking it.</p>
+<p>My setup is substantially different from the one on <a
+href="https://wiki.archlinux.org/index.php/Systemd/User">ArchWiki</a>,
+because the ArchWiki solution assumes that there is only ever one X
+server for a user; I like the ability to run <code>Xorg</code> on my
+real monitor, and also have <code>Xvnc</code> running headless, or start
+my desktop environment on a remote X server. Though, I would like to
+figure out how to use systemd socket activation for the X server, as the
+ArchWiki solution does.</p>
+<p>This means that all of my graphical units take <code>DISPLAY</code>
+as an <code>@</code> argument. To get this to all work out, this goes in
+each <code>.service</code> file, unless otherwise noted:</p>
+<pre><code>[Unit]
+After=X11@%i.target
+Requisite=X11@%i.target
+[Service]
+Environment=DISPLAY=%I</code></pre>
+<p>We’ll get to <code>X11@.target</code> later; what it says is “I
+should only be running if X11 is running”.</p>
+<p>I eschew complex XDMs or <code>startx</code> wrapper scripts, opting
+for the simpler <code>xinit</code>, which I either run on login on some
+boxes (my media station), or type <code>xinit</code> when I want
+X11 on others (most everything else). Essentially, what
+<code>xinit</code> does is run <code>~/.xserverrc</code> (or
+<code>/etc/X11/xinit/xserverrc</code>) to start the server, then once
+the server is started (which takes a substantial amount of magic to
+detect) it runs <code>~/.xinitrc</code> (or
+<code>/etc/X11/xinit/xinitrc</code>) to start the clients. Once
+<code>.xinitrc</code> finishes running, it stops the X server and exits.
+Now, when I say “run”, I don’t mean execute; it passes each file to the
+system shell (<code>/bin/sh</code>) as input.</p>
+<p>Xorg requires a TTY to run on; if we log in to a TTY with
+<code>logind</code>, it will give us the <code>XDG_VTNR</code> variable
+to tell us which one we have, so I pass this to <code>X</code> in <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/X11/serverrc">my
+<code>.xserverrc</code></a>:</p>
+<pre><code>#!/hint/sh
+if [ -z &quot;$XDG_VTNR&quot; ]; then
+ exec /usr/bin/X -nolisten tcp &quot;$@&quot;
+else
+ exec /usr/bin/X -nolisten tcp &quot;$@&quot; vt$XDG_VTNR
+fi</code></pre>
+<p>This was the default for <a
+href="https://projects.archlinux.org/svntogit/packages.git/commit/trunk/xserverrc?h=packages/xorg-xinit&amp;id=f9f5de58df03aae6c8a8c8231a83327d19b943a1">a
+while</a> in Arch, to support <code>logind</code>, but was <a
+href="https://projects.archlinux.org/svntogit/packages.git/commit/trunk/xserverrc?h=packages/xorg-xinit&amp;id=5a163ddd5dae300e7da4b027e28c37ad3b535804">later
+removed</a> in part because <code>startx</code> (which calls
+<code>xinit</code>) started adding it as an argument as well, so
+<code>vt$XDG_VTNR</code> was being listed as an argument twice, which is
+an error. IMO, that was a problem in <code>startx</code>, and they
+shouldn’t have removed it from the default system
+<code>xserverrc</code>, but that’s just me. So I copy/pasted it into my
+user <code>xserverrc</code>.</p>
+<p>That’s the boring part, though. Where the magic starts happening is
+in <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/X11/clientrc">my
+<code>.xinitrc</code></a>:</p>
+<pre><code>#!/hint/sh
+
+if [ -z &quot;$XDG_RUNTIME_DIR&quot; ]; then
+ printf &quot;XDG_RUNTIME_DIR isn&#39;t set\n&quot; &gt;&amp;2
+ exit 6
+fi
+
+_DISPLAY=&quot;$(systemd-escape -- &quot;$DISPLAY&quot;)&quot;
+trap &quot;rm -f $(printf &#39;%q&#39; &quot;${XDG_RUNTIME_DIR}/x11-wm@${_DISPLAY}&quot;)&quot; EXIT
+mkfifo &quot;${XDG_RUNTIME_DIR}/x11-wm@${_DISPLAY}&quot;
+
+cat &lt; &quot;${XDG_RUNTIME_DIR}/x11-wm@${_DISPLAY}&quot; &amp;
+systemctl --user start &quot;X11@${_DISPLAY}.target&quot; &amp;
+wait
+systemctl --user stop &quot;X11@${_DISPLAY}.target&quot;</code></pre>
+<p>There are two contracts/interfaces here: the
+<code>X11@DISPLAY.target</code> systemd target, and the
+<code>${XDG_RUNTIME_DIR}/x11-wm@DISPLAY</code> named pipe. The systemd
+<code>.target</code> should be pretty self explanatory; the most
+important part is that it starts the window manager. The named pipe is
+just a hacky way of blocking until the window manager exits
+(“traditional” <code>.xinitrc</code> files end with the line
+<code>exec your-window-manager</code>, so this mimics that behavior). It
+works by assuming that the window manager will open the pipe at startup,
+and keep it open (without necessarily writing anything to it); when the
+window manager exits, the pipe will get closed, sending EOF to the
+<code>wait</code>ed-for <code>cat</code>, allowing it to exit, letting
+the script resume. The window manager (WMII) is made to have the pipe
+opened by executing it this way in <a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wmii@.service">its
+<code>.service</code> file</a>:</p>
+<pre><code>ExecStart=/usr/bin/env bash -c &#39;exec 8&gt;${XDG_RUNTIME_DIR}/x11-wm@%I; exec wmii&#39;</code></pre>
+<p>which just opens the file on file descriptor 8, then launches the
+window manager normally. The only further logic required by the window
+manager with regard to the pipe is that in the window manager <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/config.sh">configuration</a>,
+I should close that file descriptor after forking any process that isn’t
+“part of” the window manager:</p>
+<pre><code>runcmd() (
+ ...
+ exec 8&gt;&amp;- # xinit/systemd handshake
+ ...
+)</code></pre>
+<p>So, back to the <code>X11@DISPLAY.target</code>; I configure what it
+“does” with symlinks in the <code>.requires</code> and
+<code>.wants</code> directories:</p>
+<ul class="tree">
+<li>
+<p><a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user">.config/systemd/user/</a></p>
+<ul>
+<li><a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/X11@.target">X11@.target</a></li>
+<li><a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/X11@.target.requires">X11@.target.requires</a>/
+<ul>
+<li>wmii@.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wmii@.service">wmii@.service</a></li>
+</ul></li>
+<li><a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/X11@.target.wants">X11@.target.wants</a>/
+<ul>
+<li>xmodmap@.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xmodmap@.service">xmodmap@.service</a></li>
+<li>xresources-dpi@.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xresources-dpi@.service">xresources-dpi@.service</a></li>
+<li>xresources@.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xresources@.service">xresources@.service</a></li>
+</ul></li>
+</ul>
+</li>
+</ul>
+<p>The <code>.requires</code> directory is how I configure which window
+manager it starts. This would allow me to configure different window
+managers on different displays, by creating a <code>.requires</code>
+directory with the <code>DISPLAY</code> included,
+e.g. <code>X11@:2.requires</code>.</p>
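+<p>For reference, the <code>X11@.target</code> unit file itself is
+nearly empty, since the <code>.xinitrc</code> above starts and stops it
+explicitly; a minimal sketch (my actual unit may differ) is just:</p>
+<pre><code>[Unit]
+Description=X11 running on display %I</code></pre>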
+<p>The <code>.wants</code> directory is for general X display setup;
+it’s analogous to <code>/etc/X11/xinit/xinitrc.d/</code>. All of the
+files in it are simple <code>Type=oneshot</code> service files. The <a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xmodmap@.service">xmodmap</a>
+and <a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xresources@.service">xresources</a>
+files are pretty boring, they’re just systemd versions of the couple
+lines that just about every traditional <code>.xinitrc</code> contains,
+the biggest difference being that they look at <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/X11/modmap"><code>~/.config/X11/modmap</code></a>
+and <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/X11/resources"><code>~/.config/X11/resources</code></a>
+instead of the traditional locations <code>~/.xmodmap</code> and
+<code>~/.Xresources</code>.</p>
+<p>What’s possibly of note is <a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xresources-dpi@.service"><code>xresources-dpi@.service</code></a>.
+In X11, there are two sources of DPI information, the X display
+resolution, and the XRDB <code>Xft.dpi</code> setting. It isn’t defined
+which takes precedence (to my knowledge), and even if it were (is),
+application authors wouldn’t be arsed to actually do the right thing.
+For years, Firefox (well, Iceweasel) happily listened to the X display
+resolution, but recently it decided to only look at
+<code>Xft.dpi</code>, which objectively seems a little silly, since the
+X display resolution is always present, but <code>Xft.dpi</code> isn’t.
+Anyway, Mozilla’s change drove me to create a <a
+href="https://lukeshu.com/git/dotfiles/tree/.local/bin/xrdb-set-dpi">script</a>
+to make the <code>Xft.dpi</code> setting match the X display resolution.
+Disclaimer: I have no idea if it works if the X server has multiple
+displays (with possibly varying resolution).</p>
+<pre><code>#!/usr/bin/env bash
+dpi=$(LC_ALL=C xdpyinfo|sed -rn &#39;s/^\s*resolution:\s*(.*) dots per inch$/\1/p&#39;)
+xrdb -merge &lt;&lt;&lt;&quot;Xft.dpi: ${dpi}&quot;</code></pre>
+<p>Since we want XRDB to be set up before any other programs launch, we
+give both of the <code>xresources</code> units
+<code>Before=X11@%i.target</code> (instead of <code>After=</code> like
+everything else). Also, two programs writing to <code>xrdb</code> at the
+same time have the same problem as two programs writing to the same
+file: one might trash the other’s changes. So, I stuck
+<code>Conflicts=xresources@%i.service</code> into
+<code>xresources-dpi@.service</code>.</p>
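+<p>Putting those pieces together, the <code>xresources@.service</code>
+oneshot boils down to something like this sketch (the real unit may
+differ in details):</p>
+<pre><code>[Unit]
+Before=X11@%i.target
+
+[Service]
+Type=oneshot
+Environment=DISPLAY=%I
+ExecStart=/usr/bin/xrdb -merge %h/.config/X11/resources</code></pre>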
+<p>And that’s the “core” of my X11 systemd setup. But, you generally
+want more things running than just the window manager, like a desktop
+notification daemon, a system panel, and an X composition manager
+(unless your window manager is bloated and has a composition manager
+built in). Since these things are probably window-manager specific, I’ve
+stuck them in a directory <code>wmii@.service.wants</code>:</p>
+<ul class="tree">
+<li>
+<p><a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user">.config/systemd/user/</a></p>
+<ul>
+<li><a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wmii@.service.wants">wmii@.service.wants</a>/
+<ul>
+<li>dunst@.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/dunst@.service">dunst@.service</a>       
+# a notification daemon</li>
+<li>lxpanel@.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/lxpanel@.service">lxpanel@.service</a>   
+# a system panel</li>
+<li>rbar@97_acpi.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/rbar@.service">rbar@.service</a>  
+# wmii stuff</li>
+<li>rbar@99_clock.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/rbar@.service">rbar@.service</a> 
+# wmii stuff</li>
+<li>xcompmgr@.service -&gt; ../<a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/xcompmgr@.service">xcompmgr@.service</a> 
+# an X composition manager</li>
+</ul></li>
+</ul>
+</li>
+</ul>
+<p>For the window manager <code>.service</code>, I <em>could</em> just
+say <code>Type=simple</code> and call it a day (and I did for a while).
+But, I like to have <code>lxpanel</code> show up on all of my WMII tags
+(desktops), so I have <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/config.sh">my
+WMII configuration</a> stick this in the WMII <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/rules"><code>/rules</code></a>:</p>
+<pre><code>/panel/ tags=/.*/ floating=always</code></pre>
+<p>Unfortunately, for this to work, <code>lxpanel</code> must be started
+<em>after</em> that gets inserted into WMII’s rules. That wasn’t a
+problem pre-systemd, because <code>lxpanel</code> was started by my WMII
+configuration, so ordering was simple. For systemd to get this right, I
+must have a way of notifying systemd that WMII’s fully started, and it’s
+safe to start <code>lxpanel</code>. So, I stuck this in <a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wmii@.service">my
+WMII <code>.service</code> file</a>:</p>
+<pre><code># This assumes that you write READY=1 to $NOTIFY_SOCKET in wmiirc
+Type=notify
+NotifyAccess=all</code></pre>
+<p>and this in <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/wmiirc">my
+WMII configuration</a>:</p>
+<pre><code>systemd-notify --ready || true</code></pre>
+<p>Now, this setup means that <code>NOTIFY_SOCKET</code> is set for all
+the children of <code>wmii</code>; I’d rather not have it leak into the
+applications that I start from the window manager, so I also stuck
+<code>unset NOTIFY_SOCKET</code> after forking a process that isn’t part
+of the window manager:</p>
+<pre><code>runcmd() (
+ ...
+ unset NOTIFY_SOCKET # systemd
+ ...
+ exec 8&gt;&amp;- # xinit/systemd handshake
+ ...
+)</code></pre>
+<p>Unfortunately, because of a couple of <a
+href="https://github.com/systemd/systemd/issues/2739">bugs</a> and <a
+href="https://github.com/systemd/systemd/issues/2737">race
+conditions</a> in systemd, <code>systemd-notify</code> isn’t reliable.
+If systemd can’t receive the <code>READY=1</code> signal from my WMII
+configuration, there are two consequences:</p>
+<ol type="1">
+<li><code>lxpanel</code> will never start, because it will always be
+waiting for <code>wmii</code> to be ready, which will never happen.</li>
+<li>After a couple of minutes, systemd will consider <code>wmii</code>
+to be timed out, which is a failure, so then it will kill
+<code>wmii</code>, and exit my X11 session. That’s no good!</li>
+</ol>
+<p>Using <code>socat</code> to send the message to systemd instead of
+<code>systemd-notify</code> “should” always work, because it tries to
+read from both ends of the bi-directional stream, and I can’t imagine
+that getting EOF from the <code>UNIX-SENDTO</code> end will ever be
+faster than the systemd manager handling the datagram that got
+sent. Which is to say, “we work around the race condition by being slow
+and shitty.”</p>
+<pre><code>socat STDIO UNIX-SENDTO:&quot;$NOTIFY_SOCKET&quot; &lt;&lt;&lt;READY=1 || true</code></pre>
+<p>But, I don’t like that. I’d rather write my WMII configuration for
+the world as I wish it existed, and have workarounds encapsulated
+elsewhere;
+<a
+href="http://blog.robertelder.org/interfaces-most-important-software-engineering-concept/">“If
+you have to cut corners in your project, do it inside the
+implementation, and wrap a very good interface around it.”</a>. So, I
+wrote a <code>systemd-notify</code> compatible <a
+href="https://lukeshu.com/git/dotfiles.git/tree/.config/wmii-hg/workarounds.sh">function</a>
+that ultimately calls <code>socat</code>:</p>
+<pre><code>##
+# Just like systemd-notify(1), but slower, which is a shitty
+# workaround for a race condition in systemd.
+##
+systemd-notify() {
+ local args
+ args=&quot;$(getopt -n systemd-notify -o h -l help,version,ready,pid::,status:,booted -- &quot;$@&quot;)&quot;
+ ret=$?; [[ $ret == 0 ]] || return $ret
+ eval set -- &quot;$args&quot;
+
+ local arg_ready=false
+ local arg_pid=0
+ local arg_status=
+ while [[ $# -gt 0 ]]; do
+ case &quot;$1&quot; in
+ -h|--help) command systemd-notify --help; return $?;;
+ --version) command systemd-notify --version; return $?;;
+ --ready) arg_ready=true; shift 1;;
+ --pid) arg_pid=${2:-$$}; shift 2;;
+ --status) arg_status=$2; shift 2;;
+ --booted) command systemd-notify --booted; return $?;;
+ --) shift 1; break;;
+ esac
+ done
+
+ local our_env=()
+ if $arg_ready; then
+ our_env+=(&quot;READY=1&quot;)
+ fi
+ if [[ -n &quot;$arg_status&quot; ]]; then
+ our_env+=(&quot;STATUS=$arg_status&quot;)
+ fi
+ if [[ &quot;$arg_pid&quot; -gt 0 ]]; then
+ our_env+=(&quot;MAINPID=$arg_pid&quot;)
+ fi
+ our_env+=(&quot;$@&quot;)
+ local n
+ printf -v n &#39;%s\n&#39; &quot;${our_env[@]}&quot;
+ socat STDIO UNIX-SENDTO:&quot;$NOTIFY_SOCKET&quot; &lt;&lt;&lt;&quot;$n&quot;
+}</code></pre>
+<p>So, one day when the systemd bugs have been fixed (and presumably the
+Linux kernel supports passing the cgroup of a process as part of its
+credentials), I can remove that from <code>workarounds.sh</code>, and
+not have to touch anything else in my WMII configuration (I do use
+<code>systemd-notify</code> in a couple of other, non-essential, places
+too; this wasn’t to avoid having to change just 1 line).</p>
+<p>So, now that <code>wmii@.service</code> properly has
+<code>Type=notify</code>, I can just stick
+<code>After=wmii@.service</code> into my <code>lxpanel@.service</code>,
+right? Wrong! Well, I <em>could</em>, but my <code>lxpanel</code>
+service has nothing to do with WMII; why should I couple them? Instead,
+I create <a
+href="https://lukeshu.com/git/dotfiles/tree/.config/systemd/user/wm-running@.target"><code>wm-running@.target</code></a>
+that can be used as a synchronization point:</p>
+<pre><code># wmii@.service
+Before=wm-running@%i.target
+
+# lxpanel@.service
+After=X11@%i.target wm-running@%i.target
+Requires=wm-running@%i.target</code></pre>
+<p>Finally, I have my desktop started and running. Now, I’d like for
+programs that aren’t part of the window manager to not dump their stdout
+and stderr into WMII’s part of the journal; I’d like to have a record of
+which graphical programs crashed, and to have a prettier
+cgroup/process graph. So, I use <code>systemd-run</code> to run external
+programs from the window manager:</p>
+<pre><code>runcmd() (
+ ...
+ unset NOTIFY_SOCKET # systemd
+ ...
+ exec 8&gt;&amp;- # xinit/systemd handshake
+ exec systemd-run --user --scope -- sh -c &quot;$*&quot;
+)</code></pre>
+<p>I run them as a scope instead of a service so that they inherit
+environment variables, and don’t have to mess with getting
+<code>DISPLAY</code> or <code>XAUTHORITY</code> into their units (as I
+<em>don’t</em> want to make them global variables in my systemd user
+session).</p>
+<p>I’d like to get <code>lxpanel</code> to also use
+<code>systemd-run</code> when launching programs, but it’s a low
+priority because I don’t actually use <code>lxpanel</code> to
+launch programs; I just have the menu there to make sure that I didn’t
+break the icons for programs that I package (I did that once back when I
+was Parabola’s packager for Iceweasel and IceCat).</p>
+<p>And that’s how I use systemd with X11.</p>
+
+</article>
+<footer>
+ <aside class="sponsor"><p>I'd love it if you <a class="em"
+ href="/sponsor/">sponsored me</a>. It will allow me to continue
+ my work on the GNU/Linux ecosystem. Thanks!</p></aside>
+
+<p>The content of this page is Copyright © 2016 <a href="mailto:lukeshu@lukeshu.com">Luke T. Shumaker</a>.</p>
+<p>This page is licensed under the <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> license.</p>
+</footer>
+</body>
+</html>
diff --git a/util.rb b/util.rb
index 31c605f..cff8f1b 100644
--- a/util.rb
+++ b/util.rb
@@ -4,14 +4,14 @@ require 'erb'
require 'date'
$license_urls = {
- "CC BY-SA-3.0" => 'https://creativecommons.org/licenses/by-sa/3.0/',
+ "CC BY-SA 4.0" => 'https://creativecommons.org/licenses/by-sa/4.0/',
'WTFPL-2' => "http://www.wtfpl.net/txt/copying/",
}
$person_uris = {
- "Luke Shumaker" => "https://lukeshu.com/",
+ "Luke T. Shumaker" => "https://lukeshu.com/",
}
$person_emails = {
- "Luke Shumaker" => "lukeshu@sbcglobal.net",
+ "Luke T. Shumaker" => "lukeshu@lukeshu.com",
}
class Person
@@ -78,8 +78,8 @@ class Page
end
def title ; @title ||= pandoc['title'] || input.split("\n",2).first ; end
- def author ; @author ||= Person.new( pandoc['author'] || "Luke Shumaker") ; end
- def license ; @license ||= License.new(pandoc['license'] || "CC BY-SA-3.0") ; end
+ def author ; @author ||= Person.new( pandoc['author'] || "Luke T. Shumaker") ; end
+ def license ; @license ||= License.new(pandoc['license'] || "CC BY-SA 4.0") ; end
def date ; @date ||= Date.parse(pandoc['date']) unless pandoc['date'].nil? ; end
def slug ; @slug ||= infile.sub(/\..*$/,'').sub(/^.*\//,'') ; end
def content ; @content ||= pandoc.to('html5') ; end
@@ -90,7 +90,7 @@ class Page
end
def breadcrumbs
- @breadcrumbs ||= '<a href="/">Luke Shumaker</a> » ' + ( (slug == 'index') ? "blog" : "<a href=/blog>blog</a> » #{slug}" )
+ @breadcrumbs ||= '<a href="/">Luke T. Shumaker</a> » ' + ( (slug == 'index') ? "blog" : "<a href=/blog>blog</a> » #{slug}" )
end
end
diff --git a/write-atomic b/write-atomic
index efb2551..d28f001 100755
--- a/write-atomic
+++ b/write-atomic
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-# Copyright (C) 2015-2016 Luke Shumaker
+# Copyright (C) 2015-2016 Luke T. Shumaker
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
diff --git a/write-ifchanged b/write-ifchanged
index 185ceb0..1ca1128 100755
--- a/write-ifchanged
+++ b/write-ifchanged
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-# Copyright (C) 2015 Luke Shumaker
+# Copyright (C) 2015 Luke T. Shumaker
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by