blob: dd2435851f5eef05522244c137de7b7d6750c8fe (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
#!/hint/bash
# json.sh - A pure-Bash validating JSON tokenizer
#
# Copyright © 2024 Luke T. Shumaker <lukeshu@lukeshu.com>
#
# This work is free. You can redistribute it and/or modify it under the
# terms of the Do What The Fuck You Want To Public License, Version 2,
# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
# Usage: json_tokenize myfn <input
#
# Call `myfn` repeatedly with any of the following values:
#
# myfn object_beg
# myfn object_key $val
# myfn object_end
#
# myfn array_beg
# myfn array_end
#
# myfn number $val
# myfn string $val
# myfn null
# myfn true
# myfn false
#
# myfn error $format $args...
#
# Assumptions:
# - `set -e`
# - Input is valid UTF-8
# - LC_* is set to a UTF-8 value
# json_tokenize CALLBACK <input
#
# Read all of stdin into a buffer, tokenize exactly one JSON value from
# it, and invoke CALLBACK once per token.  Anything left in the buffer
# after the value and trailing whitespace is reported through
# _json_error.
#
# $_json_token and $_json_buf are function-local; the _json_* helpers
# called from here read and modify them through dynamic scoping.
json_tokenize() {
	local _json_token=$1 _json_buf
	_json_buf=$(cat)

	_json_value
	_json_ws

	# Nothing left over: the document was exactly one value.
	if [[ -z "$_json_buf" ]]; then
		return 0
	fi
	_json_error 'unexpected data after json: %q' "$_json_buf"
}
# _json_error FORMAT [ARGS...]
#
# Report a tokenizer error to the callback as
#     "$_json_token" error FORMAT ARGS...
# where FORMAT is a printf-style format string and ARGS are its
# arguments.  Always returns 1, so that under `set -e` the failure
# aborts the tokenizer.
_json_error() {
	# The `error` tag is what lets the callback tell an error report
	# apart from a regular token.
	"$_json_token" error "$@"
	return 1
}
# _json_expect CHAR
#
# Check that the next character of $_json_buf is CHAR, reporting a
# mismatch through _json_error, then drop that character from the
# buffer.
_json_expect() {
	case "${_json_buf::1}" in
		"$1" ) ;;
		* ) _json_error 'expected character %q, got %q' "$1" "${_json_buf::1}" ;;
	esac
	_json_buf=${_json_buf:1}
}
# Strip leading JSON whitespace (space, tab, newline, carriage return)
# from the front of $_json_buf.
_json_ws() {
	while true; do
		case "${_json_buf::1}" in
			' ' | $'\t' | $'\n' | $'\r' ) _json_buf=${_json_buf:1} ;;
			* ) break ;;
		esac
	done
}
# Skip leading whitespace, then dispatch on the first character of
# $_json_buf to the parser for that kind of JSON value.  A character
# that cannot start a value is reported through _json_error.
_json_value() {
	_json_ws
	local _json_head=${_json_buf::1}
	if [[ "$_json_head" == '{' ]]; then
		_json_object
	elif [[ "$_json_head" == '[' ]]; then
		_json_array
	elif [[ "$_json_head" == '"' ]]; then
		_json_string string
	elif [[ "$_json_head" == 't' ]]; then
		_json_lit 'true'
	elif [[ "$_json_head" == 'f' ]]; then
		_json_lit 'false'
	elif [[ "$_json_head" == 'n' ]]; then
		_json_lit 'null'
	elif [[ "$_json_head" == [-+0-9] ]]; then
		_json_number
	else
		_json_error 'unexpected value-start character: %q' "${_json_buf::1}"
	fi
}
# Parse a JSON object at the front of $_json_buf.
#
# Emits `object_beg`, then for each member an `object_key` token (via
# _json_string) followed by the tokens of its value (via _json_value),
# and finally `object_end`.
#
# Anything other than a key or `}` after the opening brace, and
# anything other than `,` or `}` after a member, is reported through
# _json_error and makes this function return 1.
_json_object() {
	_json_expect '{'
	"$_json_token" object_beg
	_json_ws
	case "${_json_buf::1}" in
		'}' )
			# Empty object.
			_json_buf=${_json_buf:1}
			"$_json_token" object_end
			return
			;;
		'"' )
			# First member follows; handled by the loop below.
			;;
		* )
			_json_error 'expected object key or "}", got: %q' "${_json_buf::1}"
			return 1
			;;
	esac
	while true; do
		_json_string object_key
		_json_ws
		_json_expect ':'
		_json_value
		_json_ws
		case "${_json_buf::1}" in
			',' )
				_json_buf=${_json_buf:1}
				_json_ws
				;;
			'}' )
				_json_buf=${_json_buf:1}
				"$_json_token" object_end
				return
				;;
			* )
				# Without this arm a missing comma would be
				# silently accepted by the next iteration.
				_json_error 'expected "," or "}" after object member, got: %q' "${_json_buf::1}"
				return 1
				;;
		esac
	done
}
# Parse a JSON array at the front of $_json_buf.
#
# Emits `array_beg`, then the tokens of each element (via _json_value),
# and finally `array_end`.
#
# Anything other than `,` or `]` after an element is reported through
# _json_error and makes this function return 1.
_json_array() {
	_json_expect '['
	"$_json_token" array_beg
	_json_ws
	if [[ "${_json_buf::1}" == ']' ]]; then
		# Empty array.
		_json_buf=${_json_buf:1}
		"$_json_token" array_end
		return
	fi
	while true; do
		_json_value
		_json_ws
		case "${_json_buf::1}" in
			',' )
				_json_buf=${_json_buf:1}
				_json_ws
				;;
			']' )
				_json_buf=${_json_buf:1}
				"$_json_token" array_end
				return
				;;
			* )
				# Without this arm a missing comma would be
				# silently accepted by the next iteration.
				_json_error 'expected "," or "]" after array element, got: %q' "${_json_buf::1}"
				return 1
				;;
		esac
	done
}
# _json_string KIND
#
# $1=(object_key|string): the token type to emit.
#
# Parse a JSON string literal at the front of $_json_buf, decode its
# backslash escapes, and call `"$_json_token" KIND "$decoded"`.
#
# \uXXXX escapes are decoded with printf's \U escape.  A UTF-16
# surrogate pair (high \uD800-\uDBFF followed by low \uDC00-\uDFFF) is
# combined into a single code point; a lone or misordered surrogate is
# an error.
#
# Problems are reported through _json_error and make this function
# return 1.
_json_string() {
	_json_expect '"'
	local _json_strval=''
	# Bracket expression: any run of characters other than `\` and `"`.
	local _json_re='^[^\"]+'
	local _json_c _json_n _json_n2
	while true; do
		case "${_json_buf::1}" in
			'' )
				# Ran out of input before the closing quote.
				_json_error 'unexpected end of input in string'
				return 1
				;;
			"\\" )
				_json_buf=${_json_buf:1}
				case "${_json_buf::1}" in
					'"' ) _json_strval+='"'; _json_buf=${_json_buf:1} ;;
					"\\" ) _json_strval+="\\"; _json_buf=${_json_buf:1} ;;
					'/' ) _json_strval+='/'; _json_buf=${_json_buf:1} ;;
					'b' ) _json_strval+=$'\b'; _json_buf=${_json_buf:1} ;;
					'f' ) _json_strval+=$'\f'; _json_buf=${_json_buf:1} ;;
					'n' ) _json_strval+=$'\n'; _json_buf=${_json_buf:1} ;;
					'r' ) _json_strval+=$'\r'; _json_buf=${_json_buf:1} ;;
					't' ) _json_strval+=$'\t'; _json_buf=${_json_buf:1} ;;
					'u' )
						if ! [[ ${_json_buf::5} == u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] ]]; then
							_json_error 'expected 4 hex characters, got: %q' "${_json_buf:1:4}"
							return 1
						fi
						_json_n=$(( 16#${_json_buf:1:4} ))
						_json_buf=${_json_buf:5}
						if (( 0xDC00 <= _json_n && _json_n <= 0xDFFF )); then
							# A low surrogate is only valid as the
							# second half of a pair.
							_json_error 'unexpected lone UTF-16 low surrogate: 0x%x' "$_json_n"
							return 1
						elif (( 0xD800 <= _json_n && _json_n <= 0xDBFF )); then
							# High surrogate: a low surrogate escape
							# must follow immediately.
							if ! [[ ${_json_buf::6} == '\u'[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] ]]; then
								_json_error 'expected a \\uABCD sequence, got: %q' "${_json_buf::6}"
								return 1
							fi
							_json_n2=$(( 16#${_json_buf:2:4} ))
							_json_buf=${_json_buf:6}
							if ! (( 0xDC00 <= _json_n2 && _json_n2 <= 0xDFFF )); then
								_json_error 'expected a UTF-16 low surrogate, got: 0x%x' "$_json_n2"
								return 1
							fi
							_json_n=$(( 0x10000 + ((_json_n - 0xD800) << 10) + (_json_n2 - 0xDC00) ))
						fi
						# Use `printf -v` rather than $(printf ...):
						# command substitution would strip a decoded
						# trailing newline (e.g. \u000a).
						printf -v _json_c '%08X' "$_json_n"
						printf -v _json_c "\\U$_json_c"
						_json_strval+=$_json_c
						;;
					* )
						_json_error 'unexpected backslash sequence: \\%q' "${_json_buf::1}"
						return 1
						;;
				esac
				;;
			'"' )
				_json_buf=${_json_buf:1}
				"$_json_token" "$1" "$_json_strval"
				return
				;;
			* )
				# Consume multiple characters at once,
				# or else this is horribly slow.
				#
				# The head is neither empty, a quote, nor a
				# backslash, so this is expected to match; guard
				# anyway so a failed match cannot spin forever.
				if ! [[ $_json_buf =~ $_json_re ]]; then
					_json_error 'invalid character data in string: %q' "${_json_buf::16}"
					return 1
				fi
				_json_strval+=${BASH_REMATCH[0]}
				_json_buf=${_json_buf:${#BASH_REMATCH[0]}}
				;;
		esac
	done
}
# Parse a JSON number at the front of $_json_buf and emit it as
# `"$_json_token" number "$text"`, where $text is the matched source
# text.
#
# Grammar: optional `-`; an integer part that is either `0` or a
# non-zero digit followed by any number of digits; an optional
# fraction; an optional exponent.  A leading `+` is not accepted.
#
# On a mismatch, reports through _json_error and returns 1.
_json_number() {
	local _json_re='^-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?'
	if ! [[ $_json_buf =~ $_json_re ]]; then
		# Show only a short prefix of the offending input.
		_json_error 'invalid number: %q' "${_json_buf::16}"
		return 1
	fi
	_json_buf=${_json_buf:${#BASH_REMATCH[0]}}
	"$_json_token" number "${BASH_REMATCH[0]}"
}
# _json_lit WORD
#
# Compare the front of $_json_buf against the literal WORD (`true`,
# `false` or `null`), reporting a mismatch through _json_error.  Then
# drop ${#WORD} characters from the buffer and emit WORD as a token
# with no value.
_json_lit() {
	local _json_want=$1
	local _json_len=${#_json_want}
	local _json_got=${_json_buf::_json_len}
	case "$_json_got" in
		"$_json_want" ) ;;
		* ) _json_error 'expected %q, got: %q' "$_json_want" "$_json_got" ;;
	esac
	_json_buf=${_json_buf:_json_len}
	"$_json_token" "$_json_want"
}
|