1#include <myaw.h>
  2#include <pwlib/ctype.h>
  3#include <pwlib/parsers.h>
  4
  5static char32_t allowed_terminators[] = { MW_COMMENT, ':', ',', '}', ']', 0 };
  6
  7
  8[[nodiscard]] static bool skip_spaces(MwParser* parser, unsigned* pos, unsigned source_line, char32_t* chr)
  9/*
 10 * Skip spaces and comments before structural element.
 11 *
 12 * On success write first non-space character to `chr`.
 13 */
 14{
 15    for (;;) {
 16        PwValuePtr current_line = &parser->current_line;
 17
 18        *pos = pw_string_skip_spaces(current_line, *pos);
 19
 20        // end of line?
 21        if (pw_string_index_valid(current_line, *pos)) {
 22            // no, return character if not a comment
 23            char32_t c = pw_char_at(current_line, *pos);
 24            if (c != '#') {
 25                *chr = c;
 26                return true;
 27            }
 28        }
 29        // read next line
 30        if (!_mw_read_block_line(parser)) {
 31            if (_mw_end_of_block()) {
 32                mw_exception(parser->line_number, parser->current_indent, "Unexpected end of block");
 33            }
 34            return false;
 35        }
 36        *pos = parser->current_indent;
 37    }
 38}
 39
 40[[nodiscard]] static bool parse_number(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
 41/*
 42 * `start_pos` points to the sign or first digit
 43 */
 44{
 45    int sign = 1;
 46    char32_t chr = pw_char_at(&parser->current_line, start_pos);
 47    if (chr == '+') {
 48        // no op
 49        start_pos++;
 50    } else if (chr == '-') {
 51        sign = -1;
 52        start_pos++;
 53    }
 54    return _pw_parse_num_str(&parser->current_line, start_pos, sign, end_pos, allowed_terminators, false, result);
 55}
 56
 57[[nodiscard]] static bool parse_string(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
 58/*
 59 * `start_pos` points to the opening double quotation mark (")
 60 */
 61{
 62    unsigned closing_quote_pos;
 63    if (_mw_find_closing_quote(&parser->current_line, '"', start_pos + 1, &closing_quote_pos)) {
 64        *end_pos = closing_quote_pos + 1;
 65        return _mw_unescape_line(parser, &parser->current_line,
 66                                 parser->line_number, '"', start_pos + 1, closing_quote_pos, result);
 67    }
 68    mw_exception(parser->line_number, parser->current_indent, "String has no closing quote");
 69    return false;
 70}
 71
 72[[nodiscard]] static bool parse_array(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
 73/*
 74 * `start_pos` points to the next character after opening square bracket
 75 */
 76{
 77    parser->json_depth++;
 78
 79    if (!pw_create(PwTypeId_BasicArray, result)) {
 80        return false;
 81    }
 82    char32_t chr;
 83    if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
 84        return false;
 85    }
 86    if (chr == ']') {
 87        // empty array
 88        *end_pos = start_pos + 1;
 89        parser->json_depth--;
 90        return true;
 91    }
 92    // parse first item
 93    PwValue first_item = PW_NULL;
 94    if (!_mw_parse_json_value(parser, start_pos, &start_pos, &first_item)) {
 95        return false;
 96    }
 97    if (!pw_array_append(result, &first_item)) {
 98        return false;
 99    }
100    // parse subsequent items
101    for (;;) {
102        if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
103            return false;
104        }
105        if (chr == ']') {
106            // done
107            *end_pos = start_pos + 1;
108            parser->json_depth--;
109            return true;
110        }
111        if (chr != ',') {
112            mw_exception(parser->line_number, parser->current_indent, "Array items must be separated with comma");
113            return false;
114        }
115        PwValue item = PW_NULL;
116        if (!_mw_parse_json_value(parser, start_pos + 1, &start_pos, &item)) {
117            return false;
118        }
119        if (!pw_array_append(result, &item)) {
120            return false;
121        }
122    }
123}
124
125[[nodiscard]] static bool parse_object_member(MwParser* parser, unsigned* pos, PwValuePtr result)
126/*
127 * Parse key:value pair starting from `pos` and update `result`.
128 *
129 * Update `pos` on exit.
130 */
131{
132    PwValue key = PW_NULL;
133    if (!parse_string(parser, *pos, pos, &key)) {
134        return false;
135    }
136    char32_t chr;
137    if (!skip_spaces(parser, pos, __LINE__, &chr)) {
138        return false;
139    }
140    if (chr != ':') {
141        mw_exception(parser->line_number, *pos, "Values must be separated from keys with colon");
142        return false;
143    }
144
145    (*pos)++;
146
147    PwValue value = PW_NULL;
148    if (!_mw_parse_json_value(parser, *pos, pos, &value)) {
149        return false;
150    }
151    return pw_map_update(result, &key, &value);
152}
153
154[[nodiscard]] static bool parse_object(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
155/*
156 * `start_pos` points to the next character after opening curly bracket
157 */
158{
159    parser->json_depth++;
160
161    if (!pw_create(PwTypeId_BasicMap, result)) {
162        return false;
163    }
164
165    char32_t chr;
166    if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
167        return false;
168    }
169    if (chr == '}') {
170        // empty object
171        *end_pos = start_pos + 1;
172        parser->json_depth--;
173        return true;
174    }
175    if (chr != '"') {
176        mw_exception(parser->line_number, parser->current_indent, "Keys must be strings");
177        return false;
178    }
179    // parse first member
180    if (!parse_object_member(parser, &start_pos, result)) {
181        return false;
182    }
183    // parse subsequent members
184    for (;;) {
185        if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
186            return false;
187        }
188        if (chr == '}') {
189            // done
190            *end_pos = start_pos + 1;
191            parser->json_depth--;
192            return true;
193        }
194        if (chr != ',') {
195            mw_exception(parser->line_number, parser->current_indent, "Object members must be separated with comma");
196            return false;
197        }
198        start_pos++;
199        if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
200            return false;
201        }
202        if (chr != '"') {
203            mw_exception(parser->line_number, parser->current_indent, "Keys must be strings");
204            return false;
205        }
206        if (!parse_object_member(parser, &start_pos, result)) {
207            return false;
208        }
209    }
210}
211
212[[nodiscard]] static bool check_terminator(MwParser* parser, unsigned pos)
213{
214    char32_t c = pw_char_at(&parser->current_line, pos);
215    if ( ! (pw_isspace(c) || utf32_strchr(allowed_terminators, c))) {
216        mw_exception(parser->line_number, pos, "Unexpected character");
217        return false;
218    }
219    return true;
220}
221
222[[nodiscard]] bool _mw_parse_json_value(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
223{
224    if (parser->json_depth >= parser->max_json_depth) {
225        mw_exception(parser->line_number, parser->current_indent, "Maximum recursion depth exceeded");
226        return false;
227    }
228
229    char32_t first_char;
230    if (!skip_spaces(parser, &start_pos, __LINE__, &first_char)) {
231        return false;
232    }
233
234    if (first_char == '[') {
235        return parse_array(parser, start_pos + 1, end_pos, result);
236    }
237    if (first_char == '{') {
238        return parse_object(parser, start_pos + 1, end_pos, result);
239    }
240    if (first_char == '"') {
241        return parse_string(parser, start_pos, end_pos, result);
242    }
243    if (first_char == '+' || first_char == '-' || pw_is_ascii_digit(first_char)) {
244        return parse_number(parser, start_pos, end_pos, result);
245    }
246    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 4, "null")) {
247        if (!check_terminator(parser, start_pos + 4)) {
248            return false;
249        }
250        *end_pos = start_pos + 4;
251        pw_destroy(result);
252        return true;
253    }
254    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 4, "true")) {
255        if (!check_terminator(parser, start_pos + 4)) {
256            return false;
257        }
258        *end_pos = start_pos + 4;
259        pw_destroy(result);
260        *result = PwBool(true);
261        return true;
262    }
263    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 5, "false")) {
264        if (!check_terminator(parser, start_pos + 4)) {
265            return false;
266        }
267        *end_pos = start_pos + 5;
268        pw_destroy(result);
269        *result = PwBool(false);
270        return true;
271    }
272    mw_exception(parser->line_number, start_pos, "Unexpected character");
273    return false;
274}
275
276[[nodiscard]] bool _mw_json_parser_func(MwParser* parser, PwValuePtr result)
277{
278    unsigned end_pos;
279    if (!_mw_parse_json_value(parser, _mw_get_start_position(parser), &end_pos, result)) {
280        return false;
281    }
282
283    // check trailing characters
284
285    static char garbage[] = "Garbage after JSON value";
286
287    if (_mw_comment_or_end_of_line(parser, end_pos)) {
288
289        // make sure current block has no more data
290        if (_mw_read_block_line(parser)) {
291            mw_exception(parser->line_number, parser->current_indent, garbage);
292            return false;
293        }
294        return _mw_end_of_block();  // true if end of block, false if other error (read error or OOM)
295    }
296    mw_exception(parser->line_number, parser->current_indent, garbage);
297    return false;
298}
299
300[[nodiscard]] bool mw_parse_json(PwValuePtr markup, PwValuePtr result)
301{
302    [[ gnu::cleanup(mw_delete_parser) ]] MwParser* parser = mw_create_parser(markup);
303    if (!parser) {
304        pw_set_status(PwOOM());
305        return false;
306    }
307    // read first line to prepare for parsing and to detect EOF
308    if (!_mw_read_block_line(parser)) {
309        return false;
310    }
311
312    // parse root value
313    unsigned end_pos;
314    if (!_mw_parse_json_value(parser, 0, &end_pos, result)) {
315        return false;
316    }
317
318    // make sure markup has no more data
319
320    static char extra_data[] = "Extra data after parsed value";
321
322    if (!_mw_comment_or_end_of_line(parser, end_pos)) {
323        mw_exception(parser->line_number, parser->current_indent, extra_data);
324        return false;
325    }
326    // make sure current block has no more data
327    bool ret = _mw_read_block_line(parser);
328    if (parser->eof) {
329        // all right
330        ret = true;
331    } else if (ret) {
332        mw_exception(parser->line_number, parser->current_indent, extra_data);
333    }
334    return ret;
335}