1#include <limits.h>
   2#include <stdio.h>
   3#include <stdlib.h>
   4#include <string.h>
   5
   6#include <myaw.h>
   7#include <pwlib/ctype.h>
   8#include <pwlib/parsers.h>
   9
// Passed by mw_create_parser() as the first argument of pw_create_empty_string()
// when it creates the parser's current_line buffer.
#define DEFAULT_LINE_CAPACITY  250
  11
  12#ifdef TRACE_ENABLED
  13    static unsigned tracelevel = 0;
  14
  15#   define _TRACE_INDENT() \
  16        for (unsigned i = 0; i < tracelevel * 4; i++) {  \
  17            fputc(' ', stderr);  \
  18        }
  19
  20#   define _TRACE_POS()  \
  21        _TRACE_INDENT() \
  22        fprintf(stderr, "%s; line %u, block indent %u", \
  23                __func__, parser->line_number, parser->block_indent);
  24
  25#   define TRACE_ENTER() \
  26        do {  \
  27            _TRACE_POS() \
  28            fputs(" {\n", stderr);  \
  29            tracelevel++; \
  30        } while (false)
  31
  32#   define TRACE_EXIT() \
  33        do {  \
  34            tracelevel--; \
  35            _TRACE_INDENT() \
  36            fputs("}\n", stderr);  \
  37        } while (false)
  38
  39#   define TRACEPOINT()  \
  40        do {  \
  41            _TRACE_POS() \
  42            fputc('\n', stderr);  \
  43        } while (false)
  44
  45#   define TRACE(...)  \
  46        do {  \
  47            _TRACE_INDENT() \
  48            fprintf(stderr, "%s: ", __func__); \
  49            fprintf(stderr, __VA_ARGS__);  \
  50            fputc('\n', stderr);  \
  51        } while (false)
  52#else
  53#   define TRACEPOINT()
  54#   define TRACE_ENTER()
  55#   define TRACE_EXIT()
  56#   define TRACE(...)
  57#endif
  58
// Forward declarations.
// parse_raw_value, parse_literal_string, parse_folded_string, parse_datetime
// and parse_timestamp are registered by mw_create_parser() under the
// conversion specifiers "raw", "literal", "folded", "datetime" and "timestamp".
[[nodiscard]] static bool parse_value(MwParser* parser, unsigned* nested_value_pos, PwValuePtr convspec, PwValuePtr result);
[[nodiscard]] static bool value_parser_func(MwParser* parser, PwValuePtr result);
[[nodiscard]] static bool parse_raw_value(MwParser* parser, PwValuePtr result);
[[nodiscard]] static bool parse_literal_string(MwParser* parser, PwValuePtr result);
[[nodiscard]] static bool parse_folded_string(MwParser* parser, PwValuePtr result);
[[nodiscard]] static bool parse_datetime(MwParser* parser, PwValuePtr result);
[[nodiscard]] static bool parse_timestamp(MwParser* parser, PwValuePtr result);

// Zero-terminated list containing MW_COMMENT and ':'.
// NOTE(review): presumably the characters allowed to follow a number; the use
// site is not visible in this part of the file -- confirm.
static char32_t number_terminators[] = { MW_COMMENT, ':', 0 };
  69
  70
  71MwParser* mw_create_parser(PwValuePtr markup)
  72{
  73    MwParser* parser = allocate(sizeof(MwParser), true);
  74    if (!parser) {
  75        pw_set_status(PwOOM());
  76        return nullptr;
  77    }
  78    parser->markup = pw_clone(markup);
  79
  80    parser->blocklevel = 1;
  81    parser->max_blocklevel = MW_MAX_RECURSION_DEPTH;
  82
  83    parser->json_depth = 1;
  84    parser->max_json_depth = MW_MAX_RECURSION_DEPTH;
  85
  86    parser->skip_comments = true;
  87
  88    if (!pw_create_empty_string(DEFAULT_LINE_CAPACITY, 1, &parser->current_line)) {
  89        goto error;
  90    }
  91    if (!pw_map_va(&parser->custom_parsers,
  92        PwString("raw"),       PwPtr((void*) parse_raw_value),
  93        PwString("literal"),   PwPtr((void*) parse_literal_string),
  94        PwString("folded"),    PwPtr((void*) parse_folded_string),
  95        PwString("datetime"),  PwPtr((void*) parse_datetime),
  96        PwString("timestamp"), PwPtr((void*) parse_timestamp),
  97        PwString("json"),      PwPtr((void*) _mw_json_parser_func)
  98    )) {
  99        goto error;
 100    }
 101    if (!pw_start_read_lines(markup)) {
 102        goto error;
 103    }
 104    return parser;
 105
 106error:
 107    mw_delete_parser(&parser);
 108    return nullptr;
 109}
 110
 111void mw_delete_parser(MwParser** parser_ptr)
 112{
 113    MwParser* parser = *parser_ptr;
 114    *parser_ptr = nullptr;
 115    pw_destroy(&parser->markup);
 116    pw_destroy(&parser->current_line);
 117    pw_destroy(&parser->custom_parsers);
 118    release((void**) &parser, sizeof(MwParser));
 119}
 120
 121[[nodiscard]] bool mw_set_custom_parser(MwParser* parser, char* convspec, MwBlockParserFunc parser_func)
 122{
 123    PwValue key = PwStaticString(convspec);
 124    PwValue value = PwPtr((void*) parser_func);
 125    return pw_map_update(&parser->custom_parsers, &key, &value);
 126}
 127
 128static inline bool have_custom_parser(MwParser* parser, PwValuePtr convspec)
 129{
 130    return pw_map_has_key(&parser->custom_parsers, convspec);
 131}
 132
 133static inline MwBlockParserFunc get_custom_parser(MwParser* parser, PwValuePtr convspec)
 134{
 135    PwValue parser_func = PW_NULL;
 136    if (!pw_map_get(&parser->custom_parsers, convspec, &parser_func)) {
 137        return nullptr;
 138    }
 139    return (MwBlockParserFunc) (parser_func.func_ptr);
 140}
 141
 142bool _mw_end_of_block()
 143{
 144    PwValuePtr status = &current_task->status;
 145    return (status->type_id     == PwTypeId_Status)
 146        && (status->kind        == PwStatusKind_Basic)
 147        && (status->status_code == MweEndOfBlock);
 148}
 149
 150static inline bool end_of_line(PwValuePtr str, unsigned position)
 151/*
 152 * Return true if position is beyond end of line.
 153 */
 154{
 155    return !pw_string_index_valid(str, position);
 156}
 157
 158static inline bool isspace_or_eol_at(PwValuePtr str, unsigned position)
 159{
 160    if (end_of_line(str, position)) {
 161        return true;
 162    } else {
 163        return pw_isspace(pw_char_at(str, position));
 164    }
 165}
 166
 167[[nodiscard]] static bool read_line(MwParser* parser)
 168/*
 169 * Read line into parser->current line and strip trailing spaces.
 170 */
 171{
 172    if (!pw_read_line_inplace(&parser->markup, &parser->current_line)) {
 173        return false;
 174    }
 175    // strip trailing spaces
 176    if (!pw_string_rstrip(&parser->current_line)) {
 177        return false;
 178    }
 179    // measure indent
 180    parser->current_indent = pw_string_skip_spaces(&parser->current_line, 0);
 181
 182    // set current_line
 183    parser->line_number = pw_get_line_number(&parser->markup);
 184
 185    return true;
 186}
 187
 188static inline bool is_comment_line(MwParser* parser)
 189/*
 190 * Return true if current line starts with MW_COMMENT char.
 191 */
 192{
 193    return pw_char_at(&parser->current_line, parser->current_indent) == MW_COMMENT;
 194}
 195
 196[[nodiscard]] bool _mw_read_block_line(MwParser* parser)
 197{
 198    TRACEPOINT();
 199
 200    if (parser->eof) {
 201        if (parser->blocklevel) {
 202            // continue returning this for nested blocks
 203            pw_set_status(PwStatus(MweEndOfBlock));
 204        } else {
 205            pw_set_status(PwStatus(PweEOF));
 206        }
 207        return false;
 208    }
 209    for (;;) {
 210        if (!read_line(parser)) {
 211            PwValue status = pw_get_status();
 212            if (pw_is_eof(&status)) {
 213                parser->eof = true;
 214                pw_destroy(&parser->current_line);
 215                pw_set_status(PwStatus(MweEndOfBlock));
 216            }
 217            return false;
 218        }
 219        if (parser->skip_comments) {
 220            // skip empty lines too
 221            if (pw_strlen(&parser->current_line) == 0) {
 222                continue;
 223            }
 224            if (is_comment_line(parser)) {
 225                continue;
 226            }
 227            parser->skip_comments = false;
 228        }
 229        if (pw_strlen(&parser->current_line) == 0) {
 230            // return empty line as is
 231            return true;
 232        }
 233        if (parser->current_indent >= parser->block_indent) {
 234            // indentation is okay, return line
 235            return true;
 236        }
 237        // unindent detected
 238        if (is_comment_line(parser)) {
 239            // skip unindented comments
 240            continue;
 241        }
 242        TRACE("unindent");
 243        // end of block
 244        if (!pw_unread_line(&parser->markup, &parser->current_line)) {
 245            return false;
 246        }
 247        if (!pw_string_truncate(&parser->current_line, 0)) {
 248            return false;
 249        }
 250        pw_set_status(PwStatus(MweEndOfBlock));
 251        return false;
 252    }
 253}
 254
 255[[nodiscard]] bool _mw_read_block(MwParser* parser, PwValuePtr result)
 256{
 257    TRACEPOINT();
 258
 259    if (!pw_create(PwTypeId_BasicArray, result)) {
 260        return false;
 261    }
 262    for (;;) {
 263        // append line
 264        PwValue line = PW_NULL;
 265        if (!pw_substr(&parser->current_line, parser->block_indent, UINT_MAX, &line)) {
 266            return false;
 267        }
 268        if (!pw_array_append(result, &line)){
 269            return false;
 270        }
 271        // read next line
 272        if (!_mw_read_block_line(parser)) {
 273            if (_mw_end_of_block()) {
 274                return true;
 275            }
 276            return false;
 277        }
 278    }
 279}
 280
 281[[nodiscard]] static bool parse_nested_block(MwParser* parser, unsigned block_pos,
 282                                             MwBlockParserFunc parser_func, PwValuePtr result)
 283/*
 284 * Set block indent to `block_pos` and call parser_func.
 285 */
 286{
 287    if (parser->blocklevel >= parser->max_blocklevel) {
 288        mw_exception(parser->line_number, parser->current_indent, "Too many nested blocks");
 289        return false;
 290    }
 291
 292    // start nested block
 293    parser->blocklevel++;
 294    unsigned saved_block_indent = parser->block_indent;
 295    parser->block_indent = block_pos;
 296
 297    TRACE_ENTER();
 298
 299    // call parser function
 300    bool ret = parser_func(parser, result);
 301
 302    // end nested block
 303    parser->block_indent = saved_block_indent;
 304    parser->blocklevel--;
 305
 306    TRACE_EXIT();
 307    return ret;
 308}
 309
 310[[nodiscard]] static bool parse_nested_block_from_next_line(MwParser* parser,
 311                                                            MwBlockParserFunc parser_func, PwValuePtr result)
 312/*
 313 * Read next line, set block indent to current indent plus one, and call parser_func.
 314 */
 315{
 316    TRACEPOINT();
 317    TRACE("new block_pos %u", parser->block_indent + 1);
 318
 319    // temporarily increment block indent by one and read next line
 320    parser->block_indent++;
 321    parser->skip_comments = true;
 322    bool ret = _mw_read_block_line(parser);
 323    parser->block_indent--;
 324
 325    if (!ret) {
 326        if (_mw_end_of_block()) {
 327            mw_exception(parser->line_number, parser->current_indent, "Empty block");
 328        }
 329        return false;
 330    }
 331
 332    // call parse_nested_block
 333    return parse_nested_block(parser, parser->block_indent + 1, parser_func, result);
 334}
 335
 336unsigned _mw_get_start_position(MwParser* parser)
 337{
 338    if (parser->block_indent < parser->current_indent) {
 339        return parser->current_indent;
 340    } else {
 341        return pw_string_skip_spaces(&parser->current_line, parser->block_indent);
 342    }
 343}
 344
 345bool _mw_comment_or_end_of_line(MwParser* parser, unsigned position)
 346{
 347    position = pw_string_skip_spaces(&parser->current_line, position);
 348    return (end_of_line(&parser->current_line, position)
 349            || pw_char_at(&parser->current_line, position) == MW_COMMENT);
 350}
 351
 352[[nodiscard]] static bool parse_convspec(MwParser* parser, unsigned opening_colon_pos,
 353                                         unsigned* end_pos, PwValuePtr result)
 354/*
 355 * Extract conversion specifier starting from `opening_colon_pos` in the `current_line`.
 356 *
 357 * On success return string and write `end_pos`.
 358 *
 359 * If conversion specified is not detected, return PwNull()
 360 */
 361{
 362    // make result Null
 363    pw_destroy(result);
 364
 365    PwValuePtr current_line = &parser->current_line;
 366
 367    unsigned start_pos = opening_colon_pos + 1;
 368    unsigned closing_colon_pos;
 369    if (!pw_strchr(current_line, ':', start_pos, &closing_colon_pos)) {
 370        return true;
 371    }
 372    if (closing_colon_pos == start_pos) {
 373        // empty conversion specifier
 374        return true;
 375    }
 376    if (!isspace_or_eol_at(current_line, closing_colon_pos + 1)) {
 377        // not a conversion specifier
 378        return true;
 379    }
 380    PwValue convspec = PW_NULL;
 381    if (!pw_substr(current_line, start_pos, closing_colon_pos, &convspec)) {
 382        return false;
 383    }
 384    if (!pw_string_strip(&convspec)) {
 385        return false;
 386    }
 387    if (!have_custom_parser(parser, &convspec)) {
 388        // such a conversion specifier is not defined
 389        return true;
 390    }
 391    *end_pos = closing_colon_pos + 1;
 392    pw_move(result, &convspec);
 393    return true;
 394}
 395
 396[[nodiscard]] static bool parse_raw_value(MwParser* parser, PwValuePtr result)
 397{
 398    TRACEPOINT();
 399
 400    PwValue lines = PW_NULL;
 401    if (!_mw_read_block(parser, &lines)) {
 402        return false;
 403    }
 404    if (pw_array_length(&lines) > 1) {
 405        // append one empty line for ending line break
 406        PwValue empty_line = PW_STRING("");
 407        if (!pw_array_append(&lines, &empty_line)) {
 408            return false;
 409        }
 410    }
 411    // return concatenated lines
 412    return pw_array_join(&lines, '\n', result);
 413}
 414
 415[[nodiscard]] static bool parse_literal_string(MwParser* parser, PwValuePtr result)
 416/*
 417 * Parse current block as a literal string.
 418 */
 419{
 420    TRACEPOINT();
 421
 422    PwValue lines = PW_NULL;
 423    if (!_mw_read_block(parser, &lines)) {
 424        return false;
 425    }
 426
 427    // normalize list of lines
 428
 429    if (!pw_dedent(&lines)) {
 430        return false;
 431    }
 432
 433    // drop empty trailing lines
 434    unsigned len = pw_array_length(&lines);
 435    while (len--) {
 436        PwValue line = PW_NULL;
 437        if (!pw_array_item(&lines, len, &line)) {
 438            return false;
 439        }
 440        if (pw_strlen(&line) != 0) {
 441            break;
 442        }
 443        if (!pw_array_del(&lines, len, len + 1)) {
 444            return false;
 445        }
 446    }
 447
 448    // append one empty line for ending line break
 449    if (pw_array_length(&lines) > 1) {
 450        PwValue empty_line = PW_STRING("");
 451        if (!pw_array_append(&lines, &empty_line)) {
 452            return false;
 453        }
 454    }
 455
 456    // return concatenated lines
 457    return pw_array_join(&lines, '\n', result);
 458}
 459
 460[[nodiscard]] bool _mw_unescape_line(MwParser* parser, PwValuePtr line, unsigned line_number,
 461                                     char32_t quote, unsigned start_pos, unsigned end_pos, PwValuePtr result)
 462{
 463    if (!pw_create_empty_string(end_pos - start_pos,  // unescaped string can be shorter
 464                                line->str_params.char_size, result)) {
 465        return false;
 466    }
 467    unsigned pos = start_pos;
 468    while (pos < end_pos) {
 469        char32_t chr = pw_char_at(line, pos);
 470        if (chr == quote) {
 471            // closing quotation mark detected
 472            break;
 473        }
 474        if (chr != '\\') {
 475            if (!pw_string_append(result, chr)) {
 476                return false;
 477            }
 478        } else {
 479            // start of escape sequence
 480            pos++;
 481            if (pos >= end_pos) {
 482                if (!pw_string_append(result, chr)) {  // leave backslash in the result
 483                    return false;
 484                }
 485            }
 486            int hexlen;
 487            chr = pw_char_at(line, pos);
 488            switch (chr) {
 489
 490                // Simple escape sequences
 491                case '\'':    //  \'   single quote     byte 0x27
 492                case '"':     //  \"   double quote     byte 0x22
 493                case '?':     //  \?   question mark    byte 0x3f
 494                case '\\':    //  \\   backslash        byte 0x5c
 495                    if (!pw_string_append(result, chr)) { return false; };
 496                    break;
 497                case 'a': if (!pw_string_append(result, 0x07)) { return false; } break;  // audible bell
 498                case 'b': if (!pw_string_append(result, 0x08)) { return false; } break;  // backspace
 499                case 'f': if (!pw_string_append(result, 0x0c)) { return false; } break;  // form feed
 500                case 'n': if (!pw_string_append(result, 0x0a)) { return false; } break;  // line feed
 501                case 'r': if (!pw_string_append(result, 0x0d)) { return false; } break;  // carriage return
 502                case 't': if (!pw_string_append(result, 0x09)) { return false; } break;  // horizontal tab
 503                case 'v': if (!pw_string_append(result, 0x0b)) { return false; } break;  // vertical tab
 504
 505                // Numeric escape sequences
 506                case 'o': {
 507                    //  \on{1:3} code unit n... (1-3 octal digits)
 508                    char32_t v = 0;
 509                    for (int i = 0; i < 3; i++) {
 510                        pos++;
 511                        if (pos >= end_pos) {
 512                            if (i == 0) {
 513                                mw_exception(parser->line_number, pos, "Incomplete octal value");
 514                                return false;
 515                            }
 516                            break;
 517                        }
 518                        char32_t c = pw_char_at(line, pos);
 519                        if ('0' <= c && c <= '7') {
 520                            v <<= 3;
 521                            v += c - '0';
 522                        } else {
 523                            mw_exception(parser->line_number, pos, "Bad octal value");
 524                            return false;
 525                        }
 526                    }
 527                    if (!pw_string_append(result, v)) { return false; }
 528                    break;
 529                }
 530                case 'x':
 531                    //  \xn{2}   code unit n... (exactly 2 hexadecimal digits are required)
 532                    hexlen = 2;
 533                    goto parse_hex_value;
 534
 535                // Unicode escape sequences
 536                case 'u':
 537                    //  \un{4}  code point U+n... (exactly 4 hexadecimal digits are required)
 538                    hexlen = 4;
 539                    goto parse_hex_value;
 540                case 'U':
 541                    //  \Un{8}  code point U+n... (exactly 8 hexadecimal digits are required)
 542                    hexlen = 8;
 543
 544                parse_hex_value: {
 545                    char32_t v = 0;
 546                    for (int i = 0; i < hexlen; i++) {
 547                        pos++;
 548                        if (pos >= end_pos) {
 549                            mw_exception(parser->line_number, pos, "Incomplete hexadecimal value");
 550                            return false;
 551                        }
 552                        char32_t c = pw_char_at(line, pos);
 553                        if ('0' <= c && c <= '9') {
 554                            v <<= 4;
 555                            v += c - '0';
 556                        } else if ('a' <= c && c <= 'f') {
 557                            v <<= 4;
 558                            v += c - 'a' + 10;
 559                        } else if ('A' <= c && c <= 'F') {
 560                            v <<= 4;
 561                            v += c - 'A' + 10;
 562                        } else {
 563                            mw_exception(parser->line_number, pos, "Bad hexadecimal value");
 564                            return false;
 565                        }
 566                    }
 567                    if (!pw_string_append(result, v)) { return false; }
 568                    break;
 569                }
 570                default:
 571                    // not a valid escape sequence
 572                    if (!pw_string_append(result, '\\')) { return false; }
 573                    if (!pw_string_append(result, chr)) { return false; }
 574                    break;
 575            }
 576        }
 577        pos++;
 578    }
 579    return true;
 580}
 581
 582[[nodiscard]] static bool fold_lines(MwParser* parser, PwValuePtr lines,
 583                                     char32_t quote, PwValuePtr line_numbers, PwValuePtr result)
 584/*
 585 * Fold list of lines and return concatenated string.
 586 *
 587 * If `quote` is nonzero, unescape lines.
 588 */
 589{
 590    if (!pw_dedent(lines)) {
 591        return false;
 592    }
 593    unsigned len = pw_array_length(lines);
 594
 595    // skip leading empty lines
 596    unsigned start_i = 0;
 597    for (; start_i < len; start_i++) {
 598        PwValue line = PW_NULL;
 599        if (!pw_array_item(lines, start_i, &line)) {
 600            return false;
 601        }
 602        if (pw_strlen(&line) != 0 && !pw_string_isspace(&line)) {
 603            break;
 604        }
 605    }
 606    if (start_i == len) {
 607        // return empty string
 608        pw_destroy(result);
 609        *result = PwString("");
 610        return true;
 611    }
 612
 613    // skip trailing empty lines
 614    unsigned end_i = len;
 615    for (; end_i; end_i--) {
 616        PwValue line = PW_NULL;
 617        if (!pw_array_item(lines, end_i - 1, &line)) {
 618            return false;
 619        }
 620        if (pw_strlen(&line) != 0 && !pw_string_isspace(&line)) {
 621            break;
 622        }
 623    }
 624    if (end_i == 0) {
 625        // return empty string
 626        pw_destroy(result);
 627        *result = PwString("");
 628        return true;
 629    }
 630
 631    // calculate length of result
 632    unsigned result_len = end_i - start_i - 1;  // reserve space for separators
 633    uint8_t char_size = 1;
 634    for (unsigned i = start_i; i < end_i; i++) {
 635        PwValue line = PW_NULL;
 636        if (!pw_array_item(lines, i, &line)) {
 637            return false;
 638        }
 639        result_len += pw_strlen(&line);
 640        uint8_t cs = line.str_params.char_size;
 641        if (cs > char_size) {
 642            char_size = cs;
 643        }
 644    }
 645
 646    // allocate result
 647    if (!pw_create_empty_string(result_len, char_size, result)) {
 648        return false;
 649    }
 650
 651    // concatenate lines
 652    bool prev_LF = false;
 653    for (unsigned i = start_i; i < end_i; i++) {
 654        PwValue line = PW_NULL;
 655        if (!pw_array_item(lines, i, &line)) {
 656            return false;
 657        }
 658        if (i > start_i) {
 659            if (pw_strlen(&line) == 0) {
 660                // treat empty lines as LF
 661                if (!pw_string_append(result, '\n')) {
 662                    return false;
 663                }
 664                prev_LF = true;
 665            } else {
 666                if (prev_LF) {
 667                    // do not append separator if previous line was empty
 668                    prev_LF = false;
 669                } else {
 670                    if (pw_isspace(pw_char_at(&line, 0))) {
 671                        // do not append separator if the line aleady starts with space
 672                    } else {
 673                        if (!pw_string_append(result, ' ')) {
 674                            return false;
 675                        }
 676                    }
 677                }
 678            }
 679        }
 680        if (quote) {
 681            PwValue line_number = PW_NULL;
 682            if (!pw_array_item(line_numbers, i, &line_number)) {
 683                return false;
 684            }
 685            PwValue unescaped = PW_NULL;
 686            if (!_mw_unescape_line(parser, &line, line_number.unsigned_value, quote, 0, pw_strlen(&line), &unescaped)) {
 687                return false;
 688            }
 689            if (!pw_string_append(result, &unescaped)) {
 690                return false;
 691            }
 692        } else {
 693            if (!pw_string_append(result, &line)) {
 694                return false;
 695            }
 696        }
 697    }
 698    return true;
 699}
 700
 701[[nodiscard]] static bool parse_folded_string(MwParser* parser, PwValuePtr result)
 702{
 703    TRACEPOINT();
 704
 705    PwValue lines = PW_NULL;
 706    if (!_mw_read_block(parser, &lines)) {
 707        return false;
 708    }
 709    return fold_lines(parser, &lines, 0, nullptr, result);
 710}
 711
 712bool _mw_find_closing_quote(PwValuePtr line, char32_t quote, unsigned start_pos, unsigned* end_pos)
 713{
 714    for (;;) {
 715        if (!pw_strchr(line, quote, start_pos, end_pos)) {
 716            return false;
 717        }
 718        // check if the quotation mark is not escaped
 719        if (*end_pos && pw_char_at(line, *end_pos - 1) == '\\') {
 720            // continue searching
 721            start_pos = *end_pos + 1;
 722        } else {
 723            return true;
 724        }
 725    }
 726}
 727
 728[[nodiscard]] static bool parse_quoted_string(MwParser* parser, unsigned opening_quote_pos,
 729                                              unsigned* end_pos, PwValuePtr result)
 730/*
 731 * Parse quoted string starting from `opening_quote_pos` in the current line.
 732 *
 733 * Write next position after the closing quotation mark to `end_pos`.
 734 */
 735{
 736    TRACEPOINT();
 737
 738    // Get opening quote. The closing quote should be the same.
 739    char32_t quote = pw_char_at(&parser->current_line, opening_quote_pos);
 740
 741    // process first line
 742    unsigned closing_quote_pos;
 743    if (_mw_find_closing_quote(&parser->current_line, quote, opening_quote_pos + 1, &closing_quote_pos)) {
 744        // single-line string
 745        *end_pos = closing_quote_pos + 1;
 746        return _mw_unescape_line(parser, &parser->current_line, parser->line_number,
 747                                  quote, opening_quote_pos + 1, closing_quote_pos, result);
 748    }
 749
 750    unsigned block_indent = opening_quote_pos + 1;
 751
 752    // make parser read nested block
 753    unsigned saved_block_indent = parser->block_indent;
 754    parser->block_indent = block_indent;
 755    parser->blocklevel++;
 756
 757    // read block
 758    PwValue lines = PW_NULL;
 759    if (!pw_create(PwTypeId_BasicArray, &lines)) {
 760        return false;
 761    }
 762    PwValue line_numbers = PW_NULL;
 763    if (!pw_create(PwTypeId_BasicArray, &line_numbers)) {
 764        return false;
 765    }
 766    bool closing_quote_detected = false;
 767    for (;;) {
 768        // append line number
 769        PwValue n = PwUnsigned(parser->line_number);
 770        if (!pw_array_append(&line_numbers, &n)) {
 771            return false;
 772        }
 773        // append line
 774        if (_mw_find_closing_quote(&parser->current_line, quote, block_indent, end_pos)) {
 775            // final line
 776            PwValue final_line = PW_NULL;
 777            if (!pw_substr(&parser->current_line, block_indent, *end_pos, &final_line)) {
 778                return false;
 779            }
 780            // strip trailing spaces
 781            if (!pw_string_rstrip(&final_line)) {
 782                return false;
 783            }
 784            if (!pw_array_append(&lines, &final_line)) {
 785                return false;
 786            }
 787            (*end_pos)++;
 788            closing_quote_detected = true;
 789            break;
 790        } else {
 791            // intermediate line
 792            PwValue line = PW_NULL;
 793            if (!pw_substr(&parser->current_line, block_indent, UINT_MAX, &line)) {
 794                return false;
 795            }
 796            if (!pw_array_append(&lines, &line)) {
 797                return false;
 798            }
 799        }
 800        // read next line
 801        if (!_mw_read_block_line(parser)) {
 802            if (_mw_end_of_block()) {
 803                break;
 804            }
 805            return false;
 806        }
 807    }
 808
 809    // finished reading nested block
 810    parser->block_indent = saved_block_indent;
 811    parser->blocklevel--;
 812
 813    if (!closing_quote_detected) {
 814
 815        static char unterminated[] = "String has no closing quote";
 816
 817        // the above loop terminated abnormally, need to read next line
 818        if (!_mw_read_block_line(parser)) {
 819            if (_mw_end_of_block()) {
 820                mw_exception(parser->line_number, parser->current_indent, unterminated);
 821            }
 822            return false;
 823        }
 824        // check if the line starts with a quote with the same indent as the opening quote
 825        if (parser->current_indent == opening_quote_pos
 826            && pw_char_at(&parser->current_line, parser->current_indent) == quote) {
 827
 828            *end_pos = opening_quote_pos + 1;
 829        } else {
 830            mw_exception(parser->line_number, parser->current_indent, unterminated);
 831            return false;
 832        }
 833    }
 834
 835    // fold and unescape
 836    return fold_lines(parser, &lines, quote, &line_numbers, result);
 837}
 838
 839[[nodiscard]] static bool parse_datetime(MwParser* parser, PwValuePtr result)
 840/*
 841 * Parse value date/time starting from block indent in the current line.
 842 */
 843{
 844    static char32_t allowed_terminators[] = { MW_COMMENT, 0 };
 845
 846    unsigned start_pos = _mw_get_start_position(parser);
 847    unsigned end_pos;
 848    if (!_pw_parse_datetime(&parser->current_line, start_pos, &end_pos, allowed_terminators, result)) {
 849        if (pw_is_basic_error(&current_task->status, PweBadDatetime)) {
 850            mw_exception(parser->line_number, start_pos, "Bad date/time");
 851        }
 852        return false;
 853    }
 854    if (!_mw_comment_or_end_of_line(parser, end_pos)) {
 855        mw_exception(parser->line_number, start_pos, "Bad date/time");
 856        return false;
 857    }
 858    return true;
 859}
 860
 861[[nodiscard]] static bool parse_timestamp(MwParser* parser, PwValuePtr result)
 862/*
 863 * Parse value as timestamp starting from block indent in the current line.
 864 */
 865{
 866    static char32_t allowed_terminators[] = { MW_COMMENT, 0 };
 867
 868    unsigned start_pos = _mw_get_start_position(parser);
 869    unsigned end_pos;
 870    if (!_pw_parse_timestamp(&parser->current_line, start_pos, &end_pos, allowed_terminators, result)) {
 871        if (pw_is_basic_error(&current_task->status, PweBadTimestamp)
 872            || pw_is_basic_error(&current_task->status, PweNumericOverflow)) {
 873            mw_exception(parser->line_number, start_pos, "Bad timestamp");
 874        }
 875        return false;
 876    }
 877    if (!_mw_comment_or_end_of_line(parser, end_pos)) {
 878        mw_exception(parser->line_number, end_pos, "Bad timestamp");
 879        return false;
 880    }
 881    return true;
 882}
 883
 884[[nodiscard]] bool _mw_parse_number(MwParser* parser, unsigned start_pos, int sign,
 885                                    unsigned* end_pos, char32_t* allowed_terminators, PwValuePtr result)
 886{
 887    TRACEPOINT();
 888    TRACE("start_pos %u", start_pos);
 889
 890    if (!_pw_parse_num_str(&parser->current_line, start_pos, sign, end_pos, allowed_terminators, false, result)) {
 891        if (pw_is_basic_error(&current_task->status, PweBadNumber)
 892            || pw_is_basic_error(&current_task->status, PweNumericOverflow)) {
 893            mw_exception(parser->line_number, start_pos, "Bad number");
 894        }
 895        return false;
 896    }
 897    return true;
 898}
 899
 900[[nodiscard]] static bool parse_list(MwParser* parser, PwValuePtr result)
 901/*
 902 * Parse list.
 903 */
 904{
 905    TRACE_ENTER();
 906
 907    if (!pw_create(PwTypeId_BasicArray, result)) {
 908        return false;
 909    }
 910
 911    /*
 912     * All list items must have the same indent.
 913     * Save indent of the first item (current one) and check it for subsequent items.
 914     */
 915    unsigned item_indent = _mw_get_start_position(parser);
 916
 917    for (;;) {
 918        {
 919            // check if hyphen is followed by space or end of line
 920            unsigned next_pos = item_indent + 1;
 921            if (!isspace_or_eol_at(&parser->current_line, next_pos)) {
 922                mw_exception(parser->line_number, item_indent, "Bad list item");
 923                return false;
 924            }
 925
 926            // parse item as a nested block
 927
 928            PwValue item = PW_NULL;
 929            if (_mw_comment_or_end_of_line(parser, next_pos)) {
 930                if (!parse_nested_block_from_next_line(parser, value_parser_func, &item)) {
 931                    return false;
 932                }
 933            } else {
 934                // nested block starts on the same line, increment block position
 935                next_pos++;
 936                if (!parse_nested_block(parser, next_pos, value_parser_func, &item)) {
 937                    return false;
 938                }
 939            }
 940            if (!pw_array_append(result, &item)) {
 941                return false;
 942            }
 943            if (!_mw_read_block_line(parser)) {
 944                if (_mw_end_of_block()) {
 945                    break;
 946                }
 947                return false;
 948            }
 949            if (parser->current_indent != item_indent) {
 950                mw_exception(parser->line_number, parser->current_indent, "Bad indentation of list item");
 951                return false;
 952            }
 953        }
 954    }
 955    TRACE_EXIT();
 956    return true;
 957}
 958
 959[[nodiscard]] static bool parse_map(MwParser* parser, PwValuePtr first_key,
 960                                    PwValuePtr convspec_arg, unsigned value_pos, PwValuePtr result)
 961/*
 962 * Parse map.
 963 *
 964 * Key is already parsed, continue parsing from `value_pos` in the `current_line`.
 965 */
 966{
 967    TRACE_ENTER();
 968
 969    if (!pw_create(PwTypeId_BasicMap, result)) {
 970        TRACE_EXIT();
 971        return false;
 972    }
 973
 974    PwValue key = pw_clone(first_key);
 975    PwValue convspec = pw_clone(convspec_arg);
 976
 977    /*
 978     * All keys in the map must have the same indent.
 979     * Save indent of the first key (current one) and check it for subsequent keys.
 980     */
 981    unsigned key_indent = _mw_get_start_position(parser);
 982
 983    for (;;) {
 984        TRACE("parse value (line %u) from position %u", parser->line_number, value_pos);
 985        {
 986            // parse value as a nested block
 987
 988            MwBlockParserFunc parser_func = value_parser_func;
 989            if (pw_is_string(&convspec)) {
 990                parser_func = get_custom_parser(parser, &convspec);
 991            }
 992            PwValue value = PW_NULL;
 993            if (_mw_comment_or_end_of_line(parser, value_pos)) {
 994                if (!parse_nested_block_from_next_line(parser, parser_func, &value)) {
 995                    TRACE_EXIT();
 996                    return false;
 997                }
 998            } else {
 999                if (!parse_nested_block(parser, value_pos, parser_func, &value)) {
1000                    TRACE_EXIT();
1001                    return false;
1002                }
1003            }
1004            if (!pw_map_update(result, &key, &value)) {
1005                TRACE_EXIT();
1006                return false;
1007            }
1008        }
1009        TRACE("parse next key");
1010        {
1011            pw_destroy(&key);
1012            pw_destroy(&convspec);
1013
1014            if (!_mw_read_block_line(parser)) {
1015                if (_mw_end_of_block()) {
1016                    TRACE("end of map");
1017                    break;
1018                }
1019                TRACE_EXIT();
1020                return false;
1021            }
1022            if (parser->current_indent != key_indent) {
1023                mw_exception(parser->line_number, parser->current_indent, "Bad indentation of map key");
1024                TRACE_EXIT();
1025                return false;
1026            }
1027            if (!parse_value(parser, &value_pos, &convspec, &key)) {
1028                TRACE_EXIT();
1029                return false;
1030            }
1031        }
1032    }
1033    TRACE_EXIT();
1034    return true;
1035}
1036
1037[[nodiscard]] static bool is_kv_separator(MwParser* parser, unsigned colon_pos,
1038                                          PwValuePtr convspec_out, unsigned *value_pos, bool* result)
1039/*
1040 * Set result true if colon_pos is followed by end of line, space, or conversion specifier.
1041 * Write conversion specifier to `convspec_out` if value is followed by conversion specifier.
1042 * Write position of value to value_pos.
1043 */
1044{
1045    PwValuePtr current_line = &parser->current_line;
1046
1047    unsigned next_pos = colon_pos + 1;
1048
1049    if (end_of_line(current_line, next_pos)) {
1050        *value_pos = next_pos;
1051        *result = true;
1052        return true;
1053    }
1054    char32_t chr = pw_char_at(current_line, next_pos);
1055    if (pw_isspace(chr)) {
1056        *value_pos = next_pos + 1;  // value should be separated from key by at least one space
1057        next_pos = pw_string_skip_spaces(current_line, next_pos);
1058        // cannot be end of line here because current line is R-stripped and EOL is already checked
1059        chr = pw_char_at(current_line, next_pos);
1060        if (chr != ':') {
1061            // separator without conversion specifier
1062            *result = true;
1063            return true;
1064        }
1065    } else if (chr != ':') {
1066        // key not followed immediately by conversion specifier -> not a separator
1067        *result = false;
1068        return true;
1069    }
1070
1071    // try parsing conversion specifier
1072    // value_pos will be updated only if conversion specifier is valid
1073    PwValue convspec = PW_NULL;
1074    if (!parse_convspec(parser, next_pos, value_pos, &convspec)) {
1075        return false;
1076    }
1077    if (pw_is_string(&convspec)) {
1078        if (convspec_out) {
1079            pw_move(convspec_out, &convspec);
1080        }
1081        *result = true;
1082        return true;
1083    }
1084
1085    // bad conversion specifier -> not a separator
1086    *result = false;
1087    return true;
1088}
1089
1090[[nodiscard]] static bool check_value_end(MwParser* parser, PwValuePtr value, unsigned end_pos,
1091                                          unsigned* nested_value_pos, PwValuePtr convspec_out, PwValuePtr result)
1092/*
1093 * Helper function for parse_value.
1094 *
1095 * Check if value ends with key-value separator and parse map.
1096 * If not, check if end_pos points to end of line or comment.
1097 *
1098 * If `nested_value_pos` is provided, the value is _expected_ to be a map key
1099 * and _must_ end with key-value separator.
1100 * The position of the next char after colon is stored in it
1101 * and conversion specifier, if any, is stored in `convspec_out`.
1102 *
1103 * Read next line if nothing to parse on the current_line.
1104 *
1105 * If there's no nested map to parse, return cloned value.
1106 */
1107{
1108    end_pos = pw_string_skip_spaces(&parser->current_line, end_pos);
1109    if (end_of_line(&parser->current_line, end_pos)) {
1110        if (nested_value_pos) {
1111            mw_exception(parser->line_number, end_pos, "Map key expected");
1112            return false;
1113        }
1114        // read next line
1115        if (!_mw_read_block_line(parser)) {
1116            if (!_mw_end_of_block()) {
1117                return false;
1118            }
1119        }
1120        pw_clone2(result, value);
1121        return true;
1122    }
1123
1124    char32_t chr = pw_char_at(&parser->current_line, end_pos);
1125    if (chr == ':') {
1126        // check key-value separator
1127        PwValue convspec = PW_NULL;
1128        unsigned value_pos;
1129        bool kvs;
1130        if (!is_kv_separator(parser, end_pos, &convspec, &value_pos, &kvs)) {
1131            return false;
1132        }
1133        if (kvs) {
1134            // found key-value separator
1135            if (nested_value_pos) {
1136                // the separator was expected, just return the value
1137                *nested_value_pos = value_pos;
1138                pw_move(convspec_out, &convspec);
1139                pw_clone2(result, value);
1140                return true;
1141            }
1142            // parse map
1143            PwValue first_key = pw_clone(value);
1144            return parse_map(parser, &first_key, &convspec, value_pos, result);
1145        }
1146        mw_exception(parser->line_number, end_pos + 1, "Bad character encountered");
1147        return false;
1148    }
1149
1150    if (chr != MW_COMMENT) {
1151        mw_exception(parser->line_number, end_pos, "Bad character encountered");
1152        return false;
1153    }
1154
1155    // read next line
1156    if (!_mw_read_block_line(parser)) {
1157        if (!_mw_end_of_block()) {
1158            return false;
1159        }
1160    }
1161    pw_clone2(result, value);
1162    return true;
1163}
1164
1165[[nodiscard]] static bool parse_value(MwParser* parser, unsigned* nested_value_pos,
1166                                      PwValuePtr convspec_out, PwValuePtr result)
1167/*
1168 * Parse value starting from `current_line[block_indent]` .
1169 *
1170 * If `nested_value_pos` is provided, the value is _expected_ to be a map key
1171 * and _must_ end with colon or include a colon if it's a literal strings.
1172 *
1173 * On success return parsed value.
1174 * If `nested_value_pos' is provided, write position of the next char after colon to it
1175 * and write conversion specifier to `convspec_out` if it's followed by conversion specifier.
1176 *
1177 * On error return status and set `parser->result["error"]`.
1178 */
1179{
1180    TRACEPOINT();
1181
1182    unsigned start_pos = _mw_get_start_position(parser);
1183
1184    // Analyze first character.
1185    char32_t chr = pw_char_at(&parser->current_line, start_pos);
1186
1187    // first, check if value starts with colon that may denote conversion specifier
1188
1189    if (chr == ':') {
1190        // this might be conversion specifier
1191        if (nested_value_pos) {
1192            // we expect map key, and map keys cannot start with colon
1193            // because they would look same as conversion specifier
1194            mw_exception(parser->line_number, start_pos, "Map key expected and it cannot start with colon");
1195            return false;
1196        }
1197        unsigned value_pos;
1198        PwValue convspec =PW_NULL;
1199        if (!parse_convspec(parser, start_pos, &value_pos, &convspec)) {;
1200            return false;
1201        }
1202        if (!pw_is_string(&convspec)) {
1203            // not a conversion specifier
1204            return parse_literal_string(parser, result);
1205        }
1206        // we have conversion specifier
1207        if (end_of_line(&parser->current_line, value_pos)) {
1208
1209            // conversion specifier is followed by LF
1210            // continue parsing CURRENT block from next line
1211            if (!_mw_read_block_line(parser)) {
1212                if (_mw_end_of_block()) {
1213                    mw_exception(parser->line_number, parser->current_indent, "Empty block");
1214                }
1215                return false;
1216            }
1217            // call parser function
1218            MwBlockParserFunc parser_func = get_custom_parser(parser, &convspec);
1219            return parser_func(parser, result);
1220
1221        } else {
1222            // value is on the same line, parse it as nested block
1223            return parse_nested_block(
1224                parser, value_pos, get_custom_parser(parser, &convspec), result
1225            );
1226        }
1227    }
1228
1229    // other values can be map keys
1230
1231    // check for dash
1232
1233    if (chr == '-') {
1234        unsigned next_pos = start_pos + 1;
1235        char32_t next_chr = pw_char_at(&parser->current_line, next_pos);
1236
1237        // if followed by digit, it's a number
1238        if ('0' <= next_chr && next_chr <= '9') {
1239            unsigned end_pos;
1240            PwValue number = PW_NULL;
1241            if (!_mw_parse_number(parser, next_pos, -1, &end_pos, number_terminators, &number)) {
1242                return false;
1243            }
1244            return check_value_end(parser, &number, end_pos, nested_value_pos, convspec_out, result);
1245        }
1246        // if followed by space or end of line, that's a list item
1247        if (isspace_or_eol_at(&parser->current_line, next_pos)) {
1248            if (nested_value_pos) {
1249                mw_exception(parser->line_number, start_pos, "Map key expected and it cannot be a list");
1250                return false;
1251            }
1252            // yes, it's a list item
1253            return parse_list(parser, result);
1254        }
1255        // otherwise, it's a literal string or map
1256        goto parse_literal_string_or_map;
1257    }
1258
1259    // check for quoted string
1260
1261    if (chr == '"' || chr == '\'') {
1262        // quoted string
1263        unsigned start_line = parser->line_number;
1264        unsigned end_pos;
1265        PwValue str = PW_NULL;
1266        if (!parse_quoted_string(parser, start_pos, &end_pos, &str)) {
1267            return false;
1268        }
1269        unsigned end_line = parser->line_number;
1270        if (end_line == start_line) {
1271            // single-line string can be a map key
1272            return check_value_end(parser, &str, end_pos, nested_value_pos, convspec_out, result);
1273        } else if (_mw_comment_or_end_of_line(parser, end_pos)) {
1274            // multi-line string cannot be a key
1275            pw_move(result, &str);
1276            return true;
1277        } else {
1278            mw_exception(parser->line_number, end_pos, "Bad character after quoted string");
1279            return false;
1280        }
1281    }
1282
1283    // check for reserved keywords
1284
1285    TRACE("trying reserved keywords");
1286    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 4, "null")) {
1287        PwValue null_value = PW_NULL;
1288        return check_value_end(parser, &null_value, start_pos + 4, nested_value_pos, convspec_out, result);
1289    }
1290    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 4, "true")) {
1291        PwValue true_value = PW_BOOL(true);
1292        return check_value_end(parser, &true_value, start_pos + 4, nested_value_pos, convspec_out, result);
1293    }
1294    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 5, "false")) {
1295        PwValue false_value = PW_BOOL(false);
1296        return check_value_end(parser, &false_value, start_pos + 5, nested_value_pos, convspec_out, result);
1297    }
1298
1299    // try parsing number
1300
1301    TRACE("not a keyword, trying number");
1302    if (chr == '+') {
1303        char32_t next_chr = pw_char_at(&parser->current_line, start_pos + 1);
1304        if ('0' <= next_chr && next_chr <= '9') {
1305            start_pos++;
1306            chr = next_chr;
1307        }
1308    }
1309    if ('0' <= chr && chr <= '9') {
1310        unsigned end_pos;
1311        PwValue number = PW_NULL;
1312        if (!_mw_parse_number(parser, start_pos, 1, &end_pos, number_terminators, &number)) {
1313            return false;
1314        }
1315        return check_value_end(parser, &number, end_pos, nested_value_pos, convspec_out, result);
1316    }
1317    TRACE("not a number, pasring literal string or map");
1318
1319parse_literal_string_or_map:
1320
1321    // look for key-value separator
1322    for (unsigned pos = start_pos;;) {
1323        unsigned colon_pos;
1324        if (!pw_strchr(&parser->current_line, ':', pos, &colon_pos)) {
1325            break;
1326        }
1327        PwValue convspec = PW_NULL;
1328        unsigned value_pos;
1329        bool kvs;
1330        if (!is_kv_separator(parser, colon_pos, &convspec, &value_pos, &kvs)) {
1331            return false;
1332        }
1333        if (kvs) {
1334            // found key-value separator, get key
1335            PwValue key = PW_NULL;
1336            if (!pw_substr(&parser->current_line, start_pos, colon_pos, &key)) {
1337                return false;
1338            }
1339            // strip trailing spaces
1340            if (!pw_string_rstrip(&key)) {
1341                return false;
1342            }
1343            if (nested_value_pos) {
1344                // key was anticipated, simply return it
1345                *nested_value_pos = value_pos;
1346                pw_move(convspec_out, &convspec);
1347                pw_move(result, &key);
1348                return true;
1349            }
1350            // parse map
1351            return parse_map(parser, &key, &convspec, value_pos, result);
1352        }
1353        pos = colon_pos + 1;
1354    }
1355
1356    // separator not found
1357
1358    if (nested_value_pos) {
1359        // expecting key, but it's a bare literal string
1360        mw_exception(parser->line_number, parser->current_indent, "Not a key");
1361        return false;
1362    }
1363    return parse_literal_string(parser, result);
1364}
1365
1366[[nodiscard]] static bool value_parser_func(MwParser* parser, PwValuePtr result)
1367{
1368    return parse_value(parser, nullptr, nullptr, result);
1369}
1370
1371[[nodiscard]] bool mw_parse(PwValuePtr markup, PwValuePtr result)
1372{
1373    [[ gnu::cleanup(mw_delete_parser) ]] MwParser* parser = mw_create_parser(markup);
1374    if (!parser) {
1375        return false;
1376    }
1377    // read first line to prepare for parsing and to detect EOF
1378    if (!_mw_read_block_line(parser)) {
1379        if (_mw_end_of_block() && parser->eof) {
1380            pw_set_status(PwStatus(PweEOF));
1381        }
1382        return false;
1383    }
1384
1385    // parse top-level value
1386    if (!value_parser_func(parser, result)) {
1387        return false;
1388    }
1389
1390    // make sure markup has no more data
1391    if (_mw_read_block_line(parser)) {
1392        mw_exception(parser->line_number, parser->current_indent, "Extra data after parsed value");
1393        return false;
1394    }
1395    if (_mw_end_of_block() && parser->eof) {
1396        // EOF means success
1397        pw_destroy(&current_task->status);
1398        return true;
1399    }
1400    return false;
1401}