1#include <errno.h>
  2#include <stdlib.h>
  3
  4#include "include/pw.h"
  5#include "include/pwlib/parsers.h"
  6
  7
  8static inline bool end_of_line(PwValuePtr str, unsigned position)
  9/*
 10 * Return true if position is beyond end of line.
 11 */
 12{
 13    return !pw_string_index_valid(str, position);
 14}
 15
 16[[nodiscard]] bool _pw_parse_unsigned(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
 17                                      unsigned radix, bool allow_overflow, PwValuePtr result)
 18{
 19    pw_destroy(result);
 20    *result = PwUnsigned(0);
 21
 22    bool digit_seen = false;
 23    bool separator_seen = false;
 24    unsigned pos = start_pos;
 25    bool ret = false;
 26    for (;;) {
 27        char32_t chr = pw_char_at(str, pos);
 28
 29        // check separator
 30        if (chr == '\'' || chr == '_') {
 31            if (separator_seen) {
 32                pw_set_status(PwStatus(PweBadNumber, "Duplicate separator encountered"));
 33                break;
 34            }
 35            if (!digit_seen) {
 36                pw_set_status(PwStatus(PweBadNumber, "Separator is not allowed in the beginning of the number"));
 37                break;
 38            }
 39            separator_seen = true;
 40            pos++;
 41            if (end_of_line(str, pos)) {
 42                pw_set_status(PwStatus(PweBadNumber, "Premature end of line"));
 43                break;
 44            }
 45            continue;
 46        }
 47        separator_seen = false;
 48
 49        // check digit and convert to number
 50        if (!pw_is_ascii(chr)) {
 51not_a_digit:
 52            if (!digit_seen) {
 53                if (radix == 16) {
 54                    pw_set_status(PwStatus(PweBadNumber, "Expected hex digit"));
 55                } else {
 56                    pw_set_status(PwStatus(PweBadNumber, "Expected digit"));
 57                }
 58            } else {
 59                // not a digit, end of conversion
 60                ret = true;
 61            }
 62            break;
 63        }
 64        if (radix == 16) {
 65            if (!pw_parse_hexdigit(&chr)) {
 66                goto not_a_digit;
 67            }
 68        } else {
 69            if (chr >= '0' && chr < (char32_t) ('0' + radix)) {
 70                chr -= '0';
 71            } else {
 72                goto not_a_digit;
 73            }
 74        }
 75        if (!allow_overflow) {
 76            if (result->unsigned_value > PW_UNSIGNED_MAX / radix) {
 77                // overflow
 78                pw_set_status(PwStatus(PweNumericOverflow));
 79                break;
 80            }
 81        }
 82        PwType_Unsigned new_value = result->unsigned_value * radix + chr;
 83        if (!allow_overflow) {
 84            if (new_value < result->unsigned_value) {
 85                // overflow
 86                pw_set_status(PwStatus(PweNumericOverflow));
 87                break;
 88            }
 89        }
 90        result->unsigned_value = new_value;
 91
 92        pos++;
 93        if (end_of_line(str, pos)) {
 94            // end of line, end of conversion
 95            ret = true;
 96            break;
 97        }
 98        digit_seen = true;
 99    }
100    if (end_pos) {
101        *end_pos = pos;
102    }
103    return ret;
104}
105
106static unsigned skip_digits(PwValuePtr str, unsigned pos)
107{
108    for (;;) {
109        if (end_of_line(str, pos)) {
110            break;
111        }
112        char32_t chr = pw_char_at(str, pos);
113        if (!pw_is_ascii_digit(chr)) {
114            break;
115        }
116        pos++;
117    }
118    return pos;
119}
120
121[[nodiscard]] bool _pw_parse_num_str(PwValuePtr str, unsigned start_pos,
122                                     int sign, unsigned* end_pos, char32_t* allowed_terminators,
123                                     bool allow_overflow, PwValuePtr result)
124{
125    pw_destroy(result);
126    *result = PwSigned(0);
127
128    unsigned pos = start_pos;
129    unsigned radix = 10;
130    bool is_float = false;
131    PwValue base = PW_UNSIGNED(0);
132
133    char32_t chr = pw_char_at(str, pos);
134    if (chr == '0') {
135        // check radix specifier
136        if (end_of_line(str, pos)) {
137            goto done;
138        }
139        switch (pw_char_at(str, pos + 1)) {
140            case 'b':
141            case 'B':
142                radix = 2;
143                pos += 2;
144                break;
145            case 'o':
146            case 'O':
147                radix = 8;
148                pos += 2;
149                break;
150            case 'x':
151            case 'X':
152                radix = 16;
153                pos += 2;
154                break;
155            default:
156                break;
157        }
158        if (end_of_line(str, pos)) {
159            pw_set_status(PwStatus(PweBadNumber, "Premature end of line"));
160            goto error;
161        }
162    }
163
164    if (!_pw_parse_unsigned(str, pos, &pos, radix, allow_overflow, &base)) {
165        goto error;
166    }
167    if (end_of_line(str, pos)) {
168        goto done;
169    }
170
171    // check for fraction
172    chr = pw_char_at(str, pos);
173    if (chr == '.') {
174        if (radix != 10) {
175decimal_float_only:
176            pw_set_status(PwStatus(PweBadNumber, "Only decimal representation is supported for floating point numbers"));
177            goto error;
178        }
179        is_float = true;
180        pos = skip_digits(str, pos + 1);
181        if (end_of_line(str, pos)) {
182            goto done;
183        }
184        chr = pw_char_at(str, pos);
185    }
186    // check for exponent
187    if (chr == 'e' || chr == 'E') {
188        if (radix != 10) {
189            goto decimal_float_only;
190        }
191        is_float = true;
192        pos++;
193        if (end_of_line(str, pos)) {
194            goto done;
195        }
196        chr = pw_char_at(str, pos);
197        if (chr == '-' || chr == '+') {
198            pos++;
199        }
200        unsigned next_pos = skip_digits(str, pos);
201        if (next_pos == pos) {
202            pw_set_status(PwStatus(PweBadNumber, "Bad exponent"));
203            goto error;
204        }
205        pos = next_pos;
206
207    } else if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
208        pw_set_status(PwStatus(PweBadNumber, "Wrong character encountered"));
209        goto error;
210    }
211
212done:
213    if (is_float) {
214        // parse float
215        unsigned len = pos - start_pos;
216        char number[len + 1];
217        pw_substr_to_utf8(str, start_pos, pos, number);
218        errno = 0;
219        double n = strtod(number, nullptr);
220        if (errno == ERANGE) {
221            if (!allow_overflow) {
222                pw_set_status(PwStatus(PweNumericOverflow));
223                goto error;
224            }
225        } else if (errno) {
226            pw_set_status(PwStatus(PweBadNumber, "Floating point conversion error"));
227            goto error;
228        }
229        if (sign < 0 && n != 0.0) {
230            n = -n;
231        }
232        *result = PwFloat(n);
233    } else {
234        // make integer
235        if (base.unsigned_value > PW_SIGNED_MAX) {
236            if (sign < 0) {
237                if (allow_overflow) {
238                    *result = PwSigned(base.unsigned_value);
239                } else {
240                    pw_set_status(PwStatus(PweNumericOverflow));
241                    goto error;
242                }
243            } else {
244                *result = PwUnsigned(base.unsigned_value);
245            }
246        } else {
247            if (sign < 0 && base.unsigned_value) {
248                *result = PwSigned(-base.unsigned_value);
249            } else {
250                *result = PwSigned(base.unsigned_value);
251            }
252        }
253    }
254    if (end_pos) {
255        *end_pos = pos;
256    }
257    return true;
258
259error:
260    if (end_pos) {
261        *end_pos = pos;
262    }
263    return false;
264}
265
266[[nodiscard]] bool _pw_parse_number(PwValuePtr str, PwValuePtr result, bool allow_overflow)
267{
268    int sign = 1;
269    unsigned start_pos = pw_string_skip_spaces(str, 0);
270    char32_t chr = pw_char_at(str, start_pos);
271    if (chr == '+') {
272        // no op
273        start_pos++;
274    } else if (chr == '-') {
275        sign = -1;
276        start_pos++;
277    }
278    return _pw_parse_num_str(str, start_pos, sign, nullptr, nullptr, false, result);
279}
280
281static bool parse_nanosecond_frac(PwValuePtr str, unsigned* pos, uint32_t* result)
282/*
283 * Parse fractional nanoseconds part in `str` starting from `pos`.
284 * Always update `pos` upon return.
285 * Return true on success and write parsed value to `result`.
286 * On error return false.
287 */
288{
289    unsigned p = *pos;
290    uint32_t nanoseconds = 0;
291    unsigned i = 0;
292    while (!end_of_line(str, p)) {
293        char32_t chr = pw_char_at(str, p);
294        if (!pw_is_ascii_digit(chr)) {
295            break;
296        }
297        if (i == 9) {
298            *pos = p;
299            return false;
300        }
301        nanoseconds *= 10;
302        nanoseconds += chr - '0';
303        i++;
304        p++;
305    }
306    if (i == 0) {
307    }
308    static unsigned order[] = {
309        1000'000'000,  // unused, i starts from 1 here
310        100'000'000,
311        10'000'000,
312        1000'000,
313        100'000,
314        10'000,
315        1000,
316        100,
317        10,
318        1
319    };
320    *result = nanoseconds * order[i];
321    *pos = p;
322    return true;
323}
324
325[[nodiscard]] bool _pw_parse_datetime(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
326                                      char32_t* allowed_terminators, PwValuePtr result)
327{
328    pw_destroy(result);
329    *result = PwDateTime(0, 0, 0, 0, 0, 0);
330
331    unsigned pos = start_pos;
332    char32_t chr;
333
334    // parse YYYY part
335    for (unsigned i = 0; i < 4; i++, pos++) {
336        chr = pw_char_at(str, pos);
337        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
338        result->year *= 10;
339        result->year += chr - '0';
340    }
341    // skip optional separator
342    if (pw_char_at(str, pos) == '-') {
343        pos++;
344    }
345    // parse MM part
346    for (unsigned i = 0; i < 2; i++, pos++) {
347        chr = pw_char_at(str, pos);
348        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
349        result->month *= 10;
350        result->month += chr - '0';
351    }
352    // skip optional separator
353    if (pw_char_at(str, pos) == '-') {
354        pos++;
355    }
356    // parse DD part
357    for (unsigned i = 0; i < 2; i++, pos++) {
358        chr = pw_char_at(str, pos);
359        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
360        result->day *= 10;
361        result->day += chr - '0';
362    }
363    // skip optional separator
364    chr = pw_char_at(str, pos);
365    if (chr == 'T') {
366        pos++;
367    } else {
368        pos = pw_string_skip_spaces(str, pos);
369        if (end_of_line(str, pos)) { goto out; }
370        chr = pw_char_at(str, pos);
371        if (allowed_terminators && utf32_strchr(allowed_terminators, chr)) { goto out; }
372    }
373    // parse HH part
374    for (unsigned i = 0; i < 2; i++, pos++) {
375        chr = pw_char_at(str, pos);
376        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
377        result->hour *= 10;
378        result->hour += chr - '0';
379    }
380    // skip optional separator
381    if (pw_char_at(str, pos) == ':') {
382        pos++;
383    }
384    // parse MM part
385    for (unsigned i = 0; i < 2; i++, pos++) {
386        chr = pw_char_at(str, pos);
387        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
388        result->minute *= 10;
389        result->minute += chr - '0';
390    }
391    // skip optional separator
392    if (pw_char_at(str, pos) == ':') {
393        pos++;
394    }
395    // parse SS part
396    for (unsigned i = 0; i < 2; i++, pos++) {
397        chr = pw_char_at(str, pos);
398        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
399        result->second *= 10;
400        result->second += chr - '0';
401    }
402    // check optional parts
403    chr = pw_char_at(str, pos);
404    if (chr == 'Z') {
405        pos++;
406        goto end_of_datetime;
407    }
408    if ( chr == '.') {
409        // parse nanoseconds
410        pos++;
411        if (!parse_nanosecond_frac(str, &pos, &result->nanosecond)) {
412            goto bad_datetime;
413        }
414        chr = pw_char_at(str, pos);
415    }
416    if (chr == 'Z') {
417        pos++;
418
419    } else if (chr == '+' || chr == '-') {
420        // parse GMT offset
421        int sign = (chr == '-')? -1 : 1;
422        pos++;
423        // parse HH part
424        unsigned offset_hour = 0;
425        for (unsigned i = 0; i < 2; i++, pos++) {
426            chr = pw_char_at(str, pos);
427            if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
428            offset_hour *= 10;
429            offset_hour += chr - '0';
430        }
431        // skip optional separator
432        if (pw_char_at(str, pos) == ':') {
433            pos++;
434        }
435        // parse optional MM part
436        unsigned offset_minute = 0;
437        if (!end_of_line(str, pos)) {
438            chr = pw_char_at(str, pos);
439            if (pw_is_ascii_digit(chr)) {
440                for (unsigned i = 0; i < 2; i++, pos++) {
441                    chr = pw_char_at(str, pos);
442                    if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
443                    offset_minute *= 10;
444                    offset_minute += chr - '0';
445                }
446            }
447        }
448        result->gmt_offset = sign * offset_hour * 60 + offset_minute;
449    }
450
451end_of_datetime:
452    if (end_of_line(str, pos)) {
453        goto out;
454    }
455    chr = pw_char_at(str, pos);
456    if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
457        goto bad_datetime;
458    }
459
460out:
461    if (end_pos) {
462        *end_pos = pos;
463    }
464    return true;
465
466bad_datetime:
467    pw_set_status(PwStatus(PweBadDatetime));
468    if (end_pos) {
469        *end_pos = pos;
470    }
471    return false;
472}
473
474
475[[nodiscard]] bool pw_parse_datetime(PwValuePtr str, PwValuePtr result)
476{
477    return _pw_parse_datetime(str, pw_string_skip_spaces(str, 0), nullptr, nullptr, result);
478}
479
480
481[[nodiscard]] bool _pw_parse_timestamp(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
482                                       char32_t* allowed_terminators, PwValuePtr result)
483{
484    pw_destroy(result);
485    *result = PwTimestamp(0, 0);
486
487    unsigned pos;
488    PwValue seconds = PW_NULL;
489    if (!_pw_parse_unsigned(str, start_pos, &pos, 10, false, &seconds)) {
490        return false;
491    }
492
493    result->ts_seconds = seconds.unsigned_value;
494
495    if (end_of_line(str, pos)) {
496        goto out;
497    }
498    char32_t chr = pw_char_at(str, pos);
499    if ( chr == '.') {
500        // parse nanoseconds
501        pos++;
502        if (!parse_nanosecond_frac(str, &pos, &result->ts_nanoseconds)) {
503            goto bad_timestamp;
504        }
505    }
506    if (end_of_line(str, pos)) {
507        goto out;
508    }
509    chr = pw_char_at(str, pos);
510    if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
511        goto bad_timestamp;
512    }
513
514out:
515    if (end_pos) {
516        *end_pos = pos;
517    }
518    return true;
519
520bad_timestamp:
521    pw_set_status(PwStatus(PweBadTimestamp));
522    if (end_pos) {
523        *end_pos = pos;
524    }
525    return false;
526}
527
528
529[[nodiscard]] bool pw_parse_timestamp(PwValuePtr str, PwValuePtr result)
530{
531    return _pw_parse_timestamp(str, pw_string_skip_spaces(str, 0), nullptr, nullptr, result);
532}