1#include <errno.h>
2#include <stdlib.h>
3
4#include "include/pw.h"
5#include "include/pwlib/parsers.h"
6
7
8static inline bool end_of_line(PwValuePtr str, unsigned position)
9/*
10 * Return true if position is beyond end of line.
11 */
12{
13 return !pw_string_index_valid(str, position);
14}
15
16[[nodiscard]] bool _pw_parse_unsigned(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
17 unsigned radix, bool allow_overflow, PwValuePtr result)
18{
19 pw_destroy(result);
20 *result = PwUnsigned(0);
21
22 bool digit_seen = false;
23 bool separator_seen = false;
24 unsigned pos = start_pos;
25 bool ret = false;
26 for (;;) {
27 char32_t chr = pw_char_at(str, pos);
28
29 // check separator
30 if (chr == '\'' || chr == '_') {
31 if (separator_seen) {
32 pw_set_status(PwStatus(PweBadNumber, "Duplicate separator encountered"));
33 break;
34 }
35 if (!digit_seen) {
36 pw_set_status(PwStatus(PweBadNumber, "Separator is not allowed in the beginning of the number"));
37 break;
38 }
39 separator_seen = true;
40 pos++;
41 if (end_of_line(str, pos)) {
42 pw_set_status(PwStatus(PweBadNumber, "Premature end of line"));
43 break;
44 }
45 continue;
46 }
47 separator_seen = false;
48
49 // check digit and convert to number
50 if (!pw_is_ascii(chr)) {
51not_a_digit:
52 if (!digit_seen) {
53 if (radix == 16) {
54 pw_set_status(PwStatus(PweBadNumber, "Expected hex digit"));
55 } else {
56 pw_set_status(PwStatus(PweBadNumber, "Expected digit"));
57 }
58 } else {
59 // not a digit, end of conversion
60 ret = true;
61 }
62 break;
63 }
64 if (radix == 16) {
65 if (!pw_parse_hexdigit(&chr)) {
66 goto not_a_digit;
67 }
68 } else {
69 if (chr >= '0' && chr < (char32_t) ('0' + radix)) {
70 chr -= '0';
71 } else {
72 goto not_a_digit;
73 }
74 }
75 if (!allow_overflow) {
76 if (result->unsigned_value > PW_UNSIGNED_MAX / radix) {
77 // overflow
78 pw_set_status(PwStatus(PweNumericOverflow));
79 break;
80 }
81 }
82 PwType_Unsigned new_value = result->unsigned_value * radix + chr;
83 if (!allow_overflow) {
84 if (new_value < result->unsigned_value) {
85 // overflow
86 pw_set_status(PwStatus(PweNumericOverflow));
87 break;
88 }
89 }
90 result->unsigned_value = new_value;
91
92 pos++;
93 if (end_of_line(str, pos)) {
94 // end of line, end of conversion
95 ret = true;
96 break;
97 }
98 digit_seen = true;
99 }
100 if (end_pos) {
101 *end_pos = pos;
102 }
103 return ret;
104}
105
106static unsigned skip_digits(PwValuePtr str, unsigned pos)
107{
108 for (;;) {
109 if (end_of_line(str, pos)) {
110 break;
111 }
112 char32_t chr = pw_char_at(str, pos);
113 if (!pw_is_ascii_digit(chr)) {
114 break;
115 }
116 pos++;
117 }
118 return pos;
119}
120
121[[nodiscard]] bool _pw_parse_num_str(PwValuePtr str, unsigned start_pos,
122 int sign, unsigned* end_pos, char32_t* allowed_terminators,
123 bool allow_overflow, PwValuePtr result)
124{
125 pw_destroy(result);
126 *result = PwSigned(0);
127
128 unsigned pos = start_pos;
129 unsigned radix = 10;
130 bool is_float = false;
131 PwValue base = PW_UNSIGNED(0);
132
133 char32_t chr = pw_char_at(str, pos);
134 if (chr == '0') {
135 // check radix specifier
136 if (end_of_line(str, pos)) {
137 goto done;
138 }
139 switch (pw_char_at(str, pos + 1)) {
140 case 'b':
141 case 'B':
142 radix = 2;
143 pos += 2;
144 break;
145 case 'o':
146 case 'O':
147 radix = 8;
148 pos += 2;
149 break;
150 case 'x':
151 case 'X':
152 radix = 16;
153 pos += 2;
154 break;
155 default:
156 break;
157 }
158 if (end_of_line(str, pos)) {
159 pw_set_status(PwStatus(PweBadNumber, "Premature end of line"));
160 goto error;
161 }
162 }
163
164 if (!_pw_parse_unsigned(str, pos, &pos, radix, allow_overflow, &base)) {
165 goto error;
166 }
167 if (end_of_line(str, pos)) {
168 goto done;
169 }
170
171 // check for fraction
172 chr = pw_char_at(str, pos);
173 if (chr == '.') {
174 if (radix != 10) {
175decimal_float_only:
176 pw_set_status(PwStatus(PweBadNumber, "Only decimal representation is supported for floating point numbers"));
177 goto error;
178 }
179 is_float = true;
180 pos = skip_digits(str, pos + 1);
181 if (end_of_line(str, pos)) {
182 goto done;
183 }
184 chr = pw_char_at(str, pos);
185 }
186 // check for exponent
187 if (chr == 'e' || chr == 'E') {
188 if (radix != 10) {
189 goto decimal_float_only;
190 }
191 is_float = true;
192 pos++;
193 if (end_of_line(str, pos)) {
194 goto done;
195 }
196 chr = pw_char_at(str, pos);
197 if (chr == '-' || chr == '+') {
198 pos++;
199 }
200 unsigned next_pos = skip_digits(str, pos);
201 if (next_pos == pos) {
202 pw_set_status(PwStatus(PweBadNumber, "Bad exponent"));
203 goto error;
204 }
205 pos = next_pos;
206
207 } else if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
208 pw_set_status(PwStatus(PweBadNumber, "Wrong character encountered"));
209 goto error;
210 }
211
212done:
213 if (is_float) {
214 // parse float
215 unsigned len = pos - start_pos;
216 char number[len + 1];
217 pw_substr_to_utf8(str, start_pos, pos, number);
218 errno = 0;
219 double n = strtod(number, nullptr);
220 if (errno == ERANGE) {
221 if (!allow_overflow) {
222 pw_set_status(PwStatus(PweNumericOverflow));
223 goto error;
224 }
225 } else if (errno) {
226 pw_set_status(PwStatus(PweBadNumber, "Floating point conversion error"));
227 goto error;
228 }
229 if (sign < 0 && n != 0.0) {
230 n = -n;
231 }
232 *result = PwFloat(n);
233 } else {
234 // make integer
235 if (base.unsigned_value > PW_SIGNED_MAX) {
236 if (sign < 0) {
237 if (allow_overflow) {
238 *result = PwSigned(base.unsigned_value);
239 } else {
240 pw_set_status(PwStatus(PweNumericOverflow));
241 goto error;
242 }
243 } else {
244 *result = PwUnsigned(base.unsigned_value);
245 }
246 } else {
247 if (sign < 0 && base.unsigned_value) {
248 *result = PwSigned(-base.unsigned_value);
249 } else {
250 *result = PwSigned(base.unsigned_value);
251 }
252 }
253 }
254 if (end_pos) {
255 *end_pos = pos;
256 }
257 return true;
258
259error:
260 if (end_pos) {
261 *end_pos = pos;
262 }
263 return false;
264}
265
266[[nodiscard]] bool _pw_parse_number(PwValuePtr str, PwValuePtr result, bool allow_overflow)
267{
268 int sign = 1;
269 unsigned start_pos = pw_string_skip_spaces(str, 0);
270 char32_t chr = pw_char_at(str, start_pos);
271 if (chr == '+') {
272 // no op
273 start_pos++;
274 } else if (chr == '-') {
275 sign = -1;
276 start_pos++;
277 }
278 return _pw_parse_num_str(str, start_pos, sign, nullptr, nullptr, false, result);
279}
280
281static bool parse_nanosecond_frac(PwValuePtr str, unsigned* pos, uint32_t* result)
282/*
283 * Parse fractional nanoseconds part in `str` starting from `pos`.
284 * Always update `pos` upon return.
285 * Return true on success and write parsed value to `result`.
286 * On error return false.
287 */
288{
289 unsigned p = *pos;
290 uint32_t nanoseconds = 0;
291 unsigned i = 0;
292 while (!end_of_line(str, p)) {
293 char32_t chr = pw_char_at(str, p);
294 if (!pw_is_ascii_digit(chr)) {
295 break;
296 }
297 if (i == 9) {
298 *pos = p;
299 return false;
300 }
301 nanoseconds *= 10;
302 nanoseconds += chr - '0';
303 i++;
304 p++;
305 }
306 if (i == 0) {
307 }
308 static unsigned order[] = {
309 1000'000'000, // unused, i starts from 1 here
310 100'000'000,
311 10'000'000,
312 1000'000,
313 100'000,
314 10'000,
315 1000,
316 100,
317 10,
318 1
319 };
320 *result = nanoseconds * order[i];
321 *pos = p;
322 return true;
323}
324
325[[nodiscard]] bool _pw_parse_datetime(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
326 char32_t* allowed_terminators, PwValuePtr result)
327{
328 pw_destroy(result);
329 *result = PwDateTime(0, 0, 0, 0, 0, 0);
330
331 unsigned pos = start_pos;
332 char32_t chr;
333
334 // parse YYYY part
335 for (unsigned i = 0; i < 4; i++, pos++) {
336 chr = pw_char_at(str, pos);
337 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
338 result->year *= 10;
339 result->year += chr - '0';
340 }
341 // skip optional separator
342 if (pw_char_at(str, pos) == '-') {
343 pos++;
344 }
345 // parse MM part
346 for (unsigned i = 0; i < 2; i++, pos++) {
347 chr = pw_char_at(str, pos);
348 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
349 result->month *= 10;
350 result->month += chr - '0';
351 }
352 // skip optional separator
353 if (pw_char_at(str, pos) == '-') {
354 pos++;
355 }
356 // parse DD part
357 for (unsigned i = 0; i < 2; i++, pos++) {
358 chr = pw_char_at(str, pos);
359 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
360 result->day *= 10;
361 result->day += chr - '0';
362 }
363 // skip optional separator
364 chr = pw_char_at(str, pos);
365 if (chr == 'T') {
366 pos++;
367 } else {
368 pos = pw_string_skip_spaces(str, pos);
369 if (end_of_line(str, pos)) { goto out; }
370 chr = pw_char_at(str, pos);
371 if (allowed_terminators && utf32_strchr(allowed_terminators, chr)) { goto out; }
372 }
373 // parse HH part
374 for (unsigned i = 0; i < 2; i++, pos++) {
375 chr = pw_char_at(str, pos);
376 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
377 result->hour *= 10;
378 result->hour += chr - '0';
379 }
380 // skip optional separator
381 if (pw_char_at(str, pos) == ':') {
382 pos++;
383 }
384 // parse MM part
385 for (unsigned i = 0; i < 2; i++, pos++) {
386 chr = pw_char_at(str, pos);
387 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
388 result->minute *= 10;
389 result->minute += chr - '0';
390 }
391 // skip optional separator
392 if (pw_char_at(str, pos) == ':') {
393 pos++;
394 }
395 // parse SS part
396 for (unsigned i = 0; i < 2; i++, pos++) {
397 chr = pw_char_at(str, pos);
398 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
399 result->second *= 10;
400 result->second += chr - '0';
401 }
402 // check optional parts
403 chr = pw_char_at(str, pos);
404 if (chr == 'Z') {
405 pos++;
406 goto end_of_datetime;
407 }
408 if ( chr == '.') {
409 // parse nanoseconds
410 pos++;
411 if (!parse_nanosecond_frac(str, &pos, &result->nanosecond)) {
412 goto bad_datetime;
413 }
414 chr = pw_char_at(str, pos);
415 }
416 if (chr == 'Z') {
417 pos++;
418
419 } else if (chr == '+' || chr == '-') {
420 // parse GMT offset
421 int sign = (chr == '-')? -1 : 1;
422 pos++;
423 // parse HH part
424 unsigned offset_hour = 0;
425 for (unsigned i = 0; i < 2; i++, pos++) {
426 chr = pw_char_at(str, pos);
427 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
428 offset_hour *= 10;
429 offset_hour += chr - '0';
430 }
431 // skip optional separator
432 if (pw_char_at(str, pos) == ':') {
433 pos++;
434 }
435 // parse optional MM part
436 unsigned offset_minute = 0;
437 if (!end_of_line(str, pos)) {
438 chr = pw_char_at(str, pos);
439 if (pw_is_ascii_digit(chr)) {
440 for (unsigned i = 0; i < 2; i++, pos++) {
441 chr = pw_char_at(str, pos);
442 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
443 offset_minute *= 10;
444 offset_minute += chr - '0';
445 }
446 }
447 }
448 result->gmt_offset = sign * offset_hour * 60 + offset_minute;
449 }
450
451end_of_datetime:
452 if (end_of_line(str, pos)) {
453 goto out;
454 }
455 chr = pw_char_at(str, pos);
456 if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
457 goto bad_datetime;
458 }
459
460out:
461 if (end_pos) {
462 *end_pos = pos;
463 }
464 return true;
465
466bad_datetime:
467 pw_set_status(PwStatus(PweBadDatetime));
468 if (end_pos) {
469 *end_pos = pos;
470 }
471 return false;
472}
473
474
475[[nodiscard]] bool pw_parse_datetime(PwValuePtr str, PwValuePtr result)
476{
477 return _pw_parse_datetime(str, pw_string_skip_spaces(str, 0), nullptr, nullptr, result);
478}
479
480
481[[nodiscard]] bool _pw_parse_timestamp(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
482 char32_t* allowed_terminators, PwValuePtr result)
483{
484 pw_destroy(result);
485 *result = PwTimestamp(0, 0);
486
487 unsigned pos;
488 PwValue seconds = PW_NULL;
489 if (!_pw_parse_unsigned(str, start_pos, &pos, 10, false, &seconds)) {
490 return false;
491 }
492
493 result->ts_seconds = seconds.unsigned_value;
494
495 if (end_of_line(str, pos)) {
496 goto out;
497 }
498 char32_t chr = pw_char_at(str, pos);
499 if ( chr == '.') {
500 // parse nanoseconds
501 pos++;
502 if (!parse_nanosecond_frac(str, &pos, &result->ts_nanoseconds)) {
503 goto bad_timestamp;
504 }
505 }
506 if (end_of_line(str, pos)) {
507 goto out;
508 }
509 chr = pw_char_at(str, pos);
510 if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
511 goto bad_timestamp;
512 }
513
514out:
515 if (end_pos) {
516 *end_pos = pos;
517 }
518 return true;
519
520bad_timestamp:
521 pw_set_status(PwStatus(PweBadTimestamp));
522 if (end_pos) {
523 *end_pos = pos;
524 }
525 return false;
526}
527
528
529[[nodiscard]] bool pw_parse_timestamp(PwValuePtr str, PwValuePtr result)
530{
531 return _pw_parse_timestamp(str, pw_string_skip_spaces(str, 0), nullptr, nullptr, result);
532}