1#include "include/pw.h"
  2#include "include/pwlib/file.h"
  3#include "src/types/string/string_internal.h"
  4
  5#include <libpussy/alignment.h>
  6
  7
  8uint16_t PwInterfaceId_BufferedFile = 0;
  9
 10uint16_t PwTypeId_BufferedFile = 0;
 11
 12PW_STRUCT(_PwBufferedFile) {
 13    uint8_t* read_buffer;
 14    unsigned read_buffer_size;  // size of read_buffer
 15    unsigned read_data_size;    // size of data in read_buffer
 16    unsigned read_position;     // current position in read_buffer
 17
 18    uint8_t* write_buffer;
 19    unsigned write_buffer_size; // size of write_buffer
 20    unsigned write_position;    // current position in write_buffer, also it's the size of data
 21
 22    // line reader data
 23    uint8_t  partial_utf8[4];   // UTF-8 sequence may span adjacent reads
 24    unsigned partial_utf8_len;
 25    _PwValue pushback;          // for unread_line
 26
 27    // line reader iterator data
 28    bool     iterating;         // indicates that iteration is in progress
 29    unsigned line_number;
 30
 31};
 32
 33
 34static void stop_read_lines(_PwBufferedFile* f)
 35{
 36    f->iterating = false;
 37    pw_destroy(&f->pushback);
 38}
 39
 40static inline bool do_file_read(PwValuePtr file, void* buffer, unsigned buffer_size, unsigned* bytes_read)
 41// call Reader::read for File type
 42{
 43    PwInterface_Reader* reader = (PwInterface_Reader*) pw_get_interface(PwTypeId_File, PwInterfaceId_Reader);
 44    return pw_call2(reader, read, file, buffer, buffer_size, bytes_read);
 45}
 46
 47static inline bool do_file_write(PwValuePtr file, void* data, unsigned size, unsigned* bytes_written)
 48// call Writer::write for File type
 49{
 50    PwInterface_Writer* writer = (PwInterface_Writer*) pw_get_interface(PwTypeId_File, PwInterfaceId_Writer);
 51    return pw_call2(writer, write, file, data, size, bytes_written);
 52}
 53
 54/****************************************************************
 55 * Basic interface
 56 */
 57
 58static bool bfile_create(PwMethod_Basic_create* mthis, PwValuePtr result, PwCtorArgs* ctor_args)
 59{
 60    if (!pw_super(mthis, result, ctor_args)) {
 61        return false;
 62    }
 63
 64    PwBufferedFileCtorArgs* args = pw_this_ctor_args();
 65
 66    _PwBufferedFile* f = pw_this_data(result);
 67    f->read_buffer_size  = align_unsigned_to_page(args->read_bufsize);
 68    f->write_buffer_size = align_unsigned_to_page(args->write_bufsize);
 69
 70    if (f->read_buffer_size) {
 71        f->read_buffer = allocate(f->read_buffer_size, false);
 72        if (!f->read_buffer) {
 73            pw_set_status(PwOOM());
 74            if (!pw_super_call(destroy, mthis, result, nullptr)) { /* no op */ }
 75            return false;
 76        }
 77    }
 78    if (f->write_buffer_size) {
 79        f->write_buffer = allocate(f->write_buffer_size, false);
 80        if (!f->write_buffer) {
 81            release((void**) &f->read_buffer, f->read_buffer_size);
 82            pw_set_status(PwOOM());
 83            if (!pw_super_call(destroy, mthis, result, nullptr)) { /* no op */ }
 84            return false;
 85        }
 86    }
 87    f->pushback = PwNull();
 88    return true;
 89}
 90
 91static bool bfile_destroy(PwMethod_Basic_destroy* mthis, PwValuePtr self, _PwCompoundChain* tail)
 92{
 93    PwValuePtr value_seen = _pw_on_chain(self, tail);
 94    if (value_seen) {
 95        return true;
 96    }
 97
 98    // XXX close() may block asynchronous execution, need to use a worker thread and/or a garbage pool
 99    if (!pw_call(Fd, close, self)) {
100        fprintf(stderr, "Failed %s\n", __func__);
101        pw_dump(stderr, &current_task->status);
102    }
103    _PwBufferedFile* f = pw_this_data(self);
104    if (f->read_buffer) {
105        release((void**) &f->read_buffer, f->read_buffer_size);
106    }
107    if (f->write_buffer) {
108        release((void**) &f->write_buffer, f->write_buffer_size);
109    }
110    pw_destroy(&f->pushback);
111    return pw_super(mthis, self, tail);
112}
113
114static bool bfile_dump(PwMethod_Basic_dump* mthis, PwValuePtr self, FILE* fp, int indent, _PwCompoundChain* tail)
115{
116    if (!pw_super(mthis, self, fp, indent, tail)) {
117        return false;
118    }
119
120    _PwBufferedFile* f = pw_this_data(self);
121
122    _pw_print_indent(fp, indent + 4);
123    fprintf(fp, "read_buffer: %p", (void*) f->read_buffer);
124    if (f->read_buffer) {
125        fprintf(fp, " %u bytes", f->read_buffer_size);
126    }
127    fprintf(fp, " write_buffer: %p", (void*) f->write_buffer);
128    if (f->write_buffer) {
129        fprintf(fp, " %u bytes", f->write_buffer_size);
130    }
131    fputc('\n', fp);
132    return true;
133}
134
135static PwInterface_Basic bfile_basic_interface = {
136    .create  = { .func = bfile_create },
137    .destroy = { .func = bfile_destroy },
138    .dump    = { .func = bfile_dump }
139};
140
141/****************************************************************
142 * BufferedFile interface
143 */
144
145[[nodiscard]] static bool flush(_PwBufferedFile* f, PwValuePtr self)
146{
147    if (f->write_position == 0) {
148        // nothing to write
149        return true;
150    }
151
152    if (f->iterating) {
153        pw_set_status(PwStatus(PweIterationInProgress));
154        return false;
155    }
156
157    unsigned bytes_written;
158    if (do_file_write(self, f->write_buffer, f->write_position, &bytes_written)) {
159        // success, all data is written
160        f->write_position = 0;
161        return true;
162    }
163    f->write_position -= bytes_written;
164    if (f->write_position) {
165        // move unwritten data to the beginning of `data`
166        memmove(f->write_buffer, f->write_buffer + bytes_written, f->write_position);
167    }
168    return false;
169}
170
171static bool bfile_flush(PwMethod_BufferedFile_flush* mthis, PwValuePtr self)
172{
173    _PwBufferedFile* f = pw_this_data(self);
174    return flush(f, self);
175}
176
177static PwInterface_BufferedFile bfile_buffered_file_interface = {
178#define X(name, ...) .name = { .func = bfile_##name } __VA_OPT__(,)
179    PW_BUFFERED_FILE_INTERFACE_METHODS
180#undef X
181};
182
183
184/****************************************************************
185 * Fd interface for BufferedFile
186 */
187
188static void reset_bfile_data(_PwBufferedFile* f)
189{
190    f->read_data_size = 0;
191    f->read_position = 0;
192    f->write_position = 0;
193    f->partial_utf8_len = 0;
194    pw_destroy(&f->pushback);
195}
196
197static bool bfile_close(PwMethod_Fd_close* mthis, PwValuePtr self)
198{
199    _PwBufferedFile* f = pw_this_data(self);
200    stop_read_lines(f);
201    bool flush_result = flush(f, self);
202    reset_bfile_data(f);
203    return pw_super(mthis, self) && flush_result;  // XXX if both flush and close are failed, flush status is lost
204}
205
206static bool bfile_set_fd(PwMethod_Fd_set_fd* mthis, PwValuePtr self, int fd, bool move)
207{
208    _PwBufferedFile* f = pw_this_data(self);
209
210    if (f->iterating) {
211        pw_set_status(PwStatus(PweIterationInProgress));
212        return false;
213    }
214    reset_bfile_data(f);
215    return pw_super(mthis, self, fd, move);
216}
217
218static PwInterface_Fd bfile_fd_interface = {
219    .close  = { .func = bfile_close },
220    .set_fd = { .func = bfile_set_fd }
221};
222
223/****************************************************************
224 * File interface for BufferedFile
225 */
226
227static bool bfile_set_name(PwMethod_File_set_name* mthis, PwValuePtr self, PwValuePtr file_name)
228{
229    _PwBufferedFile* f = pw_this_data(self);
230
231    if (f->iterating) {
232        pw_set_status(PwStatus(PweIterationInProgress));
233        return false;
234    }
235    reset_bfile_data(f);
236    return pw_super(mthis, self, file_name);
237}
238
239static bool bfile_seek(PwMethod_File_seek* mthis, PwValuePtr self, off_t offset, int whence, off_t* position)
240{
241    _PwBufferedFile* f = pw_this_data(self);
242
243    if (f->iterating) {
244        pw_set_status(PwStatus(PweIterationInProgress));
245        return false;
246    }
247    // reset read buffer
248    f->read_data_size = 0;
249    f->read_position = 0;
250    // flush write buffer
251    if (!flush(f, self)) {
252        return false;
253    }
254    // seek
255    return pw_super(mthis, self, offset, whence, position);
256}
257
258static PwInterface_File bfile_file_interface = {
259    .set_name = { .func = bfile_set_name },
260    .seek     = { .func = bfile_seek }
261};
262
263
264/****************************************************************
265 * Reader interface for BufferedFile
266 */
267
268static bool bfile_read(PwMethod_Reader_read* mthis, PwValuePtr self, void* buffer, unsigned buffer_size, unsigned* bytes_read)
269{
270    _PwBufferedFile* f = pw_this_data(self);
271
272    if (f->iterating) {
273        *bytes_read = 0;
274        pw_set_status(PwStatus(PweIterationInProgress));
275        return false;
276    }
277
278    if (f->read_buffer_size == 0) {
279        // return directly from file
280        return pw_super(mthis, self, buffer, buffer_size, bytes_read);
281    }
282
283    if (f->read_position == f->read_data_size) {
284
285        // no data in the read_buffer, read next portion
286        f->read_position = 0;
287
288        if (!pw_super(mthis, self, f->read_buffer, f->read_buffer_size, &f->read_data_size)) {
289            return false;
290        }
291        if (f->read_data_size == 0) {
292            pw_set_status(PwStatus(PweEOF));
293            return false;
294        }
295    }
296    unsigned avail = f->read_data_size - f->read_position;
297    unsigned size = (avail < buffer_size)? avail : buffer_size;
298    memcpy(buffer, f->read_buffer + f->read_position, size);
299    f->read_position += size;
300    *bytes_read = size;
301    return true;
302}
303
304static PwInterface_Reader bfile_reader_interface = {
305#define X(name, ...) .name = { .func = bfile_##name } __VA_OPT__(,)
306    PW_READER_INTERFACE_METHODS
307#undef X
308};
309
310
311/****************************************************************
312 * Writer interface for BufferedFile
313 */
314
315static bool bfile_write(PwMethod_Writer_write* mthis, PwValuePtr self, void* data, unsigned size, unsigned* bytes_written)
316{
317    _PwBufferedFile* f = pw_this_data(self);
318
319    if (bytes_written) {
320        *bytes_written = 0;
321    }
322
323    if (size == 0) {
324        return true;
325    }
326
327    if (f->iterating) {
328        pw_set_status(PwStatus(PweIterationInProgress));
329        return false;
330    }
331
332    if (f->write_buffer_size == 0) {
333        // write directly to file
334        return pw_super(mthis, self, data, size, bytes_written);
335    }
336
337    unsigned remaining_capacity = f->write_buffer_size - f->write_position;
338
339    if (remaining_capacity) {
340        // fill the write_buffer
341        unsigned n = (size < remaining_capacity)? size : remaining_capacity;
342        memcpy(f->write_buffer + f->write_position, data, n);
343        f->write_position += n;
344        if (bytes_written) {
345            *bytes_written += n;
346        }
347        if (f->write_position < f->write_buffer_size) {
348            // write_buffer is not full yet
349            return true;
350        }
351        size -= n;
352        data = ((uint8_t*) data) + n;
353    }
354    // write_buffer is full, flush it
355    if (!flush(f, self)) {
356        return false;
357    }
358    // write directly to file
359    while (size >= f->write_buffer_size) {
360        unsigned n;
361        bool ret = pw_super(mthis, self, data, f->write_buffer_size, &n);
362        if (bytes_written) {
363            *bytes_written += n;
364        }
365        if (!ret) {
366            return false;
367        }
368        size -= n;
369        data = ((uint8_t*) data) + n;
370    }
371
372    if (size) {
373        // move remaining data to the write_buffer
374        memcpy(f->write_buffer, data, size);
375        if (bytes_written) {
376            *bytes_written += size;
377        }
378        f->write_position = size;
379    }
380    return true;
381}
382
383static PwInterface_Writer bfile_writer_interface = {
384#define X(name, ...) .name = { .func = bfile_##name } __VA_OPT__(,)
385    PW_WRITER_INTERFACE_METHODS
386#undef X
387};
388
389
390/****************************************************************
391 * LineReader interface methods for buffered file
392 */
393
394static bool bfile_start(PwMethod_LineReader_start* mthis, PwValuePtr self)
395{
396    _PwBufferedFile* f = pw_this_data(self);
397
398    if (f->read_buffer_size == 0) {
399        pw_set_status(PwStatus(PweNotBufferedFile));
400        return false;
401    }
402
403    f->partial_utf8_len = 0;
404    f->line_number = 0;
405    pw_destroy(&f->pushback);
406    f->iterating = true;
407    return true;
408}
409
410static bool bfile_read_line_inplace(PwMethod_LineReader_read_line_inplace* mthis, PwValuePtr self, PwValuePtr line)
411{
412    _PwBufferedFile* f = pw_this_data(self);
413
414    if (f->read_buffer_size == 0) {
415        pw_set_status(PwStatus(PweNotBufferedFile));
416        return false;
417    }
418    if (!pw_string_truncate(line, 0)) {
419        return false;
420    }
421    if (pw_is_string(&f->pushback)) {
422        if (!pw_string_append(line, &f->pushback)) {
423            return false;
424        }
425        pw_destroy(&f->pushback);
426        f->line_number++;
427        return true;
428    }
429    uint8_t* dest_ptr = _pw_string_start(line);
430    do {
431        if (f->read_position == f->read_data_size) {
432
433            // reached end of data scanning for line break
434
435            f->read_position = 0;
436
437            // read next chunk of file
438            if (!do_file_read(self, f->read_buffer, f->read_buffer_size, &f->read_data_size)) {
439                return false;
440            }
441            if (f->read_data_size == 0) {
442                pw_set_status(PwStatus(PweEOF));
443                f->iterating = false;
444                return false;
445            }
446
447            if (f->partial_utf8_len) {
448                // process partial UTF-8 sequence
449                while (f->partial_utf8_len < sizeof(f->partial_utf8)) {
450
451                    if (f->read_position == f->read_data_size) {
452                        // premature end of file
453                        // XXX warn?
454                        pw_set_status(PwStatus(PweEOF));
455                        f->iterating = false;
456                        return false;
457                    }
458
459                    uint8_t c = f->read_buffer[f->read_position];
460                    // expect continuation bytes only
461                    if ((c & 0xC0) != 0x80) {
462                        // malformed UTF-8 sequence
463                        break;
464                    }
465                    f->read_position++;
466                    f->partial_utf8[f->partial_utf8_len++] = c;
467
468                    uint8_t* ptr = f->partial_utf8;
469                    unsigned bytes_remaining = f->partial_utf8_len;
470                    char32_t chr;
471                    if (_pw_decode_utf8_buffer(&ptr, &bytes_remaining, &chr)) {
472                        if (chr != 0 && chr != 0xFFFFFFFF) {
473                            if (!pw_string_append(line, chr)) {
474                                return false;
475                            }
476                            // update dest_ptr because pw_string_append may reallocate
477                            _pw_string_start_end(line, &dest_ptr);
478                        }
479                        break;
480                    }
481                }
482                f->partial_utf8_len = 0;
483            }
484        }
485
486        uint8_t* ptr = f->read_buffer + f->read_position;
487        unsigned bytes_remaining = f->read_data_size - f->read_position;
488
489        // for optimized append:
490        uint8_t dest_char_size = line->str_params.char_size;
491        unsigned dest_avail = _pw_string_avail(line);
492        unsigned num_appended = 0;
493
494        while (bytes_remaining) {
495            char32_t chr;
496            if (!_pw_decode_utf8_buffer(&ptr, &bytes_remaining, &chr)) {
497                break;
498            }
499            if (chr == 0 || chr == 0xFFFFFFFF) {
500                continue;
501            }
502            /*
503            if (!pw_string_append(line, chr)) {
504                return false;
505            }
506            Optimized code from read_text_file:
507            */
508
509            uint8_t src_char_size = calc_char_size(chr);
510            if (_pw_unlikely(src_char_size > dest_char_size || dest_avail == 0)) {
511                // need to expand string
512                _pw_string_inc_length(line, num_appended);
513                num_appended = 0;
514                unsigned n = (dest_avail == 0)? 64 : 0;
515                if (!_pw_expand_string(line, n, src_char_size)) {
516                    return false;
517                }
518                dest_avail = _pw_string_avail(line);
519                _pw_string_start_end(line, &dest_ptr);
520                dest_char_size = line->str_params.char_size;
521            }
522            dest_ptr += _pw_put_char(dest_ptr, chr, dest_char_size);
523            num_appended++;
524            dest_avail--;
525
526            if (chr == '\n') {
527                _pw_string_inc_length(line, num_appended);
528                f->read_position = f->read_data_size - bytes_remaining;
529                f->line_number++;
530                return true;
531            }
532        }
533        _pw_string_inc_length(line, num_appended);
534
535        // move unprocessed data to partial_utf8
536        pw_assert(bytes_remaining <= sizeof(f->partial_utf8));
537        while (bytes_remaining--) {
538            f->partial_utf8[f->partial_utf8_len++] = *ptr++;
539        }
540        if (f->read_data_size < f->read_buffer_size) {
541            // reached end of file
542            f->read_position = 0;
543            f->read_data_size = 0;
544            f->line_number++;
545            return true;
546        }
547
548        // go read next chunk
549        f->read_position = f->read_data_size;
550
551    } while(true);
552}
553
554static bool bfile_read_line(PwMethod_LineReader_read_line* mthis, PwValuePtr self, PwValuePtr result)
555{
556    PwValue line = PW_STRING();
557    if (!pw_this_call(read_line_inplace, mthis, self, &line)) {
558        return false;
559    }
560    pw_move(result, &line);
561    return true;
562}
563
564static bool bfile_unread_line(PwMethod_LineReader_unread_line* mthis, PwValuePtr self, PwValuePtr line)
565{
566    _PwBufferedFile* f = pw_this_data(self);
567
568    if (pw_is_null(&f->pushback)) {
569        __pw_clone(&f->pushback, line);  // puchback is already Null, so use __pw_clone here
570        f->line_number--;
571        return true;
572    } else {
573        return false;
574    }
575}
576
577static bool bfile_get_line_number(PwMethod_LineReader_get_line_number* mthis, PwValuePtr self, unsigned* result)
578{
579    _PwBufferedFile* f = pw_this_data(self);
580    *result = f->line_number;
581    return true;
582}
583
584static bool bfile_stop(PwMethod_LineReader_stop* mthis,PwValuePtr self)
585{
586    _PwBufferedFile* f = pw_this_data(self);
587    stop_read_lines(f);
588    return true;
589}
590
591static PwInterface_LineReader bfile_line_reader_interface = {
592#define X(name, ...) .name = { .func = bfile_##name } __VA_OPT__(,)
593    PW_LINE_READER_INTERFACE_METHODS
594#undef X
595};
596
597
598/****************************************************************
599 * Append interface methods
600 */
601
602static bool bfile_append_string_data(PwMethod_Append_append_string_data* mthis,
603                                     PwValuePtr self, uint8_t* start_ptr, uint8_t* end_ptr, uint8_t char_size)
604{
605    if (start_ptr >= end_ptr) {
606        return true;
607    }
608
609    _PwBufferedFile* f = pw_this_data(self);
610
611    if (char_size < 2) {
612        // write ASCII and UTF-8 directly to file
613        return do_file_write(self, start_ptr, end_ptr - start_ptr, nullptr);
614    }
615
616    // convert wide chars to UTF-8
617
618    if (f->write_buffer_size == 0) {
619        // invoke unbuffered version
620        return pw_super(mthis, self, start_ptr, end_ptr, char_size);
621    }
622
623    while (start_ptr < end_ptr) {
624        unsigned remaining = f->write_buffer_size - f->write_position;
625        if (remaining < 4) {
626            if (!pw_flush(self)) {
627                return false;
628            }
629        }
630        char32_t codepoint = _pw_next_char(&start_ptr, char_size);
631        f->write_position += pw_char32_to_utf8(codepoint, (char*) &f->write_buffer[f->write_position]);
632    }
633    return true;
634}
635
636static bool bfile_append(PwMethod_Append_append* mthis, PwValuePtr self, PwValuePtr value)
637{
638    pw_assert(pw_is_string(value));
639
640    uint8_t* end_ptr;
641    uint8_t* start_ptr = _pw_string_start_end(value, &end_ptr);
642    return pw_this_call(append_string_data, mthis, self, start_ptr, end_ptr, value->str_params.char_size);
643}
644
645static PwInterface_Append bfile_append_interface = {
646#define X(name, ...) .name = { .func = bfile_##name } __VA_OPT__(,)
647    PW_APPEND_INTERFACE_METHODS
648#undef X
649};
650
651/****************************************************************
652 * Initialization
653 */
654
655extern void _pw_init_file();
656
657[[gnu::constructor]]
658void _pw_init_buffered_file()
659{
660    if (PwInterfaceId_BufferedFile) {
661        return;
662    }
663
664    _pw_init_file();
665
666#   define X(name, ...) #name __VA_OPT__(,)
667    PwInterfaceId_BufferedFile = pw_register_interface("BufferedFile", PW_BUFFERED_FILE_INTERFACE_METHODS, nullptr);
668#   undef X
669
670    PwTypeId_BufferedFile = pw_add_type2(
671        "BufferedFile", _PwBufferedFile,
672        PW_PARENTS,
673            PwTypeId_File,
674        PW_INTERFACES,
675            PwInterfaceId_Basic,        &bfile_basic_interface,
676            PwInterfaceId_BufferedFile, &bfile_buffered_file_interface,
677            PwInterfaceId_File,         &bfile_file_interface,
678            PwInterfaceId_Fd,           &bfile_fd_interface,
679            PwInterfaceId_Reader,       &bfile_reader_interface,
680            PwInterfaceId_Writer,       &bfile_writer_interface,
681            PwInterfaceId_LineReader,   &bfile_line_reader_interface,
682            PwInterfaceId_Append,       &bfile_append_interface
683    );
684}