Fun 0.41.5
The programming language that makes You have fun
Loading...
Searching...
No Matches
parser_utils.c
Go to the documentation of this file.
1/*
2 * This file is part of the Fun programming language.
3 * https://fun-lang.xyz/
4 *
5 * Copyright 2025 Johannes Findeisen <you@hanez.org>
6 * Licensed under the terms of the Apache-2.0 license.
7 * https://opensource.org/license/apache-2-0
8 */
9
19#include "parser.h"
20#include <ctype.h>
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24
/**
 * Read a whole file into a freshly allocated, NUL-terminated buffer.
 *
 * The file is opened in binary mode and sized with fseek/ftell. A short read
 * is not treated as an error: the buffer is terminated after the bytes that
 * were actually read.
 *
 * @param path    File to read.
 * @param out_len If non-NULL, receives the number of bytes read.
 * @return Buffer owned by the caller (release with free()), or NULL when the
 *         file cannot be opened or sized, or when allocation fails.
 */
static char *read_file_all(const char *path, size_t *out_len) {
  FILE *fp = fopen(path, "rb");
  if (fp == NULL) {
    return NULL;
  }

  char *data = NULL;
  long size = -1;
  if (fseek(fp, 0, SEEK_END) == 0) {
    size = ftell(fp);
  }

  if (size >= 0) {
    rewind(fp);
    data = (char *)malloc((size_t)size + 1);
    if (data != NULL) {
      size_t got = fread(data, 1, (size_t)size, fp);
      data[got] = '\0';
      if (out_len != NULL) {
        *out_len = got;
      }
    }
  }

  /* Single exit: the stream is closed on every path once it was opened. */
  fclose(fp);
  return data;
}
57
/**
 * Advance *pos past a run of blanks: space, tab, carriage return, newline.
 *
 * @param src Source text.
 * @param len Number of valid bytes in src.
 * @param pos In/out cursor; left unchanged when it is already at or past len.
 */
static void skip_ws(const char *src, size_t len, size_t *pos) {
  size_t i = *pos;
  while (i < len &&
         (src[i] == ' ' || src[i] == '\t' || src[i] == '\r' || src[i] == '\n')) {
    i++;
  }
  *pos = i;
}
74
/**
 * Move *pos to the first byte after the next '\n', or to len when the rest
 * of the input contains no newline.
 *
 * @param src Source text.
 * @param len Number of valid bytes in src.
 * @param pos In/out cursor; left unchanged when it is already at or past len.
 */
static void skip_line(const char *src, size_t len, size_t *pos) {
  if (*pos >= len) {
    return;
  }
  const char *nl = (const char *)memchr(src + *pos, '\n', len - *pos);
  *pos = (nl != NULL) ? (size_t)(nl - src) + 1 : len;
}
83
/**
 * Skip any mix of whitespace, line comments and block comments starting at *pos.
 *
 * Stops at the first byte that begins neither whitespace nor a comment.
 * An unterminated block comment is not consumed to the very end: the scan
 * stops once fewer than two bytes remain, and the cursor is left there.
 */
static void skip_comments(const char *src, size_t len, size_t *pos) {
  for (;;) {
    skip_ws(src, len, pos);

    const size_t at = *pos;
    if (at + 1 >= len || src[at] != '/') {
      return; /* a comment opener needs two bytes */
    }

    switch (src[at + 1]) {
    case '/':
      *pos = at + 2;
      skip_line(src, len, pos);
      break;
    case '*': {
      size_t i = at + 2;
      while (i + 1 < len && !(src[i] == '*' && src[i + 1] == '/')) {
        i++;
      }
      if (i + 1 < len) {
        i += 2; /* step over the closing star-slash */
      }
      *pos = i;
      break;
    }
    default:
      return; /* a lone '/' is not a comment */
    }
  }
}
106
/**
 * Test whether the text at src[pos] begins with the keyword kw.
 *
 * @param src Source text.
 * @param len Number of valid bytes in src.
 * @param pos Offset to test at.
 * @param kw  NUL-terminated keyword.
 * @return 1 when all of kw fits before len and matches byte for byte, else 0.
 */
static int starts_with(const char *src, size_t len, size_t pos, const char *kw) {
  const size_t need = strlen(kw);
  if (pos + need > len) {
    return 0;
  }
  return memcmp(src + pos, kw, need) == 0 ? 1 : 0;
}
116
/**
 * If the cursor is at offset 0 and the input starts with "#!", skip that
 * whole first line; otherwise leave *pos untouched.
 */
static void skip_shebang_if_present(const char *src, size_t len, size_t *pos) {
  if (*pos != 0) {
    return;
  }
  if (!starts_with(src, len, 0, "#!")) {
    return;
  }
  skip_line(src, len, pos);
}
125
/**
 * Advance *pos over one identifier of the form [A-Za-z_][A-Za-z0-9_]*.
 *
 * *pos is left unchanged when no identifier starts at the cursor.
 */
static void skip_identifier(const char *src, size_t len, size_t *pos) {
  size_t i = *pos;
  if (i >= len) {
    return;
  }

  const unsigned char first = (unsigned char)src[i];
  if (first != '_' && !isalpha(first)) {
    return;
  }

  for (i++; i < len; i++) {
    const unsigned char ch = (unsigned char)src[i];
    if (ch != '_' && !isalnum(ch)) {
      break;
    }
  }
  *pos = i;
}
139
/**
 * Skip whitespace, then consume one specific character if it is next.
 *
 * @return 1 and advances past the character when it matches `expected`;
 *         0 otherwise. The whitespace skipped beforehand stays consumed
 *         in both cases.
 */
static int consume_char(const char *src, size_t len, size_t *pos, char expected) {
  skip_ws(src, len, pos);
  if (*pos >= len || src[*pos] != expected) {
    return 0;
  }
  *pos += 1;
  return 1;
}
152
/**
 * Parse a string literal delimited by either single or double quotes.
 *
 * Leading whitespace is skipped. The escapes \n, \r and \t are translated;
 * any other escaped character stands for itself (so \\, \" and \' work).
 * If the input ends before the closing quote, the text gathered so far is
 * returned.
 *
 * @return Newly allocated, NUL-terminated contents of the literal (caller
 *         frees), or NULL when no quote is found at the cursor or an
 *         allocation fails. *pos ends just past the closing quote on success.
 */
static char *parse_string_literal_any_quote(const char *src, size_t len, size_t *pos) {
  skip_ws(src, len, pos);
  if (*pos >= len) {
    return NULL;
  }

  const char delim = src[*pos];
  if (delim != '"' && delim != '\'') {
    return NULL;
  }
  *pos += 1; /* step over the opening quote */

  size_t capacity = 64;
  size_t used = 0;
  char *text = (char *)malloc(capacity);
  if (text == NULL) {
    return NULL;
  }

  for (; *pos < len; *pos += 1) {
    char ch = src[*pos];

    if (ch == delim) {
      *pos += 1; /* step over the closing quote */
      break;
    }

    if (ch == '\\') {
      *pos += 1;
      if (*pos >= len) {
        break; /* dangling backslash at end of input */
      }
      const char escaped = src[*pos];
      if (escaped == 'n') {
        ch = '\n';
      } else if (escaped == 'r') {
        ch = '\r';
      } else if (escaped == 't') {
        ch = '\t';
      } else {
        ch = escaped;
      }
    }

    if (used + 1 >= capacity) {
      capacity *= 2;
      char *grown = (char *)realloc(text, capacity);
      if (grown == NULL) {
        free(text);
        return NULL;
      }
      text = grown;
    }
    text[used++] = ch;
  }

  /* Keep room for the terminator. */
  if (used + 1 >= capacity) {
    char *grown = (char *)realloc(text, capacity + 1);
    if (grown == NULL) {
      free(text);
      return NULL;
    }
    text = grown;
  }
  text[used] = '\0';
  return text;
}
226
227/* === Helpers for identifiers, numbers, booleans, and globals === */
228
/**
 * Advance *pos past spaces, tabs and carriage returns.
 *
 * Unlike skip_ws(), a newline stops the scan, so the cursor never leaves the
 * current line.
 */
static void skip_spaces(const char *src, size_t len, size_t *pos) {
  size_t i = *pos;
  for (; i < len; i++) {
    const char ch = src[i];
    if (ch != ' ' && ch != '\t' && ch != '\r') {
      break;
    }
  }
  *pos = i;
}
242
/**
 * Read one identifier ([A-Za-z_][A-Za-z0-9_]*) at the cursor into a new string.
 *
 * @param src      Source text.
 * @param len      Number of valid bytes in src.
 * @param pos      In/out cursor; advanced past the identifier on success only.
 * @param out_name Receives a malloc'd copy of the identifier on success
 *                 (caller frees); not written on failure.
 * @return 1 on success, 0 when no identifier starts here or allocation fails.
 */
static int read_identifier_into(const char *src, size_t len, size_t *pos, char **out_name) {
  const size_t begin = *pos;
  if (begin >= len) {
    return 0;
  }
  if (src[begin] != '_' && !isalpha((unsigned char)src[begin])) {
    return 0;
  }

  size_t end = begin + 1;
  while (end < len && (src[end] == '_' || isalnum((unsigned char)src[end]))) {
    end++;
  }

  const size_t n = end - begin;
  char *copy = (char *)malloc(n + 1);
  if (copy == NULL) {
    return 0;
  }
  memcpy(copy, src + begin, n);
  copy[n] = '\0';

  *out_name = copy;
  *pos = end;
  return 1;
}
266
/**
 * Parse an optionally signed integer literal, decimal or 0x/0X hexadecimal.
 *
 * Leading spaces, tabs and carriage returns are skipped. The magnitude is
 * accumulated in a uint64_t, so digits beyond 64 bits wrap modulo 2^64. A
 * leading '-' returns the two's-complement negation of the magnitude; the
 * negation is done in unsigned arithmetic so it is well defined for every
 * magnitude (the previous signed multiplication overflowed for >= 2^63).
 *
 * @param src Source text.
 * @param len Number of valid bytes in src.
 * @param pos In/out cursor; advanced past the literal on success only.
 * @param ok  Set to 1 on success, 0 on failure.
 * @return The parsed value, or 0 on failure. "0x" without a hex digit after
 *         it is a failure, not the literal 0.
 */
static uint64_t parse_int_literal_value(const char *src, size_t len, size_t *pos, int *ok) {
  size_t p = *pos;
  skip_spaces(src, len, &p);

  int negative = 0;
  if (p < len && (src[p] == '+' || src[p] == '-')) {
    negative = (src[p] == '-');
    p++;
  }
  if (p >= len) {
    *ok = 0;
    return 0;
  }

  uint64_t val = 0;
  if ((p + 1) < len && src[p] == '0' && (src[p + 1] == 'x' || src[p + 1] == 'X')) {
    /* Hexadecimal: 0x... or 0X... */
    p += 2;
    if (p >= len || !isxdigit((unsigned char)src[p])) {
      *ok = 0;
      return 0;
    }
    while (p < len && isxdigit((unsigned char)src[p])) {
      const char c = src[p];
      unsigned d;
      if (c >= '0' && c <= '9') {
        d = (unsigned)(c - '0');
      } else if (c >= 'a' && c <= 'f') {
        d = (unsigned)(c - 'a') + 10u;
      } else {
        d = (unsigned)(c - 'A') + 10u;
      }
      val = (val << 4) + (uint64_t)d;
      p++;
    }
  } else {
    /* Decimal */
    if (!isdigit((unsigned char)src[p])) {
      *ok = 0;
      return 0;
    }
    while (p < len && isdigit((unsigned char)src[p])) {
      val = val * 10 + (uint64_t)(src[p] - '0');
      p++;
    }
  }

  *pos = p;
  *ok = 1;
  return negative ? (uint64_t)0 - val : val;
}
321
/* === Include preprocessor ===
 * Supports:
 *   - #include "path"   (path is opened as written, i.e. relative to the
 *                        current working directory)
 *   - #include <path>   (searched in $FUN_LIB_DIR first, then in the
 *                        compile-time DEFAULT_LIB_DIR, then in ./lib/)
 *   - an optional trailing 'as <alias>' on either form
 * Also accepts 'include' without '#', at the start of a line (after spaces/tabs).
 * Directives are recognized only when not inside strings or block comments.
 */
329
/**
 * Thin wrapper around realloc().
 *
 * No out-of-memory policy is applied here: a NULL result is passed straight
 * back and the original allocation is still valid in that case.
 */
static void *xrealloc(void *ptr, size_t newcap) {
  return realloc(ptr, newcap);
}
337
/**
 * Simple growable string buffer.
 *
 * The sb_* helpers keep buf NUL-terminated whenever it is non-NULL.
 */
typedef struct {
  char *buf;  /* heap storage, or NULL if the initial allocation failed */
  size_t len; /* bytes in use, not counting the terminating NUL */
  size_t cap; /* bytes allocated for buf */
} StrBuf;
346
350static void sb_init(StrBuf *sb) {
351 sb->buf = (char *)malloc(256);
352 sb->cap = sb->buf ? 256 : 0;
353 sb->len = 0;
354 if (sb->buf) sb->buf[0] = '\0';
355}
356
360static void sb_reserve(StrBuf *sb, size_t need) {
361 if (need <= sb->cap) return;
362 size_t nc = sb->cap ? sb->cap : 256;
363 while (nc < need)
364 nc *= 2;
365 char *nb = (char *)xrealloc(sb->buf, nc);
366 if (!nb) return;
367 sb->buf = nb;
368 sb->cap = nc;
369}
370
374static void sb_append_n(StrBuf *sb, const char *s, size_t n) {
375 if (n == 0) return;
376 sb_reserve(sb, sb->len + n + 1);
377 if (!sb->buf) return;
378 memcpy(sb->buf + sb->len, s, n);
379 sb->len += n;
380 sb->buf[sb->len] = '\0';
381}
382
386static void sb_append(StrBuf *sb, const char *s) {
387 sb_append_n(sb, s, strlen(s));
388}
389
393static void sb_append_ch(StrBuf *sb, char c) {
394 sb_reserve(sb, sb->len + 2);
395 if (!sb->buf) return;
396 sb->buf[sb->len++] = c;
397 sb->buf[sb->len] = '\0';
398}
399
400/* ---- Export collection for include-as namespaces ---- */
/**
 * List of exported symbol names discovered at top level.
 */
typedef struct {
  char **names; /* array of heap-allocated name strings; NULL while empty */
  int count;    /* number of entries stored in names */
  int cap;      /* number of slots allocated for names */
} NameList;
409
413static void nl_init(NameList *nl) {
414 nl->names = NULL;
415 nl->count = 0;
416 nl->cap = 0;
417}
418
422static void nl_add(NameList *nl, const char *name) {
423 if (!name || !name[0]) return;
424 if (nl->count >= nl->cap) {
425 int ncap = nl->cap ? nl->cap * 2 : 8;
426 char **nn = (char **)realloc(nl->names, (size_t)ncap * sizeof(char *));
427 if (!nn) return;
428 nl->names = nn;
429 nl->cap = ncap;
430 }
431 nl->names[nl->count++] = strdup(name);
432}
433
437static void nl_free(NameList *nl) {
438 if (!nl) return;
439 for (int i = 0; i < nl->count; ++i)
440 free(nl->names[i]);
441 free(nl->names);
442 nl->names = NULL;
443 nl->count = nl->cap = 0;
444}
445
446/* Collect top-level (indent=0) exported symbols: function and class names.
447 Ignores lines inside comments/strings and ignores nested indent. */
452static void collect_exports_top_level(const char *text, NameList *out) {
453 if (!text || !out) return;
454 size_t len = strlen(text);
455 int in_line = 0, in_block = 0, in_sq = 0, in_dq = 0, esc = 0;
456 int bol = 1;
457 for (size_t i = 0; i < len;) {
458 char c = text[i];
459
460 if (in_line) {
461 if (c == '\n') {
462 in_line = 0;
463 bol = 1;
464 } else {
465 bol = 0;
466 }
467 i++;
468 continue;
469 }
470 if (in_block) {
471 if (c == '*' && (i + 1) < len && text[i + 1] == '/') {
472 i += 2;
473 bol = 0;
474 in_block = 0;
475 continue;
476 }
477 bol = (c == '\n');
478 i++;
479 continue;
480 }
481 if (in_sq) {
482 if (!esc && c == '\\') {
483 esc = 1;
484 i++;
485 bol = 0;
486 continue;
487 }
488 if (!esc && c == '\'') {
489 in_sq = 0;
490 }
491 esc = 0;
492 bol = (c == '\n');
493 i++;
494 continue;
495 }
496 if (in_dq) {
497 if (!esc && c == '\\') {
498 esc = 1;
499 i++;
500 bol = 0;
501 continue;
502 }
503 if (!esc && c == '"') {
504 in_dq = 0;
505 }
506 esc = 0;
507 bol = (c == '\n');
508 i++;
509 continue;
510 }
511
512 if (c == '/' && (i + 1) < len && text[i + 1] == '/') {
513 in_line = 1;
514 bol = 0;
515 i += 2;
516 continue;
517 }
518 if (c == '/' && (i + 1) < len && text[i + 1] == '*') {
519 in_block = 1;
520 bol = 0;
521 i += 2;
522 continue;
523 }
524 if (c == '\'') {
525 in_sq = 1;
526 bol = 0;
527 i++;
528 continue;
529 }
530 if (c == '"') {
531 in_dq = 1;
532 bol = 0;
533 i++;
534 continue;
535 }
536
537 if (bol) {
538 /* Compute leading spaces to filter out indented constructs */
539 size_t j = i;
540 int spaces = 0;
541 while (j < len && text[j] == ' ') {
542 spaces++;
543 j++;
544 }
545 if (j < len && text[j] == '\t') {
546 /* tabs not allowed for indentation; treat as not top-level */
547 bol = 0;
548 i = j + 1;
549 continue;
550 }
551 /* Only consider top-level (indent == 0) */
552 if (spaces == 0) {
553 /* Check for 'fun ' or 'class ' */
554 const char *kw1 = "fun";
555 const char *kw2 = "class";
556 if (j + 3 <= len && strncmp(text + j, kw1, 3) == 0 && (j + 3 == len || isspace((unsigned char)text[j + 3]))) {
557 size_t p = j + 3;
558 while (p < len && (text[p] == ' ' || text[p] == '\t'))
559 p++;
560 /* read identifier */
561 size_t start = p;
562 if (p < len && (isalpha((unsigned char)text[p]) || text[p] == '_')) {
563 p++;
564 while (p < len && (isalnum((unsigned char)text[p]) || text[p] == '_'))
565 p++;
566 size_t n = p - start;
567 if (n > 0) {
568 char tmp[256];
569 size_t copy = (n < sizeof(tmp) - 1) ? n : (sizeof(tmp) - 1);
570 memcpy(tmp, text + start, copy);
571 tmp[copy] = '\0';
572 nl_add(out, tmp);
573 }
574 }
575 } else if (j + 5 <= len && strncmp(text + j, kw2, 5) == 0 && (j + 5 == len || isspace((unsigned char)text[j + 5]))) {
576 size_t p = j + 5;
577 while (p < len && (text[p] == ' ' || text[p] == '\t'))
578 p++;
579 /* read identifier */
580 size_t start = p;
581 if (p < len && (isalpha((unsigned char)text[p]) || text[p] == '_')) {
582 p++;
583 while (p < len && (isalnum((unsigned char)text[p]) || text[p] == '_'))
584 p++;
585 size_t n = p - start;
586 if (n > 0) {
587 char tmp[256];
588 size_t copy = (n < sizeof(tmp) - 1) ? n : (sizeof(tmp) - 1);
589 memcpy(tmp, text + start, copy);
590 tmp[copy] = '\0';
591 nl_add(out, tmp);
592 }
593 }
594 }
595 }
596 }
597
598 /* move forward one char */
599 bol = (c == '\n');
600 i++;
601 }
602}
603
618static char *preprocess_includes_internal(const char *src, const char *current_path, int depth) {
619 if (!src) return NULL;
620 if (depth > 64) {
621 fprintf(stderr, "Include error: include nesting too deep\n");
622 return strdup("");
623 }
624
625/* Build-time default, can be overridden by compiler define -DDEFAULT_LIB_DIR=".../" */
626#ifndef DEFAULT_LIB_DIR
627#define DEFAULT_LIB_DIR "/usr/share/fun/lib/"
628#endif
629
630 const char *env_lib = getenv("FUN_LIB_DIR");
631 size_t len = strlen(src);
632 StrBuf out;
633 sb_init(&out);
634
635 /* Preserve shebang on the very first line before inserting the initial marker. */
636 size_t shebang_end = 0;
637 int shebang_lines = 0;
638 if (src[0] == '#' && src[1] == '!') {
639 /* find end of line (handle CR, LF, or CRLF) */
640 size_t j = 0;
641 while (src[j] && src[j] != '\n' && src[j] != '\r') j++;
642 /* include line ending */
643 if (src[j] == '\r') {
644 j++;
645 if (src[j] == '\n') j++;
646 } else if (src[j] == '\n') {
647 j++;
648 }
649 /* count lines in shebang we are copying */
650 for (size_t t = 0; t < j; ++t) if (src[t] == '\n') shebang_lines++;
651 if (shebang_lines == 0) shebang_lines = 1; /* single shebang line without LF */
652 shebang_end = j;
653 sb_append_n(&out, src, shebang_end);
654 }
655
656 /* When we know the current file path, emit a leading marker so mapping
657 * can always recover the correct file for regions before any include. */
658 if (current_path && current_path[0]) {
659 sb_append(&out, "// __include_begin__: ");
660 sb_append(&out, current_path);
661 /* annotate physical base line in the original file */
662 char lb[32];
663 int base_line = 1 + (shebang_end ? shebang_lines : 0);
664 snprintf(lb, sizeof(lb), " @line %d", base_line);
665 sb_append(&out, lb);
666 sb_append(&out, "\n");
667 }
668 int in_line = 0, in_block = 0, in_sq = 0, in_dq = 0, esc = 0;
669 int bol = 1; /* beginning of line */
670
671 for (size_t i = shebang_end; i < len;) {
672 char c = src[i];
673
674 /* Detect include directive at BOL, outside comments/strings */
675 if (bol && !in_block && !in_sq && !in_dq) {
676 size_t j = i;
677 /* skip leading spaces/tabs */
678 while (j < len && (src[j] == ' ' || src[j] == '\t'))
679 j++;
680 size_t k = j;
681 if (k < len && src[k] == '#') k++;
682 const char *kw = "include";
683 size_t kwlen = 7;
684 if (k + kwlen <= len && strncmp(src + k, kw, kwlen) == 0) {
685 k += kwlen;
686 /* next must be space/tab or delimiter */
687 while (k < len && (src[k] == ' ' || src[k] == '\t'))
688 k++;
689 if (k < len && (src[k] == '"' || src[k] == '<')) {
690 char opener = src[k];
691 char closer = (opener == '"') ? '"' : '>';
692 k++;
693 size_t path_start = k;
694 while (k < len && src[k] != closer)
695 k++;
696 if (k < len && src[k] == closer) {
697 size_t path_len = k - path_start;
698 char *path = (char *)malloc(path_len + 1);
699 if (path) {
700 memcpy(path, src + path_start, path_len);
701 path[path_len] = '\0';
702
703 /* parse optional 'as <alias>' then advance to end of line */
704 k++;
705 char ns[64];
706 ns[0] = '\0';
707 /* skip spaces/tabs */
708 size_t ap = k;
709 while (ap < len && (src[ap] == ' ' || src[ap] == '\t'))
710 ap++;
711 /* optional 'as' */
712 const char *askw = "as";
713 if (ap + 2 <= len && strncmp(src + ap, askw, 2) == 0 && (ap + 2 == len || isspace((unsigned char)src[ap + 2]))) {
714 ap += 2;
715 while (ap < len && (src[ap] == ' ' || src[ap] == '\t'))
716 ap++;
717 /* read identifier [A-Za-z_][A-Za-z0-9_]* */
718 size_t start = ap;
719 if (ap < len && (isalpha((unsigned char)src[ap]) || src[ap] == '_')) {
720 ap++;
721 while (ap < len && (isalnum((unsigned char)src[ap]) || src[ap] == '_'))
722 ap++;
723 size_t n = ap - start;
724 size_t copy = (n < sizeof(ns) - 1) ? n : (sizeof(ns) - 1);
725 memcpy(ns, src + start, copy);
726 ns[copy] = '\0';
727 }
728 /* ignore anything else on line */
729 }
730 /* advance to end of line */
731 k = ap;
732 while (k < len && src[k] != '\n')
733 k++;
734 if (k < len && src[k] == '\n') k++;
735
736 /* resolve file path; for <...> try FUN_LIB_DIR first, then default */
737 char resolved[1024];
738 char resolved2[1024];
739 size_t inc_len = 0;
740 char *inc = NULL;
741
742 if (opener == '<') {
743 /* Angle-bracket include resolution order:
744 * 1) FUN_LIB_DIR (env), respecting '/' or '\' endings
745 * 2) DEFAULT_LIB_DIR (compile-time define)
746 * 3) "lib/" under current working directory (developer fallback)
747 *
748 * Always assign 'resolved' to the last attempted candidate so errors are informative.
749 */
750 resolved[0] = '\0';
751
752 /* 1) FUN_LIB_DIR */
753 if (env_lib && env_lib[0]) {
754 size_t elen = strlen(env_lib);
755 char last = env_lib[elen ? (elen - 1) : 0];
756 int needs_sep = !(last == '/' || last == '\\');
757 char sep = (last == '\\') ? '\\' : '/';
758 if (needs_sep)
759 snprintf(resolved, sizeof(resolved), "%s%c%s", env_lib, sep, path);
760 else
761 snprintf(resolved, sizeof(resolved), "%s%s", env_lib, path);
762 inc = read_file_all(resolved, &inc_len);
763 }
764
765 /* 2) DEFAULT_LIB_DIR */
766 if (!inc) {
767 snprintf(resolved, sizeof(resolved), "%s%s", DEFAULT_LIB_DIR, path);
768 inc = read_file_all(resolved, &inc_len);
769 }
770
771 /* 3) project-local dev fallback: lib/<path> */
772 if (!inc) {
773 snprintf(resolved, sizeof(resolved), "lib/%s", path);
774 inc = read_file_all(resolved, &inc_len);
775 }
776 } else {
777 /* quoted include: relative path (cwd) */
778 snprintf(resolved, sizeof(resolved), "%s", path);
779 inc = read_file_all(resolved, &inc_len);
780 }
781 free(path);
782
783 if (!inc) {
784 fprintf(stderr, "Include error: cannot read '%s'\n", resolved[0] ? resolved : "(unresolved)");
785 sb_append(&out, "// include error: cannot read ");
786 sb_append(&out, resolved[0] ? resolved : "(unresolved)");
787 sb_append(&out, "\n");
788 } else {
789 /* Strip optional UTF-8 BOM and top-of-file shebang from included text before preprocessing */
790 const char *startp = inc;
791 size_t off = 0;
792 if ((unsigned char)inc[0] == 0xEF && (unsigned char)inc[1] == 0xBB && (unsigned char)inc[2] == 0xBF) {
793 off = 3;
794 }
795 startp = inc + off;
796 if (startp[0] == '#' && startp[1] == '!') {
797 /* skip until end of line, handling CR, LF, CRLF */
798 const char *q = startp;
799 while (*q && *q != '\n' && *q != '\r')
800 q++;
801 if (*q == '\r') {
802 q++;
803 if (*q == '\n') q++;
804 } else if (*q == '\n') {
805 q++;
806 }
807 startp = q;
808 }
809 char *inc_clean = strdup(startp);
810 char *exp = preprocess_includes_internal(inc_clean, resolved, depth + 1);
811 free(inc);
812 free(inc_clean);
813 if (exp) {
814 /* mark file origin for better error messages */
815 sb_append(&out, "// __include_begin__: ");
816 sb_append(&out, resolved);
817 if (ns[0] != '\0') {
818 sb_append(&out, " as ");
819 sb_append(&out, ns);
820 }
821 sb_append(&out, " @line 1");
822 sb_append(&out, "\n");
823
824 /* append expanded included content */
825 sb_append(&out, exp);
826 /* ensure included chunk ends with newline to preserve line structure */
827 if (out.len == 0 || out.buf[out.len - 1] != '\n') sb_append_ch(&out, '\n');
828
829 /* After including, if we know the parent file path, emit a marker to
830 * resume mapping to the including (parent) file for the subsequent text. */
831 if (current_path && current_path[0]) {
832 sb_append(&out, "// __include_begin__: ");
833 sb_append(&out, current_path);
834 /* compute resume line number in parent: next line after include directive */
835 char lb2[32];
836 /* crude estimate: resume at next physical line */
837 snprintf(lb2, sizeof(lb2), " @line %d", 0); /* will be patched below */
838 /* We need actual parent line number. Compute by scanning from start to 'i' */
839 int parent_line = 1 + (shebang_end ? shebang_lines : 0);
840 for (size_t tt = shebang_end; tt < k; ++tt) if (src[tt] == '\n') parent_line++;
841 snprintf(lb2, sizeof(lb2), " @line %d", parent_line);
842 sb_append(&out, lb2);
843 sb_append(&out, "\n");
844 }
845
846 free(exp);
847 }
848 }
849
850 /* move input pointer to start of next line */
851 i = k;
852 bol = 1;
853 continue;
854 }
855 }
856 }
857 }
858 }
859
860 /* normal stateful copy with comment/string tracking */
861 if (in_line) {
862 sb_append_ch(&out, c);
863 if (c == '\n') {
864 in_line = 0;
865 bol = 1;
866 } else {
867 bol = 0;
868 }
869 i++;
870 continue;
871 }
872 if (in_block) {
873 sb_append_ch(&out, c);
874 if (c == '*' && (i + 1) < len && src[i + 1] == '/') {
875 sb_append_ch(&out, '/');
876 i += 2;
877 bol = 0;
878 in_block = 0;
879 continue;
880 }
881 bol = (c == '\n') ? 1 : 0;
882 i++;
883 continue;
884 }
885 if (in_sq) {
886 sb_append_ch(&out, c);
887 if (!esc && c == '\\') {
888 esc = 1;
889 i++;
890 bol = 0;
891 continue;
892 }
893 if (!esc && c == '\'') {
894 in_sq = 0;
895 }
896 esc = 0;
897 bol = (c == '\n') ? 1 : 0;
898 i++;
899 continue;
900 }
901 if (in_dq) {
902 sb_append_ch(&out, c);
903 if (!esc && c == '\\') {
904 esc = 1;
905 i++;
906 bol = 0;
907 continue;
908 }
909 if (!esc && c == '"') {
910 in_dq = 0;
911 }
912 esc = 0;
913 bol = (c == '\n') ? 1 : 0;
914 i++;
915 continue;
916 }
917
918 /* outside any special state */
919 if (c == '/' && (i + 1) < len && src[i + 1] == '/') {
920 sb_append_ch(&out, '/');
921 sb_append_ch(&out, '/');
922 i += 2;
923 in_line = 1;
924 bol = 0;
925 continue;
926 }
927 if (c == '/' && (i + 1) < len && src[i + 1] == '*') {
928 sb_append_ch(&out, '/');
929 sb_append_ch(&out, '*');
930 i += 2;
931 in_block = 1;
932 bol = 0;
933 continue;
934 }
935 if (c == '\'') {
936 sb_append_ch(&out, c);
937 in_sq = 1;
938 bol = 0;
939 i++;
940 continue;
941 }
942 if (c == '"') {
943 sb_append_ch(&out, c);
944 in_dq = 1;
945 bol = 0;
946 i++;
947 continue;
948 }
949
950 sb_append_ch(&out, c);
951 bol = (c == '\n') ? 1 : 0;
952 i++;
953 }
954
955 if (!out.buf) return strdup("");
956 /* ensure NUL-terminated */
957 if (out.cap == out.len) sb_reserve(&out, out.len + 1);
958 if (out.buf) out.buf[out.len] = '\0';
959 return out.buf;
960}
961
/**
 * Public wrapper: expand includes in `src` when the path of the source file
 * is not known. No path is passed on, and nesting starts at depth 0.
 *
 * @return Expanded text as returned by preprocess_includes_internal().
 */
char *preprocess_includes(const char *src) {
  const char *unknown_path = NULL;
  return preprocess_includes_internal(src, unknown_path, 0);
}
968
/**
 * Variant of preprocess_includes() for callers that know the path of the
 * file `src` came from. The path is passed on so that resume markers can be
 * emitted after each include; nesting starts at depth 0.
 *
 * @return Expanded text as returned by preprocess_includes_internal().
 */
char *preprocess_includes_with_path(const char *src, const char *current_path) {
  const int top_level = 0;
  return preprocess_includes_internal(src, current_path, top_level);
}
976
/**
 * Map a 1-based line of already expanded text back to its origin.
 *
 * The governing marker is the nearest `// __include_begin__: ` line above the
 * requested line. Marker lines themselves, and lines with no marker above
 * them, cannot be mapped.
 *
 * @return 1 and fills out_path / out_line on success; 0 otherwise, in which
 *         case neither output is written.
 */
static int map_line_in_expanded_text(const char *prep, size_t len, int line,
                                     char *out_path, size_t out_path_cap,
                                     int *out_line) {
  static const char marker[] = "// __include_begin__: ";
  const size_t mlen = sizeof(marker) - 1;

  /* find start offset of the requested 1-based line */
  size_t pos = 0;
  int cur = 1;
  while (pos < len && cur < line) {
    if (prep[pos] == '\n') cur++;
    pos++;
  }
  if (cur != line || pos >= len) return 0;

  /* A marker line is synthetic: it has no counterpart in any source file. */
  size_t ls = pos;
  if (ls + mlen <= len && strncmp(prep + ls, marker, mlen) == 0) return 0;

  /* Walk back line by line to the nearest marker. No other marker can lie
   * between it and the requested line, so it is the one that applies. */
  int found = 0;
  while (ls > 0) {
    size_t prev = ls - 1; /* the '\n' that ends the previous line */
    while (prev > 0 && prep[prev - 1] != '\n') prev--;
    ls = prev;
    if (ls + mlen <= len && strncmp(prep + ls, marker, mlen) == 0) {
      found = 1;
      break;
    }
  }
  if (!found) return 0;

  /* Parse marker line: path [as alias] [@line N] */
  const size_t p = ls + mlen;
  size_t eol = p;
  while (eol < len && prep[eol] != '\n') eol++;

  /* " @line N" is always last on the line, so take the rightmost match; a
   * path that happens to contain " @line " must not be split there. */
  size_t pos_line = eol;
  for (size_t t = p; t + 7 <= eol; ++t) {
    if (strncmp(prep + t, " @line ", 7) == 0) pos_line = t;
  }

  /* An alias is a bare identifier directly before " @line"; only a " as "
   * followed by nothing but identifier characters qualifies. */
  size_t path_end = pos_line;
  for (size_t t = p; t + 4 < pos_line; ++t) {
    if (strncmp(prep + t, " as ", 4) != 0) continue;
    size_t a = t + 4;
    while (a < pos_line && (isalnum((unsigned char)prep[a]) || prep[a] == '_')) a++;
    if (a == pos_line) {
      path_end = t;
      break;
    }
  }

  /* parse optional base line number */
  int base_line = 1;
  if (pos_line < eol) {
    size_t d = pos_line + 7; /* after ' @line ' */
    int v = 0;
    while (d < eol && prep[d] == ' ') d++;
    while (d < eol && prep[d] >= '0' && prep[d] <= '9') {
      v = v * 10 + (prep[d] - '0');
      d++;
    }
    if (v > 0) base_line = v;
  }

  /* Lines below the marker count up from base_line. */
  size_t span_start = eol;
  if (span_start < len && prep[span_start] == '\n') span_start++;
  int newlines = 0;
  for (size_t t = span_start; t < pos; ++t)
    if (prep[t] == '\n') newlines++;

  size_t copy = path_end - p;
  if (copy > out_path_cap - 1) copy = out_path_cap - 1;
  memcpy(out_path, prep + p, copy);
  out_path[copy] = '\0';
  *out_line = base_line + newlines;
  return 1;
}

/**
 * Public helper: map a line number in the include-expanded top-level source
 * back to the original file path and the line number inside that file.
 *
 * The file at `path` is read and expanded again, then the
 * `// __include_begin__: <path>[ as <alias>] @line <N>` markers injected by
 * the preprocessor are used to find which file the line belongs to.
 *
 * @param path         Top-level source file.
 * @param line         1-based line number in the expanded text.
 * @param out_path     Receives the origin file path (truncated to fit).
 * @param out_path_cap Size of out_path in bytes; must be non-zero.
 * @param out_line     Receives the line number within the origin file.
 * @return 1 on success. 0 on invalid arguments, when the file cannot be read
 *         or expanded, or when the line cannot be mapped; out_path is then ""
 *         and *out_line equals `line` (arguments permitting).
 */
int map_expanded_line_to_include_path(const char *path, int line,
                                      char *out_path, size_t out_path_cap,
                                      int *out_line) {
  if (!path || line <= 0 || !out_path || out_path_cap == 0 || !out_line) return 0;
  out_path[0] = '\0';
  *out_line = line;

  /* read original top-level file */
  char *orig = read_file_all(path, NULL);
  if (!orig) return 0;

  char *prep = preprocess_includes_internal(orig, path, 0);
  free(orig);
  if (!prep) return 0;

  int rc = map_line_in_expanded_text(prep, strlen(prep), line, out_path, out_path_cap, out_line);
  free(prep);
  return rc;
}
1104
/**
 * Float literal parser: supports decimal and scientific notation.
 *
 * Accepted shape, after leading spaces/tabs/CRs: an optional sign, digits
 * with an optional '.', and an optional exponent (e/E, optional sign,
 * digits). At least one mantissa digit is required, and the literal must
 * contain a '.' or an exponent; plain integers are rejected. An 'e'/'E' not
 * followed by digits rejects the whole literal.
 *
 * @param src Source text.
 * @param len Number of valid bytes in src.
 * @param pos In/out cursor; advanced past the literal on success only.
 * @param ok  Set to 1 on success, 0 on failure.
 * @return The value converted by strtod(), or 0.0 on failure.
 */
static double parse_float_literal_value(const char *src, size_t len, size_t *pos, int *ok) {
  size_t cur = *pos;
  skip_spaces(src, len, &cur);
  const size_t begin = cur;

  /* optional sign */
  if (cur < len && (src[cur] == '-' || src[cur] == '+')) {
    cur++;
  }

  /* integer part */
  size_t mantissa_digits = 0;
  for (; cur < len && isdigit((unsigned char)src[cur]); cur++) {
    mantissa_digits++;
  }

  /* fractional part */
  int has_point = 0;
  if (cur < len && src[cur] == '.') {
    has_point = 1;
    for (cur++; cur < len && isdigit((unsigned char)src[cur]); cur++) {
      mantissa_digits++;
    }
  }

  /* exponent part */
  int has_exponent = 0;
  if (cur < len && (src[cur] == 'E' || src[cur] == 'e')) {
    has_exponent = 1;
    size_t e = cur + 1;
    if (e < len && (src[e] == '-' || src[e] == '+')) {
      e++;
    }
    const size_t first_exp_digit = e;
    while (e < len && isdigit((unsigned char)src[e])) {
      e++;
    }
    if (e == first_exp_digit) {
      /* no digits after exponent -> not a float */
      *ok = 0;
      return 0.0;
    }
    cur = e;
  }

  if (mantissa_digits == 0 || !(has_point || has_exponent)) {
    *ok = 0;
    return 0.0;
  }

  /* strtod() needs a NUL-terminated string, so convert a private copy. */
  const size_t n = cur - begin;
  char *text = (char *)malloc(n + 1);
  if (text == NULL) {
    *ok = 0;
    return 0.0;
  }
  memcpy(text, src + begin, n);
  text[n] = '\0';

  char *stop = NULL;
  const double value = strtod(text, &stop);
  const int consumed_all = (stop != NULL && *stop == '\0');
  free(text);

  if (!consumed_all) {
    *ok = 0;
    return 0.0;
  }
  *pos = cur;
  *ok = 1;
  return value;
}
Value out
Definition apop.c:38
Value v
Definition cast.c:22
int k
Definition cast.c:29
int ok
Definition contains.c:38
const char * name
Definition env.c:29
size_t len
Definition input_line.c:102
size_t cap
Definition input_line.c:101
char * buf
Definition input_line.c:103
int n
Definition insert.c:41
Value c
Definition load_const.c:31
Value val
Definition load_local.c:36
free(vals)
Public API for parsing Fun source into bytecode.
#define DEFAULT_LIB_DIR
int map_expanded_line_to_include_path(const char *path, int line, char *out_path, size_t out_path_cap, int *out_line)
Map a line number in expanded source back to original include path/line.
char * preprocess_includes_with_path(const char *src, const char *current_path)
Preprocess includes with a known file path to improve span markers.
char * preprocess_includes(const char *src)
Public wrapper to preprocess includes without a current path.
int64_t exp
Definition pow.c:39
rewind(f)
fclose(f)
FILE * f
Definition read_file.c:38
const char * p
Definition read_file.c:37
long sz
Definition read_file.c:50
uint32_t s
Definition rol.c:31
long t
Definition sleep_ms.c:32
Value start
Definition slice.c:34
List of exported symbol names discovered at top level.
char ** names
Simple growable string buffer.
char * buf
size_t len
size_t cap
#define fprintf
Definition vm.c:200
Value path
Definition write_file.c:33