33static char *read_file_all(
const char *
path,
size_t *out_len) {
34 FILE *
f = fopen(
path,
"rb");
36 if (fseek(
f, 0, SEEK_END) != 0) {
46 char *
buf = (
char *)malloc((
size_t)
sz + 1);
51 size_t n = fread(
buf, 1, (
size_t)
sz,
f);
54 if (out_len) *out_len =
n;
64static void skip_ws(
const char *src,
size_t len,
size_t *pos) {
67 if (
c ==
' ' ||
c ==
'\t' ||
c ==
'\r' ||
c ==
'\n') {
/**
 * Advance the cursor to the start of the next line.
 *
 * Moves *pos forward until it reaches a '\n' or the end of the buffer, then
 * steps over that '\n' when one was found. Every read of src is guarded by
 * the *pos < len check, so src does not have to be NUL-terminated.
 *
 * @param src  Source buffer.
 * @param len  Number of readable bytes in src.
 * @param pos  In/out cursor into src; ends up equal to len when no newline
 *             remains after the starting position.
 */
static void skip_line(const char *src, size_t len, size_t *pos) {
  while (*pos < len && src[*pos] != '\n') {
    (*pos)++;
  }
  /* Consume the line terminator itself so the caller resumes on the next line. */
  if (*pos < len && src[*pos] == '\n') {
    (*pos)++;
  }
}
88static void skip_comments(
const char *src,
size_t len,
size_t *pos) {
90 skip_ws(src,
len, pos);
91 if (*pos + 1 <
len && src[*pos] ==
'/' && src[*pos + 1] ==
'/') {
93 skip_line(src,
len, pos);
96 if (*pos + 1 <
len && src[*pos] ==
'/' && src[*pos + 1] ==
'*') {
98 while (*pos + 1 <
len && !(src[*pos] ==
'*' && src[*pos + 1] ==
'/'))
100 if (*pos + 1 <
len) *pos += 2;
/**
 * Test whether the keyword kw occurs in src exactly at offset pos.
 *
 * @param src  Source buffer.
 * @param len  Number of readable bytes in src.
 * @param pos  Offset in src at which the comparison starts.
 * @param kw   NUL-terminated keyword to look for.
 * @return 1 when the bytes at src[pos .. pos + strlen(kw)) equal kw,
 *         0 when they differ or when kw would not fit inside the buffer.
 */
static int starts_with(const char *src, size_t len, size_t pos, const char *kw) {
  size_t klen = strlen(kw);
  /* Written with a subtraction so that a very large pos cannot wrap the
   * bound check around and let strncmp read outside the buffer. */
  if (pos > len || klen > len - pos) {
    return 0;
  }
  return strncmp(src + pos, kw, klen) == 0;
}
/**
 * Skip a leading "#!" line.
 *
 * Only acts when the cursor is at offset 0 and the buffer begins with "#!";
 * in that case the whole first line is skipped via skip_line(). Otherwise
 * *pos is left untouched.
 *
 * @param src  Source buffer.
 * @param len  Number of readable bytes in src.
 * @param pos  In/out cursor into src.
 */
static void skip_shebang_if_present(const char *src, size_t len, size_t *pos) {
  if (*pos == 0 && starts_with(src, len, *pos, "#!")) {
    skip_line(src, len, pos);
  }
}
130static void skip_identifier(
const char *src,
size_t len,
size_t *pos) {
132 if (
p <
len && (isalpha((
unsigned char)src[
p]) || src[
p] ==
'_')) {
134 while (
p <
len && (isalnum((
unsigned char)src[
p]) || src[
p] ==
'_'))
144static int consume_char(
const char *src,
size_t len,
size_t *pos,
char expected) {
145 skip_ws(src,
len, pos);
146 if (*pos <
len && src[*pos] == expected) {
160static char *parse_string_literal_any_quote(
const char *src,
size_t len,
size_t *pos) {
161 skip_ws(src,
len, pos);
162 if (*pos >=
len)
return NULL;
163 char quote = src[*pos];
164 if (quote !=
'"' && quote !=
'\'')
return NULL;
166 size_t cap = 64, out_len = 0;
167 char *
out = (
char *)malloc(
cap);
168 if (!
out)
return NULL;
177 if (*pos >=
len)
break;
203 if (out_len + 1 >=
cap) {
205 char *tmp = (
char *)realloc(
out,
cap);
215 if (out_len + 1 >=
cap) {
216 char *tmp = (
char *)realloc(
out,
cap + 1);
232static void skip_spaces(
const char *src,
size_t len,
size_t *pos) {
235 if (
c ==
' ' ||
c ==
'\t' ||
c ==
'\r') {
248static int read_identifier_into(
const char *src,
size_t len,
size_t *pos,
char **out_name) {
250 if (
p <
len && (isalpha((
unsigned char)src[
p]) || src[
p] ==
'_')) {
253 while (
p <
len && (isalnum((
unsigned char)src[
p]) || src[
p] ==
'_'))
256 char *
name = (
char *)malloc(
n + 1);
272static uint64_t parse_int_literal_value(
const char *src,
size_t len,
size_t *pos,
int *
ok) {
274 skip_spaces(src,
len, &
p);
276 if (
p <
len && (src[
p] ==
'+' || src[
p] ==
'-')) {
277 if (src[
p] ==
'-') sign = -1;
286 if ((
p + 1) <
len && src[
p] ==
'0' && (src[
p + 1] ==
'x' || src[
p + 1] ==
'X')) {
288 if (
p >=
len || !isxdigit((
unsigned char)src[
p])) {
293 while (
p <
len && isxdigit((
unsigned char)src[
p])) {
295 int d = (
c >=
'0' &&
c <=
'9') ? (
c -
'0')
296 : (
c >=
'a' &&
c <=
'f') ? (
c -
'a' + 10)
297 : (
c >=
'A' &&
c <=
'F') ? (
c -
'A' + 10)
299 val = (
val << 4) + (uint64_t)d;
304 return (uint64_t)((int64_t)sign * (int64_t)
val);
308 if (!isdigit((
unsigned char)src[
p])) {
313 while (
p <
len && isdigit((
unsigned char)src[
p])) {
314 val =
val * 10 + (uint64_t)(src[
p] -
'0');
319 return (uint64_t)((int64_t)sign * (int64_t)
val);
333static void *xrealloc(
void *ptr,
size_t newcap) {
334 void *np = realloc(ptr, newcap);
350static void sb_init(
StrBuf *sb) {
351 sb->
buf = (
char *)malloc(256);
352 sb->
cap = sb->
buf ? 256 : 0;
354 if (sb->
buf) sb->
buf[0] =
'\0';
360static void sb_reserve(
StrBuf *sb,
size_t need) {
361 if (need <= sb->
cap)
return;
362 size_t nc = sb->
cap ? sb->
cap : 256;
365 char *nb = (
char *)xrealloc(sb->
buf, nc);
374static void sb_append_n(
StrBuf *sb,
const char *
s,
size_t n) {
376 sb_reserve(sb, sb->
len +
n + 1);
377 if (!sb->
buf)
return;
386static void sb_append(
StrBuf *sb,
const char *
s) {
387 sb_append_n(sb,
s, strlen(
s));
393static void sb_append_ch(
StrBuf *sb,
char c) {
394 sb_reserve(sb, sb->
len + 2);
395 if (!sb->
buf)
return;
425 int ncap = nl->
cap ? nl->
cap * 2 : 8;
426 char **nn = (
char **)realloc(nl->
names, (
size_t)ncap *
sizeof(
char *));
439 for (
int i = 0; i < nl->
count; ++i)
452static void collect_exports_top_level(
const char *text,
NameList *
out) {
453 if (!text || !
out)
return;
454 size_t len = strlen(text);
455 int in_line = 0, in_block = 0, in_sq = 0, in_dq = 0, esc = 0;
457 for (
size_t i = 0; i <
len;) {
471 if (
c ==
'*' && (i + 1) <
len && text[i + 1] ==
'/') {
482 if (!esc &&
c ==
'\\') {
488 if (!esc &&
c ==
'\'') {
497 if (!esc &&
c ==
'\\') {
503 if (!esc &&
c ==
'"') {
512 if (
c ==
'/' && (i + 1) <
len && text[i + 1] ==
'/') {
518 if (
c ==
'/' && (i + 1) <
len && text[i + 1] ==
'*') {
541 while (j <
len && text[j] ==
' ') {
545 if (j <
len && text[j] ==
'\t') {
554 const char *kw1 =
"fun";
555 const char *kw2 =
"class";
556 if (j + 3 <=
len && strncmp(text + j, kw1, 3) == 0 && (j + 3 ==
len || isspace((
unsigned char)text[j + 3]))) {
558 while (
p <
len && (text[
p] ==
' ' || text[
p] ==
'\t'))
562 if (
p <
len && (isalpha((
unsigned char)text[
p]) || text[
p] ==
'_')) {
564 while (
p <
len && (isalnum((
unsigned char)text[
p]) || text[
p] ==
'_'))
569 size_t copy = (
n <
sizeof(tmp) - 1) ?
n : (sizeof(tmp) - 1);
570 memcpy(tmp, text +
start, copy);
575 }
else if (j + 5 <=
len && strncmp(text + j, kw2, 5) == 0 && (j + 5 ==
len || isspace((
unsigned char)text[j + 5]))) {
577 while (
p <
len && (text[
p] ==
' ' || text[
p] ==
'\t'))
581 if (
p <
len && (isalpha((
unsigned char)text[
p]) || text[
p] ==
'_')) {
583 while (
p <
len && (isalnum((
unsigned char)text[
p]) || text[
p] ==
'_'))
588 size_t copy = (
n <
sizeof(tmp) - 1) ?
n : (sizeof(tmp) - 1);
589 memcpy(tmp, text +
start, copy);
618static char *preprocess_includes_internal(
const char *src,
const char *current_path,
int depth) {
619 if (!src)
return NULL;
621 fprintf(stderr,
"Include error: include nesting too deep\n");
626#ifndef DEFAULT_LIB_DIR 627#define DEFAULT_LIB_DIR "/usr/share/fun/lib/" 630 const char *env_lib = getenv(
"FUN_LIB_DIR");
631 size_t len = strlen(src);
636 size_t shebang_end = 0;
637 int shebang_lines = 0;
638 if (src[0] ==
'#' && src[1] ==
'!') {
641 while (src[j] && src[j] !=
'\n' && src[j] !=
'\r') j++;
643 if (src[j] ==
'\r') {
645 if (src[j] ==
'\n') j++;
646 }
else if (src[j] ==
'\n') {
650 for (
size_t t = 0;
t < j; ++
t)
if (src[
t] ==
'\n') shebang_lines++;
651 if (shebang_lines == 0) shebang_lines = 1;
653 sb_append_n(&
out, src, shebang_end);
658 if (current_path && current_path[0]) {
659 sb_append(&
out,
"// __include_begin__: ");
660 sb_append(&
out, current_path);
663 int base_line = 1 + (shebang_end ? shebang_lines : 0);
664 snprintf(lb,
sizeof(lb),
" @line %d", base_line);
666 sb_append(&
out,
"\n");
668 int in_line = 0, in_block = 0, in_sq = 0, in_dq = 0, esc = 0;
671 for (
size_t i = shebang_end; i <
len;) {
675 if (bol && !in_block && !in_sq && !in_dq) {
678 while (j <
len && (src[j] ==
' ' || src[j] ==
'\t'))
681 if (
k <
len && src[
k] ==
'#')
k++;
682 const char *kw =
"include";
684 if (
k + kwlen <=
len && strncmp(src +
k, kw, kwlen) == 0) {
687 while (
k <
len && (src[
k] ==
' ' || src[
k] ==
'\t'))
689 if (
k <
len && (src[
k] ==
'"' || src[
k] ==
'<')) {
690 char opener = src[
k];
691 char closer = (opener ==
'"') ?
'"' :
'>';
693 size_t path_start =
k;
694 while (
k <
len && src[
k] != closer)
696 if (
k <
len && src[
k] == closer) {
697 size_t path_len =
k - path_start;
698 char *
path = (
char *)malloc(path_len + 1);
700 memcpy(
path, src + path_start, path_len);
701 path[path_len] =
'\0';
709 while (ap <
len && (src[ap] ==
' ' || src[ap] ==
'\t'))
712 const char *askw =
"as";
713 if (ap + 2 <=
len && strncmp(src + ap, askw, 2) == 0 && (ap + 2 ==
len || isspace((
unsigned char)src[ap + 2]))) {
715 while (ap <
len && (src[ap] ==
' ' || src[ap] ==
'\t'))
719 if (ap <
len && (isalpha((
unsigned char)src[ap]) || src[ap] ==
'_')) {
721 while (ap <
len && (isalnum((
unsigned char)src[ap]) || src[ap] ==
'_'))
724 size_t copy = (
n <
sizeof(ns) - 1) ?
n : (sizeof(ns) - 1);
725 memcpy(ns, src +
start, copy);
732 while (
k <
len && src[
k] !=
'\n')
734 if (
k <
len && src[
k] ==
'\n')
k++;
738 char resolved2[1024];
753 if (env_lib && env_lib[0]) {
754 size_t elen = strlen(env_lib);
755 char last = env_lib[elen ? (elen - 1) : 0];
756 int needs_sep = !(last ==
'/' || last ==
'\\');
757 char sep = (last ==
'\\') ?
'\\' :
'/';
759 snprintf(resolved,
sizeof(resolved),
"%s%c%s", env_lib, sep,
path);
761 snprintf(resolved,
sizeof(resolved),
"%s%s", env_lib,
path);
762 inc = read_file_all(resolved, &inc_len);
768 inc = read_file_all(resolved, &inc_len);
773 snprintf(resolved,
sizeof(resolved),
"lib/%s",
path);
774 inc = read_file_all(resolved, &inc_len);
778 snprintf(resolved,
sizeof(resolved),
"%s",
path);
779 inc = read_file_all(resolved, &inc_len);
784 fprintf(stderr,
"Include error: cannot read '%s'\n", resolved[0] ? resolved :
"(unresolved)");
785 sb_append(&
out,
"// include error: cannot read ");
786 sb_append(&
out, resolved[0] ? resolved :
"(unresolved)");
787 sb_append(&
out,
"\n");
790 const char *startp = inc;
792 if ((
unsigned char)inc[0] == 0xEF && (
unsigned char)inc[1] == 0xBB && (
unsigned char)inc[2] == 0xBF) {
796 if (startp[0] ==
'#' && startp[1] ==
'!') {
798 const char *q = startp;
799 while (*q && *q !=
'\n' && *q !=
'\r')
804 }
else if (*q ==
'\n') {
809 char *inc_clean = strdup(startp);
810 char *
exp = preprocess_includes_internal(inc_clean, resolved, depth + 1);
815 sb_append(&
out,
"// __include_begin__: ");
816 sb_append(&
out, resolved);
818 sb_append(&
out,
" as ");
821 sb_append(&
out,
" @line 1");
822 sb_append(&
out,
"\n");
827 if (
out.len == 0 ||
out.buf[
out.len - 1] !=
'\n') sb_append_ch(&
out,
'\n');
831 if (current_path && current_path[0]) {
832 sb_append(&
out,
"// __include_begin__: ");
833 sb_append(&
out, current_path);
837 snprintf(lb2,
sizeof(lb2),
" @line %d", 0);
839 int parent_line = 1 + (shebang_end ? shebang_lines : 0);
840 for (
size_t tt = shebang_end; tt <
k; ++tt)
if (src[tt] ==
'\n') parent_line++;
841 snprintf(lb2,
sizeof(lb2),
" @line %d", parent_line);
842 sb_append(&
out, lb2);
843 sb_append(&
out,
"\n");
862 sb_append_ch(&
out,
c);
873 sb_append_ch(&
out,
c);
874 if (
c ==
'*' && (i + 1) <
len && src[i + 1] ==
'/') {
875 sb_append_ch(&
out,
'/');
881 bol = (
c ==
'\n') ? 1 : 0;
886 sb_append_ch(&
out,
c);
887 if (!esc &&
c ==
'\\') {
893 if (!esc &&
c ==
'\'') {
897 bol = (
c ==
'\n') ? 1 : 0;
902 sb_append_ch(&
out,
c);
903 if (!esc &&
c ==
'\\') {
909 if (!esc &&
c ==
'"') {
913 bol = (
c ==
'\n') ? 1 : 0;
919 if (
c ==
'/' && (i + 1) <
len && src[i + 1] ==
'/') {
920 sb_append_ch(&
out,
'/');
921 sb_append_ch(&
out,
'/');
927 if (
c ==
'/' && (i + 1) <
len && src[i + 1] ==
'*') {
928 sb_append_ch(&
out,
'/');
929 sb_append_ch(&
out,
'*');
936 sb_append_ch(&
out,
c);
943 sb_append_ch(&
out,
c);
950 sb_append_ch(&
out,
c);
951 bol = (
c ==
'\n') ? 1 : 0;
955 if (!
out.buf)
return strdup(
"");
966 return preprocess_includes_internal(src, NULL, 0);
974 return preprocess_includes_internal(src, current_path, 0);
997 char *out_path,
size_t out_path_cap,
999 if (!
path || line <= 0 || !out_path || out_path_cap == 0 || !out_line)
return 0;
1005 char *orig = read_file_all(
path, &fsz);
1006 if (!orig)
return 0;
1008 char *prep = preprocess_includes_internal(orig,
path, 0);
1010 if (!prep)
return 0;
1013 size_t len = strlen(prep);
1016 while (pos <
len && cur < line) {
1017 if (prep[pos] ==
'\n') cur++;
1020 if (cur != line) {
free(prep);
return 0; }
1023 const char *marker =
"// __include_begin__: ";
1024 size_t mlen = strlen(marker);
1029 while (ls > 0 && prep[ls - 1] !=
'\n') ls--;
1031 if (ls + mlen <=
len && strncmp(prep + ls, marker, mlen) == 0) {
1033 size_t p = ls + mlen;
1035 while (eol <
len && prep[eol] !=
'\n') eol++;
1037 size_t pos_as = eol, pos_line = eol;
1038 for (
size_t t =
p;
t + 3 < eol; ++
t) {
1039 if (prep[
t] ==
' ' && strncmp(prep +
t,
" as ", 4) == 0) { pos_as =
t;
break; }
1041 for (
size_t t =
p;
t + 6 < eol; ++
t) {
1042 if (prep[
t] ==
' ' && strncmp(prep +
t,
" @line ", 7) == 0) { pos_line =
t;
break; }
1044 size_t path_end = pos_as < pos_line ? pos_as : pos_line;
1045 if (path_end <
p) path_end = eol;
1046 size_t copy = (path_end -
p) < (out_path_cap - 1) ? (path_end -
p) : (out_path_cap - 1);
1047 memcpy(out_path, prep +
p, copy);
1048 out_path[copy] =
'\0';
1052 if (pos_line < eol) {
1053 size_t num_start = pos_line + 7;
1055 while (num_start < eol && prep[num_start] ==
' ') num_start++;
1056 while (num_start < eol && prep[num_start] >=
'0' && prep[num_start] <=
'9') {
1057 v =
v * 10 + (prep[num_start] -
'0');
1060 if (
v > 0) base_line =
v;
1065 if (q <
len && prep[q] ==
'\n') q++;
1066 size_t span_start = q;
1067 size_t span_end =
len;
1072 while (ls2 > 0 && prep[ls2 - 1] !=
'\n') ls2--;
1073 if (ls2 + mlen <=
len && strncmp(prep + ls2, marker, mlen) == 0) {
1077 while (fwd <
len && prep[fwd] !=
'\n') fwd++;
1078 if (fwd <
len && prep[fwd] ==
'\n') fwd++;
1083 if (!(pos >= span_start && pos < span_end)) {
1085 goto next_scan_back;
1090 size_t cnt = span_start;
1091 while (cnt < pos) {
if (prep[cnt] ==
'\n') inner++; cnt++; }
1092 *out_line = base_line + inner - 1;
1098 scan = (ls > 0) ? (ls - 1) : 0;
1111static double parse_float_literal_value(
const char *src,
size_t len,
size_t *pos,
int *
ok) {
1113 skip_spaces(src,
len, &
p);
1120 if (
p <
len && (src[
p] ==
'+' || src[
p] ==
'-'))
p++;
1123 while (
p <
len && isdigit((
unsigned char)src[
p])) {
1129 if (
p <
len && src[
p] ==
'.') {
1132 while (
p <
len && isdigit((
unsigned char)src[
p])) {
1139 if (
p <
len && (src[
p] ==
'e' || src[
p] ==
'E')) {
1141 size_t epos =
p + 1;
1142 if (epos <
len && (src[epos] ==
'+' || src[epos] ==
'-')) epos++;
1143 size_t digits_start = epos;
1144 while (epos <
len && isdigit((
unsigned char)src[epos])) {
1147 if (epos == digits_start) {
1155 if (!saw_digit || (!saw_dot && !saw_exp)) {
1162 char *tmp = (
char *)malloc(
n + 1);
1167 memcpy(tmp, src +
start,
n);
1171 double dv = strtod(tmp, &endp);
1172 if (!endp || *endp !=
'\0') {
Public API for parsing Fun source into bytecode.
int map_expanded_line_to_include_path(const char *path, int line, char *out_path, size_t out_path_cap, int *out_line)
Map a line number in expanded source back to original include path/line.
char * preprocess_includes_with_path(const char *src, const char *current_path)
Preprocess includes with a known file path to improve span markers.
char * preprocess_includes(const char *src)
Public wrapper to preprocess includes without a current path.
List of exported symbol names discovered at top level.
Simple growable string buffer.