-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathpnut.c
More file actions
5021 lines (4246 loc) · 142 KB
/
pnut.c
File metadata and controls
5021 lines (4246 loc) · 142 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#ifdef ANNOTATE_WITH_C_CODE
#include "doc/c_to_shell_translation.c"
#include "doc/architecture_of_pnut.c"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h> // for intptr_t
#include <fcntl.h> // for open
#include <unistd.h> // for write
#include <unistd.h> // for isatty
#if 0 // Removed from C annotations
// =========================== configuration options ===========================
//
// Pnut has many compilation options to change the features supported by
// pnut-sh, pnut-awk and pnut-exe, and the code generation.
// The reason for so many options is that pnut is really 2 different compilers
// (pnut-sh/pnut-awk and pnut-exe) sharing the same compiler frontend, with each
// compiler having to accomplish 2 opposite goals:
//
// 1. Be as small and simple as possible to bootstrap a C-like language from a
// POSIX shell implementation. Importantly, this applies to both the
// compiled shell scripts and to the compiler code itself.
//
// 2. Compile real world programs, such as TCC, requiring a large subset of C99
// language.
// Non-bootstrap users of pnut also care about user experience, such as
// informative error messages with line numbers, a nice CLI, a fully
// featured built-in libc and support for more advanced C features.
//
// We resolve this tension by having a shared codebase and preprocessor options
// to enable or disable features as needed. Fortunately, pnut's reader and
// tokenizer are fast compared to the rest of the compilation, so having a
// single codebase with large blocks of inactive code doesn't impact performance
// all that much.
//
// However, the preprocessor directives can make the code slightly harder to
// read and maintain. To make reviewing the code easier, we recommend reviewing
// the annotated pnut-sh.sh and pnut-exe.sh script generated by pnut-sh (with
// the ANNOTATE_WITH_C_CODE option enabled).
//
// Since most options are orthogonal, the number of possible combinations is
// very large. To avoid combinatorial explosion of testing and maintenance,
// we define a few "profiles" that cover the common use cases of pnut:
//
// - PNUT_BOOTSTRAP:
// Profile used to generate pnut-sh.sh/pnut-awk.awk and to bootstrap pnut-exe
// from a POSIX shell/AWK. This profile minimizes the code size and
// complexity of the compilers as much as possible, supporting only the
// features strictly needed to bootstrap pnut.
//
// - (Default):
// General purpose profile used to build pnut-exe for real world usage. This
// profile strikes a balance between features, usability and code size.
//
// - BOOTSTRAP_TCC:
// Profile used to bootstrap TCC from pnut-exe. This profile enables partial
// support for a few more features needed to compile TCC. These features are
// not implemented correctly, and so are not enabled in the general purpose
// profile.
//
// Additionally, if compilation time and code size are not a concern, or to help
// debugging, the NICE_UX and SAFE_MODE options can be enabled to turn on extra
// runtime checks and better error messages.
//
// =============================================================================
#endif
// Pnut-sh specific options
#ifdef target_sh
// Toggles parsing literals with their base (octal, decimal or hexadecimal).
// This is used by the shell code generator to output the literal in the correct base.
#define PARSE_NUMERIC_LITERAL_WITH_BASE
#ifdef PNUT_BOOTSTRAP
#define ALLOW_RECURSIVE_MACROS
#define MINIMAL_RUNTIME
#define SH_INCLUDE_ALL_ALPHANUM_CHARACTERS
// Remove support for complex printf specifiers (flags, width, precision).
// This results in smaller code for the compiler.
#define SH_MINIMAL_PRINTF
#else
// Enable all C features for general pnut usage
#define SUPPORT_ALL_C_FEATURES
// Pnut-sh specific features
#define SH_SUPPORT_SHELL_INCLUDE
#define SH_SUPPORT_ADDRESS_OF
#define SH_OPTIMIZE_LONG_LINES
#endif
// Shell code generation options
#ifndef SH_INLINE_PRINTF_NOT
// Inline printf calls with literal string for smaller and faster code
#define SH_INLINE_PRINTF
#endif
#ifndef SH_INLINE_PUTCHAR_NOT
// Inline putchar calls for smaller and faster code
#define SH_INLINE_PUTCHAR
#endif
// Inline exit calls for smaller code
#define SH_INLINE_EXIT
#ifndef SH_SAVE_VARS_WITH_SET
// Pick calling convention implementation. By default, use let/endlet.
// When SH_SAVE_VARS_WITH_SET is set, local variables are saved using "set"
// builtin, which results in faster code but makes the calling convention
// implementation much harder to read.
#define SH_INITIALIZE_PARAMS_WITH_LET
#endif
#ifndef SH_OPTIMIZE_CONSTANT_PARAMS_NOT
// Use positional parameter for const function parameters.
// Results in smaller and faster code (as it completely removes the cost of
// constant function parameters), but can make it harder to track variable
// usage in the generated code.
// This optimization is preferable to SH_SAVE_VARS_WITH_SET.
#define SH_OPTIMIZE_CONSTANT_PARAMS
#endif
// Built-in runtime library options
#ifndef RT_FREE_UNSETS_VARS_NOT
// Make free unset all variables associated with the freed object
#define RT_FREE_UNSETS_VARS
#endif
#ifndef RT_NO_INIT_GLOBALS_NOT
// Skips the zero-initialization of memory allocated values.
// Prevents the shell environment from being polluted with a large number of
// variables when allocating large data structures.
#define RT_NO_INIT_GLOBALS
#endif
#ifndef RT_USE_LOOKUP_TABLE_NOT
// Use a lookup table for char->int for a faster conversion
#define RT_USE_LOOKUP_TABLE
#endif
#ifdef SH_OPTIMIZE_LONG_LINES_NOT
// Disable long line optimization
#undef SH_OPTIMIZE_LONG_LINES
#endif
// Disabled options:
// Include the C code as comment along with the generated shell code
// #define ANNOTATE_WITH_C_CODE
// Replace character literal with character code instead of character variables.
// Results in smaller and faster code, but with magic numbers in the output.
// #define SH_INLINE_CHAR_LITERAL
// Remove the `set -u` directive in the generated shell code.
// This can be useful for programs that must allocate large amounts of
// zero-initialized memory, with incurring the overhead of actually
// initializing it to zero.
// This option is generally used together with RT_NO_INIT_GLOBALS, which skips
// the zero-initialization of memory. RT_UNSAFE_HEAP then allows access to
// this uninitialized memory without triggering unbound variable errors. This
// is made possible by the fact that uninitialized variables are always
// treated as zero in arithmetic expansions.
// #define RT_UNSAFE_HEAP
// Use a more compact but slower implementation of the runtime library.
// #define RT_COMPACT
// Include the complete runtime library in the generated shell code,
// regardless of whether features are used or not.
// #define INCLUDE_ALL_RUNTIME
// Only emit shell code output at the end of the compilation.
// This is used to evaluate the negative performance impact of allocating
// There is no practical reason to enable this option.
// #define ONE_PASS_GENERATOR_NO_EARLY_OUTPUT
// Profile memory usage in the generated shell code.
// After the `main` function returns, the generated shell code will print the
// number of shell variables used by the program.
// #define SH_PROFILE_MEMORY
#ifdef RT_COMPACT
// Make sure we don't use the long line optimization when RT_COMPACT is on
#undef SH_OPTIMIZE_LONG_LINES
#undef RT_USE_LOOKUP_TABLE
#endif
#elif defined(target_awk)
#ifdef PNUT_BOOTSTRAP
#define ALLOW_RECURSIVE_MACROS
#define MINIMAL_RUNTIME
#else
// Enable all C features for general pnut usage
#define SUPPORT_ALL_C_FEATURES
// Pnut-awk specific features
#define AWK_SUPPORT_ADDRESS_OF
#endif
// Shell code generation options
#ifndef AWK_INLINE_PRINTF_NOT
// Inline printf calls with literal string for smaller and faster code
#define AWK_INLINE_PRINTF
#endif
#ifndef AWK_INLINE_PUTCHAR_NOT
// Inline putchar calls for smaller and faster code
#define AWK_INLINE_PUTCHAR
#endif
// Inline exit calls for smaller code
#define AWK_INLINE_EXIT
// Disabled options:
// Include the C code as comment along with the generated shell code
// #define ANNOTATE_WITH_C_CODE
#elif defined(target_i386_linux) || defined (target_x86_64_linux) || defined (target_x86_64_mac)
// Parse numeric literals with their suffix (U, L, UL, etc).
#define PARSE_NUMERIC_LITERAL_SUFFIX
// Varargs functions are always supported because the built-in libc uses them.
#define SUPPORT_VARIADIC_FUNCTIONS
// Support 64 bit literals on 64 bit platforms
#if defined (target_x86_64_linux) || defined (target_x86_64_mac)
#define SUPPORT_64_BIT_LITERALS
#endif
#ifdef PNUT_BOOTSTRAP
#define ALLOW_RECURSIVE_MACROS
#else
// Enable all C features for general pnut usage
#define SUPPORT_ALL_C_FEATURES
#endif
#ifdef BOOTSTRAP_TCC
#define BOOTSTRAP_LONG
#define NO_BUILTIN_LIBC
#endif
// Disabled options:
// Output machine code as soon as possible, typically after each top level
// declaration, reducing memory usage significantly, but generating slightly
// worse code.
// #define ONE_PASS_GENERATOR
// Use brk syscall for memory allocation instead of mmap.
// #define USE_BRK_SYSCALL
// Add the PNUT_INLINE_INTERRUPT() special function that inserts an interrupt
// check at the call site, useful for debugging.
// #define ENABLE_PNUT_INLINE_INTERRUPT
// Poor man's debug info: adds the name of functions before their entry points.
// #define ADD_DEBUG_INFO
// Treats undefined labels as runtime errors instead of compile-time errors.
// This catches functions that are declared but not defined.
// #define UNDEFINED_LABELS_ARE_RUNTIME_ERRORS
// Use stack allocation for global variables instead of heap allocation.
// #define USE_STACK_FOR_GLOBALS
// Pnut-exe doesn't support generating annotated code, but we still want to be
// able to extract the original C source code from the generated pnut-exe.sh
#ifdef ANNOTATE_WITH_C_CODE
#define SUPPORT_EXTRACT_C_ANNOTATIONS
#undef ANNOTATE_WITH_C_CODE
#endif
#else
// Frontend-only variants of pnut (e.g. for running reader, tokenizer or parser)
// Options that turns Pnut into a C preprocessor or some variant of it
// DEBUG_GETCHAR: Read and print the input character by character.
// DEBUG_CPP: Run preprocessor like gcc -E. This can be useful for debugging the preprocessor.
// DEBUG_PARSER: Runs the tokenizer on the input. Outputs nothing.
// Enable all C features in the frontend
#define SUPPORT_ALL_C_FEATURES
#endif
#ifdef ANNOTATE_WITH_C_CODE
#define SUPPORT_EXTRACT_C_ANNOTATIONS
#endif
#ifdef SUPPORT_ALL_C_FEATURES
#define FULL_CLI_OPTIONS
#define FULL_PREPROCESSOR_SUPPORT
#define SUPPORT_EXTRACT_C_ANNOTATIONS
#define SUPPORT_COMPLEX_INITIALIZER
#define SUPPORT_DO_WHILE
#define SUPPORT_GOTO
#define SUPPORT_SIZEOF
#define SUPPORT_STRUCT_UNION
#define SUPPORT_TYPE_SPECIFIERS
#define SUPPORT_VARIADIC_FUNCTIONS
#endif
#if defined(NICE_UX) || defined(SAFE_MODE)
#define FULL_PREPROCESSOR_SUPPORT
#define INCLUDE_LINE_NUMBER_ON_ERROR
#endif
#if defined(NICE_UX)
#define NICE_ERR_MSG
#endif
// Disabled options:
// Support skip line continuations
// #define SUPPORT_LINE_CONTINUATION
// Support reading input from stdin
// #define SUPPORT_STDIN_INPUT
// Print memory usage statistics at the end of the program.
// #define PRINT_MEMORY_STATS
// ===================== Compatibility macros and typedefs =====================
#ifdef NO_TERNARY_SUPPORT
// M2-Planet doesn't support ternary operator.
// Ternary operator can be implemented using arithmetic operations.
// Note that unlike the standard ternary operator, both sides are always
// evaluated, and the condition expression is evaluated twice.
#define TERNARY(cond, if_true, if_false) (((cond) == 0) * (if_false) + ((cond) != 0) * (if_true))
#else
#define TERNARY(cond, if_true, if_false) ((cond) ? (if_true) : (if_false))
#endif
#ifdef PNUT_CC
// When bootstrapping pnut, intptr_t is not defined.
// On 64 bit platforms, intptr_t is a long long int.
// On 32 bit (including shells) platforms, intptr_t is an int.
#if defined(PNUT_EXE_64)
typedef long long int intptr_t;
#else
typedef int intptr_t;
#endif
typedef int FILE;
#define fdopen(n, m) n // fdopen is a no-op, file descriptors are used directly as *FILE
#ifdef PNUT_SH
// on pnut-sh, the file can only be opened in 3 modes: read, write and append
// if it doesn't exist, it will be created.
#define O_WRONLY 01
#define O_CREAT 00
#define O_TRUNC 00
#else
#define O_WRONLY 01
#define O_CREAT 0100
#define O_TRUNC 01000
#endif
#endif
// =============================================================================
#define ast int
#define true 1
#define false 0
#define EOF (-1)
typedef int bool;
// State for the reader
// - fp: current file pointer
// - fp_filepath: path of the current file being read, used for error messages.
// - fp_dirname: directory of the current file, used to resolve relative paths.
// - include_search_path: search path for system include files.
// - output_fd: the output file descriptor (1 = stdout), used by pnut-exe
// - line_number: line number of the current file, used for error messages.
// - column_number: column number of the current file, used for error messages.
// - last_tok_line_number: line number of the last token read, used for error messages.
// - last_tok_column_number: column number of the last token read, used for error messages.
// - include_stack: the stack to save the state of the reader when including a file.
FILE *fp = 0; // Current file pointer that's being read
char* fp_filepath = 0; // The path of the current file being read
char *fp_dirname = 0; // The directory of the current file being read
char* include_search_path = 0; // Search path for include files
int output_fd = 1; // Output file descriptor (1 = stdout)
#ifdef INCLUDE_LINE_NUMBER_ON_ERROR
int line_number = 1;
int column_number = 0;
int last_tok_line_number = 1;
int last_tok_column_number = 0;
#define INCLUDE_ENTRY_SIZE 5
#else
#define INCLUDE_ENTRY_SIZE 3
#endif
#define INCLUDE_STACK_DEPTH 10
intptr_t include_stack[INCLUDE_STACK_DEPTH * INCLUDE_ENTRY_SIZE];
int include_stack_top = 0; // Point to top of the stack, i.e. the next free entry
void putstr(char *str) {
while (*str) {
putchar(*str);
str += 1;
}
}
#ifdef PNUT_SH
#define putint(n) printf("%d", n)
#define puthex_unsigned(n) printf("%x", n)
#define putoct_unsigned(n) printf("%o", n)
#else
void putint_aux(int n) {
if (n <= -10) putint_aux(n / 10);
putchar('0' - (n % 10));
}
// Output signed integer
void putint(int n) {
if (n < 0) {
putchar('-');
putint_aux(n);
} else {
putint_aux(-n);
}
}
#ifdef target_sh
// Output unsigned integer in hex
void puthex_unsigned(int n) {
// Because n is signed, we clear the upper bits after shifting in case n was negative
if ((n >> 4) & 0x0fffffff) puthex_unsigned((n >> 4) & 0x0fffffff);
putchar("0123456789abcdef"[n & 15]);
}
// Output unsigned integer in octal
void putoct_unsigned(int n) {
// Because n is signed, we clear the upper bits after shifting in case n was negative
if ((n >> 3) & 0x1fffffff) putoct_unsigned((n >> 3) & 0x1fffffff);
putchar('0' + (n & 7));
}
#endif
#endif
#ifdef NICE_ERR_MSG
#if defined(MINIMAL_RUNTIME) || defined(NO_COLOR)
// No isatty support in minimal runtime
#define change_color(color)
#else
#define ANSI_RED "\x1b[31m"
#define ANSI_GREEN "\x1b[32m"
#define ANSI_YELLOW "\x1b[33m"
#define ANSI_RESET "\x1b[0m"
void change_color(char *color) {
if (isatty(1)) {
printf("%s", color);
}
}
#endif
void print_tok_type(int tok); // Imported from debug.c later in this file
void source_code_error(char *error_prefix, char *error_msg, int token) {
// Error header
change_color(ANSI_RED);
printf("%s\n", error_prefix);
printf(" Reason: ");
change_color(ANSI_YELLOW);
printf("%s\n", error_msg);
change_color(ANSI_RESET);
if (token) {
printf(" Offending token: ");
change_color(ANSI_YELLOW);
print_tok_type(token);
change_color(ANSI_RESET);
putchar('\n');
}
#ifdef INCLUDE_LINE_NUMBER_ON_ERROR
if (fp_filepath != 0) {
printf(" Location: ");
change_color(ANSI_GREEN);
printf("%s:%d:%d\n", fp_filepath, last_tok_line_number, last_tok_column_number);
change_color(ANSI_RESET);
}
#endif
exit(1);
}
#elif defined(INCLUDE_LINE_NUMBER_ON_ERROR)
void source_code_error(char *error_prefix, char *error_msg, int token) {
if (fp_filepath != 0) {
printf("%s:%d:%d: ", fp_filepath, last_tok_line_number, last_tok_column_number);
}
printf("%s%s\n", error_prefix, error_msg);
exit(1);
}
#else
#define source_code_error(error_prefix, error_msg, token) \
putstr(error_prefix); putstr(error_msg); putchar('\n'); exit(1);
#endif
void fatal_error(char * const msg) {
#ifdef INCLUDE_LINE_NUMBER_ON_ERROR
// Fatal errors are simpler and without color
if (fp_filepath != 0) {
printf("%s:%d:%d: ", fp_filepath, last_tok_line_number, last_tok_column_number);
}
#endif
putstr(msg); putchar('\n');
exit(1);
}
void syntax_error(char * const msg) {
source_code_error("Syntax error: ", msg, 0);
}
void parse_error(char * const msg, const int tok_) {
source_code_error("Parse error: ", msg, tok_);
}
// Before including a file, we save the state of the reader to the include stack.
// This allows us to restore the previous file pointer and file name when we finish including the file.
void save_include_context() {
if (include_stack_top >= INCLUDE_STACK_DEPTH * INCLUDE_ENTRY_SIZE) fatal_error("Include stack overflow");
if (fp != 0) {
include_stack[include_stack_top] = (intptr_t)fp;
include_stack[include_stack_top + 1] = (intptr_t)fp_filepath;
include_stack[include_stack_top + 2] = (intptr_t)fp_dirname;
#ifdef INCLUDE_LINE_NUMBER_ON_ERROR
// Save the line and column number of the current file
include_stack[include_stack_top + 3] = line_number;
include_stack[include_stack_top + 4] = column_number;
#endif
include_stack_top += INCLUDE_ENTRY_SIZE;
}
}
void restore_include_context() {
if (include_stack_top == 0) fatal_error("Include stack is empty");
fclose(fp);
if (fp_dirname != 0) free(fp_dirname);
// We skip freeing the filepath because it may belong to the string pool
include_stack_top -= INCLUDE_ENTRY_SIZE;
// Must add parentheses because M2-Planet parses the cast operator with higher
// precedence than the dereference operator.
fp = (FILE*) (include_stack[include_stack_top]);
fp_filepath = (char*) (include_stack[include_stack_top + 1]);
fp_dirname = (char*) (include_stack[include_stack_top + 2]);
#ifdef INCLUDE_LINE_NUMBER_ON_ERROR
line_number = include_stack[include_stack_top + 3];
column_number = include_stack[include_stack_top + 4];
#endif
}
// Tokens and AST nodes
enum TOKEN {
// C keywords
KEYWORDS_START = 300,
BREAK_KW,
CASE_KW,
CONTINUE_KW,
DEFAULT_KW,
#ifdef SUPPORT_DO_WHILE
DO_KW,
#endif
ELSE_KW,
FOR_KW,
IF_KW,
RETURN_KW,
#ifdef SUPPORT_SIZEOF
SIZEOF_KW,
#endif
SWITCH_KW,
TYPEDEF_KW,
WHILE_KW,
// Type qualifiers and storage class specifiers
CONST_KW,
#ifdef SUPPORT_TYPE_SPECIFIERS
AUTO_KW,
EXTERN_KW,
REGISTER_KW,
STATIC_KW,
VOLATILE_KW,
INLINE_KW,
#endif
// Type specifiers
// Must be below storage class specifiers for MK_TYPE_SPECIFIER to work correctly
CHAR_KW,
DOUBLE_KW,
ENUM_KW,
FLOAT_KW,
INT_KW,
LONG_KW,
SHORT_KW,
SIGNED_KW,
UNSIGNED_KW,
VOID_KW,
#ifdef SUPPORT_GOTO
GOTO_KW,
#endif
#ifdef SUPPORT_STRUCT_UNION
STRUCT_KW,
UNION_KW,
#endif
KEYWORDS_END,
// Non-character operands
INTEGER = 401, // Integer written in decimal
#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE
INTEGER_HEX = 402, // Integer written in hexadecimal
INTEGER_OCT = 403, // Integer written in octal
#endif
#ifdef PARSE_NUMERIC_LITERAL_SUFFIX
INTEGER_L = 404,
INTEGER_LL,
INTEGER_U,
INTEGER_UL,
INTEGER_ULL,
#endif
CHARACTER = 410, // Fixed value so the ifdef above don't change the value
STRING = 411,
AMP_AMP = 450,
AMP_EQ,
#ifdef SUPPORT_STRUCT_UNION
ARROW,
#endif
BAR_BAR,
BAR_EQ,
CARET_EQ,
EQ_EQ,
GT_EQ,
LSHIFT_EQ,
LSHIFT,
LT_EQ,
MINUS_EQ,
MINUS_MINUS,
EXCL_EQ,
PERCENT_EQ,
PLUS_EQ,
PLUS_PLUS,
RSHIFT_EQ,
RSHIFT,
SLASH_EQ,
STAR_EQ,
#ifdef FULL_PREPROCESSOR_SUPPORT
HASH_HASH,
#endif
PLUS_PLUS_PRE,
MINUS_MINUS_PRE,
PLUS_PLUS_POST,
MINUS_MINUS_POST,
#ifdef SUPPORT_VARIADIC_FUNCTIONS
ELLIPSIS,
#endif
#ifdef SUPPORT_COMPLEX_INITIALIZER
INITIALIZER_LIST,
#endif
DECL,
DECLS,
FUN_DECL,
CAST,
MACRO_ARG = 499,
IDENTIFIER = 500, // 500 because it's easy to remember
TYPE = 501,
MACRO = 502,
LIST = 600, // List object
};
// tokenizer
int ch;
int tok;
int val;
// String pool for C keywords, identifiers and string literals
#define STRING_POOL_SIZE 250000
char string_pool[STRING_POOL_SIZE];
int string_pool_alloc = 0;
int string_start;
int hash;
// These parameters give a perfect hashing of the C keywords
#define HASH_PARAM 1026
#define HASH_PRIME 1009
// Some C implementations place globals on the stack, where size is limited.
#ifdef SMALL_HEAP
#define HEAP_SIZE 131072 // 128 KB
#else
#define HEAP_SIZE 786432 // 768 KB
#endif
intptr_t heap[HEAP_SIZE];
int heap_alloc = HASH_PRIME;
int alloc_obj(const int size) {
if ((heap_alloc += size) > HEAP_SIZE) {
fatal_error("heap overflow");
}
return (heap_alloc - size);
}
int get_op(const ast node) {
return heap[node] & 1023;
}
ast get_nb_children(const ast node) {
return heap[node] >> 10;
}
// Because everything is an int in pnut, it's easy to make mistakes and pass the
// wrong node type to a function. These versions of get_child take the input
// and/or output node type and checks that the node has the expected type before
// returning the child node.
// It also checks that the index is within bounds.
#ifdef SAFE_MODE
int get_val_checked(char* file, int line, ast node) {
if (get_nb_children(node) != 0) {
printf("%s:%d: get_val called on node %d with %d children\n", file, line, get_op(node), get_nb_children(node));
exit(1);
}
return heap[node+1];
}
int get_val_go(char* file, int line, int expected_node, ast node) {
if (get_op(node) != expected_node) {
printf("%s:%d: Expected node %d, got %d\n", file, line, expected_node, get_op(node));
exit(1);
}
return get_val_checked(file, line, node);
}
void set_val_checked(char* file, int line, ast node, int val) {
if (get_nb_children(node) != 0) {
printf("%s:%d: set_val called on node %d with %d children\n", file, line, get_op(node), get_nb_children(node));
exit(1);
}
heap[node+1] = val;
}
ast get_child_checked(char* file, int line, ast node, int i) {
if (i != 0 && i >= get_nb_children(node)) {
printf("%s:%d: Index %d out of bounds for node %d\n", file, line, i, get_op(node));
exit(1);
}
return heap[node+i+1];
}
void set_child_checked(char* file, int line, ast node, int i, ast child) {
if (i != 0 && i >= get_nb_children(node)) {
printf("%s:%d: Index %d out of bounds for node %d\n", file, line, i, get_op(node));
exit(1);
}
heap[node+i+1] = child;
}
// This function checks that the parent node has the expected operator before
// returning the child node.
ast get_child_go(char* file, int line, int expected_parent_node, ast node, int i) {
ast res = get_child_checked(file, line, node, i);
if (get_op(node) != expected_parent_node) {
printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node));
exit(1);
}
return res;
}
// This function checks that the parent node has the expected operator and that
// the child node has the expected operator before returning the child node.
ast get_child__go(char* file, int line, int expected_parent_node, int expected_node, ast node, int i) {
ast res = get_child_checked(file, line, node, i);
if (get_op(node) != expected_parent_node) {
printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node));
exit(1);
}
if (get_op(res) != expected_node) {
printf("%s:%d: Expected child node %d, got %d\n", file, line, expected_node, get_op(res));
exit(1);
}
return res;
}
// This function checks that the parent node has the expected operator and that
// the child node has the expected operator (if child node is not 0) before
// returning the child node.
ast get_child_opt_go(char* file, int line, int expected_parent_node, int expected_node, ast node, int i) {
ast res = get_child_checked(file, line, node, i);
if (get_op(node) != expected_parent_node) {
printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node));
exit(1);
}
if (res > 0 && get_op(res) != expected_node) {
printf("%s:%d: Expected child node %d, got %d\n", file, line, expected_node, get_op(res));
exit(1);
}
return res;
}
#define get_val(node) get_val_checked(__FILE__, __LINE__, node)
#define get_val_(expected_node, node) get_val_go(__FILE__, __LINE__, expected_node, node)
#define set_val(node, val) set_val_checked(__FILE__, __LINE__, node, val)
#define set_child(node, i, child) set_child_checked(__FILE__, __LINE__, node, i, child)
#define get_child(node, i) get_child_checked(__FILE__, __LINE__, node, i)
#define get_child_(expected_parent_node, node, i) get_child_go(__FILE__, __LINE__, expected_parent_node, node, i)
#define get_child__(expected_parent_node, expected_node, node, i) get_child__go(__FILE__, __LINE__, expected_parent_node, expected_node, node, i)
#define get_child_opt_(expected_parent_node, expected_node, node, i) get_child_opt_go(__FILE__, __LINE__, expected_parent_node, expected_node, node, i)
#else
int get_val(const ast node) {
return heap[node+1];
}
void set_val(const ast node, const int val) {
heap[node+1] = val;
}
ast get_child(const ast node, const int i) {
return heap[node+i+1];
}
void set_child(const ast node, const int i, const ast child) {
heap[node+i+1] = child;
}
#define get_val_(expected_node, node) get_val(node)
#define get_child_(expected_parent_node, node, i) get_child(node, i)
#define get_child__(expected_parent_node, expected_node, node, i) get_child(node, i)
#define get_child_opt_(expected_parent_node, expected_node, node, i) get_child(node, i)
#endif
ast ast_result;
ast new_ast0(const int op, const int val) {
ast_result = alloc_obj(2);
heap[ast_result] = op;
set_val(ast_result, val);
return ast_result;
}
ast new_ast1(const int op, const ast child0) {
ast_result = alloc_obj(2);
heap[ast_result] = op + 1024;
set_child(ast_result, 0, child0);
return ast_result;
}
ast new_ast2(const int op, const ast child0, const ast child1) {
ast_result = alloc_obj(3);
heap[ast_result] = op + 2048;
set_child(ast_result, 0, child0);
set_child(ast_result, 1, child1);
return ast_result;
}
ast new_ast3(const int op, const ast child0, const ast child1, const ast child2) {
ast_result = alloc_obj(4);
heap[ast_result] = op + 3072;
set_child(ast_result, 0, child0);
set_child(ast_result, 1, child1);
set_child(ast_result, 2, child2);
return ast_result;
}
ast new_ast4(const int op, const ast child0, const ast child1, const ast child2, const ast child3) {
ast_result = alloc_obj(5);
heap[ast_result] = op + 4096;
set_child(ast_result, 0, child0);
set_child(ast_result, 1, child1);
set_child(ast_result, 2, child2);
set_child(ast_result, 3, child3);
return ast_result;
}
#ifndef target_sh
ast clone_ast(const ast orig) {
int nb_children = get_nb_children(orig);
int i;
// Account for the value of ast nodes with no child
nb_children = TERNARY(nb_children > 0, nb_children, 1);
ast_result = alloc_obj(nb_children + 1);
heap[ast_result] = heap[orig]; // copy operator and nb of children
for (i = 0; i < nb_children; i += 1) {
set_child(ast_result, i, get_child(orig, i));
}
return ast_result;
}
#endif
ast cons(const int child0, const int child1) { return new_ast2(LIST, child0, child1); }
ast car(const int pair) { return get_child_(LIST, pair, 0); }
ast cdr(const int pair) { return get_child_(LIST, pair, 1); }
#ifdef SAFE_MODE
ast car_(const int expected_op, const int pair) { return get_child__(LIST, expected_op, pair, 0); }
ast cdr_(const int expected_op, const int pair) { return get_child_opt_(LIST, expected_op, pair, 1); }
#else
#define car_(expected_op, pair) car(pair)
#define cdr_(expected_op, pair) cdr(pair)
#endif
// void set_car(const int pair, const int value) { return set_child(pair, 0, value); }
void set_cdr(const int pair, const int value) { return set_child(pair, 1, value); }
#ifndef target_sh
ast list1(const int child0) { return new_ast2(LIST, child0, 0); }
ast list2(const int child0, const int child1) { return new_ast2(LIST, child0, new_ast2(LIST, child1, 0)); }
ast list3(const int child0, const int child1, const int child2) { return new_ast2(LIST, child0, new_ast2(LIST, child1, new_ast2(LIST, child2, 0))); }
#endif
#define tail(x) cdr_(LIST, x)
#if defined(target_sh) || defined(target_awk)
// Returns the only element of a singleton list, if it is a singleton list.
// Otherwise, returns 0.
ast list_singleton(const ast list) {
if (list != 0 && tail(list) == 0) {
return car(list);
} else {
return 0;
}
}
#endif
// Symbol table and string pool management
// The symbol table is implemented as a chaining hash table.
// It is stored in the beginning of the heap array, with each top-level entry
// pointing to a linked list of symbols with the same hash value.
// The symbols themselves are stored in the heap after the hash table.
// Each symbol is represented as an object in the heap with the following layout:
// - 0: pointer to next symbol in the chain (0 if last symbol)
// - 1: offset in the string pool where the symbol's string is stored
// - 2: length of the symbol's string
// - 3: token type (IDENTIFIER, MACRO, TYPEDEF, other C keyword, etc)
// - 4: token tag (for macros, typedefs, defstr index, etc)
char *symbol_buf(const int symbol) {
return string_pool + heap[symbol + 1];
}
int symbol_len(const int symbol) {
return heap[symbol + 2];
}
char *symbol_buf_end(const int symbol) {
return string_pool + heap[symbol + 1] + heap[symbol + 2];
}
int symbol_type(const int symbol) {
return heap[symbol + 3];
}
void set_symbol_type(const int symbol, const int type) {
heap[symbol + 3] = type;
}
int symbol_tag(const int symbol) {
return heap[symbol + 4];
}
void set_symbol_tag(const int symbol, const int tag) {
heap[symbol + 4] = tag;
}
#ifdef target_sh