├── .gitignore ├── README ├── bootstrap.c ├── meta.txt └── support.h /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | .DS_Store 3 | *~ 4 | *.swp 5 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | META-II for C 2 | by Long Nguyen 3 | 4 | This project is based on the paper META-II by Val Schorre from 1964. 5 | 6 | META-II is written in itself so it needs to be bootstrapped from 7 | a C implementation in bootstrap.c. 8 | 9 | To build, 10 | 11 | gcc -Wall -o bin/boot bootstrap.c 12 | ./bin/boot meta.txt bin/meta.c 13 | gcc -Wall -o bin/meta bin/meta.c 14 | 15 | The original version of META-II only allowed productions to emit 16 | arbitrary text or the last recognized token. 17 | This version is ZASHQUARED. 18 | -------------------------------------------------------------------------------- /bootstrap.c: -------------------------------------------------------------------------------- 1 | #include "support.h" 2 | 3 | void meta_arg(void) 4 | { 5 | do { 6 | read_literal("$"); 7 | if (test_flag) { 8 | emit("emit_token();"); 9 | emit_nl(); 10 | } 11 | 12 | if (test_flag) { break; } 13 | 14 | read_string(); 15 | if (test_flag) { 16 | emit("emit("); 17 | emit_token(); 18 | emit(");"); 19 | emit_nl(); 20 | } 21 | } while (0); 22 | } 23 | 24 | void meta_output(void) 25 | { 26 | do { 27 | read_literal("{"); 28 | if (test_flag) { 29 | do { 30 | meta_arg(); 31 | } while (test_flag); 32 | test_flag = 1; 33 | error_if_false(); 34 | 35 | read_literal("}"); 36 | error_if_false(); 37 | 38 | emit("emit_nl();"); 39 | emit_nl(); 40 | } 41 | } while (0); 42 | } 43 | 44 | void meta_exp3(void) 45 | { 46 | do { 47 | read_id(); 48 | if (test_flag) { 49 | emit("meta_"); 50 | emit_token(); 51 | emit("();"); 52 | emit_nl(); 53 | } 54 | 55 | if (test_flag) { break; } 56 | 57 | read_string(); 58 | if (test_flag) { 
59 | emit("read_literal("); 60 | emit_token(); 61 | emit(");"); 62 | emit_nl(); 63 | } 64 | 65 | if (test_flag) { break; } 66 | 67 | read_literal(".id"); 68 | if (test_flag) { 69 | emit("read_id();"); 70 | emit_nl(); 71 | } 72 | 73 | if (test_flag) { break; } 74 | 75 | read_literal(".number"); 76 | if (test_flag) { 77 | emit("read_number();"); 78 | emit_nl(); 79 | } 80 | 81 | if (test_flag) { break; } 82 | 83 | read_literal(".string"); 84 | if (test_flag) { 85 | emit("read_string();"); 86 | emit_nl(); 87 | } 88 | 89 | if (test_flag) { break; } 90 | 91 | read_literal("("); 92 | if (test_flag) { 93 | meta_exp1(); 94 | error_if_false(); 95 | 96 | read_literal(")"); 97 | error_if_false(); 98 | } 99 | 100 | if (test_flag) { break; } 101 | 102 | read_literal(".e"); 103 | if (test_flag) { 104 | emit("test_flag = 1;"); 105 | emit_nl(); 106 | } 107 | 108 | if (test_flag) { break; } 109 | 110 | read_literal("*"); 111 | if (test_flag) { 112 | emit("do {"); 113 | emit_nl(); 114 | 115 | meta_exp3(); 116 | error_if_false(); 117 | 118 | emit("} while (test_flag);"); 119 | emit_nl(); 120 | 121 | emit("test_flag = 1;"); 122 | emit_nl(); 123 | } 124 | } while (0); 125 | } 126 | 127 | void meta_exp2(void) 128 | { 129 | do { 130 | // open 131 | do { 132 | meta_exp3(); 133 | if (test_flag) { 134 | emit("if (test_flag) {"); 135 | emit_nl(); 136 | } 137 | 138 | if (test_flag) { break; } 139 | 140 | meta_output(); 141 | if (test_flag) { 142 | emit("if (1) {"); 143 | emit_nl(); 144 | } 145 | } while (0); 146 | // close 147 | 148 | if (test_flag) { 149 | do { 150 | // open 151 | do { 152 | meta_exp3(); 153 | if (test_flag) { 154 | emit("error_if_false();"); 155 | emit_nl(); 156 | } 157 | 158 | if (test_flag) { break; } 159 | 160 | meta_output(); 161 | if (test_flag) { 162 | } 163 | } while (0); 164 | // close 165 | 166 | } while (test_flag); 167 | test_flag = 1; 168 | error_if_false(); 169 | 170 | emit("}"); 171 | emit_nl(); 172 | } 173 | } while (0); 174 | } 175 | 176 | void 
meta_exp1(void) 177 | { 178 | do { 179 | emit("do {"); 180 | emit_nl(); 181 | if (1) { 182 | meta_exp2(); 183 | error_if_false(); 184 | 185 | do { 186 | // open 187 | do { 188 | read_literal("|"); 189 | if (test_flag) { 190 | emit("if (test_flag) { break; }"); 191 | emit_nl(); 192 | 193 | meta_exp2(); 194 | error_if_false(); 195 | } 196 | } while (0); 197 | // close 198 | 199 | } while (test_flag); 200 | test_flag = 1; 201 | error_if_false(); 202 | 203 | emit("} while (0);"); 204 | emit_nl(); 205 | } 206 | } while (0); 207 | } 208 | 209 | void meta_stat(void) 210 | { 211 | do { 212 | read_id(); 213 | if (test_flag) { 214 | emit("void meta_"); 215 | emit_token(); 216 | emit("(void)"); 217 | emit_nl(); 218 | 219 | emit("{"); 220 | emit_nl(); 221 | 222 | read_literal("="); 223 | error_if_false(); 224 | 225 | meta_exp1(); 226 | error_if_false(); 227 | 228 | read_literal(";"); 229 | error_if_false(); 230 | 231 | emit("}"); 232 | emit_nl(); 233 | } 234 | } while (0); 235 | } 236 | 237 | void meta_program(void) 238 | { 239 | do { 240 | read_literal(".syntax"); 241 | if (test_flag) { 242 | read_id(); 243 | error_if_false(); 244 | 245 | emit("#include \"support.h\""); 246 | emit_nl(); 247 | 248 | do { 249 | meta_stat(); 250 | } while (test_flag); 251 | test_flag = 1; 252 | error_if_false(); 253 | 254 | read_literal(".end"); 255 | error_if_false(); 256 | } 257 | } while (0); 258 | } 259 | 260 | // end 261 | -------------------------------------------------------------------------------- /meta.txt: -------------------------------------------------------------------------------- 1 | .syntax meta 2 | 3 | arg = '$' {'emit_token();'} 4 | | .string {'emit(' $ ');'}; 5 | 6 | output = '{' *arg '}' {'emit_nl();'}; 7 | 8 | exp3 = .id {'meta_' $ '();'} 9 | | .string {'read_literal(' $ ');'} 10 | | '.id' {'read_id();'} 11 | | '.number' {'read_number();'} 12 | | '.string' {'read_string();'} 13 | | '(' exp1 ')' 14 | | '.e' {'test_flag = 1;'} 15 | | '*' {'do {'} 16 | exp3 {'} while 
(test_flag);'} 17 | {'test_flag = 1;'}; 18 | 19 | exp2 = ( exp3 {'if (test_flag) {'} 20 | | output {'if (1) {'} ) 21 | *( exp3 {'error_if_false();'} 22 | | output ) 23 | {'}'}; 24 | 25 | exp1 = {'do {'} exp2 26 | *( '|' {'if (test_flag) { break; }'} exp2 ) 27 | {'} while (0);'}; 28 | 29 | stat = .id {'void meta_' $ '(void)'} 30 | {'{'} 31 | '=' exp1 ';' 32 | {'}'}; 33 | 34 | program = '.syntax' .id {'#include "support.h"'} 35 | *stat 36 | '.end'; 37 | 38 | .end 39 | -------------------------------------------------------------------------------- /support.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | 5 | char *output_name = NULL; 6 | FILE *output = NULL; 7 | char *source = NULL; 8 | int pos = 0; 9 | int line = 1; 10 | int test_flag = 0; 11 | char *token = NULL; 12 | 13 | void skip_whitespace(void) 14 | { 15 | while (source[pos] == '\x20' || source[pos] == '\t' || 16 | source[pos] == '\r' || source[pos] == '\n') { 17 | // increment line counter when new line reached 18 | if (source[pos] == '\n') { 19 | line++; 20 | } 21 | pos++; 22 | } 23 | } 24 | 25 | void make_token(int start_pos) 26 | { 27 | int length = pos - start_pos; 28 | free(token); 29 | token = malloc(length + 1); 30 | token[length] = '\0'; 31 | memcpy(token, &source[start_pos], length); 32 | } 33 | 34 | // emits the currently recognized token 35 | void emit_token(void) 36 | { 37 | int i; 38 | // strings are converted to C format 39 | if (token[0] == '\'') { 40 | fprintf(output, "\""); 41 | for (i = 1; token[i] != '\0' && token[i] != '\''; i++) { 42 | switch (token[i]) { 43 | case '\"': 44 | fprintf(output, "\\\""); 45 | break; 46 | case '\\': 47 | fprintf(output, "\\\\"); 48 | break; 49 | default: 50 | fprintf(output, "%c", token[i]); 51 | break; 52 | } 53 | } 54 | fprintf(output, "\""); 55 | return; 56 | } 57 | // if token is not a string, emit as-is 58 | fprintf(output, "%s", token); 59 | } 60 | 61 | void emit(const char
*str) 62 | { 63 | fprintf(output, "%s", str); 64 | } 65 | 66 | void emit_nl(void) 67 | { 68 | fprintf(output, "\n"); 69 | } 70 | 71 | void read_literal(const char *literal) 72 | { 73 | int entry_pos; 74 | int i; 75 | 76 | skip_whitespace(); 77 | // compare source with the literal 78 | entry_pos = pos; 79 | i = 0; 80 | while (source[pos] != '\0' && literal[i] != '\0' && 81 | source[pos] == literal[i]) { 82 | pos++; 83 | i++; 84 | } 85 | // if the end of the literal has been reached, comparison successful 86 | if (literal[i] == '\0') { 87 | test_flag = 1; 88 | make_token(entry_pos); 89 | } else { 90 | // reset position 91 | pos = entry_pos; 92 | test_flag = 0; 93 | } 94 | } 95 | 96 | void read_id(void) 97 | { 98 | int entry_pos; 99 | 100 | skip_whitespace(); 101 | // recognize initial alphabetic character 102 | entry_pos = pos; 103 | if (('A' <= source[pos] && source[pos] <= 'Z') || 104 | ('a' <= source[pos] && source[pos] <= 'z')) { 105 | pos++; 106 | test_flag = 1; 107 | } else { 108 | test_flag = 0; 109 | return; 110 | } 111 | // recognize alphanumeric characters 112 | while (('A' <= source[pos] && source[pos] <= 'Z') || 113 | ('a' <= source[pos] && source[pos] <= 'z') || 114 | ('0' <= source[pos] && source[pos] <= '9')) { 115 | pos++; 116 | } 117 | // recognition successful, copy into token 118 | make_token(entry_pos); 119 | } 120 | 121 | void read_number(void) 122 | { 123 | int entry_pos; 124 | 125 | skip_whitespace(); 126 | // recognize optional negative sign 127 | entry_pos = pos; 128 | if (source[pos] == '-') { 129 | pos++; 130 | } 131 | // recognize initial numeric character 132 | if ('0' <= source[pos] && source[pos] <= '9') { 133 | pos++; 134 | test_flag = 1; 135 | } else { 136 | test_flag = 0; 137 | return; 138 | } 139 | // recognize subsequent numeric characters 140 | while ('0' <= source[pos] && source[pos] <= '9') { 141 | pos++; 142 | } 143 | // recognition successful, copy into token 144 | make_token(entry_pos); 145 | } 146 | 147 | void 
read_string(void) 148 | { 149 | int entry_pos; 150 | 151 | skip_whitespace(); 152 | // recognize initial single quote 153 | entry_pos = pos; 154 | if (source[pos] == '\'') { 155 | pos++; 156 | // test_flag is not set as recognition can still fail 157 | } else { 158 | test_flag = 0; 159 | return; 160 | } 161 | 162 | // recognize contents 163 | while (source[pos] != '\0' && source[pos] != '\'') { 164 | // increment line counter when new line reached 165 | if (source[pos] == '\n') { 166 | line++; 167 | } 168 | pos++; 169 | } 170 | 171 | // recognize final single quote 172 | if (source[pos] == '\'') { 173 | pos++; 174 | test_flag = 1; 175 | make_token(entry_pos); 176 | } else if (source[pos] == '\0') { 177 | // reset position 178 | pos = entry_pos; 179 | test_flag = 0; 180 | } 181 | } 182 | 183 | void error_if_false(void) 184 | { 185 | if (!test_flag) { 186 | fprintf(stderr, "error in line %i at token %s\n", line, token); 187 | fclose(output); 188 | // delete the output file 189 | remove(output_name); 190 | free(source); 191 | free(token); 192 | exit(1); 193 | } 194 | } 195 | 196 | void meta_program(void); 197 | void meta_exp1(void); 198 | 199 | int main(int argc, char *argv[]) 200 | { 201 | FILE *input; 202 | int length; 203 | 204 | if (argc < 3) { 205 | fprintf(stderr, "usage: meta <input> <output>\n"); 206 | exit(1); 207 | } 208 | 209 | // open input and output 210 | input = fopen(argv[1], "r"); 211 | if (input == NULL) { 212 | fprintf(stderr, "invalid input file\n"); 213 | exit(1); 214 | } 215 | output_name = argv[2]; 216 | output = fopen(output_name, "w"); 217 | if (output == NULL) { 218 | fprintf(stderr, "invalid output file\n"); 219 | exit(1); 220 | } 221 | // read entire input into source 222 | fseek(input, 0, SEEK_END); 223 | length = (int)ftell(input); 224 | fseek(input, 0, SEEK_SET); 225 | source = malloc(length + 1); 226 | fread(source, 1, length, input); 227 | source[length] = '\0'; 228 | fclose(input); 229 | 230 | // initially we have empty token; token is never NULL 231
| token = malloc(1); 232 | token[0] = '\0'; 233 | 234 | // run meta 235 | meta_program(); 236 | 237 | fprintf(stderr, "%d %d\n", pos, strlen(source)); 238 | 239 | fclose(output); 240 | free(source); 241 | free(token); 242 | return 0; 243 | } 244 | 245 | /* end */ 246 | --------------------------------------------------------------------------------