├── c.txt ├── html网页数据抓取 ├── 111.docx ├── result.txt ├── LALR分析表.htm ├── result_goto.txt ├── result_action.txt ├── get_gt.txt └── html_date_get.py ├── main.cpp ├── obj └── Debug │ └── main.o ├── bin └── Debug │ ├── SAM.exe │ └── AtmaCompiler.exe ├── result_action.txt ├── main.cpp.save-failed ├── AtmaCompiler.depend ├── README.md ├── AtmaCompiler.layout ├── a.txt ├── b.txt ├── AtmaCompiler.cbp ├── def.h ├── asm.txt ├── asm.s └── result_goto.txt /c.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /html网页数据抓取/111.docx: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /html网页数据抓取/result.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/main.cpp -------------------------------------------------------------------------------- /obj/Debug/main.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/obj/Debug/main.o -------------------------------------------------------------------------------- /bin/Debug/SAM.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/bin/Debug/SAM.exe -------------------------------------------------------------------------------- /result_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/result_action.txt 
-------------------------------------------------------------------------------- /main.cpp.save-failed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/main.cpp.save-failed -------------------------------------------------------------------------------- /html网页数据抓取/LALR分析表.htm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/html网页数据抓取/LALR分析表.htm -------------------------------------------------------------------------------- /bin/Debug/AtmaCompiler.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/bin/Debug/AtmaCompiler.exe -------------------------------------------------------------------------------- /html网页数据抓取/result_goto.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/html网页数据抓取/result_goto.txt -------------------------------------------------------------------------------- /html网页数据抓取/result_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/html网页数据抓取/result_action.txt -------------------------------------------------------------------------------- /AtmaCompiler.depend: -------------------------------------------------------------------------------- 1 | # depslib dependency file v1.0 2 | 1464792989 source:e:\code\atmacompiler\main.cpp 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pascal简易编译器 2 | Pascal-Simple-Compiler 3 | ## 说明: 4 | 本项目为哈工大编译原理实验内容,实现了Pascal到汇编级的翻译
5 | 可以作为学习时的参考,如需要文法、报告等内容可联系我
6 | ## 本人联系方式: 7 | v-yuthou@microsoft.com
8 | ythou@ir.hit.edu.cn 9 | 10 | 11 | -------------------------------------------------------------------------------- /AtmaCompiler.layout: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /a.txt: -------------------------------------------------------------------------------- 1 | prog zjh 2 | a,b:integer semi 3 | b:real semi 4 | l:array [100][100] of real semi 5 | proc sub 6 | x1:integer semi 7 | begin 8 | x1:=40 semi 9 | end 10 | 11 | begin 12 | a:=10+20 semi 13 | b:=12 semi 14 | c:=(a*x1)+2 semi 15 | a:=2*(a/x1) semi 16 | 17 | if a then 18 | a:=a+c semi 19 | else 20 | a:=a*c semi 21 | endif 22 | 23 | while a-100 do 24 | a:=a+5 semi 25 | endwhile 26 | 27 | while b do 28 | if b then 29 | b:=b-1 semi 30 | endif 31 | endwhile 32 | 33 | if c then 34 | while a do 35 | a:=a-1 semi 36 | endwhile 37 | endif 38 | 39 | write(c) semi 40 | read(b) semi 41 | end $ -------------------------------------------------------------------------------- /b.txt: -------------------------------------------------------------------------------- 1 | x1:=40 2 | T0:=10+20 3 | a:=T0 4 | b:=12 5 | T1:=a*x1 6 | T2:=T1+2 7 | c:=T2 8 | T3:=a/x1 9 | T4:=2*T3 10 | a:=T4 11 | 12 | if a goto tr0 13 | goto fal0 14 | tr0: 15 | T5:=a+c 16 | a:=T5 17 | goto next0 18 | fal0: 19 | T6:=a*c 20 | a:=T6 21 | next0: 22 | 23 | L0: 24 | T7:=a-100 25 | if T7 goto w_tr0 26 | goto w_fal0 27 | w_tr0: 28 | T8:=a+5 29 | a:=T8 30 | goto L0 31 | w_fal0: 32 | 33 | L1: 34 | if b goto w_tr1 35 | goto w_fal1 36 | w_tr1: 37 | 38 | if b goto tr1 39 | goto fal1 40 | tr1: 41 | T9:=b-1 42 | b:=T9 43 | fal1: 44 | goto L1 45 | w_fal1: 46 | 47 | if c goto tr2 48 | goto fal2 49 | tr2: 50 | 51 | L2: 52 | if a goto w_tr2 53 | goto w_fal2 54 | w_tr2: 55 | T10:=a-1 56 | a:=T10 57 | goto L2 58 | w_fal2: 59 | fal2: 60 | 
-------------------------------------------------------------------------------- /html网页数据抓取/get_gt.txt: -------------------------------------------------------------------------------- 1 | {program',1}, 2 | {program,2}, 3 | {program,1}, 4 | {program_sub,6}, 5 | {Zlist,3}, 6 | {Zlist,1}, 7 | {Zlist,1}, 8 | {Z,2}, 9 | {return_type,1}, 10 | {return_type,1}, 11 | {program_main,4}, 12 | {S,1}, 13 | {S,2}, 14 | {S,1}, 15 | {S,1}, 16 | {S,2}, 17 | {S,2}, 18 | {S,2}, 19 | {S,1}, 20 | {P,3}, 21 | {Type,1}, 22 | {Type,4}, 23 | {num_list,4}, 24 | {num_list,3}, 25 | {simple_T,1}, 26 | {simple_T,1}, 27 | {simple_T,1}, 28 | {List,3}, 29 | {List,1}, 30 | {A,3}, 31 | {A,2}, 32 | {A,2}, 33 | {E,3}, 34 | {E,3}, 35 | {E,1}, 36 | {factor,1}, 37 | {factor,3}, 38 | {factor,3}, 39 | {factor,3}, 40 | {F,3}, 41 | {F,1}, 42 | {F,1}, 43 | {F,1}, 44 | {B,5}, 45 | {B,5}, 46 | {B,2}, 47 | {B,1}, 48 | {B,1}, 49 | {B,3}, 50 | {B,3}, 51 | {relop,1}, 52 | {relop,1}, 53 | {relop,1}, 54 | {relop,1}, 55 | {relop,1}, 56 | {relop,1}, 57 | {C,1}, 58 | {C,1}, 59 | {C1,6}, 60 | {C1,8}, 61 | {C2,6}, 62 | {C_S,3}, 63 | {block,1}, 64 | {block,1}, 65 | {S_list,2}, 66 | {S_list,1}, 67 | {I_S,9}, 68 | {call_S,4}, 69 | {call_S,3}, 70 | {Elist,3}, 71 | {Elist,1}, 72 | {return_S,2}, 73 | {IO_S,4}, 74 | {IO_S,4}, 75 | {IO_S,4}, 76 | -------------------------------------------------------------------------------- /AtmaCompiler.cbp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 42 | 43 | -------------------------------------------------------------------------------- /def.h: -------------------------------------------------------------------------------- 1 | /*#define AND 1 2 | #define ARRAY 2 3 | #define BEGIN 3 4 | #define CASE 4 5 | #define CONST 5 6 | #define DIV 6 7 | #define DO 7 8 | #define DOWNTO 8 9 | #define ElSE 9 10 | #define END 10 11 | #define FILE 11 12 | #define FOR 12 13 | #define FUNC 13 14 | #define GOTO 14 15 | #define IF 15 16 | #define IN 16 
17 | #define LABEL 17 18 | #define MOD 18 19 | #define NIL 19 20 | #define NOT 20 21 | #define OF 21 22 | #define OR 22 23 | #define PACKED 23 24 | #define PROC 24 25 | #define PROG 25 26 | #define RECORD 26 27 | #define REPEAT 27 28 | #define SET 28 29 | #define THEN 29 30 | #define TO 30 31 | #define TYPE 31 32 | #define UNTIL 32 33 | #define VAR 33 34 | #define WHILE 34 35 | #define WITH 35 36 | #define INT 36 37 | #define REAL 37 38 | #define ID 38 39 | #define STRING 39 40 | #define PLUS 40 41 | #define MINUS 41 42 | #define MULTI 42 43 | #define RDIV 43 44 | #define EQ 44 45 | #define LT 45 46 | #define GT 46 47 | #define LE 47 48 | #define GE 48 49 | #define NE 49 50 | #define LR_BRAC 50 51 | #define RR_BRAC 51 52 | #define COMMA 52 53 | #define P_MARK 53 54 | #define F_STOP 54 55 | #define RANGE 55 56 | #define COLON 56 57 | #define ASSIGN 57 58 | #define SEMIC 58 59 | #define CAP 59 60 | #define EXP 60 61 | #define LS_BRAC 61 62 | #define RS_BRAC 62 63 | #define Q_MARK 63*/ 64 | #define program 34 65 | #define subprogram_declarations 35 66 | #define identifier_list 36 67 | #define declarations 37 68 | #define compound_statement 38 69 | #define declaration 39 70 | #define type 40 71 | #define standard_type 41 72 | #define subprogram_declarations 42 73 | #define subprogram_declaration 43 74 | #define subprogram_head 44 75 | #define arguments 45 76 | #define parameter_list 46 77 | #define optional_statements 47 78 | #define statement_list 48 79 | #define statement 49 80 | #define procedure_statement 50 81 | #define variable 51 82 | #define expression 52 83 | #define expression_list 53 84 | #define simple_expression 54 85 | #define term 55 86 | #define factor 56 87 | #define sign 57 88 | -------------------------------------------------------------------------------- /asm.txt: -------------------------------------------------------------------------------- 1 | .section .bss 2 | .lcomm zjh, 4 3 | .lcomm a, 4 4 | .lcomm b, 4 5 | .lcomm l, 4 6 | .lcomm sub, 4 7 | 
.lcomm x1, 4 8 | .lcomm c, 4 9 | .lcomm T0, 4 10 | .lcomm T1, 4 11 | .lcomm T2, 4 12 | .lcomm T3, 4 13 | .lcomm T4, 4 14 | .lcomm T5, 4 15 | .lcomm T6, 4 16 | .lcomm T7, 4 17 | .lcomm T8, 4 18 | .lcomm T9, 4 19 | .lcomm T10, 4 20 | .section .text 21 | movl $40, %eax 22 | movl %eax, x1 23 | movl $10, %eax 24 | addl $20, %eax 25 | movl %eax, T0 26 | movl T0, %eax 27 | movl %eax, a 28 | movl $12, %eax 29 | movl %eax, b 30 | movl a, %eax 31 | mull x1 32 | movl %eax, T1 33 | movl T1, %eax 34 | addl $2, %eax 35 | movl %eax, T2 36 | movl T2, %eax 37 | movl %eax, c 38 | movl a, %eax 39 | divl x1 40 | movl %eax, T3 41 | movl $2, %eax 42 | mull T3 43 | movl %eax, T4 44 | movl T4, %eax 45 | movl %eax, a 46 | movl 0, %eax 47 | cmpl a, %eax 48 | jnz .tr0 49 | jmp .fal0 50 | .tr0: 51 | movl a, %eax 52 | addl c, %eax 53 | movl %eax, T5 54 | movl T5, %eax 55 | movl %eax, a 56 | jmp .next0 57 | .fal0: 58 | movl a, %eax 59 | mull c 60 | movl %eax, T6 61 | movl T6, %eax 62 | movl %eax, a 63 | .next0: 64 | .L0: 65 | movl a, %eax 66 | subl $100, %eax 67 | movl %eax, T7 68 | movl 0, %eax 69 | cmpl T7, %eax 70 | jnz .w_tr0 71 | jmp .w_fal0 72 | .w_tr0: 73 | movl a, %eax 74 | addl $5, %eax 75 | movl %eax, T8 76 | movl T8, %eax 77 | movl %eax, a 78 | jnz .L0 79 | .w_fal0: 80 | .L1: 81 | movl 0, %eax 82 | cmpl b, %eax 83 | jnz .w_tr1 84 | jmp .w_fal1 85 | .w_tr1: 86 | movl 0, %eax 87 | cmpl b, %eax 88 | jnz .tr1 89 | jmp .fal1 90 | .tr1: 91 | movl b, %eax 92 | subl $1, %eax 93 | movl %eax, T9 94 | movl T9, %eax 95 | movl %eax, b 96 | .fal1: 97 | jnz .L1 98 | .w_fal1: 99 | movl 0, %eax 100 | cmpl c, %eax 101 | jnz .tr2 102 | jmp .fal2 103 | .tr2: 104 | .L2: 105 | movl 0, %eax 106 | cmpl a, %eax 107 | jnz .w_tr2 108 | jmp .w_fal2 109 | .w_tr2: 110 | movl a, %eax 111 | subl $1, %eax 112 | movl %eax, T10 113 | movl T10, %eax 114 | movl %eax, a 115 | jnz .L2 116 | .w_fal2: 117 | .fal2: 118 | -------------------------------------------------------------------------------- /asm.s: 
-------------------------------------------------------------------------------- 1 | .section .bss 2 | .lcomm zjh, 4 3 | .lcomm a, 4 4 | .lcomm b, 4 5 | .lcomm l, 4 6 | .lcomm sub, 4 7 | .lcomm x1, 4 8 | .lcomm c, 4 9 | .lcomm T0, 4 10 | .lcomm T1, 4 11 | .lcomm T2, 4 12 | .lcomm T3, 4 13 | .lcomm T4, 4 14 | .lcomm T5, 4 15 | .lcomm T6, 4 16 | .lcomm T7, 4 17 | .lcomm T8, 4 18 | .lcomm T9, 4 19 | .lcomm T10, 4 20 | .section .text 21 | movl $40, %eax 22 | movl %eax, $x1 23 | movl $10, %eax 24 | addl $20, %eax 25 | movl %eax, $T0 26 | movl $T0, %eax 27 | movl %eax, $a 28 | movl $12.3, %eax 29 | movl %eax, $b 30 | movl $a, %eax 31 | mull $x1, %eax 32 | movl %eax, $T1 33 | movl $T1, %eax 34 | addl $2, %eax 35 | movl %eax, $T2 36 | movl $T2, %eax 37 | movl %eax, $c 38 | movl $a, %eax 39 | divl $x1, %eax 40 | movl %eax, $T3 41 | movl $2, %eax 42 | mull $T3, %eax 43 | movl %eax, $T4 44 | movl $T4, %eax 45 | movl %eax, $a 46 | movl 0, %eax 47 | cmpl a, %eax 48 | jnz .tr0 49 | jmp .fal0 50 | .tr0: 51 | movl $a, %eax 52 | addl $c, %eax 53 | movl %eax, $T5 54 | movl $T5, %eax 55 | movl %eax, $a 56 | jmp .next0 57 | .fal0: 58 | movl $a, %eax 59 | mull $c, %eax 60 | movl %eax, $T6 61 | movl $T6, %eax 62 | movl %eax, $a 63 | .next0: 64 | .L0: 65 | movl $a, %eax 66 | subl $100, %eax 67 | movl %eax, $T7 68 | movl 0, %eax 69 | cmpl T7, %eax 70 | jnz .w_tr0 71 | jmp .w_fal0 72 | .w_tr0: 73 | movl $a, %eax 74 | addl $5, %eax 75 | movl %eax, $T8 76 | movl $T8, %eax 77 | movl %eax, $a 78 | jnz .L0 79 | .w_fal0: 80 | .L1: 81 | movl 0, %eax 82 | cmpl b, %eax 83 | jnz .w_tr1 84 | jmp .w_fal1 85 | .w_tr1: 86 | movl 0, %eax 87 | cmpl b, %eax 88 | jnz .tr1 89 | jmp .fal1 90 | .tr1: 91 | movl $b, %eax 92 | subl $1, %eax 93 | movl %eax, $T9 94 | movl $T9, %eax 95 | movl %eax, $b 96 | .fal1: 97 | jnz .L1 98 | .w_fal1: 99 | movl 0, %eax 100 | cmpl c, %eax 101 | jnz .tr2 102 | jmp .fal2 103 | .tr2: 104 | .L2: 105 | movl 0, %eax 106 | cmpl a, %eax 107 | jnz .w_tr2 108 | jmp .w_fal2 109 | .w_tr2: 
110 | movl $a, %eax 111 | subl $1, %eax 112 | movl %eax, $T10 113 | movl $T10, %eax 114 | movl %eax, $a 115 | jnz .L2 116 | .w_fal2: 117 | .fal2: 118 | -------------------------------------------------------------------------------- /result_goto.txt: -------------------------------------------------------------------------------- 1 | {0.0.0.0.0.1.0.0.0.0.0.0.0.0.0}. 2 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 3 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 4 | {4.5.0.0.0.0.0.0.0.0.0.0.0.0.0}. 5 | {8.5.0.0.9.0.0.10.0.0.0.0.0.0.0}. 6 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 7 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 8 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 9 | {8.5.0.0.20.0.0.10.0.0.0.0.0.0.0}. 10 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 11 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 12 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 13 | {0.0.0.0.0.0.22.0.0.0.0.0.0.0.0}. 14 | {0.0.0.0.0.0.0.0.23.24.25.26.0.27.0}. 15 | {0.0.0.0.0.0.0.0.23.24.25.26.0.32.0}. 16 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 17 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 18 | {0.0.35.0.0.0.0.0.0.0.0.0.0.0.0}. 19 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 20 | {41.5.0.0.0.0.0.0.0.0.0.0.0.0.0}. 21 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 22 | {0.0.0.0.0.0.0.0.42.24.25.0.0.0.0}. 23 | {8.5.0.0.43.0.0.10.0.0.0.0.0.0.0}. 24 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 25 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 26 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 27 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 28 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 29 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 30 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 31 | {0.0.0.0.0.0.0.0.52.24.25.0.53.0.0}. 32 | {0.0.0.0.0.0.0.0.55.24.25.0.0.0.0}. 33 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 34 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.57}. 35 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.59}. 36 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 37 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 38 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 39 | {0.0.0.61.0.0.0.0.0.0.0.0.0.0.0}. 40 | {63.5.0.0.0.0.0.0.0.0.0.0.0.0.0}. 41 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 42 | {8.5.0.0.64.0.0.10.0.0.0.0.0.0.0}. 43 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 
44 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 45 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 46 | {0.0.0.0.0.0.0.0.52.24.25.0.66.0.0}. 47 | {0.0.0.0.0.0.0.0.52.24.25.0.67.0.0}. 48 | {0.0.0.0.0.0.0.0.0.68.25.0.0.0.0}. 49 | {0.0.0.0.0.0.0.0.0.69.25.0.0.0.0}. 50 | {0.0.0.0.0.0.0.0.0.0.70.0.0.0.0}. 51 | {0.0.0.0.0.0.0.0.0.0.71.0.0.0.0}. 52 | {8.5.0.0.72.0.0.10.0.0.0.0.0.0.0}. 53 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 54 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 55 | {0.0.0.0.0.0.0.0.73.24.25.74.0.0.0}. 56 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 57 | {8.5.0.0.76.0.0.10.0.0.0.0.0.0.0}. 58 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 59 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 60 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 61 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 62 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 63 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 64 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 65 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 66 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 67 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 68 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 69 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 70 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 71 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 72 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 73 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 74 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 75 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 76 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 77 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 78 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 79 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 80 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 81 | {0.0.89.0.0.0.0.0.0.0.0.0.0.0.0}. 82 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 83 | {8.5.0.0.91.0.0.10.0.0.0.0.0.0.0}. 84 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 85 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 86 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 87 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 88 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 89 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 90 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 91 | {0.0.0.92.0.0.0.0.0.0.0.0.0.0.0}. 92 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 93 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 94 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}. 
# coding=utf-8
# File: html网页数据抓取/html_date_get.py
"""Scrape an LALR parse table out of an HTML page.

The script feeds the HTML file named by ``HTML_FILE`` through
``MyHTMLParser``, which flattens the ``<td>`` cells into a list of strings,
and then writes the ACTION columns to ``result_action.txt`` and the GOTO
columns to ``result_goto.txt`` as ``{a.b.c}.`` rows.
"""
import os

from html.parser import HTMLParser

# Module-level state filled in by init().
HTML_FILE = ''
HTML_STR = ''
# Grammar productions.  A "reduce" cell whose text equals one of these
# strings is written out as the negated index of that production.
PRODUCTION = (
    "P ->A",
    "A -> prog id D S|",
    "S -> D S|",
    "D -> List : T semi | proc id D S",
    "S -> begin B end|",
    "B -> B S | ",
    "List -> List , id | id",
    "T -> integer | real | array C of T | record D",
    "C -> [ num ] C | ",

    "S -> Left := E semi|",
    "E -> E + H | E - H | H",
    "H -> H * F | H / F | F",
    "F -> ( E ) | id | num",
    "Left -> id|",

    "S =>while W do S endwhile",

    "S => if W then S endif",
    "S => if W then S else S endif",

    "W => I | E",

    "I => E or L | E and L | not L",
    "L => ( I ) | E",

    "S => read ( plist , id ) semi",
    "S => write ( plist , id ) semi",
    "plist => plist , id | id",
)


class MyHTMLParser(HTMLParser):
    """Flatten the ``<td>`` cells of a parse-table page into ``self.links``.

    Cell handling, as implemented by the handlers below:

    * a ``<td>`` with exactly one attribute is a data cell;
    * any other ``<td>`` contributes its ``rowspan``/``colspan`` values;
    * ``shift X`` contributes ``X``;
    * ``reduce ...`` contributes the text after ``reduce`` with every entity
      reference replaced by one space;
    * a data cell holding only an entity reference contributes ``'0'``.
    """

    def __init__(self):
        # convert_charrefs must be off: with the modern default (True) the
        # parser turns "&nbsp;" into text and never calls handle_entityref,
        # which this class relies on to detect empty cells and separators.
        HTMLParser.__init__(self, convert_charrefs=False)
        # Per-instance state (a class-level list would be shared by all
        # parser instances).
        self.links = []            # flattened cell values, in document order
        self.flag = False          # True while inside a data <td>
        self.temp = ''             # text of the reduce cell being collected
        self.shift_flag = False    # previous text chunk was "shift"
        self.reduce_flag = False   # currently collecting a reduce cell

    def handle_starttag(self, tag, attrs):
        """Open a data cell, or record span values of a non-data ``<td>``."""
        if tag != "td":
            return
        if len(attrs) == 1:
            self.flag = True
        else:
            for (variable, value) in attrs:
                if variable == "rowspan" or variable == "colspan":
                    self.links.append(value)

    def handle_entityref(self, data):
        """Treat an entity as an empty-cell marker or a reduce separator."""
        if not self.flag:
            return
        if not self.shift_flag and not self.reduce_flag:
            # Entity on its own inside a data cell: record an empty entry.
            self.links.append('0')
        if self.reduce_flag:
            self.temp += ' '

    def handle_endtag(self, tag):
        """Close the current cell and flush a pending reduce production."""
        if tag != "td":
            return
        self.flag = False
        if self.reduce_flag:
            # temp starts with the separator that followed "reduce".
            self.links.append(self.temp[1:])
            self.temp = ''
            self.reduce_flag = False

    def handle_data(self, data):
        """Record cell text, interpreting the shift/reduce keywords."""
        if not self.flag:
            return
        if self.shift_flag:
            self.links.append(data)
            self.shift_flag = False
        elif self.reduce_flag:
            self.temp += data
        elif data == "shift":
            self.shift_flag = True
        elif data == "reduce":
            self.reduce_flag = True
        else:
            self.links.append(data)


def parser_test(html_str):
    """Parse ``html_str`` with ``MyHTMLParser`` and return the cell list.

    The ``strict`` keyword the original passed is no longer accepted by
    ``HTMLParser``, so the parser is built with its default constructor.
    """
    parser = MyHTMLParser()
    parser.feed(html_str)
    parser.close()
    return parser.links


def read_html_file(path, encoding=None):
    """Return the text of the file at ``path``, or ``''`` if it is missing.

    ``encoding`` is handed to ``open``; the default ``None`` keeps the
    platform default encoding.
    """
    if not os.path.exists(path):
        print('the path [{}] dosen\'t exist!'.format(path))
        return ''
    print('开始读取文件:[{}]'.format(path))
    # The with-block closes the file; no explicit close() is needed.
    with open(path, 'r', encoding=encoding) as pf:
        return pf.read()


def init():
    """Set ``HTML_FILE`` and load its contents into ``HTML_STR``."""
    # Location of the HTML source file.
    global HTML_FILE
    HTML_FILE = 'LALR分析表.htm'
    # Contents of the HTML source file.
    global HTML_STR
    HTML_STR = read_html_file(HTML_FILE)


def get_GT(path='get_gt.txt'):
    """Write one ``{head.count}.`` line per entry of ``PRODUCTION``.

    ``head`` is the first whitespace-separated token of the production and
    ``count`` is the number of tokens minus two.
    """
    with open(path, 'w') as output3:
        for production in PRODUCTION:
            tokens = production.split()
            output3.write(
                '{' + tokens[0] + '.' + str(len(tokens) - 2) + '}.' + '\n')


def write_tables(links, action_out, goto_out):
    """Write the ACTION and GOTO rows described by ``links``.

    ``links[1]`` and ``links[2]`` are read as the ACTION and GOTO column
    counts; entries from index 3 onward are table cells.  Cells equal to a
    ``PRODUCTION`` string are replaced by the negated production index.
    ``action_out`` and ``goto_out`` are writable text streams.
    """
    n_action = int(links[1])
    n_goto = int(links[2])
    # Cells per table row: one leading column plus both column groups.
    row_width = 1 + n_action + n_goto

    # First-occurrence index of every production (same result as
    # PRODUCTION.index, without a linear scan per cell).
    production_index = {}
    for idx, production in enumerate(PRODUCTION):
        production_index.setdefault(production, idx)

    action_out.write("ACTION表的范围为1~" + links[1])
    goto_out.write(",GOTO表的范围为" + str(n_action + 1) + "~"
                   + str(n_action + n_goto) + '\n')

    for i in range(3, len(links)):
        cell = links[i]
        if cell in production_index:
            # NOTE(review): production 0 becomes '0', the same text used for
            # an empty cell -- confirm the consumer can tell them apart.
            cell = str(-production_index[cell])

        # Position within the row; position 0 is the leading column, which
        # is not written to either table.
        pos = (i - 2) % row_width
        if pos == 1:
            action_out.write('{')
        if 0 < pos < n_action:
            action_out.write(cell + '.')
        if pos == n_action:
            action_out.write(cell + '}.' + '\n')
        if pos == n_action + 1:
            goto_out.write('{')
        if n_action + 1 <= pos < n_action + n_goto:
            goto_out.write(cell + '.')
        if pos == n_action + n_goto:
            goto_out.write(cell + '}.' + '\n')


def main():
    """Parse the HTML table and write the two result files."""
    init()

    print('#' * 50)

    hp = MyHTMLParser()
    hp.feed(HTML_STR)
    hp.close()
    print(hp.links)

    if len(hp.links) < 3:
        # Missing or unparsable input: stop before truncating the result
        # files instead of failing on hp.links[1].
        print('no table cells found in [{}]; nothing written'.format(HTML_FILE))
        return

    print(len(hp.links))

    with open('result_action.txt', 'w') as output1, \
            open('result_goto.txt', 'w') as output2:
        write_tables(hp.links, output1, output2)

    #get_GT()
    print("wjw=" + str(len(PRODUCTION)))


if __name__ == '__main__':
    main()