├── c.txt
├── html网页数据抓取
├── 111.docx
├── result.txt
├── LALR分析表.htm
├── result_goto.txt
├── result_action.txt
├── get_gt.txt
└── html_date_get.py
├── main.cpp
├── obj
└── Debug
│ └── main.o
├── bin
└── Debug
│ ├── SAM.exe
│ └── AtmaCompiler.exe
├── result_action.txt
├── main.cpp.save-failed
├── AtmaCompiler.depend
├── README.md
├── AtmaCompiler.layout
├── a.txt
├── b.txt
├── AtmaCompiler.cbp
├── def.h
├── asm.txt
├── asm.s
└── result_goto.txt
/c.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/html网页数据抓取/111.docx:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/html网页数据抓取/result.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/main.cpp
--------------------------------------------------------------------------------
/obj/Debug/main.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/obj/Debug/main.o
--------------------------------------------------------------------------------
/bin/Debug/SAM.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/bin/Debug/SAM.exe
--------------------------------------------------------------------------------
/result_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/result_action.txt
--------------------------------------------------------------------------------
/main.cpp.save-failed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/main.cpp.save-failed
--------------------------------------------------------------------------------
/html网页数据抓取/LALR分析表.htm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/html网页数据抓取/LALR分析表.htm
--------------------------------------------------------------------------------
/bin/Debug/AtmaCompiler.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/bin/Debug/AtmaCompiler.exe
--------------------------------------------------------------------------------
/html网页数据抓取/result_goto.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/html网页数据抓取/result_goto.txt
--------------------------------------------------------------------------------
/html网页数据抓取/result_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AtmaHou/Pascal-Simple-Compiler/HEAD/html网页数据抓取/result_action.txt
--------------------------------------------------------------------------------
/AtmaCompiler.depend:
--------------------------------------------------------------------------------
1 | # depslib dependency file v1.0
2 | 1464792989 source:e:\code\atmacompiler\main.cpp
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pascal简易编译器
2 | Pascal-Simple-Compiler
3 | ## 说明:
4 | 本项目为哈工大编译原理实验内容,实现了Pascal到汇编级的翻译
5 | 可以作为学习时的参考,如需要文法、报告等内容可联系我
6 | ## 本人联系方式:
7 | v-yuthou@microsoft.com
8 | ythou@ir.hit.edu.cn
9 |
10 |
11 |
--------------------------------------------------------------------------------
/AtmaCompiler.layout:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/a.txt:
--------------------------------------------------------------------------------
1 | prog zjh
2 | a,b:integer semi
3 | b:real semi
4 | l:array [100][100] of real semi
5 | proc sub
6 | x1:integer semi
7 | begin
8 | x1:=40 semi
9 | end
10 |
11 | begin
12 | a:=10+20 semi
13 | b:=12 semi
14 | c:=(a*x1)+2 semi
15 | a:=2*(a/x1) semi
16 |
17 | if a then
18 | a:=a+c semi
19 | else
20 | a:=a*c semi
21 | endif
22 |
23 | while a-100 do
24 | a:=a+5 semi
25 | endwhile
26 |
27 | while b do
28 | if b then
29 | b:=b-1 semi
30 | endif
31 | endwhile
32 |
33 | if c then
34 | while a do
35 | a:=a-1 semi
36 | endwhile
37 | endif
38 |
39 | write(c) semi
40 | read(b) semi
41 | end $
--------------------------------------------------------------------------------
/b.txt:
--------------------------------------------------------------------------------
1 | x1:=40
2 | T0:=10+20
3 | a:=T0
4 | b:=12
5 | T1:=a*x1
6 | T2:=T1+2
7 | c:=T2
8 | T3:=a/x1
9 | T4:=2*T3
10 | a:=T4
11 |
12 | if a goto tr0
13 | goto fal0
14 | tr0:
15 | T5:=a+c
16 | a:=T5
17 | goto next0
18 | fal0:
19 | T6:=a*c
20 | a:=T6
21 | next0:
22 |
23 | L0:
24 | T7:=a-100
25 | if T7 goto w_tr0
26 | goto w_fal0
27 | w_tr0:
28 | T8:=a+5
29 | a:=T8
30 | goto L0
31 | w_fal0:
32 |
33 | L1:
34 | if b goto w_tr1
35 | goto w_fal1
36 | w_tr1:
37 |
38 | if b goto tr1
39 | goto fal1
40 | tr1:
41 | T9:=b-1
42 | b:=T9
43 | fal1:
44 | goto L1
45 | w_fal1:
46 |
47 | if c goto tr2
48 | goto fal2
49 | tr2:
50 |
51 | L2:
52 | if a goto w_tr2
53 | goto w_fal2
54 | w_tr2:
55 | T10:=a-1
56 | a:=T10
57 | goto L2
58 | w_fal2:
59 | fal2:
60 |
--------------------------------------------------------------------------------
/html网页数据抓取/get_gt.txt:
--------------------------------------------------------------------------------
1 | {program',1},
2 | {program,2},
3 | {program,1},
4 | {program_sub,6},
5 | {Zlist,3},
6 | {Zlist,1},
7 | {Zlist,1},
8 | {Z,2},
9 | {return_type,1},
10 | {return_type,1},
11 | {program_main,4},
12 | {S,1},
13 | {S,2},
14 | {S,1},
15 | {S,1},
16 | {S,2},
17 | {S,2},
18 | {S,2},
19 | {S,1},
20 | {P,3},
21 | {Type,1},
22 | {Type,4},
23 | {num_list,4},
24 | {num_list,3},
25 | {simple_T,1},
26 | {simple_T,1},
27 | {simple_T,1},
28 | {List,3},
29 | {List,1},
30 | {A,3},
31 | {A,2},
32 | {A,2},
33 | {E,3},
34 | {E,3},
35 | {E,1},
36 | {factor,1},
37 | {factor,3},
38 | {factor,3},
39 | {factor,3},
40 | {F,3},
41 | {F,1},
42 | {F,1},
43 | {F,1},
44 | {B,5},
45 | {B,5},
46 | {B,2},
47 | {B,1},
48 | {B,1},
49 | {B,3},
50 | {B,3},
51 | {relop,1},
52 | {relop,1},
53 | {relop,1},
54 | {relop,1},
55 | {relop,1},
56 | {relop,1},
57 | {C,1},
58 | {C,1},
59 | {C1,6},
60 | {C1,8},
61 | {C2,6},
62 | {C_S,3},
63 | {block,1},
64 | {block,1},
65 | {S_list,2},
66 | {S_list,1},
67 | {I_S,9},
68 | {call_S,4},
69 | {call_S,3},
70 | {Elist,3},
71 | {Elist,1},
72 | {return_S,2},
73 | {IO_S,4},
74 | {IO_S,4},
75 | {IO_S,4},
76 |
--------------------------------------------------------------------------------
/AtmaCompiler.cbp:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/def.h:
--------------------------------------------------------------------------------
1 | /*#define AND 1
2 | #define ARRAY 2
3 | #define BEGIN 3
4 | #define CASE 4
5 | #define CONST 5
6 | #define DIV 6
7 | #define DO 7
8 | #define DOWNTO 8
9 | #define ElSE 9
10 | #define END 10
11 | #define FILE 11
12 | #define FOR 12
13 | #define FUNC 13
14 | #define GOTO 14
15 | #define IF 15
16 | #define IN 16
17 | #define LABEL 17
18 | #define MOD 18
19 | #define NIL 19
20 | #define NOT 20
21 | #define OF 21
22 | #define OR 22
23 | #define PACKED 23
24 | #define PROC 24
25 | #define PROG 25
26 | #define RECORD 26
27 | #define REPEAT 27
28 | #define SET 28
29 | #define THEN 29
30 | #define TO 30
31 | #define TYPE 31
32 | #define UNTIL 32
33 | #define VAR 33
34 | #define WHILE 34
35 | #define WITH 35
36 | #define INT 36
37 | #define REAL 37
38 | #define ID 38
39 | #define STRING 39
40 | #define PLUS 40
41 | #define MINUS 41
42 | #define MULTI 42
43 | #define RDIV 43
44 | #define EQ 44
45 | #define LT 45
46 | #define GT 46
47 | #define LE 47
48 | #define GE 48
49 | #define NE 49
50 | #define LR_BRAC 50
51 | #define RR_BRAC 51
52 | #define COMMA 52
53 | #define P_MARK 53
54 | #define F_STOP 54
55 | #define RANGE 55
56 | #define COLON 56
57 | #define ASSIGN 57
58 | #define SEMIC 58
59 | #define CAP 59
60 | #define EXP 60
61 | #define LS_BRAC 61
62 | #define RS_BRAC 62
63 | #define Q_MARK 63*/
/* Integer codes 34-57, presumably one per grammar non-terminal
   (NOTE(review): confirm against the code that includes this header). */
#define program 34
/* NOTE(review): subprogram_declarations is defined here as 35 and again
   further down as 42. The two definitions conflict; confirm which symbol
   code 35 was meant for. */
#define subprogram_declarations 35
#define identifier_list 36
#define declarations 37
#define compound_statement 38
#define declaration 39
#define type 40
#define standard_type 41
/* Second definition of subprogram_declarations (see the note above). */
#define subprogram_declarations 42
#define subprogram_declaration 43
#define subprogram_head 44
#define arguments 45
#define parameter_list 46
#define optional_statements 47
#define statement_list 48
#define statement 49
#define procedure_statement 50
#define variable 51
#define expression 52
#define expression_list 53
#define simple_expression 54
#define term 55
#define factor 56
#define sign 57
88 |
--------------------------------------------------------------------------------
/asm.txt:
--------------------------------------------------------------------------------
1 | .section .bss
2 | .lcomm zjh, 4
3 | .lcomm a, 4
4 | .lcomm b, 4
5 | .lcomm l, 4
6 | .lcomm sub, 4
7 | .lcomm x1, 4
8 | .lcomm c, 4
9 | .lcomm T0, 4
10 | .lcomm T1, 4
11 | .lcomm T2, 4
12 | .lcomm T3, 4
13 | .lcomm T4, 4
14 | .lcomm T5, 4
15 | .lcomm T6, 4
16 | .lcomm T7, 4
17 | .lcomm T8, 4
18 | .lcomm T9, 4
19 | .lcomm T10, 4
20 | .section .text
21 | movl $40, %eax
22 | movl %eax, x1
23 | movl $10, %eax
24 | addl $20, %eax
25 | movl %eax, T0
26 | movl T0, %eax
27 | movl %eax, a
28 | movl $12, %eax
29 | movl %eax, b
30 | movl a, %eax
31 | mull x1
32 | movl %eax, T1
33 | movl T1, %eax
34 | addl $2, %eax
35 | movl %eax, T2
36 | movl T2, %eax
37 | movl %eax, c
38 | movl a, %eax
39 | divl x1
40 | movl %eax, T3
41 | movl $2, %eax
42 | mull T3
43 | movl %eax, T4
44 | movl T4, %eax
45 | movl %eax, a
46 | movl 0, %eax
47 | cmpl a, %eax
48 | jnz .tr0
49 | jmp .fal0
50 | .tr0:
51 | movl a, %eax
52 | addl c, %eax
53 | movl %eax, T5
54 | movl T5, %eax
55 | movl %eax, a
56 | jmp .next0
57 | .fal0:
58 | movl a, %eax
59 | mull c
60 | movl %eax, T6
61 | movl T6, %eax
62 | movl %eax, a
63 | .next0:
64 | .L0:
65 | movl a, %eax
66 | subl $100, %eax
67 | movl %eax, T7
68 | movl 0, %eax
69 | cmpl T7, %eax
70 | jnz .w_tr0
71 | jmp .w_fal0
72 | .w_tr0:
73 | movl a, %eax
74 | addl $5, %eax
75 | movl %eax, T8
76 | movl T8, %eax
77 | movl %eax, a
78 | jnz .L0
79 | .w_fal0:
80 | .L1:
81 | movl 0, %eax
82 | cmpl b, %eax
83 | jnz .w_tr1
84 | jmp .w_fal1
85 | .w_tr1:
86 | movl 0, %eax
87 | cmpl b, %eax
88 | jnz .tr1
89 | jmp .fal1
90 | .tr1:
91 | movl b, %eax
92 | subl $1, %eax
93 | movl %eax, T9
94 | movl T9, %eax
95 | movl %eax, b
96 | .fal1:
97 | jnz .L1
98 | .w_fal1:
99 | movl 0, %eax
100 | cmpl c, %eax
101 | jnz .tr2
102 | jmp .fal2
103 | .tr2:
104 | .L2:
105 | movl 0, %eax
106 | cmpl a, %eax
107 | jnz .w_tr2
108 | jmp .w_fal2
109 | .w_tr2:
110 | movl a, %eax
111 | subl $1, %eax
112 | movl %eax, T10
113 | movl T10, %eax
114 | movl %eax, a
115 | jnz .L2
116 | .w_fal2:
117 | .fal2:
118 |
--------------------------------------------------------------------------------
/asm.s:
--------------------------------------------------------------------------------
1 | .section .bss
2 | .lcomm zjh, 4
3 | .lcomm a, 4
4 | .lcomm b, 4
5 | .lcomm l, 4
6 | .lcomm sub, 4
7 | .lcomm x1, 4
8 | .lcomm c, 4
9 | .lcomm T0, 4
10 | .lcomm T1, 4
11 | .lcomm T2, 4
12 | .lcomm T3, 4
13 | .lcomm T4, 4
14 | .lcomm T5, 4
15 | .lcomm T6, 4
16 | .lcomm T7, 4
17 | .lcomm T8, 4
18 | .lcomm T9, 4
19 | .lcomm T10, 4
20 | .section .text
21 | movl $40, %eax
22 | movl %eax, $x1
23 | movl $10, %eax
24 | addl $20, %eax
25 | movl %eax, $T0
26 | movl $T0, %eax
27 | movl %eax, $a
28 | movl $12.3, %eax
29 | movl %eax, $b
30 | movl $a, %eax
31 | mull $x1, %eax
32 | movl %eax, $T1
33 | movl $T1, %eax
34 | addl $2, %eax
35 | movl %eax, $T2
36 | movl $T2, %eax
37 | movl %eax, $c
38 | movl $a, %eax
39 | divl $x1, %eax
40 | movl %eax, $T3
41 | movl $2, %eax
42 | mull $T3, %eax
43 | movl %eax, $T4
44 | movl $T4, %eax
45 | movl %eax, $a
46 | movl 0, %eax
47 | cmpl a, %eax
48 | jnz .tr0
49 | jmp .fal0
50 | .tr0:
51 | movl $a, %eax
52 | addl $c, %eax
53 | movl %eax, $T5
54 | movl $T5, %eax
55 | movl %eax, $a
56 | jmp .next0
57 | .fal0:
58 | movl $a, %eax
59 | mull $c, %eax
60 | movl %eax, $T6
61 | movl $T6, %eax
62 | movl %eax, $a
63 | .next0:
64 | .L0:
65 | movl $a, %eax
66 | subl $100, %eax
67 | movl %eax, $T7
68 | movl 0, %eax
69 | cmpl T7, %eax
70 | jnz .w_tr0
71 | jmp .w_fal0
72 | .w_tr0:
73 | movl $a, %eax
74 | addl $5, %eax
75 | movl %eax, $T8
76 | movl $T8, %eax
77 | movl %eax, $a
78 | jnz .L0
79 | .w_fal0:
80 | .L1:
81 | movl 0, %eax
82 | cmpl b, %eax
83 | jnz .w_tr1
84 | jmp .w_fal1
85 | .w_tr1:
86 | movl 0, %eax
87 | cmpl b, %eax
88 | jnz .tr1
89 | jmp .fal1
90 | .tr1:
91 | movl $b, %eax
92 | subl $1, %eax
93 | movl %eax, $T9
94 | movl $T9, %eax
95 | movl %eax, $b
96 | .fal1:
97 | jnz .L1
98 | .w_fal1:
99 | movl 0, %eax
100 | cmpl c, %eax
101 | jnz .tr2
102 | jmp .fal2
103 | .tr2:
104 | .L2:
105 | movl 0, %eax
106 | cmpl a, %eax
107 | jnz .w_tr2
108 | jmp .w_fal2
109 | .w_tr2:
110 | movl $a, %eax
111 | subl $1, %eax
112 | movl %eax, $T10
113 | movl $T10, %eax
114 | movl %eax, $a
115 | jnz .L2
116 | .w_fal2:
117 | .fal2:
118 |
--------------------------------------------------------------------------------
/result_goto.txt:
--------------------------------------------------------------------------------
1 | {0.0.0.0.0.1.0.0.0.0.0.0.0.0.0}.
2 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
3 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
4 | {4.5.0.0.0.0.0.0.0.0.0.0.0.0.0}.
5 | {8.5.0.0.9.0.0.10.0.0.0.0.0.0.0}.
6 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
7 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
8 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
9 | {8.5.0.0.20.0.0.10.0.0.0.0.0.0.0}.
10 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
11 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
12 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
13 | {0.0.0.0.0.0.22.0.0.0.0.0.0.0.0}.
14 | {0.0.0.0.0.0.0.0.23.24.25.26.0.27.0}.
15 | {0.0.0.0.0.0.0.0.23.24.25.26.0.32.0}.
16 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
17 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
18 | {0.0.35.0.0.0.0.0.0.0.0.0.0.0.0}.
19 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
20 | {41.5.0.0.0.0.0.0.0.0.0.0.0.0.0}.
21 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
22 | {0.0.0.0.0.0.0.0.42.24.25.0.0.0.0}.
23 | {8.5.0.0.43.0.0.10.0.0.0.0.0.0.0}.
24 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
25 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
26 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
27 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
28 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
29 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
30 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
31 | {0.0.0.0.0.0.0.0.52.24.25.0.53.0.0}.
32 | {0.0.0.0.0.0.0.0.55.24.25.0.0.0.0}.
33 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
34 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.57}.
35 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.59}.
36 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
37 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
38 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
39 | {0.0.0.61.0.0.0.0.0.0.0.0.0.0.0}.
40 | {63.5.0.0.0.0.0.0.0.0.0.0.0.0.0}.
41 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
42 | {8.5.0.0.64.0.0.10.0.0.0.0.0.0.0}.
43 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
44 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
45 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
46 | {0.0.0.0.0.0.0.0.52.24.25.0.66.0.0}.
47 | {0.0.0.0.0.0.0.0.52.24.25.0.67.0.0}.
48 | {0.0.0.0.0.0.0.0.0.68.25.0.0.0.0}.
49 | {0.0.0.0.0.0.0.0.0.69.25.0.0.0.0}.
50 | {0.0.0.0.0.0.0.0.0.0.70.0.0.0.0}.
51 | {0.0.0.0.0.0.0.0.0.0.71.0.0.0.0}.
52 | {8.5.0.0.72.0.0.10.0.0.0.0.0.0.0}.
53 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
54 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
55 | {0.0.0.0.0.0.0.0.73.24.25.74.0.0.0}.
56 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
57 | {8.5.0.0.76.0.0.10.0.0.0.0.0.0.0}.
58 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
59 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
60 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
61 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
62 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
63 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
64 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
65 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
66 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
67 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
68 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
69 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
70 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
71 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
72 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
73 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
74 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
75 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
76 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
77 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
78 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
79 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
80 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
81 | {0.0.89.0.0.0.0.0.0.0.0.0.0.0.0}.
82 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
83 | {8.5.0.0.91.0.0.10.0.0.0.0.0.0.0}.
84 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
85 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
86 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
87 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
88 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
89 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
90 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
91 | {0.0.0.92.0.0.0.0.0.0.0.0.0.0.0}.
92 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
93 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
94 | {0.0.0.0.0.0.0.0.0.0.0.0.0.0.0}.
95 |
--------------------------------------------------------------------------------
/html网页数据抓取/html_date_get.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import os
3 |
4 | from html.parser import HTMLParser
5 |
# Module-level state; init() fills in the two strings below.
HTML_FILE = ''  # path of the HTML file that holds the parse table
HTML_STR = ''  # text of that file, as returned by read_html_file()
# Grammar productions. main() looks collected table cells up in this tuple
# with PRODUCTION.index(), so an entry only matches a cell whose text is
# identical to it character for character. get_GT() splits each entry on
# whitespace.
# NOTE(review): the entries mix "->" and "=>" and several end in "|";
# presumably this mirrors the text produced by the table generator -- confirm
# before normalising anything here.
PRODUCTION = (
"P ->A",
"A -> prog id D S|",
"S -> D S|",
"D -> List : T semi | proc id D S",
"S -> begin B end|",
"B -> B S | ",
"List -> List , id | id",
"T -> integer | real | array C of T | record D",
"C -> [ num ] C | ",

"S -> Left := E semi|",
"E -> E + H | E - H | H",
"H -> H * F | H / F | F",
"F -> ( E ) | id | num",
"Left -> id|",

"S =>while W do S endwhile",

"S => if W then S endif",
"S => if W then S else S endif",

"W => I | E",

"I => E or L | E and L | not L",
"L => ( I ) | E",

"S => read ( plist , id ) semi",
"S => write ( plist , id ) semi",
"plist => plist , id | id",
)
class MyHTMLParser(HTMLParser):
    """Collect the cells of an HTML parse table into ``self.links``.

    Every ``<td>`` carrying exactly one attribute is treated as a data cell:

    * text ``shift`` followed by more text records that following text;
    * text ``reduce`` starts accumulating the rest of the cell (entity
      references become single spaces) and the accumulated text, minus its
      first character, is recorded when the cell closes;
    * an entity reference in a cell that is neither of the above records '0';
    * any other text is recorded as is.

    ``<td>`` tags with a different number of attributes contribute the values
    of their ``rowspan`` / ``colspan`` attributes instead.

    State is kept per instance (the list used to be a class attribute shared
    by every parser). Character references are left unconverted by default so
    that ``handle_entityref`` is actually invoked; with ``convert_charrefs``
    enabled the base class folds entities into ``handle_data`` and the
    shift / reduce / empty-cell detection above never fires.
    """

    def __init__(self, *, convert_charrefs=False):
        HTMLParser.__init__(self, convert_charrefs=convert_charrefs)
        self.links = []            # collected cell values, in document order
        self.flag = False          # True while inside a data cell
        self.temp = ''             # text accumulated for a "reduce" cell
        self.shift_flag = False    # True after "shift" until the next text
        self.reduce_flag = False   # True after "reduce" until the cell closes

    def handle_starttag(self, tag, attrs):
        """Open a data cell, or record the span attributes of another <td>."""
        if tag != "td":
            return
        # NOTE(review): the source's indentation was lost; the else branch is
        # assumed to pair with the attribute-count check -- confirm.
        if len(attrs) == 1:
            self.flag = True
            return
        for name, value in attrs:
            if name in ("rowspan", "colspan"):
                self.links.append(value)

    def handle_entityref(self, data):
        """Treat an entity as a separator (reduce) or as an empty cell ('0')."""
        if not self.flag:
            return
        if self.reduce_flag:
            self.temp += ' '
        elif not self.shift_flag:
            self.links.append('0')

    def handle_endtag(self, tag):
        """Close the current cell and flush any pending "reduce" text."""
        if tag != "td":
            return
        self.flag = False
        if self.reduce_flag:
            # temp starts with the separator added after "reduce"; drop it.
            self.links.append(self.temp[1:])
            self.temp = ''
            self.reduce_flag = False

    def handle_data(self, data):
        """Record cell text according to the shift / reduce state."""
        if not self.flag:
            return
        if self.shift_flag:
            self.links.append(data)
            self.shift_flag = False
        elif self.reduce_flag:
            self.temp += data
        elif data == "shift":
            self.shift_flag = True
        elif data == "reduce":
            self.reduce_flag = True
        else:
            self.links.append(data)
def parser_test(html_str):
    """Parse HTML source text with MyHTMLParser.

    The parser is built without the ``strict`` argument: html.parser removed
    that parameter, so passing it raises TypeError on current Python versions.
    """
    parser = MyHTMLParser()
    parser.feed(html_str)
    parser.close()
88 |
89 |
def read_html_file(path):
    """Return the text of the HTML file at *path*.

    Prints a progress line before reading. When *path* does not exist a
    message is printed and the empty string is returned.
    """
    if not os.path.exists(path):
        print('the path [{}] dosen\'t exist!'.format(path))
        return ''

    print('开始读取文件:[{}]'.format(path))
    with open(path, 'r') as source:
        return source.read()
103 |
104 |
# NOTE(review): in this listing the function appears after read_html_file
# rather than inside MyHTMLParser. If it really is at module level it never
# runs as that class's constructor -- confirm against the original
# indentation.
def __init__(self):
    # Run the HTMLParser initialiser, then reset the parsing attributes.
    HTMLParser.__init__(self)
    self.links = []
    self.flag = False
    self.temp = ''
    self.shift_flag = False
    self.reduce_flag = False
112 |
113 |
def init():
    """Set HTML_FILE to the parse-table page and load its text into HTML_STR."""
    global HTML_FILE, HTML_STR
    # Location of the HTML source file.
    HTML_FILE = 'LALR分析表.htm'
    # Contents of the HTML source file.
    HTML_STR = read_html_file(HTML_FILE)
def get_GT():
    """Write one ``{head.count}.`` line per PRODUCTION entry to get_gt.txt.

    ``head`` is the first whitespace-separated token of the entry and
    ``count`` is the number of tokens minus two (the head and the arrow).
    The count therefore depends on the arrow being surrounded by spaces, and
    ``|`` separators are counted like any other token.

    The file is opened with a context manager so the handle is released even
    if a write fails.
    """
    with open('get_gt.txt', 'w') as out:
        for production in PRODUCTION:
            tokens = production.split()
            out.write('{' + tokens[0] + '.' + str(len(tokens) - 2) + '}.' + '\n')
127 |
def main():
    """Parse the HTML table and write result_action.txt / result_goto.txt.

    ``hp.links[1]`` and ``hp.links[2]`` are read as the ACTION and GOTO column
    counts; every later entry is one table cell. A cell whose text equals a
    PRODUCTION entry is replaced by the negated index of that entry. Cells
    are then written row by row as ``{a.b.c}.`` lines, ACTION columns to the
    first file and GOTO columns to the second.

    The column counts are parsed before the output files are opened, so a
    missing or malformed table raises without truncating earlier results, and
    both files are closed even if writing fails.

    NOTE(review): production index 0 is written as '0', the same text the
    parser records for an empty cell -- confirm that this is intended.
    """
    init()

    print('#' * 50)

    hp = MyHTMLParser()
    hp.feed(HTML_STR)
    hp.close()
    print(hp.links)

    links = hp.links
    n_action = int(links[1])
    n_goto = int(links[2])
    # One extra column per row, presumably the state number.
    row_width = 1 + n_action + n_goto
    total = len(links)
    print(total)

    with open('result_action.txt', 'w') as output1, \
            open('result_goto.txt', 'w') as output2:
        output1.write("ACTION表的范围为1~" + links[1])
        output2.write(",GOTO表的范围为" + str(n_action + 1) + "~"
                      + str(n_action + n_goto) + '\n')

        for i in range(3, total):
            try:
                production_index = PRODUCTION.index(links[i])
            except ValueError:
                pass
            else:
                links[i] = str(-production_index)

            # Column position within the current row. The tests below are
            # deliberately independent: a position can open a row and also
            # emit its value.
            pos = (i - 2) % row_width
            if pos == 1:
                output1.write('{')
            if 0 < pos < n_action:
                output1.write(links[i] + '.')
            if pos == n_action:
                output1.write(links[i] + '}.' + '\n')
            if pos == n_action + 1:
                output2.write('{')
            if n_action + 1 <= pos < n_action + n_goto:
                output2.write(links[i] + '.')
            if pos == n_action + n_goto:
                output2.write(links[i] + '}.' + '\n')

    # get_GT() is not run here; call it separately to regenerate get_gt.txt.
    print("wjw=" + str(len(PRODUCTION)))
177 |
# Run the conversion only when the file is executed as a script.
if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------