├── .gitattributes
├── .gitignore
├── 1.txt
├── LICENSE
├── README.md
├── params
│   ├── bart
│   │   ├── config.json
│   │   ├── merges.txt
│   │   ├── tokenizer.json
│   │   └── vocab.json
│   ├── pegasus
│   │   ├── config.json
│   │   ├── special_tokens_map.json
│   │   ├── spiece.model
│   │   ├── tokenizer.json
│   │   └── tokenizer_config.json
│   ├── t5-base
│   │   ├── config.json
│   │   ├── spiece.model
│   │   └── tokenizer.json
│   ├── t5-large
│   │   ├── config.json
│   │   ├── spiece.model
│   │   └── tokenizer.json
│   └── t5-small
│       ├── config.json
│       ├── spiece.model
│       └── tokenizer.json
├── requirements.txt
├── score.png
└── source
    ├── __pycache__
    │   ├── models.cpython-37.pyc
    │   ├── pretrained_models.cpython-37.pyc
    │   ├── settings.cpython-37.pyc
    │   ├── submodels.cpython-37.pyc
    │   └── utils.cpython-37.pyc
    ├── go.py
    ├── models.py
    ├── pretrained_models.py
    ├── settings.py
    ├── temp.py
    └── utils.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /dataset/
2 | *.bin
--------------------------------------------------------------------------------
/1.txt:
--------------------------------------------------------------------------------
1 | PegasusForConditionalGeneration(
2 | (model): PegasusModel(
3 | (shared): Embedding(96103, 1024, padding_idx=0)
4 | (encoder): PegasusEncoder(
5 | (embed_tokens): Embedding(96103, 1024, padding_idx=0)
6 | (embed_positions): PegasusSinusoidalPositionalEmbedding(512, 1024)
7 | (layers): ModuleList(
8 | (0): PegasusEncoderLayer(
9 | (self_attn): PegasusAttention(
10 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
11 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
12 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
13 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
14 | )
15 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
16 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
17 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
18 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
19 | )
20 | (1): PegasusEncoderLayer(
21 | (self_attn): PegasusAttention(
22 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
23 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
24 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
25 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
26 | )
27 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
28 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
29 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
30 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
31 | )
32 | (2): PegasusEncoderLayer(
33 | (self_attn): PegasusAttention(
34 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
35 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
36 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
37 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
38 | )
39 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
40 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
41 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
42 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
43 | )
44 | (3): PegasusEncoderLayer(
45 | (self_attn): PegasusAttention(
46 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
47 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
48 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
49 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
50 | )
51 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
52 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
53 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
54 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
55 | )
56 | (4): PegasusEncoderLayer(
57 | (self_attn): PegasusAttention(
58 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
59 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
60 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
61 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
62 | )
63 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
64 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
65 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
66 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
67 | )
68 | (5): PegasusEncoderLayer(
69 | (self_attn): PegasusAttention(
70 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
71 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
72 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
73 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
74 | )
75 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
76 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
77 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
78 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
79 | )
80 | (6): PegasusEncoderLayer(
81 | (self_attn): PegasusAttention(
82 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
83 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
84 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
85 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
86 | )
87 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
88 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
89 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
90 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
91 | )
92 | (7): PegasusEncoderLayer(
93 | (self_attn): PegasusAttention(
94 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
95 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
96 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
97 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
98 | )
99 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
100 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
101 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
102 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
103 | )
104 | (8): PegasusEncoderLayer(
105 | (self_attn): PegasusAttention(
106 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
107 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
108 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
109 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
110 | )
111 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
112 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
113 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
114 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
115 | )
116 | (9): PegasusEncoderLayer(
117 | (self_attn): PegasusAttention(
118 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
119 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
120 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
121 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
122 | )
123 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
124 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
125 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
126 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
127 | )
128 | (10): PegasusEncoderLayer(
129 | (self_attn): PegasusAttention(
130 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
131 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
132 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
133 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
134 | )
135 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
136 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
137 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
138 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
139 | )
140 | (11): PegasusEncoderLayer(
141 | (self_attn): PegasusAttention(
142 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
143 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
144 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
145 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
146 | )
147 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
148 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
149 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
150 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
151 | )
152 | (12): PegasusEncoderLayer(
153 | (self_attn): PegasusAttention(
154 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
155 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
156 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
157 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
158 | )
159 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
160 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
161 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
162 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
163 | )
164 | (13): PegasusEncoderLayer(
165 | (self_attn): PegasusAttention(
166 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
167 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
168 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
169 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
170 | )
171 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
172 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
173 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
174 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
175 | )
176 | (14): PegasusEncoderLayer(
177 | (self_attn): PegasusAttention(
178 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
179 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
180 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
181 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
182 | )
183 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
184 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
185 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
186 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
187 | )
188 | (15): PegasusEncoderLayer(
189 | (self_attn): PegasusAttention(
190 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
191 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
192 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
193 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
194 | )
195 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
196 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
197 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
198 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
199 | )
200 | )
201 | (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
202 | )
203 | (decoder): PegasusDecoder(
204 | (embed_tokens): Embedding(96103, 1024, padding_idx=0)
205 | (embed_positions): PegasusSinusoidalPositionalEmbedding(512, 1024)
206 | (layers): ModuleList(
207 | (0): PegasusDecoderLayer(
208 | (self_attn): PegasusAttention(
209 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
210 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
211 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
212 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
213 | )
214 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
215 | (encoder_attn): PegasusAttention(
216 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
217 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
218 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
219 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
220 | )
221 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
222 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
223 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
224 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
225 | )
226 | (1): PegasusDecoderLayer(
227 | (self_attn): PegasusAttention(
228 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
229 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
230 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
231 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
232 | )
233 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
234 | (encoder_attn): PegasusAttention(
235 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
236 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
237 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
238 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
239 | )
240 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
241 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
242 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
243 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
244 | )
245 | (2): PegasusDecoderLayer(
246 | (self_attn): PegasusAttention(
247 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
248 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
249 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
250 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
251 | )
252 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
253 | (encoder_attn): PegasusAttention(
254 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
255 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
256 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
257 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
258 | )
259 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
260 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
261 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
262 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
263 | )
264 | (3): PegasusDecoderLayer(
265 | (self_attn): PegasusAttention(
266 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
267 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
268 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
269 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
270 | )
271 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
272 | (encoder_attn): PegasusAttention(
273 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
274 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
275 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
276 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
277 | )
278 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
279 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
280 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
281 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
282 | )
283 | (4): PegasusDecoderLayer(
284 | (self_attn): PegasusAttention(
285 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
286 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
287 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
288 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
289 | )
290 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
291 | (encoder_attn): PegasusAttention(
292 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
293 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
294 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
295 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
296 | )
297 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
298 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
299 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
300 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
301 | )
302 | (5): PegasusDecoderLayer(
303 | (self_attn): PegasusAttention(
304 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
305 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
306 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
307 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
308 | )
309 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
310 | (encoder_attn): PegasusAttention(
311 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
312 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
313 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
314 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
315 | )
316 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
317 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
318 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
319 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
320 | )
321 | (6): PegasusDecoderLayer(
322 | (self_attn): PegasusAttention(
323 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
324 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
325 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
326 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
327 | )
328 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
329 | (encoder_attn): PegasusAttention(
330 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
331 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
332 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
333 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
334 | )
335 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
336 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
337 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
338 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
339 | )
340 | (7): PegasusDecoderLayer(
341 | (self_attn): PegasusAttention(
342 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
343 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
344 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
345 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
346 | )
347 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
348 | (encoder_attn): PegasusAttention(
349 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
350 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
351 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
352 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
353 | )
354 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
355 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
356 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
357 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
358 | )
359 | (8): PegasusDecoderLayer(
360 | (self_attn): PegasusAttention(
361 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
362 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
363 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
364 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
365 | )
366 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
367 | (encoder_attn): PegasusAttention(
368 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
369 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
370 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
371 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
372 | )
373 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
374 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
375 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
376 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
377 | )
378 | (9): PegasusDecoderLayer(
379 | (self_attn): PegasusAttention(
380 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
381 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
382 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
383 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
384 | )
385 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
386 | (encoder_attn): PegasusAttention(
387 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
388 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
389 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
390 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
391 | )
392 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
393 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
394 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
395 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
396 | )
397 | (10): PegasusDecoderLayer(
398 | (self_attn): PegasusAttention(
399 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
400 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
401 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
402 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
403 | )
404 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
405 | (encoder_attn): PegasusAttention(
406 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
407 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
408 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
409 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
410 | )
411 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
412 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
413 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
414 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
415 | )
416 | (11): PegasusDecoderLayer(
417 | (self_attn): PegasusAttention(
418 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
419 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
420 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
421 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
422 | )
423 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
424 | (encoder_attn): PegasusAttention(
425 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
426 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
427 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
428 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
429 | )
430 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
431 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
432 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
433 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
434 | )
435 | (12): PegasusDecoderLayer(
436 | (self_attn): PegasusAttention(
437 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
438 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
439 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
440 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
441 | )
442 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
443 | (encoder_attn): PegasusAttention(
444 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
445 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
446 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
447 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
448 | )
449 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
450 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
451 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
452 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
453 | )
454 | (13): PegasusDecoderLayer(
455 | (self_attn): PegasusAttention(
456 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
457 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
458 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
459 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
460 | )
461 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
462 | (encoder_attn): PegasusAttention(
463 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
464 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
465 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
466 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
467 | )
468 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
469 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
470 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
471 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
472 | )
473 | (14): PegasusDecoderLayer(
474 | (self_attn): PegasusAttention(
475 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
476 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
477 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
478 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
479 | )
480 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
481 | (encoder_attn): PegasusAttention(
482 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
483 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
484 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
485 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
486 | )
487 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
488 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
489 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
490 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
491 | )
492 | (15): PegasusDecoderLayer(
493 | (self_attn): PegasusAttention(
494 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
495 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
496 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
497 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
498 | )
499 | (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
500 | (encoder_attn): PegasusAttention(
501 | (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
502 | (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
503 | (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
504 | (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
505 | )
506 | (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
507 | (fc1): Linear(in_features=1024, out_features=4096, bias=True)
508 | (fc2): Linear(in_features=4096, out_features=1024, bias=True)
509 | (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
510 | )
511 | )
512 | (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
513 | )
514 | )
515 | (lm_head): Linear(in_features=1024, out_features=96103, bias=False)
516 | )
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TextSum
2 | ## 0 Usage
3 | 1. The project's dependencies are listed in requirements.txt; install them with `pip install -r requirements.txt`
4 | 2. The project uses pretrained models provided by `transformers`; the models, config files, vocabulary files, etc. can be downloaded [here](https://huggingface.co/models)
5 | 3. Before running the project, change the path settings in /source/settings.py to the actual absolute paths on your machine
6 | 4. Project structure:
7 | TextSum
8 | --dataset (datasets, vocabulary, word-frequency table)
9 | --params (pretrained models, saved model parameters)
10 | --source (source code)
11 | ----go.py (main entry point)
12 | ----pretrained_models.py (pretrained models)
13 | ----models.py (custom models)
14 | ----settings.py (project settings)
15 | ----utils.py (utility functions)
16 | 5. Run the project with `python go.py`; the optional command-line arguments are listed below (example invocations follow the list):
17 | ```
18 | -h, --help                          show this help message and exit
19 | -p, --preprocess                    preprocess the data
20 | -b, --build                         build the word-frequency table
21 | -m, --make                          build the vocabulary
22 | -t MODEL_NAME, --train              train a model
23 |
24 | -f MODEL_NAME, --fine_tune          fine-tune a pretrained model
25 |
26 | -g MODEL_NAME PARAMS_PATH, --gen    generate the submission
27 |
28 | ```
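
For illustration, typical invocations might look like the following (a sketch: the accepted model names and the parameter-file path are defined in the project's source, so the concrete values below are assumptions):

```
python go.py -p                            # preprocess the data
python go.py -b                            # build the word-frequency table
python go.py -m                            # build the vocabulary
python go.py -t gru                        # train a custom model
python go.py -f bart                       # fine-tune a pretrained model
python go.py -g bart ../params/model.bin   # generate the submission
```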
29 | ## 1 Data Processing
30 | Data processing in this project consists of three parts: data cleaning and splitting, vocabulary generation, and tensor conversion (a minimal sketch of the pipeline follows this list)
31 | + Data cleaning and splitting
32 |   + Clean the raw data with regular expressions, removing information irrelevant to the task
33 |   + Split a validation set off from the original training set
34 |   + Convert the original CSV files into JSON files with one text per record
35 | + Vocabulary generation
36 |   Count the frequency of every word appearing in the dataset and build the vocabulary from a fixed number of the most frequent words
37 | + Tensor conversion
38 |   Read the preprocessed JSON files and, after further processing, convert the text dataset into batched Tensors
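
A minimal sketch of this pipeline, assuming hypothetical column names, file paths, and cleaning patterns (the real ones live in the project's source):

```python
import json
import re

import pandas as pd

def clean(text: str) -> str:
    # Remove task-irrelevant artifacts; the actual patterns are project-specific.
    text = re.sub(r"<[^>]+>", " ", text)      # e.g. strip markup remnants
    return re.sub(r"\s+", " ", text).strip()  # collapse whitespace

# Convert the original CSV into per-record JSON (field names assumed).
df = pd.read_csv("dataset/train.csv")
records = [{"text": clean(r.text), "summary": clean(r.summary)}
           for r in df.itertuples()]

# Split a validation set off from the original training set.
val_size = len(records) // 10
with open("dataset/val.json", "w") as f:
    json.dump(records[:val_size], f, ensure_ascii=False)
with open("dataset/train.json", "w") as f:
    json.dump(records[val_size:], f, ensure_ascii=False)
```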
39 | ## 2 Model Architecture
40 | This project uses `pytorch` to implement the base model architectures, a custom loss function, an optimizer, and the training and validation loops;
41 | it also uses the pretrained models (bart, t5, pegasus) and the APIs provided by `transformers` to implement fine-tuning and inference
42 | The network structures of some of the models are given below
43 | 1. The GRU encoder-decoder architecture:
44 | ```python
45 | EncoderDecoder(
46 | (encoder): GruEncoder(
47 | (embdding): Embedding(10004, 512)
48 | (rnn): GRU(512, 256, num_layers=2)
49 | )
50 | (decoder): GruDecoder(
51 | (embdding): Embedding(10004, 512)
52 | (rnn): GRU(768, 256, num_layers=2)
53 | (dense): Linear(in_features=256, out_features=10004, bias=True)
54 | )
55 | )
56 | ```
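
Reading the printout above: the decoder GRU's input size is 768 = 512 (token embedding) + 256 (encoder hidden), which suggests the encoder context vector is concatenated onto each decoder input step. A sketch of such a decoder under that assumption (not the project's actual code):

```python
import torch
from torch import nn

class GruDecoder(nn.Module):
    def __init__(self, vocab=10004, emb=512, hidden=256, layers=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab, emb)
        # Each input step is the token embedding concatenated with the context.
        self.rnn = nn.GRU(emb + hidden, hidden, num_layers=layers)
        self.dense = nn.Linear(hidden, vocab)

    def forward(self, tokens, context, state):
        # tokens: (seq, batch); context: (batch, hidden); state: (layers, batch, hidden)
        x = self.embedding(tokens)                            # (seq, batch, emb)
        ctx = context.unsqueeze(0).expand(x.size(0), -1, -1)  # repeat over steps
        out, state = self.rnn(torch.cat([x, ctx], dim=-1), state)
        return self.dense(out), state                         # (seq, batch, vocab)
```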
57 | 2. t5-small
58 | ```python
59 | T5ForConditionalGeneration(
60 | (shared): Embedding(32128, 512)
61 | (encoder): T5Stack(
62 | (embed_tokens): Embedding(32128, 512)
63 | (block): ModuleList(
64 | (0): T5Block(
65 | (layer): ModuleList(
66 | (0): T5LayerSelfAttention(
67 | (SelfAttention): T5Attention(
68 | (q): Linear(in_features=512, out_features=512, bias=False)
69 | (k): Linear(in_features=512, out_features=512, bias=False)
70 | (v): Linear(in_features=512, out_features=512, bias=False)
71 | (o): Linear(in_features=512, out_features=512, bias=False)
72 | (relative_attention_bias): Embedding(32, 8)
73 | )
74 | (layer_norm): T5LayerNorm()
75 | (dropout): Dropout(p=0.1, inplace=False)
76 | )
77 | (1): T5LayerFF(
78 | (DenseReluDense): T5DenseReluDense(
79 | (wi): Linear(in_features=512, out_features=2048, bias=False)
80 | (wo): Linear(in_features=2048, out_features=512, bias=False)
81 | (dropout): Dropout(p=0.1, inplace=False)
82 | )
83 | (layer_norm): T5LayerNorm()
84 | (dropout): Dropout(p=0.1, inplace=False)
85 | )
86 | )
87 | )
88 | (1): T5Block(
89 | (layer): ModuleList(
90 | (0): T5LayerSelfAttention(
91 | (SelfAttention): T5Attention(
92 | (q): Linear(in_features=512, out_features=512, bias=False)
93 | (k): Linear(in_features=512, out_features=512, bias=False)
94 | (v): Linear(in_features=512, out_features=512, bias=False)
95 | (o): Linear(in_features=512, out_features=512, bias=False)
96 | )
97 | (layer_norm): T5LayerNorm()
98 | (dropout): Dropout(p=0.1, inplace=False)
99 | )
100 | (1): T5LayerFF(
101 | (DenseReluDense): T5DenseReluDense(
102 | (wi): Linear(in_features=512, out_features=2048, bias=False)
103 | (wo): Linear(in_features=2048, out_features=512, bias=False)
104 | (dropout): Dropout(p=0.1, inplace=False)
105 | )
106 | (layer_norm): T5LayerNorm()
107 | (dropout): Dropout(p=0.1, inplace=False)
108 | )
109 | )
110 | )
111 | (2): T5Block(
112 | (layer): ModuleList(
113 | (0): T5LayerSelfAttention(
114 | (SelfAttention): T5Attention(
115 | (q): Linear(in_features=512, out_features=512, bias=False)
116 | (k): Linear(in_features=512, out_features=512, bias=False)
117 | (v): Linear(in_features=512, out_features=512, bias=False)
118 | (o): Linear(in_features=512, out_features=512, bias=False)
119 | )
120 | (layer_norm): T5LayerNorm()
121 | (dropout): Dropout(p=0.1, inplace=False)
122 | )
123 | (1): T5LayerFF(
124 | (DenseReluDense): T5DenseReluDense(
125 | (wi): Linear(in_features=512, out_features=2048, bias=False)
126 | (wo): Linear(in_features=2048, out_features=512, bias=False)
127 | (dropout): Dropout(p=0.1, inplace=False)
128 | )
129 | (layer_norm): T5LayerNorm()
130 | (dropout): Dropout(p=0.1, inplace=False)
131 | )
132 | )
133 | )
134 | (3): T5Block(
135 | (layer): ModuleList(
136 | (0): T5LayerSelfAttention(
137 | (SelfAttention): T5Attention(
138 | (q): Linear(in_features=512, out_features=512, bias=False)
139 | (k): Linear(in_features=512, out_features=512, bias=False)
140 | (v): Linear(in_features=512, out_features=512, bias=False)
141 | (o): Linear(in_features=512, out_features=512, bias=False)
142 | )
143 | (layer_norm): T5LayerNorm()
144 | (dropout): Dropout(p=0.1, inplace=False)
145 | )
146 | (1): T5LayerFF(
147 | (DenseReluDense): T5DenseReluDense(
148 | (wi): Linear(in_features=512, out_features=2048, bias=False)
149 | (wo): Linear(in_features=2048, out_features=512, bias=False)
150 | (dropout): Dropout(p=0.1, inplace=False)
151 | )
152 | (layer_norm): T5LayerNorm()
153 | (dropout): Dropout(p=0.1, inplace=False)
154 | )
155 | )
156 | )
157 | (4): T5Block(
158 | (layer): ModuleList(
159 | (0): T5LayerSelfAttention(
160 | (SelfAttention): T5Attention(
161 | (q): Linear(in_features=512, out_features=512, bias=False)
162 | (k): Linear(in_features=512, out_features=512, bias=False)
163 | (v): Linear(in_features=512, out_features=512, bias=False)
164 | (o): Linear(in_features=512, out_features=512, bias=False)
165 | )
166 | (layer_norm): T5LayerNorm()
167 | (dropout): Dropout(p=0.1, inplace=False)
168 | )
169 | (1): T5LayerFF(
170 | (DenseReluDense): T5DenseReluDense(
171 | (wi): Linear(in_features=512, out_features=2048, bias=False)
172 | (wo): Linear(in_features=2048, out_features=512, bias=False)
173 | (dropout): Dropout(p=0.1, inplace=False)
174 | )
175 | (layer_norm): T5LayerNorm()
176 | (dropout): Dropout(p=0.1, inplace=False)
177 | )
178 | )
179 | )
180 | (5): T5Block(
181 | (layer): ModuleList(
182 | (0): T5LayerSelfAttention(
183 | (SelfAttention): T5Attention(
184 | (q): Linear(in_features=512, out_features=512, bias=False)
185 | (k): Linear(in_features=512, out_features=512, bias=False)
186 | (v): Linear(in_features=512, out_features=512, bias=False)
187 | (o): Linear(in_features=512, out_features=512, bias=False)
188 | )
189 | (layer_norm): T5LayerNorm()
190 | (dropout): Dropout(p=0.1, inplace=False)
191 | )
192 | (1): T5LayerFF(
193 | (DenseReluDense): T5DenseReluDense(
194 | (wi): Linear(in_features=512, out_features=2048, bias=False)
195 | (wo): Linear(in_features=2048, out_features=512, bias=False)
196 | (dropout): Dropout(p=0.1, inplace=False)
197 | )
198 | (layer_norm): T5LayerNorm()
199 | (dropout): Dropout(p=0.1, inplace=False)
200 | )
201 | )
202 | )
203 | )
204 | (final_layer_norm): T5LayerNorm()
205 | (dropout): Dropout(p=0.1, inplace=False)
206 | )
207 | (decoder): T5Stack(
208 | (embed_tokens): Embedding(32128, 512)
209 | (block): ModuleList(
210 | (0): T5Block(
211 | (layer): ModuleList(
212 | (0): T5LayerSelfAttention(
213 | (SelfAttention): T5Attention(
214 | (q): Linear(in_features=512, out_features=512, bias=False)
215 | (k): Linear(in_features=512, out_features=512, bias=False)
216 | (v): Linear(in_features=512, out_features=512, bias=False)
217 | (o): Linear(in_features=512, out_features=512, bias=False)
218 | (relative_attention_bias): Embedding(32, 8)
219 | )
220 | (layer_norm): T5LayerNorm()
221 | (dropout): Dropout(p=0.1, inplace=False)
222 | )
223 | (1): T5LayerCrossAttention(
224 | (EncDecAttention): T5Attention(
225 | (q): Linear(in_features=512, out_features=512, bias=False)
226 | (k): Linear(in_features=512, out_features=512, bias=False)
227 | (v): Linear(in_features=512, out_features=512, bias=False)
228 | (o): Linear(in_features=512, out_features=512, bias=False)
229 | )
230 | (layer_norm): T5LayerNorm()
231 | (dropout): Dropout(p=0.1, inplace=False)
232 | )
233 | (2): T5LayerFF(
234 | (DenseReluDense): T5DenseReluDense(
235 | (wi): Linear(in_features=512, out_features=2048, bias=False)
236 | (wo): Linear(in_features=2048, out_features=512, bias=False)
237 | (dropout): Dropout(p=0.1, inplace=False)
238 | )
239 | (layer_norm): T5LayerNorm()
240 | (dropout): Dropout(p=0.1, inplace=False)
241 | )
242 | )
243 | )
244 | (1): T5Block(
245 | (layer): ModuleList(
246 | (0): T5LayerSelfAttention(
247 | (SelfAttention): T5Attention(
248 | (q): Linear(in_features=512, out_features=512, bias=False)
249 | (k): Linear(in_features=512, out_features=512, bias=False)
250 | (v): Linear(in_features=512, out_features=512, bias=False)
251 | (o): Linear(in_features=512, out_features=512, bias=False)
252 | )
253 | (layer_norm): T5LayerNorm()
254 | (dropout): Dropout(p=0.1, inplace=False)
255 | )
256 | (1): T5LayerCrossAttention(
257 | (EncDecAttention): T5Attention(
258 | (q): Linear(in_features=512, out_features=512, bias=False)
259 | (k): Linear(in_features=512, out_features=512, bias=False)
260 | (v): Linear(in_features=512, out_features=512, bias=False)
261 | (o): Linear(in_features=512, out_features=512, bias=False)
262 | )
263 | (layer_norm): T5LayerNorm()
264 | (dropout): Dropout(p=0.1, inplace=False)
265 | )
266 | (2): T5LayerFF(
267 | (DenseReluDense): T5DenseReluDense(
268 | (wi): Linear(in_features=512, out_features=2048, bias=False)
269 | (wo): Linear(in_features=2048, out_features=512, bias=False)
270 | (dropout): Dropout(p=0.1, inplace=False)
271 | )
272 | (layer_norm): T5LayerNorm()
273 | (dropout): Dropout(p=0.1, inplace=False)
274 | )
275 | )
276 | )
277 | (2): T5Block(
278 | (layer): ModuleList(
279 | (0): T5LayerSelfAttention(
280 | (SelfAttention): T5Attention(
281 | (q): Linear(in_features=512, out_features=512, bias=False)
282 | (k): Linear(in_features=512, out_features=512, bias=False)
283 | (v): Linear(in_features=512, out_features=512, bias=False)
284 | (o): Linear(in_features=512, out_features=512, bias=False)
285 | )
286 | (layer_norm): T5LayerNorm()
287 | (dropout): Dropout(p=0.1, inplace=False)
288 | )
289 | (1): T5LayerCrossAttention(
290 | (EncDecAttention): T5Attention(
291 | (q): Linear(in_features=512, out_features=512, bias=False)
292 | (k): Linear(in_features=512, out_features=512, bias=False)
293 | (v): Linear(in_features=512, out_features=512, bias=False)
294 | (o): Linear(in_features=512, out_features=512, bias=False)
295 | )
296 | (layer_norm): T5LayerNorm()
297 | (dropout): Dropout(p=0.1, inplace=False)
298 | )
299 | (2): T5LayerFF(
300 | (DenseReluDense): T5DenseReluDense(
301 | (wi): Linear(in_features=512, out_features=2048, bias=False)
302 | (wo): Linear(in_features=2048, out_features=512, bias=False)
303 | (dropout): Dropout(p=0.1, inplace=False)
304 | )
305 | (layer_norm): T5LayerNorm()
306 | (dropout): Dropout(p=0.1, inplace=False)
307 | )
308 | )
309 | )
310 | (3): T5Block(
311 | (layer): ModuleList(
312 | (0): T5LayerSelfAttention(
313 | (SelfAttention): T5Attention(
314 | (q): Linear(in_features=512, out_features=512, bias=False)
315 | (k): Linear(in_features=512, out_features=512, bias=False)
316 | (v): Linear(in_features=512, out_features=512, bias=False)
317 | (o): Linear(in_features=512, out_features=512, bias=False)
318 | )
319 | (layer_norm): T5LayerNorm()
320 | (dropout): Dropout(p=0.1, inplace=False)
321 | )
322 | (1): T5LayerCrossAttention(
323 | (EncDecAttention): T5Attention(
324 | (q): Linear(in_features=512, out_features=512, bias=False)
325 | (k): Linear(in_features=512, out_features=512, bias=False)
326 | (v): Linear(in_features=512, out_features=512, bias=False)
327 | (o): Linear(in_features=512, out_features=512, bias=False)
328 | )
329 | (layer_norm): T5LayerNorm()
330 | (dropout): Dropout(p=0.1, inplace=False)
331 | )
332 | (2): T5LayerFF(
333 | (DenseReluDense): T5DenseReluDense(
334 | (wi): Linear(in_features=512, out_features=2048, bias=False)
335 | (wo): Linear(in_features=2048, out_features=512, bias=False)
336 | (dropout): Dropout(p=0.1, inplace=False)
337 | )
338 | (layer_norm): T5LayerNorm()
339 | (dropout): Dropout(p=0.1, inplace=False)
340 | )
341 | )
342 | )
343 | (4): T5Block(
344 | (layer): ModuleList(
345 | (0): T5LayerSelfAttention(
346 | (SelfAttention): T5Attention(
347 | (q): Linear(in_features=512, out_features=512, bias=False)
348 | (k): Linear(in_features=512, out_features=512, bias=False)
349 | (v): Linear(in_features=512, out_features=512, bias=False)
350 | (o): Linear(in_features=512, out_features=512, bias=False)
351 | )
352 | (layer_norm): T5LayerNorm()
353 | (dropout): Dropout(p=0.1, inplace=False)
354 | )
355 | (1): T5LayerCrossAttention(
356 | (EncDecAttention): T5Attention(
357 | (q): Linear(in_features=512, out_features=512, bias=False)
358 | (k): Linear(in_features=512, out_features=512, bias=False)
359 | (v): Linear(in_features=512, out_features=512, bias=False)
360 | (o): Linear(in_features=512, out_features=512, bias=False)
361 | )
362 | (layer_norm): T5LayerNorm()
363 | (dropout): Dropout(p=0.1, inplace=False)
364 | )
365 | (2): T5LayerFF(
366 | (DenseReluDense): T5DenseReluDense(
367 | (wi): Linear(in_features=512, out_features=2048, bias=False)
368 | (wo): Linear(in_features=2048, out_features=512, bias=False)
369 | (dropout): Dropout(p=0.1, inplace=False)
370 | )
371 | (layer_norm): T5LayerNorm()
372 | (dropout): Dropout(p=0.1, inplace=False)
373 | )
374 | )
375 | )
376 | (5): T5Block(
377 | (layer): ModuleList(
378 | (0): T5LayerSelfAttention(
379 | (SelfAttention): T5Attention(
380 | (q): Linear(in_features=512, out_features=512, bias=False)
381 | (k): Linear(in_features=512, out_features=512, bias=False)
382 | (v): Linear(in_features=512, out_features=512, bias=False)
383 | (o): Linear(in_features=512, out_features=512, bias=False)
384 | )
385 | (layer_norm): T5LayerNorm()
386 | (dropout): Dropout(p=0.1, inplace=False)
387 | )
388 | (1): T5LayerCrossAttention(
389 | (EncDecAttention): T5Attention(
390 | (q): Linear(in_features=512, out_features=512, bias=False)
391 | (k): Linear(in_features=512, out_features=512, bias=False)
392 | (v): Linear(in_features=512, out_features=512, bias=False)
393 | (o): Linear(in_features=512, out_features=512, bias=False)
394 | )
395 | (layer_norm): T5LayerNorm()
396 | (dropout): Dropout(p=0.1, inplace=False)
397 | )
398 | (2): T5LayerFF(
399 | (DenseReluDense): T5DenseReluDense(
400 | (wi): Linear(in_features=512, out_features=2048, bias=False)
401 | (wo): Linear(in_features=2048, out_features=512, bias=False)
402 | (dropout): Dropout(p=0.1, inplace=False)
403 | )
404 | (layer_norm): T5LayerNorm()
405 | (dropout): Dropout(p=0.1, inplace=False)
406 | )
407 | )
408 | )
409 | )
410 | (final_layer_norm): T5LayerNorm()
411 | (dropout): Dropout(p=0.1, inplace=False)
412 | )
413 | (lm_head): Linear(in_features=512, out_features=32128, bias=False)
414 | )
415 | ```
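
Loading and running this model through `transformers` might look like the following sketch (the repo's params/t5-small directory holds only the config and tokenizer files, since *.bin weights are gitignored, so this loads the checkpoint from the Hugging Face hub instead):

```python
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

article = "..."  # the document to summarize
inputs = tokenizer("summarize: " + article, return_tensors="pt")
ids = model.generate(inputs.input_ids, num_beams=4, max_length=64)
print(tokenizer.decode(ids[0], skip_special_tokens=True))
```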
416 |
417 | ## 3 Final Score
418 | The final score of this project is 0.32107609.
419 | ![score](../score.png)
420 | The parameter settings were as follows (a sketch of the corresponding generation call follows the list):
421 | + Model: bart-large-cnn
422 | + Number of beams: 2
423 | + Maximum input sequence length: 1024
424 | + Activation function: gelu
425 | + Minimum generated sequence length: 30
426 | + Maximum generated sequence length: 590
427 | + Early stopping enabled (generation stops once `</s>` is predicted): yes
428 |
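A minimal sketch of how these settings map onto a `transformers` generation call (an illustration under assumptions: the `facebook/bart-large-cnn` hub id and the input text are placeholders, not taken from this repository):

```python
from transformers import BartTokenizer, BartForConditionalGeneration

tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")

text = "..."  # article to summarize (placeholder)
inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
summary_ids = model.generate(
    inputs.input_ids,
    num_beams=2,          # number of beams
    min_length=30,        # minimum generated length
    max_length=590,       # maximum generated length
    early_stopping=True,  # stop a beam once </s> is predicted
)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```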
--------------------------------------------------------------------------------
/params/bart/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "_num_labels": 3,
3 | "activation_dropout": 0.0,
4 | "activation_function": "gelu",
5 | "add_final_layer_norm": false,
6 | "architectures": [
7 | "BartForConditionalGeneration"
8 | ],
9 | "attention_dropout": 0.0,
10 | "bos_token_id": 0,
11 | "classif_dropout": 0.0,
12 | "classifier_dropout": 0.0,
13 | "d_model": 1024,
14 | "decoder_attention_heads": 16,
15 | "decoder_ffn_dim": 4096,
16 | "decoder_layerdrop": 0.0,
17 | "decoder_layers": 12,
18 | "decoder_start_token_id": 2,
19 | "dropout": 0.1,
20 | "early_stopping": true,
21 | "encoder_attention_heads": 16,
22 | "encoder_ffn_dim": 4096,
23 | "encoder_layerdrop": 0.0,
24 | "encoder_layers": 12,
25 | "eos_token_id": 2,
26 | "force_bos_token_to_be_generated": true,
27 | "forced_bos_token_id": 0,
28 | "forced_eos_token_id": 2,
29 | "gradient_checkpointing": false,
30 | "id2label": {
31 | "0": "LABEL_0",
32 | "1": "LABEL_1",
33 | "2": "LABEL_2"
34 | },
35 | "init_std": 0.02,
36 | "is_encoder_decoder": true,
37 | "label2id": {
38 | "LABEL_0": 0,
39 | "LABEL_1": 1,
40 | "LABEL_2": 2
41 | },
42 | "length_penalty": 2.0,
43 | "max_length": 142,
44 | "max_position_embeddings": 1024,
45 | "min_length": 56,
46 | "model_type": "bart",
47 | "no_repeat_ngram_size": 3,
48 | "normalize_before": false,
49 | "num_beams": 4,
50 | "num_hidden_layers": 12,
51 | "output_past": true,
52 | "pad_token_id": 1,
53 | "prefix": " ",
54 | "scale_embedding": false,
55 | "task_specific_params": {
56 | "summarization": {
57 | "early_stopping": true,
58 | "length_penalty": 2.0,
59 | "max_length": 142,
60 | "min_length": 56,
61 | "no_repeat_ngram_size": 3,
62 | "num_beams": 4
63 | }
64 | },
65 | "transformers_version": "4.7.0.dev0",
66 | "use_cache": true,
67 | "vocab_size": 50264
68 | }
69 |
--------------------------------------------------------------------------------
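These config files are standard `transformers` configurations, so the one above can be loaded directly from the local `params/` directory (a minimal sketch; the relative path assumes the repository root as the working directory):

```python
from transformers import BartConfig

# Loads params/bart/config.json shown above; generation defaults such as
# num_beams=4 and max_length=142 come straight from that file.
config = BartConfig.from_pretrained("params/bart")
print(config.d_model, config.num_beams, config.max_length)  # 1024 4 142
```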
/params/pegasus/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "_name_or_path": "./",
3 | "activation_dropout": 0.1,
4 | "activation_function": "relu",
5 | "add_bias_logits": false,
6 | "add_final_layer_norm": true,
7 | "architectures": [
8 | "PegasusForConditionalGeneration"
9 | ],
10 | "attention_dropout": 0.1,
11 | "bos_token_id": 0,
12 | "classif_dropout": 0.0,
13 | "classifier_dropout": 0.0,
14 | "d_model": 1024,
15 | "decoder_attention_heads": 16,
16 | "decoder_ffn_dim": 4096,
17 | "decoder_layerdrop": 0.0,
18 | "decoder_layers": 16,
19 | "decoder_start_token_id": 0,
20 | "do_blenderbot_90_layernorm": false,
21 | "dropout": 0.1,
22 | "encoder_attention_heads": 16,
23 | "encoder_ffn_dim": 4096,
24 | "encoder_layerdrop": 0.0,
25 | "encoder_layers": 16,
26 | "eos_token_id": 1,
27 | "extra_pos_embeddings": 0,
28 | "force_bos_token_to_be_generated": false,
29 | "forced_eos_token_id": 1,
30 | "gradient_checkpointing": false,
31 | "id2label": {
32 | "0": "LABEL_0",
33 | "1": "LABEL_1",
34 | "2": "LABEL_2"
35 | },
36 | "init_std": 0.02,
37 | "is_encoder_decoder": true,
38 | "label2id": {
39 | "LABEL_0": 0,
40 | "LABEL_1": 1,
41 | "LABEL_2": 2
42 | },
43 | "length_penalty": 0.6,
44 | "max_length": 64,
45 | "max_position_embeddings": 512,
46 | "model_type": "pegasus",
47 | "normalize_before": true,
48 | "normalize_embedding": false,
49 | "num_beams": 8,
50 | "num_hidden_layers": 16,
51 | "pad_token_id": 0,
52 | "scale_embedding": true,
53 | "static_position_embeddings": true,
54 | "transformers_version": "4.11.0.dev0",
55 | "use_cache": true,
56 | "vocab_size": 96103
57 | }
58 |
--------------------------------------------------------------------------------
/params/pegasus/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"eos_token": "", "unk_token": "", "pad_token": ""}
--------------------------------------------------------------------------------
/params/pegasus/spiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/params/pegasus/spiece.model
--------------------------------------------------------------------------------
/params/pegasus/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"model_max_length": 512, "special_tokens_map_file": null, "full_tokenizer_file": null}
--------------------------------------------------------------------------------
/params/t5-base/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "architectures": [
3 | "T5WithLMHeadModel"
4 | ],
5 | "d_ff": 3072,
6 | "d_kv": 64,
7 | "d_model": 768,
8 | "decoder_start_token_id": 0,
9 | "dropout_rate": 0.1,
10 | "eos_token_id": 1,
11 | "initializer_factor": 1.0,
12 | "is_encoder_decoder": true,
13 | "layer_norm_epsilon": 1e-06,
14 | "model_type": "t5",
15 | "n_positions": 512,
16 | "num_heads": 12,
17 | "num_layers": 12,
18 | "output_past": true,
19 | "pad_token_id": 0,
20 | "relative_attention_num_buckets": 32,
21 | "task_specific_params": {
22 | "summarization": {
23 | "early_stopping": true,
24 | "length_penalty": 2.0,
25 | "max_length": 200,
26 | "min_length": 30,
27 | "no_repeat_ngram_size": 3,
28 | "num_beams": 4,
29 | "prefix": "summarize: "
30 | },
31 | "translation_en_to_de": {
32 | "early_stopping": true,
33 | "max_length": 300,
34 | "num_beams": 4,
35 | "prefix": "translate English to German: "
36 | },
37 | "translation_en_to_fr": {
38 | "early_stopping": true,
39 | "max_length": 300,
40 | "num_beams": 4,
41 | "prefix": "translate English to French: "
42 | },
43 | "translation_en_to_ro": {
44 | "early_stopping": true,
45 | "max_length": 300,
46 | "num_beams": 4,
47 | "prefix": "translate English to Romanian: "
48 | }
49 | },
50 | "vocab_size": 32128
51 | }
52 |
--------------------------------------------------------------------------------
/params/t5-base/spiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/params/t5-base/spiece.model
--------------------------------------------------------------------------------
/params/t5-large/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "architectures": [
3 | "T5WithLMHeadModel"
4 | ],
5 | "d_ff": 4096,
6 | "d_kv": 64,
7 | "d_model": 1024,
8 | "decoder_start_token_id": 0,
9 | "dropout_rate": 0.1,
10 | "eos_token_id": 1,
11 | "initializer_factor": 1.0,
12 | "is_encoder_decoder": true,
13 | "layer_norm_epsilon": 1e-06,
14 | "model_type": "t5",
15 | "n_positions": 512,
16 | "num_heads": 16,
17 | "num_layers": 24,
18 | "output_past": true,
19 | "pad_token_id": 0,
20 | "relative_attention_num_buckets": 32,
21 | "task_specific_params": {
22 | "summarization": {
23 | "early_stopping": true,
24 | "length_penalty": 2.0,
25 | "max_length": 200,
26 | "min_length": 30,
27 | "no_repeat_ngram_size": 3,
28 | "num_beams": 4,
29 | "prefix": "summarize: "
30 | },
31 | "translation_en_to_de": {
32 | "early_stopping": true,
33 | "max_length": 300,
34 | "num_beams": 4,
35 | "prefix": "translate English to German: "
36 | },
37 | "translation_en_to_fr": {
38 | "early_stopping": true,
39 | "max_length": 300,
40 | "num_beams": 4,
41 | "prefix": "translate English to French: "
42 | },
43 | "translation_en_to_ro": {
44 | "early_stopping": true,
45 | "max_length": 300,
46 | "num_beams": 4,
47 | "prefix": "translate English to Romanian: "
48 | }
49 | },
50 | "vocab_size": 32128
51 | }
52 |
--------------------------------------------------------------------------------
/params/t5-large/spiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/params/t5-large/spiece.model
--------------------------------------------------------------------------------
/params/t5-small/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "architectures": [
3 | "T5WithLMHeadModel"
4 | ],
5 | "d_ff": 2048,
6 | "d_kv": 64,
7 | "d_model": 512,
8 | "decoder_start_token_id": 0,
9 | "dropout_rate": 0.1,
10 | "eos_token_id": 1,
11 | "initializer_factor": 1.0,
12 | "is_encoder_decoder": true,
13 | "layer_norm_epsilon": 1e-06,
14 | "model_type": "t5",
15 | "n_positions": 512,
16 | "num_heads": 8,
17 | "num_layers": 6,
18 | "output_past": true,
19 | "pad_token_id": 0,
20 | "relative_attention_num_buckets": 32,
21 | "task_specific_params": {
22 | "summarization": {
23 | "early_stopping": true,
24 | "length_penalty": 2.0,
25 | "max_length": 450,
26 | "min_length": 30,
27 | "no_repeat_ngram_size": 3,
28 | "num_beams": 4,
29 | "prefix": "summarize: "
30 | },
31 | "translation_en_to_de": {
32 | "early_stopping": true,
33 | "max_length": 300,
34 | "num_beams": 4,
35 | "prefix": "translate English to German: "
36 | },
37 | "translation_en_to_fr": {
38 | "early_stopping": true,
39 | "max_length": 300,
40 | "num_beams": 4,
41 | "prefix": "translate English to French: "
42 | },
43 | "translation_en_to_ro": {
44 | "early_stopping": true,
45 | "max_length": 300,
46 | "num_beams": 4,
47 | "prefix": "translate English to Romanian: "
48 | }
49 | },
50 | "vocab_size": 32128
51 | }
--------------------------------------------------------------------------------
/params/t5-small/spiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/params/t5-small/spiece.model
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | tqdm
3 | transformers
4 | pandas
5 | rouge
--------------------------------------------------------------------------------
/score.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/score.png
--------------------------------------------------------------------------------
/source/__pycache__/models.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/source/__pycache__/models.cpython-37.pyc
--------------------------------------------------------------------------------
/source/__pycache__/pretrained_models.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/source/__pycache__/pretrained_models.cpython-37.pyc
--------------------------------------------------------------------------------
/source/__pycache__/settings.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/source/__pycache__/settings.cpython-37.pyc
--------------------------------------------------------------------------------
/source/__pycache__/submodels.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/source/__pycache__/submodels.cpython-37.pyc
--------------------------------------------------------------------------------
/source/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoyu2018/TextSum/20bbd5aec1051c59c880a931aa5eba6c3e55ebed/source/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/source/go.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import utils
3 | from models import GetModel
4 | import pretrained_models as pm
5 |
6 | parser=argparse.ArgumentParser()
7 | parser.add_argument("-p","--preprocess",help="预处理数据",action="store_true")
8 | parser.add_argument("-b","--build",help="建立词频表",action="store_true")
9 | parser.add_argument("-m","--make",help="建立词典",action="store_true")
10 | parser.add_argument("-t","--train",help="训练",type=str)
11 | parser.add_argument("-f","--fine_tune",help="微调",type=str)
12 | parser.add_argument("-g","--gen",help="生成submission",nargs=2,type=str)
13 |
14 |
15 | args=parser.parse_args()
16 |
17 | def main():
18 | if(args.preprocess):
19 |         print("--------------Starting data preprocessing--------------")
20 | try:
21 | utils.Preprocess()
22 | except Exception as e:
23 | print(e)
24 |         print("--------------Data preprocessing finished--------------")
25 | exit(0)
26 | if(args.build):
27 |         print("--------------Building the word-frequency table--------------")
28 | try:
29 | utils.BuildVocabCounter()
30 | except Exception as e:
31 | print(e)
32 |         print("--------------Word-frequency table built--------------")
33 | exit(0)
34 | if(args.make):
35 |         print("--------------Building the vocabulary--------------")
36 | try:
37 | utils.MakeVocab()
38 | except Exception as e:
39 | print(e)
40 |         print("--------------Vocabulary built--------------")
41 | exit(0)
42 | if(args.train):
43 |
44 | try:
45 | net=GetModel(args.train)
46 |             print("--------------Starting model training--------------")
47 | utils.Train(net)
48 |             print("--------------Model training finished--------------")
49 | except Exception as e:
50 | print(e)
51 | exit(0)
52 |
53 | if(args.fine_tune):
54 | try:
55 | net,tkz=pm.GetPModel(args.fine_tune)
56 |             print("--------------Starting fine-tuning--------------")
57 | pm.FineTune(net,tkz)
58 |             print("--------------Fine-tuning finished--------------")
59 | except Exception as e:
60 | print(e)
61 | exit(0)
62 | if(args.gen):
63 |
64 | net,param_path=args.gen
65 |
66 | if(param_path=="x"):
67 | param_path=None
68 | try:
69 |             print("--------------Starting submission generation--------------")
70 | if(net=="gru"):
71 | net=GetModel(net)
72 | utils.GenSubmisson(net,param_path)
73 | else:
74 | net,tkz=pm.GetPModel(net)
75 | pm.GenSub(net,tkz,param_path)
76 |
77 |             print("--------------Submission generated--------------")
78 | except Exception as e:
79 | print(e)
80 | exit(0)
81 |
82 |
83 |
84 | print(r"""
85 | ___________ __ _________ .__
86 | \__ ___/___ ___ ____/ |_ / _____/__ __ _____ _____ _____ _______|__|_______ ___________
87 | | |_/ __ \\ \/ /\ __\ \_____ \| | \/ \ / \\__ \\_ __ \ \___ // __ \_ __ \
88 | | |\ ___/ > < | | / \ | / Y Y \ Y Y \/ __ \| | \/ |/ /\ ___/| | \/
89 | |____| \___ >__/\_ \ |__| /_______ /____/|__|_| /__|_| (____ /__| |__/_____ \\___ >__|
90 | \/ \/ \/ \/ \/ \/ \/ \/
91 | """)
92 | print("-h, --help show help message and exit")
93 |
94 | if __name__=='__main__':
95 | main()
--------------------------------------------------------------------------------
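Example invocations of the entry point above (a hypothetical usage sketch derived from the argparse flags; the model names are illustrative, and "x" as the second `-g` argument means "no checkpoint path", per the `param_path=="x"` branch in `main()`):

```python
# Hypothetical command lines for go.py (not part of the repository).
import subprocess

subprocess.run(["python", "go.py", "-p"])               # preprocess the data
subprocess.run(["python", "go.py", "-b"])               # build the word-frequency table
subprocess.run(["python", "go.py", "-m"])               # build the vocabulary
subprocess.run(["python", "go.py", "-t", "gru"])        # train the GRU model from scratch
subprocess.run(["python", "go.py", "-f", "t5-small"])   # fine-tune a pretrained model
subprocess.run(["python", "go.py", "-g", "bart", "x"])  # generate a submission (x = no checkpoint)
```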
/source/models.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 | from torch import Tensor
4 | from settings import *
5 | import utils
6 |
7 | class MaskedSoftmaxCELoss(nn.CrossEntropyLoss):
8 | """带遮蔽的softmax交叉熵损失函数"""
9 |
10 | def _sequence_mask(self, X, valid_len, value=0):
11 | """ 在序列中屏蔽不相关的项。
12 | 接收valid_len是多个有效长度组成的一维tensor,如[1,2]代表第一个序列有效长度为1,第二个序列有效长度为2
13 | """
14 |
15 | maxlen = X.size(1)
16 | mask = torch.arange((maxlen), dtype=torch.float32,
17 | device=X.device)[None, :] < valid_len[:, None]
18 | X[~mask] = value
19 |         # Elements beyond the effective length are set to zero; the original shape is unchanged
20 | return X
21 |
22 | def forward(self, pred, label, valid_len):
23 |         # Do not count the loss at padding positions in the labels
24 | weights = torch.ones_like(label)
25 | weights = self._sequence_mask(weights, valid_len)
26 | self.reduction = 'none'
27 | unweighted_loss = super().forward(pred.permute(0, 2, 1), label)
28 |
29 |         # Average the loss over each whole sequence; the final output shape is (batch_size,)
30 | weighted_loss = (unweighted_loss * weights).mean(dim=1)
31 | return weighted_loss
32 |
33 |
34 | class Encoder(nn.Module):
35 |     '''Encoder interface'''
36 | def __init__(self, **kwargs):
37 | super(Encoder,self).__init__(**kwargs)
38 |
39 | def forward(self,X,*args):
40 | raise NotImplementedError
41 |
42 | class Decoder(nn.Module):
43 |     '''Decoder interface'''
44 | def __init__(self, **kwargs):
45 | super(Decoder,self).__init__(**kwargs)
46 |
47 |     # Receives the encoder outputs as the prior state for the current step
48 | def init_state(self,enc_outputs,*args):
49 | raise NotImplementedError
50 |     # The state and the decoder input together form the input
51 |     # During one training sequence, the initial state comes from the encoder and is then continually updated
52 | def forward(self,X,state):
53 | raise NotImplementedError
54 |
55 | class EncoderDecoder(nn.Module):
56 |     '''Base class for the encoder-decoder architecture'''
57 | def __init__(self, encoder:Encoder,decoder:Decoder,**kwargs):
58 | super(EncoderDecoder,self).__init__(**kwargs)
59 | self.encoder=encoder
60 | self.decoder=decoder
61 |
62 | def forward(self,enc_X,dec_X,*args):
63 | enc_outputs=self.encoder(enc_X,*args)
64 | dec_state=self.decoder.init_state(enc_outputs)
65 |
66 | return self.decoder(dec_X,dec_state)
67 |
68 |
69 | ################################## RNN (results were far too poor)
70 | class GruEncoder(Encoder):
71 | def __init__(self,in_dim,emb_dim,hidden_size,num_layers,dropout=0,**kwargs):
72 | super(GruEncoder,self).__init__(**kwargs)
73 |         self.embedding=nn.Embedding(in_dim,emb_dim)
74 | self.rnn=nn.GRU(emb_dim,hidden_size,num_layers,dropout=dropout)
75 |
76 | def forward(self,X:Tensor,*args):
77 |         X=self.embedding(X)
78 |         # Rearrange dimensions to (seq_len, batch_size, features)
79 | X=X.permute(1,0,2)
80 | output,state=self.rnn(X)
81 |         # The shapes are:
82 |         # (seq_len,batch_size,hidden_size)
83 |         # (num_layers,batch_size,hidden_size)
84 | return output,state
85 |
86 | class GruDecoder(Decoder):
87 | def __init__(self,in_dim,emb_dim,hidden_size,num_layers,dropout=0,**kwargs):
88 | super(GruDecoder,self).__init__(**kwargs)
89 |         self.embedding=nn.Embedding(in_dim,emb_dim)
90 | self.rnn=nn.GRU(emb_dim+hidden_size,hidden_size,num_layers,dropout=dropout)
91 | self.dense=nn.Linear(hidden_size,VOCAB_SIZE+4)
92 |
93 | def init_state(self, enc_outputs, *args):
94 |         # Take the encoder's state
95 | return enc_outputs[1]
96 |
97 | def forward(self,X:Tensor,state:Tensor):
98 |         X=self.embedding(X).permute(1,0,2)
99 |         # Take the last layer at the final time step
100 | context=state[-1].repeat(X.shape[0],1,1)
101 |
102 |         # The state is already passed in via h0, but concatenating it along the feature dimension as extra context does no harm
103 | X_and_context=torch.cat((X,context),2)
104 | output,state=self.rnn(X_and_context,hx=state)
105 | output=self.dense(output).permute(1,0,2)
106 |         # The shapes are:
107 |         # (batch_size,seq_len,VOCAB_SIZE+4)
108 |         # (num_layers,batch_size,hidden_size)
109 | return output,state
110 |
111 | def GetTextSum_GRU():
112 | return EncoderDecoder(
113 | GruEncoder(VOCAB_SIZE+4,512,256,2),
114 | GruDecoder(VOCAB_SIZE+4,512,256,2)
115 | )
116 | ##################################
117 |
118 |
119 |
120 | def GetModel(name:str):
121 | name=name.lower()
122 | if(name=="gru"):
123 | return GetTextSum_GRU().to(DEVICE)
124 |
125 | else:
126 |         raise Exception("This model is not implemented!")
127 |
128 | if __name__=='__main__':
129 | # encoder=GruEncoder(VOCAB_SIZE+4,512,256,2)
130 | # decoder=GruDecoder(VOCAB_SIZE+4,512,256,2)
131 | # for enc_X,dec_X,y in utils.train_iter:
132 | # print(enc_X[0].shape)
133 | # enc_out=encoder(enc_X[0])
134 |
135 | # state=decoder.init_state(enc_out)
136 | # output,state=decoder(dec_X[0],state)
137 | # print(output.shape)
138 | # loss_f=MaskedSoftmaxCELoss()
139 | # l=loss_f(output,y[0],y[1])
140 | # print(l)
141 |
142 | # break
143 |
144 | net=GetTextSum_GRU()
145 |
146 |
147 | with open("1.txt","w+") as f:
148 | f.write(str(net))
149 |
--------------------------------------------------------------------------------
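The masking logic in `MaskedSoftmaxCELoss` above can be hard to follow from the class alone; the self-contained sketch below reproduces the idea on dummy tensors (an illustration, not repository code):

```python
# Standalone illustration of the masking used by MaskedSoftmaxCELoss:
# positions beyond each sequence's valid length get zero weight, so
# padding never contributes to the per-sequence loss.
import torch
from torch import nn

pred = torch.randn(2, 4, 6)           # (batch_size, seq_len, vocab_size)
label = torch.randint(0, 6, (2, 4))   # (batch_size, seq_len)
valid_len = torch.tensor([4, 2])      # sequence 0 fully valid, sequence 1 only 2 steps

weights = torch.ones_like(label)
mask = torch.arange(label.size(1))[None, :] < valid_len[:, None]
weights[~mask] = 0                    # zero out the padded positions

loss_f = nn.CrossEntropyLoss(reduction='none')
unweighted = loss_f(pred.permute(0, 2, 1), label)  # per-token loss, (batch_size, seq_len)
weighted = (unweighted * weights).mean(dim=1)      # per-sequence loss, (batch_size,)
print(weighted)                       # padded steps contribute 0 to sequence 1's mean
```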
/source/pretrained_models.py:
--------------------------------------------------------------------------------
1 | # Using pretrained models
2 | from transformers import PegasusTokenizer,PegasusForConditionalGeneration
3 | from transformers import T5Tokenizer, T5ForConditionalGeneration,AdamW
4 | from transformers import BartTokenizer,BartForConditionalGeneration
5 | from settings import *
6 | from utils import GetRouge,CountFiles,ReadJson
7 | import os
8 | from torch.utils.data.dataset import TensorDataset
9 | from torch.utils.data.dataloader import DataLoader
10 | from torch.nn.modules.module import Module
11 |
12 | current_model=""
13 |
14 |
15 |
16 | def ToTensor(texts,summaries,tokenizer):
17 | task_prefix="summarize: "
18 | encoding = tokenizer([task_prefix + sequence for sequence in texts],
19 | padding='longest',
20 | max_length=SOURCE_THRESHOLD,
21 | truncation=True,
22 | return_tensors="pt")
23 | input_ids, attention_mask = encoding.input_ids, encoding.attention_mask
24 |
25 | target_encoding = tokenizer(summaries,
26 | padding='longest',
27 | max_length=SUMMARY_THRESHOLD,
28 | truncation=True)
29 | labels = target_encoding.input_ids
30 |     labels = [[(i if i != tokenizer.pad_token_id else -100) for i in lab] for lab in labels]  # padding ids become -100 so the loss ignores them
31 | labels = torch.tensor(labels)
32 |
33 | return TensorDataset(input_ids,attention_mask,labels)
34 |
35 | def FineTune(net:Module,tokenizer):
36 |     '''Fine-tune a pretrained model'''
37 |
38 | tset_texts=[]
39 | tset_summaries=[]
40 | vset_texts=[]
41 | vset_summaries=[]
42 | tset_len=CountFiles(DATA_DIR+"new_train")
43 | vset_len=CountFiles(DATA_DIR+"new_val")
44 | for i in range(tset_len):
45 | text,summary=ReadJson(i,DATA_DIR+"new_train")
46 | tset_texts.append(text)
47 | tset_summaries.append(summary)
48 | for i in range(vset_len):
49 | text,summary=ReadJson(i,DATA_DIR+"new_val")
50 | vset_texts.append(text)
51 | vset_summaries.append(summary)
52 |     print("Training data loaded into memory...")
53 |
54 | train_iter=DataLoader(
55 | ToTensor(tset_texts,tset_summaries,tokenizer),
56 | batch_size=BATCH_SZIE,
57 | shuffle=True,
58 | num_workers=4
59 | )
60 | val_iter=DataLoader(
61 | ToTensor(vset_texts,vset_summaries,tokenizer),
62 | batch_size=BATCH_SZIE,
63 | shuffle=False,
64 | num_workers=4
65 | )
66 |
67 |     print("Minibatches created...")
68 |
69 |     print("Starting model training...")
70 | opt=AdamW(net.parameters())
71 | from tqdm import tqdm
72 | import time
73 | min_loss=10
74 | for epoch in range(EPOCHS):
75 | train_loss=[]
76 | val_loss=[]
77 | net.train()
78 | for batch in tqdm(train_iter):
79 | input_ids,attention_mask,labels=[x.to(DEVICE) for x in batch]
80 | l = net(input_ids=input_ids, attention_mask=attention_mask, labels=labels).loss
81 | l.backward()
82 | opt.step()
83 | opt.zero_grad()
84 | with torch.no_grad():
85 | train_loss.append(l.item())
86 |
87 | torch.cuda.empty_cache()
88 | net.eval()
89 | with torch.no_grad():
90 | for batch in tqdm(val_iter):
91 | input_ids,attention_mask,labels=[x.to(DEVICE) for x in batch]
92 | l = net(input_ids=input_ids, attention_mask=attention_mask, labels=labels).loss
93 | val_loss.append(l.item())
94 |
95 |         if(sum(val_loss)<min_loss):
96 |             min_loss=sum(val_loss)
97 |             torch.save(net.state_dict(),PARAM_DIR+current_model+".bin")
--------------------------------------------------------------------------------
/source/settings.py:
--------------------------------------------------------------------------------
12 | # word -> index
13 | WORD_IDX_PATH="D:/2021UCAS/AdvancedAI/TextSum/dataset/word2idx.pkl"
14 | # index -> word
15 | IDX_WORD_PATH="D:/2021UCAS/AdvancedAI/TextSum/dataset/idx2word.pkl"
16 |
17 | #------------------ Vocabulary settings ------------------#
18 | # Special symbols
19 | PAD_WORD = '<pad>'
20 | UNK_WORD = '<unk>'
21 | BOS_WORD = '<bos>'
22 | EOS_WORD = '<eos>'
23 | PAD_NUM = 0
24 | UNK_NUM = 1
25 | BOS_NUM = 2
26 | EOS_NUM = 3
27 | # Vocabulary size (maxing it out means UNK never appears); note that 4 must be added at the network input (there are also four special symbols)
28 | VOCAB_SIZE=10000
29 | # Maximum source sequence length
30 | MAX_SOURCE_LEN=2193
31 | # Maximum summary sequence length
32 | MAX_SUMMARY_LEN=587
33 |
34 | # Sequence length limits (longer sequences are truncated, shorter ones padded)
35 | SOURCE_THRESHOLD=1800
36 | SUMMARY_THRESHOLD=550
37 | # Flags used when reading data
38 | TRAIN_FALG=0
39 | VAL_FALG=1
40 | TEST_FALG=2
41 | # Data-cleaning rules
42 | # Do not change the order!
43 | PATTERNS_ONCE=[
44 | "by .*? published :.*?\. \| \..*? [0-9]+ \. ",
45 | "by \. .*? \. ",
46 | "-lrb- cnn -rrb- -- ",
47 | "\t(.*?-lrb- .*? -rrb- -- )",
48 | ]
49 | PATTERNS_ANY=[
50 | "``|''"
51 | ]
52 |
53 | #------------------ Other settings ------------------#
54 | DEVICE=torch.device("cuda:0")
55 | EPOCHS=10
56 | BATCH_SZIE=28
57 |
58 |
59 | #------------------ Pretrained-model settings ------------------#
60 |
61 | # Number of beams
62 | NUM_BEAMS=1
63 | # Maximum generated sequence length
64 | MAX_LEN=590
65 | # Minimum generated sequence length
66 | MIN_LEN=30
67 |
--------------------------------------------------------------------------------
/source/temp.py:
--------------------------------------------------------------------------------
1 | # import re
2 | # s1="3 by . daily mail reporter . published : . 15:34 est , 13 july 2012 . | . updated : . 01:33 est , 16 july 2012 . kelsey grammer 's wife kayte has given birth to their first child together . the boss actor , 57 , and his 32-year-old spouse -- who were expecting twins -- are ` thrilled ' after welcoming a ` healthy baby girl ' weighing 6lbs 2oz into the world this morning in los angeles , and they have named her faith evangeline elisa grammer . but the couple revealed they tragically lost their unborn son shortly after announcing kayte was pregnant with twins . joy and heartache : kelsey grammer and kayte walsh , pictured in chicago esterday , have welcomed a baby girl , but also revealed they lost a twin boy during the pregnancy . in a personal note , they said : ` early . this morning kayte gave birth to faith evangeline elisa grammer . we . are thrilled . she was 6lbs 2oz when she entered the world at 1am on the . 13th of july in the year 2012 . mother and child are in excellent . health . ' ` we were ecstatic earlier this year , . when we announced that kayte was carrying twins . tragically we lost the . little boy shortly thereafter . this was not something we cared to make . known publicly at the time . ' ` it was unspeakably painful and we . know that people will understand our desire to keep the news private . then , as we know they will respect our privacy in this matter now . a . glorious birth with a lingering sadness is ours today . ` we choose to celebrate the life that has been given us ' : the pair released an emotional statement today . ` healthy baby girl ' : they have named the baby , who weighs 6lbs 2oz , faith evangeline elisa grammer . ` we choose to celebrate the life that . has been given us . we proudly introduce our faith to the world today . looking forward to the days ahead and the children yet to come . ' the couple -- who got married in . february 2011 and renewed their vows in june -- previously lost a child . when kayte suffered a miscarriage in 2010 . kelsey already has four kids , . spencer , 28 , and greer , 19 , from previous relationships and 10-year-old . mason and jude , seven , with ex-wife camille donatacci . the couple went public with their romance just weeks after he split from the real housewives of beverly hills star . ex wife : kelsey with real housewives star camille and their children jude and mason in 2008 . kayte gave birth to a ` healthy baby girl ' named faith evangeline elisa this morning . couple reveal ` unspeakable ' pain at losing twin boy during pregnancy . celebrating a ` glorious birth ' with ` lingering sadness '"
3 | # s2="2 by . daily mail reporter . published : . 00:04 est , 14 july 2012 . | . updated : . 01:30 est , 16 july 2012 . sylvester stallone was said to have almost collapsed with grief on learning of the death of his son yesterday . the body of sage stallone , 36 , was found by his housekeeper at his los angeles home . prescription drugs were reportedly found nearby but police said it was too early to say whether they were the cause of his death . tragedy : sylvester stallone 's son sage was found dead this afternoon in his los angeles apartment after a suspected drug overdose . he was 36 , pictured here in 2006 in hollywood . a source close to stallone said : . ` when he heard the news , sly was shocked , short of breath and almost . collapsed . he just went quiet before sobbing uncontrollably . he is a . wreck at the moment . ' sage 's aunt melanie hart told the mail on sunday : ` people are speculating that it was suicide but we really have no idea . ' there were unconfirmed reports that . sage , whose mother is stallone 's first wife sasha czack , had been dead . for four days before his body was found . a source told radaronline that medics . arrived on the scene at 3.05 pm this afternoon and spent around 25 . minutes trying to revive sage before his death was pronounced at the . scene . his body was taken straight to the coroner 's office - and the insider claims no suicide note was found . ' i suspect he had been dead for quite a while when he was discovered , ' the source told the website . ` usually medics will be at the scene . for around 45 minutes but they were out of there within half an . hour . ` there were a number of prescription bottles found at the scene but it did not appear to be suicide and no note was found . ' pronounced dead at the scene : the coroner 's van was spotted at sage 's home in los angeles along with news crews . unresponsive : the filmmaker 's body was taken straight to the coroner 's office - and not to the hospital . a 9-1-1 call was placed shortly . before 3pm and the caller said sage was n't breathing and indicated it . could be a drug overdose , radar reports . an autopsy is scheduled to take place in the next 48 hours . shortly after news of sage 's death , a . spokesman released a statement on behalf of his action hero father , 66 , . who was at the comic con film convention in san diego yesterday . ` sylvester stallone is devastated and . grief-stricken over the sudden loss of his son , ' the actor 's . spokesperson michelle bega said in the statement . ` his compassion and thoughts are with sage 's mother , sasha . ' sudden death : the body of the 36-year-old sage stallone was brought out to the coroner 's van in los angeles . devastated : sly 's agent released a statement saying he was ` grief-striken ' at the loss of his son . mystery : an autopsy is scheduled to take place in the next 48 hours to determine the cause of death . earlier : sly was at comic com yesterday evening . red carpet smiles : sage pictured in 1996 at the hollywood premiere of daylight with his father sylvester and his now-wife jennifer flavin . double act : sage appeared alongside his father in the 1990 movie rocky v , playing the role of rocky 's son robert balboa . ` he was a very talented and wonderful young man . his loss will be felt forever . ' police said they found the younger . stallone in the home while responding to a ` welfare check ' , however . sage 's lawyer george braunstein said he was found by a housekeeper . 
friends and acquaintances had become concerned because they had n't heard from sage in the past day . braunstein said the death came as a shock , telling the new york post this afternoon : ` he was in good spirits , and working . on all kinds of projects . ` he was planning on getting married . i am just devastated . he was an extremely wonderful , loving guy . this is a tragedy . ' before the heartbreak : stallone was pictured yesterday with arnold schwarzenegger at the comic con film convention in san diego . sage moonblood stallone was the . oldest of sylvester stallone 's children and co-starred with his father . in two films . he was the first of two sons stallone had with first wife . sasha czack . he made his acting debut in 1990 's . rocky v - he played his stallone 's onscreen son - and also appeared with . his father in 1996 's daylight . hand in hand : sylvester pictured back in 1982 with his first wife sasha czack , sage 's mother . also in 1996 , sage stallone and . veteran film editor bob murawski co-founded grindhouse releasing , a . company dedicated to preserving and promoting the b-movies and . exploitation films of the 1970s and 80s . he also directed the 2006 short vic , which screened at the palm springs film festival . braunstein said sage had frequent requests to work on films . ` he was a full of life filmmaker with . his whole future ahead of him , ' he said . ` he was just very up and . enthusiastic and positive . ' i think it was probably some sort of accident , ' he said of the death . braunstein added that sage stallone greatly admired his father but was working hard to make his own name in the film industry . ` he was very proud of his father and proud to be his father 's son , ' he said . stallone 's split from sage 's mother czack in 1985 after 11 years together . they also have a another son . seargeoh , 32 , who is autistic . stallone went on to wed model and actress brigitte . nielsen in beverly hills but they split just two . years later in a very public divorce . he married third wife , jennifer . flavin , in 1997 after an eight-year on-again , off-again relationship and . they have three daughters : sophia rose , 15 , sistine rose , 14 , and . scarlet rose , 10 . sage , who was raised by his mother following his parents ' divorce , felt distant from his father growing up , a theme which hit home as they were filming rocky v together . big boots to fill : sage said he always worried about living up to his father 's success , seen here together again in rocky v . ` when i was screaming , `` you never spent time with me ! you never spent time with my mother ! '' - that was true , ' he told people magazine in 1996 . ` i was looking into my father 's face and really saying that . ' but it proved a turning point for the father and son , who went on to form a close bond and they acted again together in the 1996 film daylight . ` between takes , sly and sage would roll around in the dirt like two puppies , ' the director rob cohen observed at the time . sage certainly felt the pressure of growing up with such a famous father and would worry that he would never match his success . ` i tell him , `` as long as you give it your best , that 's all that matters , '' his mother sasha said in that same year . sage went on to pursue a career behind the camera and shunned the wild hollywood party scene , preferring to watch horror zombie films instead . ` people call me a hermit , ' he said while promoting the film . ` but i 'm happy . 
' star ` devastated and grief-stricken ' over sudden loss of his eldest child . sage played the 66-year-old 's onscreen son in rocky v . an autopsy is scheduled to take place in the next 48 hours after filmmaker was found next to prescription drugs ."
4 | # s3="1 -lrb- cnn -rrb- -- to resolve america 's ongoing , bruising battle over the debt and deficit , house republican paul ryan and senate democrat patty murray announced a deal on december 10 to halt spending cuts -- mostly in defense -- and lock in a two-year budget agreement to avoid another government shutdown on january 15 . but in eagerly seeking agreement with the republicans who shut the government down in october , democrats risk hurting the economy 's fragile recovery by accepting too much budget austerity embedded in the newly adopted budget . president obama and the democrats won big over the republicans in october 's budget fight . instead of pressing their advantage , democrats took tax increases for the rich off the table , agreed to cut federal pensions and did not get unemployment benefits extended . the democrats basically threw away their political gains . the deal repeals less than half of the sequestration cuts planned for 2014 . if obama and congress continue their shortsighted obsession with austerity and budget cuts , they ignore the big economic lesson from the past several years : austerity hurts prosperity . the congressional budget office estimated that repealing the entire 2013-2014 spending cuts would increase gross domestic product by $ 113 billion and create 900,000 additional jobs next year . the october 2013 government shutdown took another $ 24 billion out of the gdp . unemployment remains stuck around 7 % . though the deal reduces a bit of fiscal uncertainty , it hardly affected the u.s. growth forecasts for big banks , despite bank economists citing some pessimism because of `` austerity shock '' from spending cuts and `` uncertainty shock '' from washington 's continued fiscal battles . republicans bargain for more cuts and fewer taxes , but cutting military spending makes them nervous , so they attack social security and medicare . the wall street-affiliated democratic group third way is helping . it launched an attack on sen. elizabeth warren , d-massachusetts , and others who rightly refuse to cut social security as part of a long-term budget solution . we all know that republicans like to defend the wealthy and slash government . but why does austerity , especially cuts to old-age programs , have credibility with obama and other democrats ? advocates of `` grand bargains , '' cutting programs to balance the budget , wrongly presume the budget is a fixed quantity . they imagine it like a fixed pie . programs for the young , like education , must be paid for by cutting other programs , like social security . but their belief that a dollar taken from the old will be spent on the young is not only divisive , mean and fierce -- it is wrong . in his december 6 speech on inequality , obama talked about the sky-high and stubborn child poverty rate : more than 24 % . but cutting social security and medicare will only destabilize the economy and increase the elderly poverty rate . in many countries , programs for elderly people are not traded off against help for the young . when support for old-age programs increases , so does spending on children . advanced democratic countries ' spending on the elderly is positively correlated with education spending . one analysis shows that a 10 % increase in spending on education is correlated with a 7.3 % increase in spending on pensions . the congressional budget office warns that long-term deficits can hurt the economy . want to reduce the debt and deficit ? tax the wealthy , which wo n't hurt the economy . 
economists emmanuel saez and thomas piketty estimate that raising the tax rate for the top 1 % as high as 80 % would generate far more revenue . sen. tom harkin , d-iowa , and rep. peter defazio , d-oregon , propose a transactions tax -- a three-penny charge on every $ 100 traded in the stock market , which the congressional budget office estimates would raise $ 352 billion over 10 years . this small tax would also reduce stock churning by speculators , creating a nice secondary benefit . want to find even more savings ? sen. harry reid , d-nevada , wisely put tax loopholes that cost the treasury almost a trillion dollars per year on the table . for example , reid called for eliminating the small , but noxious , tax break for buying yachts and the $ 17 billion break that comes from taxing private equity , real estate and hedge fund profits as `` carried interest '' rather than at the ordinary income rate of 39.6 % instead of the capital gains rate of 20 % . there is one piece of good news : the deficit is coming down , from 9.2 % when obama took office to 4.1 % of gdp in 2017 . faster economic growth would shrink the deficit more rapidly . in contrast , further spending cuts will slow the economy and deficit reduction along with it . so , this is no time for obama to accept a lower budget path , or to consider cuts in social security and medicare . the small budget deficit reductions in this deal -- less than one-half of 1 % of the total debt or $ 23 billion -- would almost pay for extended unemployment benefits for one year at $ 25 billion . democrats are flinching under continued pressure from republicans playing out their long game as they ready for another bitter fight when the debt limit is reached next spring . but the president and the democrats have a winning economic and political strategy : raise revenues and keep social security and medicare strong . do n't throw october 's hard-won victory away ; it wo n't help the elderly , it wo n't help children , and it wo n't help the economy . the opinions expressed in this commentary are solely those of rick mcgahey and teresa ghilarducci . democrats and republicans reach a deal on the budget . rick mcgahey , teresa ghilarducci : austerity in budget will hurt our economy . they say president obama should not make cuts to programs like social security . mcgahey , ghilarducci : taxing the wealthy would generate revenue and cut deficit ."
5 | # s4="123 new york -lrb- cnn -rrb- -- preliminary tests indicate ricin was found in letters sent this past weekend to new york mayor michael bloomberg , new york deputy police commissioner paul browne said wednesday . browne said the letters to bloomberg contained a threat to the mayor and mentioned the debate on gun laws . `` the letter obviously , referred to our anti-gun efforts but there 's 12,000 people -lrb- who -rrb- are going to get killed this year with guns and 19,000 that are going to commit suicide with guns , and we 're not going to walk away from those efforts , '' bloomberg said . one letter addressed to the mayor 's office was opened at the city government 's mail facility , browne said . the suspicious material found in the two letters was a `` pink-orange oily substance , '' he said , adding that it was the second of two tests that showed what appeared to be trace amounts of ricin . what is ricin ? the substance is being tested at the national bioforensic analysis center in maryland , with conclusive results expected by friday . some of the emergency services workers who touched the letter friday were examined after they showed minor intestinal symptoms of ricin exposure on saturday , browne said . the symptoms have since subsided . civilian workers showed no symptoms , browne said in a statement . `` we take a lot of security measures as you know , '' bloomberg said . `` the men and women that open the mail for example ... they are well trained . '' the second letter to the mayor was opened by mark glaze , director of mayors against illegal guns -- founded and co-chaired by bloomberg -- in washington on sunday . browne 's statement appeared to indicate glaze showed no symptoms . a spokeswoman for the organization declined to comment wednesday . opinion : ricin - almost never deadly . both letters were postmarked in shreveport , louisiana , on may 20 , the american postal workers union said on its website . bloomberg is an outspoken critic of current gun laws . in march , he said nationwide background checks on all gun sales would save lives . `` we know that 's true , because in states that already require background checks on private sales , the rate of women murdered by an intimate partner armed with a gun is 38 % lower than in states that do n't have such background checks , '' he said . fbi spokesman jim margolin told cnn the agency is working to determine from where the letters were sent and who sent them . if inhaled , injected or ingested , less than a pinpoint of ricin can kill a person within 36 to 48 hours because of the failure of the respiratory and circulatory systems . there is no known antidote for the toxin , which is derived from castor beans . it has been included in letters in the past few months sent to president barack obama and other officials . in april , letters were sent to obama ; sen. roger wicker , r-mississippi ; and sadie holland , a judge in lee county , mississippi . james everett dutschke of tupelo , mississippi , has been charged with possession and use of a biological agent in connection with the case . last week , fbi agents arrested matthew ryan buquet after a grand jury charged him with mailing threatening communication to a senior judge in the u.s. district court for the eastern district of washington state . the fbi said in a statement that tests -- conducted by that agency and the spokane regional health district -- showed that a suspicious substance found with the letter was `` active ricin toxin . 
'' there are no indications the cases are connected . man , 37 , arrested in probe of washington state ricin-laced letters . cnn 's deborah feyerick , jason kessler , lawrence crook iii , carol cratty and mary snow contributed to this report . new : suspicious substance was oily , new york police official says . new : postal union says letters were postmarked in shreveport , louisiana . letters were addressed to bloomberg , one went to an organization he founded . ricin is a toxin that can kill in a matter of days . "
6 | # s5="975 london , england -lrb- cnn -rrb- -- human rights and freedom of the press in china , the detention of terrorist suspects by the united states and russia 's treatment of political dissent are the focus of scrutiny in amnesty international 's annual report , released wednesday , which looks at the state of human rights around the world . amnesty international protestors outside the us supreme court in january dressed as guantanamo bay detainees . the 398-page report comes 60 years after the united nations adopted the universal declaration of human rights , and amnesty says governments still need to act on their promises . `` the biggest threat to the future of human rights is the absence of a shared vision and collective leadership , '' the organization said in a statement . irene khan , amnesty 's secretary-general , said that in particular , `` the human-rights flash points in darfur , zimbabwe , gaza , iraq and myanmar demand immediate attention . '' the report , the group said , `` reveals a world riven by inequality , scarred by discrimination and distorted by political repression . '' according to its count , people are tortured or subject to other ill treatment in at least 81 countries , face unfair trials in at least 54 countries and are not allowed to express themselves freely in at least 77 countries . of the 150 countries and regions listed in the report , amnesty paid particular attention to china , the host of this summer 's olympic games . the group said growing numbers of human rights activists were imprisoned or harassed in china in 2007 , with ethnic and religious minorities -- including tibetans , falun gong practitioners and christians -- repressed or persecuted . death penalty statistics in china are difficult to assess , amnesty said , but based on public reports , the group estimated that at least 470 people were executed in 2007 . amnesty also noted the repression of free speech in china and said censorship of the internet and other media intensified last year . `` the chinese authorities maintained efforts to tightly control the flow of information , '' the report said . `` they decided what topics and news stories could be published , and media outlets were sometimes required to respond within minutes to government directives . the authorities continued to block web sites and to filter internet content based on specified words and topics . '' around 30 journalists and at least 50 others are known to be in prison for posting their views online , amnesty said . amnesty also criticized the death penalty in the united states , where 42 people were executed last year . it noted new jersey 's decision in december to abolish the death penalty made it the first u.s. state in more than 40 years to do away with executions . as it has in previous annual reports , amnesty criticized the detention of hundreds of foreign nationals at the u.s. naval base at guantanamo bay , cuba . `` the usa must close guantanamo detention camp and secret detention centers , prosecute the detainees under fair trial standards or release them , and unequivocally reject the use of torture and ill-treatment , '' amnesty said . the group noted that guantanamo detainees are held indefinitely , most of them without charge and without recourse to u.s. courts . most detainees there are held in isolation in maximum-security facilities , heightening concerns for their physical and mental health , amnesty said . 
in fact , more is written on the united states than any other country listed in the report . asked about that at a press conference tuesday , khan said , `` we certainly devote a lot of time to sudan , to china , to zimbabwe and other countries . but we look to the u.s. to provide leadership around the world . governments around the world look to the united states as a role model for their own behavior . '' in a lengthy section on iraq , amnesty noted that thousands of civilians , including children , were killed or injured in ongoing sectarian violence during 2007 . `` all sides involved in the fighting committed gross human rights violations , some of which amounted to war crimes and crimes against humanity , '' the report said . abductions , torture and murder , with bodies left in the street , occur daily , and the violence has caused 2 million iraqis to flee to syria , jordan and elsewhere , amnesty said . u.s. forces held some 25,000 detainees `` without charge or trial , '' the group said , and 33 people were executed , `` some after grossly unfair trials . '' in afghanistan , conflict and insecurity aggravated by drought and floods contributed to `` large-scale displacement '' of people throughout the year . `` at least 6,500 people were estimated to have been killed in the context of the conflict , '' the report said . `` violations of international humanitarian and human rights law were committed with impunity by all parties , including afghan and international security forces and insurgent groups . '' russia must show greater tolerance for political dissent , amnesty said . `` the russian authorities were increasingly intolerant of dissent or criticism , branding it ` unpatriotic , ' '' the report said . `` a crackdown on civil and political rights was evident throughout the year and in particular during the run-up to the state duma -lsb- parliament -rsb- elections in december . '' the european court of human rights ruled that russia was responsible for enforced disappearances , torture and extrajudicial executions in 15 judgments relating to the recent conflict in chechnya , amnesty said . there were fewer reported disappearances in the chechen republic in 2007 than in previous years , amnesty said , but continued human rights violations made people reluctant to report abuses . the report also criticized human rights conditions in iran , gaza and myanmar . human rights conditions in zimbabwe continued to decline in 2007 , the report said , `` with an increase in organized violence and torture and restrictions on the rights to freedom of association , assembly and expression . '' members of the main opposition party , the mdc , along with other human rights defenders , were arrested , and many were tortured while in custody , amnesty said . some 4 million people required food aid because of the nation 's deteriorating economy , and victims of forced evictions in 2005 continued to live in `` deplorable conditions '' while president robert mugabe 's government failed to remedy their situation . `` human rights problems are not isolated tragedies , but are like viruses that can infect and spread rapidly , endangering all of us , '' khan said . `` governments today must show the same degree of vision , courage and commitment that led the united nations to adopt the universal declaration of human rights 60 years ago . ''"
7 | # s6="6737 by . eleanor crooks , press association . maria sharapova reached her third successive french open final by battling past eugenie bouchard . sharapova maintained her remarkable record in three-set matches by winning an 18th consecutive deciding set on clay in a 4-6 , 7-5 , 6-2 victory . the russian won her first title at roland garros in 2012 before losing to serena williams 12 months ago . on form : maria sharapova fought back from a set down to overcome a stiff challenge from eugenie bouchard . rising star : eugenie bouchard , 20 , was playing in her second consecutive grand slam semi-final . bouchard , . who was playing in her second straight grand slam semi-final , had lost . comfortably to sharapova in the second round last year and demonstrated . again the huge strides she has made . she possesses the same steely-eyed determination as sharapova and her mental strength is remarkable for a 20-year-old . the . canadian said after beating angelique kerber in the fourth round that . she did not have a best friend in tennis , adding : ' i do n't think the . tennis tour is the place to have friends . for me , it 's all competition . ' it . was a sentence that could well have been written by sharapova so it was . no surprise that this was not a match for the faint-hearted . bouchard . has improved significantly since making the last four at the australian . open in january , hitting the ball a lot more aggressively , and it was . she who struck first with a break for 2-1 . pumped up : sharapova celebrates as she comes back from a set down to seal her place in the final . sharapova . fought back to level at 4-4 but bouchard forged ahead again immediately . and held to take the set , saving a break point with the gutsiest of . backhand winners onto the line . sharapova . had recovered from a set down in both her last two matches against sam . stosur and garbine muguruza and set about doing the same , moving into a . 5-2 lead . but . the russian 's serve , never something to be relied upon , was having an . off day and , serving for the set , she twice double-faulted on set point . rising star : bouchard gets down low to play a forehand as she takes the first set over sharapova . there was also a second-serve ace on a break point for good measure but on her third chance bouchard pounced . the . 20-year-old was unable to resist when sharapova broke again at 5-5 , . though , and this time the seventh seed clinched the set when bouchard . netted a forehand . bouchard . had never lost a grand slam match in which she had won the first set . before but the sense was sharapova 's prowess in deciding sets would be . the crucial factor . scene of success : sharapova will play in her third consecutive french open final on saturday . the russian moved ahead at 3-1 , and for the first time bouchard was making bad mistakes on the big points . she . held for 4-2 , saving two break points , but in the next game missed a . routine forehand and a volley as sharapova moved to within one game of . victory . bouchard . fought on , saving four match points in terrific style , but there was . nothing she could do when a sharapova forehand fizzed off the baseline . after two hours and 27 minutes . french kiss : sharapova acknowledges the roland garros crowd after semi-final victory . sharapova fought back from a set down to beat canadian bouchard . the 2012 champion won 4-6 , 7-5 , 6-2 in two hours and 27 minutes . sharapova will play simona halep in saturday 's final at roland garros . "
8 | # s7="0 editor 's note : in our behind the scenes series , cnn correspondents share their experiences in covering news and analyze the stories behind the events . here , soledad o'brien takes users inside a jail where many of the inmates are mentally ill . an inmate housed on the `` forgotten floor , '' where many mentally ill inmates are housed in miami before trial . miami , florida -lrb- cnn -rrb- -- the ninth floor of the miami-dade pretrial detention facility is dubbed the `` forgotten floor . '' here , inmates with the most severe mental illnesses are incarcerated until they 're ready to appear in court . most often , they face drug charges or charges of assaulting an officer -- charges that judge steven leifman says are usually `` avoidable felonies . '' he says the arrests often result from confrontations with police . mentally ill people often wo n't do what they 're told when police arrive on the scene -- confrontation seems to exacerbate their illness and they become more paranoid , delusional , and less likely to follow directions , according to leifman . so , they end up on the ninth floor severely mentally disturbed , but not getting any real help because they 're in jail . we toured the jail with leifman . he is well known in miami as an advocate for justice and the mentally ill . even though we were not exactly welcomed with open arms by the guards , we were given permission to shoot videotape and tour the floor . go inside the ` forgotten floor ' '' at first , it 's hard to determine where the people are . the prisoners are wearing sleeveless robes . imagine cutting holes for arms and feet in a heavy wool sleeping bag -- that 's kind of what they look like . they 're designed to keep the mentally ill patients from injuring themselves . that 's also why they have no shoes , laces or mattresses . leifman says about one-third of all people in miami-dade county jails are mentally ill . so , he says , the sheer volume is overwhelming the system , and the result is what we see on the ninth floor . of course , it is a jail , so it 's not supposed to be warm and comforting , but the lights glare , the cells are tiny and it 's loud . we see two , sometimes three men -- sometimes in the robes , sometimes naked , lying or sitting in their cells . `` i am the son of the president . you need to get me out of here ! '' one man shouts at me . he is absolutely serious , convinced that help is on the way -- if only he could reach the white house . leifman tells me that these prisoner-patients will often circulate through the system , occasionally stabilizing in a mental hospital , only to return to jail to face their charges . it 's brutally unjust , in his mind , and he has become a strong advocate for changing things in miami . over a meal later , we talk about how things got this way for mental patients . leifman says 200 years ago people were considered `` lunatics '' and they were locked up in jails even if they had no charges against them . they were just considered unfit to be in society . over the years , he says , there was some public outcry , and the mentally ill were moved out of jails and into hospitals . but leifman says many of these mental hospitals were so horrible they were shut down . where did the patients go ? nowhere . the streets . they became , in many cases , the homeless , he says . they never got treatment . 
leifman says in 1955 there were more than half a million people in state mental hospitals , and today that number has been reduced 90 percent , and 40,000 to 50,000 people are in mental hospitals . the judge says he 's working to change this . starting in 2008 , many inmates who would otherwise have been brought to the `` forgotten floor '' will instead be sent to a new mental health facility -- the first step on a journey toward long-term treatment , not just punishment . leifman says it 's not the complete answer , but it 's a start . leifman says the best part is that it 's a win-win solution . the patients win , the families are relieved , and the state saves money by simply not cycling these prisoners through again and again . and , for leifman , justice is served . e-mail to a friend ."
9 | # s8="21 lagos , nigeria -lrb- reuters -rrb- -- nigeria 's television survival show has been"
10 | # pat1="by .*? published :.*?\. \| \..*? [0-9]+ \. "
11 | # pat2="-lrb- cnn -rrb- -- "
12 | # pat3="\t(.*?-lrb- .*? -rrb- -- )"
13 | # pat4="``|''"
14 | # pat5="by \. .*? \. "
15 | # res=re.sub(pat1,"",s4,1)
16 | # res=re.sub(pat2,"",res,1)
17 | # res=re.sub(pat3,"",res,1)
18 | # res=re.sub(pat4,"",res)
19 | # res=re.sub(pat5,"",res,1)
20 | # print(res)
21 | # import torch
22 | # from torch import nn
23 |
24 | # transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
25 | # src = torch.rand((10, 32, 512))
26 | # tgt = torch.rand((20, 32, 512))
27 | # out = transformer_model(src, tgt)
28 | # print(out.shape)
29 |
30 | # l=[1,2,3,4]
31 |
32 | # print([i if i!=4 else 5 for i in l])
33 |
34 |
35 | # import utils
36 |
37 | # a=["hello hello","hi world"]
38 | # b=["hello hello","world"]
39 |
40 | # utils.GetRouge(a,b)
41 |
42 | # import torch
43 | # from transformers import T5Tokenizer, T5Model
44 |
45 | # tokenizer=T5Tokenizer.from_pretrained('t5-small')
46 | # text = ['Hello world!', 'Hello python!']
47 | # inputs = tokenizer(text, return_tensors='pt', padding=True)
48 | # print(inputs)
49 |
50 | from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW
51 | import torch
52 | from settings import *
53 |
54 | tokenizer = T5Tokenizer.from_pretrained(PARAM_DIR+"t5-small")
55 | model = T5ForConditionalGeneration.from_pretrained(PARAM_DIR+"t5-small")
56 |
57 | # the following 2 hyperparameters are task-specific
58 | max_source_length = 512
59 | max_target_length = 128
60 |
61 | # Suppose we have the following 2 training examples:
62 | input_sequence_1 = "Welcome to NYC"
63 | output_sequence_1 = "Bienvenue à NYC"
64 |
65 | input_sequence_2 = "HuggingFace is a company as e dd"
66 | output_sequence_2 = "HuggingFace est une entreprise"
67 |
68 | # encode the inputs
69 | task_prefix = "translate English to French: "
70 | input_sequences = [input_sequence_1, input_sequence_2]
71 | encoding = tokenizer([task_prefix + sequence for sequence in input_sequences],
72 | padding='longest',
73 | max_length=max_source_length,
74 | truncation=True,
75 | return_tensors="pt")
76 | input_ids, attention_mask = encoding.input_ids, encoding.attention_mask
77 |
78 | # encode the targets
79 | target_encoding = tokenizer([output_sequence_1, output_sequence_2],
80 | padding='longest',
81 | max_length=max_target_length,
82 | truncation=True)
83 | labels = target_encoding.input_ids
84 |
85 | # replace padding token ids in the labels with -100 so they are ignored by the loss
86 | labels = [
87 | [(label if label != tokenizer.pad_token_id else -100) for label in labels_example] for labels_example in labels
88 | ]
89 | labels = torch.tensor(labels)
90 | loss = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels).loss
91 | loss.backward()
92 | opt=AdamW(model.parameters())
93 | opt.step()
94 |
95 | print(loss)
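96 | 
97 | # A minimal, hypothetical decoding sketch (kept commented out, like the scratch
98 | # code above): after the single optimizer step, greedy-decode the same batch to
99 | # inspect what the model produces; max_length here is an arbitrary illustrative value.
100 | # with torch.no_grad():
101 | #     generated_ids = model.generate(input_ids=input_ids,
102 | #                                    attention_mask=attention_mask,
103 | #                                    max_length=max_target_length)
104 | # print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))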
--------------------------------------------------------------------------------
/source/utils.py:
--------------------------------------------------------------------------------
1 | import time
2 | import os
3 | from torch import nn
4 | from torch import optim
5 | from torch.nn.modules.module import Module
6 | from tqdm.std import tqdm
7 | from settings import *
8 | import json
9 | import pickle as pkl
10 | import re
11 | from torch.utils.data.dataset import Dataset
12 | from torch.utils.data.dataloader import DataLoader
13 | import torch
14 | from rouge import Rouge
15 | import models
16 |
17 | ############################### Run this only once! ###############################
18 | def Preprocess(train_path=DATA_DIR+"train_dataset.csv",test_path=DATA_DIR+"test_dataset.csv"):
19 | '''
20 | Clean the data, split off a validation set, and save the results to new files
21 | '''
22 |
23 | # Data cleaning
24 | def _cleanData(data):
25 | print("数据清洗开始=========================================")
26 |
27 | clean_data=[]
28 | for i,d in tqdm(enumerate(data)):
29 | res=d
30 | for pat in PATTERNS_ONCE:
31 | ################################# TODO: revise later
32 | if("\t" in pat):
33 | res=re.sub(pat,"\t",res,1)
34 | else:
35 | res=re.sub(pat,"",res,1)
36 | ####################################
37 | for pat in PATTERNS_ANY:
38 | res=re.sub(pat,"",res)
39 |
40 | clean_data.append(res)
41 |
42 | print("数据清洗完毕=========================================")
43 | return clean_data
44 |
45 | # Save the processed data as JSON files
46 | def _save2Json(data,mode):
47 |
48 |
49 | if mode==2:
50 |
51 | for i in range(len(data)):
52 | source=data[i].split('\t')[1].strip('\n')
53 | if source!='':
54 | dict_data={"text":source,"summary":'no summary'} # the test set has no reference summaries
55 |
56 | with open(new_test_path+str(i)+'.json','w+',encoding='utf-8') as f:
57 | f.write(json.dumps(dict_data,ensure_ascii=False))
58 |
59 |
60 | else:
61 |
62 | for i in range(len(data)):
63 |
64 | if len(data[i].split('\t'))==3:
65 | source_seg=data[i].split("\t")[1]
66 | target_seg=data[i].split("\t")[2].strip('\n')
67 |
68 |
69 | if source_seg!='' and target_seg!='':
70 | dict_data={"text":source_seg,"summary":target_seg}
71 | path=new_train_path
72 | if mode==1:
73 | path= new_val_path
74 | with open(path+str(i)+'.json','w+',encoding='utf-8') as f:
75 | f.write(json.dumps(dict_data,ensure_ascii=False))
76 |
77 |
78 |
79 | with open(train_path,'r',encoding='utf-8') as f:
80 | train_data_all=f.readlines()
81 |
82 | with open(test_path,'r',encoding='utf-8') as f:
83 | test_data=f.readlines()
84 |
85 | # Clean the data
86 | train_data_all=_cleanData(train_data_all)
87 | test_data=_cleanData(test_data)
88 |
89 | # with open("./1.csv",'w',encoding='utf-8') as f:
90 | # f.writelines(train_data_all)
91 | # with open("./2.csv",'w',encoding='utf-8') as f:
92 | # f.writelines(test_data)
93 | # random.shuffle(train_data_all)
94 |
95 | # Set up the new file paths
96 | new_train_path=os.path.join(DATA_DIR,"new_train/")
97 | new_val_path=os.path.join(DATA_DIR,"new_val/")
98 | new_test_path=os.path.join(DATA_DIR,"new_test/")
99 |
100 | if not os.path.exists(new_train_path):
101 | os.makedirs(new_train_path)
102 |
103 | if not os.path.exists(new_val_path):
104 | os.makedirs(new_val_path)
105 |
106 | if not os.path.exists(new_test_path):
107 | os.makedirs(new_test_path)
108 |
109 | train_data=train_data_all[:8000] # re-split the training set into training and validation subsets, so the checkpoint with the lowest validation loss is used to predict on the test set
110 | val_data=train_data_all[8000:]
111 |
112 | _save2Json(train_data,TRAIN_FALG)
113 | _save2Json(val_data,VAL_FALG)
114 | _save2Json(test_data,TEST_FALG)
115 |
116 |
117 | def CountFiles(path):
118 | '''
119 | Count the number of JSON files in the target folder
120 | '''
121 | matcher = re.compile(r'[0-9]+\.json')
122 | match = lambda name: bool(matcher.match(name))
123 | names = os.listdir(path)
124 | n_data = len(list(filter(match, names)))
125 | return n_data
126 |
127 | def BuildVocabCounter(data_dir=DATA_DIR):
128 | '''
129 | Count all tokens and build a word-frequency table
130 | '''
131 | from collections import Counter
132 |
133 | def GetTokens(path):
134 | n_data=CountFiles(path)
135 | summary_words=[]
136 | source_words=[]
137 | for i in range(n_data):
138 | js_data=json.load(open(os.path.join(path,f"{i}.json"),encoding="utf-8"))
139 | summary=''.join(js_data['summary']).strip()
140 | summary_words.extend(summary.strip().split(' '))
141 |
142 | source=''.join(js_data['text']).strip()
143 | source_words.extend(source.strip().split(' '))
144 |
145 | return source_words+summary_words
146 |
147 | # print(_count_data(data_dir+"new_train"))
148 | vocab_counter=Counter()
149 | vocab_counter.update(t for t in GetTokens(data_dir+"new_train") if t !="")
150 | vocab_counter.update(t for t in GetTokens(data_dir+"new_val") if t !="")
151 | vocab_counter.update(t for t in GetTokens(data_dir+"new_test") if t !="")
152 | # print(vocab_counter.values())
153 |
154 | with open(VOCAB_PATH,"wb") as f:
155 | pkl.dump(vocab_counter,f)
156 |
157 | def MakeVocab(vocab_size=VOCAB_SIZE):
158 | '''
159 | Build the vocabulary; vocab_size sets the dictionary size. Only frequent words are added to the dictionary, and other rare words are mapped to the unknown token (UNK_WORD)
160 | '''
161 | with open(VOCAB_PATH,"rb") as f:
162 | wc=pkl.load(f)
163 | word2idx, idx2word = {}, {}
164 | word2idx[PAD_WORD] = 0
165 | word2idx[UNK_WORD] = 1
166 | word2idx[BOS_WORD] = 2
167 | word2idx[EOS_WORD] = 3
168 | for i, (w, _) in enumerate(wc.most_common(vocab_size), 4):
169 | word2idx[w] = i
170 | for w, i in word2idx.items():
171 | idx2word[i] = w
172 |
173 | with open(WORD_IDX_PATH,"wb") as f:
174 | pkl.dump(word2idx,f)
175 | with open(IDX_WORD_PATH,"wb") as f:
176 | pkl.dump(idx2word,f)
177 |
178 | def GetNumOfLongestSeq():
179 | '''
180 | Find the length of the longest sequence, used for padding
181 | '''
182 |
183 | def _findInFolders(path,length):
184 | max_len=0
185 | for i in range(length):
186 | js_data=json.load(open(os.path.join(path,f"{i}.json"),encoding="utf-8"))
187 | l_data=js_data["summary"].split(" ")
188 | l=len(l_data)
189 | if(max_len&lt;l):
190 | max_len=l
253 | if(p_len>threshold):
254 | if(EOS_NUM in line):
255 | line[threshold-1]=EOS_NUM
256 | return line[:threshold],threshold
257 | return line + [PAD_NUM] * (threshold - len(line)),p_len
258 |
259 | def ReadJson2List(dir,i,label=False):
260 | '''Read a single JSON file (one sample) and split it on spaces into a list'''
261 |
262 | js_data=json.load(open(os.path.join(dir,f"{i}.json"),encoding="utf-8"))
263 | if label:
264 | return js_data["summary"].split(" ")
265 | return js_data["text"].split(" ")
266 |
267 |
268 | def GetRouge(pred,label):
269 | '''Compute the ROUGE-L score'''
270 | rouge=Rouge()
271 | rouge_score = rouge.get_scores(pred, label)
272 | rouge_L_f1 = 0
273 | rouge_L_p = 0
274 | rouge_L_r = 0
275 | for d in rouge_score:
276 | rouge_L_f1 += d["rouge-l"]["f"]
277 | rouge_L_p += d["rouge-l"]["p"]
278 | rouge_L_r += d["rouge-l"]["r"]
279 |
280 | print("rouge_f1:%.2f" % (rouge_L_f1 / len(rouge_score)))
281 | print("rouge_p:%.2f" % (rouge_L_p / len(rouge_score)))
282 | print("rouge_r:%.2f" % (rouge_L_r / len(rouge_score)))
283 | 
284 | return (rouge_L_f1 / len(rouge_score))
285 |
286 |
287 | # Build batched Tensors with DataLoader. On Windows, multiprocessing workers break when the loaders are created inside a function, so they are defined at module level
288 | with open(WORD_IDX_PATH,"rb") as f:
289 | w2i=pkl.load(f)
290 | train_iter=DataLoader(TextDataset(TRAIN_FALG,w2i),shuffle=True,batch_size=BATCH_SZIE,num_workers=8)
291 | val_iter=DataLoader(TextDataset(VAL_FALG,w2i),shuffle=False,batch_size=BATCH_SZIE,num_workers=4)
292 | test_iter=DataLoader(TextDataset(TEST_FALG,w2i),shuffle=False,batch_size=1)
293 |
294 | def Train(net:Module,lr=0.01):
295 | """训练序列到序列模型。"""
296 | from tqdm import tqdm
297 |
298 | def xavier_init_weights(m):
299 | if type(m) == nn.Linear:
300 | nn.init.xavier_uniform_(m.weight)
301 | if type(m) == nn.GRU:
302 | for param in m._flat_weights_names:
303 | if "weight" in param:
304 | nn.init.xavier_uniform_(m._parameters[param])
305 |
306 | net.apply(xavier_init_weights)
307 | net.to(DEVICE)
308 | optimizer = optim.Adam(net.parameters(), lr=lr)
309 | loss = models.MaskedSoftmaxCELoss()
310 |
311 | # Once the validation loss drops below 10000, save the parameters whenever a new minimum is reached
312 | min_loss=10000
313 | for epoch in range(EPOCHS):
314 | train_loss=[]
315 | val_loss=[]
316 |
317 | net.train()
318 | for batch in tqdm(train_iter):
319 | (enc_X, enc_x_l), (dec_x, dec_x_l), (y,y_l) = [(x[0].to(DEVICE),x[1].to(DEVICE)) for x in batch]
320 |
321 |
322 | pred, _ = net(enc_X, dec_x, enc_x_l)
323 | l = loss(pred, y, y_l).sum()
324 | l.backward()
325 |
326 | optimizer.step()
327 | optimizer.zero_grad()
328 |
329 | with torch.no_grad():
330 | train_loss.append(l.item())
331 |
332 | # Free GPU memory
333 | torch.cuda.empty_cache()
334 |
335 | net.eval()
336 | with torch.no_grad():
337 | for batch in tqdm(val_iter):
338 | (enc_X, enc_x_l), (dec_x, dec_x_l), (y,y_l) = [(x[0].to(DEVICE),x[1].to(DEVICE)) for x in batch]
339 | pred, _ = net(enc_X, dec_x, enc_x_l)
340 | l = loss(pred, y, y_l).sum()
341 | val_loss.append(l.item())
342 |
343 | # Save model parameters; a second-resolution timestamp keeps filenames unique
344 | if(sum(val_loss)