├── CDD_ML_Part_1_Acetylcholinesterase_Bioactivity_Data_Concised.ipynb
├── README.md
├── Stacking_Classifier.ipynb
├── Y_Scrambling_Solubility.ipynb
├── lazypredict.ipynb
├── transformer_pegasus_paraphrase.ipynb
└── vaex.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Python codes
2 |
3 | ### How to compare machine learning classifiers in 2 lines of code (lazypredict Python library)
4 |
5 | Watch the video [How to compare machine learning classifiers in 2 lines of code (lazypredict Python library)](https://youtu.be/ZdDUwlwJNi0) to see the use of lazypredict in action.
6 |
7 |
8 |
9 | ---
10 |
11 | ### Vaex - Fast data frame for Data Science (Handle billion rows in seconds)
12 |
13 | Watch the video [Vaex - Fast data frame for Data Science (Handle billion rows in seconds)](https://youtu.be/inGjY4cds3Q) to see the use of Vaex in action.
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/Stacking_Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Stacking-Classifier.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "iHxfnZV17AGx"
21 | },
22 | "source": [
23 | "# **How to stack machine learning models in Python**\n",
24 | "\n",
25 | "Chanin Nantasenamat\n",
26 | "\n",
27 | "[Data Professor](http://youtube.com/dataprofessor), http://youtube.com/dataprofessor"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {
33 | "id": "escsGkWFtzVj"
34 | },
35 | "source": [
36 | "# **Load dataset**"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "metadata": {
42 | "id": "A69jIUa4uq3Y"
43 | },
44 | "source": [
45 | "# Iris dataset\n",
46 | "from sklearn.datasets import load_iris\n",
47 | "X, y = load_iris(return_X_y=True)"
48 | ],
49 | "execution_count": null,
50 | "outputs": []
51 | },
52 | {
53 | "cell_type": "code",
54 | "metadata": {
55 | "id": "g-NFn_UOtuVE"
56 | },
57 | "source": [
58 | "#import pandas as pd\n",
59 | "#b1 = pd.read_csv('PubchemFingerprint_with_class_label.csv')\n",
60 | "#b1"
61 | ],
62 | "execution_count": null,
63 | "outputs": []
64 | },
65 | {
66 | "cell_type": "code",
67 | "metadata": {
68 | "id": "roxPlm8ShaJs"
69 | },
70 | "source": [
71 | "#X = b1.drop('Activity', axis=1)\n",
72 | "#X"
73 | ],
74 | "execution_count": null,
75 | "outputs": []
76 | },
77 | {
78 | "cell_type": "code",
79 | "metadata": {
80 | "id": "cyAOBjc5hwf_"
81 | },
82 | "source": [
83 | "#y = b1['Activity'].copy()\n",
84 | "#y"
85 | ],
86 | "execution_count": null,
87 | "outputs": []
88 | },
89 | {
90 | "cell_type": "code",
91 | "metadata": {
92 | "colab": {
93 | "base_uri": "https://localhost:8080/"
94 | },
95 | "id": "k9vHDORkwJE5",
96 | "outputId": "afe2ac6b-c430-4c34-b321-d6c9e7400570"
97 | },
98 | "source": [
99 | "# Remove low variance features\n",
100 | "from sklearn.feature_selection import VarianceThreshold\n",
101 | "\n",
102 | "selection = VarianceThreshold(threshold=(0.1)) \n",
103 | "X = selection.fit_transform(X)\n",
104 | "X.shape"
105 | ],
106 | "execution_count": null,
107 | "outputs": [
108 | {
109 | "output_type": "execute_result",
110 | "data": {
111 | "text/plain": [
112 | "(150, 4)"
113 | ]
114 | },
115 | "metadata": {
116 | "tags": []
117 | },
118 | "execution_count": 2
119 | }
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "metadata": {
125 | "id": "lpns5FPdaQgq"
126 | },
127 | "source": [
128 | "# Data splitting\n",
129 | "from sklearn.model_selection import train_test_split\n",
130 | "\n",
131 | "X_train, X_test, y_train, y_test = train_test_split(\n",
132 | " X, y, stratify=y, test_size=0.2, random_state=42\n",
133 | ")"
134 | ],
135 | "execution_count": null,
136 | "outputs": []
137 | },
138 | {
139 | "cell_type": "code",
140 | "metadata": {
141 | "colab": {
142 | "base_uri": "https://localhost:8080/"
143 | },
144 | "id": "GGQmqYf2ObLw",
145 | "outputId": "46f73cc2-584a-4150-d1b3-759bf777c614"
146 | },
147 | "source": [
148 | "X_train.shape, X_test.shape"
149 | ],
150 | "execution_count": null,
151 | "outputs": [
152 | {
153 | "output_type": "execute_result",
154 | "data": {
155 | "text/plain": [
156 | "((120, 4), (30, 4))"
157 | ]
158 | },
159 | "metadata": {
160 | "tags": []
161 | },
162 | "execution_count": 4
163 | }
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "metadata": {
169 | "id": "bwYQXxo7Ox0r"
170 | },
171 | "source": [
172 | "#y_train.value_counts()"
173 | ],
174 | "execution_count": null,
175 | "outputs": []
176 | },
177 | {
178 | "cell_type": "code",
179 | "metadata": {
180 | "id": "mVoIXOmNPWOn"
181 | },
182 | "source": [
183 | "#y_test.value_counts()"
184 | ],
185 | "execution_count": null,
186 | "outputs": []
187 | },
188 | {
189 | "cell_type": "markdown",
190 | "metadata": {
191 | "id": "0UpZfDRyvb5t"
192 | },
193 | "source": [
194 | "# **Build Classification models**"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "metadata": {
200 | "id": "Q8yCRtbQu5-F"
201 | },
202 | "source": [
203 | "from sklearn.metrics import accuracy_score\n",
204 | "from sklearn.metrics import matthews_corrcoef\n",
205 | "from sklearn.metrics import f1_score\n"
206 | ],
207 | "execution_count": null,
208 | "outputs": []
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "metadata": {
213 | "id": "LswMOe9Y26Nm"
214 | },
215 | "source": [
216 | "**K nearest neighbors**"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "metadata": {
222 | "colab": {
223 | "base_uri": "https://localhost:8080/"
224 | },
225 | "id": "2fI6Ni6i3EAy",
226 | "outputId": "fcd88810-42d7-41d6-d4c4-90d808704f14"
227 | },
228 | "source": [
229 | "from sklearn.neighbors import KNeighborsClassifier\n",
230 | "\n",
231 | "knn = KNeighborsClassifier(3) # Define classifier\n",
232 | "knn.fit(X_train, y_train) # Train model\n",
233 | "\n",
234 | "# Make predictions\n",
235 | "y_train_pred = knn.predict(X_train)\n",
236 | "y_test_pred = knn.predict(X_test)\n",
237 | "\n",
238 | "# Training set performance\n",
239 | "knn_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy\n",
240 | "knn_train_mcc = matthews_corrcoef(y_train, y_train_pred) # Calculate MCC\n",
241 | "knn_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score\n",
242 | "\n",
243 | "# Test set performance\n",
244 | "knn_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy\n",
245 | "knn_test_mcc = matthews_corrcoef(y_test, y_test_pred) # Calculate MCC\n",
246 | "knn_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score\n",
247 | "\n",
248 | "print('Model performance for Training set')\n",
249 | "print('- Accuracy: %s' % knn_train_accuracy)\n",
250 | "print('- MCC: %s' % knn_train_mcc)\n",
251 | "print('- F1 score: %s' % knn_train_f1)\n",
252 | "print('----------------------------------')\n",
253 | "print('Model performance for Test set')\n",
254 | "print('- Accuracy: %s' % knn_test_accuracy)\n",
255 | "print('- MCC: %s' % knn_test_mcc)\n",
256 | "print('- F1 score: %s' % knn_test_f1)"
257 | ],
258 | "execution_count": null,
259 | "outputs": [
260 | {
261 | "output_type": "stream",
262 | "text": [
263 | "Model performance for Training set\n",
264 | "- Accuracy: 0.9583333333333334\n",
265 | "- MCC: 0.9375976715114386\n",
266 | "- F1 score: 0.9583268218992551\n",
267 | "----------------------------------\n",
268 | "Model performance for Test set\n",
269 | "- Accuracy: 1.0\n",
270 | "- MCC: 1.0\n",
271 | "- F1 score: 1.0\n"
272 | ],
273 | "name": "stdout"
274 | }
275 | ]
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {
280 | "id": "ojasbTOn4-x-"
281 | },
282 | "source": [
283 | "**Support vector machine (Radial basis function kernel)**"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "metadata": {
289 | "id": "ot6hHeU04-2j",
290 | "colab": {
291 | "base_uri": "https://localhost:8080/"
292 | },
293 | "outputId": "b95d2e3a-5cd2-47c7-cdc7-87bbd8b9da52"
294 | },
295 | "source": [
296 | "from sklearn.svm import SVC\n",
297 | "\n",
298 | "svm_rbf = SVC(gamma=2, C=1)\n",
299 | "svm_rbf.fit(X_train, y_train)\n",
300 | "\n",
301 | "# Make predictions\n",
302 | "y_train_pred = svm_rbf.predict(X_train)\n",
303 | "y_test_pred = svm_rbf.predict(X_test)\n",
304 | "\n",
305 | "# Training set performance\n",
306 | "svm_rbf_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy\n",
307 | "svm_rbf_train_mcc = matthews_corrcoef(y_train, y_train_pred) # Calculate MCC\n",
308 | "svm_rbf_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score\n",
309 | "\n",
310 | "# Test set performance\n",
311 | "svm_rbf_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy\n",
312 | "svm_rbf_test_mcc = matthews_corrcoef(y_test, y_test_pred) # Calculate MCC\n",
313 | "svm_rbf_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score\n",
314 | "\n",
315 | "print('Model performance for Training set')\n",
316 | "print('- Accuracy: %s' % svm_rbf_train_accuracy)\n",
317 | "print('- MCC: %s' % svm_rbf_train_mcc)\n",
318 | "print('- F1 score: %s' % svm_rbf_train_f1)\n",
319 | "print('----------------------------------')\n",
320 | "print('Model performance for Test set')\n",
321 | "print('- Accuracy: %s' % svm_rbf_test_accuracy)\n",
322 | "print('- MCC: %s' % svm_rbf_test_mcc)\n",
323 | "print('- F1 score: %s' % svm_rbf_test_f1)"
324 | ],
325 | "execution_count": null,
326 | "outputs": [
327 | {
328 | "output_type": "stream",
329 | "text": [
330 | "Model performance for Training set\n",
331 | "- Accuracy: 0.9916666666666667\n",
332 | "- MCC: 0.9876028806587153\n",
333 | "- F1 score: 0.9916653643798509\n",
334 | "----------------------------------\n",
335 | "Model performance for Test set\n",
336 | "- Accuracy: 0.9666666666666667\n",
337 | "- MCC: 0.9515873026942034\n",
338 | "- F1 score: 0.9665831244778613\n"
339 | ],
340 | "name": "stdout"
341 | }
342 | ]
343 | },
344 | {
345 | "cell_type": "markdown",
346 | "metadata": {
347 | "id": "Tus32H-i42PT"
348 | },
349 | "source": [
350 | "**Decision tree**"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "metadata": {
356 | "id": "d3YJF0rz44Ar",
357 | "colab": {
358 | "base_uri": "https://localhost:8080/"
359 | },
360 | "outputId": "198405ac-af31-4eb8-d773-5e00b350ad00"
361 | },
362 | "source": [
363 | "from sklearn.tree import DecisionTreeClassifier\n",
364 | "\n",
365 | "dt = DecisionTreeClassifier(max_depth=5) # Define classifier\n",
366 | "dt.fit(X_train, y_train) # Train model\n",
367 | "\n",
368 | "# Make predictions\n",
369 | "y_train_pred = dt.predict(X_train)\n",
370 | "y_test_pred = dt.predict(X_test)\n",
371 | "\n",
372 | "# Training set performance\n",
373 | "dt_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy\n",
374 | "dt_train_mcc = matthews_corrcoef(y_train, y_train_pred) # Calculate MCC\n",
375 | "dt_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score\n",
376 | "\n",
377 | "# Test set performance\n",
378 | "dt_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy\n",
379 | "dt_test_mcc = matthews_corrcoef(y_test, y_test_pred) # Calculate MCC\n",
380 | "dt_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score\n",
381 | "\n",
382 | "print('Model performance for Training set')\n",
383 | "print('- Accuracy: %s' % dt_train_accuracy)\n",
384 | "print('- MCC: %s' % dt_train_mcc)\n",
385 | "print('- F1 score: %s' % dt_train_f1)\n",
386 | "print('----------------------------------')\n",
387 | "print('Model performance for Test set')\n",
388 | "print('- Accuracy: %s' % dt_test_accuracy)\n",
389 | "print('- MCC: %s' % dt_test_mcc)\n",
390 | "print('- F1 score: %s' % dt_test_f1)"
391 | ],
392 | "execution_count": null,
393 | "outputs": [
394 | {
395 | "output_type": "stream",
396 | "text": [
397 | "Model performance for Training set\n",
398 | "- Accuracy: 1.0\n",
399 | "- MCC: 1.0\n",
400 | "- F1 score: 1.0\n",
401 | "----------------------------------\n",
402 | "Model performance for Test set\n",
403 | "- Accuracy: 0.9333333333333333\n",
404 | "- MCC: 0.9\n",
405 | "- F1 score: 0.9333333333333333\n"
406 | ],
407 | "name": "stdout"
408 | }
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {
414 | "id": "XXd2iTxuviDb"
415 | },
416 | "source": [
417 | "**Random forest**"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "metadata": {
423 | "colab": {
424 | "base_uri": "https://localhost:8080/"
425 | },
426 | "id": "a4iahxJWvhVu",
427 | "outputId": "e4466663-02b7-4e55-9f55-01f568b79a17"
428 | },
429 | "source": [
430 | "from sklearn.ensemble import RandomForestClassifier\n",
431 | "\n",
432 | "rf = RandomForestClassifier(n_estimators=10) # Define classifier\n",
433 | "rf.fit(X_train, y_train) # Train model\n",
434 | "\n",
435 | "# Make predictions\n",
436 | "y_train_pred = rf.predict(X_train)\n",
437 | "y_test_pred = rf.predict(X_test)\n",
438 | "\n",
439 | "# Training set performance\n",
440 | "rf_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy\n",
441 | "rf_train_mcc = matthews_corrcoef(y_train, y_train_pred) # Calculate MCC\n",
442 | "rf_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score\n",
443 | "\n",
444 | "# Test set performance\n",
445 | "rf_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy\n",
446 | "rf_test_mcc = matthews_corrcoef(y_test, y_test_pred) # Calculate MCC\n",
447 | "rf_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score\n",
448 | "\n",
449 | "print('Model performance for Training set')\n",
450 | "print('- Accuracy: %s' % rf_train_accuracy)\n",
451 | "print('- MCC: %s' % rf_train_mcc)\n",
452 | "print('- F1 score: %s' % rf_train_f1)\n",
453 | "print('----------------------------------')\n",
454 | "print('Model performance for Test set')\n",
455 | "print('- Accuracy: %s' % rf_test_accuracy)\n",
456 | "print('- MCC: %s' % rf_test_mcc)\n",
457 | "print('- F1 score: %s' % rf_test_f1)"
458 | ],
459 | "execution_count": null,
460 | "outputs": [
461 | {
462 | "output_type": "stream",
463 | "text": [
464 | "Model performance for Training set\n",
465 | "- Accuracy: 0.9916666666666667\n",
466 | "- MCC: 0.9876028806587153\n",
467 | "- F1 score: 0.9916653643798509\n",
468 | "----------------------------------\n",
469 | "Model performance for Test set\n",
470 | "- Accuracy: 0.9333333333333333\n",
471 | "- MCC: 0.9\n",
472 | "- F1 score: 0.9333333333333333\n"
473 | ],
474 | "name": "stdout"
475 | }
476 | ]
477 | },
478 | {
479 | "cell_type": "markdown",
480 | "metadata": {
481 | "id": "G_H6KkezwfH0"
482 | },
483 | "source": [
484 | "**Neural network**"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "metadata": {
490 | "colab": {
491 | "base_uri": "https://localhost:8080/"
492 | },
493 | "id": "06mNcVuUwrpi",
494 | "outputId": "f4e49b00-d3ea-4501-8669-cab30141e7f3"
495 | },
496 | "source": [
497 | "from sklearn.neural_network import MLPClassifier\n",
498 | "\n",
499 | "mlp = MLPClassifier(alpha=1, max_iter=1000)\n",
500 | "mlp.fit(X_train, y_train)\n",
501 | "\n",
502 | "# Make predictions\n",
503 | "y_train_pred = mlp.predict(X_train)\n",
504 | "y_test_pred = mlp.predict(X_test)\n",
505 | "\n",
506 | "# Training set performance\n",
507 | "mlp_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy\n",
508 | "mlp_train_mcc = matthews_corrcoef(y_train, y_train_pred) # Calculate MCC\n",
509 | "mlp_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score\n",
510 | "\n",
511 | "# Test set performance\n",
512 | "mlp_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy\n",
513 | "mlp_test_mcc = matthews_corrcoef(y_test, y_test_pred) # Calculate MCC\n",
514 | "mlp_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score\n",
515 | "\n",
516 | "print('Model performance for Training set')\n",
517 | "print('- Accuracy: %s' % mlp_train_accuracy)\n",
518 | "print('- MCC: %s' % mlp_train_mcc)\n",
519 | "print('- F1 score: %s' % mlp_train_f1)\n",
520 | "print('----------------------------------')\n",
521 | "print('Model performance for Test set')\n",
522 | "print('- Accuracy: %s' % mlp_test_accuracy)\n",
523 | "print('- MCC: %s' % mlp_test_mcc)\n",
524 | "print('- F1 score: %s' % mlp_test_f1)"
525 | ],
526 | "execution_count": null,
527 | "outputs": [
528 | {
529 | "output_type": "stream",
530 | "text": [
531 | "Model performance for Training set\n",
532 | "- Accuracy: 0.9833333333333333\n",
533 | "- MCC: 0.9754065040827025\n",
534 | "- F1 score: 0.9833229101521785\n",
535 | "----------------------------------\n",
536 | "Model performance for Test set\n",
537 | "- Accuracy: 1.0\n",
538 | "- MCC: 1.0\n",
539 | "- F1 score: 1.0\n"
540 | ],
541 | "name": "stdout"
542 | }
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "metadata": {
548 | "id": "SPP95Rpyt8go"
549 | },
550 | "source": [
551 | "# **Build Stacked model**"
552 | ]
553 | },
554 | {
555 | "cell_type": "code",
556 | "metadata": {
557 | "colab": {
558 | "base_uri": "https://localhost:8080/"
559 | },
560 | "id": "WO_qR3303OUp",
561 | "outputId": "985e12c1-9e17-4bd1-f9ca-8d705b47afd0"
562 | },
563 | "source": [
564 | "# Define estimators\n",
565 | "from sklearn.ensemble import StackingClassifier\n",
566 | "from sklearn.linear_model import LogisticRegression\n",
567 | "\n",
568 | "estimator_list = [\n",
569 | " ('knn',knn),\n",
570 | " ('svm_rbf',svm_rbf),\n",
571 | " ('dt',dt),\n",
572 | " ('rf',rf),\n",
573 | " ('mlp',mlp) ]\n",
574 | "\n",
575 | "# Build stack model\n",
576 | "stack_model = StackingClassifier(\n",
577 | " estimators=estimator_list, final_estimator=LogisticRegression()\n",
578 | ")\n",
579 | "\n",
580 | "# Train stacked model\n",
581 | "stack_model.fit(X_train, y_train)\n",
582 | "\n",
583 | "# Make predictions\n",
584 | "y_train_pred = stack_model.predict(X_train)\n",
585 | "y_test_pred = stack_model.predict(X_test)\n",
586 | "\n",
587 | "# Training set model performance\n",
588 | "stack_model_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy\n",
589 | "stack_model_train_mcc = matthews_corrcoef(y_train, y_train_pred) # Calculate MCC\n",
590 | "stack_model_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score\n",
591 | "\n",
592 | "# Test set model performance\n",
593 | "stack_model_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy\n",
594 | "stack_model_test_mcc = matthews_corrcoef(y_test, y_test_pred) # Calculate MCC\n",
595 | "stack_model_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score\n",
596 | "\n",
597 | "print('Model performance for Training set')\n",
598 | "print('- Accuracy: %s' % stack_model_train_accuracy)\n",
599 | "print('- MCC: %s' % stack_model_train_mcc)\n",
600 | "print('- F1 score: %s' % stack_model_train_f1)\n",
601 | "print('----------------------------------')\n",
602 | "print('Model performance for Test set')\n",
603 | "print('- Accuracy: %s' % stack_model_test_accuracy)\n",
604 | "print('- MCC: %s' % stack_model_test_mcc)\n",
605 | "print('- F1 score: %s' % stack_model_test_f1)"
606 | ],
607 | "execution_count": null,
608 | "outputs": [
609 | {
610 | "output_type": "stream",
611 | "text": [
612 | "Model performance for Training set\n",
613 | "- Accuracy: 0.9916666666666667\n",
614 | "- MCC: 0.9876028806587153\n",
615 | "- F1 score: 0.9916653643798509\n",
616 | "----------------------------------\n",
617 | "Model performance for Test set\n",
618 | "- Accuracy: 1.0\n",
619 | "- MCC: 1.0\n",
620 | "- F1 score: 1.0\n"
621 | ],
622 | "name": "stdout"
623 | }
624 | ]
625 | },
626 | {
627 | "cell_type": "markdown",
628 | "metadata": {
629 | "id": "5Az-v5Tmden0"
630 | },
631 | "source": [
632 | "# **Results**"
633 | ]
634 | },
635 | {
636 | "cell_type": "code",
637 | "metadata": {
638 | "id": "Pr_A9dIUdhes"
639 | },
640 | "source": [
641 | "acc_train_list = {'knn':knn_train_accuracy,\n",
642 | "'svm_rbf': svm_rbf_train_accuracy,\n",
643 | "'dt': dt_train_accuracy,\n",
644 | "'rf': rf_train_accuracy,\n",
645 | "'mlp': mlp_train_accuracy,\n",
646 | "'stack': stack_model_train_accuracy}\n",
647 | "\n",
648 | "mcc_train_list = {'knn':knn_train_mcc,\n",
649 | "'svm_rbf': svm_rbf_train_mcc,\n",
650 | "'dt': dt_train_mcc,\n",
651 | "'rf': rf_train_mcc,\n",
652 | "'mlp': mlp_train_mcc,\n",
653 | "'stack': stack_model_train_mcc}\n",
654 | "\n",
655 | "f1_train_list = {'knn':knn_train_f1,\n",
656 | "'svm_rbf': svm_rbf_train_f1,\n",
657 | "'dt': dt_train_f1,\n",
658 | "'rf': rf_train_f1,\n",
659 | "'mlp': mlp_train_f1,\n",
660 | "'stack': stack_model_train_f1}"
661 | ],
662 | "execution_count": null,
663 | "outputs": []
664 | },
665 | {
666 | "cell_type": "code",
667 | "metadata": {
668 | "id": "RrUnYrWj3p-s",
669 | "colab": {
670 | "base_uri": "https://localhost:8080/"
671 | },
672 | "outputId": "8e74df8b-3661-42f1-c339-3a288e28c3de"
673 | },
674 | "source": [
675 | "mcc_train_list"
676 | ],
677 | "execution_count": null,
678 | "outputs": [
679 | {
680 | "output_type": "execute_result",
681 | "data": {
682 | "text/plain": [
683 | "{'dt': 1.0,\n",
684 | " 'knn': 0.9375976715114386,\n",
685 | " 'mlp': 0.9754065040827025,\n",
686 | " 'rf': 0.9876028806587153,\n",
687 | " 'stack': 0.9876028806587153,\n",
688 | " 'svm_rbf': 0.9876028806587153}"
689 | ]
690 | },
691 | "metadata": {
692 | "tags": []
693 | },
694 | "execution_count": 14
695 | }
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "metadata": {
701 | "id": "xPIcIXOte2fC",
702 | "colab": {
703 | "base_uri": "https://localhost:8080/",
704 | "height": 233
705 | },
706 | "outputId": "77111576-ece3-4e8f-ed8f-947860b2868b"
707 | },
708 | "source": [
709 | "import pandas as pd\n",
710 | "\n",
711 | "acc_df = pd.DataFrame.from_dict(acc_train_list, orient='index', columns=['Accuracy'])\n",
712 | "mcc_df = pd.DataFrame.from_dict(mcc_train_list, orient='index', columns=['MCC'])\n",
713 | "f1_df = pd.DataFrame.from_dict(f1_train_list, orient='index', columns=['F1'])\n",
714 | "df = pd.concat([acc_df, mcc_df, f1_df], axis=1)\n",
715 | "df"
716 | ],
717 | "execution_count": null,
718 | "outputs": [
719 | {
720 | "output_type": "execute_result",
721 | "data": {
722 | "text/html": [
723 | "
\n",
724 | "\n",
737 | "
\n",
738 | " \n",
739 | " \n",
740 | " | \n",
741 | " Accuracy | \n",
742 | " MCC | \n",
743 | " F1 | \n",
744 | "
\n",
745 | " \n",
746 | " \n",
747 | " \n",
748 | " knn | \n",
749 | " 0.958333 | \n",
750 | " 0.937598 | \n",
751 | " 0.958327 | \n",
752 | "
\n",
753 | " \n",
754 | " svm_rbf | \n",
755 | " 0.991667 | \n",
756 | " 0.987603 | \n",
757 | " 0.991665 | \n",
758 | "
\n",
759 | " \n",
760 | " dt | \n",
761 | " 1.000000 | \n",
762 | " 1.000000 | \n",
763 | " 1.000000 | \n",
764 | "
\n",
765 | " \n",
766 | " rf | \n",
767 | " 0.991667 | \n",
768 | " 0.987603 | \n",
769 | " 0.991665 | \n",
770 | "
\n",
771 | " \n",
772 | " mlp | \n",
773 | " 0.983333 | \n",
774 | " 0.975407 | \n",
775 | " 0.983323 | \n",
776 | "
\n",
777 | " \n",
778 | " stack | \n",
779 | " 0.991667 | \n",
780 | " 0.987603 | \n",
781 | " 0.991665 | \n",
782 | "
\n",
783 | " \n",
784 | "
\n",
785 | "
"
786 | ],
787 | "text/plain": [
788 | " Accuracy MCC F1\n",
789 | "knn 0.958333 0.937598 0.958327\n",
790 | "svm_rbf 0.991667 0.987603 0.991665\n",
791 | "dt 1.000000 1.000000 1.000000\n",
792 | "rf 0.991667 0.987603 0.991665\n",
793 | "mlp 0.983333 0.975407 0.983323\n",
794 | "stack 0.991667 0.987603 0.991665"
795 | ]
796 | },
797 | "metadata": {
798 | "tags": []
799 | },
800 | "execution_count": 21
801 | }
802 | ]
803 | },
804 | {
805 | "cell_type": "code",
806 | "metadata": {
807 | "id": "IVz6u1opkzyw"
808 | },
809 | "source": [
810 | "df.to_csv('results.csv')"
811 | ],
812 | "execution_count": null,
813 | "outputs": []
814 | }
815 | ]
816 | }
--------------------------------------------------------------------------------
/Y_Scrambling_Solubility.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Y-Scrambling-Solubility.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": []
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {
19 | "id": "QQHZHevuXdEy"
20 | },
21 | "source": [
22 | "# **Y-Scrambling for Modeling of Solubility Dataset**\n",
23 | "\n",
24 | "Watch the [**YouTube video**](https://youtu.be/gKCDSiRRyUo) accompanying this Jupyter notebook\n",
25 | "\n",
26 | "*The Y column is scrambled, shuffled, or permuted. In plain English: \"change the order of\".*\n",
27 | "\n",
28 | "Chanin Nantasenamat\n",
29 | "\n",
30 | "*Data Professor YouTube channel, http://youtube.com/dataprofessor*"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {
36 | "id": "g1qtHa0zXfWM"
37 | },
38 | "source": [
39 | "# Read in data"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "metadata": {
45 | "id": "9MdfbvFKXtXq"
46 | },
47 | "source": [
48 | "import pandas as pd"
49 | ],
50 | "execution_count": null,
51 | "outputs": []
52 | },
53 | {
54 | "cell_type": "code",
55 | "metadata": {
56 | "id": "nerGP0fCXfgP",
57 | "colab": {
58 | "base_uri": "https://localhost:8080/",
59 | "height": 402
60 | },
61 | "outputId": "089ac535-f433-4428-ba5f-1ecd0efb5bc9"
62 | },
63 | "source": [
64 | "delaney_with_descriptors_url = 'https://raw.githubusercontent.com/dataprofessor/data/master/delaney_solubility_with_descriptors.csv'\n",
65 | "dataset = pd.read_csv(delaney_with_descriptors_url)\n",
66 | "dataset"
67 | ],
68 | "execution_count": null,
69 | "outputs": [
70 | {
71 | "output_type": "execute_result",
72 | "data": {
73 | "text/html": [
74 | "\n",
75 | "\n",
88 | "
\n",
89 | " \n",
90 | " \n",
91 | " | \n",
92 | " MolLogP | \n",
93 | " MolWt | \n",
94 | " NumRotatableBonds | \n",
95 | " AromaticProportion | \n",
96 | " logS | \n",
97 | "
\n",
98 | " \n",
99 | " \n",
100 | " \n",
101 | " 0 | \n",
102 | " 2.59540 | \n",
103 | " 167.850 | \n",
104 | " 0.0 | \n",
105 | " 0.000000 | \n",
106 | " -2.180 | \n",
107 | "
\n",
108 | " \n",
109 | " 1 | \n",
110 | " 2.37650 | \n",
111 | " 133.405 | \n",
112 | " 0.0 | \n",
113 | " 0.000000 | \n",
114 | " -2.000 | \n",
115 | "
\n",
116 | " \n",
117 | " 2 | \n",
118 | " 2.59380 | \n",
119 | " 167.850 | \n",
120 | " 1.0 | \n",
121 | " 0.000000 | \n",
122 | " -1.740 | \n",
123 | "
\n",
124 | " \n",
125 | " 3 | \n",
126 | " 2.02890 | \n",
127 | " 133.405 | \n",
128 | " 1.0 | \n",
129 | " 0.000000 | \n",
130 | " -1.480 | \n",
131 | "
\n",
132 | " \n",
133 | " 4 | \n",
134 | " 2.91890 | \n",
135 | " 187.375 | \n",
136 | " 1.0 | \n",
137 | " 0.000000 | \n",
138 | " -3.040 | \n",
139 | "
\n",
140 | " \n",
141 | " ... | \n",
142 | " ... | \n",
143 | " ... | \n",
144 | " ... | \n",
145 | " ... | \n",
146 | " ... | \n",
147 | "
\n",
148 | " \n",
149 | " 1139 | \n",
150 | " 1.98820 | \n",
151 | " 287.343 | \n",
152 | " 8.0 | \n",
153 | " 0.000000 | \n",
154 | " 1.144 | \n",
155 | "
\n",
156 | " \n",
157 | " 1140 | \n",
158 | " 3.42130 | \n",
159 | " 286.114 | \n",
160 | " 2.0 | \n",
161 | " 0.333333 | \n",
162 | " -4.925 | \n",
163 | "
\n",
164 | " \n",
165 | " 1141 | \n",
166 | " 3.60960 | \n",
167 | " 308.333 | \n",
168 | " 4.0 | \n",
169 | " 0.695652 | \n",
170 | " -3.893 | \n",
171 | "
\n",
172 | " \n",
173 | " 1142 | \n",
174 | " 2.56214 | \n",
175 | " 354.815 | \n",
176 | " 3.0 | \n",
177 | " 0.521739 | \n",
178 | " -3.790 | \n",
179 | "
\n",
180 | " \n",
181 | " 1143 | \n",
182 | " 2.02164 | \n",
183 | " 179.219 | \n",
184 | " 1.0 | \n",
185 | " 0.461538 | \n",
186 | " -2.581 | \n",
187 | "
\n",
188 | " \n",
189 | "
\n",
190 | "
1144 rows × 5 columns
\n",
191 | "
"
192 | ],
193 | "text/plain": [
194 | " MolLogP MolWt NumRotatableBonds AromaticProportion logS\n",
195 | "0 2.59540 167.850 0.0 0.000000 -2.180\n",
196 | "1 2.37650 133.405 0.0 0.000000 -2.000\n",
197 | "2 2.59380 167.850 1.0 0.000000 -1.740\n",
198 | "3 2.02890 133.405 1.0 0.000000 -1.480\n",
199 | "4 2.91890 187.375 1.0 0.000000 -3.040\n",
200 | "... ... ... ... ... ...\n",
201 | "1139 1.98820 287.343 8.0 0.000000 1.144\n",
202 | "1140 3.42130 286.114 2.0 0.333333 -4.925\n",
203 | "1141 3.60960 308.333 4.0 0.695652 -3.893\n",
204 | "1142 2.56214 354.815 3.0 0.521739 -3.790\n",
205 | "1143 2.02164 179.219 1.0 0.461538 -2.581\n",
206 | "\n",
207 | "[1144 rows x 5 columns]"
208 | ]
209 | },
210 | "metadata": {
211 | "tags": []
212 | },
213 | "execution_count": 42
214 | }
215 | ]
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {
220 | "id": "QVue8cSEtYI5"
221 | },
222 | "source": [
223 | "# Y-Scrambling"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "metadata": {
229 | "id": "IlL9mp8EdbFm"
230 | },
231 | "source": [
232 | "# Data set\n",
233 | "X = dataset.drop(['logS'], axis=1)\n",
234 | "Y = dataset.iloc[:,-1]"
235 | ],
236 | "execution_count": null,
237 | "outputs": []
238 | },
239 | {
240 | "cell_type": "code",
241 | "metadata": {
242 | "id": "YuXAKJhrtbbK"
243 | },
244 | "source": [
245 | "Y.sample(frac=1, replace=False, random_state=0)"
246 | ],
247 | "execution_count": null,
248 | "outputs": []
249 | },
250 | {
251 | "cell_type": "markdown",
252 | "metadata": {
253 | "id": "MVC1BjivfWXo"
254 | },
255 | "source": [
256 | "# Original vs Y-scrambled model"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {
262 | "id": "HobSLiRtgWza"
263 | },
264 | "source": [
265 | "### Model from original X-Y pairs"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "metadata": {
271 | "colab": {
272 | "base_uri": "https://localhost:8080/"
273 | },
274 | "id": "L8yWh2hRxDgo",
275 | "outputId": "d5fa2053-9842-42d9-fd9c-e5fb2fa8c3da"
276 | },
277 | "source": [
278 | "# Model from original X-Y pairs\n",
279 | "from sklearn.model_selection import train_test_split\n",
280 | "from sklearn.linear_model import LinearRegression\n",
281 | "from sklearn.metrics import r2_score\n",
282 | "\n",
283 | "# Data set\n",
284 | "X = dataset.drop(['logS'], axis=1)\n",
285 | "Y = dataset.iloc[:,-1]\n",
286 | "# Data split\n",
287 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)\n",
288 | "# No Y-shuffling here: the baseline model uses the original X-Y pairs\n",
289 | "\n",
290 | "# Model building\n",
291 | "model = LinearRegression()\n",
292 | "model.fit(X_train, Y_train)\n",
293 | "# Make prediction\n",
294 | "Y_train_pred = model.predict(X_train)\n",
295 | "# Compute R2 score\n",
296 | "r2a = r2_score(Y_train, Y_train_pred)\n",
297 | "# Print R2 score\n",
298 | "r2a"
299 | ],
300 | "execution_count": null,
301 | "outputs": [
302 | {
303 | "output_type": "execute_result",
304 | "data": {
305 | "text/plain": [
306 | "0.7692295963594564"
307 | ]
308 | },
309 | "metadata": {
310 | "tags": []
311 | },
312 | "execution_count": 55
313 | }
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "metadata": {
319 | "colab": {
320 | "base_uri": "https://localhost:8080/",
321 | "height": 351
322 | },
323 | "id": "Mxg3d3aRgCTa",
324 | "outputId": "6bb4a994-23e0-4472-bdb8-15938416e9ef"
325 | },
326 | "source": [
327 | "# Plot of predicted vs actual (training set)\n",
328 | "import matplotlib.pyplot as plt\n",
329 | "import numpy as np\n",
330 | "\n",
331 | "plt.figure(figsize=(5,5))\n",
332 | "plt.scatter(x=Y_train, y=Y_train_pred, c=\"#7CAE00\", alpha=0.3)\n",
333 | "\n",
334 | "# Add trendline: degree-1 least-squares fit of predicted vs experimental values\n",
335 | "# https://stackoverflow.com/questions/26447191/how-to-add-trendline-in-python-matplotlib-dot-scatter-graphs\n",
336 | "z = np.polyfit(Y_train, Y_train_pred, 1)\n",
337 | "p = np.poly1d(z)\n",
338 | "# Draw the line over the same training values it was fitted on (not the full Y)\n",
339 | "plt.plot(Y_train, p(Y_train), \"#F8766D\")\n",
340 | "plt.ylabel('Predicted LogS')\n",
341 | "plt.xlabel('Experimental LogS')\n"
342 | ],
343 | "execution_count": null,
344 | "outputs": [
345 | {
346 | "output_type": "execute_result",
347 | "data": {
348 | "text/plain": [
349 | "Text(0.5, 0, 'Experimental LogS')"
350 | ]
351 | },
352 | "metadata": {
353 | "tags": []
354 | },
355 | "execution_count": 45
356 | },
357 | {
358 | "output_type": "display_data",
359 | "data": {
360 | "image/png": "\n",
361 | "text/plain": [
362 | ""
363 | ]
364 | },
365 | "metadata": {
366 | "tags": []
367 | }
368 | }
369 | ]
370 | },
371 | {
372 | "cell_type": "markdown",
373 | "metadata": {
374 | "id": "nzbvojBOgZSQ"
375 | },
376 | "source": [
377 | "### Build Y-scrambled models"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "metadata": {
383 | "colab": {
384 | "base_uri": "https://localhost:8080/"
385 | },
386 | "id": "74o3fUjguXdD",
387 | "outputId": "1f188ec0-f952-4429-b796-f148d2e989ba"
388 | },
389 | "source": [
390 | "# Build Y-scrambled models: refit the same regression on shuffled targets and collect R2\n",
391 | "from sklearn.model_selection import train_test_split\n",
392 | "from sklearn.linear_model import LinearRegression\n",
393 | "from sklearn.metrics import r2_score\n",
394 | "\n",
395 | "r2_train_list = []\n",
396 | "\n",
397 | "# Data set: pick the target by name so X and Y cannot disagree on which column is logS\n",
398 | "X = dataset.drop(['logS'], axis=1)\n",
399 | "Y = dataset['logS']\n",
400 | "# Data split: 80/20, fixed random_state so the split is repeatable\n",
401 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)\n",
402 | "# NOTE(review): the loop rebinds Y_train, model and Y_train_pred; cells run afterwards see the last scrambled values\n",
403 | "for i in range(1000):\n",
404 | "  # Y-Shuffling: all training targets in a new random order (seeded by i); applied to the previous iteration's Y_train\n",
405 | "  Y_train = Y_train.sample(frac=1, replace=False, random_state=i)\n",
406 | "  # Model building\n",
407 | "  model = LinearRegression()\n",
408 | "  model.fit(X_train, Y_train)\n",
409 | "  # Make prediction on the training set\n",
410 | "  Y_train_pred = model.predict(X_train)\n",
411 | "  # Compute R2 score against the shuffled targets and record it\n",
412 | "  r2b = r2_score(Y_train, Y_train_pred)\n",
413 | "  r2_train_list.append(r2b)\n",
414 | "# Display the R2 scores of all scrambled models\n",
415 | "r2_train_list"
416 | ],
417 | "execution_count": null,
418 | "outputs": [
419 | {
420 | "output_type": "execute_result",
421 | "data": {
422 | "text/plain": [
423 | "[0.005000466583799712,\n",
424 | " 0.0007344012911356668,\n",
425 | " 0.0015350025013735058,\n",
426 | " 0.008489665717226091,\n",
427 | " 0.003915607299682988,\n",
428 | " 0.004548790428902105,\n",
429 | " 0.0008429663446728464,\n",
430 | " 0.001615778039415705,\n",
431 | " 0.006630020361143352,\n",
432 | " 0.001819193577353495,\n",
433 | " 0.003317010494101269,\n",
434 | " 0.0018133191135055737,\n",
435 | " 0.0030177365815898805,\n",
436 | " 0.002978056982963273,\n",
437 | " 0.0032133843178231425,\n",
438 | " 0.007567690178066466,\n",
439 | " 0.01087936035200976,\n",
440 | " 0.008827238527726822,\n",
441 | " 0.004009406151912254,\n",
442 | " 0.0025264174317727317,\n",
443 | " 0.00024635037369347135,\n",
444 | " 0.0038096144515310515,\n",
445 | " 0.0021811896733753278,\n",
446 | " 0.003952612637298625,\n",
447 | " 0.0007547520675049713,\n",
448 | " 0.0026415819692209297,\n",
449 | " 0.004859929867937374,\n",
450 | " 0.005013421614001201,\n",
451 | " 0.003921301914299069,\n",
452 | " 0.008410582384709042,\n",
453 | " 0.0031573997486421534,\n",
454 | " 0.0006141583859642363,\n",
455 | " 0.0037322043369546476,\n",
456 | " 0.0014226823214904538,\n",
457 | " 0.002413831601759009,\n",
458 | " 0.0036567985620454824,\n",
459 | " 0.005270657997406225,\n",
460 | " 0.0045796381916292095,\n",
461 | " 0.00498831882527051,\n",
462 | " 0.010597172477750827,\n",
463 | " 0.00871373552960275,\n",
464 | " 0.003948403001362344,\n",
465 | " 0.002950385888369267,\n",
466 | " 0.007037758573460229,\n",
467 | " 0.010832059826852425,\n",
468 | " 0.008131938903896963,\n",
469 | " 0.007999034720979292,\n",
470 | " 0.008638599076709608,\n",
471 | " 0.0032013595978932496,\n",
472 | " 0.0013196311506521452,\n",
473 | " 0.0013291023473434027,\n",
474 | " 0.007323949668804786,\n",
475 | " 0.0018985354897567142,\n",
476 | " 0.0069323185799359255,\n",
477 | " 0.006511403844476016,\n",
478 | " 0.004377721686534275,\n",
479 | " 0.007408323038023079,\n",
480 | " 0.003006078037850468,\n",
481 | " 0.0026539799904796713,\n",
482 | " 0.0047386951379589926,\n",
483 | " 0.002689307102032523,\n",
484 | " 0.004177588941504995,\n",
485 | " 0.00643086206709198,\n",
486 | " 0.00216643345359524,\n",
487 | " 0.0023134271335751855,\n",
488 | " 0.003917587881594908,\n",
489 | " 0.002033774930538357,\n",
490 | " 0.006934880770625096,\n",
491 | " 0.004939442512855385,\n",
492 | " 0.0035957257725197644,\n",
493 | " 0.011866786848136246,\n",
494 | " 0.0011654083533991777,\n",
495 | " 0.011894508579199603,\n",
496 | " 0.002228361747336871,\n",
497 | " 0.003919615555792011,\n",
498 | " 0.001594547420916137,\n",
499 | " 0.011310779297832307,\n",
500 | " 0.012481781863064145,\n",
501 | " 0.004662216529226493,\n",
502 | " 0.00495312966467254,\n",
503 | " 0.0026013848148636054,\n",
504 | " 0.0015124614629126176,\n",
505 | " 0.01300198657434204,\n",
506 | " 0.0005989494519624827,\n",
507 | " 0.009082201721269834,\n",
508 | " 0.00674381172152505,\n",
509 | " 0.0015830667623750738,\n",
510 | " 0.001511838766250162,\n",
511 | " 0.00867974725822529,\n",
512 | " 0.002096070366515379,\n",
513 | " 0.006354930012505755,\n",
514 | " 0.003055124811773724,\n",
515 | " 0.007620761644619445,\n",
516 | " 0.0038545061781107837,\n",
517 | " 0.002802180919902697,\n",
518 | " 0.00884773788121429,\n",
519 | " 0.009841780056854876,\n",
520 | " 0.003165536483330178,\n",
521 | " 0.004197725292167331,\n",
522 | " 0.0012869109626193476,\n",
523 | " 0.0006692724037677955,\n",
524 | " 0.005040700224592909,\n",
525 | " 0.003662115581530734,\n",
526 | " 0.004253251306182593,\n",
527 | " 0.0013729422433721439,\n",
528 | " 0.001097790922019115,\n",
529 | " 0.00443412197164339,\n",
530 | " 0.0022810775412960282,\n",
531 | " 0.0014446195051585065,\n",
532 | " 0.010303478251273535,\n",
533 | " 0.0059565573889373,\n",
534 | " 0.005685301770052886,\n",
535 | " 0.006665789570730052,\n",
536 | " 0.009517229953613482,\n",
537 | " 0.002203096887848033,\n",
538 | " 0.0013499710801631037,\n",
539 | " 0.003117042713302154,\n",
540 | " 0.002799525698333749,\n",
541 | " 0.0033056410522598334,\n",
542 | " 0.007120054811533882,\n",
543 | " 0.0008104732052177654,\n",
544 | " 0.0034336787342901243,\n",
545 | " 0.0009484964228322346,\n",
546 | " 0.00479654524805051,\n",
547 | " 0.0020660538548120666,\n",
548 | " 0.011733817622054277,\n",
549 | " 0.007599893986225736,\n",
550 | " 0.0036319230959311888,\n",
551 | " 0.005164255502366988,\n",
552 | " 0.010114683743708719,\n",
553 | " 0.00203073259716402,\n",
554 | " 0.002437355882785819,\n",
555 | " 0.0018659287624255283,\n",
556 | " 0.001468985802389411,\n",
557 | " 0.015193030774247829,\n",
558 | " 0.0019009908356143468,\n",
559 | " 0.004333115800636889,\n",
560 | " 0.008239892453291597,\n",
561 | " 0.00490965131727128,\n",
562 | " 0.008399837947000943,\n",
563 | " 0.003198031957093561,\n",
564 | " 0.000823997651859365,\n",
565 | " 0.0032862461166272006,\n",
566 | " 0.0051270888582576335,\n",
567 | " 0.00720271300932418,\n",
568 | " 0.0017525128916038302,\n",
569 | " 0.0027976314527771162,\n",
570 | " 0.006492290296800207,\n",
571 | " 0.004012587064703532,\n",
572 | " 0.0023206408279782265,\n",
573 | " 0.003409231497062737,\n",
574 | " 0.004443801634973821,\n",
575 | " 0.00485965388953058,\n",
576 | " 0.004807142390288788,\n",
577 | " 0.008342823269261834,\n",
578 | " 0.001429588921188829,\n",
579 | " 0.0012813907050628348,\n",
580 | " 0.0007596679445294185,\n",
581 | " 0.0025087179742439014,\n",
582 | " 0.0075805035840041946,\n",
583 | " 0.0016185984959787847,\n",
584 | " 0.004462211517595738,\n",
585 | " 0.003095492029914104,\n",
586 | " 0.0038700157446952543,\n",
587 | " 0.003530323329337226,\n",
588 | " 0.005219125456200735,\n",
589 | " 0.00044230174502501995,\n",
590 | " 0.0015792494536208679,\n",
591 | " 0.0024802373085792695,\n",
592 | " 0.004419751237961123,\n",
593 | " 0.008381795625152733,\n",
594 | " 0.00797018919573389,\n",
595 | " 0.006721556147702756,\n",
596 | " 0.0005834066347230049,\n",
597 | " 0.0028060844991655287,\n",
598 | " 0.00353102216486334,\n",
599 | " 0.005480709189058408,\n",
600 | " 0.0018904895950945244,\n",
601 | " 0.0031016593876361798,\n",
602 | " 0.002407703480560741,\n",
603 | " 0.004879796978919382,\n",
604 | " 0.015670705802311113,\n",
605 | " 0.00012289299113299457,\n",
606 | " 0.0019252529477084002,\n",
607 | " 0.003852854696029806,\n",
608 | " 0.0024481243318907886,\n",
609 | " 0.007714885436046259,\n",
610 | " 0.0015689756608052718,\n",
611 | " 0.005443314556731527,\n",
612 | " 0.002797686321356463,\n",
613 | " 0.0020022660584991803,\n",
614 | " 0.000819763100763371,\n",
615 | " 0.004275738469815549,\n",
616 | " 0.01110101119293394,\n",
617 | " 0.0016244613524453166,\n",
618 | " 0.0028435254259367992,\n",
619 | " 0.0042162997967564175,\n",
620 | " 0.004830027448857077,\n",
621 | " 0.0065822632827592376,\n",
622 | " 0.011402252443220684,\n",
623 | " 0.004443972758521042,\n",
624 | " 0.00534995771770741,\n",
625 | " 0.001920357269857731,\n",
626 | " 0.0017591023226399827,\n",
627 | " 0.002831797201794606,\n",
628 | " 0.002181453053920368,\n",
629 | " 0.004847885793296536,\n",
630 | " 0.007878057942260708,\n",
631 | " 0.0016740323442665517,\n",
632 | " 0.006147535680921412,\n",
633 | " 0.008167402149740766,\n",
634 | " 0.0007071674470254807,\n",
635 | " 0.005942170808882374,\n",
636 | " 0.00106546543162922,\n",
637 | " 0.016241323307542688,\n",
638 | " 0.0022128481024765723,\n",
639 | " 0.0039803983909091345,\n",
640 | " 0.0002690941457299534,\n",
641 | " 0.007983405640247354,\n",
642 | " 0.003572074250434043,\n",
643 | " 0.0023416195366009473,\n",
644 | " 0.002044246459631527,\n",
645 | " 0.002249090803784326,\n",
646 | " 0.011636950131857393,\n",
647 | " 0.0018709069085479646,\n",
648 | " 0.002852817131938412,\n",
649 | " 0.006699943132720376,\n",
650 | " 0.0018041556662965474,\n",
651 | " 0.003873383970494415,\n",
652 | " 0.00405344376172978,\n",
653 | " 0.007270390720777509,\n",
654 | " 0.011457468923034453,\n",
655 | " 0.002059280766259164,\n",
656 | " 0.0018997722863529631,\n",
657 | " 0.004523203738765136,\n",
658 | " 0.008284939423014226,\n",
659 | " 0.0016684400542473776,\n",
660 | " 0.006009906234783147,\n",
661 | " 0.0022803749913785643,\n",
662 | " 0.00447479650536331,\n",
663 | " 0.0037436422947196846,\n",
664 | " 0.0031836752682515845,\n",
665 | " 0.004828532339577474,\n",
666 | " 0.0023597561356817343,\n",
667 | " 0.0036552810753137654,\n",
668 | " 0.0010709546265983416,\n",
669 | " 0.0010518350616078864,\n",
670 | " 0.0028147817761794425,\n",
671 | " 0.005169362675205735,\n",
672 | " 0.0033047778094559277,\n",
673 | " 0.0029975116861922224,\n",
674 | " 0.009207460581985294,\n",
675 | " 0.007737972978486218,\n",
676 | " 0.0015754923832457246,\n",
677 | " 0.00011583065231146605,\n",
678 | " 0.005468655288137225,\n",
679 | " 0.0021654039993216356,\n",
680 | " 0.008715312721165991,\n",
681 | " 0.0036523918952876544,\n",
682 | " 0.001621184094483863,\n",
683 | " 0.0022794956038061365,\n",
684 | " 0.008443961047976956,\n",
685 | " 0.000966366516956918,\n",
686 | " 0.008653777496436033,\n",
687 | " 0.010794436642036387,\n",
688 | " 0.005437530434668769,\n",
689 | " 0.0024810322534144502,\n",
690 | " 0.006048219471833649,\n",
691 | " 0.002015247027264855,\n",
692 | " 0.0025861195017254923,\n",
693 | " 0.005485808170284101,\n",
694 | " 0.005268080571138034,\n",
695 | " 0.006873936029718641,\n",
696 | " 0.00437365780383403,\n",
697 | " 0.002199669150290795,\n",
698 | " 0.0032768703087288076,\n",
699 | " 0.0011585236185726044,\n",
700 | " 0.0018937911814931319,\n",
701 | " 0.007109321799058987,\n",
702 | " 0.0006211957777206845,\n",
703 | " 0.004951700572054296,\n",
704 | " 0.006369633628615423,\n",
705 | " 0.009005938414296644,\n",
706 | " 0.008340145570433166,\n",
707 | " 0.0033135522583919164,\n",
708 | " 0.013649709987486713,\n",
709 | " 0.005978593464157411,\n",
710 | " 0.006300032753829621,\n",
711 | " 0.009695979815823486,\n",
712 | " 0.0029655130474799396,\n",
713 | " 0.004684430570111031,\n",
714 | " 0.004226951245068222,\n",
715 | " 0.004683759976562452,\n",
716 | " 0.003726377503498135,\n",
717 | " 0.0015998872248308427,\n",
718 | " 0.007049768412934365,\n",
719 | " 0.004054793058633432,\n",
720 | " 0.004811413545738774,\n",
721 | " 0.005370441913956414,\n",
722 | " 0.013832171378249769,\n",
723 | " 0.001471568662495204,\n",
724 | " 0.004775156343007514,\n",
725 | " 0.0027991578030003295,\n",
726 | " 0.0018046294735374335,\n",
727 | " 0.002779029006909517,\n",
728 | " 0.0006749855566632323,\n",
729 | " 0.0044108185839153435,\n",
730 | " 0.0082935258976361,\n",
731 | " 0.003924760254681914,\n",
732 | " 0.003016928715514844,\n",
733 | " 0.0009234024741309188,\n",
734 | " 0.0029574905323982437,\n",
735 | " 0.0018249015856593997,\n",
736 | " 0.009781759954891056,\n",
737 | " 0.0026675147765263807,\n",
738 | " 0.0025384281853455537,\n",
739 | " 0.0041912402163404305,\n",
740 | " 0.0076243237246894635,\n",
741 | " 0.004708364789492658,\n",
742 | " 0.0038045489628382168,\n",
743 | " 0.0012758745281079564,\n",
744 | " 0.00434662864976465,\n",
745 | " 0.0005682191133082881,\n",
746 | " 0.00018667834651797666,\n",
747 | " 0.0020388236546882155,\n",
748 | " 0.011213029496792704,\n",
749 | " 0.016146904811173646,\n",
750 | " 0.001353136983827774,\n",
751 | " 0.004162751189306313,\n",
752 | " 0.003756952854604978,\n",
753 | " 0.00209270775198811,\n",
754 | " 0.00633361209937866,\n",
755 | " 0.00798480475937935,\n",
756 | " 0.010552729600476929,\n",
757 | " 0.003010664576092892,\n",
758 | " 0.00463891972246655,\n",
759 | " 0.004362958867724198,\n",
760 | " 0.004630518182693222,\n",
761 | " 0.003696300619830395,\n",
762 | " 0.005913052352869319,\n",
763 | " 0.00012919070875994088,\n",
764 | " 0.003171379191364454,\n",
765 | " 0.0031512949063039475,\n",
766 | " 0.002670272671211893,\n",
767 | " 0.000915437282247078,\n",
768 | " 0.009042594852385344,\n",
769 | " 0.003918461303712317,\n",
770 | " 0.0006554351106636824,\n",
771 | " 0.0020557141048646033,\n",
772 | " 0.0037696221467028446,\n",
773 | " 0.0009065078691846695,\n",
774 | " 0.0019789039243087725,\n",
775 | " 0.005256538468091043,\n",
776 | " 0.000828370992152383,\n",
777 | " 0.0004311526532527754,\n",
778 | " 0.0037114855782175438,\n",
779 | " 0.0009066710574199277,\n",
780 | " 0.0014765190407542983,\n",
781 | " 0.0020216590791868327,\n",
782 | " 0.004581429574432638,\n",
783 | " 0.0024343606357279945,\n",
784 | " 0.0023056395518757933,\n",
785 | " 0.0022698334881298132,\n",
786 | " 0.002787310131084353,\n",
787 | " 0.004153509437309966,\n",
788 | " 0.0011642212649302097,\n",
789 | " 0.004958143741891408,\n",
790 | " 0.0011443576369982722,\n",
791 | " 0.002679130600136137,\n",
792 | " 0.0013629892906094065,\n",
793 | " 0.0027135121055957834,\n",
794 | " 0.004267691808955143,\n",
795 | " 0.0036656024556619426,\n",
796 | " 0.0056660593800023085,\n",
797 | " 0.003472373068290735,\n",
798 | " 0.004522541497810839,\n",
799 | " 0.008098821271014245,\n",
800 | " 0.008675379812432382,\n",
801 | " 0.0013809418134813622,\n",
802 | " 0.0020591211441828294,\n",
803 | " 0.0030054141732032136,\n",
804 | " 0.003135679323392093,\n",
805 | " 0.006598859825153958,\n",
806 | " 0.000784538576583893,\n",
807 | " 0.009988696868021063,\n",
808 | " 0.0026932207291372467,\n",
809 | " 0.009263861151418307,\n",
810 | " 0.0021698410219941344,\n",
811 | " 0.0027893286756858204,\n",
812 | " 0.0022090712721674066,\n",
813 | " 0.002666954347434025,\n",
814 | " 0.009998974732278287,\n",
815 | " 0.0027828975949001356,\n",
816 | " 0.0015382430148604964,\n",
817 | " 0.002071871486679977,\n",
818 | " 0.0011658494291566512,\n",
819 | " 0.0020985731676529396,\n",
820 | " 0.005484012923749448,\n",
821 | " 0.0016709024985153142,\n",
822 | " 0.006638071741539298,\n",
823 | " 0.0009888373701746334,\n",
824 | " 0.004190321831247679,\n",
825 | " 0.002287390994085259,\n",
826 | " 0.00769175852145787,\n",
827 | " 0.0017496308534039606,\n",
828 | " 0.0059129904692718505,\n",
829 | " 0.009118078581281996,\n",
830 | " 0.0031208407575572528,\n",
831 | " 0.00031825536180030145,\n",
832 | " 0.0027990650829310404,\n",
833 | " 0.006772602912991865,\n",
834 | " 0.0027280090000907364,\n",
835 | " 0.0025548792313854207,\n",
836 | " 0.007243638321125623,\n",
837 | " 0.0072519467006597615,\n",
838 | " 0.006413578374167117,\n",
839 | " 0.009357326334614391,\n",
840 | " 0.007214720584758805,\n",
841 | " 0.003260510972996511,\n",
842 | " 0.011426417184211335,\n",
843 | " 0.0008837765000050402,\n",
844 | " 0.006486460843130759,\n",
845 | " 0.0032743485470720923,\n",
846 | " 0.013994831148205988,\n",
847 | " 0.003238283026512523,\n",
848 | " 0.0033060235365265367,\n",
849 | " 0.0018837520472032576,\n",
850 | " 0.005881729456770346,\n",
851 | " 0.013230608894770679,\n",
852 | " 0.007599815342470673,\n",
853 | " 0.006948384513621897,\n",
854 | " 0.0018641384050126497,\n",
855 | " 0.0020365446194748893,\n",
856 | " 0.0042158678664967475,\n",
857 | " 0.0026893300135153453,\n",
858 | " 0.008251803352573917,\n",
859 | " 0.0014067338594044854,\n",
860 | " 0.0031318163842143365,\n",
861 | " 0.0006548462981319503,\n",
862 | " 0.0004445973574357387,\n",
863 | " 0.005647235350450419,\n",
864 | " 0.0032420190371793556,\n",
865 | " 0.008841260944116858,\n",
866 | " 0.004244146750626121,\n",
867 | " 0.007185441525207059,\n",
868 | " 0.0014631862465182,\n",
869 | " 0.00573428294937528,\n",
870 | " 0.002994307885883951,\n",
871 | " 0.00679600134360836,\n",
872 | " 0.003673529221586369,\n",
873 | " 0.0013285735338927074,\n",
874 | " 0.0037321142164827847,\n",
875 | " 0.00023991868535833127,\n",
876 | " 0.0039508581171651125,\n",
877 | " 0.0025479946299102885,\n",
878 | " 0.0042904304912033675,\n",
879 | " 0.001517574156324808,\n",
880 | " 0.0038929942532586193,\n",
881 | " 0.010227893060189897,\n",
882 | " 0.001331270527420103,\n",
883 | " 0.00044586311389716027,\n",
884 | " 0.00548209234697461,\n",
885 | " 0.006751161494893854,\n",
886 | " 0.004509562308817272,\n",
887 | " 0.004123559126654186,\n",
888 | " 0.005376730543766639,\n",
889 | " 0.0034019828927628337,\n",
890 | " 0.0087183432511444,\n",
891 | " 0.00450568940918028,\n",
892 | " 0.007015168399732041,\n",
893 | " 0.0023925749960699294,\n",
894 | " 0.0012929788606175219,\n",
895 | " 0.00438031199686284,\n",
896 | " 0.010254832640834155,\n",
897 | " 0.0031028324221680226,\n",
898 | " 0.0026961724128541364,\n",
899 | " 0.013026527386187148,\n",
900 | " 0.004960911898688192,\n",
901 | " 0.0029719234237377545,\n",
902 | " 0.0029250156117853754,\n",
903 | " 0.0004249757872377691,\n",
904 | " 0.005990717416488711,\n",
905 | " 0.014374827226903086,\n",
906 | " 0.004146760420695839,\n",
907 | " 0.010156162554630477,\n",
908 | " 0.0013037195708948124,\n",
909 | " 0.0010265114421947885,\n",
910 | " 0.0009185228404129386,\n",
911 | " 0.0027074965936813022,\n",
912 | " 0.002752605743286729,\n",
913 | " 0.004573428892715725,\n",
914 | " 0.000948226417410325,\n",
915 | " 0.0022854824009620467,\n",
916 | " 0.0029099189826702343,\n",
917 | " 0.007704445039609653,\n",
918 | " 0.0023000831527943966,\n",
919 | " 0.0010294123848654246,\n",
920 | " 0.00112578120250173,\n",
921 | " 0.0010276549325696216,\n",
922 | " 0.005802912191218423,\n",
923 | " 0.003945335582444032,\n",
924 | " 0.007492248097973664,\n",
925 | " 0.006501133375989454,\n",
926 | " 0.008354513644933004,\n",
927 | " 0.003605742694607006,\n",
928 | " 0.008587574930271069,\n",
929 | " 0.005415108921713041,\n",
930 | " 0.0034384035218067988,\n",
931 | " 0.0033480256549174303,\n",
932 | " 0.0031362156313903267,\n",
933 | " 0.007237939446098518,\n",
934 | " 0.0020602158230488277,\n",
935 | " 0.0017733036976633754,\n",
936 | " 0.008918921926480694,\n",
937 | " 0.00478318311599879,\n",
938 | " 0.006355965489820092,\n",
939 | " 0.0068529319197627725,\n",
940 | " 0.011479391740508937,\n",
941 | " 0.0014676612014202783,\n",
942 | " 0.00801005169660296,\n",
943 | " 0.0015758198273274626,\n",
944 | " 0.00861938870906731,\n",
945 | " 0.004777059219925217,\n",
946 | " 0.0032612041107931145,\n",
947 | " 0.00244257083374555,\n",
948 | " 0.0004587000184542722,\n",
949 | " 0.004813151027559304,\n",
950 | " 0.01688001164028019,\n",
951 | " 0.003737567515983753,\n",
952 | " 0.0033134861682850847,\n",
953 | " 0.0056999373669240905,\n",
954 | " 0.0017132600851728075,\n",
955 | " 0.0013087464462577802,\n",
956 | " 0.0033141079856250366,\n",
957 | " 0.0019940210592185315,\n",
958 | " 0.0074064950225833925,\n",
959 | " 0.002924347332862509,\n",
960 | " 0.0056695940468061146,\n",
961 | " 0.0017392495826525911,\n",
962 | " 0.0010835408317472206,\n",
963 | " 0.00788055260843712,\n",
964 | " 0.00867274508686966,\n",
965 | " 0.004405793459964058,\n",
966 | " 0.006692816684754077,\n",
967 | " 0.0035857282031142024,\n",
968 | " 0.00446859978208447,\n",
969 | " 0.003265022480362756,\n",
970 | " 0.0018042710750679758,\n",
971 | " 0.004890971309230219,\n",
972 | " 0.000497860530433214,\n",
973 | " 0.0052518692003300504,\n",
974 | " 0.00524752584464705,\n",
975 | " 0.004601865028632579,\n",
976 | " 0.001685834434944078,\n",
977 | " 0.005308826111608322,\n",
978 | " 0.003782517243957728,\n",
979 | " 0.0021187958677754137,\n",
980 | " 0.006182266866124886,\n",
981 | " 0.0018101137943079548,\n",
982 | " 0.007930333086967112,\n",
983 | " 0.004837011013992698,\n",
984 | " 0.0008836845930124682,\n",
985 | " 0.024713793158566477,\n",
986 | " 0.0023116906304282425,\n",
987 | " 0.006196031124456103,\n",
988 | " 0.001350914908546108,\n",
989 | " 0.002088125656158679,\n",
990 | " 0.00825616893167469,\n",
991 | " 0.00202869782486359,\n",
992 | " 0.0037600955361389854,\n",
993 | " 0.00346944292998852,\n",
994 | " 0.0019552575981769404,\n",
995 | " 0.004989495724255222,\n",
996 | " 0.000583156440336019,\n",
997 | " 0.004008762507094699,\n",
998 | " 0.004827928334746989,\n",
999 | " 0.00567515862282153,\n",
1000 | " 0.004081082592535812,\n",
1001 | " 0.006022630017637365,\n",
1002 | " 0.0010471148278937692,\n",
1003 | " 0.004992325954706334,\n",
1004 | " 0.0047985878455961695,\n",
1005 | " 0.0063888456029371765,\n",
1006 | " 0.005370816102971854,\n",
1007 | " 0.004162981461143134,\n",
1008 | " 0.002964767909710675,\n",
1009 | " 0.004603581497133025,\n",
1010 | " 0.005090390312854853,\n",
1011 | " 0.003110167144954934,\n",
1012 | " 0.005282555068731987,\n",
1013 | " 0.004290885458458238,\n",
1014 | " 0.0032377478238068136,\n",
1015 | " 0.005834822671652806,\n",
1016 | " 0.0010136361537674832,\n",
1017 | " 0.007105443345372109,\n",
1018 | " 0.002974293254515703,\n",
1019 | " 0.006281807931527883,\n",
1020 | " 0.0032756475937398832,\n",
1021 | " 0.002610165099805939,\n",
1022 | " 0.00408234773688565,\n",
1023 | " 0.0026408663407303523,\n",
1024 | " 0.006314855082279802,\n",
1025 | " 0.007394848972956836,\n",
1026 | " 0.000178111243759993,\n",
1027 | " 0.004791541249788089,\n",
1028 | " 0.0010972138581405444,\n",
1029 | " 0.010884971815560474,\n",
1030 | " 0.009982133854189335,\n",
1031 | " 0.0055221379600502996,\n",
1032 | " 0.0022665423942549445,\n",
1033 | " 0.0005345602624970969,\n",
1034 | " 0.0037012445888642276,\n",
1035 | " 0.0038180478227604064,\n",
1036 | " 0.007286980844557811,\n",
1037 | " 0.0011787162827615605,\n",
1038 | " 0.0028546796293219545,\n",
1039 | " 0.006175361923695277,\n",
1040 | " 0.002675458044084089,\n",
1041 | " 0.007403960313518421,\n",
1042 | " 0.009445649027629566,\n",
1043 | " 0.0074738593925071095,\n",
1044 | " 0.005283803942620735,\n",
1045 | " 0.006458841091826706,\n",
1046 | " 0.008187947059229761,\n",
1047 | " 0.005258790432053795,\n",
1048 | " 0.004129004748032683,\n",
1049 | " 0.00962397634436396,\n",
1050 | " 0.007626851177701233,\n",
1051 | " 0.014539634259538081,\n",
1052 | " 0.003960231266614045,\n",
1053 | " 0.002189356086343963,\n",
1054 | " 0.007800446742294076,\n",
1055 | " 0.0056785902350708906,\n",
1056 | " 0.0027000965411677624,\n",
1057 | " 0.002780377132690859,\n",
1058 | " 0.001093766239621874,\n",
1059 | " 0.0012519869858429455,\n",
1060 | " 0.0014238051766555238,\n",
1061 | " 0.003635204462356545,\n",
1062 | " 0.011202936178990597,\n",
1063 | " 0.01332839355142268,\n",
1064 | " 0.006565773298784272,\n",
1065 | " 0.008541595672287516,\n",
1066 | " 0.002268726600309634,\n",
1067 | " 0.008410303638578753,\n",
1068 | " 0.002021350569272773,\n",
1069 | " 0.000915179644208064,\n",
1070 | " 0.001483338116104127,\n",
1071 | " 0.0008863842796352772,\n",
1072 | " 0.003719196896947774,\n",
1073 | " 0.002519964816030562,\n",
1074 | " 0.0060786927869959095,\n",
1075 | " 0.0013508830849506825,\n",
1076 | " 0.004063267310459806,\n",
1077 | " 0.004258891541485177,\n",
1078 | " 0.006242329294100846,\n",
1079 | " 0.0060516750673248465,\n",
1080 | " 0.007975049547686242,\n",
1081 | " 0.006067110699206069,\n",
1082 | " 0.004938865534384718,\n",
1083 | " 0.003940824433928736,\n",
1084 | " 0.0056531224893154874,\n",
1085 | " 0.0025265090846456895,\n",
1086 | " 0.0030695841227897924,\n",
1087 | " 0.006776662143960777,\n",
1088 | " 0.00505714496550802,\n",
1089 | " 0.003763771613090605,\n",
1090 | " 0.002183355669071574,\n",
1091 | " 0.0026619087992156842,\n",
1092 | " 0.006600033750757239,\n",
1093 | " 0.004853011124577922,\n",
1094 | " 0.0017583275189820835,\n",
1095 | " 0.0012378512355107896,\n",
1096 | " 0.004120201221703534,\n",
1097 | " 0.0035819646848092024,\n",
1098 | " 0.007064984876036129,\n",
1099 | " 0.003651120853055967,\n",
1100 | " 0.0020336510956078246,\n",
1101 | " 0.0020897488513913487,\n",
1102 | " 0.003395710289872511,\n",
1103 | " 0.007389886848578775,\n",
1104 | " 0.0025150912859109864,\n",
1105 | " 0.003978667814641512,\n",
1106 | " 0.0021852568073245093,\n",
1107 | " 0.0011108303150780818,\n",
1108 | " 0.0045421850115618945,\n",
1109 | " 0.007688053259815031,\n",
1110 | " 0.0015637754406943039,\n",
1111 | " 0.004106364570361287,\n",
1112 | " 0.002461556931982689,\n",
1113 | " 0.005516682914329829,\n",
1114 | " 0.0023757517495063274,\n",
1115 | " 0.0033480192851367274,\n",
1116 | " 0.007957085272821796,\n",
1117 | " 0.0016293250768308898,\n",
1118 | " 0.0002878246180229027,\n",
1119 | " 0.001833866959697339,\n",
1120 | " 0.007097086131433561,\n",
1121 | " 0.0013494268433986711,\n",
1122 | " 0.002951245677887293,\n",
1123 | " 0.004181001843605259,\n",
1124 | " 0.017846475920226146,\n",
1125 | " 0.0028522537868910236,\n",
1126 | " 0.004604188496539874,\n",
1127 | " 0.008079314172189545,\n",
1128 | " 0.0104609221170745,\n",
1129 | " 0.003327776251871195,\n",
1130 | " 0.0008582041140378083,\n",
1131 | " 0.005706884221880371,\n",
1132 | " 0.0018756788295537508,\n",
1133 | " 0.00409168605088428,\n",
1134 | " 0.005548423612084186,\n",
1135 | " 0.0017861593242988194,\n",
1136 | " 0.002420144715015282,\n",
1137 | " 0.0014370785408888143,\n",
1138 | " 0.002972291653664416,\n",
1139 | " 0.002440211433115236,\n",
1140 | " 0.003915879393823429,\n",
1141 | " 0.004940621871106776,\n",
1142 | " 0.002860929189707817,\n",
1143 | " 0.007083093363789539,\n",
1144 | " 0.001161753753416428,\n",
1145 | " 0.0023048973933327277,\n",
1146 | " 0.008104119304616297,\n",
1147 | " 0.003514348979507864,\n",
1148 | " 0.0040205631941855335,\n",
1149 | " 0.002672915554577937,\n",
1150 | " 0.0032174470101283825,\n",
1151 | " 0.0009473851353467566,\n",
1152 | " 0.007789726029165833,\n",
1153 | " 0.0013923292122299813,\n",
1154 | " 0.0027107712959589803,\n",
1155 | " 0.003879336878935491,\n",
1156 | " 0.006286050469792581,\n",
1157 | " 0.006300647519894453,\n",
1158 | " 0.0022807581297069612,\n",
1159 | " 0.0034081764654774194,\n",
1160 | " 0.003385527680936362,\n",
1161 | " 0.0034710620989313323,\n",
1162 | " 0.00246716964385596,\n",
1163 | " 0.003198613793649163,\n",
1164 | " 0.004516589680484739,\n",
1165 | " 0.007186435958266291,\n",
1166 | " 0.0026718375915537607,\n",
1167 | " 0.004311474232261192,\n",
1168 | " 0.001396392996708018,\n",
1169 | " 0.0011835559300031884,\n",
1170 | " 0.0007985062268726084,\n",
1171 | " 0.0019814806521580852,\n",
1172 | " 0.007692541558425359,\n",
1173 | " 0.0036691580955064973,\n",
1174 | " 0.0035277783461052925,\n",
1175 | " 0.001167110160448126,\n",
1176 | " 0.0030508107547203567,\n",
1177 | " 0.0034851702104007343,\n",
1178 | " 0.0035206039198386607,\n",
1179 | " 0.001325522139801305,\n",
1180 | " 0.0011529945992200252,\n",
1181 | " 0.012353617757670188,\n",
1182 | " 0.00035816778615238576,\n",
1183 | " 0.0061808080211036565,\n",
1184 | " 0.0009762926156788865,\n",
1185 | " 0.0052541784704571715,\n",
1186 | " 0.001699650238380146,\n",
1187 | " 0.002870618326302199,\n",
1188 | " 0.0017416804751038129,\n",
1189 | " 0.007510144369693772,\n",
1190 | " 0.0060341164260478974,\n",
1191 | " 0.0026059389414696543,\n",
1192 | " 0.003157567933794403,\n",
1193 | " 0.0053549955855728415,\n",
1194 | " 0.005308099048908388,\n",
1195 | " 0.009154699120233811,\n",
1196 | " 0.003150097338057045,\n",
1197 | " 0.0016684729924589803,\n",
1198 | " 0.0017687610835773748,\n",
1199 | " 0.0023224087644752034,\n",
1200 | " 0.0010250973398097107,\n",
1201 | " 0.005568017881177667,\n",
1202 | " 0.004224829880092207,\n",
1203 | " 0.0028340423844134266,\n",
1204 | " 0.010157903103126031,\n",
1205 | " 0.004582737138122517,\n",
1206 | " 0.0113170129789546,\n",
1207 | " 0.0023098819517999303,\n",
1208 | " 0.0047469952888024025,\n",
1209 | " 0.002733520235412845,\n",
1210 | " 0.008703948734458344,\n",
1211 | " 0.0058967715534576914,\n",
1212 | " 0.005863987282441863,\n",
1213 | " 0.012494920512016394,\n",
1214 | " 0.0012092683367875834,\n",
1215 | " 0.0072708540650765485,\n",
1216 | " 0.003281961860268301,\n",
1217 | " 0.002458541454295915,\n",
1218 | " 0.004895814685182742,\n",
1219 | " 0.0024944361388209435,\n",
1220 | " 0.002088683841345196,\n",
1221 | " 0.0008835297631305528,\n",
1222 | " 0.0038650836992603077,\n",
1223 | " 0.006111841605869972,\n",
1224 | " 0.004979171246565994,\n",
1225 | " 0.003706371204642789,\n",
1226 | " 0.002087295449702098,\n",
1227 | " 0.0015294020742712133,\n",
1228 | " 0.0038431594139706204,\n",
1229 | " 0.002835029658214583,\n",
1230 | " 0.006913879693340341,\n",
1231 | " 0.004719947001278091,\n",
1232 | " 0.0047440566771883574,\n",
1233 | " 0.0018567127151448704,\n",
1234 | " 0.002411236071401146,\n",
1235 | " 0.0016355064419933951,\n",
1236 | " 0.004822580933584786,\n",
1237 | " 0.0027335818638962728,\n",
1238 | " 0.011387982657999673,\n",
1239 | " 0.003240006993201594,\n",
1240 | " 0.002440959138355625,\n",
1241 | " 0.00374532686959983,\n",
1242 | " 0.005774493895141286,\n",
1243 | " 0.008424651618408219,\n",
1244 | " 0.004615534734083493,\n",
1245 | " 0.008690544018074675,\n",
1246 | " 4.425084407400082e-05,\n",
1247 | " 0.004047482567858918,\n",
1248 | " 0.0016955334428971902,\n",
1249 | " 0.004778976134881985,\n",
1250 | " 0.002848977637353589,\n",
1251 | " 0.010153878906787539,\n",
1252 | " 0.0021866562190493433,\n",
1253 | " 0.007031665945086085,\n",
1254 | " 0.007117236517623526,\n",
1255 | " 0.0016733803734572117,\n",
1256 | " 0.00026382302990635154,\n",
1257 | " 0.011112410482535084,\n",
1258 | " 0.0036970838698924258,\n",
1259 | " 0.004150611713451435,\n",
1260 | " 0.0017767449519883183,\n",
1261 | " 0.00445005374756724,\n",
1262 | " 0.0011147702510775348,\n",
1263 | " 0.0029617085791328046,\n",
1264 | " 0.0032694720073188632,\n",
1265 | " 0.006540332440947982,\n",
1266 | " 0.006843429995373018,\n",
1267 | " 0.005750315918981341,\n",
1268 | " 0.0088985717956922,\n",
1269 | " 0.0038962345967581413,\n",
1270 | " 0.01512412309370259,\n",
1271 | " 0.004680531225988771,\n",
1272 | " 0.004447319195276966,\n",
1273 | " 0.006968052248947365,\n",
1274 | " 0.002151137418611948,\n",
1275 | " 0.010317721958366222,\n",
1276 | " 0.006923246792756577,\n",
1277 | " 0.0035828648020346243,\n",
1278 | " 0.002138394670617605,\n",
1279 | " 0.00405124941777224,\n",
1280 | " 0.0025934936225198157,\n",
1281 | " 0.004217015933184576,\n",
1282 | " 0.0015407623272817306,\n",
1283 | " 0.006719539110735084,\n",
1284 | " 0.0034772872495916207,\n",
1285 | " 0.002376038367754685,\n",
1286 | " 0.005730211751046266,\n",
1287 | " 0.004014882670731135,\n",
1288 | " 0.005381417967106339,\n",
1289 | " 0.0020192203776692175,\n",
1290 | " 0.005788573870482705,\n",
1291 | " 0.0018498207025680058,\n",
1292 | " 0.0066039022455920415,\n",
1293 | " 0.0014490888048992456,\n",
1294 | " 0.002583265111927857,\n",
1295 | " 0.002929215272004293,\n",
1296 | " 0.0003014802512399939,\n",
1297 | " 0.006145215686595096,\n",
1298 | " 0.0024057921433177087,\n",
1299 | " 0.003299032789450651,\n",
1300 | " 0.001539252082881526,\n",
1301 | " 0.005249325091618351,\n",
1302 | " 0.006196033834849457,\n",
1303 | " 0.006543600914519332,\n",
1304 | " 0.0015793556455145907,\n",
1305 | " 0.00573292317017704,\n",
1306 | " 0.0036109134380358787,\n",
1307 | " 0.00878468308229774,\n",
1308 | " 0.0034413567538064127,\n",
1309 | " 0.004140595208686815,\n",
1310 | " 0.0056377100229138,\n",
1311 | " 0.0037571620329843736,\n",
1312 | " 0.007015148049037578,\n",
1313 | " 0.0031373366907309075,\n",
1314 | " 0.004535434212190048,\n",
1315 | " 0.0020820562316956615,\n",
1316 | " 0.0034909482394039903,\n",
1317 | " 0.0016374366138860452,\n",
1318 | " 0.005267635692424832,\n",
1319 | " 0.0033560818421801697,\n",
1320 | " 0.0063720978444931076,\n",
1321 | " 0.0013436475933932268,\n",
1322 | " 0.0014666471303782513,\n",
1323 | " 0.0027268525120628384,\n",
1324 | " 0.0025525578652446645,\n",
1325 | " 0.014400332139859029,\n",
1326 | " 0.0019700990769420113,\n",
1327 | " 0.002281416671125358,\n",
1328 | " 0.0035946423054814547,\n",
1329 | " 0.002700305525165758,\n",
1330 | " 0.0010361190814831556,\n",
1331 | " 0.004643949597827435,\n",
1332 | " 0.009194095528814006,\n",
1333 | " 0.004712999752370828,\n",
1334 | " 0.0028933657172528493,\n",
1335 | " 0.008004277617593925,\n",
1336 | " 0.0011527888295340682,\n",
1337 | " 0.004589995761281718,\n",
1338 | " 0.005293740225217447,\n",
1339 | " 0.0020642948204941947,\n",
1340 | " 0.004469339449910126,\n",
1341 | " 0.0024674674401892194,\n",
1342 | " 0.011998112841730801,\n",
1343 | " 0.005598210005470339,\n",
1344 | " 0.004471479816943069,\n",
1345 | " 0.0017739432459118465,\n",
1346 | " 0.003092940798192778,\n",
1347 | " 0.0026680741105628014,\n",
1348 | " 0.0032509210529246824,\n",
1349 | " 0.00015286974538342157,\n",
1350 | " 0.003149210894176191,\n",
1351 | " 0.001218914583230979,\n",
1352 | " 0.006626780705580204,\n",
1353 | " 0.008088748444263216,\n",
1354 | " 0.0018659147985619073,\n",
1355 | " 0.004000803868322467,\n",
1356 | " 0.008832001452628968,\n",
1357 | " 0.008479060053687815,\n",
1358 | " 0.005166096414558341,\n",
1359 | " 0.005298375705570568,\n",
1360 | " 0.009183007659652298,\n",
1361 | " 0.0032744252027445686,\n",
1362 | " 0.012123287577275543,\n",
1363 | " 0.004728920298387118,\n",
1364 | " 0.0023931821553465626,\n",
1365 | " 0.00330847484908503,\n",
1366 | " 0.004551547014340462,\n",
1367 | " 0.004700371672507586,\n",
1368 | " 0.0032752884232790658,\n",
1369 | " 0.004631251735780051,\n",
1370 | " 0.004427667142649305,\n",
1371 | " 0.005181825091185255,\n",
1372 | " 0.012853054808419584,\n",
1373 | " 0.004873013266997739,\n",
1374 | " 0.005238607057555966,\n",
1375 | " 0.0029127917394637937,\n",
1376 | " 0.01295433743940011,\n",
1377 | " 0.00550012021183699,\n",
1378 | " 0.0029624423500220765,\n",
1379 | " 0.0025691105991585417,\n",
1380 | " 0.004700843017570455,\n",
1381 | " 0.010075235823310602,\n",
1382 | " 0.009675381587556098,\n",
1383 | " 0.005224726168253313,\n",
1384 | " 0.0012721877139654225,\n",
1385 | " 0.006250396984379458,\n",
1386 | " 0.0030473257971390666,\n",
1387 | " 0.0063332622296405905,\n",
1388 | " 0.0008665568779214761,\n",
1389 | " 0.0015702206518444228,\n",
1390 | " 0.0016606222683431815,\n",
1391 | " 0.0004857431892508757,\n",
1392 | " 0.006608522489139901,\n",
1393 | " 0.003115634821838187,\n",
1394 | " 0.0003892118570264502,\n",
1395 | " 0.006096450339500903,\n",
1396 | " 0.0015006900134219014,\n",
1397 | " 0.000662734177229507,\n",
1398 | " 0.003418241863958116,\n",
1399 | " 0.004557762765782569,\n",
1400 | " 0.0021601283944738814,\n",
1401 | " 0.003592784651380132,\n",
1402 | " 0.0031280721738666806,\n",
1403 | " 0.006584775904637574,\n",
1404 | " 0.0016323055764891903,\n",
1405 | " 0.00622729707919123,\n",
1406 | " 0.0013555646609827443,\n",
1407 | " 0.0006991685156024774,\n",
1408 | " 0.002712389117510261,\n",
1409 | " 0.005622383123040198,\n",
1410 | " 0.005653480309959269,\n",
1411 | " 0.002263517525385339,\n",
1412 | " 0.0021862995821897435,\n",
1413 | " 0.003147820256372391,\n",
1414 | " 0.002105608898844258,\n",
1415 | " 0.005064114985818491,\n",
1416 | " 0.01610166875225061,\n",
1417 | " 0.003109300722268782,\n",
1418 | " 0.0022962528356692546,\n",
1419 | " 0.0034168022843508172,\n",
1420 | " 0.001484132361119106,\n",
1421 | " 0.005076924292612306,\n",
1422 | " 0.000347340320479228]"
1423 | ]
1424 | },
1425 | "metadata": {
1426 | "tags": []
1427 | },
1428 | "execution_count": 56
1429 | }
1430 | ]
1431 | },
1432 | {
1433 | "cell_type": "code",
1434 | "metadata": {
1435 | "colab": {
1436 | "base_uri": "https://localhost:8080/",
1437 | "height": 174
1438 | },
1439 | "id": "R5kI-S_-4cFF",
1440 | "outputId": "215700ad-e2cb-4dc2-fd0f-1788959b8caa"
1441 | },
1442 | "source": [
1443 | "# Histogram plot of r2_train_list with a vertical reference line at r2a\n",
1444 | "import seaborn as sns\n",
1445 | "import matplotlib.pyplot as plt\n",
1446 | "\n",
1447 | "sns.set_style(\"white\")\n",
1448 | "fig, ax = plt.subplots(figsize=(20, 2))  # explicit axes instead of pyplot's implicit current figure\n",
1449 | "\n",
1450 | "sns.histplot(data=r2_train_list, color=\"skyblue\", kde=True, bins=10, ax=ax)\n",
1451 | "ax.axvline(r2a)  # r2a and r2_train_list are defined outside this cell\n",
1452 | "ax.set(xlim=(0, 1));"
1453 | ],
1454 | "execution_count": null,
1455 | "outputs": [
1456 | {
1457 | "output_type": "execute_result",
1458 | "data": {
1459 | "text/plain": [
1460 | "[(0.0, 1.0)]"
1461 | ]
1462 | },
1463 | "metadata": {
1464 | "tags": []
1465 | },
1466 | "execution_count": 63
1467 | },
1468 | {
1469 | "output_type": "display_data",
1470 | "data": {
1471 | "image/png": "\n",
1472 | "text/plain": [
1473 | ""
1474 | ]
1475 | },
1476 | "metadata": {
1477 | "tags": []
1478 | }
1479 | }
1480 | ]
1481 | },
1482 | {
1483 | "cell_type": "code",
1484 | "metadata": {
1485 | "id": "R7Oay5tgnSBD"
1486 | },
1487 | "source": [
1488 | ""
1489 | ],
1490 | "execution_count": null,
1491 | "outputs": []
1492 | }
1493 | ]
1494 | }
--------------------------------------------------------------------------------
/transformer_pegasus_paraphrase.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "transformer_pegasus_paraphrase.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": []
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "accelerator": "GPU",
15 | "widgets": {
16 | "application/vnd.jupyter.widget-state+json": {
17 | "8f516d93300d49cbad04a67e1be9d32b": {
18 | "model_module": "@jupyter-widgets/controls",
19 | "model_name": "HBoxModel",
20 | "state": {
21 | "_view_name": "HBoxView",
22 | "_dom_classes": [],
23 | "_model_name": "HBoxModel",
24 | "_view_module": "@jupyter-widgets/controls",
25 | "_model_module_version": "1.5.0",
26 | "_view_count": null,
27 | "_view_module_version": "1.5.0",
28 | "box_style": "",
29 | "layout": "IPY_MODEL_580961e7e8584b868d427cf672078674",
30 | "_model_module": "@jupyter-widgets/controls",
31 | "children": [
32 | "IPY_MODEL_704b994e029c4cdd93e0ecda3726d607",
33 | "IPY_MODEL_4b1be8cc3c5c40ec9deeb75200a3a7f3"
34 | ]
35 | }
36 | },
37 | "580961e7e8584b868d427cf672078674": {
38 | "model_module": "@jupyter-widgets/base",
39 | "model_name": "LayoutModel",
40 | "state": {
41 | "_view_name": "LayoutView",
42 | "grid_template_rows": null,
43 | "right": null,
44 | "justify_content": null,
45 | "_view_module": "@jupyter-widgets/base",
46 | "overflow": null,
47 | "_model_module_version": "1.2.0",
48 | "_view_count": null,
49 | "flex_flow": null,
50 | "width": null,
51 | "min_width": null,
52 | "border": null,
53 | "align_items": null,
54 | "bottom": null,
55 | "_model_module": "@jupyter-widgets/base",
56 | "top": null,
57 | "grid_column": null,
58 | "overflow_y": null,
59 | "overflow_x": null,
60 | "grid_auto_flow": null,
61 | "grid_area": null,
62 | "grid_template_columns": null,
63 | "flex": null,
64 | "_model_name": "LayoutModel",
65 | "justify_items": null,
66 | "grid_row": null,
67 | "max_height": null,
68 | "align_content": null,
69 | "visibility": null,
70 | "align_self": null,
71 | "height": null,
72 | "min_height": null,
73 | "padding": null,
74 | "grid_auto_rows": null,
75 | "grid_gap": null,
76 | "max_width": null,
77 | "order": null,
78 | "_view_module_version": "1.2.0",
79 | "grid_template_areas": null,
80 | "object_position": null,
81 | "object_fit": null,
82 | "grid_auto_columns": null,
83 | "margin": null,
84 | "display": null,
85 | "left": null
86 | }
87 | },
88 | "704b994e029c4cdd93e0ecda3726d607": {
89 | "model_module": "@jupyter-widgets/controls",
90 | "model_name": "FloatProgressModel",
91 | "state": {
92 | "_view_name": "ProgressView",
93 | "style": "IPY_MODEL_a503070cc2a646d5879a3339cebaf3a7",
94 | "_dom_classes": [],
95 | "description": "Downloading: 100%",
96 | "_model_name": "FloatProgressModel",
97 | "bar_style": "success",
98 | "max": 1912529,
99 | "_view_module": "@jupyter-widgets/controls",
100 | "_model_module_version": "1.5.0",
101 | "value": 1912529,
102 | "_view_count": null,
103 | "_view_module_version": "1.5.0",
104 | "orientation": "horizontal",
105 | "min": 0,
106 | "description_tooltip": null,
107 | "_model_module": "@jupyter-widgets/controls",
108 | "layout": "IPY_MODEL_904e9bbc2fed482e84a02299e9762fe9"
109 | }
110 | },
111 | "4b1be8cc3c5c40ec9deeb75200a3a7f3": {
112 | "model_module": "@jupyter-widgets/controls",
113 | "model_name": "HTMLModel",
114 | "state": {
115 | "_view_name": "HTMLView",
116 | "style": "IPY_MODEL_a24d51573d594fb2a2cd1b3c5f8f4dd4",
117 | "_dom_classes": [],
118 | "description": "",
119 | "_model_name": "HTMLModel",
120 | "placeholder": "",
121 | "_view_module": "@jupyter-widgets/controls",
122 | "_model_module_version": "1.5.0",
123 | "value": " 1.91M/1.91M [00:39<00:00, 47.8kB/s]",
124 | "_view_count": null,
125 | "_view_module_version": "1.5.0",
126 | "description_tooltip": null,
127 | "_model_module": "@jupyter-widgets/controls",
128 | "layout": "IPY_MODEL_50f97ea6b61c47b0b152a5b5fefea06a"
129 | }
130 | },
131 | "a503070cc2a646d5879a3339cebaf3a7": {
132 | "model_module": "@jupyter-widgets/controls",
133 | "model_name": "ProgressStyleModel",
134 | "state": {
135 | "_view_name": "StyleView",
136 | "_model_name": "ProgressStyleModel",
137 | "description_width": "initial",
138 | "_view_module": "@jupyter-widgets/base",
139 | "_model_module_version": "1.5.0",
140 | "_view_count": null,
141 | "_view_module_version": "1.2.0",
142 | "bar_color": null,
143 | "_model_module": "@jupyter-widgets/controls"
144 | }
145 | },
146 | "904e9bbc2fed482e84a02299e9762fe9": {
147 | "model_module": "@jupyter-widgets/base",
148 | "model_name": "LayoutModel",
149 | "state": {
150 | "_view_name": "LayoutView",
151 | "grid_template_rows": null,
152 | "right": null,
153 | "justify_content": null,
154 | "_view_module": "@jupyter-widgets/base",
155 | "overflow": null,
156 | "_model_module_version": "1.2.0",
157 | "_view_count": null,
158 | "flex_flow": null,
159 | "width": null,
160 | "min_width": null,
161 | "border": null,
162 | "align_items": null,
163 | "bottom": null,
164 | "_model_module": "@jupyter-widgets/base",
165 | "top": null,
166 | "grid_column": null,
167 | "overflow_y": null,
168 | "overflow_x": null,
169 | "grid_auto_flow": null,
170 | "grid_area": null,
171 | "grid_template_columns": null,
172 | "flex": null,
173 | "_model_name": "LayoutModel",
174 | "justify_items": null,
175 | "grid_row": null,
176 | "max_height": null,
177 | "align_content": null,
178 | "visibility": null,
179 | "align_self": null,
180 | "height": null,
181 | "min_height": null,
182 | "padding": null,
183 | "grid_auto_rows": null,
184 | "grid_gap": null,
185 | "max_width": null,
186 | "order": null,
187 | "_view_module_version": "1.2.0",
188 | "grid_template_areas": null,
189 | "object_position": null,
190 | "object_fit": null,
191 | "grid_auto_columns": null,
192 | "margin": null,
193 | "display": null,
194 | "left": null
195 | }
196 | },
197 | "a24d51573d594fb2a2cd1b3c5f8f4dd4": {
198 | "model_module": "@jupyter-widgets/controls",
199 | "model_name": "DescriptionStyleModel",
200 | "state": {
201 | "_view_name": "StyleView",
202 | "_model_name": "DescriptionStyleModel",
203 | "description_width": "",
204 | "_view_module": "@jupyter-widgets/base",
205 | "_model_module_version": "1.5.0",
206 | "_view_count": null,
207 | "_view_module_version": "1.2.0",
208 | "_model_module": "@jupyter-widgets/controls"
209 | }
210 | },
211 | "50f97ea6b61c47b0b152a5b5fefea06a": {
212 | "model_module": "@jupyter-widgets/base",
213 | "model_name": "LayoutModel",
214 | "state": {
215 | "_view_name": "LayoutView",
216 | "grid_template_rows": null,
217 | "right": null,
218 | "justify_content": null,
219 | "_view_module": "@jupyter-widgets/base",
220 | "overflow": null,
221 | "_model_module_version": "1.2.0",
222 | "_view_count": null,
223 | "flex_flow": null,
224 | "width": null,
225 | "min_width": null,
226 | "border": null,
227 | "align_items": null,
228 | "bottom": null,
229 | "_model_module": "@jupyter-widgets/base",
230 | "top": null,
231 | "grid_column": null,
232 | "overflow_y": null,
233 | "overflow_x": null,
234 | "grid_auto_flow": null,
235 | "grid_area": null,
236 | "grid_template_columns": null,
237 | "flex": null,
238 | "_model_name": "LayoutModel",
239 | "justify_items": null,
240 | "grid_row": null,
241 | "max_height": null,
242 | "align_content": null,
243 | "visibility": null,
244 | "align_self": null,
245 | "height": null,
246 | "min_height": null,
247 | "padding": null,
248 | "grid_auto_rows": null,
249 | "grid_gap": null,
250 | "max_width": null,
251 | "order": null,
252 | "_view_module_version": "1.2.0",
253 | "grid_template_areas": null,
254 | "object_position": null,
255 | "object_fit": null,
256 | "grid_auto_columns": null,
257 | "margin": null,
258 | "display": null,
259 | "left": null
260 | }
261 | },
262 | "063b18d513424f1183737e93c264cb8f": {
263 | "model_module": "@jupyter-widgets/controls",
264 | "model_name": "HBoxModel",
265 | "state": {
266 | "_view_name": "HBoxView",
267 | "_dom_classes": [],
268 | "_model_name": "HBoxModel",
269 | "_view_module": "@jupyter-widgets/controls",
270 | "_model_module_version": "1.5.0",
271 | "_view_count": null,
272 | "_view_module_version": "1.5.0",
273 | "box_style": "",
274 | "layout": "IPY_MODEL_5d0db7a84da44349a277cba30148c4d7",
275 | "_model_module": "@jupyter-widgets/controls",
276 | "children": [
277 | "IPY_MODEL_957e5d06b761440ab3d00478b11d547a",
278 | "IPY_MODEL_632c106781594f1db82f272b34d3c3f4"
279 | ]
280 | }
281 | },
282 | "5d0db7a84da44349a277cba30148c4d7": {
283 | "model_module": "@jupyter-widgets/base",
284 | "model_name": "LayoutModel",
285 | "state": {
286 | "_view_name": "LayoutView",
287 | "grid_template_rows": null,
288 | "right": null,
289 | "justify_content": null,
290 | "_view_module": "@jupyter-widgets/base",
291 | "overflow": null,
292 | "_model_module_version": "1.2.0",
293 | "_view_count": null,
294 | "flex_flow": null,
295 | "width": null,
296 | "min_width": null,
297 | "border": null,
298 | "align_items": null,
299 | "bottom": null,
300 | "_model_module": "@jupyter-widgets/base",
301 | "top": null,
302 | "grid_column": null,
303 | "overflow_y": null,
304 | "overflow_x": null,
305 | "grid_auto_flow": null,
306 | "grid_area": null,
307 | "grid_template_columns": null,
308 | "flex": null,
309 | "_model_name": "LayoutModel",
310 | "justify_items": null,
311 | "grid_row": null,
312 | "max_height": null,
313 | "align_content": null,
314 | "visibility": null,
315 | "align_self": null,
316 | "height": null,
317 | "min_height": null,
318 | "padding": null,
319 | "grid_auto_rows": null,
320 | "grid_gap": null,
321 | "max_width": null,
322 | "order": null,
323 | "_view_module_version": "1.2.0",
324 | "grid_template_areas": null,
325 | "object_position": null,
326 | "object_fit": null,
327 | "grid_auto_columns": null,
328 | "margin": null,
329 | "display": null,
330 | "left": null
331 | }
332 | },
333 | "957e5d06b761440ab3d00478b11d547a": {
334 | "model_module": "@jupyter-widgets/controls",
335 | "model_name": "FloatProgressModel",
336 | "state": {
337 | "_view_name": "ProgressView",
338 | "style": "IPY_MODEL_553bd1a9e5c745629a51d0b2b03bf16e",
339 | "_dom_classes": [],
340 | "description": "Downloading: 100%",
341 | "_model_name": "FloatProgressModel",
342 | "bar_style": "success",
343 | "max": 65,
344 | "_view_module": "@jupyter-widgets/controls",
345 | "_model_module_version": "1.5.0",
346 | "value": 65,
347 | "_view_count": null,
348 | "_view_module_version": "1.5.0",
349 | "orientation": "horizontal",
350 | "min": 0,
351 | "description_tooltip": null,
352 | "_model_module": "@jupyter-widgets/controls",
353 | "layout": "IPY_MODEL_2f6a0ab725334895bff8f5524a2c22f5"
354 | }
355 | },
356 | "632c106781594f1db82f272b34d3c3f4": {
357 | "model_module": "@jupyter-widgets/controls",
358 | "model_name": "HTMLModel",
359 | "state": {
360 | "_view_name": "HTMLView",
361 | "style": "IPY_MODEL_6dcab5852f99408ba1e177185843fa6b",
362 | "_dom_classes": [],
363 | "description": "",
364 | "_model_name": "HTMLModel",
365 | "placeholder": "",
366 | "_view_module": "@jupyter-widgets/controls",
367 | "_model_module_version": "1.5.0",
368 | "value": " 65.0/65.0 [00:00<00:00, 749B/s]",
369 | "_view_count": null,
370 | "_view_module_version": "1.5.0",
371 | "description_tooltip": null,
372 | "_model_module": "@jupyter-widgets/controls",
373 | "layout": "IPY_MODEL_1830e657159044b3ba9d411136f0f90d"
374 | }
375 | },
376 | "553bd1a9e5c745629a51d0b2b03bf16e": {
377 | "model_module": "@jupyter-widgets/controls",
378 | "model_name": "ProgressStyleModel",
379 | "state": {
380 | "_view_name": "StyleView",
381 | "_model_name": "ProgressStyleModel",
382 | "description_width": "initial",
383 | "_view_module": "@jupyter-widgets/base",
384 | "_model_module_version": "1.5.0",
385 | "_view_count": null,
386 | "_view_module_version": "1.2.0",
387 | "bar_color": null,
388 | "_model_module": "@jupyter-widgets/controls"
389 | }
390 | },
391 | "2f6a0ab725334895bff8f5524a2c22f5": {
392 | "model_module": "@jupyter-widgets/base",
393 | "model_name": "LayoutModel",
394 | "state": {
395 | "_view_name": "LayoutView",
396 | "grid_template_rows": null,
397 | "right": null,
398 | "justify_content": null,
399 | "_view_module": "@jupyter-widgets/base",
400 | "overflow": null,
401 | "_model_module_version": "1.2.0",
402 | "_view_count": null,
403 | "flex_flow": null,
404 | "width": null,
405 | "min_width": null,
406 | "border": null,
407 | "align_items": null,
408 | "bottom": null,
409 | "_model_module": "@jupyter-widgets/base",
410 | "top": null,
411 | "grid_column": null,
412 | "overflow_y": null,
413 | "overflow_x": null,
414 | "grid_auto_flow": null,
415 | "grid_area": null,
416 | "grid_template_columns": null,
417 | "flex": null,
418 | "_model_name": "LayoutModel",
419 | "justify_items": null,
420 | "grid_row": null,
421 | "max_height": null,
422 | "align_content": null,
423 | "visibility": null,
424 | "align_self": null,
425 | "height": null,
426 | "min_height": null,
427 | "padding": null,
428 | "grid_auto_rows": null,
429 | "grid_gap": null,
430 | "max_width": null,
431 | "order": null,
432 | "_view_module_version": "1.2.0",
433 | "grid_template_areas": null,
434 | "object_position": null,
435 | "object_fit": null,
436 | "grid_auto_columns": null,
437 | "margin": null,
438 | "display": null,
439 | "left": null
440 | }
441 | },
442 | "6dcab5852f99408ba1e177185843fa6b": {
443 | "model_module": "@jupyter-widgets/controls",
444 | "model_name": "DescriptionStyleModel",
445 | "state": {
446 | "_view_name": "StyleView",
447 | "_model_name": "DescriptionStyleModel",
448 | "description_width": "",
449 | "_view_module": "@jupyter-widgets/base",
450 | "_model_module_version": "1.5.0",
451 | "_view_count": null,
452 | "_view_module_version": "1.2.0",
453 | "_model_module": "@jupyter-widgets/controls"
454 | }
455 | },
456 | "1830e657159044b3ba9d411136f0f90d": {
457 | "model_module": "@jupyter-widgets/base",
458 | "model_name": "LayoutModel",
459 | "state": {
460 | "_view_name": "LayoutView",
461 | "grid_template_rows": null,
462 | "right": null,
463 | "justify_content": null,
464 | "_view_module": "@jupyter-widgets/base",
465 | "overflow": null,
466 | "_model_module_version": "1.2.0",
467 | "_view_count": null,
468 | "flex_flow": null,
469 | "width": null,
470 | "min_width": null,
471 | "border": null,
472 | "align_items": null,
473 | "bottom": null,
474 | "_model_module": "@jupyter-widgets/base",
475 | "top": null,
476 | "grid_column": null,
477 | "overflow_y": null,
478 | "overflow_x": null,
479 | "grid_auto_flow": null,
480 | "grid_area": null,
481 | "grid_template_columns": null,
482 | "flex": null,
483 | "_model_name": "LayoutModel",
484 | "justify_items": null,
485 | "grid_row": null,
486 | "max_height": null,
487 | "align_content": null,
488 | "visibility": null,
489 | "align_self": null,
490 | "height": null,
491 | "min_height": null,
492 | "padding": null,
493 | "grid_auto_rows": null,
494 | "grid_gap": null,
495 | "max_width": null,
496 | "order": null,
497 | "_view_module_version": "1.2.0",
498 | "grid_template_areas": null,
499 | "object_position": null,
500 | "object_fit": null,
501 | "grid_auto_columns": null,
502 | "margin": null,
503 | "display": null,
504 | "left": null
505 | }
506 | },
507 | "ec24a0e1eb0b4e57987d0610c2941d71": {
508 | "model_module": "@jupyter-widgets/controls",
509 | "model_name": "HBoxModel",
510 | "state": {
511 | "_view_name": "HBoxView",
512 | "_dom_classes": [],
513 | "_model_name": "HBoxModel",
514 | "_view_module": "@jupyter-widgets/controls",
515 | "_model_module_version": "1.5.0",
516 | "_view_count": null,
517 | "_view_module_version": "1.5.0",
518 | "box_style": "",
519 | "layout": "IPY_MODEL_501b90a0a74041189e38c080eebae9eb",
520 | "_model_module": "@jupyter-widgets/controls",
521 | "children": [
522 | "IPY_MODEL_dc0db6639cc64b3daed8149ca090a978",
523 | "IPY_MODEL_a35b8a96d72d4c7e881bc9a696e13835"
524 | ]
525 | }
526 | },
527 | "501b90a0a74041189e38c080eebae9eb": {
528 | "model_module": "@jupyter-widgets/base",
529 | "model_name": "LayoutModel",
530 | "state": {
531 | "_view_name": "LayoutView",
532 | "grid_template_rows": null,
533 | "right": null,
534 | "justify_content": null,
535 | "_view_module": "@jupyter-widgets/base",
536 | "overflow": null,
537 | "_model_module_version": "1.2.0",
538 | "_view_count": null,
539 | "flex_flow": null,
540 | "width": null,
541 | "min_width": null,
542 | "border": null,
543 | "align_items": null,
544 | "bottom": null,
545 | "_model_module": "@jupyter-widgets/base",
546 | "top": null,
547 | "grid_column": null,
548 | "overflow_y": null,
549 | "overflow_x": null,
550 | "grid_auto_flow": null,
551 | "grid_area": null,
552 | "grid_template_columns": null,
553 | "flex": null,
554 | "_model_name": "LayoutModel",
555 | "justify_items": null,
556 | "grid_row": null,
557 | "max_height": null,
558 | "align_content": null,
559 | "visibility": null,
560 | "align_self": null,
561 | "height": null,
562 | "min_height": null,
563 | "padding": null,
564 | "grid_auto_rows": null,
565 | "grid_gap": null,
566 | "max_width": null,
567 | "order": null,
568 | "_view_module_version": "1.2.0",
569 | "grid_template_areas": null,
570 | "object_position": null,
571 | "object_fit": null,
572 | "grid_auto_columns": null,
573 | "margin": null,
574 | "display": null,
575 | "left": null
576 | }
577 | },
578 | "dc0db6639cc64b3daed8149ca090a978": {
579 | "model_module": "@jupyter-widgets/controls",
580 | "model_name": "FloatProgressModel",
581 | "state": {
582 | "_view_name": "ProgressView",
583 | "style": "IPY_MODEL_02ad3e7df3d64bbc8b1ae397e12162f3",
584 | "_dom_classes": [],
585 | "description": "Downloading: 100%",
586 | "_model_name": "FloatProgressModel",
587 | "bar_style": "success",
588 | "max": 86,
589 | "_view_module": "@jupyter-widgets/controls",
590 | "_model_module_version": "1.5.0",
591 | "value": 86,
592 | "_view_count": null,
593 | "_view_module_version": "1.5.0",
594 | "orientation": "horizontal",
595 | "min": 0,
596 | "description_tooltip": null,
597 | "_model_module": "@jupyter-widgets/controls",
598 | "layout": "IPY_MODEL_fb9c5b1db4bb49eeb640a2a24d9b17a6"
599 | }
600 | },
601 | "a35b8a96d72d4c7e881bc9a696e13835": {
602 | "model_module": "@jupyter-widgets/controls",
603 | "model_name": "HTMLModel",
604 | "state": {
605 | "_view_name": "HTMLView",
606 | "style": "IPY_MODEL_d1903fc67e9a4ae288ea933dc2f632d7",
607 | "_dom_classes": [],
608 | "description": "",
609 | "_model_name": "HTMLModel",
610 | "placeholder": "",
611 | "_view_module": "@jupyter-widgets/controls",
612 | "_model_module_version": "1.5.0",
613 | "value": " 86.0/86.0 [00:01<00:00, 50.5B/s]",
614 | "_view_count": null,
615 | "_view_module_version": "1.5.0",
616 | "description_tooltip": null,
617 | "_model_module": "@jupyter-widgets/controls",
618 | "layout": "IPY_MODEL_6d323ec59a304e8abb1ed5fc58159505"
619 | }
620 | },
621 | "02ad3e7df3d64bbc8b1ae397e12162f3": {
622 | "model_module": "@jupyter-widgets/controls",
623 | "model_name": "ProgressStyleModel",
624 | "state": {
625 | "_view_name": "StyleView",
626 | "_model_name": "ProgressStyleModel",
627 | "description_width": "initial",
628 | "_view_module": "@jupyter-widgets/base",
629 | "_model_module_version": "1.5.0",
630 | "_view_count": null,
631 | "_view_module_version": "1.2.0",
632 | "bar_color": null,
633 | "_model_module": "@jupyter-widgets/controls"
634 | }
635 | },
636 | "fb9c5b1db4bb49eeb640a2a24d9b17a6": {
637 | "model_module": "@jupyter-widgets/base",
638 | "model_name": "LayoutModel",
639 | "state": {
640 | "_view_name": "LayoutView",
641 | "grid_template_rows": null,
642 | "right": null,
643 | "justify_content": null,
644 | "_view_module": "@jupyter-widgets/base",
645 | "overflow": null,
646 | "_model_module_version": "1.2.0",
647 | "_view_count": null,
648 | "flex_flow": null,
649 | "width": null,
650 | "min_width": null,
651 | "border": null,
652 | "align_items": null,
653 | "bottom": null,
654 | "_model_module": "@jupyter-widgets/base",
655 | "top": null,
656 | "grid_column": null,
657 | "overflow_y": null,
658 | "overflow_x": null,
659 | "grid_auto_flow": null,
660 | "grid_area": null,
661 | "grid_template_columns": null,
662 | "flex": null,
663 | "_model_name": "LayoutModel",
664 | "justify_items": null,
665 | "grid_row": null,
666 | "max_height": null,
667 | "align_content": null,
668 | "visibility": null,
669 | "align_self": null,
670 | "height": null,
671 | "min_height": null,
672 | "padding": null,
673 | "grid_auto_rows": null,
674 | "grid_gap": null,
675 | "max_width": null,
676 | "order": null,
677 | "_view_module_version": "1.2.0",
678 | "grid_template_areas": null,
679 | "object_position": null,
680 | "object_fit": null,
681 | "grid_auto_columns": null,
682 | "margin": null,
683 | "display": null,
684 | "left": null
685 | }
686 | },
687 | "d1903fc67e9a4ae288ea933dc2f632d7": {
688 | "model_module": "@jupyter-widgets/controls",
689 | "model_name": "DescriptionStyleModel",
690 | "state": {
691 | "_view_name": "StyleView",
692 | "_model_name": "DescriptionStyleModel",
693 | "description_width": "",
694 | "_view_module": "@jupyter-widgets/base",
695 | "_model_module_version": "1.5.0",
696 | "_view_count": null,
697 | "_view_module_version": "1.2.0",
698 | "_model_module": "@jupyter-widgets/controls"
699 | }
700 | },
701 | "6d323ec59a304e8abb1ed5fc58159505": {
702 | "model_module": "@jupyter-widgets/base",
703 | "model_name": "LayoutModel",
704 | "state": {
705 | "_view_name": "LayoutView",
706 | "grid_template_rows": null,
707 | "right": null,
708 | "justify_content": null,
709 | "_view_module": "@jupyter-widgets/base",
710 | "overflow": null,
711 | "_model_module_version": "1.2.0",
712 | "_view_count": null,
713 | "flex_flow": null,
714 | "width": null,
715 | "min_width": null,
716 | "border": null,
717 | "align_items": null,
718 | "bottom": null,
719 | "_model_module": "@jupyter-widgets/base",
720 | "top": null,
721 | "grid_column": null,
722 | "overflow_y": null,
723 | "overflow_x": null,
724 | "grid_auto_flow": null,
725 | "grid_area": null,
726 | "grid_template_columns": null,
727 | "flex": null,
728 | "_model_name": "LayoutModel",
729 | "justify_items": null,
730 | "grid_row": null,
731 | "max_height": null,
732 | "align_content": null,
733 | "visibility": null,
734 | "align_self": null,
735 | "height": null,
736 | "min_height": null,
737 | "padding": null,
738 | "grid_auto_rows": null,
739 | "grid_gap": null,
740 | "max_width": null,
741 | "order": null,
742 | "_view_module_version": "1.2.0",
743 | "grid_template_areas": null,
744 | "object_position": null,
745 | "object_fit": null,
746 | "grid_auto_columns": null,
747 | "margin": null,
748 | "display": null,
749 | "left": null
750 | }
751 | }
752 | }
753 | }
754 | },
755 | "cells": [
756 | {
757 | "cell_type": "markdown",
758 | "metadata": {
759 | "id": "JlN09HAEHIth"
760 | },
761 | "source": [
762 | "# **How to paraphrase text using transformers in Python**\n",
763 | "\n",
764 | "Chanin Nantasenamat\n",
765 | "\n",
766 | "[Data Professor](http://youtube.com/dataprofessor), http://youtube.com/dataprofessor\n",
767 | "\n",
768 | "**Notes and References:**\n",
769 | "- PEGASUS is an acronym for Pre-training with Extracted Gap-sentences for Abstractive SUmmarization Sequence-to-sequence models\n",
770 | "- The [PEGASUS](https://huggingface.co/tuner007/pegasus_paraphrase) model used herein is from *Hugging Face's* **transformers** library\n",
771 | "- [PEGASUS model from Google Research](https://github.com/google-research/pegasus)\n",
772 | "- Read the original paper [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://arxiv.org/abs/1912.08777)\n",
773 | "- Check out the book [Transformers for Natural Language Processing](https://amzn.to/39IC6E6)\n"
774 | ]
775 | },
776 | {
777 | "cell_type": "markdown",
778 | "metadata": {
779 | "id": "4njR9yRpVLYY"
780 | },
781 | "source": [
782 | "# **Install library**"
783 | ]
784 | },
785 | {
786 | "cell_type": "code",
787 | "metadata": {
788 | "colab": {
789 | "base_uri": "https://localhost:8080/"
790 | },
791 | "id": "a3VswpH3ftGo",
792 | "outputId": "646d751f-dbb9-4de5-ad18-889899a406dd"
793 | },
794 | "source": [
795 | "! pip install sentence-splitter"
796 | ],
797 | "execution_count": null,
798 | "outputs": [
799 | {
800 | "output_type": "stream",
801 | "text": [
802 | "Requirement already satisfied: sentence-splitter in /usr/local/lib/python3.7/dist-packages (1.4)\n",
803 | "Requirement already satisfied: regex>=2017.12.12 in /usr/local/lib/python3.7/dist-packages (from sentence-splitter) (2019.12.20)\n"
804 | ],
805 | "name": "stdout"
806 | }
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "metadata": {
812 | "colab": {
813 | "base_uri": "https://localhost:8080/"
814 | },
815 | "id": "asu-lhsoV1KZ",
816 | "outputId": "3a10c57a-8254-45e2-dd51-5bb5f7c5772f"
817 | },
818 | "source": [
819 | "! pip install transformers"
820 | ],
821 | "execution_count": null,
822 | "outputs": [
823 | {
824 | "output_type": "stream",
825 | "text": [
826 | "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.4.2)\n",
827 | "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n",
828 | "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.44)\n",
829 | "Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.10.1)\n",
830 | "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (3.8.1)\n",
831 | "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n",
832 | "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
833 | "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n",
834 | "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.41.1)\n",
835 | "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
836 | "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
837 | "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n",
838 | "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
839 | "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n",
840 | "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.7.4.3)\n",
841 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2020.12.5)\n",
842 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
843 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
844 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
845 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n"
846 | ],
847 | "name": "stdout"
848 | }
849 | ]
850 | },
851 | {
852 | "cell_type": "code",
853 | "metadata": {
854 | "colab": {
855 | "base_uri": "https://localhost:8080/"
856 | },
857 | "id": "0oFm6iQpyMve",
858 | "outputId": "17771cff-a791-4015-ed0e-d3b90e362ffe"
859 | },
860 | "source": [
861 | "! pip install SentencePiece"
862 | ],
863 | "execution_count": null,
864 | "outputs": [
865 | {
866 | "output_type": "stream",
867 | "text": [
868 | "Requirement already satisfied: SentencePiece in /usr/local/lib/python3.7/dist-packages (0.1.95)\n"
869 | ],
870 | "name": "stdout"
871 | }
872 | ]
873 | },
874 | {
875 | "cell_type": "code",
876 | "metadata": {
877 | "id": "_yCRu_3CNaTE",
878 | "colab": {
879 | "base_uri": "https://localhost:8080/",
880 | "height": 165,
881 | "referenced_widgets": [
882 | "8f516d93300d49cbad04a67e1be9d32b",
883 | "580961e7e8584b868d427cf672078674",
884 | "704b994e029c4cdd93e0ecda3726d607",
885 | "4b1be8cc3c5c40ec9deeb75200a3a7f3",
886 | "a503070cc2a646d5879a3339cebaf3a7",
887 | "904e9bbc2fed482e84a02299e9762fe9",
888 | "a24d51573d594fb2a2cd1b3c5f8f4dd4",
889 | "50f97ea6b61c47b0b152a5b5fefea06a",
890 | "063b18d513424f1183737e93c264cb8f",
891 | "5d0db7a84da44349a277cba30148c4d7",
892 | "957e5d06b761440ab3d00478b11d547a",
893 | "632c106781594f1db82f272b34d3c3f4",
894 | "553bd1a9e5c745629a51d0b2b03bf16e",
895 | "2f6a0ab725334895bff8f5524a2c22f5",
896 | "6dcab5852f99408ba1e177185843fa6b",
897 | "1830e657159044b3ba9d411136f0f90d",
898 | "ec24a0e1eb0b4e57987d0610c2941d71",
899 | "501b90a0a74041189e38c080eebae9eb",
900 | "dc0db6639cc64b3daed8149ca090a978",
901 | "a35b8a96d72d4c7e881bc9a696e13835",
902 | "02ad3e7df3d64bbc8b1ae397e12162f3",
903 | "fb9c5b1db4bb49eeb640a2a24d9b17a6",
904 | "d1903fc67e9a4ae288ea933dc2f632d7",
905 | "6d323ec59a304e8abb1ed5fc58159505"
906 | ]
907 | },
908 | "outputId": "444e39c0-ad06-4145-b7e5-ac873e44723d"
909 | },
910 | "source": [
911 | "# https://huggingface.co/tuner007/pegasus_paraphrase\n",
912 | "\n",
913 | "import torch\n",
914 | "from transformers import PegasusForConditionalGeneration, PegasusTokenizer\n",
915 | "\n",
916 | "model_name = 'tuner007/pegasus_paraphrase'\n",
917 | "torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
918 | "tokenizer = PegasusTokenizer.from_pretrained(model_name)\n",
919 | "model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)\n",
920 | "\n",
921 | "def get_response(input_text,num_return_sequences):\n",
922 | " '''Return `num_return_sequences` paraphrases of `input_text` as a list of strings (beam search, input truncated to 60 tokens).'''\n",
923 | " batch = tokenizer([input_text],truncation=True,padding='longest',max_length=60, return_tensors=\"pt\").to(torch_device)  # call the tokenizer directly; prepare_seq2seq_batch is deprecated\n",
924 | " translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)\n",
925 | " return tokenizer.batch_decode(translated, skip_special_tokens=True)"
926 | ],
927 | "execution_count": null,
928 | "outputs": [
929 | {
930 | "output_type": "display_data",
931 | "data": {
932 | "application/vnd.jupyter.widget-view+json": {
933 | "model_id": "8f516d93300d49cbad04a67e1be9d32b",
934 | "version_minor": 0,
935 | "version_major": 2
936 | },
937 | "text/plain": [
938 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1912529.0, style=ProgressStyle(descript…"
939 | ]
940 | },
941 | "metadata": {
942 | "tags": []
943 | }
944 | },
945 | {
946 | "output_type": "stream",
947 | "text": [
948 | "\n"
949 | ],
950 | "name": "stdout"
951 | },
952 | {
953 | "output_type": "display_data",
954 | "data": {
955 | "application/vnd.jupyter.widget-view+json": {
956 | "model_id": "063b18d513424f1183737e93c264cb8f",
957 | "version_minor": 0,
958 | "version_major": 2
959 | },
960 | "text/plain": [
961 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=65.0, style=ProgressStyle(description_w…"
962 | ]
963 | },
964 | "metadata": {
965 | "tags": []
966 | }
967 | },
968 | {
969 | "output_type": "stream",
970 | "text": [
971 | "\n"
972 | ],
973 | "name": "stdout"
974 | },
975 | {
976 | "output_type": "display_data",
977 | "data": {
978 | "application/vnd.jupyter.widget-view+json": {
979 | "model_id": "ec24a0e1eb0b4e57987d0610c2941d71",
980 | "version_minor": 0,
981 | "version_major": 2
982 | },
983 | "text/plain": [
984 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=86.0, style=ProgressStyle(description_w…"
985 | ]
986 | },
987 | "metadata": {
988 | "tags": []
989 | }
990 | },
991 | {
992 | "output_type": "stream",
993 | "text": [
994 | "\n"
995 | ],
996 | "name": "stdout"
997 | }
998 | ]
999 | },
1000 | {
1001 | "cell_type": "markdown",
1002 | "metadata": {
1003 | "id": "lva62WD3gScS"
1004 | },
1005 | "source": [
1006 | "---"
1007 | ]
1008 | },
1009 | {
1010 | "cell_type": "markdown",
1011 | "metadata": {
1012 | "id": "6A44V5YUTdHP"
1013 | },
1014 | "source": [
1015 | "## **Processing a single sentence**"
1016 | ]
1017 | },
1018 | {
1019 | "cell_type": "code",
1020 | "metadata": {
1021 | "id": "zzrMc_I2TgV9"
1022 | },
1023 | "source": [
1024 | "text = \"In this video, I will be showing you how to build a stock price web application in Python using the Streamlit and yfinance library.\""
1025 | ],
1026 | "execution_count": null,
1027 | "outputs": []
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "metadata": {
1032 | "colab": {
1033 | "base_uri": "https://localhost:8080/"
1034 | },
1035 | "id": "CBbJmP4jTrne",
1036 | "outputId": "ca776c9a-bbf7-407a-cd9d-8a3758b78332"
1037 | },
1038 | "source": [
1039 | "get_response(text, 5)"
1040 | ],
1041 | "execution_count": null,
1042 | "outputs": [
1043 | {
1044 | "output_type": "stream",
1045 | "text": [
1046 | "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py:3221: FutureWarning: `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` context manager to prepare your targets. See the documentation of your specific tokenizer for more details\n",
1047 | " FutureWarning,\n"
1048 | ],
1049 | "name": "stderr"
1050 | },
1051 | {
1052 | "output_type": "execute_result",
1053 | "data": {
1054 | "text/plain": [
1055 | "['In this video, I will show you how to use the Streamlit and yfinance libraries to build a stock price web application.',\n",
1056 | " 'In this video, I will show you how to build a stock price web application in Python using the Streamlit and yfinance libraries.',\n",
1057 | " 'In this video, I will show you how to build a stock price web application using the Streamlit and yfinance libraries.',\n",
1058 | " 'In this video, I will show you how to use the Streamlit and yfinance libraries to build a stock price web application in Python.',\n",
1059 | " 'In this video, I will show you how to use the Streamlit and yfinance library to build a stock price web application.']"
1060 | ]
1061 | },
1062 | "metadata": {
1063 | "tags": []
1064 | },
1065 | "execution_count": 6
1066 | }
1067 | ]
1068 | },
1069 | {
1070 | "cell_type": "code",
1071 | "metadata": {
1072 | "colab": {
1073 | "base_uri": "https://localhost:8080/"
1074 | },
1075 | "id": "3xmOh_62Tujy",
1076 | "outputId": "0f5a855c-11c2-4fad-af68-309d39741137"
1077 | },
1078 | "source": [
1079 | "get_response(text, 1)"
1080 | ],
1081 | "execution_count": null,
1082 | "outputs": [
1083 | {
1084 | "output_type": "stream",
1085 | "text": [
1086 | "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py:3221: FutureWarning: `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` context manager to prepare your targets. See the documentation of your specific tokenizer for more details\n",
1087 | " FutureWarning,\n"
1088 | ],
1089 | "name": "stderr"
1090 | },
1091 | {
1092 | "output_type": "execute_result",
1093 | "data": {
1094 | "text/plain": [
1095 | "['In this video, I will show you how to use the Streamlit and yfinance libraries to build a stock price web application.']"
1096 | ]
1097 | },
1098 | "metadata": {
1099 | "tags": []
1100 | },
1101 | "execution_count": 7
1102 | }
1103 | ]
1104 | },
1105 | {
1106 | "cell_type": "markdown",
1107 | "metadata": {
1108 | "id": "HXeaFgNcTV9B"
1109 | },
1110 | "source": [
1111 | "## **Processing a paragraph of text**"
1112 | ]
1113 | },
1114 | {
1115 | "cell_type": "code",
1116 | "metadata": {
1117 | "id": "yOiKx9cPNanF",
1118 | "colab": {
1119 | "base_uri": "https://localhost:8080/"
1120 | },
1121 | "outputId": "56e927c5-d1df-4d36-b327-2075bec11fe4"
1122 | },
1123 | "source": [
1124 | "# Paragraph of text\n",
1125 | "context = \"In this video, I will be showing you how to build a stock price web application in Python using the Streamlit and yfinance library. The app will be able to retrieve company information as well as the stock price data for S and P 500 companies. All of this in less than 50 lines of code.\"\n",
1126 | "print(context)"
1127 | ],
1128 | "execution_count": null,
1129 | "outputs": [
1130 | {
1131 | "output_type": "stream",
1132 | "text": [
1133 | "In this video, I will be showing you how to build a stock price web application in Python using the Streamlit and yfinance library. The app will be able to retrieve company information as well as the stock price data for S and P 500 companies. All of this in less than 50 lines of code.\n"
1134 | ],
1135 | "name": "stdout"
1136 | }
1137 | ]
1138 | },
1139 | {
1140 | "cell_type": "code",
1141 | "metadata": {
1142 | "colab": {
1143 | "base_uri": "https://localhost:8080/"
1144 | },
1145 | "id": "Fk-oJbtUNxkP",
1146 | "outputId": "75a26119-ddca-4c7c-d247-9de23e08ef45"
1147 | },
1148 | "source": [
1149 | "# Takes the input paragraph and splits it into a list of sentences\n",
1150 | "from sentence_splitter import SentenceSplitter, split_text_into_sentences\n",
1151 | "\n",
1152 | "splitter = SentenceSplitter(language='en')\n",
1153 | "\n",
1154 | "sentence_list = splitter.split(context)\n",
1155 | "sentence_list"
1156 | ],
1157 | "execution_count": null,
1158 | "outputs": [
1159 | {
1160 | "output_type": "execute_result",
1161 | "data": {
1162 | "text/plain": [
1163 | "['In this video, I will be showing you how to build a stock price web application in Python using the Streamlit and yfinance library.',\n",
1164 | " 'The app will be able to retrieve company information as well as the stock price data for S and P 500 companies.',\n",
1165 | " 'All of this in less than 50 lines of code.']"
1166 | ]
1167 | },
1168 | "metadata": {
1169 | "tags": []
1170 | },
1171 | "execution_count": 9
1172 | }
1173 | ]
1174 | },
1175 | {
1176 | "cell_type": "code",
1177 | "metadata": {
1178 | "id": "QdE0CH6agx8H",
1179 | "colab": {
1180 | "base_uri": "https://localhost:8080/"
1181 | },
1182 | "outputId": "44a7c9dd-9080-46f9-953d-7af2bedf6f09"
1183 | },
1184 | "source": [
1185 | "# Do a for loop to iterate through the list of sentences and paraphrase each sentence in the iteration\n",
1186 | "paraphrase = []\n",
1187 | "\n",
1188 | "for i in sentence_list:\n",
1189 | " a = get_response(i,1)\n",
1190 | " paraphrase.append(a)"
1191 | ],
1192 | "execution_count": null,
1193 | "outputs": [
1194 | {
1195 | "output_type": "stream",
1196 | "text": [
1197 | "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py:3221: FutureWarning: `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` context manager to prepare your targets. See the documentation of your specific tokenizer for more details\n",
1198 | " FutureWarning,\n"
1199 | ],
1200 | "name": "stderr"
1201 | }
1202 | ]
1203 | },
1204 | {
1205 | "cell_type": "code",
1206 | "metadata": {
1207 | "colab": {
1208 | "base_uri": "https://localhost:8080/"
1209 | },
1210 | "id": "i1zyKct5jM3C",
1211 | "outputId": "4acdc64c-59cb-4369-97c1-1506a8d9fce0"
1212 | },
1213 | "source": [
1214 | "# This is the paraphrased text\n",
1215 | "paraphrase"
1216 | ],
1217 | "execution_count": null,
1218 | "outputs": [
1219 | {
1220 | "output_type": "execute_result",
1221 | "data": {
1222 | "text/plain": [
1223 | "[['In this video, I will show you how to use the Streamlit and yfinance libraries to build a stock price web application.'],\n",
1224 | " ['The stock price data for S and P 500 companies will be retrieved by the app.'],\n",
1225 | " ['This is in less than 50 lines of code.']]"
1226 | ]
1227 | },
1228 | "metadata": {
1229 | "tags": []
1230 | },
1231 | "execution_count": 11
1232 | }
1233 | ]
1234 | },
1235 | {
1236 | "cell_type": "code",
1237 | "metadata": {
1238 | "colab": {
1239 | "base_uri": "https://localhost:8080/"
1240 | },
1241 | "id": "fSOQwP_hiPjk",
1242 | "outputId": "e5b9261b-07fc-4b47-b889-2ff343bc3f65"
1243 | },
1244 | "source": [
1245 | "paraphrase2 = [' '.join(x) for x in paraphrase]\n",
1246 | "paraphrase2"
1247 | ],
1248 | "execution_count": null,
1249 | "outputs": [
1250 | {
1251 | "output_type": "execute_result",
1252 | "data": {
1253 | "text/plain": [
1254 | "['In this video, I will show you how to use the Streamlit and yfinance libraries to build a stock price web application.',\n",
1255 | " 'The stock price data for S and P 500 companies will be retrieved by the app.',\n",
1256 | " 'This is in less than 50 lines of code.']"
1257 | ]
1258 | },
1259 | "metadata": {
1260 | "tags": []
1261 | },
1262 | "execution_count": 12
1263 | }
1264 | ]
1265 | },
1266 | {
1267 | "cell_type": "code",
1268 | "metadata": {
1269 | "colab": {
1270 | "base_uri": "https://localhost:8080/",
1271 | "height": 70
1272 | },
1273 | "id": "fJ5tDLzokYCd",
1274 | "outputId": "eca018b6-c0c2-4c6f-db92-5cdc6c56d862"
1275 | },
1276 | "source": [
1277 | "# Combines the above list of sentences into a single paragraph string\n",
1278 | "paraphrase3 = [' '.join(paraphrase2)]\n",
1279 | "paraphrased_text = paraphrase3[0]  # use the joined string itself; str(list) goes through repr() and mangles quotes/escapes\n",
1280 | "paraphrased_text"
1281 | ],
1282 | "execution_count": null,
1283 | "outputs": [
1284 | {
1285 | "output_type": "execute_result",
1286 | "data": {
1287 | "application/vnd.google.colaboratory.intrinsic+json": {
1288 | "type": "string"
1289 | },
1290 | "text/plain": [
1291 | "'In this video, I will show you how to use the Streamlit and yfinance libraries to build a stock price web application. The stock price data for S and P 500 companies will be retrieved by the app. This is in less than 50 lines of code.'"
1292 | ]
1293 | },
1294 | "metadata": {
1295 | "tags": []
1296 | },
1297 | "execution_count": 18
1298 | }
1299 | ]
1300 | },
1301 | {
1302 | "cell_type": "code",
1303 | "metadata": {
1304 | "id": "8XNC-TJQpZWW",
1305 | "colab": {
1306 | "base_uri": "https://localhost:8080/"
1307 | },
1308 | "outputId": "dd95d96b-9f27-4076-a90e-d4c4f49bd2fe"
1309 | },
1310 | "source": [
1311 | "# Comparison of the original (context variable) and the paraphrased version (paraphrased_text variable)\n",
1312 | "\n",
1313 | "print(context)\n",
1314 | "print(paraphrased_text)"
1315 | ],
1316 | "execution_count": null,
1317 | "outputs": [
1318 | {
1319 | "output_type": "stream",
1320 | "text": [
1321 | "In this video, I will be showing you how to build a stock price web application in Python using the Streamlit and yfinance library. The app will be able to retrieve company information as well as the stock price data for S and P 500 companies. All of this in less than 50 lines of code.\n",
1322 | "In this video, I will show you how to use the Streamlit and yfinance libraries to build a stock price web application. The stock price data for S and P 500 companies will be retrieved by the app. This is in less than 50 lines of code.\n"
1323 | ],
1324 | "name": "stdout"
1325 | }
1326 | ]
1327 | },
1328 | {
1329 | "cell_type": "code",
1330 | "metadata": {
1331 | "id": "xFUouTov0v9_"
1332 | },
1333 | "source": [
1334 | ""
1335 | ],
1336 | "execution_count": null,
1337 | "outputs": []
1338 | }
1339 | ]
1340 | }
--------------------------------------------------------------------------------