├── 20_08_langchain.ipynb
├── AQI_india.ipynb
├── ActivePassive.py
├── Aspect_based_sentiment_analysis.ipynb
├── Automatic EDA.ipynb
├── BERTspamfilter.ipynb
├── BM25_ensemble_retriever.ipynb
├── Basic Chatbot.ipynb
├── Bertopic.ipynb
├── Building_an_auto_correct_in_python.ipynb
├── COVIDdetectionusingXray.py
├── Cartoonify using python.ipynb
├── CasualToFormalConverter.py
├── Clustering.ipynb
├── Comparing different language detector.ipynb
├── DataPrep.ipynb
├── Describe_alternative.ipynb
├── Detect Binod.ipynb
├── DiabetesClassificationUsingNeuralNetwork.py
├── Dummy_variable_trap.ipynb
├── EntityExtraction.py
├── Faker.ipynb
├── Fruit_detection_using_CNNs.ipynb
├── Gensim introduction hindi.ipynb
├── Grammarchecker.py
├── HaarCascade.py
├── IPLdataAnalysis.ipynb
├── Information_retrieval_Fact_extractors_python.ipynb
├── Kepler-delete.ipynb
├── LazyPredict.ipynb
├── Lux.ipynb
├── MLDC .ipynb
├── Multi class classification using Machine Learning.ipynb
├── OCR .ipynb
├── OCR.ipynb
├── Object_detection_using_detecto.ipynb
├── PaliGemma.ipynb
├── PassiveActive.py
├── Performance Analyzer.ipynb
├── Pivot table in pandas.ipynb
├── RAG_fusion.ipynb
├── README.md
├── Readability.ipynb
├── SMSSpamCollection
├── Semantic_search.ipynb
├── Sentiment Analysis using VADER.ipynb
├── Sentiment_Analysis_using_Distilbert.ipynb
├── Sigmoid_overflow_problem.ipynb
├── Speechtotext.ipynb
├── Stanza library.ipynb
├── Stopwords.ipynb
├── TSNE demo.ipynb
├── Topic modelling using Gensim.ipynb
├── Twitter API POC.ipynb
├── Whisper.pptx
├── WordCloud.ipynb
├── YouTube_recommendation_pinecone.ipynb
├── YoutubeComments.csv
├── cuisine_data.csv
├── d3blocks.ipynb
├── diabetes.csv
├── face-mask-detector-project.zip
├── langgraph_simple_chatbot.ipynb
├── medspacydemo.ipynb
├── segmind_ssd.ipynb
├── sentimentanalysis_usingbert.py
├── stable_diffusion_with_chatgpt_noteook.ipynb
├── test script.py
├── test.csv
├── text_summarization.py
├── titanic_processed_data.csv
└── train.csv


/ActivePassive.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | from styleformer import Styleformer
 4 | import torch
 5 | sf = Styleformer(style = 2) 
 6 | st.title('Active Voice to Passive Voice Converter')
 7 | st.write("Please enter your sentence in active voice")
 8 | text = st.text_input('Entered Text')
 9 | if st.button('Convert Active to Passive'):
10 |   target_sentence = sf.transfer(text)
11 |   st.write(target_sentence)
12 | else:
13 |      pass
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/Automatic EDA.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import pandas as pd\n",
 11 |     "from pandas_profiling import ProfileReport\n",
 12 |     "import sklearn"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 2,
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "from sklearn.datasets import load_iris"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 3,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "diab_data=load_iris()"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 4,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "df=pd.DataFrame(data=diab_data.data,columns=diab_data.feature_names)"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 5,
 45 |    "metadata": {},
 46 |    "outputs": [
 47 |     {
 48 |      "data": {
 49 |       "text/html": [
 50 |        "<div>\n",
 51 |        "<style scoped>\n",
 52 |        "    .dataframe tbody tr th:only-of-type {\n",
 53 |        "        vertical-align: middle;\n",
 54 |        "    }\n",
 55 |        "\n",
 56 |        "    .dataframe tbody tr th {\n",
 57 |        "        vertical-align: top;\n",
 58 |        "    }\n",
 59 |        "\n",
 60 |        "    .dataframe thead th {\n",
 61 |        "        text-align: right;\n",
 62 |        "    }\n",
 63 |        "</style>\n",
 64 |        "<table border=\"1\" class=\"dataframe\">\n",
 65 |        "  <thead>\n",
 66 |        "    <tr style=\"text-align: right;\">\n",
 67 |        "      <th></th>\n",
 68 |        "      <th>sepal length (cm)</th>\n",
 69 |        "      <th>sepal width (cm)</th>\n",
 70 |        "      <th>petal length (cm)</th>\n",
 71 |        "      <th>petal width (cm)</th>\n",
 72 |        "    </tr>\n",
 73 |        "  </thead>\n",
 74 |        "  <tbody>\n",
 75 |        "    <tr>\n",
 76 |        "      <th>0</th>\n",
 77 |        "      <td>5.1</td>\n",
 78 |        "      <td>3.5</td>\n",
 79 |        "      <td>1.4</td>\n",
 80 |        "      <td>0.2</td>\n",
 81 |        "    </tr>\n",
 82 |        "    <tr>\n",
 83 |        "      <th>1</th>\n",
 84 |        "      <td>4.9</td>\n",
 85 |        "      <td>3.0</td>\n",
 86 |        "      <td>1.4</td>\n",
 87 |        "      <td>0.2</td>\n",
 88 |        "    </tr>\n",
 89 |        "    <tr>\n",
 90 |        "      <th>2</th>\n",
 91 |        "      <td>4.7</td>\n",
 92 |        "      <td>3.2</td>\n",
 93 |        "      <td>1.3</td>\n",
 94 |        "      <td>0.2</td>\n",
 95 |        "    </tr>\n",
 96 |        "    <tr>\n",
 97 |        "      <th>3</th>\n",
 98 |        "      <td>4.6</td>\n",
 99 |        "      <td>3.1</td>\n",
100 |        "      <td>1.5</td>\n",
101 |        "      <td>0.2</td>\n",
102 |        "    </tr>\n",
103 |        "    <tr>\n",
104 |        "      <th>4</th>\n",
105 |        "      <td>5.0</td>\n",
106 |        "      <td>3.6</td>\n",
107 |        "      <td>1.4</td>\n",
108 |        "      <td>0.2</td>\n",
109 |        "    </tr>\n",
110 |        "  </tbody>\n",
111 |        "</table>\n",
112 |        "</div>"
113 |       ],
114 |       "text/plain": [
115 |        "   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)\n",
116 |        "0                5.1               3.5                1.4               0.2\n",
117 |        "1                4.9               3.0                1.4               0.2\n",
118 |        "2                4.7               3.2                1.3               0.2\n",
119 |        "3                4.6               3.1                1.5               0.2\n",
120 |        "4                5.0               3.6                1.4               0.2"
121 |       ]
122 |      },
123 |      "execution_count": 5,
124 |      "metadata": {},
125 |      "output_type": "execute_result"
126 |     }
127 |    ],
128 |    "source": [
129 |     "df.head()"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 6,
135 |    "metadata": {},
136 |    "outputs": [
137 |     {
138 |      "data": {
139 |       "text/plain": [
140 |        "Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',\n",
141 |        "       'petal width (cm)'],\n",
142 |        "      dtype='object')"
143 |       ]
144 |      },
145 |      "execution_count": 6,
146 |      "metadata": {},
147 |      "output_type": "execute_result"
148 |     }
149 |    ],
150 |    "source": [
151 |     "df.columns"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 7,
157 |    "metadata": {},
158 |    "outputs": [],
159 |    "source": [
160 |     "profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 8,
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "data": {
170 |       "application/vnd.jupyter.widget-view+json": {
171 |        "model_id": "cb20f4fe95354e03818ef76f2c1056d4",
172 |        "version_major": 2,
173 |        "version_minor": 0
174 |       },
175 |       "text/plain": [
176 |        "HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=18.0, style=ProgressStyle(descrip…"
177 |       ]
178 |      },
179 |      "metadata": {},
180 |      "output_type": "display_data"
181 |     },
182 |     {
183 |      "name": "stdout",
184 |      "output_type": "stream",
185 |      "text": [
186 |       "\n"
187 |      ]
188 |     },
189 |     {
190 |      "data": {
191 |       "application/vnd.jupyter.widget-view+json": {
192 |        "model_id": "7b5a0242c1fa474fb7355dc84e048725",
193 |        "version_major": 2,
194 |        "version_minor": 0
195 |       },
196 |       "text/plain": [
197 |        "HBox(children=(FloatProgress(value=0.0, description='Generate report structure', max=1.0, style=ProgressStyle(…"
198 |       ]
199 |      },
200 |      "metadata": {},
201 |      "output_type": "display_data"
202 |     },
203 |     {
204 |      "name": "stdout",
205 |      "output_type": "stream",
206 |      "text": [
207 |       "\n"
208 |      ]
209 |     },
210 |     {
211 |      "data": {
212 |       "application/vnd.jupyter.widget-view+json": {
213 |        "model_id": "",
214 |        "version_major": 2,
215 |        "version_minor": 0
216 |       },
217 |       "text/plain": [
218 |        "HBox(children=(FloatProgress(value=0.0, description='Render widgets', max=1.0, style=ProgressStyle(description…"
219 |       ]
220 |      },
221 |      "metadata": {},
222 |      "output_type": "display_data"
223 |     },
224 |     {
225 |      "data": {
226 |       "application/vnd.jupyter.widget-view+json": {
227 |        "model_id": "cbf1a7f8914540c9a3233999b14f6372",
228 |        "version_major": 2,
229 |        "version_minor": 0
230 |       },
231 |       "text/plain": [
232 |        "VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…"
233 |       ]
234 |      },
235 |      "metadata": {},
236 |      "output_type": "display_data"
237 |     }
238 |    ],
239 |    "source": [
240 |     "profile.to_widgets()"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 9,
246 |    "metadata": {},
247 |    "outputs": [
248 |     {
249 |      "data": {
250 |       "application/vnd.jupyter.widget-view+json": {
251 |        "model_id": "ce9fd83849f74df6a15877cde281cad9",
252 |        "version_major": 2,
253 |        "version_minor": 0
254 |       },
255 |       "text/plain": [
256 |        "HBox(children=(FloatProgress(value=0.0, description='Render HTML', max=1.0, style=ProgressStyle(description_wi…"
257 |       ]
258 |      },
259 |      "metadata": {},
260 |      "output_type": "display_data"
261 |     },
262 |     {
263 |      "name": "stdout",
264 |      "output_type": "stream",
265 |      "text": [
266 |       "\n"
267 |      ]
268 |     },
269 |     {
270 |      "data": {
271 |       "application/vnd.jupyter.widget-view+json": {
272 |        "model_id": "43e7ed70e39b41f38e76bbade3ef75d1",
273 |        "version_major": 2,
274 |        "version_minor": 0
275 |       },
276 |       "text/plain": [
277 |        "HBox(children=(FloatProgress(value=0.0, description='Export report to file', max=1.0, style=ProgressStyle(desc…"
278 |       ]
279 |      },
280 |      "metadata": {},
281 |      "output_type": "display_data"
282 |     },
283 |     {
284 |      "name": "stdout",
285 |      "output_type": "stream",
286 |      "text": [
287 |       "\n"
288 |      ]
289 |     }
290 |    ],
291 |    "source": [
292 |     "profile.to_file(\"Output2.html\")"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": 19,
298 |    "metadata": {},
299 |    "outputs": [],
300 |    "source": [
301 |     "import sweetviz,pandas as pd"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "code",
306 |    "execution_count": 20,
307 |    "metadata": {},
308 |    "outputs": [],
309 |    "source": [
310 |     "data = pd.read_csv('titanic.csv',sep = '\\t')"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "code",
315 |    "execution_count": 21,
316 |    "metadata": {},
317 |    "outputs": [
318 |     {
319 |      "data": {
320 |       "text/html": [
321 |        "<div>\n",
322 |        "<style scoped>\n",
323 |        "    .dataframe tbody tr th:only-of-type {\n",
324 |        "        vertical-align: middle;\n",
325 |        "    }\n",
326 |        "\n",
327 |        "    .dataframe tbody tr th {\n",
328 |        "        vertical-align: top;\n",
329 |        "    }\n",
330 |        "\n",
331 |        "    .dataframe thead th {\n",
332 |        "        text-align: right;\n",
333 |        "    }\n",
334 |        "</style>\n",
335 |        "<table border=\"1\" class=\"dataframe\">\n",
336 |        "  <thead>\n",
337 |        "    <tr style=\"text-align: right;\">\n",
338 |        "      <th></th>\n",
339 |        "      <th>PassengerId</th>\n",
340 |        "      <th>Survived</th>\n",
341 |        "      <th>Pclass</th>\n",
342 |        "      <th>Name</th>\n",
343 |        "      <th>Sex</th>\n",
344 |        "      <th>Age</th>\n",
345 |        "      <th>SibSp</th>\n",
346 |        "      <th>Parch</th>\n",
347 |        "      <th>Ticket</th>\n",
348 |        "      <th>Fare</th>\n",
349 |        "      <th>Cabin</th>\n",
350 |        "      <th>Embarked</th>\n",
351 |        "    </tr>\n",
352 |        "  </thead>\n",
353 |        "  <tbody>\n",
354 |        "    <tr>\n",
355 |        "      <th>0</th>\n",
356 |        "      <td>1</td>\n",
357 |        "      <td>0</td>\n",
358 |        "      <td>3</td>\n",
359 |        "      <td>Braund, Mr. Owen Harris</td>\n",
360 |        "      <td>male</td>\n",
361 |        "      <td>22.0</td>\n",
362 |        "      <td>1</td>\n",
363 |        "      <td>0</td>\n",
364 |        "      <td>A/5 21171</td>\n",
365 |        "      <td>7.2500</td>\n",
366 |        "      <td>NaN</td>\n",
367 |        "      <td>S</td>\n",
368 |        "    </tr>\n",
369 |        "    <tr>\n",
370 |        "      <th>1</th>\n",
371 |        "      <td>2</td>\n",
372 |        "      <td>1</td>\n",
373 |        "      <td>1</td>\n",
374 |        "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
375 |        "      <td>female</td>\n",
376 |        "      <td>38.0</td>\n",
377 |        "      <td>1</td>\n",
378 |        "      <td>0</td>\n",
379 |        "      <td>PC 17599</td>\n",
380 |        "      <td>71.2833</td>\n",
381 |        "      <td>C85</td>\n",
382 |        "      <td>C</td>\n",
383 |        "    </tr>\n",
384 |        "    <tr>\n",
385 |        "      <th>2</th>\n",
386 |        "      <td>3</td>\n",
387 |        "      <td>1</td>\n",
388 |        "      <td>3</td>\n",
389 |        "      <td>Heikkinen, Miss. Laina</td>\n",
390 |        "      <td>female</td>\n",
391 |        "      <td>26.0</td>\n",
392 |        "      <td>0</td>\n",
393 |        "      <td>0</td>\n",
394 |        "      <td>STON/O2. 3101282</td>\n",
395 |        "      <td>7.9250</td>\n",
396 |        "      <td>NaN</td>\n",
397 |        "      <td>S</td>\n",
398 |        "    </tr>\n",
399 |        "    <tr>\n",
400 |        "      <th>3</th>\n",
401 |        "      <td>4</td>\n",
402 |        "      <td>1</td>\n",
403 |        "      <td>1</td>\n",
404 |        "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
405 |        "      <td>female</td>\n",
406 |        "      <td>35.0</td>\n",
407 |        "      <td>1</td>\n",
408 |        "      <td>0</td>\n",
409 |        "      <td>113803</td>\n",
410 |        "      <td>53.1000</td>\n",
411 |        "      <td>C123</td>\n",
412 |        "      <td>S</td>\n",
413 |        "    </tr>\n",
414 |        "    <tr>\n",
415 |        "      <th>4</th>\n",
416 |        "      <td>5</td>\n",
417 |        "      <td>0</td>\n",
418 |        "      <td>3</td>\n",
419 |        "      <td>Allen, Mr. William Henry</td>\n",
420 |        "      <td>male</td>\n",
421 |        "      <td>35.0</td>\n",
422 |        "      <td>0</td>\n",
423 |        "      <td>0</td>\n",
424 |        "      <td>373450</td>\n",
425 |        "      <td>8.0500</td>\n",
426 |        "      <td>NaN</td>\n",
427 |        "      <td>S</td>\n",
428 |        "    </tr>\n",
429 |        "    <tr>\n",
430 |        "      <th>...</th>\n",
431 |        "      <td>...</td>\n",
432 |        "      <td>...</td>\n",
433 |        "      <td>...</td>\n",
434 |        "      <td>...</td>\n",
435 |        "      <td>...</td>\n",
436 |        "      <td>...</td>\n",
437 |        "      <td>...</td>\n",
438 |        "      <td>...</td>\n",
439 |        "      <td>...</td>\n",
440 |        "      <td>...</td>\n",
441 |        "      <td>...</td>\n",
442 |        "      <td>...</td>\n",
443 |        "    </tr>\n",
444 |        "    <tr>\n",
445 |        "      <th>151</th>\n",
446 |        "      <td>152</td>\n",
447 |        "      <td>1</td>\n",
448 |        "      <td>1</td>\n",
449 |        "      <td>Pears, Mrs. Thomas (Edith Wearne)</td>\n",
450 |        "      <td>female</td>\n",
451 |        "      <td>22.0</td>\n",
452 |        "      <td>1</td>\n",
453 |        "      <td>0</td>\n",
454 |        "      <td>113776</td>\n",
455 |        "      <td>66.6000</td>\n",
456 |        "      <td>C2</td>\n",
457 |        "      <td>S</td>\n",
458 |        "    </tr>\n",
459 |        "    <tr>\n",
460 |        "      <th>152</th>\n",
461 |        "      <td>153</td>\n",
462 |        "      <td>0</td>\n",
463 |        "      <td>3</td>\n",
464 |        "      <td>Meo, Mr. Alfonzo</td>\n",
465 |        "      <td>male</td>\n",
466 |        "      <td>55.5</td>\n",
467 |        "      <td>0</td>\n",
468 |        "      <td>0</td>\n",
469 |        "      <td>A.5. 11206</td>\n",
470 |        "      <td>8.0500</td>\n",
471 |        "      <td>NaN</td>\n",
472 |        "      <td>S</td>\n",
473 |        "    </tr>\n",
474 |        "    <tr>\n",
475 |        "      <th>153</th>\n",
476 |        "      <td>154</td>\n",
477 |        "      <td>0</td>\n",
478 |        "      <td>3</td>\n",
479 |        "      <td>van Billiard, Mr. Austin Blyler</td>\n",
480 |        "      <td>male</td>\n",
481 |        "      <td>40.5</td>\n",
482 |        "      <td>0</td>\n",
483 |        "      <td>2</td>\n",
484 |        "      <td>A/5. 851</td>\n",
485 |        "      <td>14.5000</td>\n",
486 |        "      <td>NaN</td>\n",
487 |        "      <td>S</td>\n",
488 |        "    </tr>\n",
489 |        "    <tr>\n",
490 |        "      <th>154</th>\n",
491 |        "      <td>155</td>\n",
492 |        "      <td>0</td>\n",
493 |        "      <td>3</td>\n",
494 |        "      <td>Olsen, Mr. Ole Martin</td>\n",
495 |        "      <td>male</td>\n",
496 |        "      <td>NaN</td>\n",
497 |        "      <td>0</td>\n",
498 |        "      <td>0</td>\n",
499 |        "      <td>Fa 265302</td>\n",
500 |        "      <td>7.3125</td>\n",
501 |        "      <td>NaN</td>\n",
502 |        "      <td>S</td>\n",
503 |        "    </tr>\n",
504 |        "    <tr>\n",
505 |        "      <th>155</th>\n",
506 |        "      <td>156</td>\n",
507 |        "      <td>0</td>\n",
508 |        "      <td>1</td>\n",
509 |        "      <td>Williams, Mr. Charles Duane</td>\n",
510 |        "      <td>male</td>\n",
511 |        "      <td>51.0</td>\n",
512 |        "      <td>0</td>\n",
513 |        "      <td>1</td>\n",
514 |        "      <td>PC 17597</td>\n",
515 |        "      <td>61.3792</td>\n",
516 |        "      <td>NaN</td>\n",
517 |        "      <td>C</td>\n",
518 |        "    </tr>\n",
519 |        "  </tbody>\n",
520 |        "</table>\n",
521 |        "<p>156 rows × 12 columns</p>\n",
522 |        "</div>"
523 |       ],
524 |       "text/plain": [
525 |        "     PassengerId  Survived  Pclass  \\\n",
526 |        "0              1         0       3   \n",
527 |        "1              2         1       1   \n",
528 |        "2              3         1       3   \n",
529 |        "3              4         1       1   \n",
530 |        "4              5         0       3   \n",
531 |        "..           ...       ...     ...   \n",
532 |        "151          152         1       1   \n",
533 |        "152          153         0       3   \n",
534 |        "153          154         0       3   \n",
535 |        "154          155         0       3   \n",
536 |        "155          156         0       1   \n",
537 |        "\n",
538 |        "                                                  Name     Sex   Age  SibSp  \\\n",
539 |        "0                              Braund, Mr. Owen Harris    male  22.0      1   \n",
540 |        "1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
541 |        "2                               Heikkinen, Miss. Laina  female  26.0      0   \n",
542 |        "3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
543 |        "4                             Allen, Mr. William Henry    male  35.0      0   \n",
544 |        "..                                                 ...     ...   ...    ...   \n",
545 |        "151                  Pears, Mrs. Thomas (Edith Wearne)  female  22.0      1   \n",
546 |        "152                                   Meo, Mr. Alfonzo    male  55.5      0   \n",
547 |        "153                    van Billiard, Mr. Austin Blyler    male  40.5      0   \n",
548 |        "154                              Olsen, Mr. Ole Martin    male   NaN      0   \n",
549 |        "155                        Williams, Mr. Charles Duane    male  51.0      0   \n",
550 |        "\n",
551 |        "     Parch            Ticket     Fare Cabin Embarked  \n",
552 |        "0        0         A/5 21171   7.2500   NaN        S  \n",
553 |        "1        0          PC 17599  71.2833   C85        C  \n",
554 |        "2        0  STON/O2. 3101282   7.9250   NaN        S  \n",
555 |        "3        0            113803  53.1000  C123        S  \n",
556 |        "4        0            373450   8.0500   NaN        S  \n",
557 |        "..     ...               ...      ...   ...      ...  \n",
558 |        "151      0            113776  66.6000    C2        S  \n",
559 |        "152      0        A.5. 11206   8.0500   NaN        S  \n",
560 |        "153      2          A/5. 851  14.5000   NaN        S  \n",
561 |        "154      0         Fa 265302   7.3125   NaN        S  \n",
562 |        "155      1          PC 17597  61.3792   NaN        C  \n",
563 |        "\n",
564 |        "[156 rows x 12 columns]"
565 |       ]
566 |      },
567 |      "execution_count": 21,
568 |      "metadata": {},
569 |      "output_type": "execute_result"
570 |     }
571 |    ],
572 |    "source": [
573 |     "data"
574 |    ]
575 |   },
576 |   {
577 |    "cell_type": "code",
578 |    "execution_count": 22,
579 |    "metadata": {},
580 |    "outputs": [
581 |     {
582 |      "name": "stderr",
583 |      "output_type": "stream",
584 |      "text": [
585 |       ":FEATURES DONE:                    |█████████████████████| [100%]   00:04  -> (00:00 left)\n",
586 |       ":PAIRWISE DONE:                    |█████████████████████| [100%]   00:00  -> (00:00 left)\n"
587 |      ]
588 |     },
589 |     {
590 |      "name": "stdout",
591 |      "output_type": "stream",
592 |      "text": [
593 |       "Creating Associations graph... DONE!\n"
594 |      ]
595 |     }
596 |    ],
597 |    "source": [
598 |     "my_report = sweetviz.analyze([data, \"Data\"],target_feat='Survived')"
599 |    ]
600 |   },
601 |   {
602 |    "cell_type": "code",
603 |    "execution_count": 23,
604 |    "metadata": {},
605 |    "outputs": [],
606 |    "source": [
607 |     "my_report.show_html('Report.html')"
608 |    ]
609 |   }
610 |  ],
611 |  "metadata": {
612 |   "kernelspec": {
613 |    "display_name": "Python 3",
614 |    "language": "python",
615 |    "name": "python3"
616 |   },
617 |   "language_info": {
618 |    "codemirror_mode": {
619 |     "name": "ipython",
620 |     "version": 3
621 |    },
622 |    "file_extension": ".py",
623 |    "mimetype": "text/x-python",
624 |    "name": "python",
625 |    "nbconvert_exporter": "python",
626 |    "pygments_lexer": "ipython3",
627 |    "version": "3.8.1"
628 |   }
629 |  },
630 |  "nbformat": 4,
631 |  "nbformat_minor": 4
632 | }
633 | 


--------------------------------------------------------------------------------
/BERTspamfilter.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "BERTspamfilter.ipynb",
  7 |       "provenance": []
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     },
 13 |     "language_info": {
 14 |       "name": "python"
 15 |     }
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "code",
 20 |       "metadata": {
 21 |         "colab": {
 22 |           "base_uri": "https://localhost:8080/"
 23 |         },
 24 |         "id": "SXBDflH0yEBW",
 25 |         "outputId": "7a54ff87-c16e-43d3-d050-8e16adb81090"
 26 |       },
 27 |       "source": [
 28 |         "!pip install transformers"
 29 |       ],
 30 |       "execution_count": 1,
 31 |       "outputs": [
 32 |         {
 33 |           "output_type": "stream",
 34 |           "text": [
 35 |             "Collecting transformers\n",
 36 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/b0/9e/5b80becd952d5f7250eaf8fc64b957077b12ccfe73e9c03d37146ab29712/transformers-4.6.0-py3-none-any.whl (2.3MB)\n",
 37 |             "\u001b[K     |████████████████████████████████| 2.3MB 25.2MB/s \n",
 38 |             "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n",
 39 |             "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n",
 40 |             "Collecting sacremoses\n",
 41 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)\n",
 42 |             "\u001b[K     |████████████████████████████████| 901kB 41.3MB/s \n",
 43 |             "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.41.1)\n",
 44 |             "Collecting tokenizers<0.11,>=0.10.1\n",
 45 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)\n",
 46 |             "\u001b[K     |████████████████████████████████| 3.3MB 39.7MB/s \n",
 47 |             "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
 48 |             "Collecting huggingface-hub==0.0.8\n",
 49 |             "  Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl\n",
 50 |             "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
 51 |             "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n",
 52 |             "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (4.0.1)\n",
 53 |             "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
 54 |             "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n",
 55 |             "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (8.0.0)\n",
 56 |             "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
 57 |             "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
 58 |             "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2020.12.5)\n",
 59 |             "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
 60 |             "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n",
 61 |             "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.7.4.3)\n",
 62 |             "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n",
 63 |             "Installing collected packages: sacremoses, tokenizers, huggingface-hub, transformers\n",
 64 |             "Successfully installed huggingface-hub-0.0.8 sacremoses-0.0.45 tokenizers-0.10.2 transformers-4.6.0\n"
 65 |           ],
 66 |           "name": "stdout"
 67 |         }
 68 |       ]
 69 |     },
 70 |     {
 71 |       "cell_type": "code",
 72 |       "metadata": {
 73 |         "id": "-lI76wIlypxM"
 74 |       },
 75 |       "source": [
 76 |         "import numpy as np\n",
 77 |         "import pandas as pd\n",
 78 |         "from sklearn.model_selection import train_test_split\n",
 79 |         "import torch\n",
 80 |         "import transformers as tf\n",
 81 |         "import warnings\n",
 82 |         "warnings.filterwarnings('ignore')"
 83 |       ],
 84 |       "execution_count": 33,
 85 |       "outputs": []
 86 |     },
 87 |     {
 88 |       "cell_type": "code",
 89 |       "metadata": {
 90 |         "colab": {
 91 |           "base_uri": "https://localhost:8080/",
 92 |           "height": 206
 93 |         },
 94 |         "id": "SUaQNE0tyvA-",
 95 |         "outputId": "c43fd1b2-577f-4a86-a38b-fbe50dbb0c82"
 96 |       },
 97 |       "source": [
 98 |         "import pandas as pd\n",
 99 |         "# Dataset from - https://archive.ics.uci.edu/ml/datasets/SMS+Spam+Collection\n",
100 |         "data_sms= pd.read_table('sample_data/SMSSpamCollection',\n",
101 |         "                   sep='\\t',\n",
102 |         "                   header=None,\n",
103 |         "                   names=['label', 'sms'])\n",
104 |         "\n",
105 |         "data_sms.head()"
106 |       ],
107 |       "execution_count": 3,
108 |       "outputs": [
109 |         {
110 |           "output_type": "execute_result",
111 |           "data": {
112 |             "text/html": [
113 |               "<div>\n",
114 |               "<style scoped>\n",
115 |               "    .dataframe tbody tr th:only-of-type {\n",
116 |               "        vertical-align: middle;\n",
117 |               "    }\n",
118 |               "\n",
119 |               "    .dataframe tbody tr th {\n",
120 |               "        vertical-align: top;\n",
121 |               "    }\n",
122 |               "\n",
123 |               "    .dataframe thead th {\n",
124 |               "        text-align: right;\n",
125 |               "    }\n",
126 |               "</style>\n",
127 |               "<table border=\"1\" class=\"dataframe\">\n",
128 |               "  <thead>\n",
129 |               "    <tr style=\"text-align: right;\">\n",
130 |               "      <th></th>\n",
131 |               "      <th>label</th>\n",
132 |               "      <th>sms</th>\n",
133 |               "    </tr>\n",
134 |               "  </thead>\n",
135 |               "  <tbody>\n",
136 |               "    <tr>\n",
137 |               "      <th>0</th>\n",
138 |               "      <td>ham</td>\n",
139 |               "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
140 |               "    </tr>\n",
141 |               "    <tr>\n",
142 |               "      <th>1</th>\n",
143 |               "      <td>ham</td>\n",
144 |               "      <td>Ok lar... Joking wif u oni...</td>\n",
145 |               "    </tr>\n",
146 |               "    <tr>\n",
147 |               "      <th>2</th>\n",
148 |               "      <td>spam</td>\n",
149 |               "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
150 |               "    </tr>\n",
151 |               "    <tr>\n",
152 |               "      <th>3</th>\n",
153 |               "      <td>ham</td>\n",
154 |               "      <td>U dun say so early hor... U c already then say...</td>\n",
155 |               "    </tr>\n",
156 |               "    <tr>\n",
157 |               "      <th>4</th>\n",
158 |               "      <td>ham</td>\n",
159 |               "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
160 |               "    </tr>\n",
161 |               "  </tbody>\n",
162 |               "</table>\n",
163 |               "</div>"
164 |             ],
165 |             "text/plain": [
166 |               "  label                                                sms\n",
167 |               "0   ham  Go until jurong point, crazy.. Available only ...\n",
168 |               "1   ham                      Ok lar... Joking wif u oni...\n",
169 |               "2  spam  Free entry in 2 a wkly comp to win FA Cup fina...\n",
170 |               "3   ham  U dun say so early hor... U c already then say...\n",
171 |               "4   ham  Nah I don't think he goes to usf, he lives aro..."
172 |             ]
173 |           },
174 |           "metadata": {
175 |             "tags": []
176 |           },
177 |           "execution_count": 3
178 |         }
179 |       ]
180 |     },
181 |     {
182 |       "cell_type": "code",
183 |       "metadata": {
184 |         "id": "4r3jP0Li_sac"
185 |       },
186 |       "source": [
187 |         "sample_data = data_sms[:1000]"
188 |       ],
189 |       "execution_count": 11,
190 |       "outputs": []
191 |     },
192 |     {
193 |       "cell_type": "code",
194 |       "metadata": {
195 |         "id": "e1Mv3aBpzOqd"
196 |       },
197 |       "source": [
198 |         "ml_cls, tokenizer_cls, pt_weights = (tf.DistilBertModel, tf.DistilBertTokenizer, 'distilbert-base-uncased')"
199 |       ],
200 |       "execution_count": 36,
201 |       "outputs": []
202 |     },
203 |     {
204 |       "cell_type": "code",
205 |       "metadata": {
206 |         "id": "58NSOHs7-AJf"
207 |       },
208 |       "source": [
209 |         "tzr = tokenizer_cls.from_pretrained(pt_weights)"
210 |       ],
211 |       "execution_count": 37,
212 |       "outputs": []
213 |     },
214 |     {
215 |       "cell_type": "code",
216 |       "metadata": {
217 |         "colab": {
218 |           "base_uri": "https://localhost:8080/"
219 |         },
220 |         "id": "YuFAAgb7-JGA",
221 |         "outputId": "13539fd3-f3de-46cf-e8f2-c6cd551ca269"
222 |       },
223 |       "source": [
224 |         "mod = ml_cls.from_pretrained(pt_weights)"
225 |       ],
226 |       "execution_count": 39,
227 |       "outputs": [
228 |         {
229 |           "output_type": "stream",
230 |           "text": [
231 |             "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias']\n",
232 |             "- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
233 |             "- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
234 |           ],
235 |           "name": "stderr"
236 |         }
237 |       ]
238 |     },
239 |     {
240 |       "cell_type": "code",
241 |       "metadata": {
242 |         "id": "4FcTNjaq-PaK"
243 |       },
244 |       "source": [
245 |         "tokenized = sample_data['sms'].apply((lambda x: tokenizer.encode(x, add_special_tokens=True)))"
246 |       ],
247 |       "execution_count": 40,
248 |       "outputs": []
249 |     },
250 |     {
251 |       "cell_type": "code",
252 |       "metadata": {
253 |         "id": "Tto46jcp-hCt"
254 |       },
255 |       "source": [
256 |         "max_len = 0\n",
257 |         "for i in tokenized.values:\n",
258 |         "    if len(i) > max_len:\n",
259 |         "        max_len = len(i)\n",
260 |         "\n",
261 |         "padded = np.array([i + [0]*(max_len-len(i)) for i in tokenized.values])"
262 |       ],
263 |       "execution_count": 41,
264 |       "outputs": []
265 |     },
266 |     {
267 |       "cell_type": "code",
268 |       "metadata": {
269 |         "colab": {
270 |           "base_uri": "https://localhost:8080/"
271 |         },
272 |         "id": "0Rr8Z-kX-pHn",
273 |         "outputId": "4233f2a8-0dcc-44b3-b9d2-ccef656a1df3"
274 |       },
275 |       "source": [
276 |         "np.array(padded).shape"
277 |       ],
278 |       "execution_count": 42,
279 |       "outputs": [
280 |         {
281 |           "output_type": "execute_result",
282 |           "data": {
283 |             "text/plain": [
284 |               "(1000, 100)"
285 |             ]
286 |           },
287 |           "metadata": {
288 |             "tags": []
289 |           },
290 |           "execution_count": 42
291 |         }
292 |       ]
293 |     },
294 |     {
295 |       "cell_type": "code",
296 |       "metadata": {
297 |         "colab": {
298 |           "base_uri": "https://localhost:8080/"
299 |         },
300 |         "id": "pOp34e3a-uT1",
301 |         "outputId": "e5ab3d4c-7c81-4960-f2f0-b7aec01da728"
302 |       },
303 |       "source": [
304 |         "attention_mask = np.where(padded != 0, 1, 0)\n",
305 |         "attention_mask.shape"
306 |       ],
307 |       "execution_count": 43,
308 |       "outputs": [
309 |         {
310 |           "output_type": "execute_result",
311 |           "data": {
312 |             "text/plain": [
313 |               "(1000, 100)"
314 |             ]
315 |           },
316 |           "metadata": {
317 |             "tags": []
318 |           },
319 |           "execution_count": 43
320 |         }
321 |       ]
322 |     },
323 |     {
324 |       "cell_type": "markdown",
325 |       "metadata": {
326 |         "id": "oICLQY7rg7_6"
327 |       },
328 |       "source": [
329 |         "Torch.no_grad() deactivates autograd engine. Eventually it will reduce the memory usage and speed up computations.\n",
330 |         "\n",
331 |         "Use of Torch.no_grad():\n",
332 |         "\n",
333 |         "    To perform inference without Gradient Calculation.\n",
334 |         "\n",
335 |         "    To make sure there's no leak test data into the model.\n",
336 |         "\n",
337 |         "It's generally used to perform Validation. Reason in this case one can use validation batch of large size"
338 |       ]
339 |     },
340 |     {
341 |       "cell_type": "code",
342 |       "metadata": {
343 |         "id": "tQw7--SK-485"
344 |       },
345 |       "source": [
346 |         "input_ids = torch.tensor(padded)  \n",
347 |         "attention_mask = torch.tensor(attention_mask)\n",
348 |         "\n",
349 |         "with torch.no_grad():\n",
350 |         "    last_hidden_states = model(input_ids, attention_mask=attention_mask)"
351 |       ],
352 |       "execution_count": 44,
353 |       "outputs": []
354 |     },
355 |     {
356 |       "cell_type": "code",
357 |       "metadata": {
358 |         "id": "9gZWEqrKBaTy"
359 |       },
360 |       "source": [
361 |         "features = last_hidden_states[0][:,0,:].numpy()"
362 |       ],
363 |       "execution_count": 45,
364 |       "outputs": []
365 |     },
366 |     {
367 |       "cell_type": "code",
368 |       "metadata": {
369 |         "id": "x8S4svUCBdXJ"
370 |       },
371 |       "source": [
372 |         "labels = sample_data['label']"
373 |       ],
374 |       "execution_count": 46,
375 |       "outputs": []
376 |     },
377 |     {
378 |       "cell_type": "code",
379 |       "metadata": {
380 |         "id": "3IE2_e70Biv9"
381 |       },
382 |       "source": [
383 |         "#mapping = {'ham' : 0 , 'spam':1}\n",
384 |         "labels = labels.map(mapping)"
385 |       ],
386 |       "execution_count": 47,
387 |       "outputs": []
388 |     },
389 |     {
390 |       "cell_type": "code",
391 |       "metadata": {
392 |         "id": "4a2U91nZDE4b"
393 |       },
394 |       "source": [
395 |         "train_features, test_features, train_labels, test_labels = train_test_split(features, labels)"
396 |       ],
397 |       "execution_count": 48,
398 |       "outputs": []
399 |     },
400 |     {
401 |       "cell_type": "code",
402 |       "metadata": {
403 |         "colab": {
404 |           "base_uri": "https://localhost:8080/"
405 |         },
406 |         "id": "EptrX8Y2DbIJ",
407 |         "outputId": "cc4886f4-2903-4178-a721-380a07529444"
408 |       },
409 |       "source": [
410 |         "from sklearn.tree import DecisionTreeClassifier\n",
411 |         "clf = DecisionTreeClassifier(random_state=0)\n",
412 |         "clf.fit(train_features, train_labels)"
413 |       ],
414 |       "execution_count": 49,
415 |       "outputs": [
416 |         {
417 |           "output_type": "execute_result",
418 |           "data": {
419 |             "text/plain": [
420 |               "DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n",
421 |               "                       max_depth=None, max_features=None, max_leaf_nodes=None,\n",
422 |               "                       min_impurity_decrease=0.0, min_impurity_split=None,\n",
423 |               "                       min_samples_leaf=1, min_samples_split=2,\n",
424 |               "                       min_weight_fraction_leaf=0.0, presort='deprecated',\n",
425 |               "                       random_state=0, splitter='best')"
426 |             ]
427 |           },
428 |           "metadata": {
429 |             "tags": []
430 |           },
431 |           "execution_count": 49
432 |         }
433 |       ]
434 |     },
435 |     {
436 |       "cell_type": "code",
437 |       "metadata": {
438 |         "colab": {
439 |           "base_uri": "https://localhost:8080/"
440 |         },
441 |         "id": "JfTQXDyqDiKH",
442 |         "outputId": "ddcb7d3a-98fd-46bc-b9ce-086ff80a5a88"
443 |       },
444 |       "source": [
445 |         "clf.score(test_features, test_labels)"
446 |       ],
447 |       "execution_count": 50,
448 |       "outputs": [
449 |         {
450 |           "output_type": "execute_result",
451 |           "data": {
452 |             "text/plain": [
453 |               "0.92"
454 |             ]
455 |           },
456 |           "metadata": {
457 |             "tags": []
458 |           },
459 |           "execution_count": 50
460 |         }
461 |       ]
462 |     }
463 |   ]
464 | }


--------------------------------------------------------------------------------
/Basic Chatbot.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 8,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import nltk"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 12,
 15 |    "metadata": {},
 16 |    "outputs": [
 17 |     {
 18 |      "name": "stdout",
 19 |      "output_type": "stream",
 20 |      "text": [
 21 |       "3.4.5\n"
 22 |      ]
 23 |     }
 24 |    ],
 25 |    "source": [
 26 |     "print(nltk.__version__)"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 9,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "from nltk.chat.util import Chat,reflections"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 10,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "pairs = [[r\"Hey (.*)\", [\"Chatarpatar : How are you Akshay ?\"]],\n",
 45 |     "         [r\"I m good.Who do(.*)\",[\"Chatarpatar : IPL is so unpredictable , can't say but CSK , MI have good records.\"]],\n",
 46 |     "         [r\"I agree.Which Team(.)* ?\",[\"Chatarpatar : CSK, what about you ?\"]],\n",
 47 |     "         [r\"The one which(.)*\",[\"Chatarpatar : Funny , should I laugh ?\"]],\n",
 48 |     "         [r\"You are a chatbot (.)*\",[\"Chatarpatar : That was MACHINIST(RACIST) !!! Go quit me I don't wanna talk with you\"]],\n",
 49 |     "         [r\"Come on Chatarpata(.)*\",[\"Chatarpatar : Quit me Akshay !!\"]], \n",
 50 |     "         [r\"Chatbot he pr nakhre(.)*\",[\"Chatpatar : Bye Akshay , Have a *** day\"]]]"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 11,
 56 |    "metadata": {},
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "def Chatbot():\n",
 60 |     "    print(\"Chatarpatar : Hey Akshay\")\n",
 61 |     "    chatbot = Chat(pairs,reflections)\n",
 62 |     "    chatbot.converse()"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 12,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "Chatarpatar : Hey Akshay\n",
 75 |       "AKSHAY : Hey Chatarpatar\n",
 76 |       "Chatarpatar : How are you Akshay ?\n",
 77 |       "AKSHAY : I m good.Who do u think will win IPL\n",
 78 |       "Chatarpatar : IPL is so unpredictable , can't say but CSK , MI have good records.\n",
 79 |       "AKSHAY : I agree.Which Team is ur favourite ?\n",
 80 |       "Chatarpatar : CSK, what about you ?\n",
 81 |       "AKSHAY : The one which wins.Hehehe\n",
 82 |       "Chatarpatar : Funny , should I laugh ?\n",
 83 |       "AKSHAY : You are a chatbot. How can you laugh ?\n",
 84 |       "None\n",
 85 |       "AKSHAY : You are a chatbot . How can you laugh ?\n",
 86 |       "Chatarpatar : That was MACHINIST(RACIST) !!! Go quit me I don't wanna talk with you\n",
 87 |       "AKSHAY : Come on Chatarpatar.Be Sporty\n",
 88 |       "Chatarpatar : Quit me Akshay !!\n",
 89 |       "AKSHAY : Chatbot he pr nakhre toh dekho\n",
 90 |       "Chatpatar : Bye Akshay , Have a *** day\n"
 91 |      ]
 92 |     },
 93 |     {
 94 |      "ename": "KeyboardInterrupt",
 95 |      "evalue": "Interrupted by user",
 96 |      "output_type": "error",
 97 |      "traceback": [
 98 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 99 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
100 |       "\u001b[0;32m<ipython-input-12-47b0d318e462>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__name__\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'__main__'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mChatbot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
101 |       "\u001b[0;32m<ipython-input-11-c6ff4f328087>\u001b[0m in \u001b[0;36mChatbot\u001b[0;34m()\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Chatarpatar : Hey Akshay\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m     \u001b[0mchatbot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mChat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpairs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mreflections\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m     \u001b[0mchatbot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconverse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
102 |       "\u001b[0;32m~/anaconda3/envs/NLP/lib/python3.6/site-packages/nltk/chat/util.py\u001b[0m in \u001b[0;36mconverse\u001b[0;34m(self, quit)\u001b[0m\n\u001b[1;32m    117\u001b[0m             \u001b[0muser_input\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mquit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    118\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m                 \u001b[0muser_input\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"AKSHAY : \"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    120\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mEOFError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    121\u001b[0m                 \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muser_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
103 |       "\u001b[0;32m~/anaconda3/envs/NLP/lib/python3.6/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36mraw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m    861\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_ident\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    862\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_header\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 863\u001b[0;31m             \u001b[0mpassword\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    864\u001b[0m         )\n\u001b[1;32m    865\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
104 |       "\u001b[0;32m~/anaconda3/envs/NLP/lib/python3.6/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36m_input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m    902\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    903\u001b[0m                 \u001b[0;31m# re-raise KeyboardInterrupt, to truncate traceback\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 904\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Interrupted by user\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    905\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    906\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwarning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Invalid Message:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
105 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: Interrupted by user"
106 |      ]
107 |     }
108 |    ],
109 |    "source": [
110 |     "if __name__ == '__main__':\n",
111 |     "    Chatbot()"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": null,
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": []
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": []
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "metadata": {},
132 |    "outputs": [],
133 |    "source": []
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {},
139 |    "outputs": [],
140 |    "source": []
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 4,
145 |    "metadata": {
146 |     "scrolled": true
147 |    },
148 |    "outputs": [
149 |     {
150 |      "name": "stdout",
151 |      "output_type": "stream",
152 |      "text": [
153 |       "Chatarpatar : Hey Akshay\n"
154 |      ]
155 |     },
156 |     {
157 |      "ename": "NameError",
158 |      "evalue": "name 'pairs' is not defined",
159 |      "output_type": "error",
160 |      "traceback": [
161 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
162 |       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
163 |       "\u001b[0;32m<ipython-input-4-47b0d318e462>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__name__\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'__main__'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mChatbot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
164 |       "\u001b[0;32m<ipython-input-3-c6ff4f328087>\u001b[0m in \u001b[0;36mChatbot\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mChatbot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Chatarpatar : Hey Akshay\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m     \u001b[0mchatbot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mChat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpairs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mreflections\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m     \u001b[0mchatbot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconverse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
165 |       "\u001b[0;31mNameError\u001b[0m: name 'pairs' is not defined"
166 |      ]
167 |     }
168 |    ],
169 |    "source": [
170 |     "if __name__ == '__main__':\n",
171 |     "    Chatbot()"
172 |    ]
173 |   }
174 |  ],
175 |  "metadata": {
176 |   "kernelspec": {
177 |    "display_name": "Python 3",
178 |    "language": "python",
179 |    "name": "python3"
180 |   },
181 |   "language_info": {
182 |    "codemirror_mode": {
183 |     "name": "ipython",
184 |     "version": 3
185 |    },
186 |    "file_extension": ".py",
187 |    "mimetype": "text/x-python",
188 |    "name": "python",
189 |    "nbconvert_exporter": "python",
190 |    "pygments_lexer": "ipython3",
191 |    "version": "3.6.10"
192 |   }
193 |  },
194 |  "nbformat": 4,
195 |  "nbformat_minor": 4
196 | }
197 | 


--------------------------------------------------------------------------------
/Building_an_auto_correct_in_python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Building an auto correct in python.ipynb",
  7 |       "provenance": []
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     },
 13 |     "language_info": {
 14 |       "name": "python"
 15 |     }
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "markdown",
 20 |       "metadata": {
 21 |         "id": "rPFzEHfSpyIl"
 22 |       },
 23 |       "source": [
 24 |         "#Installing the required packages"
 25 |       ]
 26 |     },
 27 |     {
 28 |       "cell_type": "code",
 29 |       "metadata": {
 30 |         "colab": {
 31 |           "base_uri": "https://localhost:8080/"
 32 |         },
 33 |         "id": "hGU_V6CypHMv",
 34 |         "outputId": "e569cfca-6fe9-4fb8-c003-52cd06b84f4f"
 35 |       },
 36 |       "source": [
 37 |         "! pip install pyspellchecker"
 38 |       ],
 39 |       "execution_count": 2,
 40 |       "outputs": [
 41 |         {
 42 |           "output_type": "stream",
 43 |           "text": [
 44 |             "Collecting pyspellchecker\n",
 45 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/64/c7/435f49c0ac6bec031d1aba4daf94dc21dc08a9db329692cdb77faac51cea/pyspellchecker-0.6.2-py3-none-any.whl (2.7MB)\n",
 46 |             "\u001b[K     |████████████████████████████████| 2.7MB 3.3MB/s \n",
 47 |             "\u001b[?25hInstalling collected packages: pyspellchecker\n",
 48 |             "Successfully installed pyspellchecker-0.6.2\n"
 49 |           ],
 50 |           "name": "stdout"
 51 |         }
 52 |       ]
 53 |     },
 54 |     {
 55 |       "cell_type": "markdown",
 56 |       "metadata": {
 57 |         "id": "y966QZZqqKUV"
 58 |       },
 59 |       "source": [
 60 |         "#Coding"
 61 |       ]
 62 |     },
 63 |     {
 64 |       "cell_type": "code",
 65 |       "metadata": {
 66 |         "colab": {
 67 |           "base_uri": "https://localhost:8080/"
 68 |         },
 69 |         "id": "qfjt4IXRpusJ",
 70 |         "outputId": "4a8916e9-23a9-432c-812f-9e4f6de6d121"
 71 |       },
 72 |       "source": [
 73 |         "from spellchecker import SpellChecker\n",
 74 |         "\n",
 75 |         "spell = SpellChecker()\n",
 76 |         "\n",
 77 |         "text = \"Dataa scienttist is a very promising career.\"\n",
 78 |         "\n",
 79 |         "dict_of_autocorrect_words = {}\n",
 80 |         "for i in spell.unknown(text.split()):\n",
 81 |         "    dict_of_autocorrect_words[i] = spell.correction(i)\n",
 82 |         "\n",
 83 |         "print(f'The AUTOCORRECT suggestions are Mis-spelled words are {dict_of_autocorrect_words}')\n",
 84 |         "\n",
 85 |         "temp = text.split()\n",
 86 |         "res = []\n",
 87 |         "for wrd in temp:\n",
 88 |         "      \n",
 89 |         "    res.append(dict_of_autocorrect_words.get(wrd, wrd))\n",
 90 |         "      \n",
 91 |         "res = ' '.join(res)\n",
 92 |         "\n",
 93 |         "print(res)\n",
 94 |         "\n"
 95 |       ],
 96 |       "execution_count": 7,
 97 |       "outputs": [
 98 |         {
 99 |           "output_type": "stream",
100 |           "text": [
101 |             "The AUTOCORRECT suggestions are Mis-spelled words are {'dataa': 'data', 'scienttist': 'scientist', 'career.': 'career'}\n",
102 |             "Dataa scientist is a very promising career\n"
103 |           ],
104 |           "name": "stdout"
105 |         }
106 |       ]
107 |     }
108 |   ]
109 | }


--------------------------------------------------------------------------------
/COVIDdetectionusingXray.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sat Feb 27 01:16:15 2021
 5 | 
 6 | @author: intel123
 7 | """
 8 | 
 9 | from keras.models import Sequential
10 | from keras.layers import Conv2D
11 | from keras.layers import MaxPooling2D
12 | from keras.layers import Flatten
13 | from keras.layers import Dense
14 | 
15 | Classifier=Sequential();
16 | 
17 | Classifier.add(Conv2D(64,(3,3),input_shape=(64,64,3),activation='relu'))
18 | 
19 | Classifier.add(MaxPooling2D(pool_size=(2,2)))
20 | 
21 | Classifier.add(Conv2D(32,(3,3),activation='relu'))
22 | 
23 | Classifier.add(MaxPooling2D(pool_size=(2,2)))
24 | 
25 | Classifier.add(Flatten())
26 | 
27 | Classifier.add(Dense(units=104, activation='relu'))
28 | Classifier.add(Dense(units=1, activation='sigmoid'))
29 | 
30 | Classifier.compile(optimizer ='adam',loss='binary_crossentropy',metrics=['accuracy'])
31 | 
32 | from keras.preprocessing.image import ImageDataGenerator
33 | 
34 | train_datagen = ImageDataGenerator(rescale = 1./255,
35 |                                    shear_range = 0.4,
36 |                                    zoom_range = 0.3,
37 |                                    horizontal_flip = True)
38 | 
39 | test_datagen = ImageDataGenerator(rescale = 1./255)
40 | 
41 | training_set = train_datagen.flow_from_directory('/home/intel123/AS/dataset/training_set',
42 |                                                  target_size = (64, 64),
43 |                                                  batch_size = 4,
44 |                                                  class_mode = 'binary')
45 | 
46 | test_set = test_datagen.flow_from_directory('/home/intel123/AS/dataset/test_set',
47 |                                             target_size = (64, 64),
48 |                                             batch_size = 4,
49 |                                             class_mode = 'binary')
50 | 
51 | 
52 | Classifier.fit_generator(training_set,
53 |                          steps_per_epoch = 40,
54 |                          epochs = 5,
55 |                          validation_data = test_set,
56 |                          validation_steps = 8)
57 | 
58 | import numpy as np
59 | from keras.preprocessing import image
60 | test_image = image.load_img('/home/intel123/AS/dataset/covid_or_normal.jpg',target_size=(64,64))
61 | test_image = image.img_to_array(test_image)
62 | test_image = np.expand_dims(test_image,axis=0)
63 | result = Classifier.predict(test_image)
64 | training_set.class_indices
65 | if result[0][0] == 1:
66 |     prediction = 'Normal'
67 |     print(prediction)
68 | else:
69 |     prediction = 'COVID'
70 |     print(prediction)


--------------------------------------------------------------------------------
/CasualToFormalConverter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | from styleformer import Styleformer
 4 | import torch
 5 | sf = Styleformer(style = 0) 
 6 | st.title('Casual to Formal converter')
 7 | st.write("Please enter your casual text")
 8 | text = st.text_input('Enter some text')
 9 | if st.button('Hit me'):
10 |   target_sentence = sf.transfer(text)
11 |   st.write(target_sentence)
12 | else:
13 |      pass
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/Comparing different language detector.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# langdetect package"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 3,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "from langdetect import detect "
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 7,
 22 |    "metadata": {},
 23 |    "outputs": [
 24 |     {
 25 |      "name": "stdout",
 26 |      "output_type": "stream",
 27 |      "text": [
 28 |       "en\n",
 29 |       "hi\n",
 30 |       "es\n",
 31 |       "ja\n"
 32 |      ]
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "print(detect(\"At AS Learning , learning never ends.\")) \n",
 37 |     "print(detect(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\")) \n",
 38 |     "print(detect(\"En AS Learning, el aprendizaje nunca termina.\")) \n",
 39 |     "print(detect(\"ASラーニングでは、学習は決して終わりません\")) "
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "# Langid package"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 57,
 52 |    "metadata": {},
 53 |    "outputs": [
 54 |     {
 55 |      "name": "stdout",
 56 |      "output_type": "stream",
 57 |      "text": [
 58 |       "('en', 0.901430497396395)\n",
 59 |       "('hi', 0.9999996623730644)\n",
 60 |       "('es', 0.9870864718495084)\n",
 61 |       "('ja', 1.0)\n"
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "import langid\n",
 67 |     "from langid.langid import LanguageIdentifier, model\n",
 68 |     "lang_identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)\n",
 69 |     "print(lang_identifier.classify(\"At AS Learning , learning never ends.\"))\n",
 70 |     "print(lang_identifier.classify(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\"))\n",
 71 |     "print(lang_identifier.classify(\"En AS Learning, el aprendizaje nunca termina.\"))\n",
 72 |     "print(lang_identifier.classify(\"ASラーニングでは、学習は決して終わりません\"))"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "# TextBlob package"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 10,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "from textblob import TextBlob "
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 11,
 94 |    "metadata": {},
 95 |    "outputs": [
 96 |     {
 97 |      "name": "stdout",
 98 |      "output_type": "stream",
 99 |      "text": [
100 |       "en\n",
101 |       "hi\n",
102 |       "es\n",
103 |       "ja\n"
104 |      ]
105 |     }
106 |    ],
107 |    "source": [
108 |     "print(TextBlob(\"At AS Learning , learning never ends.\").detect_language()) \n",
109 |     "print(TextBlob(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\").detect_language()) \n",
110 |     "print(TextBlob(\"En AS Learning, el aprendizaje nunca termina.\").detect_language()) \n",
111 |     "print(TextBlob(\"ASラーニングでは、学習は決して終わりません\").detect_language()) "
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "# Other packages spacy and Fasttext"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "# Speed comparison between three packages"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 68,
131 |    "metadata": {},
132 |    "outputs": [
133 |     {
134 |      "name": "stdout",
135 |      "output_type": "stream",
136 |      "text": [
137 |       "en\n",
138 |       "hi\n",
139 |       "es\n",
140 |       "ja\n",
141 |       "Time elapsed for TextBlob: 0.802361011505127\n"
142 |      ]
143 |     }
144 |    ],
145 |    "source": [
146 |     "import time\n",
147 |     "start = time.time()\n",
148 |     "print(TextBlob(\"At AS Learning , learning never ends.\").detect_language()) \n",
149 |     "print(TextBlob(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\").detect_language()) \n",
150 |     "print(TextBlob(\"En AS Learning, el aprendizaje nunca termina.\").detect_language()) \n",
151 |     "print(TextBlob(\"ASラーニングでは、学習は決して終わりません\").detect_language()) \n",
152 |     "end = time.time()\n",
153 |     "print (\"Time elapsed for TextBlob:\", end - start)\n",
154 |     "\n"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 69,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "('en', 0.901430497396395)\n",
167 |       "('hi', 0.9999996623730644)\n",
168 |       "('es', 0.9870864718495084)\n",
169 |       "('ja', 1.0)\n",
170 |       "Time elapsed for langid: 0.009955883026123047\n"
171 |      ]
172 |     }
173 |    ],
174 |    "source": [
175 |     "start = time.time()\n",
176 |     "print(lang_identifier.classify(\"At AS Learning , learning never ends.\"))\n",
177 |     "print(lang_identifier.classify(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\"))\n",
178 |     "print(lang_identifier.classify(\"En AS Learning, el aprendizaje nunca termina.\"))\n",
179 |     "print(lang_identifier.classify(\"ASラーニングでは、学習は決して終わりません\"))\n",
180 |     "end = time.time()\n",
181 |     "print (\"Time elapsed for langid:\", end - start)"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": 71,
187 |    "metadata": {},
188 |    "outputs": [
189 |     {
190 |      "name": "stdout",
191 |      "output_type": "stream",
192 |      "text": [
193 |       "en\n",
194 |       "hi\n",
195 |       "es\n",
196 |       "ja\n",
197 |       "Time elapsed for langdetect: 0.05070662498474121\n"
198 |      ]
199 |     }
200 |    ],
201 |    "source": [
202 |     "start = time.time()\n",
203 |     "print(detect(\"At AS Learning , learning never ends.\")) \n",
204 |     "print(detect(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\")) \n",
205 |     "print(detect(\"En AS Learning, el aprendizaje nunca termina.\")) \n",
206 |     "print(detect(\"ASラーニングでは、学習は決して終わりません\")) \n",
207 |     "end = time.time()\n",
208 |     "print (\"Time elapsed for langdetect:\", end - start)"
209 |    ]
210 |   }
211 |  ],
212 |  "metadata": {
213 |   "kernelspec": {
214 |    "display_name": "Python 3",
215 |    "language": "python",
216 |    "name": "python3"
217 |   },
218 |   "language_info": {
219 |    "codemirror_mode": {
220 |     "name": "ipython",
221 |     "version": 3
222 |    },
223 |    "file_extension": ".py",
224 |    "mimetype": "text/x-python",
225 |    "name": "python",
226 |    "nbconvert_exporter": "python",
227 |    "pygments_lexer": "ipython3",
228 |    "version": "3.7.6"
229 |   }
230 |  },
231 |  "nbformat": 4,
232 |  "nbformat_minor": 4
233 | }
234 | 


--------------------------------------------------------------------------------
/DataPrep.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "DataPrep.ipynb",
  7 |       "provenance": []
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     },
 13 |     "language_info": {
 14 |       "name": "python"
 15 |     }
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "code",
 20 |       "metadata": {
 21 |         "colab": {
 22 |           "base_uri": "https://localhost:8080/"
 23 |         },
 24 |         "id": "n449T-2LgzLl",
 25 |         "outputId": "3a3cc2e9-1b67-47c7-e6b0-ef8a9b7fbdce"
 26 |       },
 27 |       "source": [
 28 |         "! pip install dataprep"
 29 |       ],
 30 |       "execution_count": 1,
 31 |       "outputs": [
 32 |         {
 33 |           "output_type": "stream",
 34 |           "text": [
 35 |             "Collecting dataprep\n",
 36 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/f6/c4/1f6f12d3f8bc5b71cd806f66c96c8f7f8615328a99a16e90e66cc17f3b84/dataprep-0.2.15-py3-none-any.whl (193kB)\n",
 37 |             "\u001b[K     |████████████████████████████████| 194kB 5.8MB/s \n",
 38 |             "\u001b[?25hCollecting pydantic<2.0,>=1.6\n",
 39 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/2b/a3/0ffdb6c63f45f10d19b8e8b32670b22ed089cafb29732f6bf8ce518821fb/pydantic-1.8.1-cp37-cp37m-manylinux2014_x86_64.whl (10.1MB)\n",
 40 |             "\u001b[K     |████████████████████████████████| 10.1MB 13.0MB/s \n",
 41 |             "\u001b[?25hRequirement already satisfied: jinja2<3.0,>=2.11 in /usr/local/lib/python3.7/dist-packages (from dataprep) (2.11.3)\n",
 42 |             "Collecting aiohttp<4.0,>=3.6\n",
 43 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/88/c0/5890b4c8b04a79b7360e8fe4490feb0bb3ab179743f199f0e6220cebd568/aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3MB)\n",
 44 |             "\u001b[K     |████████████████████████████████| 1.3MB 45.1MB/s \n",
 45 |             "\u001b[?25hCollecting dask[array,dataframe,delayed]<3.0,>=2.25\n",
 46 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/04/94/b4012c61c09300f4413c58a522a6cc1a212dc4a7f6fe1ba98d67429c089d/dask-2.30.0-py3-none-any.whl (848kB)\n",
 47 |             "\u001b[K     |████████████████████████████████| 849kB 49.3MB/s \n",
 48 |             "\u001b[?25hRequirement already satisfied: bokeh<3,>=2 in /usr/local/lib/python3.7/dist-packages (from dataprep) (2.3.1)\n",
 49 |             "Collecting nltk<4.0,>=3.5\n",
 50 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/5e/37/9532ddd4b1bbb619333d5708aaad9bf1742f051a664c3c6fa6632a105fd8/nltk-3.6.2-py3-none-any.whl (1.5MB)\n",
 51 |             "\u001b[K     |████████████████████████████████| 1.5MB 42.0MB/s \n",
 52 |             "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.7/dist-packages (from dataprep) (1.19.5)\n",
 53 |             "Collecting jsonpath-ng<2.0,>=1.5\n",
 54 |             "  Downloading https://files.pythonhosted.org/packages/ae/03/a8a12e49e88ba7983d704ef518e25041206aa2e934686270516f1bc439ff/jsonpath_ng-1.5.2-py3-none-any.whl\n",
 55 |             "Requirement already satisfied: scipy<2,>=1 in /usr/local/lib/python3.7/dist-packages (from dataprep) (1.4.1)\n",
 56 |             "Requirement already satisfied: ipywidgets<8.0,>=7.5 in /usr/local/lib/python3.7/dist-packages (from dataprep) (7.6.3)\n",
 57 |             "Collecting tqdm<5.0,>=4.48\n",
 58 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/72/8a/34efae5cf9924328a8f34eeb2fdaae14c011462d9f0e3fcded48e1266d1c/tqdm-4.60.0-py2.py3-none-any.whl (75kB)\n",
 59 |             "\u001b[K     |████████████████████████████████| 81kB 8.8MB/s \n",
 60 |             "\u001b[?25hCollecting wordcloud<2.0,>=1.8\n",
 61 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/1b/06/0516bdba2ebdc0d5bd476aa66f94666dd0ad6b9abda723fdf28e451db919/wordcloud-1.8.1-cp37-cp37m-manylinux1_x86_64.whl (366kB)\n",
 62 |             "\u001b[K     |████████████████████████████████| 368kB 40.3MB/s \n",
 63 |             "\u001b[?25hRequirement already satisfied: pandas<2,>=1 in /usr/local/lib/python3.7/dist-packages (from dataprep) (1.1.5)\n",
 64 |             "Collecting regex<2021.0.0,>=2020.10.15\n",
 65 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/61/b2/8f281520d9f08d0f6771b8160a87a4b487850cde9f1abe257da4d8bab599/regex-2020.11.13-cp37-cp37m-manylinux2014_x86_64.whl (719kB)\n",
 66 |             "\u001b[K     |████████████████████████████████| 727kB 42.6MB/s \n",
 67 |             "\u001b[?25hRequirement already satisfied: bottleneck<2.0,>=1.3 in /usr/local/lib/python3.7/dist-packages (from dataprep) (1.3.2)\n",
 68 |             "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from pydantic<2.0,>=1.6->dataprep) (3.7.4.3)\n",
 69 |             "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2<3.0,>=2.11->dataprep) (1.1.1)\n",
 70 |             "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp<4.0,>=3.6->dataprep) (20.3.0)\n",
 71 |             "Collecting multidict<7.0,>=4.5\n",
 72 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/7c/a6/4123b8165acbe773d1a8dc8e3f0d1edea16d29f7de018eda769abb56bd30/multidict-5.1.0-cp37-cp37m-manylinux2014_x86_64.whl (142kB)\n",
 73 |             "\u001b[K     |████████████████████████████████| 143kB 44.4MB/s \n",
 74 |             "\u001b[?25hRequirement already satisfied: chardet<5.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp<4.0,>=3.6->dataprep) (3.0.4)\n",
 75 |             "Collecting async-timeout<4.0,>=3.0\n",
 76 |             "  Downloading https://files.pythonhosted.org/packages/e1/1e/5a4441be21b0726c4464f3f23c8b19628372f606755a9d2e46c187e65ec4/async_timeout-3.0.1-py3-none-any.whl\n",
 77 |             "Collecting yarl<2.0,>=1.0\n",
 78 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/f1/62/046834c5fc998c88ab2ef722f5d42122230a632212c8afa76418324f53ff/yarl-1.6.3-cp37-cp37m-manylinux2014_x86_64.whl (294kB)\n",
 79 |             "\u001b[K     |████████████████████████████████| 296kB 40.9MB/s \n",
 80 |             "\u001b[?25hRequirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from dask[array,dataframe,delayed]<3.0,>=2.25->dataprep) (3.13)\n",
 81 |             "Requirement already satisfied: toolz>=0.8.2; extra == \"array\" in /usr/local/lib/python3.7/dist-packages (from dask[array,dataframe,delayed]<3.0,>=2.25->dataprep) (0.11.1)\n",
 82 |             "Collecting partd>=0.3.10; extra == \"dataframe\"\n",
 83 |             "  Downloading https://files.pythonhosted.org/packages/41/94/360258a68b55f47859d72b2d0b2b3cfe0ca4fbbcb81b78812bd00ae86b7c/partd-1.2.0-py3-none-any.whl\n",
 84 |             "Collecting fsspec>=0.6.0; extra == \"dataframe\"\n",
 85 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/e9/91/2ef649137816850fa4f4c97c6f2eabb1a79bf0aa2c8ed198e387e373455e/fsspec-2021.4.0-py3-none-any.whl (108kB)\n",
 86 |             "\u001b[K     |████████████████████████████████| 112kB 39.0MB/s \n",
 87 |             "\u001b[?25hRequirement already satisfied: cloudpickle>=0.2.2; extra == \"delayed\" in /usr/local/lib/python3.7/dist-packages (from dask[array,dataframe,delayed]<3.0,>=2.25->dataprep) (1.3.0)\n",
 88 |             "Requirement already satisfied: tornado>=5.1 in /usr/local/lib/python3.7/dist-packages (from bokeh<3,>=2->dataprep) (5.1.1)\n",
 89 |             "Requirement already satisfied: packaging>=16.8 in /usr/local/lib/python3.7/dist-packages (from bokeh<3,>=2->dataprep) (20.9)\n",
 90 |             "Requirement already satisfied: pillow>=7.1.0 in /usr/local/lib/python3.7/dist-packages (from bokeh<3,>=2->dataprep) (7.1.2)\n",
 91 |             "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from bokeh<3,>=2->dataprep) (2.8.1)\n",
 92 |             "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from nltk<4.0,>=3.5->dataprep) (1.0.1)\n",
 93 |             "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from nltk<4.0,>=3.5->dataprep) (7.1.2)\n",
 94 |             "Collecting ply\n",
 95 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl (49kB)\n",
 96 |             "\u001b[K     |████████████████████████████████| 51kB 5.6MB/s \n",
 97 |             "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from jsonpath-ng<2.0,>=1.5->dataprep) (1.15.0)\n",
 98 |             "Requirement already satisfied: decorator in /usr/local/lib/python3.7/dist-packages (from jsonpath-ng<2.0,>=1.5->dataprep) (4.4.2)\n",
 99 |             "Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (3.5.1)\n",
100 |             "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (5.0.5)\n",
101 |             "Requirement already satisfied: jupyterlab-widgets>=1.0.0; python_version >= \"3.6\" in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (1.0.0)\n",
102 |             "Requirement already satisfied: ipython>=4.0.0; python_version >= \"3.3\" in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (5.5.0)\n",
103 |             "Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (5.1.3)\n",
104 |             "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (4.10.1)\n",
105 |             "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from wordcloud<2.0,>=1.8->dataprep) (3.2.2)\n",
106 |             "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas<2,>=1->dataprep) (2018.9)\n",
107 |             "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.7/dist-packages (from yarl<2.0,>=1.0->aiohttp<4.0,>=3.6->dataprep) (2.10)\n",
108 |             "Collecting locket\n",
109 |             "  Downloading https://files.pythonhosted.org/packages/50/b8/e789e45b9b9c2db75e9d9e6ceb022c8d1d7e49b2c085ce8c05600f90a96b/locket-0.2.1-py2.py3-none-any.whl\n",
110 |             "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=16.8->bokeh<3,>=2->dataprep) (2.4.7)\n",
111 |             "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.7/dist-packages (from widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (5.3.1)\n",
112 |             "Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.7/dist-packages (from traitlets>=4.3.1->ipywidgets<8.0,>=7.5->dataprep) (0.2.0)\n",
113 |             "Requirement already satisfied: pexpect; sys_platform != \"win32\" in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (4.8.0)\n",
114 |             "Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (0.8.1)\n",
115 |             "Requirement already satisfied: pickleshare in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (0.7.5)\n",
116 |             "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (56.0.0)\n",
117 |             "Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (1.0.18)\n",
118 |             "Requirement already satisfied: pygments in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (2.6.1)\n",
119 |             "Requirement already satisfied: jupyter-core in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets<8.0,>=7.5->dataprep) (4.7.1)\n",
120 |             "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets<8.0,>=7.5->dataprep) (2.6.0)\n",
121 |             "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.7/dist-packages (from ipykernel>=4.5.1->ipywidgets<8.0,>=7.5->dataprep) (5.3.5)\n",
122 |             "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->wordcloud<2.0,>=1.8->dataprep) (0.10.0)\n",
123 |             "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->wordcloud<2.0,>=1.8->dataprep) (1.3.1)\n",
124 |             "Requirement already satisfied: Send2Trash in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (1.5.0)\n",
125 |             "Requirement already satisfied: terminado>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.9.4)\n",
126 |             "Requirement already satisfied: nbconvert in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (5.6.1)\n",
127 |             "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.7/dist-packages (from pexpect; sys_platform != \"win32\"->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (0.7.0)\n",
128 |             "Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (0.2.5)\n",
129 |             "Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.7/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets<8.0,>=7.5->dataprep) (22.0.3)\n",
130 |             "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.8.4)\n",
131 |             "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.3)\n",
132 |             "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (1.4.3)\n",
133 |             "Requirement already satisfied: defusedxml in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.7.1)\n",
134 |             "Requirement already satisfied: testpath in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.4.4)\n",
135 |             "Requirement already satisfied: bleach in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (3.3.0)\n",
136 |             "Requirement already satisfied: webencodings in /usr/local/lib/python3.7/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.5.1)\n",
137 |             "Installing collected packages: pydantic, multidict, async-timeout, yarl, aiohttp, locket, partd, fsspec, dask, regex, tqdm, nltk, ply, jsonpath-ng, wordcloud, dataprep\n",
138 |             "  Found existing installation: dask 2.12.0\n",
139 |             "    Uninstalling dask-2.12.0:\n",
140 |             "      Successfully uninstalled dask-2.12.0\n",
141 |             "  Found existing installation: regex 2019.12.20\n",
142 |             "    Uninstalling regex-2019.12.20:\n",
143 |             "      Successfully uninstalled regex-2019.12.20\n",
144 |             "  Found existing installation: tqdm 4.41.1\n",
145 |             "    Uninstalling tqdm-4.41.1:\n",
146 |             "      Successfully uninstalled tqdm-4.41.1\n",
147 |             "  Found existing installation: nltk 3.2.5\n",
148 |             "    Uninstalling nltk-3.2.5:\n",
149 |             "      Successfully uninstalled nltk-3.2.5\n",
150 |             "  Found existing installation: wordcloud 1.5.0\n",
151 |             "    Uninstalling wordcloud-1.5.0:\n",
152 |             "      Successfully uninstalled wordcloud-1.5.0\n",
153 |             "Successfully installed aiohttp-3.7.4.post0 async-timeout-3.0.1 dask-2.30.0 dataprep-0.2.15 fsspec-2021.4.0 jsonpath-ng-1.5.2 locket-0.2.1 multidict-5.1.0 nltk-3.6.2 partd-1.2.0 ply-3.11 pydantic-1.8.1 regex-2020.11.13 tqdm-4.60.0 wordcloud-1.8.1 yarl-1.6.3\n"
154 |           ],
155 |           "name": "stdout"
156 |         }
157 |       ]
158 |     },
159 |     {
160 |       "cell_type": "code",
161 |       "metadata": {
162 |         "colab": {
163 |           "base_uri": "https://localhost:8080/"
164 |         },
165 |         "id": "6ilOAxQwhGly",
166 |         "outputId": "cfd6dd21-7cb8-4360-d9f3-74cb8fb53b98"
167 |       },
168 |       "source": [
169 |         "from dataprep.datasets import load_dataset\n",
170 |         "from dataprep.eda import create_report\n",
171 |         "df = load_dataset(\"iris\")\n",
172 |         "obj = create_report(df)"
173 |       ],
174 |       "execution_count": 10,
175 |       "outputs": [
176 |         {
177 |           "output_type": "stream",
178 |           "text": [
179 |             ""
180 |           ],
181 |           "name": "stderr"
182 |         }
183 |       ]
184 |     },
185 |     {
186 |       "cell_type": "code",
187 |       "metadata": {
188 |         "colab": {
189 |           "base_uri": "https://localhost:8080/"
190 |         },
191 |         "id": "lOQ0WJ21jCF0",
192 |         "outputId": "81424e29-0b64-4f0a-dd3c-541e06e1cf4f"
193 |       },
194 |       "source": [
195 |         "dir(obj)"
196 |       ],
197 |       "execution_count": 11,
198 |       "outputs": [
199 |         {
200 |           "output_type": "execute_result",
201 |           "data": {
202 |             "text/plain": [
203 |               "['__class__',\n",
204 |               " '__delattr__',\n",
205 |               " '__dict__',\n",
206 |               " '__dir__',\n",
207 |               " '__doc__',\n",
208 |               " '__eq__',\n",
209 |               " '__format__',\n",
210 |               " '__ge__',\n",
211 |               " '__getattribute__',\n",
212 |               " '__gt__',\n",
213 |               " '__hash__',\n",
214 |               " '__init__',\n",
215 |               " '__init_subclass__',\n",
216 |               " '__le__',\n",
217 |               " '__lt__',\n",
218 |               " '__module__',\n",
219 |               " '__ne__',\n",
220 |               " '__new__',\n",
221 |               " '__reduce__',\n",
222 |               " '__reduce_ex__',\n",
223 |               " '__repr__',\n",
224 |               " '__setattr__',\n",
225 |               " '__sizeof__',\n",
226 |               " '__str__',\n",
227 |               " '__subclasshook__',\n",
228 |               " '__weakref__',\n",
229 |               " '_repr_html_',\n",
230 |               " 'report',\n",
231 |               " 'save',\n",
232 |               " 'show',\n",
233 |               " 'show_browser']"
234 |             ]
235 |           },
236 |           "metadata": {
237 |             "tags": []
238 |           },
239 |           "execution_count": 11
240 |         }
241 |       ]
242 |     },
243 |     {
244 |       "cell_type": "code",
245 |       "metadata": {
246 |         "colab": {
247 |           "base_uri": "https://localhost:8080/"
248 |         },
249 |         "id": "385SqzecjGQe",
250 |         "outputId": "693eda8a-8da1-4a76-c4a3-b94da1eb726d"
251 |       },
252 |       "source": [
253 |         "obj.save()"
254 |       ],
255 |       "execution_count": 12,
256 |       "outputs": [
257 |         {
258 |           "output_type": "stream",
259 |           "text": [
260 |             "Report has been saved to /content/report.html!\n"
261 |           ],
262 |           "name": "stdout"
263 |         }
264 |       ]
265 |     },
266 |     {
267 |       "cell_type": "code",
268 |       "metadata": {
269 |         "id": "1r3TxnJH9LWz"
270 |       },
271 |       "source": [
272 |         "from google.colab import drive\n",
273 |         "drive.mount('/content/drive')"
274 |       ],
275 |       "execution_count": null,
276 |       "outputs": []
277 |     },
278 |     {
279 |       "cell_type": "code",
280 |       "metadata": {
281 |         "colab": {
282 |           "base_uri": "https://localhost:8080/"
283 |         },
284 |         "id": "4yc6L8oVhynE",
285 |         "outputId": "f2796a7e-fbdc-4238-c46a-14bd175e328a"
286 |       },
287 |       "source": [
288 |         "import dataprep\n",
289 |         "dataprep.datasets.get_dataset_names()"
290 |       ],
291 |       "execution_count": 6,
292 |       "outputs": [
293 |         {
294 |           "output_type": "execute_result",
295 |           "data": {
296 |             "text/plain": [
297 |               "['iris', 'titanic']"
298 |             ]
299 |           },
300 |           "metadata": {
301 |             "tags": []
302 |           },
303 |           "execution_count": 6
304 |         }
305 |       ]
306 |     }
307 |   ]
308 | }


--------------------------------------------------------------------------------
/Detect Binod.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 20,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import DetectBinod"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 9,
 15 |    "metadata": {},
 16 |    "outputs": [
 17 |     {
 18 |      "data": {
 19 |       "text/plain": [
 20 |        "['PdfFileReader',\n",
 21 |        " '__builtins__',\n",
 22 |        " '__cached__',\n",
 23 |        " '__doc__',\n",
 24 |        " '__file__',\n",
 25 |        " '__loader__',\n",
 26 |        " '__name__',\n",
 27 |        " '__package__',\n",
 28 |        " '__path__',\n",
 29 |        " '__spec__',\n",
 30 |        " 'docx',\n",
 31 |        " 'isBinod_docx',\n",
 32 |        " 'isBinod_pdf',\n",
 33 |        " 'isBinod_txt',\n",
 34 |        " 'isBinod_xlsx',\n",
 35 |        " 'status',\n",
 36 |        " 'xlrd']"
 37 |       ]
 38 |      },
 39 |      "execution_count": 9,
 40 |      "metadata": {},
 41 |      "output_type": "execute_result"
 42 |     }
 43 |    ],
 44 |    "source": [
 45 |     "dir(DetectBinod)"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 12,
 51 |    "metadata": {},
 52 |    "outputs": [
 53 |     {
 54 |      "name": "stdout",
 55 |      "output_type": "stream",
 56 |      "text": [
 57 |       "/home/akshay/Learning/AS\r\n"
 58 |      ]
 59 |     }
 60 |    ],
 61 |    "source": [
 62 |     "! pwd"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 21,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "Binod Found in file abc.docx\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "obj = DetectBinod.isBinod_docx('abc.docx')"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 16,
 85 |    "metadata": {},
 86 |    "outputs": [
 87 |     {
 88 |      "data": {
 89 |       "text/plain": [
 90 |        "['__class__',\n",
 91 |        " '__delattr__',\n",
 92 |        " '__dict__',\n",
 93 |        " '__dir__',\n",
 94 |        " '__doc__',\n",
 95 |        " '__eq__',\n",
 96 |        " '__format__',\n",
 97 |        " '__ge__',\n",
 98 |        " '__getattribute__',\n",
 99 |        " '__gt__',\n",
100 |        " '__hash__',\n",
101 |        " '__init__',\n",
102 |        " '__init_subclass__',\n",
103 |        " '__le__',\n",
104 |        " '__lt__',\n",
105 |        " '__module__',\n",
106 |        " '__ne__',\n",
107 |        " '__new__',\n",
108 |        " '__reduce__',\n",
109 |        " '__reduce_ex__',\n",
110 |        " '__repr__',\n",
111 |        " '__setattr__',\n",
112 |        " '__sizeof__',\n",
113 |        " '__str__',\n",
114 |        " '__subclasshook__',\n",
115 |        " '__weakref__',\n",
116 |        " '_authenticateUserPassword',\n",
117 |        " '_buildDestination',\n",
118 |        " '_buildField',\n",
119 |        " '_buildOutline',\n",
120 |        " '_checkKids',\n",
121 |        " '_decrypt',\n",
122 |        " '_decryptObject',\n",
123 |        " '_flatten',\n",
124 |        " '_getObjectFromStream',\n",
125 |        " '_getPageNumberByIndirect',\n",
126 |        " '_override_encryption',\n",
127 |        " '_pageId2Num',\n",
128 |        " '_pairs',\n",
129 |        " '_writeField',\n",
130 |        " '_zeroXref',\n",
131 |        " 'cacheGetIndirectObject',\n",
132 |        " 'cacheIndirectObject',\n",
133 |        " 'decrypt',\n",
134 |        " 'documentInfo',\n",
135 |        " 'flattenedPages',\n",
136 |        " 'getDestinationPageNumber',\n",
137 |        " 'getDocumentInfo',\n",
138 |        " 'getFields',\n",
139 |        " 'getFormTextFields',\n",
140 |        " 'getIsEncrypted',\n",
141 |        " 'getNamedDestinations',\n",
142 |        " 'getNumPages',\n",
143 |        " 'getObject',\n",
144 |        " 'getOutlines',\n",
145 |        " 'getPage',\n",
146 |        " 'getPageLayout',\n",
147 |        " 'getPageMode',\n",
148 |        " 'getPageNumber',\n",
149 |        " 'getXmpMetadata',\n",
150 |        " 'isEncrypted',\n",
151 |        " 'namedDestinations',\n",
152 |        " 'numPages',\n",
153 |        " 'outlines',\n",
154 |        " 'pageLayout',\n",
155 |        " 'pageMode',\n",
156 |        " 'pages',\n",
157 |        " 'read',\n",
158 |        " 'readNextEndLine',\n",
159 |        " 'readObjectHeader',\n",
160 |        " 'resolvedObjects',\n",
161 |        " 'stream',\n",
162 |        " 'strict',\n",
163 |        " 'trailer',\n",
164 |        " 'xmpMetadata',\n",
165 |        " 'xref',\n",
166 |        " 'xrefIndex',\n",
167 |        " 'xref_objStm']"
168 |       ]
169 |      },
170 |      "execution_count": 16,
171 |      "metadata": {},
172 |      "output_type": "execute_result"
173 |     }
174 |    ],
175 |    "source": [
176 |     "dir(obj)"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 22,
182 |    "metadata": {},
183 |    "outputs": [],
184 |    "source": [
185 |     "import os"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": 23,
191 |    "metadata": {},
192 |    "outputs": [],
193 |    "source": [
194 |     "obj = os.listdir();"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 24,
200 |    "metadata": {},
201 |    "outputs": [
202 |     {
203 |      "name": "stdout",
204 |      "output_type": "stream",
205 |      "text": [
206 |       "['OCR .ipynb', 'exaplainable AI.jpeg', 'prediction 1.jpeg', 'Gensim hindi tutorials', '2020-07-18 12-15-43.mkv', '.~lock.abc.docx#', 'abc.docx', 'Subscribe_to_My_Think_Channel_on_YouTube.gif', 'ytlogo.png', 'anomaly.jpeg', 'Untitled1.ipynb', 'abc.pdf', 'Screenshot from 2020-07-16 10-56-01.png', 'download.jpeg', 'Youtube video analytics.ipynb', 'like.jpg', 'abc.txt', 'prediction 2.jpeg', 'Screenshot from 2020-07-16 12-42-56.png', 'Untitled.ipynb', 'Blackrock2.mp4', 'Screenshot from 2020-07-16 10-54-11.png', 'Screenshot from 2020-07-16 10-54-43.png', 'share.jpg', 'Screenshot from 2020-07-16 10-55-00.png', 'ASlogo.jpg', '.ipynb_checkpoints', 'Screenshot from 2020-07-16 10-53-21.png', 'Detect Binod.ipynb', 'gensim.png', 'sentiment.jpeg', 'Black_plain.jpeg']\n"
207 |      ]
208 |     }
209 |    ],
210 |    "source": [
211 |     "print(obj)"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 27,
217 |    "metadata": {},
218 |    "outputs": [
219 |     {
220 |      "name": "stdout",
221 |      "output_type": "stream",
222 |      "text": [
223 |       "Detecting Binod in abc.txt\n",
224 |       "Binod is not detected\n"
225 |      ]
226 |     }
227 |    ],
228 |    "source": [
229 |     "def checkBinod(i):\n",
230 |     "    with open(i,\"r\") as f:\n",
231 |     "        data = f.read()\n",
232 |     "        if \"binod\" in data.lower():\n",
233 |     "            return True\n",
234 |     "        else:\n",
235 |     "            return False\n",
236 |     "\n",
237 |     "for i in obj:\n",
238 |     "    if i.endswith('txt'):\n",
239 |     "        print(f\"Detecting Binod in {i}\")\n",
240 |     "        \n",
241 |     "        flag = checkBinod(i)\n",
242 |     "        \n",
243 |     "        if flag == True :\n",
244 |     "            print(\"Binod is detected\")\n",
245 |     "        else:\n",
246 |     "            print(\"Binod is not detected\")"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": null,
252 |    "metadata": {},
253 |    "outputs": [],
254 |    "source": []
255 |   }
256 |  ],
257 |  "metadata": {
258 |   "kernelspec": {
259 |    "display_name": "Python 3",
260 |    "language": "python",
261 |    "name": "python3"
262 |   },
263 |   "language_info": {
264 |    "codemirror_mode": {
265 |     "name": "ipython",
266 |     "version": 3
267 |    },
268 |    "file_extension": ".py",
269 |    "mimetype": "text/x-python",
270 |    "name": "python",
271 |    "nbconvert_exporter": "python",
272 |    "pygments_lexer": "ipython3",
273 |    "version": "3.7.4"
274 |   }
275 |  },
276 |  "nbformat": 4,
277 |  "nbformat_minor": 2
278 | }
279 | 


--------------------------------------------------------------------------------
/DiabetesClassificationUsingNeuralNetwork.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Spyder Editor
 4 | 
 5 | This is a temporary script file.
 6 | """
 7 | 
 8 | import pandas as pd
 9 | 
10 | data=pd.read_csv('diabetes.csv')
11 | data.head()
12 | 
13 | 
14 | X = data.iloc[:, :-1].values
15 | y = data.iloc[:, -1].values
16 | 
17 | from sklearn.model_selection import train_test_split
18 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
19 | 
20 | from sklearn.preprocessing import StandardScaler
21 | sc = StandardScaler()
22 | X_train = sc.fit_transform(X_train)
23 | X_test = sc.transform(X_test)
24 | 
25 | import keras
26 | from keras.models import Sequential
27 | from keras.layers import Dense
28 | 
29 | from keras.layers import Dropout
30 | 
31 | classifier = Sequential()
32 | classifier.add(Dense(16,activation='relu',input_dim=8))
33 | classifier.add(Dropout(0.2))
34 | 
35 | #adding the second hidden layer
36 | classifier.add(Dense(16,activation='relu'))
37 | classifier.add(Dropout(0.2))
38 | 
39 | #adding the output layer
40 | classifier.add(Dense(1,activation='sigmoid'))
41 | 
42 | classifier.compile(optimizer="Adam", loss='binary_crossentropy', metrics=['accuracy'])
43 | 
44 | classifier.fit(X_train, y_train, batch_size=100, epochs=300)
45 | 
46 | y_pred = classifier.predict(X_test)
47 | y_pred = (y_pred > 0.5)
48 | 
49 | from sklearn.metrics import confusion_matrix
50 | cm = confusion_matrix(y_test, y_pred)
51 | 
52 | #sns.heatmap(cm,annot=True)
53 | #plt.savefig('h.png')
54 | 


--------------------------------------------------------------------------------
/EntityExtraction.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Wed Jan 27 21:39:18 2021
 5 | 
 6 | @author: akshay
 7 | """
 8 | 
 9 | import en_core_web_sm
10 | nlp = en_core_web_sm.load()
11 | doc = nlp(u"Ramesh is earning in 100 dollars in UK")
12 | for entity in doc.ents:
13 |   print(entity.label_, ' | ', entity.text)
14 | 


--------------------------------------------------------------------------------
/Gensim introduction hindi.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "1)Gensim stands for \"Generate Similar\"\n",
  8 |     "\n",
  9 |     "2)Features provided by <b>Gensim</b> :\n",
 10 |     "\n",
 11 |     "a)fastText<br>\n",
 12 |     "b)word2vec<br> \n",
 13 |     "c)LSA<br>\n",
 14 |     "d)LDA<br>\n",
 15 |     "e)TF-IDF<br>\n",
 16 |     "\n",
 17 |     "<b>Documents : </b> It refers to some text\n"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "document = \"Akshay is teaching gensim on youtube.\""
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "<b>Corpus : </b> It refers to collection of texts"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 3,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "corpus = [\"Akshay is teaching gensim on youtube.\",\"Today is a sunny day\",\"India is one of the top ranking teasm in cricket\",\"My favourite hobby is playing badminton\"]"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 4,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "stoplist = set('for a of the and to in'.split(' '))\n",
 52 |     "processed_corpus = [[word for word in document.lower().split() if word not in stoplist]\n",
 53 |     "   for document in corpus]"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 15,
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "name": "stdout",
 63 |      "output_type": "stream",
 64 |      "text": [
 65 |       "[['akshay', 'is', 'teaching', 'gensim', 'on', 'youtube.'],\n",
 66 |       " ['today', 'is', 'sunny', 'day'],\n",
 67 |       " ['india', 'is', 'one', 'top', 'ranking', 'teasm', 'cricket'],\n",
 68 |       " ['my', 'favourite', 'hobby', 'is', 'playing', 'badminton']]\n"
 69 |      ]
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "import pprint\n",
 74 |     "pprint.pprint(processed_corpus)"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 6,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "import gensim"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 7,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "corpus = \"\"\"'Akshay is teaching gensim on youtube.',\"Today is a sunny day\",\"India is one of the top ranking teasm in cricket\",'My favourite hobby is playing badminton'\"\"\""
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 8,
 98 |    "metadata": {},
 99 |    "outputs": [
100 |     {
101 |      "data": {
102 |       "text/plain": [
103 |        "['akshay',\n",
104 |        " 'is',\n",
105 |        " 'teaching',\n",
106 |        " 'gensim',\n",
107 |        " 'on',\n",
108 |        " 'youtube',\n",
109 |        " 'today',\n",
110 |        " 'is',\n",
111 |        " 'sunny',\n",
112 |        " 'day',\n",
113 |        " 'india',\n",
114 |        " 'is',\n",
115 |        " 'one',\n",
116 |        " 'of',\n",
117 |        " 'the',\n",
118 |        " 'top',\n",
119 |        " 'ranking',\n",
120 |        " 'teasm',\n",
121 |        " 'in',\n",
122 |        " 'cricket',\n",
123 |        " 'my',\n",
124 |        " 'favourite',\n",
125 |        " 'hobby',\n",
126 |        " 'is',\n",
127 |        " 'playing',\n",
128 |        " 'badminton']"
129 |       ]
130 |      },
131 |      "execution_count": 8,
132 |      "metadata": {},
133 |      "output_type": "execute_result"
134 |     }
135 |    ],
136 |    "source": [
137 |     "gensim.utils.simple_preprocess(corpus, deacc=False, min_len=2, max_len=15)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "Document is text and vector is a mathematically convenient representation of that text.\n",
145 |     "\n",
146 |     "One more important thing to be noted here is that, two different documents may have the same vector representation."
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 9,
152 |    "metadata": {},
153 |    "outputs": [
154 |     {
155 |      "name": "stdout",
156 |      "output_type": "stream",
157 |      "text": [
158 |       "Dictionary(20 unique tokens: ['akshay', 'gensim', 'is', 'on', 'teaching']...)\n"
159 |      ]
160 |     }
161 |    ],
162 |    "source": [
163 |     "from gensim import corpora\n",
164 |     "dictionary = corpora.Dictionary(processed_corpus)\n",
165 |     "print(dictionary)"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 10,
171 |    "metadata": {},
172 |    "outputs": [
173 |     {
174 |      "name": "stdout",
175 |      "output_type": "stream",
176 |      "text": [
177 |       "{'akshay': 0,\n",
178 |       " 'badminton': 15,\n",
179 |       " 'cricket': 9,\n",
180 |       " 'day': 6,\n",
181 |       " 'favourite': 16,\n",
182 |       " 'gensim': 1,\n",
183 |       " 'hobby': 17,\n",
184 |       " 'india': 10,\n",
185 |       " 'is': 2,\n",
186 |       " 'my': 18,\n",
187 |       " 'on': 3,\n",
188 |       " 'one': 11,\n",
189 |       " 'playing': 19,\n",
190 |       " 'ranking': 12,\n",
191 |       " 'sunny': 7,\n",
192 |       " 'teaching': 4,\n",
193 |       " 'teasm': 13,\n",
194 |       " 'today': 8,\n",
195 |       " 'top': 14,\n",
196 |       " 'youtube.': 5}\n"
197 |      ]
198 |     }
199 |    ],
200 |    "source": [
201 |     "pprint.pprint(dictionary.token2id)"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 11,
207 |    "metadata": {},
208 |    "outputs": [
209 |     {
210 |      "data": {
211 |       "text/plain": [
212 |        "[['akshay', 'is', 'teaching', 'gensim', 'on', 'youtube.'],\n",
213 |        " ['today', 'is', 'sunny', 'day'],\n",
214 |        " ['india', 'is', 'one', 'top', 'ranking', 'teasm', 'cricket'],\n",
215 |        " ['my', 'favourite', 'hobby', 'is', 'playing', 'badminton']]"
216 |       ]
217 |      },
218 |      "execution_count": 11,
219 |      "metadata": {},
220 |      "output_type": "execute_result"
221 |     }
222 |    ],
223 |    "source": [
224 |     "processed_corpus"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 12,
230 |    "metadata": {},
231 |    "outputs": [
232 |     {
233 |      "name": "stdout",
234 |      "output_type": "stream",
235 |      "text": [
236 |       "[[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1)],\n",
237 |       " [(2, 1), (6, 1), (7, 1), (8, 1)],\n",
238 |       " [(2, 1), (9, 1), (10, 1), (11, 1), (12, 1), (13, 1), (14, 1)],\n",
239 |       " [(2, 1), (15, 1), (16, 1), (17, 1), (18, 1), (19, 1)]]\n"
240 |      ]
241 |     }
242 |    ],
243 |    "source": [
244 |     "BoW_corpus = [dictionary.doc2bow(text) for text in processed_corpus]\n",
245 |     "pprint.pprint(BoW_corpus)"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": 13,
251 |    "metadata": {},
252 |    "outputs": [
253 |     {
254 |      "name": "stdout",
255 |      "output_type": "stream",
256 |      "text": [
257 |       "TfidfModel(num_docs=4, num_nnz=23)\n",
258 |       "[(0, 0.7071067811865475), (9, 0.7071067811865475)]\n"
259 |      ]
260 |     }
261 |    ],
262 |    "source": [
263 |     "from gensim import models\n",
264 |     "tfidf = models.TfidfModel(BoW_corpus)\n",
265 |     "words = \"akshay cricket\".lower().split()\n",
266 |     "print(tfidf)\n",
267 |     "print(tfidf[dictionary.doc2bow(words)])"
268 |    ]
269 |   }
270 |  ],
271 |  "metadata": {
272 |   "kernelspec": {
273 |    "display_name": "Python 3",
274 |    "language": "python",
275 |    "name": "python3"
276 |   },
277 |   "language_info": {
278 |    "codemirror_mode": {
279 |     "name": "ipython",
280 |     "version": 3
281 |    },
282 |    "file_extension": ".py",
283 |    "mimetype": "text/x-python",
284 |    "name": "python",
285 |    "nbconvert_exporter": "python",
286 |    "pygments_lexer": "ipython3",
287 |    "version": "3.7.4"
288 |   }
289 |  },
290 |  "nbformat": 4,
291 |  "nbformat_minor": 2
292 | }
293 | 


--------------------------------------------------------------------------------
/Grammarchecker.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sat Jan 16 15:00:43 2021
 5 | 
 6 | @author: akshay
 7 | """
 8 | 
 9 | from gingerit.gingerit import GingerIt
10 | 
11 | text = 'Narendra Modi is our prme mnister. He is from Gujaratt'
12 | 
13 | parser = GingerIt()
14 | print(len(parser.parse(text)['corrections']))
15 | 


--------------------------------------------------------------------------------
/HaarCascade.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Fri Nov 20 13:39:30 2020
 5 | 
 6 | @author: akshay
 7 | """
 8 | 
 9 | import cv2
10 | 
11 | face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
12 | eye_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_eye.xml')
13 | 
14 | img = cv2.imread('virat.jpeg')
15 | 
16 | r = 500.0 / img.shape[1]
17 | dim = (500, int(img.shape[0] * r))
18 | 
19 | resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
20 | 
21 | grey = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
22 | 
23 | grey.shape
24 | 
25 | faces = face_cascade.detectMultiScale(grey, 1.3, 5)
26 | eyes = eye_cascade.detectMultiScale(grey,1.3,5)
27 | 
28 | for (x,y,w,h) in faces:
29 |     cv2.rectangle(resized,(x,y),(x+w,y+h),(255,0,0),2)
30 |     roi_grey = grey[y:y+h, x:x+w]
31 |     roi_color = resized[y:y+h, x:x+w]
32 |     eyes = eye_cascade.detectMultiScale(roi_grey)
33 | for (ex,ey,ew,eh) in eyes:
34 |     cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2)
35 | 
36 | #Display the bounding box for the face and eyes
37 | #cv2.imshow('img',resized)
38 | #cv2.waitKey(0)
39 | 
40 | #cv2.imshow('image',resized)
41 | #cv2.waitKey(0) 


--------------------------------------------------------------------------------
/Kepler-delete.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 5
6 | }
7 | 


--------------------------------------------------------------------------------
/LazyPredict.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "LazyPredict.ipynb",
  7 |       "provenance": []
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     },
 13 |     "language_info": {
 14 |       "name": "python"
 15 |     }
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "code",
 20 |       "metadata": {
 21 |         "id": "S6DI72w20Zsu",
 22 |         "colab": {
 23 |           "base_uri": "https://localhost:8080/",
 24 |           "height": 1000
 25 |         },
 26 |         "outputId": "981359a0-7fd0-4b37-c98d-c95f39ae2858"
 27 |       },
 28 |       "source": [
 29 |         "! pip install lazypredict"
 30 |       ],
 31 |       "execution_count": 1,
 32 |       "outputs": [
 33 |         {
 34 |           "output_type": "stream",
 35 |           "text": [
 36 |             "Collecting lazypredict\n",
 37 |             "  Downloading https://files.pythonhosted.org/packages/97/38/cadb2b79268c7f82f6b027bf0b2f68750aafc5c70b6e1bc46b357386e07b/lazypredict-0.2.9-py2.py3-none-any.whl\n",
 38 |             "Requirement already satisfied: click==7.1.2 in /usr/local/lib/python3.7/dist-packages (from lazypredict) (7.1.2)\n",
 39 |             "Collecting lightgbm==2.3.1\n",
 40 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/0b/9d/ddcb2f43aca194987f1a99e27edf41cf9bc39ea750c3371c2a62698c509a/lightgbm-2.3.1-py2.py3-none-manylinux1_x86_64.whl (1.2MB)\n",
 41 |             "\u001b[K     |████████████████████████████████| 1.2MB 6.2MB/s \n",
 42 |             "\u001b[?25hCollecting scipy==1.5.4\n",
 43 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/dc/7e/8f6a79b102ca1ea928bae8998b05bf5dc24a90571db13cd119f275ba6252/scipy-1.5.4-cp37-cp37m-manylinux1_x86_64.whl (25.9MB)\n",
 44 |             "\u001b[K     |████████████████████████████████| 25.9MB 162kB/s \n",
 45 |             "\u001b[?25hCollecting joblib==1.0.0\n",
 46 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/34/5b/bd0f0fb5564183884d8e35b81d06d7ec06a20d1a0c8b4c407f1554691dce/joblib-1.0.0-py3-none-any.whl (302kB)\n",
 47 |             "\u001b[K     |████████████████████████████████| 307kB 37.6MB/s \n",
 48 |             "\u001b[?25hRequirement already satisfied: six==1.15.0 in /usr/local/lib/python3.7/dist-packages (from lazypredict) (1.15.0)\n",
 49 |             "Collecting numpy==1.19.1\n",
 50 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/50/8f/29d5688614f9bba59931683d5d353d738d4a3007833219ee19c455732753/numpy-1.19.1-cp37-cp37m-manylinux2010_x86_64.whl (14.5MB)\n",
 51 |             "\u001b[K     |████████████████████████████████| 14.5MB 339kB/s \n",
 52 |             "\u001b[?25hCollecting tqdm==4.56.0\n",
 53 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/80/02/8f8880a4fd6625461833abcf679d4c12a44c76f9925f92bf212bb6cefaad/tqdm-4.56.0-py2.py3-none-any.whl (72kB)\n",
 54 |             "\u001b[K     |████████████████████████████████| 81kB 9.3MB/s \n",
 55 |             "\u001b[?25hCollecting pytest==5.4.3\n",
 56 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/9f/f3/0a83558da436a081344aa6c8b85ea5b5f05071214106036ce341b7769b0b/pytest-5.4.3-py3-none-any.whl (248kB)\n",
 57 |             "\u001b[K     |████████████████████████████████| 256kB 48.8MB/s \n",
 58 |             "\u001b[?25hCollecting PyYAML==5.3.1\n",
 59 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/64/c2/b80047c7ac2478f9501676c988a5411ed5572f35d1beff9cae07d321512c/PyYAML-5.3.1.tar.gz (269kB)\n",
 60 |             "\u001b[K     |████████████████████████████████| 276kB 41.9MB/s \n",
 61 |             "\u001b[?25hCollecting scikit-learn==0.23.1\n",
 62 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/b8/7e/74e707b66490d4eb05f702966ad0990881127acecf9d5cdcef3c95ec6c16/scikit_learn-0.23.1-cp37-cp37m-manylinux1_x86_64.whl (6.8MB)\n",
 63 |             "\u001b[K     |████████████████████████████████| 6.8MB 41.5MB/s \n",
 64 |             "\u001b[?25hCollecting xgboost==1.1.1\n",
 65 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/7c/32/a11befbb003e0e6b7e062a77f010dfcec0ec3589be537b02d2eb2ff93b9a/xgboost-1.1.1-py3-none-manylinux2010_x86_64.whl (127.6MB)\n",
 66 |             "\u001b[K     |████████████████████████████████| 127.6MB 104kB/s \n",
 67 |             "\u001b[?25hCollecting pandas==1.0.5\n",
 68 |             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/af/f3/683bf2547a3eaeec15b39cef86f61e921b3b187f250fcd2b5c5fb4386369/pandas-1.0.5-cp37-cp37m-manylinux1_x86_64.whl (10.1MB)\n",
 69 |             "\u001b[K     |████████████████████████████████| 10.1MB 36.8MB/s \n",
 70 |             "\u001b[?25hRequirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (0.2.5)\n",
 71 |             "Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (8.7.0)\n",
 72 |             "Requirement already satisfied: importlib-metadata>=0.12; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (3.10.1)\n",
 73 |             "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (20.9)\n",
 74 |             "Collecting pluggy<1.0,>=0.12\n",
 75 |             "  Downloading https://files.pythonhosted.org/packages/a0/28/85c7aa31b80d150b772fbe4a229487bc6644da9ccb7e427dd8cc60cb8a62/pluggy-0.13.1-py2.py3-none-any.whl\n",
 76 |             "Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (1.10.0)\n",
 77 |             "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (20.3.0)\n",
 78 |             "Collecting threadpoolctl>=2.0.0\n",
 79 |             "  Downloading https://files.pythonhosted.org/packages/f7/12/ec3f2e203afa394a149911729357aa48affc59c20e2c1c8297a60f33f133/threadpoolctl-2.1.0-py3-none-any.whl\n",
 80 |             "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas==1.0.5->lazypredict) (2018.9)\n",
 81 |             "Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.7/dist-packages (from pandas==1.0.5->lazypredict) (2.8.1)\n",
 82 |             "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.12; python_version < \"3.8\"->pytest==5.4.3->lazypredict) (3.4.1)\n",
 83 |             "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.12; python_version < \"3.8\"->pytest==5.4.3->lazypredict) (3.7.4.3)\n",
 84 |             "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->pytest==5.4.3->lazypredict) (2.4.7)\n",
 85 |             "Building wheels for collected packages: PyYAML\n",
 86 |             "  Building wheel for PyYAML (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
 87 |             "  Created wheel for PyYAML: filename=PyYAML-5.3.1-cp37-cp37m-linux_x86_64.whl size=44620 sha256=f6d917d739712d660ac988fc4d460b50631e794e8734262ffe5c016362d97a0f\n",
 88 |             "  Stored in directory: /root/.cache/pip/wheels/a7/c1/ea/cf5bd31012e735dc1dfea3131a2d5eae7978b251083d6247bd\n",
 89 |             "Successfully built PyYAML\n",
 90 |             "\u001b[31mERROR: tensorflow 2.4.1 has requirement numpy~=1.19.2, but you'll have numpy 1.19.1 which is incompatible.\u001b[0m\n",
 91 |             "\u001b[31mERROR: google-colab 1.0.0 has requirement pandas~=1.1.0; python_version >= \"3.0\", but you'll have pandas 1.0.5 which is incompatible.\u001b[0m\n",
 92 |             "\u001b[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.\u001b[0m\n",
 93 |             "\u001b[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.\u001b[0m\n",
 94 |             "Installing collected packages: threadpoolctl, joblib, numpy, scipy, scikit-learn, lightgbm, tqdm, pluggy, pytest, PyYAML, xgboost, pandas, lazypredict\n",
 95 |             "  Found existing installation: joblib 1.0.1\n",
 96 |             "    Uninstalling joblib-1.0.1:\n",
 97 |             "      Successfully uninstalled joblib-1.0.1\n",
 98 |             "  Found existing installation: numpy 1.19.5\n",
 99 |             "    Uninstalling numpy-1.19.5:\n",
100 |             "      Successfully uninstalled numpy-1.19.5\n",
101 |             "  Found existing installation: scipy 1.4.1\n",
102 |             "    Uninstalling scipy-1.4.1:\n",
103 |             "      Successfully uninstalled scipy-1.4.1\n",
104 |             "  Found existing installation: scikit-learn 0.22.2.post1\n",
105 |             "    Uninstalling scikit-learn-0.22.2.post1:\n",
106 |             "      Successfully uninstalled scikit-learn-0.22.2.post1\n",
107 |             "  Found existing installation: lightgbm 2.2.3\n",
108 |             "    Uninstalling lightgbm-2.2.3:\n",
109 |             "      Successfully uninstalled lightgbm-2.2.3\n",
110 |             "  Found existing installation: tqdm 4.41.1\n",
111 |             "    Uninstalling tqdm-4.41.1:\n",
112 |             "      Successfully uninstalled tqdm-4.41.1\n",
113 |             "  Found existing installation: pluggy 0.7.1\n",
114 |             "    Uninstalling pluggy-0.7.1:\n",
115 |             "      Successfully uninstalled pluggy-0.7.1\n",
116 |             "  Found existing installation: pytest 3.6.4\n",
117 |             "    Uninstalling pytest-3.6.4:\n",
118 |             "      Successfully uninstalled pytest-3.6.4\n",
119 |             "  Found existing installation: PyYAML 3.13\n",
120 |             "    Uninstalling PyYAML-3.13:\n",
121 |             "      Successfully uninstalled PyYAML-3.13\n",
122 |             "  Found existing installation: xgboost 0.90\n",
123 |             "    Uninstalling xgboost-0.90:\n",
124 |             "      Successfully uninstalled xgboost-0.90\n",
125 |             "  Found existing installation: pandas 1.1.5\n",
126 |             "    Uninstalling pandas-1.1.5:\n",
127 |             "      Successfully uninstalled pandas-1.1.5\n",
128 |             "Successfully installed PyYAML-5.3.1 joblib-1.0.0 lazypredict-0.2.9 lightgbm-2.3.1 numpy-1.19.1 pandas-1.0.5 pluggy-0.13.1 pytest-5.4.3 scikit-learn-0.23.1 scipy-1.5.4 threadpoolctl-2.1.0 tqdm-4.56.0 xgboost-1.1.1\n"
129 |           ],
130 |           "name": "stdout"
131 |         },
132 |         {
133 |           "output_type": "display_data",
134 |           "data": {
135 |             "application/vnd.colab-display-data+json": {
136 |               "pip_warning": {
137 |                 "packages": [
138 |                   "numpy",
139 |                   "pandas"
140 |                 ]
141 |               }
142 |             }
143 |           },
144 |           "metadata": {
145 |             "tags": []
146 |           }
147 |         }
148 |       ]
149 |     },
150 |     {
151 |       "cell_type": "code",
152 |       "metadata": {
153 |         "id": "mPO0zcqPy7fG"
154 |       },
155 |       "source": [
156 |         "import lazypredict"
157 |       ],
158 |       "execution_count": 1,
159 |       "outputs": []
160 |     },
161 |     {
162 |       "cell_type": "code",
163 |       "metadata": {
164 |         "id": "G6Vu7hMLy_DK"
165 |       },
166 |       "source": [
167 |         "from sklearn import datasets"
168 |       ],
169 |       "execution_count": 2,
170 |       "outputs": []
171 |     },
172 |     {
173 |       "cell_type": "code",
174 |       "metadata": {
175 |         "colab": {
176 |           "base_uri": "https://localhost:8080/"
177 |         },
178 |         "id": "b2SCh4Y0zLi6",
179 |         "outputId": "43bf944a-b93c-4e25-a643-bacf2b1a605a"
180 |       },
181 |       "source": [
182 |         "dir(datasets)"
183 |       ],
184 |       "execution_count": 3,
185 |       "outputs": [
186 |         {
187 |           "output_type": "execute_result",
188 |           "data": {
189 |             "text/plain": [
190 |               "['__all__',\n",
191 |               " '__builtins__',\n",
192 |               " '__cached__',\n",
193 |               " '__doc__',\n",
194 |               " '__file__',\n",
195 |               " '__loader__',\n",
196 |               " '__name__',\n",
197 |               " '__package__',\n",
198 |               " '__path__',\n",
199 |               " '__spec__',\n",
200 |               " '_base',\n",
201 |               " '_california_housing',\n",
202 |               " '_covtype',\n",
203 |               " '_kddcup99',\n",
204 |               " '_lfw',\n",
205 |               " '_olivetti_faces',\n",
206 |               " '_openml',\n",
207 |               " '_rcv1',\n",
208 |               " '_samples_generator',\n",
209 |               " '_species_distributions',\n",
210 |               " '_svmlight_format_fast',\n",
211 |               " '_svmlight_format_io',\n",
212 |               " '_twenty_newsgroups',\n",
213 |               " 'clear_data_home',\n",
214 |               " 'dump_svmlight_file',\n",
215 |               " 'fetch_20newsgroups',\n",
216 |               " 'fetch_20newsgroups_vectorized',\n",
217 |               " 'fetch_california_housing',\n",
218 |               " 'fetch_covtype',\n",
219 |               " 'fetch_kddcup99',\n",
220 |               " 'fetch_lfw_pairs',\n",
221 |               " 'fetch_lfw_people',\n",
222 |               " 'fetch_olivetti_faces',\n",
223 |               " 'fetch_openml',\n",
224 |               " 'fetch_rcv1',\n",
225 |               " 'fetch_species_distributions',\n",
226 |               " 'get_data_home',\n",
227 |               " 'load_boston',\n",
228 |               " 'load_breast_cancer',\n",
229 |               " 'load_diabetes',\n",
230 |               " 'load_digits',\n",
231 |               " 'load_files',\n",
232 |               " 'load_iris',\n",
233 |               " 'load_linnerud',\n",
234 |               " 'load_sample_image',\n",
235 |               " 'load_sample_images',\n",
236 |               " 'load_svmlight_file',\n",
237 |               " 'load_svmlight_files',\n",
238 |               " 'load_wine',\n",
239 |               " 'make_biclusters',\n",
240 |               " 'make_blobs',\n",
241 |               " 'make_checkerboard',\n",
242 |               " 'make_circles',\n",
243 |               " 'make_classification',\n",
244 |               " 'make_friedman1',\n",
245 |               " 'make_friedman2',\n",
246 |               " 'make_friedman3',\n",
247 |               " 'make_gaussian_quantiles',\n",
248 |               " 'make_hastie_10_2',\n",
249 |               " 'make_low_rank_matrix',\n",
250 |               " 'make_moons',\n",
251 |               " 'make_multilabel_classification',\n",
252 |               " 'make_regression',\n",
253 |               " 'make_s_curve',\n",
254 |               " 'make_sparse_coded_signal',\n",
255 |               " 'make_sparse_spd_matrix',\n",
256 |               " 'make_sparse_uncorrelated',\n",
257 |               " 'make_spd_matrix',\n",
258 |               " 'make_swiss_roll']"
259 |             ]
260 |           },
261 |           "metadata": {
262 |             "tags": []
263 |           },
264 |           "execution_count": 3
265 |         }
266 |       ]
267 |     },
268 |     {
269 |       "cell_type": "code",
270 |       "metadata": {
271 |         "id": "mQtWciUGzTTA"
272 |       },
273 |       "source": [
274 |         "from sklearn.datasets import "
275 |       ],
276 |       "execution_count": null,
277 |       "outputs": []
278 |     }
279 |   ]
280 | }


--------------------------------------------------------------------------------
/Lux.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Collecting lux-api\n",
 13 |       "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/64/32/ee46df189e978c21a040e01d0eb99067de07974fd4ef67c69190a5d1fdde/lux-api-0.2.1.tar.gz (76kB)\n",
 14 |       "\u001b[K     |████████████████████████████████| 81kB 11kB/s eta 0:00:012\n",
 15 |       "\u001b[?25hCollecting scipy>=1.3.3 (from lux-api)\n",
 16 |       "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/dc/7e/8f6a79b102ca1ea928bae8998b05bf5dc24a90571db13cd119f275ba6252/scipy-1.5.4-cp37-cp37m-manylinux1_x86_64.whl (25.9MB)\n",
 17 |       "\u001b[K     |████████████████████████████████| 25.9MB 52kB/s eta 0:00:013    |█▏                              | 921kB 288kB/s eta 0:01:27     |███████████████████▎            | 15.6MB 476kB/s eta 0:00:22\n",
 18 |       "\u001b[?25hCollecting altair>=4.0.0 (from lux-api)\n",
 19 |       "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/01/55/0bb2226e34f21fa549c3f4557b4f154a5632f61132a969da17c95ca8eab9/altair-4.1.0-py3-none-any.whl (727kB)\n",
 20 |       "\u001b[K     |████████████████████████████████| 737kB 673kB/s eta 0:00:01\n",
 21 |       "\u001b[?25hCollecting pandas>=1.1.0 (from lux-api)\n",
 22 |       "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/bf/4c/cb7da76f3a5e077e545f9cf8575b8f488a4e8ad60490838f89c5cdd5bb57/pandas-1.1.4-cp37-cp37m-manylinux1_x86_64.whl (9.5MB)\n",
 23 |       "\u001b[K     |████████████████████████████████| 9.5MB 89kB/s eta 0:00:012     |██████████                      | 3.0MB 280kB/s eta 0:00:24\n",
 24 |       "\u001b[?25hRequirement already satisfied: scikit-learn>=0.22 in /home/akshay/anaconda3/lib/python3.7/site-packages (from lux-api) (0.22.2.post1)\n",
 25 |       "Collecting lux-widget>=0.1.1 (from lux-api)\n",
 26 |       "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/3e/94/ac2ab4f1a28496c3aacdb52ba13d7df2ad7f51af9138c4cc076815cbb618/lux-widget-0.1.1.tar.gz (1.9MB)\n",
 27 |       "\u001b[K     |████████████████████████████████| 1.9MB 907kB/s eta 0:00:01\n",
 28 |       "\u001b[?25hRequirement already satisfied: numpy>=1.14.5 in /home/akshay/anaconda3/lib/python3.7/site-packages (from scipy>=1.3.3->lux-api) (1.17.2)\n",
 29 |       "Requirement already satisfied: jsonschema in /home/akshay/anaconda3/lib/python3.7/site-packages (from altair>=4.0.0->lux-api) (3.0.2)\n",
 30 |       "Requirement already satisfied: entrypoints in /home/akshay/anaconda3/lib/python3.7/site-packages (from altair>=4.0.0->lux-api) (0.3)\n",
 31 |       "Requirement already satisfied: jinja2 in /home/akshay/anaconda3/lib/python3.7/site-packages (from altair>=4.0.0->lux-api) (2.10.3)\n",
 32 |       "Requirement already satisfied: toolz in /home/akshay/anaconda3/lib/python3.7/site-packages (from altair>=4.0.0->lux-api) (0.10.0)\n",
 33 |       "Requirement already satisfied: python-dateutil>=2.7.3 in /home/akshay/anaconda3/lib/python3.7/site-packages (from pandas>=1.1.0->lux-api) (2.8.0)\n",
 34 |       "Requirement already satisfied: pytz>=2017.2 in /home/akshay/anaconda3/lib/python3.7/site-packages (from pandas>=1.1.0->lux-api) (2019.3)\n",
 35 |       "Requirement already satisfied: joblib>=0.11 in /home/akshay/anaconda3/lib/python3.7/site-packages (from scikit-learn>=0.22->lux-api) (0.13.2)\n",
 36 |       "Requirement already satisfied: jupyter in /home/akshay/anaconda3/lib/python3.7/site-packages (from lux-widget>=0.1.1->lux-api) (1.0.0)\n",
 37 |       "Requirement already satisfied: notebook>=4.0.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from lux-widget>=0.1.1->lux-api) (6.0.1)\n",
 38 |       "Requirement already satisfied: ipywidgets>=7.0.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from lux-widget>=0.1.1->lux-api) (7.5.1)\n",
 39 |       "Requirement already satisfied: pyrsistent>=0.14.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jsonschema->altair>=4.0.0->lux-api) (0.15.4)\n",
 40 |       "Requirement already satisfied: setuptools in /home/akshay/anaconda3/lib/python3.7/site-packages (from jsonschema->altair>=4.0.0->lux-api) (41.4.0)\n",
 41 |       "Requirement already satisfied: attrs>=17.4.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jsonschema->altair>=4.0.0->lux-api) (19.2.0)\n",
 42 |       "Requirement already satisfied: six>=1.11.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jsonschema->altair>=4.0.0->lux-api) (1.15.0)\n",
 43 |       "Requirement already satisfied: MarkupSafe>=0.23 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jinja2->altair>=4.0.0->lux-api) (1.1.1)\n",
 44 |       "Requirement already satisfied: qtconsole in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter->lux-widget>=0.1.1->lux-api) (4.5.5)\n",
 45 |       "Requirement already satisfied: nbconvert in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter->lux-widget>=0.1.1->lux-api) (5.6.0)\n",
 46 |       "Requirement already satisfied: ipykernel in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter->lux-widget>=0.1.1->lux-api) (5.1.2)\n",
 47 |       "Requirement already satisfied: jupyter-console in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter->lux-widget>=0.1.1->lux-api) (6.0.0)\n",
 48 |       "Requirement already satisfied: Send2Trash in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (1.5.0)\n",
 49 |       "Requirement already satisfied: traitlets>=4.2.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (4.3.3)\n",
 50 |       "Requirement already satisfied: terminado>=0.8.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (0.8.2)\n",
 51 |       "Requirement already satisfied: ipython-genutils in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (0.2.0)\n",
 52 |       "Requirement already satisfied: jupyter-core>=4.4.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (4.5.0)\n",
 53 |       "Requirement already satisfied: pyzmq>=17 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (18.1.0)\n",
 54 |       "Requirement already satisfied: prometheus-client in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (0.7.1)\n",
 55 |       "Requirement already satisfied: tornado>=5.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (6.0.3)\n",
 56 |       "Requirement already satisfied: jupyter-client>=5.3.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (5.3.3)\n",
 57 |       "Requirement already satisfied: nbformat in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (4.4.0)\n",
 58 |       "Requirement already satisfied: ipython>=4.0.0; python_version >= \"3.3\" in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (7.8.0)\n",
 59 |       "Requirement already satisfied: widgetsnbextension~=3.5.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (3.5.1)\n",
 60 |       "Requirement already satisfied: pygments in /home/akshay/anaconda3/lib/python3.7/site-packages (from qtconsole->jupyter->lux-widget>=0.1.1->lux-api) (2.4.2)\n",
 61 |       "Requirement already satisfied: mistune<2,>=0.8.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (0.8.4)\n",
 62 |       "Requirement already satisfied: pandocfilters>=1.4.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (1.4.2)\n",
 63 |       "Requirement already satisfied: bleach in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (3.1.0)\n",
 64 |       "Requirement already satisfied: testpath in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (0.4.2)\n",
 65 |       "Requirement already satisfied: defusedxml in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (0.6.0)\n",
 66 |       "Requirement already satisfied: prompt_toolkit<2.1.0,>=2.0.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter-console->jupyter->lux-widget>=0.1.1->lux-api) (2.0.10)\n",
 67 |       "Requirement already satisfied: decorator in /home/akshay/anaconda3/lib/python3.7/site-packages (from traitlets>=4.2.1->notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (4.4.0)\n",
 68 |       "Requirement already satisfied: pexpect; sys_platform != \"win32\" in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (4.7.0)\n",
 69 |       "Requirement already satisfied: pickleshare in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.7.5)\n",
 70 |       "Requirement already satisfied: jedi>=0.10 in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.15.1)\n"
 71 |      ]
 72 |     },
 73 |     {
 74 |      "name": "stdout",
 75 |      "output_type": "stream",
 76 |      "text": [
 77 |       "Requirement already satisfied: backcall in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.1.0)\n",
 78 |       "Requirement already satisfied: webencodings in /home/akshay/anaconda3/lib/python3.7/site-packages (from bleach->nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (0.5.1)\n",
 79 |       "Requirement already satisfied: wcwidth in /home/akshay/anaconda3/lib/python3.7/site-packages (from prompt_toolkit<2.1.0,>=2.0.0->jupyter-console->jupyter->lux-widget>=0.1.1->lux-api) (0.1.7)\n",
 80 |       "Requirement already satisfied: ptyprocess>=0.5 in /home/akshay/anaconda3/lib/python3.7/site-packages (from pexpect; sys_platform != \"win32\"->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.6.0)\n",
 81 |       "Requirement already satisfied: parso>=0.5.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jedi>=0.10->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.5.1)\n",
 82 |       "Building wheels for collected packages: lux-api, lux-widget\n",
 83 |       "  Building wheel for lux-api (setup.py) ... \u001b[?25ldone\n",
 84 |       "\u001b[?25h  Created wheel for lux-api: filename=lux_api-0.2.1-cp37-none-any.whl size=117593 sha256=a48b36e6b3179bf23acd9d6d8e45060266a472282d61a9cb783284d3f9e43381\n",
 85 |       "  Stored in directory: /home/akshay/.cache/pip/wheels/7d/f2/72/6a08419c87357fcf47378e68faeabff55715615601b677a353\n",
 86 |       "  Building wheel for lux-widget (setup.py) ... \u001b[?25ldone\n",
 87 |       "\u001b[?25h  Created wheel for lux-widget: filename=lux_widget-0.1.1-cp37-none-any.whl size=1935816 sha256=0572df60c3e6921d3441bef7f99adb09b4b693afae1a8127628a6093f8cb83de\n",
 88 |       "  Stored in directory: /home/akshay/.cache/pip/wheels/69/1c/5d/bc38f4976953e3cb58842423d04ed2aa0d6579b6b5857fc444\n",
 89 |       "Successfully built lux-api lux-widget\n",
 90 |       "Installing collected packages: scipy, pandas, altair, lux-widget, lux-api\n",
 91 |       "  Found existing installation: scipy 1.3.1\n",
 92 |       "    Uninstalling scipy-1.3.1:\n",
 93 |       "      Successfully uninstalled scipy-1.3.1\n",
 94 |       "  Found existing installation: pandas 0.25.1\n",
 95 |       "    Uninstalling pandas-0.25.1:\n",
 96 |       "      Successfully uninstalled pandas-0.25.1\n",
 97 |       "Successfully installed altair-4.1.0 lux-api-0.2.1 lux-widget-0.1.1 pandas-1.1.4 scipy-1.5.4\n"
 98 |      ]
 99 |     }
100 |    ],
101 |    "source": [
102 |     "! pip install lux-api"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 9,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "import lux\n",
112 |     "import pandas as pd"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 10,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "df = pd.read_csv('titanic.csv')"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 11,
127 |    "metadata": {
128 |     "scrolled": true
129 |    },
130 |    "outputs": [
131 |     {
132 |      "data": {
133 |       "application/vnd.jupyter.widget-view+json": {
134 |        "model_id": "b8542711f1e54d9bbc352a50e9ccbc11",
135 |        "version_major": 2,
136 |        "version_minor": 0
137 |       },
138 |       "text/plain": [
139 |        "Button(description='Toggle Pandas/Lux', layout=Layout(top='5px', width='140px'), style=ButtonStyle())"
140 |       ]
141 |      },
142 |      "metadata": {},
143 |      "output_type": "display_data"
144 |     },
145 |     {
146 |      "data": {
147 |       "application/vnd.jupyter.widget-view+json": {
148 |        "model_id": "af00684885a74f29b0c86ca4c7934499",
149 |        "version_major": 2,
150 |        "version_minor": 0
151 |       },
152 |       "text/plain": [
153 |        "Output()"
154 |       ]
155 |      },
156 |      "metadata": {},
157 |      "output_type": "display_data"
158 |     },
159 |     {
160 |      "data": {
161 |       "text/plain": []
162 |      },
163 |      "execution_count": 11,
164 |      "metadata": {},
165 |      "output_type": "execute_result"
166 |     }
167 |    ],
168 |    "source": [
169 |     "\n",
170 |     "df"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 12,
176 |    "metadata": {},
177 |    "outputs": [
178 |     {
179 |      "name": "stderr",
180 |      "output_type": "stream",
181 |      "text": [
182 |       "/home/akshay/anaconda3/lib/python3.7/site-packages/lux/vis/VisList.py:305: UserWarning:\n",
183 |       "The following issues are ecountered when validating the parsed intent:\n",
184 |       "- The input value 'nan' does not exist for the attribute 'Embarked' for the DataFrame.\n"
185 |      ]
186 |     },
187 |     {
188 |      "data": {
189 |       "application/vnd.jupyter.widget-view+json": {
190 |        "model_id": "fb5c244b1bfb4e09961690fd6523a01a",
191 |        "version_major": 2,
192 |        "version_minor": 0
193 |       },
194 |       "text/plain": [
195 |        "Button(description='Toggle Pandas/Lux', layout=Layout(top='5px', width='140px'), style=ButtonStyle())"
196 |       ]
197 |      },
198 |      "metadata": {},
199 |      "output_type": "display_data"
200 |     },
201 |     {
202 |      "data": {
203 |       "application/vnd.jupyter.widget-view+json": {
204 |        "model_id": "d743879ef24144329970ca103a7bd204",
205 |        "version_major": 2,
206 |        "version_minor": 0
207 |       },
208 |       "text/plain": [
209 |        "Output()"
210 |       ]
211 |      },
212 |      "metadata": {},
213 |      "output_type": "display_data"
214 |     },
215 |     {
216 |      "data": {
217 |       "text/plain": []
218 |      },
219 |      "execution_count": 12,
220 |      "metadata": {},
221 |      "output_type": "execute_result"
222 |     }
223 |    ],
224 |    "source": [
225 |     "df.intent = [\"Age\",\"Fare\"]\n",
226 |     "df"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": 13,
232 |    "metadata": {},
233 |    "outputs": [],
234 |    "source": [
235 |     "vis = df.exported"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 14,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "data": {
245 |       "application/vnd.jupyter.widget-view+json": {
246 |        "model_id": "917ad753df8347ffb91f9ffd73d41903",
247 |        "version_major": 2,
248 |        "version_minor": 0
249 |       },
250 |       "text/plain": [
251 |        "LuxWidget(recommendations=[{'action': 'Vis List', 'description': 'Shows a vis list defined by the intent', 'vs…"
252 |       ]
253 |      },
254 |      "metadata": {},
255 |      "output_type": "display_data"
256 |     },
257 |     {
258 |      "data": {
259 |       "text/plain": [
260 |        "[<Vis  (x: Age, y: Fare, color: Survived) mark: scatter, score: 0.50 >]"
261 |       ]
262 |      },
263 |      "execution_count": 14,
264 |      "metadata": {},
265 |      "output_type": "execute_result"
266 |     }
267 |    ],
268 |    "source": [
269 |     "vis"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": 15,
275 |    "metadata": {},
276 |    "outputs": [
277 |     {
278 |      "data": {
279 |       "application/vnd.jupyter.widget-view+json": {
280 |        "model_id": "1fceb05044d9481f886233375de6c4b5",
281 |        "version_major": 2,
282 |        "version_minor": 0
283 |       },
284 |       "text/plain": [
285 |        "LuxWidget(current_vis={'config': {'view': {'continuousWidth': 400, 'continuousHeight': 300}, 'axis': {'labelCo…"
286 |       ]
287 |      },
288 |      "metadata": {},
289 |      "output_type": "display_data"
290 |     },
291 |     {
292 |      "data": {
293 |       "text/plain": [
294 |        "<Vis  (x: BIN(Fare), y: COUNT(Record) -- [Embarked=C]) mark: histogram, score: 0.0 >"
295 |       ]
296 |      },
297 |      "execution_count": 15,
298 |      "metadata": {},
299 |      "output_type": "execute_result"
300 |     }
301 |    ],
302 |    "source": [
303 |     "from lux.vis.Vis import Vis\n",
304 |     "Vis([\"Embarked=C\",\"Fare\"],df)"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 16,
310 |    "metadata": {},
311 |    "outputs": [
312 |     {
313 |      "data": {
314 |       "application/vnd.jupyter.widget-view+json": {
315 |        "model_id": "2f8d03f01a4844debf438535fb3af585",
316 |        "version_major": 2,
317 |        "version_minor": 0
318 |       },
319 |       "text/plain": [
320 |        "LuxWidget(recommendations=[{'action': 'Vis List', 'description': 'Shows a vis list defined by the intent', 'vs…"
321 |       ]
322 |      },
323 |      "metadata": {},
324 |      "output_type": "display_data"
325 |     },
326 |     {
327 |      "data": {
328 |       "text/plain": [
329 |        "[<Vis  (x: COUNT(Record), y: Pclass  -- [Embarked=S]   ) mark: bar, score: 0.00 >,\n",
330 |        " <Vis  (x: COUNT(Record), y: Pclass  -- [Embarked=C]   ) mark: bar, score: 0.00 >,\n",
331 |        " <Vis  (x: COUNT(Record), y: Pclass  -- [Embarked=Q]   ) mark: bar, score: 0.00 >,\n",
332 |        " <Vis  (x: COUNT(Record), y: Pclass  -- [Embarked=nan] ) mark: bar, score: 0.00 >]"
333 |       ]
334 |      },
335 |      "execution_count": 16,
336 |      "metadata": {},
337 |      "output_type": "execute_result"
338 |     }
339 |    ],
340 |    "source": [
341 |     "from lux.vis.VisList import VisList\n",
342 |     "VisList([\"Embarked=?\",\"Pclass\"],df)"
343 |    ]
344 |   }
345 |  ],
346 |  "metadata": {
347 |   "kernelspec": {
348 |    "display_name": "Python 3",
349 |    "language": "python",
350 |    "name": "python3"
351 |   },
352 |   "language_info": {
353 |    "codemirror_mode": {
354 |     "name": "ipython",
355 |     "version": 3
356 |    },
357 |    "file_extension": ".py",
358 |    "mimetype": "text/x-python",
359 |    "name": "python",
360 |    "nbconvert_exporter": "python",
361 |    "pygments_lexer": "ipython3",
362 |    "version": "3.7.4"
363 |   }
364 |  },
365 |  "nbformat": 4,
366 |  "nbformat_minor": 2
367 | }
368 | 


--------------------------------------------------------------------------------
/OCR .ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import matplotlib.pyplot as plt\n",
 10 |     "import cv2\n",
 11 |     "import easyocr\n",
 12 |     "from pylab import rcParams\n",
 13 |     "from IPython.display import Image\n",
 14 |     "rcParams['figure.figsize'] = 8, 16"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 12,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stderr",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       "CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.\n",
 27 |       "Downloading detection model, please wait\n"
 28 |      ]
 29 |     },
 30 |     {
 31 |      "ename": "HTTPError",
 32 |      "evalue": "HTTP Error 403: Forbidden",
 33 |      "output_type": "error",
 34 |      "traceback": [
 35 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 36 |       "\u001b[0;31mHTTPError\u001b[0m                                 Traceback (most recent call last)",
 37 |       "\u001b[0;32m<ipython-input-12-ca0f38143924>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0measyocr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mreader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0measyocr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'en'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
 38 |       "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/site-packages/easyocr/easyocr.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, lang_list, gpu)\u001b[0m\n\u001b[1;32m    183\u001b[0m             \u001b[0murlretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_url\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'detector'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mDETECTOR_PATH\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    184\u001b[0m             \u001b[0;32massert\u001b[0m \u001b[0mcalculate_md5\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDETECTOR_PATH\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mmodel_url\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'detector'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCORRUPT_MSG\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m             \u001b[0meprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Download complete'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    186\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mcalculate_md5\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDETECTOR_PATH\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mmodel_url\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'detector'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    187\u001b[0m             \u001b[0meprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCORRUPT_MSG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 39 |       "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36murlretrieve\u001b[0;34m(url, filename, reporthook, data)\u001b[0m\n\u001b[1;32m    245\u001b[0m     \u001b[0murl_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_splittype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 247\u001b[0;31m     \u001b[0;32mwith\u001b[0m \u001b[0mcontextlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclosing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murlopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    248\u001b[0m         \u001b[0mheaders\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    249\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 40 |       "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(url, data, timeout, cafile, capath, cadefault, context)\u001b[0m\n\u001b[1;32m    220\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    221\u001b[0m         \u001b[0mopener\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_opener\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mopener\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    223\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    224\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minstall_opener\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 41 |       "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, fullurl, data, timeout)\u001b[0m\n\u001b[1;32m    529\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mprocessor\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_response\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    530\u001b[0m             \u001b[0mmeth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprocessor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeth_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 531\u001b[0;31m             \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmeth\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    532\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    533\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 42 |       "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36mhttp_response\u001b[0;34m(self, request, response)\u001b[0m\n\u001b[1;32m    638\u001b[0m         \u001b[0;31m# request was successfully received, understood, and accepted.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    639\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m200\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0mcode\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m300\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 640\u001b[0;31m             response = self.parent.error(\n\u001b[0m\u001b[1;32m    641\u001b[0m                 'http', request, response, code, msg, hdrs)\n\u001b[1;32m    642\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 43 |       "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36merror\u001b[0;34m(self, proto, *args)\u001b[0m\n\u001b[1;32m    567\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mhttp_err\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    568\u001b[0m             \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'default'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'http_error_default'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0morig_args\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 569\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_chain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    570\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    571\u001b[0m \u001b[0;31m# XXX probably also want an abstract factory that knows when it makes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 44 |       "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36m_call_chain\u001b[0;34m(self, chain, kind, meth_name, *args)\u001b[0m\n\u001b[1;32m    500\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mhandler\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mhandlers\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    501\u001b[0m             \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandler\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeth_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 502\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    503\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    504\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 45 |       "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36mhttp_error_default\u001b[0;34m(self, req, fp, code, msg, hdrs)\u001b[0m\n\u001b[1;32m    647\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mHTTPDefaultErrorHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBaseHandler\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    648\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mhttp_error_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhdrs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 649\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mHTTPError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhdrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    650\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    651\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mHTTPRedirectHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBaseHandler\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 46 |       "\u001b[0;31mHTTPError\u001b[0m: HTTP Error 403: Forbidden"
 47 |      ]
 48 |     }
 49 |    ],
 50 |    "source": [
 51 |     "import easyocr\n",
 52 |     "reader = easyocr.Reader(['en'])"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 5,
 58 |    "metadata": {},
 59 |    "outputs": [
 60 |     {
 61 |      "name": "stdout",
 62 |      "output_type": "stream",
 63 |      "text": [
 64 |       "None\n"
 65 |      ]
 66 |     }
 67 |    ],
 68 |    "source": [
 69 |     "import os\n",
 70 |     "print(os.environ.get(\"MODULE_PATH\"))"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 8,
 76 |    "metadata": {},
 77 |    "outputs": [
 78 |     {
 79 |      "data": {
 80 |       "text/plain": [
 81 |        "['_MutableMapping__marker',\n",
 82 |        " '__abstractmethods__',\n",
 83 |        " '__class__',\n",
 84 |        " '__contains__',\n",
 85 |        " '__delattr__',\n",
 86 |        " '__delitem__',\n",
 87 |        " '__dict__',\n",
 88 |        " '__dir__',\n",
 89 |        " '__doc__',\n",
 90 |        " '__eq__',\n",
 91 |        " '__format__',\n",
 92 |        " '__ge__',\n",
 93 |        " '__getattribute__',\n",
 94 |        " '__getitem__',\n",
 95 |        " '__gt__',\n",
 96 |        " '__hash__',\n",
 97 |        " '__init__',\n",
 98 |        " '__init_subclass__',\n",
 99 |        " '__iter__',\n",
100 |        " '__le__',\n",
101 |        " '__len__',\n",
102 |        " '__lt__',\n",
103 |        " '__module__',\n",
104 |        " '__ne__',\n",
105 |        " '__new__',\n",
106 |        " '__reduce__',\n",
107 |        " '__reduce_ex__',\n",
108 |        " '__repr__',\n",
109 |        " '__reversed__',\n",
110 |        " '__setattr__',\n",
111 |        " '__setitem__',\n",
112 |        " '__sizeof__',\n",
113 |        " '__slots__',\n",
114 |        " '__str__',\n",
115 |        " '__subclasshook__',\n",
116 |        " '__weakref__',\n",
117 |        " '_abc_impl',\n",
118 |        " '_data',\n",
119 |        " 'clear',\n",
120 |        " 'copy',\n",
121 |        " 'decodekey',\n",
122 |        " 'decodevalue',\n",
123 |        " 'encodekey',\n",
124 |        " 'encodevalue',\n",
125 |        " 'get',\n",
126 |        " 'items',\n",
127 |        " 'keys',\n",
128 |        " 'pop',\n",
129 |        " 'popitem',\n",
130 |        " 'putenv',\n",
131 |        " 'setdefault',\n",
132 |        " 'unsetenv',\n",
133 |        " 'update',\n",
134 |        " 'values']"
135 |       ]
136 |      },
137 |      "execution_count": 8,
138 |      "metadata": {},
139 |      "output_type": "execute_result"
140 |     }
141 |    ],
142 |    "source": [
143 |     "dir(os.environ)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 1,
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "import re"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 3,
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "str1 = '5,8'"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 5,
167 |    "metadata": {},
168 |    "outputs": [],
169 |    "source": [
170 |     "import pandas as pd\n",
171 |     "df = pd.DataFrame(['5,8','5,9'])\n"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 6,
177 |    "metadata": {},
178 |    "outputs": [
179 |     {
180 |      "data": {
181 |       "text/html": [
182 |        "<div>\n",
183 |        "<style scoped>\n",
184 |        "    .dataframe tbody tr th:only-of-type {\n",
185 |        "        vertical-align: middle;\n",
186 |        "    }\n",
187 |        "\n",
188 |        "    .dataframe tbody tr th {\n",
189 |        "        vertical-align: top;\n",
190 |        "    }\n",
191 |        "\n",
192 |        "    .dataframe thead th {\n",
193 |        "        text-align: right;\n",
194 |        "    }\n",
195 |        "</style>\n",
196 |        "<table border=\"1\" class=\"dataframe\">\n",
197 |        "  <thead>\n",
198 |        "    <tr style=\"text-align: right;\">\n",
199 |        "      <th></th>\n",
200 |        "      <th>0</th>\n",
201 |        "    </tr>\n",
202 |        "  </thead>\n",
203 |        "  <tbody>\n",
204 |        "    <tr>\n",
205 |        "      <td>0</td>\n",
206 |        "      <td>5,8</td>\n",
207 |        "    </tr>\n",
208 |        "    <tr>\n",
209 |        "      <td>1</td>\n",
210 |        "      <td>5,9</td>\n",
211 |        "    </tr>\n",
212 |        "  </tbody>\n",
213 |        "</table>\n",
214 |        "</div>"
215 |       ],
216 |       "text/plain": [
217 |        "     0\n",
218 |        "0  5,8\n",
219 |        "1  5,9"
220 |       ]
221 |      },
222 |      "execution_count": 6,
223 |      "metadata": {},
224 |      "output_type": "execute_result"
225 |     }
226 |    ],
227 |    "source": [
228 |     "df.head()"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": 7,
234 |    "metadata": {},
235 |    "outputs": [
236 |     {
237 |      "name": "stdout",
238 |      "output_type": "stream",
239 |      "text": [
240 |       "     0\n",
241 |       "0  5.8\n",
242 |       "1  5.9\n"
243 |      ]
244 |     }
245 |    ],
246 |    "source": [
247 |     "print(df.replace(to_replace =',', value = '.', regex = True)) "
248 |    ]
249 |   }
250 |  ],
251 |  "metadata": {
252 |   "kernelspec": {
253 |    "display_name": "Python 3",
254 |    "language": "python",
255 |    "name": "python3"
256 |   },
257 |   "language_info": {
258 |    "codemirror_mode": {
259 |     "name": "ipython",
260 |     "version": 3
261 |    },
262 |    "file_extension": ".py",
263 |    "mimetype": "text/x-python",
264 |    "name": "python",
265 |    "nbconvert_exporter": "python",
266 |    "pygments_lexer": "ipython3",
267 |    "version": "3.7.4"
268 |   }
269 |  },
270 |  "nbformat": 4,
271 |  "nbformat_minor": 2
272 | }
273 | 


--------------------------------------------------------------------------------
/PassiveActive.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st1
 2 | from styleformer import Styleformer
 3 | import torch
 4 | sf1 = Styleformer(style = 3) 
 5 | st1.title('Passive Voice to Active Voice Converter')
 6 | st1.write("Please enter your sentence in passive voice")
 7 | text1 = st1.text_input('Entered Text')
 8 | if st1.button('Convert Passive to Active'):
 9 |   target_sentence1 = sf1.transfer(text1)
10 |   st1.write(target_sentence1)
11 | else:
12 |      pass
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data-Science
2 | This repo contains Data Science code snippet
3 | 
4 | # YouTube channel link : 
5 | https://www.youtube.com/@aslearning4818
6 | 


--------------------------------------------------------------------------------
/Readability.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Collecting readability\n",
 13 |       "  Downloading https://files.pythonhosted.org/packages/26/70/6f8750066255d4d2b82b813dd2550e0bd2bee99d026d14088a7b977cd0fc/readability-0.3.1.tar.gz\n",
 14 |       "Building wheels for collected packages: readability\n",
 15 |       "  Building wheel for readability (setup.py) ... \u001b[?25ldone\n",
 16 |       "\u001b[?25h  Created wheel for readability: filename=readability-0.3.1-cp37-none-any.whl size=35464 sha256=b432dd772e5a327736e14ea876f8fe36914ef541eba12aed8baf35cb92581908\n",
 17 |       "  Stored in directory: /home/akshay/.cache/pip/wheels/36/3f/65/bc327f4cdd5bff9ff510834e07522f94389e28858311b33b41\n",
 18 |       "Successfully built readability\n",
 19 |       "Installing collected packages: readability\n",
 20 |       "Successfully installed readability-0.3.1\n"
 21 |      ]
 22 |     }
 23 |    ],
 24 |    "source": [
 25 |     "! pip install readability"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "import readability"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 14,
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "text = ('Ram is eating mango')"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 15,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "results = readability.getmeasures(text, lang='en')"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 16,
 58 |    "metadata": {},
 59 |    "outputs": [
 60 |     {
 61 |      "name": "stdout",
 62 |      "output_type": "stream",
 63 |      "text": [
 64 |       "75.87500000000001\n"
 65 |      ]
 66 |     }
 67 |    ],
 68 |    "source": [
 69 |     "print(results['readability grades']['FleschReadingEase'])"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 17,
 75 |    "metadata": {},
 76 |    "outputs": [
 77 |     {
 78 |      "data": {
 79 |       "text/plain": [
 80 |        "OrderedDict([('readability grades',\n",
 81 |        "              OrderedDict([('Kincaid', 3.6700000000000017),\n",
 82 |        "                           ('ARI', -0.5899999999999999),\n",
 83 |        "                           ('Coleman-Liau', 0.32178000000000395),\n",
 84 |        "                           ('FleschReadingEase', 75.87500000000001),\n",
 85 |        "                           ('GunningFogIndex', 1.6),\n",
 86 |        "                           ('LIX', 4.0),\n",
 87 |        "                           ('SMOGIndex', 3.0),\n",
 88 |        "                           ('RIX', 0.0),\n",
 89 |        "                           ('DaleChallIndex', 11.7299)])),\n",
 90 |        "             ('sentence info',\n",
 91 |        "              OrderedDict([('characters_per_word', 4.0),\n",
 92 |        "                           ('syll_per_word', 1.5),\n",
 93 |        "                           ('words_per_sentence', 4.0),\n",
 94 |        "                           ('sentences_per_paragraph', 1.0),\n",
 95 |        "                           ('type_token_ratio', 1.0),\n",
 96 |        "                           ('characters', 16),\n",
 97 |        "                           ('syllables', 6),\n",
 98 |        "                           ('words', 4),\n",
 99 |        "                           ('wordtypes', 4),\n",
100 |        "                           ('sentences', 1),\n",
101 |        "                           ('paragraphs', 1),\n",
102 |        "                           ('long_words', 0),\n",
103 |        "                           ('complex_words', 0),\n",
104 |        "                           ('complex_words_dc', 2)])),\n",
105 |        "             ('word usage',\n",
106 |        "              OrderedDict([('tobeverb', 1),\n",
107 |        "                           ('auxverb', 0),\n",
108 |        "                           ('conjunction', 0),\n",
109 |        "                           ('pronoun', 0),\n",
110 |        "                           ('preposition', 0),\n",
111 |        "                           ('nominalization', 0)])),\n",
112 |        "             ('sentence beginnings',\n",
113 |        "              OrderedDict([('pronoun', 0),\n",
114 |        "                           ('interrogative', 0),\n",
115 |        "                           ('article', 0),\n",
116 |        "                           ('subordination', 0),\n",
117 |        "                           ('conjunction', 0),\n",
118 |        "                           ('preposition', 0)]))])"
119 |       ]
120 |      },
121 |      "execution_count": 17,
122 |      "metadata": {},
123 |      "output_type": "execute_result"
124 |     }
125 |    ],
126 |    "source": [
127 |     "results"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 18,
133 |    "metadata": {},
134 |    "outputs": [
135 |     {
136 |      "name": "stdout",
137 |      "output_type": "stream",
138 |      "text": [
139 |       "-47.979999999999976\n"
140 |      ]
141 |     }
142 |    ],
143 |    "source": [
144 |     "text2 = ('Asasadas')\n",
145 |     "results = readability.getmeasures(text2, lang='en')\n",
146 |     "print(results['readability grades']['FleschReadingEase'])"
147 |    ]
148 |   }
149 |  ],
150 |  "metadata": {
151 |   "kernelspec": {
152 |    "display_name": "Python 3",
153 |    "language": "python",
154 |    "name": "python3"
155 |   },
156 |   "language_info": {
157 |    "codemirror_mode": {
158 |     "name": "ipython",
159 |     "version": 3
160 |    },
161 |    "file_extension": ".py",
162 |    "mimetype": "text/x-python",
163 |    "name": "python",
164 |    "nbconvert_exporter": "python",
165 |    "pygments_lexer": "ipython3",
166 |    "version": "3.7.4"
167 |   }
168 |  },
169 |  "nbformat": 4,
170 |  "nbformat_minor": 2
171 | }
172 | 


--------------------------------------------------------------------------------
/Sentiment Analysis using VADER.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# SENTIMENT ANALYSIS DEFINITION"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "In sentiment analysis we classify the polarity of given text at document ,sentence or feature level.It tells us but the opinion of it whether is positive , negative or neutral. If we go more advance like beyond polarity we can go for emotional states like angry , sad and happy."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "# INSTALLING VADER"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {},
 28 |    "outputs": [
 29 |     {
 30 |      "name": "stdout",
 31 |      "output_type": "stream",
 32 |      "text": [
 33 |       "Collecting vaderSentiment\n",
 34 |       "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/76/fc/310e16254683c1ed35eeb97386986d6c00bc29df17ce280aed64d55537e9/vaderSentiment-3.3.2-py2.py3-none-any.whl (125kB)\n",
 35 |       "\u001b[K     |████████████████████████████████| 133kB 776kB/s eta 0:00:01\n",
 36 |       "\u001b[?25hRequirement already satisfied: requests in /home/akshay/anaconda3/lib/python3.7/site-packages (from vaderSentiment) (2.22.0)\n",
 37 |       "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from requests->vaderSentiment) (1.24.2)\n",
 38 |       "Requirement already satisfied: idna<2.9,>=2.5 in /home/akshay/anaconda3/lib/python3.7/site-packages (from requests->vaderSentiment) (2.8)\n",
 39 |       "Requirement already satisfied: certifi>=2017.4.17 in /home/akshay/anaconda3/lib/python3.7/site-packages (from requests->vaderSentiment) (2019.9.11)\n",
 40 |       "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /home/akshay/anaconda3/lib/python3.7/site-packages (from requests->vaderSentiment) (3.0.4)\n",
 41 |       "Installing collected packages: vaderSentiment\n",
 42 |       "Successfully installed vaderSentiment-3.3.2\n"
 43 |      ]
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "! pip install vaderSentiment"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "# What is VADER ?"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "VADER stands for Valence Aware Dictionary and sEntiment Reasoner. It is a rule-based sentiment analyzer.It consists of a list of lexical features (e.g. words) which are generally labeled as per their semantic orientation as positive or negative.\n",
 62 |     "\n"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 3,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 7,
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "obj = SentimentIntensityAnalyzer()"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 32,
 86 |    "metadata": {},
 87 |    "outputs": [
 88 |     {
 89 |      "name": "stdout",
 90 |      "output_type": "stream",
 91 |      "text": [
 92 |       "{'neg': 0.0, 'neu': 0.484, 'pos': 0.516, 'compound': 0.4927}\n"
 93 |      ]
 94 |     }
 95 |    ],
 96 |    "source": [
 97 |     "sentence = \"Ram is really good \"\n",
 98 |     "sentiment_dict = obj.polarity_scores(sentence)\n",
 99 |     "print(sentiment_dict)"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 33,
105 |    "metadata": {},
106 |    "outputs": [
107 |     {
108 |      "name": "stdout",
109 |      "output_type": "stream",
110 |      "text": [
111 |       "{'neg': 0.558, 'neu': 0.442, 'pos': 0.0, 'compound': -0.5849}\n"
112 |      ]
113 |     }
114 |    ],
115 |    "source": [
116 |     "sentence = \"Rahul is really bad\"\n",
117 |     "sentiment_dict = obj.polarity_scores(sentence)\n",
118 |     "print(sentiment_dict)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "# Things which manipulate the Sentiment score"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "# 1) PUNCTUATIONS : It increases the intensity"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 34,
138 |    "metadata": {},
139 |    "outputs": [
140 |     {
141 |      "name": "stdout",
142 |      "output_type": "stream",
143 |      "text": [
144 |       "{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}\n",
145 |       "{'neg': 0.0, 'neu': 0.484, 'pos': 0.516, 'compound': 0.4926}\n",
146 |       "{'neg': 0.0, 'neu': 0.463, 'pos': 0.537, 'compound': 0.5399}\n"
147 |      ]
148 |     }
149 |    ],
150 |    "source": [
151 |     "print(obj.polarity_scores('Ram is good boy'))\n",
152 |     "print(obj.polarity_scores('Ram is good boy!'))\n",
153 |     "print(obj.polarity_scores('Ram is good boy!!'))"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "# 2) CAPITALIZATION : It increases the intensity"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 35,
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "name": "stdout",
170 |      "output_type": "stream",
171 |      "text": [
172 |       "{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}\n",
173 |       "{'neg': 0.0, 'neu': 0.355, 'pos': 0.645, 'compound': 0.5622}\n"
174 |      ]
175 |     }
176 |    ],
177 |    "source": [
178 |     "print(obj.polarity_scores('Ram is good'))\n",
179 |     "print(obj.polarity_scores('Ram is GOOD'))\n"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {},
185 |    "source": [
186 |     "# 3) DEGREE MODIFIERS"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": 28,
192 |    "metadata": {},
193 |    "outputs": [
194 |     {
195 |      "name": "stdout",
196 |      "output_type": "stream",
197 |      "text": [
198 |       "{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}\n",
199 |       "{'neg': 0.0, 'neu': 0.484, 'pos': 0.516, 'compound': 0.4927}\n"
200 |      ]
201 |     }
202 |    ],
203 |    "source": [
204 |     "print(obj.polarity_scores('Ram is good'))\n",
205 |     "print(obj.polarity_scores('Ram is very good'))"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "markdown",
210 |    "metadata": {},
211 |    "source": [
212 |     "# 4) CONJUNCTIONS"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": 29,
218 |    "metadata": {},
219 |    "outputs": [
220 |     {
221 |      "name": "stdout",
222 |      "output_type": "stream",
223 |      "text": [
224 |       "{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}\n",
225 |       "{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}\n"
226 |      ]
227 |     }
228 |    ],
229 |    "source": [
230 |     "print(obj.polarity_scores('Ram is good'))\n",
231 |     "print(obj.polarity_scores('Ram is good,but he is also naughty sometimes'))"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "markdown",
236 |    "metadata": {},
237 |    "source": [
238 |     "# DOES VADER HANDLES SLANGS AND EMOTICONS ?"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": 24,
244 |    "metadata": {},
245 |    "outputs": [
246 |     {
247 |      "name": "stdout",
248 |      "output_type": "stream",
249 |      "text": [
250 |       "{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}\n",
251 |       "{'neg': 0.618, 'neu': 0.382, 'pos': 0.0, 'compound': -0.4995}\n"
252 |      ]
253 |     }
254 |    ],
255 |    "source": [
256 |     "print(obj.polarity_scores(\"That Hotel\"))\n",
257 |     "print(obj.polarity_scores(\"That Hotel SUX\"))"
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": 25,
263 |    "metadata": {},
264 |    "outputs": [
265 |     {
266 |      "name": "stdout",
267 |      "output_type": "stream",
268 |      "text": [
269 |       "{'neg': 0.0, 'neu': 0.556, 'pos': 0.444, 'compound': 0.8016}\n",
270 |       "{'neg': 0.0, 'neu': 0.574, 'pos': 0.426, 'compound': 0.7713}\n"
271 |      ]
272 |     }
273 |    ],
274 |    "source": [
275 |     "print(obj.polarity_scores(\"Your :) is the most beautiful thing I have ever seen\"))\n",
276 |     "print(obj.polarity_scores(\"Your smile is the most beautiful thing I have ever seen\"))"
277 |    ]
278 |   }
279 |  ],
280 |  "metadata": {
281 |   "kernelspec": {
282 |    "display_name": "Python 3",
283 |    "language": "python",
284 |    "name": "python3"
285 |   },
286 |   "language_info": {
287 |    "codemirror_mode": {
288 |     "name": "ipython",
289 |     "version": 3
290 |    },
291 |    "file_extension": ".py",
292 |    "mimetype": "text/x-python",
293 |    "name": "python",
294 |    "nbconvert_exporter": "python",
295 |    "pygments_lexer": "ipython3",
296 |    "version": "3.7.4"
297 |   }
298 |  },
299 |  "nbformat": 4,
300 |  "nbformat_minor": 2
301 | }
302 | 


--------------------------------------------------------------------------------
/Speechtotext.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Speechtotext.ipynb",
  7 |       "provenance": []
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     }
 13 |   },
 14 |   "cells": [
 15 |     {
 16 |       "cell_type": "code",
 17 |       "metadata": {
 18 |         "id": "vU_eMPLMbV2b",
 19 |         "outputId": "19885023-778c-422f-986b-11dbc76c2e2d",
 20 |         "colab": {
 21 |           "base_uri": "https://localhost:8080/"
 22 |         }
 23 |       },
 24 |       "source": [
 25 |         "! pip3 install SpeechRecognition pydub"
 26 |       ],
 27 |       "execution_count": null,
 28 |       "outputs": [
 29 |         {
 30 |           "output_type": "stream",
 31 |           "text": [
 32 |             "Requirement already satisfied: SpeechRecognition in /usr/local/lib/python3.6/dist-packages (3.8.1)\n",
 33 |             "Requirement already satisfied: pydub in /usr/local/lib/python3.6/dist-packages (0.24.1)\n"
 34 |           ],
 35 |           "name": "stdout"
 36 |         }
 37 |       ]
 38 |     },
 39 |     {
 40 |       "cell_type": "markdown",
 41 |       "metadata": {
 42 |         "id": "ibpYddfDd7Us"
 43 |       },
 44 |       "source": [
 45 |         "1) CMU Sphinx (offline)\n",
 46 |         "\n",
 47 |         "2) Google Speech Recognition\n",
 48 |         "\n",
 49 |         "3) Google Cloud Speech API\n",
 50 |         "\n",
 51 |         "4) Wit.ai\n",
 52 |         "\n",
 53 |         "5) Microsoft Bing Voice Recognition\n",
 54 |         "\n",
 55 |         "6) Houndify API\n",
 56 |         "\n",
 57 |         "7) IBM Speech To Text\n",
 58 |         "\n",
 59 |         "8) Snowboy Hotword Detection (offline)\n"
 60 |       ]
 61 |     },
 62 |     {
 63 |       "cell_type": "code",
 64 |       "metadata": {
 65 |         "id": "GuhAgOzrcKG1"
 66 |       },
 67 |       "source": [
 68 |         "import speech_recognition as sr"
 69 |       ],
 70 |       "execution_count": null,
 71 |       "outputs": []
 72 |     },
 73 |     {
 74 |       "cell_type": "code",
 75 |       "metadata": {
 76 |         "id": "ArV_Nw3OcNe4"
 77 |       },
 78 |       "source": [
 79 |         "r = sr.Recognizer()"
 80 |       ],
 81 |       "execution_count": null,
 82 |       "outputs": []
 83 |     },
 84 |     {
 85 |       "cell_type": "code",
 86 |       "metadata": {
 87 |         "id": "03wcn-_De8mD"
 88 |       },
 89 |       "source": [
 90 |         "filename = \"bcd.wav\""
 91 |       ],
 92 |       "execution_count": null,
 93 |       "outputs": []
 94 |     },
 95 |     {
 96 |       "cell_type": "code",
 97 |       "metadata": {
 98 |         "id": "EkEY34uucfiF",
 99 |         "outputId": "ca9e64e0-6275-4e48-b722-40ee661b2470",
100 |         "colab": {
101 |           "base_uri": "https://localhost:8080/"
102 |         }
103 |       },
104 |       "source": [
105 |         "\n",
106 |         "with sr.AudioFile(filename) as source:\n",
107 |         "    audio_data = r.record(source)\n",
108 |         "    text = r.recognize_google(audio_data)\n",
109 |         "    print(text)"
110 |       ],
111 |       "execution_count": null,
112 |       "outputs": [
113 |         {
114 |           "output_type": "stream",
115 |           "text": [
116 |             "kids are talkin by the door\n"
117 |           ],
118 |           "name": "stdout"
119 |         }
120 |       ]
121 |     }
122 |   ]
123 | }


--------------------------------------------------------------------------------
/Whisper.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akshaytheau/Data-Science/e5a1a3510f9afa9e04c9809f12e513cb6eec9188/Whisper.pptx


--------------------------------------------------------------------------------
/YouTube_recommendation_pinecone.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "code",
  5 |       "execution_count": null,
  6 |       "metadata": {
  7 |         "id": "whr9pySjG8yd",
  8 |         "collapsed": true
  9 |       },
 10 |       "outputs": [],
 11 |       "source": [
 12 |         "! pip install pandas\n",
 13 |         "! pip install pytube\n",
 14 |         "! pip install numpy\n",
 15 |         "! pip install pinecone-client\n",
 16 |         "! pip install git+https://github.com/openai/whisper.git"
 17 |       ]
 18 |     },
 19 |     {
 20 |       "cell_type": "code",
 21 |       "execution_count": null,
 22 |       "metadata": {
 23 |         "id": "tfXF3-S-IU_L"
 24 |       },
 25 |       "outputs": [],
 26 |       "source": [
 27 |         "# Import the modules\n",
 28 |         "import os\n",
 29 |         "import torch\n",
 30 |         "import whisper\n",
 31 |         "import pinecone\n",
 32 |         "import numpy as np\n",
 33 |         "import pandas as pd\n",
 34 |         "from pytube import YouTube"
 35 |       ]
 36 |     },
 37 |     {
 38 |       "cell_type": "code",
 39 |       "execution_count": null,
 40 |       "metadata": {
 41 |         "id": "nRJaOc6-IaOV"
 42 |       },
 43 |       "outputs": [],
 44 |       "source": [
 45 |         "def video_to_audio(video_url, destination):\n",
 46 |         "\n",
 47 |         "    # Get the video\n",
 48 |         "    video = YouTube(video_url)\n",
 49 |         "\n",
 50 |         "    # Convert video to Audio\n",
 51 |         "    audio = video.streams.filter(only_audio=True).first()\n",
 52 |         "\n",
 53 |         "    # Save to destination\n",
 54 |         "    output = audio.download(output_path = destination)\n",
 55 |         "\n",
 56 |         "    name, ext = os.path.splitext(output)\n",
 57 |         "    new_file = name + '.mp3'\n",
 58 |         "\n",
 59 |         "    # Replace spaces with \"_\"\n",
 60 |         "    new_file = new_file.replace(\" \", \"_\")\n",
 61 |         "\n",
 62 |         "    # Change the name of the file\n",
 63 |         "    os.rename(output, new_file)\n",
 64 |         "\n",
 65 |         "    return new_file\n"
 66 |       ]
 67 |     },
 68 |     {
 69 |       "cell_type": "code",
 70 |       "execution_count": null,
 71 |       "metadata": {
 72 |         "id": "4n13nfbQIfZq"
 73 |       },
 74 |       "outputs": [],
 75 |       "source": [
 76 |         "%%bash\n",
 77 |         "mkdir \"audio_data\""
 78 |       ]
 79 |     },
 80 |     {
 81 |       "cell_type": "code",
 82 |       "execution_count": null,
 83 |       "metadata": {
 84 |         "id": "O-hoBEi8InCL"
 85 |       },
 86 |       "outputs": [],
 87 |       "source": [
 88 |         "# Create URL column\n",
 89 |         "audio_path = \"audio_data\"\n",
 90 |         "\n",
 91 |         "# Have just provided a sample of links for experimentation purpose\n",
 92 |         "list_videos = [\"https://www.youtube.com/watch?v=IdTMDpizis8\",\n",
 93 |         "              \"https://www.youtube.com/watch?v=fLeJJPxua3E\",\n",
 94 |         "              \"https://www.youtube.com/watch?v=z3FA2kALScU\"]\n",
 95 |         "# Create dataframe\n",
 96 |         "transcription_df = pd.DataFrame(list_videos, columns=['URLs'])"
 97 |       ]
 98 |     },
 99 |     {
100 |       "cell_type": "code",
101 |       "execution_count": null,
102 |       "metadata": {
103 |         "id": "yeuLSMbkIoND"
104 |       },
105 |       "outputs": [],
106 |       "source": [
107 |         "\n",
108 |         "transcription_df.head()"
109 |       ]
110 |     },
111 |     {
112 |       "cell_type": "code",
113 |       "execution_count": null,
114 |       "metadata": {
115 |         "id": "jbGxPPwfIrFQ"
116 |       },
117 |       "outputs": [],
118 |       "source": [
119 |         "# Create the files_name\n",
120 |         "transcription_df[\"file_name\"] = transcription_df[\"URLs\"].apply(lambda url: video_to_audio(url, audio_path))\n",
121 |         "transcription_df.head()"
122 |       ]
123 |     },
124 |     {
125 |       "cell_type": "code",
126 |       "execution_count": null,
127 |       "metadata": {
128 |         "id": "uuFeg71oIt4e"
129 |       },
130 |       "outputs": [],
131 |       "source": [
132 |         "# Set the device\n",
133 |         "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
134 |         "\n",
135 |         "# Load the model\n",
136 |         "whisper_model = whisper.load_model(\"base\", device=device)"
137 |       ]
138 |     },
139 |     {
140 |       "cell_type": "code",
141 |       "execution_count": null,
142 |       "metadata": {
143 |         "id": "490_i7XAOIf6"
144 |       },
145 |       "outputs": [],
146 |       "source": [
147 |         "def audio_to_text(audio_file):\n",
148 |         "\n",
149 |         "    return whisper_model.transcribe(audio_file)[\"text\"]"
150 |       ]
151 |     },
152 |     {
153 |       "cell_type": "code",
154 |       "execution_count": null,
155 |       "metadata": {
156 |         "id": "DelG9fXBOLLf"
157 |       },
158 |       "outputs": [],
159 |       "source": [
160 |         "# Apply the function to all the audio files\n",
161 |         "transcription_df[\"transcriptions\"] = transcription_df[\"file_name\"].apply(lambda f_name: audio_to_text(f_name))\n",
162 |         "\n",
163 |         "\n",
164 |         "# Show the first five rows\n",
165 |         "transcription_df.head()"
166 |       ]
167 |     },
168 |     {
169 |       "cell_type": "code",
170 |       "execution_count": null,
171 |       "metadata": {
172 |         "id": "zQ0ixNobOUTS"
173 |       },
174 |       "outputs": [],
175 |       "source": [
176 |         "transcription_df.head()"
177 |       ]
178 |     },
179 |     {
180 |       "cell_type": "code",
181 |       "source": [
182 |         "model_id = \"sentence-transformers/all-MiniLM-L6-v2\""
183 |       ],
184 |       "metadata": {
185 |         "id": "DSfRz_vU0c3j"
186 |       },
187 |       "execution_count": null,
188 |       "outputs": []
189 |     },
190 |     {
191 |       "cell_type": "code",
192 |       "source": [
193 |         "from getpass import getpass\n",
194 |         "os.environ[\"HUGGING_FACE_TOKEN\"] = getpass('Enter Hugging Face token: ')\n",
195 |         "hf_token = os.getenv('HUGGING_FACE_TOKEN')"
196 |       ],
197 |       "metadata": {
198 |         "id": "DrhHaFbzfl2J"
199 |       },
200 |       "execution_count": null,
201 |       "outputs": []
202 |     },
203 |     {
204 |       "cell_type": "code",
205 |       "source": [
206 |         "import requests\n",
207 |         "\n",
208 |         "api_url = f\"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}\"\n",
209 |         "headers = {\"Authorization\": f\"Bearer {hf_token}\"}"
210 |       ],
211 |       "metadata": {
212 |         "id": "BH9hR2JV0pKc"
213 |       },
214 |       "execution_count": null,
215 |       "outputs": []
216 |     },
217 |     {
218 |       "cell_type": "code",
219 |       "source": [
220 |         "def query(texts):\n",
221 |         "    response = requests.post(api_url, headers=headers, json={\"inputs\": texts, \"options\":{\"wait_for_model\":True}})\n",
222 |         "    return response.json()"
223 |       ],
224 |       "metadata": {
225 |         "id": "sC9otQxe0u_0"
226 |       },
227 |       "execution_count": null,
228 |       "outputs": []
229 |     },
230 |     {
231 |       "cell_type": "code",
232 |       "source": [
233 |         "transcription_df[\"embedding\"] = transcription_df[\"transcriptions\"].astype(str).apply(query)"
234 |       ],
235 |       "metadata": {
236 |         "id": "bM-kZru01LuN"
237 |       },
238 |       "execution_count": null,
239 |       "outputs": []
240 |     },
241 |     {
242 |       "cell_type": "code",
243 |       "execution_count": null,
244 |       "metadata": {
245 |         "id": "XXxw58woOggL"
246 |       },
247 |       "outputs": [],
248 |       "source": [
249 |         "transcription_df.head()"
250 |       ]
251 |     },
252 |     {
253 |       "cell_type": "code",
254 |       "execution_count": null,
255 |       "metadata": {
256 |         "id": "YZQveIHvOi2d"
257 |       },
258 |       "outputs": [],
259 |       "source": [
260 |         "vector_dim = transcription_df.iloc[2].embedding\n",
261 |         "len(vector_dim)"
262 |       ]
263 |     },
264 |     {
265 |       "cell_type": "code",
266 |       "source": [
267 |         "import os\n",
268 |         "\n",
269 |         "\n",
270 |         "os.environ[\"PINECONE_API_KEY\"] = getpass('Enter your Pinecone API Key: ')"
271 |       ],
272 |       "metadata": {
273 |         "id": "qKfNJ0AB3DKG"
274 |       },
275 |       "execution_count": null,
276 |       "outputs": []
277 |     },
278 |     {
279 |       "cell_type": "code",
280 |       "source": [
281 |         "os.environ[\"PINECONE_ENVIRONMENT\"] = getpass('Enter your Pinecone Environment: ')"
282 |       ],
283 |       "metadata": {
284 |         "id": "1-v9xbae3Hpm"
285 |       },
286 |       "execution_count": null,
287 |       "outputs": []
288 |     },
289 |     {
290 |       "cell_type": "code",
291 |       "source": [
292 |         "# find API key in console at app.pinecone.io\n",
293 |         "api_key = os.getenv('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n",
294 |         "# find ENV (cloud region) next to API key in console\n",
295 |         "env = os.getenv('PINECONE_ENVIRONMENT') or 'PINECONE_ENVIRONMENT'\n",
296 |         "\n",
297 |         "# Initialize connection to pinecone\n",
298 |         "pinecone.init(\n",
299 |         "  api_key=api_key,\n",
300 |         "  environment=env\n",
301 |         ")"
302 |       ],
303 |       "metadata": {
304 |         "id": "8iseMCvslEgA"
305 |       },
306 |       "execution_count": null,
307 |       "outputs": []
308 |     },
309 |     {
310 |       "cell_type": "code",
311 |       "execution_count": null,
312 |       "metadata": {
313 |         "id": "iJFDt2ayOlKa"
314 |       },
315 |       "outputs": [],
316 |       "source": [
317 |         "\n",
318 |         "\n",
319 |         "# Index params\n",
320 |         "my_index_name = \"audio-search\"\n",
321 |         "vector_dim = len(transcription_df.iloc[0].embedding)\n",
322 |         "\n",
323 |         "if my_index_name not in pinecone.list_indexes():\n",
324 |         "  # Create the index\n",
325 |         "  pinecone.create_index(name = my_index_name,\n",
326 |         "                      dimension=vector_dim,\n",
327 |         "                      metric=\"cosine\", shards=1,\n",
328 |         "                      pod_type='s1.x1')\n",
329 |         "# Connect to the index\n",
330 |         "my_index = pinecone.Index(index_name = my_index_name)"
331 |       ]
332 |     },
333 |     {
334 |       "cell_type": "code",
335 |       "execution_count": null,
336 |       "metadata": {
337 |         "id": "ytD9KY83OrEI"
338 |       },
339 |       "outputs": [],
340 |       "source": [
341 |         "transcription_df[\"vector_id\"] = transcription_df.index\n",
342 |         "transcription_df[\"vector_id\"] = transcription_df[\"vector_id\"].apply(str)\n",
343 |         "\n",
344 |         "# Get all the metadata\n",
345 |         "final_metadata = []\n",
346 |         "\n",
347 |         "for index in range(len(transcription_df)):\n",
348 |         "  final_metadata.append({\n",
349 |         "      'ID':  index,\n",
350 |         "      'url': transcription_df.iloc[index].URLs,\n",
351 |         "      'transcription': transcription_df.iloc[index].transcriptions\n",
352 |         "  })\n",
353 |         "\n",
354 |         "audio_IDs = transcription_df.vector_id.tolist()\n",
355 |         "audio_embeddings = [arr for arr in transcription_df.embedding]\n",
356 |         "\n",
357 |         "# Create the single list of dictionary format to insert\n",
358 |         "data_to_upsert = list(zip(audio_IDs, audio_embeddings, final_metadata))\n",
359 |         "\n",
360 |         "# Upload the final data\n",
361 |         "my_index.upsert(vectors = data_to_upsert)\n",
362 |         "\n",
363 |         "# Show information about the vector index\n",
364 |         "my_index.describe_index_stats()"
365 |       ]
366 |     },
367 |     {
368 |       "cell_type": "code",
369 |       "execution_count": null,
370 |       "metadata": {
371 |         "id": "dTAJpC5JOte6"
372 |       },
373 |       "outputs": [],
374 |       "source": [
375 |         "N = 2\n",
376 |         "my_query_embedding = transcription_df.embedding[0]\n",
377 |         "\n",
378 |         "# Run the Query Search\n",
379 |         "my_index.query(my_query_embedding, top_k=N, include_metadata=True)"
380 |       ]
381 |     }
382 |   ],
383 |   "metadata": {
384 |     "accelerator": "GPU",
385 |     "colab": {
386 |       "provenance": []
387 |     },
388 |     "kernelspec": {
389 |       "display_name": "Python 3",
390 |       "name": "python3"
391 |     },
392 |     "language_info": {
393 |       "name": "python"
394 |     }
395 |   },
396 |   "nbformat": 4,
397 |   "nbformat_minor": 0
398 | }


--------------------------------------------------------------------------------
/YoutubeComments.csv:
--------------------------------------------------------------------------------
 1 | Comments
 2 | Super explanations so good
 3 | Good
 4 | Thankyou!!
 5 | Excellent explanation...
 6 | Nice
 7 | Wowwww
 8 | It was very clear and useful ..... 
 9 | Very helpful and easy to understand
10 | Super video bhai
11 | Thank you so much bro. Excellent explanation. Absolutely superb. keep it up
12 | ek number explanation
13 | I like your teaching skill and use 3 paper for each concept if any help for YouTube views then send me mail
14 | Thanking you sir
15 | Love you thinks bor 
16 | you saved my semester thnx :D
17 | good job bro.....
18 | Ek number explanation
19 | Wrong
20 | "Very raw , need improvement"
21 | The best ever seen
22 | This just too good
23 | Wow you are the best
24 | Simple and elegant explanation
25 | Grt explanation
26 | nice
27 | good job bro.....
28 | Very good
29 | Too grt
30 | Nice video
31 | Keept it up
32 | 


--------------------------------------------------------------------------------
/face-mask-detector-project.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akshaytheau/Data-Science/e5a1a3510f9afa9e04c9809f12e513cb6eec9188/face-mask-detector-project.zip


--------------------------------------------------------------------------------
/medspacydemo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "7c58cc64",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<b>Clinical NLP</b> is a specialization of NLP that allows computers to understand the rich meaning that lies behind a doctor’s written analysis of a patient.\n",
  9 |     "\n",
 10 |     "Normal NLP engines use large corpora of text, usually books or other written documents, to determine how language is structured and how grammar is formed."
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "id": "ca8b9bde",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "<b>Entity extraction</b>: to surface relevant clinical concepts from unstructured data.\n",
 19 |     "\n",
 20 |     "<b>Contextualization</b>: to decipher the doctor’s meaning when they mention a concept. For example, when doctors deny a patient has a condition or talk about a patient’s history.\n",
 21 |     "\n",
 22 |     "<b>Knowledge graph</b>: to understand how clinical concepts are interrelated, like the fact that both fentanyl and hydrocodone are opiates."
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "id": "2433023c",
 29 |    "metadata": {},
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stdout",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "Collecting medspacy\n",
 36 |       "  Using cached medspacy-1.0.0-py3-none-any.whl (146 kB)\n",
 37 |       "Requirement already satisfied: jsonschema in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy) (4.16.0)\n",
 38 |       "Collecting PyRuSH>=1.0.6\n",
 39 |       "  Using cached PyRuSH-1.0.8-cp39-cp39-macosx_10_9_x86_64.whl (63 kB)\n",
 40 |       "Collecting pysbd==0.3.4\n",
 41 |       "  Using cached pysbd-0.3.4-py3-none-any.whl (71 kB)\n",
 42 |       "Collecting medspacy-quickumls==2.6\n",
 43 |       "  Using cached medspacy_quickumls-2.6-py3-none-any.whl (31 kB)\n",
 44 |       "Collecting spacy>=3.4.1\n",
 45 |       "  Using cached spacy-3.4.2-cp39-cp39-macosx_10_9_x86_64.whl (6.7 MB)\n",
 46 |       "Requirement already satisfied: numpy>=1.8.2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (1.21.5)\n",
 47 |       "Collecting leveldb>=0.193\n",
 48 |       "  Using cached leveldb-0.201.tar.gz (236 kB)\n",
 49 |       "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
 50 |       "\u001b[?25hRequirement already satisfied: pytest>=6 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (7.1.2)\n",
 51 |       "Requirement already satisfied: nltk>=3.3 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (3.7)\n",
 52 |       "Collecting unqlite>=0.8.1\n",
 53 |       "  Using cached unqlite-0.9.3.tar.gz (575 kB)\n",
 54 |       "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
 55 |       "\u001b[?25hRequirement already satisfied: unidecode>=0.4.19 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (1.2.0)\n",
 56 |       "Requirement already satisfied: six in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (1.16.0)\n",
 57 |       "Collecting medspacy-simstring>=2.1\n",
 58 |       "  Using cached medspacy_simstring-2.1.tar.gz (68 kB)\n",
 59 |       "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
 60 |       "\u001b[?25hRequirement already satisfied: Cython<3.0,>=0.25 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from PyRuSH>=1.0.6->medspacy) (0.29.32)\n",
 61 |       "Collecting PyFastNER>=1.0.8\n",
 62 |       "  Using cached PyFastNER-1.0.8-py3-none-any.whl (18 kB)\n",
 63 |       "Requirement already satisfied: setuptools in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from PyRuSH>=1.0.6->medspacy) (63.4.1)\n",
 64 |       "Collecting quicksectx>=0.3.5\n",
 65 |       "  Using cached quicksectx-0.3.5-cp39-cp39-macosx_10_9_x86_64.whl (283 kB)\n",
 66 |       "Collecting thinc<8.2.0,>=8.1.0\n",
 67 |       "  Using cached thinc-8.1.5-cp39-cp39-macosx_10_9_x86_64.whl (768 kB)\n",
 68 |       "Collecting cymem<2.1.0,>=2.0.2\n",
 69 |       "  Using cached cymem-2.0.7-cp39-cp39-macosx_10_9_x86_64.whl (32 kB)\n",
 70 |       "Collecting typer<0.5.0,>=0.3.0\n",
 71 |       "  Using cached typer-0.4.2-py3-none-any.whl (27 kB)\n",
 72 |       "Collecting spacy-legacy<3.1.0,>=3.0.10\n",
 73 |       "  Using cached spacy_legacy-3.0.10-py2.py3-none-any.whl (21 kB)\n",
 74 |       "Collecting langcodes<4.0.0,>=3.2.0\n",
 75 |       "  Using cached langcodes-3.3.0-py3-none-any.whl (181 kB)\n",
 76 |       "Collecting catalogue<2.1.0,>=2.0.6\n",
 77 |       "  Using cached catalogue-2.0.8-py3-none-any.whl (17 kB)\n",
 78 |       "Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (0.10.1)\n",
 79 |       "Requirement already satisfied: jinja2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (2.11.3)\n",
 80 |       "Collecting srsly<3.0.0,>=2.4.3\n",
 81 |       "  Using cached srsly-2.4.5-cp39-cp39-macosx_10_9_x86_64.whl (491 kB)\n",
 82 |       "Collecting pathy>=0.3.5\n",
 83 |       "  Using cached pathy-0.6.2-py3-none-any.whl (42 kB)\n",
 84 |       "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (4.64.1)\n",
 85 |       "Collecting spacy-loggers<2.0.0,>=1.0.0\n",
 86 |       "  Using cached spacy_loggers-1.0.3-py3-none-any.whl (9.3 kB)\n",
 87 |       "Collecting pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4\n",
 88 |       "  Using cached pydantic-1.10.2-cp39-cp39-macosx_10_9_x86_64.whl (3.2 MB)\n",
 89 |       "Collecting murmurhash<1.1.0,>=0.28.0\n",
 90 |       "  Using cached murmurhash-1.0.9-cp39-cp39-macosx_10_9_x86_64.whl (18 kB)\n",
 91 |       "Collecting preshed<3.1.0,>=3.0.2\n",
 92 |       "  Using cached preshed-3.0.8-cp39-cp39-macosx_10_9_x86_64.whl (107 kB)\n",
 93 |       "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (2.28.1)\n",
 94 |       "Requirement already satisfied: packaging>=20.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (21.3)\n",
 95 |       "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from jsonschema->medspacy) (0.18.0)\n",
 96 |       "Requirement already satisfied: attrs>=17.4.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from jsonschema->medspacy) (21.4.0)\n",
 97 |       "Requirement already satisfied: regex>=2021.8.3 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from nltk>=3.3->medspacy-quickumls==2.6->medspacy) (2022.7.9)\n",
 98 |       "Requirement already satisfied: joblib in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from nltk>=3.3->medspacy-quickumls==2.6->medspacy) (1.1.0)\n",
 99 |       "Requirement already satisfied: click in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from nltk>=3.3->medspacy-quickumls==2.6->medspacy) (8.0.4)\n",
100 |       "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from packaging>=20.0->spacy>=3.4.1->medspacy) (3.0.9)\n",
101 |       "Requirement already satisfied: smart-open<6.0.0,>=5.2.1 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pathy>=0.3.5->spacy>=3.4.1->medspacy) (5.2.1)\n",
102 |       "Requirement already satisfied: typing-extensions>=4.1.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4->spacy>=3.4.1->medspacy) (4.3.0)\n",
103 |       "Requirement already satisfied: iniconfig in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pytest>=6->medspacy-quickumls==2.6->medspacy) (1.1.1)\n",
104 |       "Requirement already satisfied: pluggy<2.0,>=0.12 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pytest>=6->medspacy-quickumls==2.6->medspacy) (1.0.0)\n",
105 |       "Requirement already satisfied: py>=1.8.2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pytest>=6->medspacy-quickumls==2.6->medspacy) (1.11.0)\n",
106 |       "Requirement already satisfied: tomli>=1.0.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pytest>=6->medspacy-quickumls==2.6->medspacy) (2.0.1)\n",
107 |       "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy>=3.4.1->medspacy) (1.26.11)\n",
108 |       "Requirement already satisfied: idna<4,>=2.5 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy>=3.4.1->medspacy) (3.3)\n",
109 |       "Requirement already satisfied: charset-normalizer<3,>=2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy>=3.4.1->medspacy) (2.0.4)\n",
110 |       "Requirement already satisfied: certifi>=2017.4.17 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy>=3.4.1->medspacy) (2022.9.24)\n",
111 |       "Collecting confection<1.0.0,>=0.0.1\n",
112 |       "  Using cached confection-0.0.3-py3-none-any.whl (32 kB)\n",
113 |       "Collecting blis<0.8.0,>=0.7.8\n",
114 |       "  Using cached blis-0.7.9-cp39-cp39-macosx_10_9_x86_64.whl (6.1 MB)\n",
115 |       "Requirement already satisfied: MarkupSafe>=0.23 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from jinja2->spacy>=3.4.1->medspacy) (2.0.1)\n",
116 |       "Building wheels for collected packages: leveldb, medspacy-simstring, unqlite\n",
117 |       "  Building wheel for leveldb (setup.py) ... \u001b[?25ldone\n",
118 |       "\u001b[?25h  Created wheel for leveldb: filename=leveldb-0.201-cp39-cp39-macosx_10_9_x86_64.whl size=159743 sha256=4ceb82328451b65c3a130b5ff48412bffa38a6a02949d6190066279643d4f0bd\n",
119 |       "  Stored in directory: /Users/ark/Library/Caches/pip/wheels/c7/5d/9f/043268a081577de68513e3a0c1919162406abd711f58962e24\n",
120 |       "  Building wheel for medspacy-simstring (setup.py) ... \u001b[?25ldone\n",
121 |       "\u001b[?25h  Created wheel for medspacy-simstring: filename=medspacy_simstring-2.1-cp39-cp39-macosx_10_9_x86_64.whl size=100820 sha256=de56fe7f8c0a1f0b2d189e3b64eacbb7449d609a85e19d5031a327b00a57ad5e\n",
122 |       "  Stored in directory: /Users/ark/Library/Caches/pip/wheels/99/f4/d1/233bc6c0bd14c4102a06bf21fc03a116e9d830db4f6595d109\n",
123 |       "  Building wheel for unqlite (setup.py) ... \u001b[?25ldone\n",
124 |       "\u001b[?25h  Created wheel for unqlite: filename=unqlite-0.9.3-cp39-cp39-macosx_10_9_x86_64.whl size=323498 sha256=054aff9da5bf5825522d7e518beff4d659c11fb3ea7166b487828d7bf1b1748f\n",
125 |       "  Stored in directory: /Users/ark/Library/Caches/pip/wheels/cd/9c/52/fb16aba35ce11954e9742a1f1b7dfa5e878dd94c198d3cf254\n",
126 |       "Successfully built leveldb medspacy-simstring unqlite\n"
127 |      ]
128 |     },
129 |     {
130 |      "name": "stdout",
131 |      "output_type": "stream",
132 |      "text": [
133 |       "Installing collected packages: medspacy-simstring, leveldb, cymem, unqlite, typer, spacy-loggers, spacy-legacy, quicksectx, pysbd, pydantic, murmurhash, langcodes, catalogue, blis, srsly, PyFastNER, preshed, pathy, confection, thinc, spacy, PyRuSH, medspacy-quickumls, medspacy\n",
134 |       "Successfully installed PyFastNER-1.0.8 PyRuSH-1.0.8 blis-0.7.9 catalogue-2.0.8 confection-0.0.3 cymem-2.0.7 langcodes-3.3.0 leveldb-0.201 medspacy-1.0.0 medspacy-quickumls-2.6 medspacy-simstring-2.1 murmurhash-1.0.9 pathy-0.6.2 preshed-3.0.8 pydantic-1.10.2 pysbd-0.3.4 quicksectx-0.3.5 spacy-3.4.2 spacy-legacy-3.0.10 spacy-loggers-1.0.3 srsly-2.4.5 thinc-8.1.5 typer-0.4.2 unqlite-0.9.3\n"
135 |      ]
136 |     }
137 |    ],
138 |    "source": [
139 |     "! pip install medspacy"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 3,
145 |    "id": "060c8922",
146 |    "metadata": {},
147 |    "outputs": [
148 |     {
149 |      "name": "stdout",
150 |      "output_type": "stream",
151 |      "text": [
152 |       "['medspacy_pyrush', 'medspacy_target_matcher', 'medspacy_context']\n"
153 |      ]
154 |     },
155 |     {
156 |      "data": {
157 |       "text/html": [
158 |        "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\"></br>Past Medical History:</br>1. \n",
159 |        "<mark class=\"entity\" style=\"background: #2ca02c; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
160 |        "    Atrial fibrillation\n",
161 |        "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PROBLEM</span>\n",
162 |        "</mark>\n",
163 |        "</br>2. \n",
164 |        "<mark class=\"entity\" style=\"background: #2ca02c; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
165 |        "    Type II Diabetes Mellitus\n",
166 |        "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PROBLEM</span>\n",
167 |        "</mark>\n",
168 |        "</br></br>Assessment and Plan:</br>There is \n",
169 |        "<mark class=\"entity\" style=\"background: #ff7f0e; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
170 |        "    no evidence of\n",
171 |        "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">NEGATED_EXISTENCE</span>\n",
172 |        "</mark>\n",
173 |        " \n",
174 |        "<mark class=\"entity\" style=\"background: #2ca02c; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
175 |        "    pneumonia\n",
176 |        "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PROBLEM</span>\n",
177 |        "</mark>\n",
178 |        ". Continue \n",
179 |        "<mark class=\"entity\" style=\"background: #1f77b4; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
180 |        "    warfarin\n",
181 |        "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MEDICATION</span>\n",
182 |        "</mark>\n",
183 |        " for \n",
184 |        "<mark class=\"entity\" style=\"background: #2ca02c; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
185 |        "    Afib\n",
186 |        "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PROBLEM</span>\n",
187 |        "</mark>\n",
188 |        ". Follow up for management of \n",
189 |        "<mark class=\"entity\" style=\"background: #2ca02c; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
190 |        "    type 2 DM\n",
191 |        "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PROBLEM</span>\n",
192 |        "</mark>\n",
193 |        ".</br></div></span>"
194 |       ],
195 |       "text/plain": [
196 |        "<IPython.core.display.HTML object>"
197 |       ]
198 |      },
199 |      "metadata": {},
200 |      "output_type": "display_data"
201 |     }
202 |    ],
203 |    "source": [
204 |     "import medspacy\n",
205 |     "from medspacy.ner import TargetRule\n",
206 |     "from medspacy.visualization import visualize_ent\n",
207 |     "\n",
208 |     "nlp = medspacy.load()\n",
209 |     "print(nlp.pipe_names)\n",
210 |     "\n",
211 |     "text = \"\"\"\n",
212 |     "Past Medical History:\n",
213 |     "1. Atrial fibrillation\n",
214 |     "2. Type II Diabetes Mellitus\n",
215 |     "\n",
216 |     "Assessment and Plan:\n",
217 |     "There is no evidence of pneumonia. Continue warfarin for Afib. Follow up for management of type 2 DM.\n",
218 |     "\"\"\"\n",
219 |     "\n",
220 |     "target_matcher = nlp.get_pipe(\"medspacy_target_matcher\")\n",
221 |     "target_rules = [\n",
222 |     "    TargetRule(\"atrial fibrillation\", \"PROBLEM\"),\n",
223 |     "    TargetRule(\"atrial fibrillation\", \"PROBLEM\", pattern=[{\"LOWER\": \"afib\"}]),\n",
224 |     "    TargetRule(\"pneumonia\", \"PROBLEM\"),\n",
225 |     "    TargetRule(\"Type II Diabetes Mellitus\", \"PROBLEM\", \n",
226 |     "              pattern=[\n",
227 |     "                  {\"LOWER\": \"type\"},\n",
228 |     "                  {\"LOWER\": {\"IN\": [\"2\", \"ii\", \"two\"]}},\n",
229 |     "                  {\"LOWER\": {\"IN\": [\"dm\", \"diabetes\"]}},\n",
230 |     "                  {\"LOWER\": \"mellitus\", \"OP\": \"?\"}\n",
231 |     "              ]),\n",
232 |     "    TargetRule(\"warfarin\", \"MEDICATION\")\n",
233 |     "]\n",
234 |     "target_matcher.add(target_rules)\n",
235 |     "\n",
236 |     "doc = nlp(text)\n",
237 |     "visualize_ent(doc)"
238 |    ]
239 |   }
240 |  ],
241 |  "metadata": {
242 |   "kernelspec": {
243 |    "display_name": "Python 3 (ipykernel)",
244 |    "language": "python",
245 |    "name": "python3"
246 |   },
247 |   "language_info": {
248 |    "codemirror_mode": {
249 |     "name": "ipython",
250 |     "version": 3
251 |    },
252 |    "file_extension": ".py",
253 |    "mimetype": "text/x-python",
254 |    "name": "python",
255 |    "nbconvert_exporter": "python",
256 |    "pygments_lexer": "ipython3",
257 |    "version": "3.9.13"
258 |   }
259 |  },
260 |  "nbformat": 4,
261 |  "nbformat_minor": 5
262 | }
263 | 


--------------------------------------------------------------------------------
/sentimentanalysis_usingbert.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """SentimentAnalysis usingBERT.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1Und9XiLKRld1ELczxS1Ht-no-c70kYsh
 8 | """
 9 | 
10 | from transformers import pipeline
11 | st = f"What a great guy he is good and awesome."
12 | seq = pipeline(task="text-classification", model='nlptown/bert-base-multilingual-uncased-sentiment')
13 | print(f"Result: { seq(st) }")
14 | 
15 | ! pip install transformers


--------------------------------------------------------------------------------
/test script.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | from keras.models import load_model
 4 | model=load_model("model2-001.h5")
 5 | results={0:'without mask',1:'mask'}
 6 | GR_dict={0:(0,0,255),1:(0,255,0)}
 7 | rect_size = 4
 8 | cap = cv2.VideoCapture(0) 
 9 | haarcascade = cv2.CascadeClassifier('/home/akshay/.local/lib/python3.6/site-packages/cv2/data/haarcascade_frontalface_default.xml')
10 | 
11 | def main():
12 | 	while True:
13 | 	    (rval, im) = cap.read()
14 | 	    im=cv2.flip(im,1,1) 
15 | 	    
16 | 	    rerect_size = cv2.resize(im, (im.shape[1] // rect_size, im.shape[0] // rect_size))
17 | 	    faces = haarcascade.detectMultiScale(rerect_size)
18 | 	    for f in faces:
19 | 		(x, y, w, h) = [v * rect_size for v in f] 
20 | 		
21 | 		face_img = im[y:y+h, x:x+w]
22 | 		rerect_sized=cv2.resize(face_img,(150,150))
23 | 		normalized=rerect_sized/255.0
24 | 		reshaped=np.reshape(normalized,(1,150,150,3))
25 | 		reshaped = np.vstack([reshaped])
26 | 		result=model.predict(reshaped)
27 | 		
28 | 		label=np.argmax(result,axis=1)[0]
29 | 	      
30 | 		cv2.rectangle(im,(x,y),(x+w,y+h),GR_dict[label],2)
31 | 		cv2.rectangle(im,(x,y-40),(x+w,y),GR_dict[label],-1)
32 | 		cv2.putText(im, results[label], (x, y-10),cv2.FONT_HERSHEY_SIMPLEX,0.8,(255,255,255),2)
33 | 	    cv2.imshow('LIVE',   im)
34 | 	    key = cv2.waitKey(10)
35 | 	    
36 | 	    if key == 27: 
37 | 		break
38 | 	cap.release()
39 | 	cv2.destroyAllWindows()
40 | 
41 | if __name__ == __main__:
42 | 	main()
43 | 


--------------------------------------------------------------------------------
/text_summarization.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Text Summarization.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1uxDowTb9Au_kgXioUxUs8E19ZjD4phkZ
 8 | """
 9 | 
10 | ! pip install transformers
11 | 
12 | from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig
13 | model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
14 | 
15 | tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
16 | 
17 | ARTICLE_TO_SUMMARIZE = "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."
18 | 
19 | inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors='pt')
20 | 
21 | summary_ids = model.generate(inputs['input_ids'], max_length=500, early_stopping=False)
22 | 
23 | summary_ids
24 | 
25 | print([tokenizer.decode(g, skip_special_tokens=True) for g in summary_ids])
26 | 
27 | 


--------------------------------------------------------------------------------