├── 20_08_langchain.ipynb ├── AQI_india.ipynb ├── ActivePassive.py ├── Aspect_based_sentiment_analysis.ipynb ├── Automatic EDA.ipynb ├── BERTspamfilter.ipynb ├── BM25_ensemble_retriever.ipynb ├── Basic Chatbot.ipynb ├── Bertopic.ipynb ├── Building_an_auto_correct_in_python.ipynb ├── COVIDdetectionusingXray.py ├── Cartoonify using python.ipynb ├── CasualToFormalConverter.py ├── Clustering.ipynb ├── Comparing different language detector.ipynb ├── DataPrep.ipynb ├── Describe_alternative.ipynb ├── Detect Binod.ipynb ├── DiabetesClassificationUsingNeuralNetwork.py ├── Dummy_variable_trap.ipynb ├── EntityExtraction.py ├── Faker.ipynb ├── Fruit_detection_using_CNNs.ipynb ├── Gensim introduction hindi.ipynb ├── Grammarchecker.py ├── HaarCascade.py ├── IPLdataAnalysis.ipynb ├── Information_retrieval_Fact_extractors_python.ipynb ├── Kepler-delete.ipynb ├── LazyPredict.ipynb ├── Lux.ipynb ├── MLDC .ipynb ├── Multi class classification using Machine Learning.ipynb ├── OCR .ipynb ├── OCR.ipynb ├── Object_detection_using_detecto.ipynb ├── PaliGemma.ipynb ├── PassiveActive.py ├── Performance Analyzer.ipynb ├── Pivot table in pandas.ipynb ├── RAG_fusion.ipynb ├── README.md ├── Readability.ipynb ├── SMSSpamCollection ├── Semantic_search.ipynb ├── Sentiment Analysis using VADER.ipynb ├── Sentiment_Analysis_using_Distilbert.ipynb ├── Sigmoid_overflow_problem.ipynb ├── Speechtotext.ipynb ├── Stanza library.ipynb ├── Stopwords.ipynb ├── TSNE demo.ipynb ├── Topic modelling using Gensim.ipynb ├── Twitter API POC.ipynb ├── Whisper.pptx ├── WordCloud.ipynb ├── YouTube_recommendation_pinecone.ipynb ├── YoutubeComments.csv ├── cuisine_data.csv ├── d3blocks.ipynb ├── diabetes.csv ├── face-mask-detector-project.zip ├── langgraph_simple_chatbot.ipynb ├── medspacydemo.ipynb ├── segmind_ssd.ipynb ├── sentimentanalysis_usingbert.py ├── stable_diffusion_with_chatgpt_noteook.ipynb ├── test script.py ├── test.csv ├── text_summarization.py ├── titanic_processed_data.csv └── train.csv /ActivePassive.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | from styleformer import Styleformer 4 | import torch 5 | sf = Styleformer(style = 2) 6 | st.title('Active Voice to Passive Voice Converter') 7 | st.write("Please enter your sentence in active voice") 8 | text = st.text_input('Entered Text') 9 | if st.button('Convert Active to Passive'): 10 | target_sentence = sf.transfer(text) 11 | st.write(target_sentence) 12 | else: 13 | pass 14 | 15 | 16 | -------------------------------------------------------------------------------- /Automatic EDA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from pandas_profiling import ProfileReport\n", 12 | "import sklearn" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from sklearn.datasets import load_iris" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "diab_data=load_iris()" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "df=pd.DataFrame(data=diab_data.data,columns=diab_data.feature_names)" 40 | ] 41 | }, 42 | { 
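# A minimal command-line sketch of the Styleformer call that ActivePassive.py
# (above) wraps in a Streamlit UI. The constructor and transfer() are used
# exactly as in the repo's own scripts: style=2 is active-to-passive here, and
# CasualToFormalConverter.py later uses style=0 for casual-to-formal. The
# example sentence and its result are illustrative, not recorded output.
from styleformer import Styleformer

sf = Styleformer(style=2)  # 2 = active-to-passive, per ActivePassive.py
print(sf.transfer("The dog chased the cat."))
# Expected along the lines of: "The cat was chased by the dog."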
43 | "cell_type": "code", 44 | "execution_count": 5, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/html": [ 50 | "
\n", 51 | "\n", 64 | "\n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
\n", 112 | "
" 113 | ], 114 | "text/plain": [ 115 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n", 116 | "0 5.1 3.5 1.4 0.2\n", 117 | "1 4.9 3.0 1.4 0.2\n", 118 | "2 4.7 3.2 1.3 0.2\n", 119 | "3 4.6 3.1 1.5 0.2\n", 120 | "4 5.0 3.6 1.4 0.2" 121 | ] 122 | }, 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "df.head()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 6, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',\n", 141 | " 'petal width (cm)'],\n", 142 | " dtype='object')" 143 | ] 144 | }, 145 | "execution_count": 6, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "df.columns" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 7, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 8, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "application/vnd.jupyter.widget-view+json": { 171 | "model_id": "cb20f4fe95354e03818ef76f2c1056d4", 172 | "version_major": 2, 173 | "version_minor": 0 174 | }, 175 | "text/plain": [ 176 | "HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=18.0, style=ProgressStyle(descrip…" 177 | ] 178 | }, 179 | "metadata": {}, 180 | "output_type": "display_data" 181 | }, 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "\n" 187 | ] 188 | }, 189 | { 190 | "data": { 191 | "application/vnd.jupyter.widget-view+json": { 192 | "model_id": "7b5a0242c1fa474fb7355dc84e048725", 193 | "version_major": 2, 194 | "version_minor": 0 195 | }, 196 | "text/plain": [ 197 | "HBox(children=(FloatProgress(value=0.0, description='Generate report structure', max=1.0, style=ProgressStyle(…" 198 | ] 199 | }, 200 | "metadata": {}, 201 | "output_type": "display_data" 202 | }, 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "\n" 208 | ] 209 | }, 210 | { 211 | "data": { 212 | "application/vnd.jupyter.widget-view+json": { 213 | "model_id": "", 214 | "version_major": 2, 215 | "version_minor": 0 216 | }, 217 | "text/plain": [ 218 | "HBox(children=(FloatProgress(value=0.0, description='Render widgets', max=1.0, style=ProgressStyle(description…" 219 | ] 220 | }, 221 | "metadata": {}, 222 | "output_type": "display_data" 223 | }, 224 | { 225 | "data": { 226 | "application/vnd.jupyter.widget-view+json": { 227 | "model_id": "cbf1a7f8914540c9a3233999b14f6372", 228 | "version_major": 2, 229 | "version_minor": 0 230 | }, 231 | "text/plain": [ 232 | "VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…" 233 | ] 234 | }, 235 | "metadata": {}, 236 | "output_type": "display_data" 237 | } 238 | ], 239 | "source": [ 240 | "profile.to_widgets()" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 9, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "application/vnd.jupyter.widget-view+json": { 251 | "model_id": "ce9fd83849f74df6a15877cde281cad9", 252 | "version_major": 2, 253 | "version_minor": 0 254 | }, 255 | "text/plain": [ 256 | "HBox(children=(FloatProgress(value=0.0, description='Render HTML', max=1.0, 
style=ProgressStyle(description_wi…" 257 | ] 258 | }, 259 | "metadata": {}, 260 | "output_type": "display_data" 261 | }, 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "\n" 267 | ] 268 | }, 269 | { 270 | "data": { 271 | "application/vnd.jupyter.widget-view+json": { 272 | "model_id": "43e7ed70e39b41f38e76bbade3ef75d1", 273 | "version_major": 2, 274 | "version_minor": 0 275 | }, 276 | "text/plain": [ 277 | "HBox(children=(FloatProgress(value=0.0, description='Export report to file', max=1.0, style=ProgressStyle(desc…" 278 | ] 279 | }, 280 | "metadata": {}, 281 | "output_type": "display_data" 282 | }, 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "profile.to_file(\"Output2.html\")" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 19, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "import sweetviz,pandas as pd" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 20, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "data = pd.read_csv('titanic.csv',sep = '\\t')" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 21, 316 | "metadata": {}, 317 | "outputs": [ 318 | { 319 | "data": { 320 | "text/html": [ 321 | "
\n", 322 | "\n", 335 | "\n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
.......................................
15115211Pears, Mrs. Thomas (Edith Wearne)female22.01011377666.6000C2S
15215303Meo, Mr. Alfonzomale55.500A.5. 112068.0500NaNS
15315403van Billiard, Mr. Austin Blylermale40.502A/5. 85114.5000NaNS
15415503Olsen, Mr. Ole MartinmaleNaN00Fa 2653027.3125NaNS
15515601Williams, Mr. Charles Duanemale51.001PC 1759761.3792NaNC
\n", 521 | "

156 rows × 12 columns

\n", 522 | "
" 523 | ], 524 | "text/plain": [ 525 | " PassengerId Survived Pclass \\\n", 526 | "0 1 0 3 \n", 527 | "1 2 1 1 \n", 528 | "2 3 1 3 \n", 529 | "3 4 1 1 \n", 530 | "4 5 0 3 \n", 531 | ".. ... ... ... \n", 532 | "151 152 1 1 \n", 533 | "152 153 0 3 \n", 534 | "153 154 0 3 \n", 535 | "154 155 0 3 \n", 536 | "155 156 0 1 \n", 537 | "\n", 538 | " Name Sex Age SibSp \\\n", 539 | "0 Braund, Mr. Owen Harris male 22.0 1 \n", 540 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", 541 | "2 Heikkinen, Miss. Laina female 26.0 0 \n", 542 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", 543 | "4 Allen, Mr. William Henry male 35.0 0 \n", 544 | ".. ... ... ... ... \n", 545 | "151 Pears, Mrs. Thomas (Edith Wearne) female 22.0 1 \n", 546 | "152 Meo, Mr. Alfonzo male 55.5 0 \n", 547 | "153 van Billiard, Mr. Austin Blyler male 40.5 0 \n", 548 | "154 Olsen, Mr. Ole Martin male NaN 0 \n", 549 | "155 Williams, Mr. Charles Duane male 51.0 0 \n", 550 | "\n", 551 | " Parch Ticket Fare Cabin Embarked \n", 552 | "0 0 A/5 21171 7.2500 NaN S \n", 553 | "1 0 PC 17599 71.2833 C85 C \n", 554 | "2 0 STON/O2. 3101282 7.9250 NaN S \n", 555 | "3 0 113803 53.1000 C123 S \n", 556 | "4 0 373450 8.0500 NaN S \n", 557 | ".. ... ... ... ... ... \n", 558 | "151 0 113776 66.6000 C2 S \n", 559 | "152 0 A.5. 11206 8.0500 NaN S \n", 560 | "153 2 A/5. 851 14.5000 NaN S \n", 561 | "154 0 Fa 265302 7.3125 NaN S \n", 562 | "155 1 PC 17597 61.3792 NaN C \n", 563 | "\n", 564 | "[156 rows x 12 columns]" 565 | ] 566 | }, 567 | "execution_count": 21, 568 | "metadata": {}, 569 | "output_type": "execute_result" 570 | } 571 | ], 572 | "source": [ 573 | "data" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 22, 579 | "metadata": {}, 580 | "outputs": [ 581 | { 582 | "name": "stderr", 583 | "output_type": "stream", 584 | "text": [ 585 | ":FEATURES DONE: |█████████████████████| [100%] 00:04 -> (00:00 left)\n", 586 | ":PAIRWISE DONE: |█████████████████████| [100%] 00:00 -> (00:00 left)\n" 587 | ] 588 | }, 589 | { 590 | "name": "stdout", 591 | "output_type": "stream", 592 | "text": [ 593 | "Creating Associations graph... 
DONE!\n" 594 | ] 595 | } 596 | ], 597 | "source": [ 598 | "my_report = sweetviz.analyze([data, \"Data\"],target_feat='Survived')" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 23, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "my_report.show_html('Report.html')" 608 | ] 609 | } 610 | ], 611 | "metadata": { 612 | "kernelspec": { 613 | "display_name": "Python 3", 614 | "language": "python", 615 | "name": "python3" 616 | }, 617 | "language_info": { 618 | "codemirror_mode": { 619 | "name": "ipython", 620 | "version": 3 621 | }, 622 | "file_extension": ".py", 623 | "mimetype": "text/x-python", 624 | "name": "python", 625 | "nbconvert_exporter": "python", 626 | "pygments_lexer": "ipython3", 627 | "version": "3.8.1" 628 | } 629 | }, 630 | "nbformat": 4, 631 | "nbformat_minor": 4 632 | } 633 | -------------------------------------------------------------------------------- /BERTspamfilter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "BERTspamfilter.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "colab": { 22 | "base_uri": "https://localhost:8080/" 23 | }, 24 | "id": "SXBDflH0yEBW", 25 | "outputId": "7a54ff87-c16e-43d3-d050-8e16adb81090" 26 | }, 27 | "source": [ 28 | "!pip install transformers" 29 | ], 30 | "execution_count": 1, 31 | "outputs": [ 32 | { 33 | "output_type": "stream", 34 | "text": [ 35 | "Collecting transformers\n", 36 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/b0/9e/5b80becd952d5f7250eaf8fc64b957077b12ccfe73e9c03d37146ab29712/transformers-4.6.0-py3-none-any.whl (2.3MB)\n", 37 | "\u001b[K |████████████████████████████████| 2.3MB 25.2MB/s \n", 38 | "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n", 39 | "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n", 40 | "Collecting sacremoses\n", 41 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)\n", 42 | "\u001b[K |████████████████████████████████| 901kB 41.3MB/s \n", 43 | "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.41.1)\n", 44 | "Collecting tokenizers<0.11,>=0.10.1\n", 45 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)\n", 46 | "\u001b[K |████████████████████████████████| 3.3MB 39.7MB/s \n", 47 | "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", 48 | "Collecting huggingface-hub==0.0.8\n", 49 | " Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl\n", 50 | "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", 51 | "Requirement already satisfied: packaging in 
/usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n", 52 | "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (4.0.1)\n", 53 | "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n", 54 | "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n", 55 | "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (8.0.0)\n", 56 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", 57 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", 58 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2020.12.5)\n", 59 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", 60 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n", 61 | "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.7.4.3)\n", 62 | "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n", 63 | "Installing collected packages: sacremoses, tokenizers, huggingface-hub, transformers\n", 64 | "Successfully installed huggingface-hub-0.0.8 sacremoses-0.0.45 tokenizers-0.10.2 transformers-4.6.0\n" 65 | ], 66 | "name": "stdout" 67 | } 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "metadata": { 73 | "id": "-lI76wIlypxM" 74 | }, 75 | "source": [ 76 | "import numpy as np\n", 77 | "import pandas as pd\n", 78 | "from sklearn.model_selection import train_test_split\n", 79 | "import torch\n", 80 | "import transformers as tf\n", 81 | "import warnings\n", 82 | "warnings.filterwarnings('ignore')" 83 | ], 84 | "execution_count": 33, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "metadata": { 90 | "colab": { 91 | "base_uri": "https://localhost:8080/", 92 | "height": 206 93 | }, 94 | "id": "SUaQNE0tyvA-", 95 | "outputId": "c43fd1b2-577f-4a86-a38b-fbe50dbb0c82" 96 | }, 97 | "source": [ 98 | "import pandas as pd\n", 99 | "# Dataset from - https://archive.ics.uci.edu/ml/datasets/SMS+Spam+Collection\n", 100 | "data_sms= pd.read_table('sample_data/SMSSpamCollection',\n", 101 | " sep='\\t',\n", 102 | " header=None,\n", 103 | " names=['label', 'sms'])\n", 104 | "\n", 105 | "data_sms.head()" 106 | ], 107 | "execution_count": 3, 108 | "outputs": [ 109 | { 110 | "output_type": "execute_result", 111 | "data": { 112 | "text/html": [ 113 | "
\n", 114 | "\n", 127 | "\n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | "
labelsms
0hamGo until jurong point, crazy.. Available only ...
1hamOk lar... Joking wif u oni...
2spamFree entry in 2 a wkly comp to win FA Cup fina...
3hamU dun say so early hor... U c already then say...
4hamNah I don't think he goes to usf, he lives aro...
\n", 163 | "
" 164 | ], 165 | "text/plain": [ 166 | " label sms\n", 167 | "0 ham Go until jurong point, crazy.. Available only ...\n", 168 | "1 ham Ok lar... Joking wif u oni...\n", 169 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", 170 | "3 ham U dun say so early hor... U c already then say...\n", 171 | "4 ham Nah I don't think he goes to usf, he lives aro..." 172 | ] 173 | }, 174 | "metadata": { 175 | "tags": [] 176 | }, 177 | "execution_count": 3 178 | } 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "metadata": { 184 | "id": "4r3jP0Li_sac" 185 | }, 186 | "source": [ 187 | "sample_data = data_sms[:1000]" 188 | ], 189 | "execution_count": 11, 190 | "outputs": [] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "metadata": { 195 | "id": "e1Mv3aBpzOqd" 196 | }, 197 | "source": [ 198 | "ml_cls, tokenizer_cls, pt_weights = (tf.DistilBertModel, tf.DistilBertTokenizer, 'distilbert-base-uncased')" 199 | ], 200 | "execution_count": 36, 201 | "outputs": [] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "metadata": { 206 | "id": "58NSOHs7-AJf" 207 | }, 208 | "source": [ 209 | "tzr = tokenizer_cls.from_pretrained(pt_weights)" 210 | ], 211 | "execution_count": 37, 212 | "outputs": [] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "metadata": { 217 | "colab": { 218 | "base_uri": "https://localhost:8080/" 219 | }, 220 | "id": "YuFAAgb7-JGA", 221 | "outputId": "13539fd3-f3de-46cf-e8f2-c6cd551ca269" 222 | }, 223 | "source": [ 224 | "mod = ml_cls.from_pretrained(pt_weights)" 225 | ], 226 | "execution_count": 39, 227 | "outputs": [ 228 | { 229 | "output_type": "stream", 230 | "text": [ 231 | "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias']\n", 232 | "- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", 233 | "- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" 234 | ], 235 | "name": "stderr" 236 | } 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "metadata": { 242 | "id": "4FcTNjaq-PaK" 243 | }, 244 | "source": [ 245 | "tokenized = sample_data['sms'].apply(lambda x: tzr.encode(x, add_special_tokens=True))" 246 | ], 247 | "execution_count": 40, 248 | "outputs": [] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "metadata": { 253 | "id": "Tto46jcp-hCt" 254 | }, 255 | "source": [ 256 | "max_len = 0\n", 257 | "for i in tokenized.values:\n", 258 | " if len(i) > max_len:\n", 259 | " max_len = len(i)\n", 260 | "\n", 261 | "padded = np.array([i + [0]*(max_len-len(i)) for i in tokenized.values])" 262 | ], 263 | "execution_count": 41, 264 | "outputs": [] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "metadata": { 269 | "colab": { 270 | "base_uri": "https://localhost:8080/" 271 | }, 272 | "id": "0Rr8Z-kX-pHn", 273 | "outputId": "4233f2a8-0dcc-44b3-b9d2-ccef656a1df3" 274 | }, 275 | "source": [ 276 | "np.array(padded).shape" 277 | ], 278 | "execution_count": 42, 279 | "outputs": [ 280 | { 281 | "output_type": "execute_result", 282 | "data": { 283 | "text/plain": [ 284 | "(1000, 100)" 285 | ] 286 | }, 287 | "metadata": { 288 | "tags": [] 289 | }, 290 | "execution_count": 42 291 | } 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "metadata": { 297 | "colab": { 298 | "base_uri": "https://localhost:8080/" 299 | }, 300 | "id": "pOp34e3a-uT1", 301 | "outputId": "e5ab3d4c-7c81-4960-f2f0-b7aec01da728" 302 | }, 303 | "source": [ 304 | "attention_mask = np.where(padded != 0, 1, 0)\n", 305 | "attention_mask.shape" 306 | ], 307 | "execution_count": 43, 308 | "outputs": [ 309 | { 310 | "output_type": "execute_result", 311 | "data": { 312 | "text/plain": [ 313 | "(1000, 100)" 314 | ] 315 | }, 316 | "metadata": { 317 | "tags": [] 318 | }, 319 | "execution_count": 43 320 | } 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": { 326 | "id": "oICLQY7rg7_6" 327 | }, 328 | "source": [ 329 | "torch.no_grad() deactivates the autograd engine, which reduces memory usage and speeds up computation.\n", 330 | "\n", 331 | "Uses of torch.no_grad():\n", 332 | "\n", 333 | " To perform inference without gradient calculation.\n", 334 | "\n", 335 | " To make sure no test data leaks into the model.\n", 336 | "\n", 337 | "It's generally used during validation, 
since one can then afford a larger validation batch size." 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "metadata": { 343 | "id": "tQw7--SK-485" 344 | }, 345 | "source": [ 346 | "input_ids = torch.tensor(padded)\n", 347 | "attention_mask = torch.tensor(attention_mask)\n", 348 | "\n", 349 | "with torch.no_grad():\n", 350 | " last_hidden_states = mod(input_ids, attention_mask=attention_mask)" 351 | ], 352 | "execution_count": 44, 353 | "outputs": [] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "metadata": { 358 | "id": "9gZWEqrKBaTy" 359 | }, 360 | "source": [ 361 | "features = last_hidden_states[0][:,0,:].numpy()" 362 | ], 363 | "execution_count": 45, 364 | "outputs": [] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "metadata": { 369 | "id": "x8S4svUCBdXJ" 370 | }, 371 | "source": [ 372 | "labels = sample_data['label']" 373 | ], 374 | "execution_count": 46, 375 | "outputs": [] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "metadata": { 380 | "id": "3IE2_e70Biv9" 381 | }, 382 | "source": [ 383 | "mapping = {'ham': 0, 'spam': 1}\n", 384 | "labels = labels.map(mapping)" 385 | ], 386 | "execution_count": 47, 387 | "outputs": [] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "metadata": { 392 | "id": "4a2U91nZDE4b" 393 | }, 394 | "source": [ 395 | "train_features, test_features, train_labels, test_labels = train_test_split(features, labels)" 396 | ], 397 | "execution_count": 48, 398 | "outputs": [] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "metadata": { 403 | "colab": { 404 | "base_uri": "https://localhost:8080/" 405 | }, 406 | "id": "EptrX8Y2DbIJ", 407 | "outputId": "cc4886f4-2903-4178-a721-380a07529444" 408 | }, 409 | "source": [ 410 | "from sklearn.tree import DecisionTreeClassifier\n", 411 | "clf = DecisionTreeClassifier(random_state=0)\n", 412 | "clf.fit(train_features, train_labels)" 413 | ], 414 | "execution_count": 49, 415 | "outputs": [ 416 | { 417 | "output_type": "execute_result", 418 | "data": { 419 | "text/plain": [ 420 | "DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n", 421 | " max_depth=None, max_features=None, max_leaf_nodes=None,\n", 422 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 423 | " min_samples_leaf=1, min_samples_split=2,\n", 424 | " min_weight_fraction_leaf=0.0, presort='deprecated',\n", 425 | " random_state=0, splitter='best')" 426 | ] 427 | }, 428 | "metadata": { 429 | "tags": [] 430 | }, 431 | "execution_count": 49 432 | } 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "metadata": { 438 | "colab": { 439 | "base_uri": "https://localhost:8080/" 440 | }, 441 | "id": "JfTQXDyqDiKH", 442 | "outputId": "ddcb7d3a-98fd-46bc-b9ce-086ff80a5a88" 443 | }, 444 | "source": [ 445 | "clf.score(test_features, test_labels)" 446 | ], 447 | "execution_count": 50, 448 | "outputs": [ 449 | { 450 | "output_type": "execute_result", 451 | "data": { 452 | "text/plain": [ 453 | "0.92" 454 | ] 455 | }, 456 | "metadata": { 457 | "tags": [] 458 | }, 459 | "execution_count": 50 460 | } 461 | ] 462 | } 463 | ] 464 | } -------------------------------------------------------------------------------- /Basic Chatbot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import nltk" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 12, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | 
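# A sketch of one variant on BERTspamfilter.ipynb above (not a cell from the
# original notebook): a linear model such as logistic regression usually fits
# frozen-transformer [CLS] features better than a single decision tree. It
# reuses the notebook's train/test splits of `features` and `labels`.
from sklearn.linear_model import LogisticRegression

lr_clf = LogisticRegression(max_iter=1000)
lr_clf.fit(train_features, train_labels)
print(lr_clf.score(test_features, test_labels))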
"output_type": "stream", 20 | "text": [ 21 | "3.4.5\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "print(nltk.__version__)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 9, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from nltk.chat.util import Chat,reflections" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 10, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "pairs = [[r\"Hey (.*)\", [\"Chatarpatar : How are you Akshay ?\"]],\n", 45 | " [r\"I m good.Who do(.*)\",[\"Chatarpatar : IPL is so unpredictable , can't say but CSK , MI have good records.\"]],\n", 46 | " [r\"I agree.Which Team(.)* ?\",[\"Chatarpatar : CSK, what about you ?\"]],\n", 47 | " [r\"The one which(.)*\",[\"Chatarpatar : Funny , should I laugh ?\"]],\n", 48 | " [r\"You are a chatbot (.)*\",[\"Chatarpatar : That was MACHINIST(RACIST) !!! Go quit me I don't wanna talk with you\"]],\n", 49 | " [r\"Come on Chatarpata(.)*\",[\"Chatarpatar : Quit me Akshay !!\"]], \n", 50 | " [r\"Chatbot he pr nakhre(.)*\",[\"Chatpatar : Bye Akshay , Have a *** day\"]]]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 11, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "def Chatbot():\n", 60 | " print(\"Chatarpatar : Hey Akshay\")\n", 61 | " chatbot = Chat(pairs,reflections)\n", 62 | " chatbot.converse()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 12, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Chatarpatar : Hey Akshay\n", 75 | "AKSHAY : Hey Chatarpatar\n", 76 | "Chatarpatar : How are you Akshay ?\n", 77 | "AKSHAY : I m good.Who do u think will win IPL\n", 78 | "Chatarpatar : IPL is so unpredictable , can't say but CSK , MI have good records.\n", 79 | "AKSHAY : I agree.Which Team is ur favourite ?\n", 80 | "Chatarpatar : CSK, what about you ?\n", 81 | "AKSHAY : The one which wins.Hehehe\n", 82 | "Chatarpatar : Funny , should I laugh ?\n", 83 | "AKSHAY : You are a chatbot. How can you laugh ?\n", 84 | "None\n", 85 | "AKSHAY : You are a chatbot . How can you laugh ?\n", 86 | "Chatarpatar : That was MACHINIST(RACIST) !!! 
Go quit me I don't wanna talk with you\n", 87 | "AKSHAY : Come on Chatarpatar.Be Sporty\n", 88 | "Chatarpatar : Quit me Akshay !!\n", 89 | "AKSHAY : Chatbot he pr nakhre toh dekho\n", 90 | "Chatpatar : Bye Akshay , Have a *** day\n" 91 | ] 92 | }, 93 | { 94 | "ename": "KeyboardInterrupt", 95 | "evalue": "Interrupted by user", 96 | "output_type": "error", 97 | "traceback": [ 98 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 99 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 100 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__name__\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'__main__'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mChatbot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 101 | "\u001b[0;32m\u001b[0m in \u001b[0;36mChatbot\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Chatarpatar : Hey Akshay\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mchatbot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mChat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpairs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mreflections\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mchatbot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconverse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 102 | "\u001b[0;32m~/anaconda3/envs/NLP/lib/python3.6/site-packages/nltk/chat/util.py\u001b[0m in \u001b[0;36mconverse\u001b[0;34m(self, quit)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0muser_input\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mquit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m \u001b[0muser_input\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"AKSHAY : \"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mEOFError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muser_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 103 | "\u001b[0;32m~/anaconda3/envs/NLP/lib/python3.6/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36mraw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_ident\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 862\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_header\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 863\u001b[0;31m \u001b[0mpassword\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 864\u001b[0m )\n\u001b[1;32m 865\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 
104 | "\u001b[0;32m~/anaconda3/envs/NLP/lib/python3.6/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36m_input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m 902\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 903\u001b[0m \u001b[0;31m# re-raise KeyboardInterrupt, to truncate traceback\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 904\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Interrupted by user\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 905\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 906\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwarning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Invalid Message:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 105 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: Interrupted by user" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "if __name__ == '__main__':\n", 111 | " Chatbot()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "metadata": { 146 | "scrolled": true 147 | }, 148 | "outputs": [ 149 | { 150 | "name": "stdout", 151 | "output_type": "stream", 152 | "text": [ 153 | "Chatarpatar : Hey Akshay\n" 154 | ] 155 | }, 156 | { 157 | "ename": "NameError", 158 | "evalue": "name 'pairs' is not defined", 159 | "output_type": "error", 160 | "traceback": [ 161 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 162 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 163 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__name__\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'__main__'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mChatbot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 164 | "\u001b[0;32m\u001b[0m in \u001b[0;36mChatbot\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mChatbot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Chatarpatar : Hey 
Akshay\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mchatbot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mChat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpairs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mreflections\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mchatbot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconverse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 165 | "\u001b[0;31mNameError\u001b[0m: name 'pairs' is not defined" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "if __name__ == '__main__':\n", 171 | " Chatbot()" 172 | ] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.6.10" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 4 196 | } 197 | -------------------------------------------------------------------------------- /Building_an_auto_correct_in_python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Building an auto correct in python.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "rPFzEHfSpyIl" 22 | }, 23 | "source": [ 24 | "#Installing the required packages" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "metadata": { 30 | "colab": { 31 | "base_uri": "https://localhost:8080/" 32 | }, 33 | "id": "hGU_V6CypHMv", 34 | "outputId": "e569cfca-6fe9-4fb8-c003-52cd06b84f4f" 35 | }, 36 | "source": [ 37 | "! 
pip install pyspellchecker" 38 | ], 39 | "execution_count": 2, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "text": [ 44 | "Collecting pyspellchecker\n", 45 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/64/c7/435f49c0ac6bec031d1aba4daf94dc21dc08a9db329692cdb77faac51cea/pyspellchecker-0.6.2-py3-none-any.whl (2.7MB)\n", 46 | "\u001b[K |████████████████████████████████| 2.7MB 3.3MB/s \n", 47 | "\u001b[?25hInstalling collected packages: pyspellchecker\n", 48 | "Successfully installed pyspellchecker-0.6.2\n" 49 | ], 50 | "name": "stdout" 51 | } 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "id": "y966QZZqqKUV" 58 | }, 59 | "source": [ 60 | "#Coding" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "metadata": { 66 | "colab": { 67 | "base_uri": "https://localhost:8080/" 68 | }, 69 | "id": "qfjt4IXRpusJ", 70 | "outputId": "4a8916e9-23a9-432c-812f-9e4f6de6d121" 71 | }, 72 | "source": [ 73 | "from spellchecker import SpellChecker\n", 74 | "\n", 75 | "spell = SpellChecker()\n", 76 | "\n", 77 | "text = \"Dataa scienttist is a very promising career.\"\n", 78 | "\n", 79 | "dict_of_autocorrect_words = {}\n", 80 | "for i in spell.unknown(text.split()):\n", 81 | " dict_of_autocorrect_words[i] = spell.correction(i)\n", 82 | "\n", 83 | "print(f'The AUTOCORRECT suggestions are Mis-spelled words are {dict_of_autocorrect_words}')\n", 84 | "\n", 85 | "temp = text.split()\n", 86 | "res = []\n", 87 | "for wrd in temp:\n", 88 | " \n", 89 | " res.append(dict_of_autocorrect_words.get(wrd, wrd))\n", 90 | " \n", 91 | "res = ' '.join(res)\n", 92 | "\n", 93 | "print(res)\n", 94 | "\n" 95 | ], 96 | "execution_count": 7, 97 | "outputs": [ 98 | { 99 | "output_type": "stream", 100 | "text": [ 101 | "The AUTOCORRECT suggestions are Mis-spelled words are {'dataa': 'data', 'scienttist': 'scientist', 'career.': 'career'}\n", 102 | "Dataa scientist is a very promising career\n" 103 | ], 104 | "name": "stdout" 105 | } 106 | ] 107 | } 108 | ] 109 | } -------------------------------------------------------------------------------- /COVIDdetectionusingXray.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Feb 27 01:16:15 2021 5 | 6 | @author: intel123 7 | """ 8 | 9 | from keras.models import Sequential 10 | from keras.layers import Conv2D 11 | from keras.layers import MaxPooling2D 12 | from keras.layers import Flatten 13 | from keras.layers import Dense 14 | 15 | Classifier=Sequential(); 16 | 17 | Classifier.add(Conv2D(64,(3,3),input_shape=(64,64,3),activation='relu')) 18 | 19 | Classifier.add(MaxPooling2D(pool_size=(2,2))) 20 | 21 | Classifier.add(Conv2D(32,(3,3),activation='relu')) 22 | 23 | Classifier.add(MaxPooling2D(pool_size=(2,2))) 24 | 25 | Classifier.add(Flatten()) 26 | 27 | Classifier.add(Dense(units=104, activation='relu')) 28 | Classifier.add(Dense(units=1, activation='sigmoid')) 29 | 30 | Classifier.compile(optimizer ='adam',loss='binary_crossentropy',metrics=['accuracy']) 31 | 32 | from keras.preprocessing.image import ImageDataGenerator 33 | 34 | train_datagen = ImageDataGenerator(rescale = 1./255, 35 | shear_range = 0.4, 36 | zoom_range = 0.3, 37 | horizontal_flip = True) 38 | 39 | test_datagen = ImageDataGenerator(rescale = 1./255) 40 | 41 | training_set = train_datagen.flow_from_directory('/home/intel123/AS/dataset/training_set', 42 | target_size = (64, 64), 43 | batch_size = 4, 44 | class_mode = 'binary') 45 | 46 | test_set = 
test_datagen.flow_from_directory('/home/intel123/AS/dataset/test_set', 47 | target_size = (64, 64), 48 | batch_size = 4, 49 | class_mode = 'binary') 50 | 51 | 52 | Classifier.fit_generator(training_set, 53 | steps_per_epoch = 40, 54 | epochs = 5, 55 | validation_data = test_set, 56 | validation_steps = 8) 57 | 58 | import numpy as np 59 | from keras.preprocessing import image 60 | test_image = image.load_img('/home/intel123/AS/dataset/covid_or_normal.jpg',target_size=(64,64)) 61 | test_image = image.img_to_array(test_image) 62 | test_image = np.expand_dims(test_image,axis=0) 63 | result = Classifier.predict(test_image) 64 | training_set.class_indices 65 | if result[0][0] == 1: 66 | prediction = 'Normal' 67 | print(prediction) 68 | else: 69 | prediction = 'COVID' 70 | print(prediction) -------------------------------------------------------------------------------- /CasualToFormalConverter.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | from styleformer import Styleformer 4 | import torch 5 | sf = Styleformer(style = 0) 6 | st.title('Casual to Formal converter') 7 | st.write("Please enter your casual text") 8 | text = st.text_input('Enter some text') 9 | if st.button('Hit me'): 10 | target_sentence = sf.transfer(text) 11 | st.write(target_sentence) 12 | else: 13 | pass 14 | 15 | 16 | -------------------------------------------------------------------------------- /Comparing different language detector.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# langdetect package" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from langdetect import detect " 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 7, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "en\n", 29 | "hi\n", 30 | "es\n", 31 | "ja\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "print(detect(\"At AS Learning , learning never ends.\")) \n", 37 | "print(detect(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\")) \n", 38 | "print(detect(\"En AS Learning, el aprendizaje nunca termina.\")) \n", 39 | "print(detect(\"ASラーニングでは、学習は決して終わりません\")) " 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Langid package" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 57, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "('en', 0.901430497396395)\n", 59 | "('hi', 0.9999996623730644)\n", 60 | "('es', 0.9870864718495084)\n", 61 | "('ja', 1.0)\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "import langid\n", 67 | "from langid.langid import LanguageIdentifier, model\n", 68 | "lang_identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)\n", 69 | "print(lang_identifier.classify(\"At AS Learning , learning never ends.\"))\n", 70 | "print(lang_identifier.classify(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\"))\n", 71 | "print(lang_identifier.classify(\"En AS Learning, el aprendizaje nunca termina.\"))\n", 72 | "print(lang_identifier.classify(\"ASラーニングでは、学習は決して終わりません\"))" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "# TextBlob package" 80 | ] 
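# Comparing different language detector.ipynb (above) demos langdetect, langid
# and TextBlob, and a later section only names spaCy and fastText as further
# options. A minimal fastText sketch, assuming the pretrained lid.176.bin
# language-ID model has been downloaded from fasttext.cc (it is not in this repo):
import fasttext

ft_model = fasttext.load_model('lid.176.bin')
labels, probs = ft_model.predict("At AS Learning , learning never ends.")
print(labels[0], probs[0])  # e.g. '__label__en' with its confidence score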
81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 10, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "from textblob import TextBlob " 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 11, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "en\n", 101 | "hi\n", 102 | "es\n", 103 | "ja\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "print(TextBlob(\"At AS Learning , learning never ends.\").detect_language()) \n", 109 | "print(TextBlob(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\").detect_language()) \n", 110 | "print(TextBlob(\"En AS Learning, el aprendizaje nunca termina.\").detect_language()) \n", 111 | "print(TextBlob(\"ASラーニングでは、学習は決して終わりません\").detect_language()) " 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "# Other packages spacy and Fasttext" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "# Speed comparison between three packages" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 68, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "en\n", 138 | "hi\n", 139 | "es\n", 140 | "ja\n", 141 | "Time elapsed for TextBlob: 0.802361011505127\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "import time\n", 147 | "start = time.time()\n", 148 | "print(TextBlob(\"At AS Learning , learning never ends.\").detect_language()) \n", 149 | "print(TextBlob(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\").detect_language()) \n", 150 | "print(TextBlob(\"En AS Learning, el aprendizaje nunca termina.\").detect_language()) \n", 151 | "print(TextBlob(\"ASラーニングでは、学習は決して終わりません\").detect_language()) \n", 152 | "end = time.time()\n", 153 | "print (\"Time elapsed for TextBlob:\", end - start)\n", 154 | "\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 69, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "('en', 0.901430497396395)\n", 167 | "('hi', 0.9999996623730644)\n", 168 | "('es', 0.9870864718495084)\n", 169 | "('ja', 1.0)\n", 170 | "Time elapsed for langid: 0.009955883026123047\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "start = time.time()\n", 176 | "print(lang_identifier.classify(\"At AS Learning , learning never ends.\"))\n", 177 | "print(lang_identifier.classify(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\"))\n", 178 | "print(lang_identifier.classify(\"En AS Learning, el aprendizaje nunca termina.\"))\n", 179 | "print(lang_identifier.classify(\"ASラーニングでは、学習は決して終わりません\"))\n", 180 | "end = time.time()\n", 181 | "print (\"Time elapsed for langid:\", end - start)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 71, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "name": "stdout", 191 | "output_type": "stream", 192 | "text": [ 193 | "en\n", 194 | "hi\n", 195 | "es\n", 196 | "ja\n", 197 | "Time elapsed for langdetect: 0.05070662498474121\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "start = time.time()\n", 203 | "print(detect(\"At AS Learning , learning never ends.\")) \n", 204 | "print(detect(\"एएस लर्निंग में, सीखना कभी समाप्त नहीं होता है।\")) \n", 205 | "print(detect(\"En AS Learning, el aprendizaje nunca termina.\")) \n", 206 | "print(detect(\"ASラーニングでは、学習は決して終わりません\")) \n", 207 | "end = 
time.time()\n", 208 | "print (\"Time elapsed for langdetect:\", end - start)" 209 | ] 210 | } 211 | ], 212 | "metadata": { 213 | "kernelspec": { 214 | "display_name": "Python 3", 215 | "language": "python", 216 | "name": "python3" 217 | }, 218 | "language_info": { 219 | "codemirror_mode": { 220 | "name": "ipython", 221 | "version": 3 222 | }, 223 | "file_extension": ".py", 224 | "mimetype": "text/x-python", 225 | "name": "python", 226 | "nbconvert_exporter": "python", 227 | "pygments_lexer": "ipython3", 228 | "version": "3.7.6" 229 | } 230 | }, 231 | "nbformat": 4, 232 | "nbformat_minor": 4 233 | } 234 | -------------------------------------------------------------------------------- /DataPrep.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "DataPrep.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "colab": { 22 | "base_uri": "https://localhost:8080/" 23 | }, 24 | "id": "n449T-2LgzLl", 25 | "outputId": "3a3cc2e9-1b67-47c7-e6b0-ef8a9b7fbdce" 26 | }, 27 | "source": [ 28 | "! pip install dataprep" 29 | ], 30 | "execution_count": 1, 31 | "outputs": [ 32 | { 33 | "output_type": "stream", 34 | "text": [ 35 | "Collecting dataprep\n", 36 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f6/c4/1f6f12d3f8bc5b71cd806f66c96c8f7f8615328a99a16e90e66cc17f3b84/dataprep-0.2.15-py3-none-any.whl (193kB)\n", 37 | "\u001b[K |████████████████████████████████| 194kB 5.8MB/s \n", 38 | "\u001b[?25hCollecting pydantic<2.0,>=1.6\n", 39 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/2b/a3/0ffdb6c63f45f10d19b8e8b32670b22ed089cafb29732f6bf8ce518821fb/pydantic-1.8.1-cp37-cp37m-manylinux2014_x86_64.whl (10.1MB)\n", 40 | "\u001b[K |████████████████████████████████| 10.1MB 13.0MB/s \n", 41 | "\u001b[?25hRequirement already satisfied: jinja2<3.0,>=2.11 in /usr/local/lib/python3.7/dist-packages (from dataprep) (2.11.3)\n", 42 | "Collecting aiohttp<4.0,>=3.6\n", 43 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/88/c0/5890b4c8b04a79b7360e8fe4490feb0bb3ab179743f199f0e6220cebd568/aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3MB)\n", 44 | "\u001b[K |████████████████████████████████| 1.3MB 45.1MB/s \n", 45 | "\u001b[?25hCollecting dask[array,dataframe,delayed]<3.0,>=2.25\n", 46 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/04/94/b4012c61c09300f4413c58a522a6cc1a212dc4a7f6fe1ba98d67429c089d/dask-2.30.0-py3-none-any.whl (848kB)\n", 47 | "\u001b[K |████████████████████████████████| 849kB 49.3MB/s \n", 48 | "\u001b[?25hRequirement already satisfied: bokeh<3,>=2 in /usr/local/lib/python3.7/dist-packages (from dataprep) (2.3.1)\n", 49 | "Collecting nltk<4.0,>=3.5\n", 50 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/5e/37/9532ddd4b1bbb619333d5708aaad9bf1742f051a664c3c6fa6632a105fd8/nltk-3.6.2-py3-none-any.whl (1.5MB)\n", 51 | "\u001b[K |████████████████████████████████| 1.5MB 42.0MB/s \n", 52 | "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.7/dist-packages (from dataprep) (1.19.5)\n", 53 | "Collecting jsonpath-ng<2.0,>=1.5\n", 54 | " Downloading 
https://files.pythonhosted.org/packages/ae/03/a8a12e49e88ba7983d704ef518e25041206aa2e934686270516f1bc439ff/jsonpath_ng-1.5.2-py3-none-any.whl\n", 55 | "Requirement already satisfied: scipy<2,>=1 in /usr/local/lib/python3.7/dist-packages (from dataprep) (1.4.1)\n", 56 | "Requirement already satisfied: ipywidgets<8.0,>=7.5 in /usr/local/lib/python3.7/dist-packages (from dataprep) (7.6.3)\n", 57 | "Collecting tqdm<5.0,>=4.48\n", 58 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/72/8a/34efae5cf9924328a8f34eeb2fdaae14c011462d9f0e3fcded48e1266d1c/tqdm-4.60.0-py2.py3-none-any.whl (75kB)\n", 59 | "\u001b[K |████████████████████████████████| 81kB 8.8MB/s \n", 60 | "\u001b[?25hCollecting wordcloud<2.0,>=1.8\n", 61 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/1b/06/0516bdba2ebdc0d5bd476aa66f94666dd0ad6b9abda723fdf28e451db919/wordcloud-1.8.1-cp37-cp37m-manylinux1_x86_64.whl (366kB)\n", 62 | "\u001b[K |████████████████████████████████| 368kB 40.3MB/s \n", 63 | "\u001b[?25hRequirement already satisfied: pandas<2,>=1 in /usr/local/lib/python3.7/dist-packages (from dataprep) (1.1.5)\n", 64 | "Collecting regex<2021.0.0,>=2020.10.15\n", 65 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/61/b2/8f281520d9f08d0f6771b8160a87a4b487850cde9f1abe257da4d8bab599/regex-2020.11.13-cp37-cp37m-manylinux2014_x86_64.whl (719kB)\n", 66 | "\u001b[K |████████████████████████████████| 727kB 42.6MB/s \n", 67 | "\u001b[?25hRequirement already satisfied: bottleneck<2.0,>=1.3 in /usr/local/lib/python3.7/dist-packages (from dataprep) (1.3.2)\n", 68 | "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from pydantic<2.0,>=1.6->dataprep) (3.7.4.3)\n", 69 | "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2<3.0,>=2.11->dataprep) (1.1.1)\n", 70 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp<4.0,>=3.6->dataprep) (20.3.0)\n", 71 | "Collecting multidict<7.0,>=4.5\n", 72 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7c/a6/4123b8165acbe773d1a8dc8e3f0d1edea16d29f7de018eda769abb56bd30/multidict-5.1.0-cp37-cp37m-manylinux2014_x86_64.whl (142kB)\n", 73 | "\u001b[K |████████████████████████████████| 143kB 44.4MB/s \n", 74 | "\u001b[?25hRequirement already satisfied: chardet<5.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp<4.0,>=3.6->dataprep) (3.0.4)\n", 75 | "Collecting async-timeout<4.0,>=3.0\n", 76 | " Downloading https://files.pythonhosted.org/packages/e1/1e/5a4441be21b0726c4464f3f23c8b19628372f606755a9d2e46c187e65ec4/async_timeout-3.0.1-py3-none-any.whl\n", 77 | "Collecting yarl<2.0,>=1.0\n", 78 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f1/62/046834c5fc998c88ab2ef722f5d42122230a632212c8afa76418324f53ff/yarl-1.6.3-cp37-cp37m-manylinux2014_x86_64.whl (294kB)\n", 79 | "\u001b[K |████████████████████████████████| 296kB 40.9MB/s \n", 80 | "\u001b[?25hRequirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from dask[array,dataframe,delayed]<3.0,>=2.25->dataprep) (3.13)\n", 81 | "Requirement already satisfied: toolz>=0.8.2; extra == \"array\" in /usr/local/lib/python3.7/dist-packages (from dask[array,dataframe,delayed]<3.0,>=2.25->dataprep) (0.11.1)\n", 82 | "Collecting partd>=0.3.10; extra == \"dataframe\"\n", 83 | " Downloading 
https://files.pythonhosted.org/packages/41/94/360258a68b55f47859d72b2d0b2b3cfe0ca4fbbcb81b78812bd00ae86b7c/partd-1.2.0-py3-none-any.whl\n", 84 | "Collecting fsspec>=0.6.0; extra == \"dataframe\"\n", 85 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e9/91/2ef649137816850fa4f4c97c6f2eabb1a79bf0aa2c8ed198e387e373455e/fsspec-2021.4.0-py3-none-any.whl (108kB)\n", 86 | "\u001b[K |████████████████████████████████| 112kB 39.0MB/s \n", 87 | "\u001b[?25hRequirement already satisfied: cloudpickle>=0.2.2; extra == \"delayed\" in /usr/local/lib/python3.7/dist-packages (from dask[array,dataframe,delayed]<3.0,>=2.25->dataprep) (1.3.0)\n", 88 | "Requirement already satisfied: tornado>=5.1 in /usr/local/lib/python3.7/dist-packages (from bokeh<3,>=2->dataprep) (5.1.1)\n", 89 | "Requirement already satisfied: packaging>=16.8 in /usr/local/lib/python3.7/dist-packages (from bokeh<3,>=2->dataprep) (20.9)\n", 90 | "Requirement already satisfied: pillow>=7.1.0 in /usr/local/lib/python3.7/dist-packages (from bokeh<3,>=2->dataprep) (7.1.2)\n", 91 | "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from bokeh<3,>=2->dataprep) (2.8.1)\n", 92 | "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from nltk<4.0,>=3.5->dataprep) (1.0.1)\n", 93 | "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from nltk<4.0,>=3.5->dataprep) (7.1.2)\n", 94 | "Collecting ply\n", 95 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl (49kB)\n", 96 | "\u001b[K |████████████████████████████████| 51kB 5.6MB/s \n", 97 | "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from jsonpath-ng<2.0,>=1.5->dataprep) (1.15.0)\n", 98 | "Requirement already satisfied: decorator in /usr/local/lib/python3.7/dist-packages (from jsonpath-ng<2.0,>=1.5->dataprep) (4.4.2)\n", 99 | "Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (3.5.1)\n", 100 | "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (5.0.5)\n", 101 | "Requirement already satisfied: jupyterlab-widgets>=1.0.0; python_version >= \"3.6\" in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (1.0.0)\n", 102 | "Requirement already satisfied: ipython>=4.0.0; python_version >= \"3.3\" in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (5.5.0)\n", 103 | "Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (5.1.3)\n", 104 | "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.7/dist-packages (from ipywidgets<8.0,>=7.5->dataprep) (4.10.1)\n", 105 | "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from wordcloud<2.0,>=1.8->dataprep) (3.2.2)\n", 106 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas<2,>=1->dataprep) (2018.9)\n", 107 | "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.7/dist-packages (from yarl<2.0,>=1.0->aiohttp<4.0,>=3.6->dataprep) (2.10)\n", 108 | "Collecting locket\n", 109 | " Downloading 
https://files.pythonhosted.org/packages/50/b8/e789e45b9b9c2db75e9d9e6ceb022c8d1d7e49b2c085ce8c05600f90a96b/locket-0.2.1-py2.py3-none-any.whl\n", 110 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=16.8->bokeh<3,>=2->dataprep) (2.4.7)\n", 111 | "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.7/dist-packages (from widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (5.3.1)\n", 112 | "Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.7/dist-packages (from traitlets>=4.3.1->ipywidgets<8.0,>=7.5->dataprep) (0.2.0)\n", 113 | "Requirement already satisfied: pexpect; sys_platform != \"win32\" in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (4.8.0)\n", 114 | "Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (0.8.1)\n", 115 | "Requirement already satisfied: pickleshare in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (0.7.5)\n", 116 | "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (56.0.0)\n", 117 | "Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (1.0.18)\n", 118 | "Requirement already satisfied: pygments in /usr/local/lib/python3.7/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (2.6.1)\n", 119 | "Requirement already satisfied: jupyter-core in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets<8.0,>=7.5->dataprep) (4.7.1)\n", 120 | "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets<8.0,>=7.5->dataprep) (2.6.0)\n", 121 | "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.7/dist-packages (from ipykernel>=4.5.1->ipywidgets<8.0,>=7.5->dataprep) (5.3.5)\n", 122 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->wordcloud<2.0,>=1.8->dataprep) (0.10.0)\n", 123 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->wordcloud<2.0,>=1.8->dataprep) (1.3.1)\n", 124 | "Requirement already satisfied: Send2Trash in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (1.5.0)\n", 125 | "Requirement already satisfied: terminado>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.9.4)\n", 126 | "Requirement already satisfied: nbconvert in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (5.6.1)\n", 127 | "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.7/dist-packages (from pexpect; sys_platform != \"win32\"->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (0.7.0)\n", 128 | "Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython>=4.0.0; python_version >= 
\"3.3\"->ipywidgets<8.0,>=7.5->dataprep) (0.2.5)\n", 129 | "Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.7/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets<8.0,>=7.5->dataprep) (22.0.3)\n", 130 | "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.8.4)\n", 131 | "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.3)\n", 132 | "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (1.4.3)\n", 133 | "Requirement already satisfied: defusedxml in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.7.1)\n", 134 | "Requirement already satisfied: testpath in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.4.4)\n", 135 | "Requirement already satisfied: bleach in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (3.3.0)\n", 136 | "Requirement already satisfied: webencodings in /usr/local/lib/python3.7/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8.0,>=7.5->dataprep) (0.5.1)\n", 137 | "Installing collected packages: pydantic, multidict, async-timeout, yarl, aiohttp, locket, partd, fsspec, dask, regex, tqdm, nltk, ply, jsonpath-ng, wordcloud, dataprep\n", 138 | " Found existing installation: dask 2.12.0\n", 139 | " Uninstalling dask-2.12.0:\n", 140 | " Successfully uninstalled dask-2.12.0\n", 141 | " Found existing installation: regex 2019.12.20\n", 142 | " Uninstalling regex-2019.12.20:\n", 143 | " Successfully uninstalled regex-2019.12.20\n", 144 | " Found existing installation: tqdm 4.41.1\n", 145 | " Uninstalling tqdm-4.41.1:\n", 146 | " Successfully uninstalled tqdm-4.41.1\n", 147 | " Found existing installation: nltk 3.2.5\n", 148 | " Uninstalling nltk-3.2.5:\n", 149 | " Successfully uninstalled nltk-3.2.5\n", 150 | " Found existing installation: wordcloud 1.5.0\n", 151 | " Uninstalling wordcloud-1.5.0:\n", 152 | " Successfully uninstalled wordcloud-1.5.0\n", 153 | "Successfully installed aiohttp-3.7.4.post0 async-timeout-3.0.1 dask-2.30.0 dataprep-0.2.15 fsspec-2021.4.0 jsonpath-ng-1.5.2 locket-0.2.1 multidict-5.1.0 nltk-3.6.2 partd-1.2.0 ply-3.11 pydantic-1.8.1 regex-2020.11.13 tqdm-4.60.0 wordcloud-1.8.1 yarl-1.6.3\n" 154 | ], 155 | "name": "stdout" 156 | } 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "metadata": { 162 | "colab": { 163 | "base_uri": "https://localhost:8080/" 164 | }, 165 | "id": "6ilOAxQwhGly", 166 | "outputId": "cfd6dd21-7cb8-4360-d9f3-74cb8fb53b98" 167 | }, 168 | "source": [ 169 | "from dataprep.datasets import load_dataset\n", 170 | "from dataprep.eda import create_report\n", 171 | "df = load_dataset(\"iris\")\n", 172 | "obj = create_report(df)" 173 | ], 174 | "execution_count": 10, 175 | "outputs": [ 176 | { 177 | "output_type": "stream", 178 | "text": [ 179 | "" 180 | ], 181 | "name": "stderr" 182 | } 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "metadata": { 188 | "colab": { 189 | "base_uri": 
"https://localhost:8080/" 190 | }, 191 | "id": "lOQ0WJ21jCF0", 192 | "outputId": "81424e29-0b64-4f0a-dd3c-541e06e1cf4f" 193 | }, 194 | "source": [ 195 | "dir(obj)" 196 | ], 197 | "execution_count": 11, 198 | "outputs": [ 199 | { 200 | "output_type": "execute_result", 201 | "data": { 202 | "text/plain": [ 203 | "['__class__',\n", 204 | " '__delattr__',\n", 205 | " '__dict__',\n", 206 | " '__dir__',\n", 207 | " '__doc__',\n", 208 | " '__eq__',\n", 209 | " '__format__',\n", 210 | " '__ge__',\n", 211 | " '__getattribute__',\n", 212 | " '__gt__',\n", 213 | " '__hash__',\n", 214 | " '__init__',\n", 215 | " '__init_subclass__',\n", 216 | " '__le__',\n", 217 | " '__lt__',\n", 218 | " '__module__',\n", 219 | " '__ne__',\n", 220 | " '__new__',\n", 221 | " '__reduce__',\n", 222 | " '__reduce_ex__',\n", 223 | " '__repr__',\n", 224 | " '__setattr__',\n", 225 | " '__sizeof__',\n", 226 | " '__str__',\n", 227 | " '__subclasshook__',\n", 228 | " '__weakref__',\n", 229 | " '_repr_html_',\n", 230 | " 'report',\n", 231 | " 'save',\n", 232 | " 'show',\n", 233 | " 'show_browser']" 234 | ] 235 | }, 236 | "metadata": { 237 | "tags": [] 238 | }, 239 | "execution_count": 11 240 | } 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "metadata": { 246 | "colab": { 247 | "base_uri": "https://localhost:8080/" 248 | }, 249 | "id": "385SqzecjGQe", 250 | "outputId": "693eda8a-8da1-4a76-c4a3-b94da1eb726d" 251 | }, 252 | "source": [ 253 | "obj.save()" 254 | ], 255 | "execution_count": 12, 256 | "outputs": [ 257 | { 258 | "output_type": "stream", 259 | "text": [ 260 | "Report has been saved to /content/report.html!\n" 261 | ], 262 | "name": "stdout" 263 | } 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "metadata": { 269 | "id": "1r3TxnJH9LWz" 270 | }, 271 | "source": [ 272 | "from google.colab import drive\n", 273 | "drive.mount('/content/drive')" 274 | ], 275 | "execution_count": null, 276 | "outputs": [] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "metadata": { 281 | "colab": { 282 | "base_uri": "https://localhost:8080/" 283 | }, 284 | "id": "4yc6L8oVhynE", 285 | "outputId": "f2796a7e-fbdc-4238-c46a-14bd175e328a" 286 | }, 287 | "source": [ 288 | "import dataprep\n", 289 | "dataprep.datasets.get_dataset_names()" 290 | ], 291 | "execution_count": 6, 292 | "outputs": [ 293 | { 294 | "output_type": "execute_result", 295 | "data": { 296 | "text/plain": [ 297 | "['iris', 'titanic']" 298 | ] 299 | }, 300 | "metadata": { 301 | "tags": [] 302 | }, 303 | "execution_count": 6 304 | } 305 | ] 306 | } 307 | ] 308 | } -------------------------------------------------------------------------------- /Detect Binod.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 20, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import DetectBinod" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 9, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "['PdfFileReader',\n", 21 | " '__builtins__',\n", 22 | " '__cached__',\n", 23 | " '__doc__',\n", 24 | " '__file__',\n", 25 | " '__loader__',\n", 26 | " '__name__',\n", 27 | " '__package__',\n", 28 | " '__path__',\n", 29 | " '__spec__',\n", 30 | " 'docx',\n", 31 | " 'isBinod_docx',\n", 32 | " 'isBinod_pdf',\n", 33 | " 'isBinod_txt',\n", 34 | " 'isBinod_xlsx',\n", 35 | " 'status',\n", 36 | " 'xlrd']" 37 | ] 38 | }, 39 | "execution_count": 9, 40 | "metadata": {}, 41 | "output_type": "execute_result" 42 | } 
43 | ], 44 | "source": [ 45 | "dir(DetectBinod)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 12, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "/home/akshay/Learning/AS\r\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "! pwd" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 21, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Binod Found in file abc.docx\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "obj = DetectBinod.isBinod_docx('abc.docx')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 16, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "['__class__',\n", 91 | " '__delattr__',\n", 92 | " '__dict__',\n", 93 | " '__dir__',\n", 94 | " '__doc__',\n", 95 | " '__eq__',\n", 96 | " '__format__',\n", 97 | " '__ge__',\n", 98 | " '__getattribute__',\n", 99 | " '__gt__',\n", 100 | " '__hash__',\n", 101 | " '__init__',\n", 102 | " '__init_subclass__',\n", 103 | " '__le__',\n", 104 | " '__lt__',\n", 105 | " '__module__',\n", 106 | " '__ne__',\n", 107 | " '__new__',\n", 108 | " '__reduce__',\n", 109 | " '__reduce_ex__',\n", 110 | " '__repr__',\n", 111 | " '__setattr__',\n", 112 | " '__sizeof__',\n", 113 | " '__str__',\n", 114 | " '__subclasshook__',\n", 115 | " '__weakref__',\n", 116 | " '_authenticateUserPassword',\n", 117 | " '_buildDestination',\n", 118 | " '_buildField',\n", 119 | " '_buildOutline',\n", 120 | " '_checkKids',\n", 121 | " '_decrypt',\n", 122 | " '_decryptObject',\n", 123 | " '_flatten',\n", 124 | " '_getObjectFromStream',\n", 125 | " '_getPageNumberByIndirect',\n", 126 | " '_override_encryption',\n", 127 | " '_pageId2Num',\n", 128 | " '_pairs',\n", 129 | " '_writeField',\n", 130 | " '_zeroXref',\n", 131 | " 'cacheGetIndirectObject',\n", 132 | " 'cacheIndirectObject',\n", 133 | " 'decrypt',\n", 134 | " 'documentInfo',\n", 135 | " 'flattenedPages',\n", 136 | " 'getDestinationPageNumber',\n", 137 | " 'getDocumentInfo',\n", 138 | " 'getFields',\n", 139 | " 'getFormTextFields',\n", 140 | " 'getIsEncrypted',\n", 141 | " 'getNamedDestinations',\n", 142 | " 'getNumPages',\n", 143 | " 'getObject',\n", 144 | " 'getOutlines',\n", 145 | " 'getPage',\n", 146 | " 'getPageLayout',\n", 147 | " 'getPageMode',\n", 148 | " 'getPageNumber',\n", 149 | " 'getXmpMetadata',\n", 150 | " 'isEncrypted',\n", 151 | " 'namedDestinations',\n", 152 | " 'numPages',\n", 153 | " 'outlines',\n", 154 | " 'pageLayout',\n", 155 | " 'pageMode',\n", 156 | " 'pages',\n", 157 | " 'read',\n", 158 | " 'readNextEndLine',\n", 159 | " 'readObjectHeader',\n", 160 | " 'resolvedObjects',\n", 161 | " 'stream',\n", 162 | " 'strict',\n", 163 | " 'trailer',\n", 164 | " 'xmpMetadata',\n", 165 | " 'xref',\n", 166 | " 'xrefIndex',\n", 167 | " 'xref_objStm']" 168 | ] 169 | }, 170 | "execution_count": 16, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "dir(obj)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 22, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "import os" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 23, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "obj = os.listdir();" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 24, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "name": 
"stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "['OCR .ipynb', 'exaplainable AI.jpeg', 'prediction 1.jpeg', 'Gensim hindi tutorials', '2020-07-18 12-15-43.mkv', '.~lock.abc.docx#', 'abc.docx', 'Subscribe_to_My_Think_Channel_on_YouTube.gif', 'ytlogo.png', 'anomaly.jpeg', 'Untitled1.ipynb', 'abc.pdf', 'Screenshot from 2020-07-16 10-56-01.png', 'download.jpeg', 'Youtube video analytics.ipynb', 'like.jpg', 'abc.txt', 'prediction 2.jpeg', 'Screenshot from 2020-07-16 12-42-56.png', 'Untitled.ipynb', 'Blackrock2.mp4', 'Screenshot from 2020-07-16 10-54-11.png', 'Screenshot from 2020-07-16 10-54-43.png', 'share.jpg', 'Screenshot from 2020-07-16 10-55-00.png', 'ASlogo.jpg', '.ipynb_checkpoints', 'Screenshot from 2020-07-16 10-53-21.png', 'Detect Binod.ipynb', 'gensim.png', 'sentiment.jpeg', 'Black_plain.jpeg']\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "print(obj)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 27, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stdout", 221 | "output_type": "stream", 222 | "text": [ 223 | "Detecting Binod in abc.txt\n", 224 | "Binod is not detected\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "def checkBinod(i):\n", 230 | " with open(i,\"r\") as f:\n", 231 | " data = f.read()\n", 232 | " if \"binod\" in data.lower():\n", 233 | " return True\n", 234 | " else:\n", 235 | " return False\n", 236 | "\n", 237 | "for i in obj:\n", 238 | " if i.endswith('txt'):\n", 239 | " print(f\"Detecting Binod in {i}\")\n", 240 | " \n", 241 | " flag = checkBinod(i)\n", 242 | " \n", 243 | " if flag == True :\n", 244 | " print(\"Binod is detected\")\n", 245 | " else:\n", 246 | " print(\"Binod is not detected\")" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [] 255 | } 256 | ], 257 | "metadata": { 258 | "kernelspec": { 259 | "display_name": "Python 3", 260 | "language": "python", 261 | "name": "python3" 262 | }, 263 | "language_info": { 264 | "codemirror_mode": { 265 | "name": "ipython", 266 | "version": 3 267 | }, 268 | "file_extension": ".py", 269 | "mimetype": "text/x-python", 270 | "name": "python", 271 | "nbconvert_exporter": "python", 272 | "pygments_lexer": "ipython3", 273 | "version": "3.7.4" 274 | } 275 | }, 276 | "nbformat": 4, 277 | "nbformat_minor": 2 278 | } 279 | -------------------------------------------------------------------------------- /DiabetesClassificationUsingNeuralNetwork.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
6 | """ 7 | 8 | import pandas as pd 9 | 10 | data=pd.read_csv('diabetes.csv') 11 | data.head() 12 | 13 | 14 | X = data.iloc[:, :-1].values 15 | y = data.iloc[:, -1].values 16 | 17 | from sklearn.model_selection import train_test_split 18 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) 19 | 20 | from sklearn.preprocessing import StandardScaler 21 | sc = StandardScaler() 22 | X_train = sc.fit_transform(X_train) 23 | X_test = sc.transform(X_test) 24 | 25 | import keras 26 | from keras.models import Sequential 27 | from keras.layers import Dense 28 | 29 | from keras.layers import Dropout 30 | 31 | classifier = Sequential() 32 | classifier.add(Dense(16,activation='relu',input_dim=8)) 33 | classifier.add(Dropout(0.2)) 34 | 35 | #adding the second hidden layer 36 | classifier.add(Dense(16,activation='relu')) 37 | classifier.add(Dropout(0.2)) 38 | 39 | #adding the output layer 40 | classifier.add(Dense(1,activation='sigmoid')) 41 | 42 | classifier.compile(optimizer="Adam", loss='binary_crossentropy', metrics=['accuracy']) 43 | 44 | classifier.fit(X_train, y_train, batch_size=100, epochs=300) 45 | 46 | y_pred = classifier.predict(X_test) 47 | y_pred = (y_pred > 0.5) 48 | 49 | from sklearn.metrics import confusion_matrix 50 | cm = confusion_matrix(y_test, y_pred) 51 | 52 | #sns.heatmap(cm,annot=True) 53 | #plt.savefig('h.png') 54 | -------------------------------------------------------------------------------- /EntityExtraction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jan 27 21:39:18 2021 5 | 6 | @author: akshay 7 | """ 8 | 9 | import en_core_web_sm 10 | nlp = en_core_web_sm.load() 11 | doc = nlp(u"Ramesh is earning in 100 dollars in UK") 12 | for entity in doc.ents: 13 | print(entity.label_, ' | ', entity.text) 14 | -------------------------------------------------------------------------------- /Gensim introduction hindi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "1)Gensim stands for \"Generate Similar\"\n", 8 | "\n", 9 | "2)Features provided by Gensim :\n", 10 | "\n", 11 | "a)fastText
\n", 12 | "b)word2vec
\n", 13 | "c)LSA
\n", 14 | "d)LDA
\n", 15 | "e)TF-IDF
\n", 16 | "\n", 17 | "Documents : It refers to some text\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "document = \"Akshay is teaching gensim on youtube.\"" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Corpus : It refers to collection of texts" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "corpus = [\"Akshay is teaching gensim on youtube.\",\"Today is a sunny day\",\"India is one of the top ranking teasm in cricket\",\"My favourite hobby is playing badminton\"]" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "stoplist = set('for a of the and to in'.split(' '))\n", 52 | "processed_corpus = [[word for word in document.lower().split() if word not in stoplist]\n", 53 | " for document in corpus]" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 15, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "[['akshay', 'is', 'teaching', 'gensim', 'on', 'youtube.'],\n", 66 | " ['today', 'is', 'sunny', 'day'],\n", 67 | " ['india', 'is', 'one', 'top', 'ranking', 'teasm', 'cricket'],\n", 68 | " ['my', 'favourite', 'hobby', 'is', 'playing', 'badminton']]\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "import pprint\n", 74 | "pprint.pprint(processed_corpus)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "import gensim" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "corpus = \"\"\"'Akshay is teaching gensim on youtube.',\"Today is a sunny day\",\"India is one of the top ranking teasm in cricket\",'My favourite hobby is playing badminton'\"\"\"" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 8, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": [ 103 | "['akshay',\n", 104 | " 'is',\n", 105 | " 'teaching',\n", 106 | " 'gensim',\n", 107 | " 'on',\n", 108 | " 'youtube',\n", 109 | " 'today',\n", 110 | " 'is',\n", 111 | " 'sunny',\n", 112 | " 'day',\n", 113 | " 'india',\n", 114 | " 'is',\n", 115 | " 'one',\n", 116 | " 'of',\n", 117 | " 'the',\n", 118 | " 'top',\n", 119 | " 'ranking',\n", 120 | " 'teasm',\n", 121 | " 'in',\n", 122 | " 'cricket',\n", 123 | " 'my',\n", 124 | " 'favourite',\n", 125 | " 'hobby',\n", 126 | " 'is',\n", 127 | " 'playing',\n", 128 | " 'badminton']" 129 | ] 130 | }, 131 | "execution_count": 8, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "gensim.utils.simple_preprocess(corpus, deacc=False, min_len=2, max_len=15)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Document is text and vector is a mathematically convenient representation of that text.\n", 145 | "\n", 146 | "One more important thing to be noted here is that, two different documents may have the same vector representation." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 9, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "Dictionary(20 unique tokens: ['akshay', 'gensim', 'is', 'on', 'teaching']...)\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "from gensim import corpora\n", 164 | "dictionary = corpora.Dictionary(processed_corpus)\n", 165 | "print(dictionary)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 10, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "{'akshay': 0,\n", 178 | " 'badminton': 15,\n", 179 | " 'cricket': 9,\n", 180 | " 'day': 6,\n", 181 | " 'favourite': 16,\n", 182 | " 'gensim': 1,\n", 183 | " 'hobby': 17,\n", 184 | " 'india': 10,\n", 185 | " 'is': 2,\n", 186 | " 'my': 18,\n", 187 | " 'on': 3,\n", 188 | " 'one': 11,\n", 189 | " 'playing': 19,\n", 190 | " 'ranking': 12,\n", 191 | " 'sunny': 7,\n", 192 | " 'teaching': 4,\n", 193 | " 'teasm': 13,\n", 194 | " 'today': 8,\n", 195 | " 'top': 14,\n", 196 | " 'youtube.': 5}\n" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | "pprint.pprint(dictionary.token2id)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 11, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "[['akshay', 'is', 'teaching', 'gensim', 'on', 'youtube.'],\n", 213 | " ['today', 'is', 'sunny', 'day'],\n", 214 | " ['india', 'is', 'one', 'top', 'ranking', 'teasm', 'cricket'],\n", 215 | " ['my', 'favourite', 'hobby', 'is', 'playing', 'badminton']]" 216 | ] 217 | }, 218 | "execution_count": 11, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "processed_corpus" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 12, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "[[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1)],\n", 237 | " [(2, 1), (6, 1), (7, 1), (8, 1)],\n", 238 | " [(2, 1), (9, 1), (10, 1), (11, 1), (12, 1), (13, 1), (14, 1)],\n", 239 | " [(2, 1), (15, 1), (16, 1), (17, 1), (18, 1), (19, 1)]]\n" 240 | ] 241 | } 242 | ], 243 | "source": [ 244 | "BoW_corpus = [dictionary.doc2bow(text) for text in processed_corpus]\n", 245 | "pprint.pprint(BoW_corpus)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 13, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "name": "stdout", 255 | "output_type": "stream", 256 | "text": [ 257 | "TfidfModel(num_docs=4, num_nnz=23)\n", 258 | "[(0, 0.7071067811865475), (9, 0.7071067811865475)]\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "from gensim import models\n", 264 | "tfidf = models.TfidfModel(BoW_corpus)\n", 265 | "words = \"akshay cricket\".lower().split()\n", 266 | "print(tfidf)\n", 267 | "print(tfidf[dictionary.doc2bow(words)])" 268 | ] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python 3", 274 | "language": "python", 275 | "name": "python3" 276 | }, 277 | "language_info": { 278 | "codemirror_mode": { 279 | "name": "ipython", 280 | "version": 3 281 | }, 282 | "file_extension": ".py", 283 | "mimetype": "text/x-python", 284 | "name": "python", 285 | "nbconvert_exporter": "python", 286 | "pygments_lexer": "ipython3", 287 | "version": "3.7.4" 288 | } 289 | }, 290 | "nbformat": 4, 291 | "nbformat_minor": 2 292 | } 293 | 
-------------------------------------------------------------------------------- /Grammarchecker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jan 16 15:00:43 2021 5 | 6 | @author: akshay 7 | """ 8 | 9 | from gingerit.gingerit import GingerIt 10 | 11 | text = 'Narendra Modi is our prme mnister. He is from Gujaratt' 12 | 13 | parser = GingerIt() 14 | print(len(parser.parse(text)['corrections'])) 15 | -------------------------------------------------------------------------------- /HaarCascade.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Nov 20 13:39:30 2020 5 | 6 | @author: akshay 7 | """ 8 | 9 | import cv2 10 | 11 | face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml') 12 | eye_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_eye.xml') 13 | 14 | img = cv2.imread('virat.jpeg') 15 | 16 | r = 500.0 / img.shape[1] 17 | dim = (500, int(img.shape[0] * r)) 18 | 19 | resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA) 20 | 21 | grey = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY) 22 | 23 | grey.shape 24 | 25 | faces = face_cascade.detectMultiScale(grey, 1.3, 5) 26 | eyes = eye_cascade.detectMultiScale(grey,1.3,5) 27 | 28 | for (x,y,w,h) in faces: 29 | cv2.rectangle(resized,(x,y),(x+w,y+h),(255,0,0),2) 30 | roi_grey = grey[y:y+h, x:x+w] 31 | roi_color = resized[y:y+h, x:x+w] 32 | eyes = eye_cascade.detectMultiScale(roi_grey) 33 | for (ex,ey,ew,eh) in eyes: 34 | cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2) 35 | 36 | #Display the bounding box for the face and eyes 37 | #cv2.imshow('img',resized) 38 | #cv2.waitKey(0) 39 | 40 | #cv2.imshow('image',resized) 41 | #cv2.waitKey(0) -------------------------------------------------------------------------------- /Kepler-delete.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /LazyPredict.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "LazyPredict.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "S6DI72w20Zsu", 22 | "colab": { 23 | "base_uri": "https://localhost:8080/", 24 | "height": 1000 25 | }, 26 | "outputId": "981359a0-7fd0-4b37-c98d-c95f39ae2858" 27 | }, 28 | "source": [ 29 | "! 
pip install lazypredict" 30 | ], 31 | "execution_count": 1, 32 | "outputs": [ 33 | { 34 | "output_type": "stream", 35 | "text": [ 36 | "Collecting lazypredict\n", 37 | " Downloading https://files.pythonhosted.org/packages/97/38/cadb2b79268c7f82f6b027bf0b2f68750aafc5c70b6e1bc46b357386e07b/lazypredict-0.2.9-py2.py3-none-any.whl\n", 38 | "Requirement already satisfied: click==7.1.2 in /usr/local/lib/python3.7/dist-packages (from lazypredict) (7.1.2)\n", 39 | "Collecting lightgbm==2.3.1\n", 40 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/0b/9d/ddcb2f43aca194987f1a99e27edf41cf9bc39ea750c3371c2a62698c509a/lightgbm-2.3.1-py2.py3-none-manylinux1_x86_64.whl (1.2MB)\n", 41 | "\u001b[K |████████████████████████████████| 1.2MB 6.2MB/s \n", 42 | "\u001b[?25hCollecting scipy==1.5.4\n", 43 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/dc/7e/8f6a79b102ca1ea928bae8998b05bf5dc24a90571db13cd119f275ba6252/scipy-1.5.4-cp37-cp37m-manylinux1_x86_64.whl (25.9MB)\n", 44 | "\u001b[K |████████████████████████████████| 25.9MB 162kB/s \n", 45 | "\u001b[?25hCollecting joblib==1.0.0\n", 46 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/34/5b/bd0f0fb5564183884d8e35b81d06d7ec06a20d1a0c8b4c407f1554691dce/joblib-1.0.0-py3-none-any.whl (302kB)\n", 47 | "\u001b[K |████████████████████████████████| 307kB 37.6MB/s \n", 48 | "\u001b[?25hRequirement already satisfied: six==1.15.0 in /usr/local/lib/python3.7/dist-packages (from lazypredict) (1.15.0)\n", 49 | "Collecting numpy==1.19.1\n", 50 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/50/8f/29d5688614f9bba59931683d5d353d738d4a3007833219ee19c455732753/numpy-1.19.1-cp37-cp37m-manylinux2010_x86_64.whl (14.5MB)\n", 51 | "\u001b[K |████████████████████████████████| 14.5MB 339kB/s \n", 52 | "\u001b[?25hCollecting tqdm==4.56.0\n", 53 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/80/02/8f8880a4fd6625461833abcf679d4c12a44c76f9925f92bf212bb6cefaad/tqdm-4.56.0-py2.py3-none-any.whl (72kB)\n", 54 | "\u001b[K |████████████████████████████████| 81kB 9.3MB/s \n", 55 | "\u001b[?25hCollecting pytest==5.4.3\n", 56 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9f/f3/0a83558da436a081344aa6c8b85ea5b5f05071214106036ce341b7769b0b/pytest-5.4.3-py3-none-any.whl (248kB)\n", 57 | "\u001b[K |████████████████████████████████| 256kB 48.8MB/s \n", 58 | "\u001b[?25hCollecting PyYAML==5.3.1\n", 59 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/64/c2/b80047c7ac2478f9501676c988a5411ed5572f35d1beff9cae07d321512c/PyYAML-5.3.1.tar.gz (269kB)\n", 60 | "\u001b[K |████████████████████████████████| 276kB 41.9MB/s \n", 61 | "\u001b[?25hCollecting scikit-learn==0.23.1\n", 62 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/b8/7e/74e707b66490d4eb05f702966ad0990881127acecf9d5cdcef3c95ec6c16/scikit_learn-0.23.1-cp37-cp37m-manylinux1_x86_64.whl (6.8MB)\n", 63 | "\u001b[K |████████████████████████████████| 6.8MB 41.5MB/s \n", 64 | "\u001b[?25hCollecting xgboost==1.1.1\n", 65 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7c/32/a11befbb003e0e6b7e062a77f010dfcec0ec3589be537b02d2eb2ff93b9a/xgboost-1.1.1-py3-none-manylinux2010_x86_64.whl (127.6MB)\n", 66 | "\u001b[K |████████████████████████████████| 127.6MB 104kB/s \n", 67 | "\u001b[?25hCollecting pandas==1.0.5\n", 68 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/af/f3/683bf2547a3eaeec15b39cef86f61e921b3b187f250fcd2b5c5fb4386369/pandas-1.0.5-cp37-cp37m-manylinux1_x86_64.whl 
(10.1MB)\n", 69 | "\u001b[K |████████████████████████████████| 10.1MB 36.8MB/s \n", 70 | "\u001b[?25hRequirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (0.2.5)\n", 71 | "Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (8.7.0)\n", 72 | "Requirement already satisfied: importlib-metadata>=0.12; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (3.10.1)\n", 73 | "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (20.9)\n", 74 | "Collecting pluggy<1.0,>=0.12\n", 75 | " Downloading https://files.pythonhosted.org/packages/a0/28/85c7aa31b80d150b772fbe4a229487bc6644da9ccb7e427dd8cc60cb8a62/pluggy-0.13.1-py2.py3-none-any.whl\n", 76 | "Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (1.10.0)\n", 77 | "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from pytest==5.4.3->lazypredict) (20.3.0)\n", 78 | "Collecting threadpoolctl>=2.0.0\n", 79 | " Downloading https://files.pythonhosted.org/packages/f7/12/ec3f2e203afa394a149911729357aa48affc59c20e2c1c8297a60f33f133/threadpoolctl-2.1.0-py3-none-any.whl\n", 80 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas==1.0.5->lazypredict) (2018.9)\n", 81 | "Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.7/dist-packages (from pandas==1.0.5->lazypredict) (2.8.1)\n", 82 | "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.12; python_version < \"3.8\"->pytest==5.4.3->lazypredict) (3.4.1)\n", 83 | "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.12; python_version < \"3.8\"->pytest==5.4.3->lazypredict) (3.7.4.3)\n", 84 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->pytest==5.4.3->lazypredict) (2.4.7)\n", 85 | "Building wheels for collected packages: PyYAML\n", 86 | " Building wheel for PyYAML (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 87 | " Created wheel for PyYAML: filename=PyYAML-5.3.1-cp37-cp37m-linux_x86_64.whl size=44620 sha256=f6d917d739712d660ac988fc4d460b50631e794e8734262ffe5c016362d97a0f\n", 88 | " Stored in directory: /root/.cache/pip/wheels/a7/c1/ea/cf5bd31012e735dc1dfea3131a2d5eae7978b251083d6247bd\n", 89 | "Successfully built PyYAML\n", 90 | "\u001b[31mERROR: tensorflow 2.4.1 has requirement numpy~=1.19.2, but you'll have numpy 1.19.1 which is incompatible.\u001b[0m\n", 91 | "\u001b[31mERROR: google-colab 1.0.0 has requirement pandas~=1.1.0; python_version >= \"3.0\", but you'll have pandas 1.0.5 which is incompatible.\u001b[0m\n", 92 | "\u001b[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.\u001b[0m\n", 93 | "\u001b[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.\u001b[0m\n", 94 | "Installing collected packages: threadpoolctl, joblib, numpy, scipy, scikit-learn, lightgbm, tqdm, pluggy, pytest, PyYAML, xgboost, pandas, lazypredict\n", 95 | " Found existing installation: joblib 1.0.1\n", 96 | " Uninstalling joblib-1.0.1:\n", 97 | " Successfully uninstalled joblib-1.0.1\n", 98 | " Found existing installation: numpy 1.19.5\n", 99 | " Uninstalling numpy-1.19.5:\n", 100 | " Successfully uninstalled numpy-1.19.5\n", 101 | " Found existing installation: scipy 1.4.1\n", 102 | " Uninstalling scipy-1.4.1:\n", 103 | " Successfully uninstalled scipy-1.4.1\n", 104 | " Found existing installation: scikit-learn 0.22.2.post1\n", 105 | " Uninstalling scikit-learn-0.22.2.post1:\n", 106 | " Successfully uninstalled scikit-learn-0.22.2.post1\n", 107 | " Found existing installation: lightgbm 2.2.3\n", 108 | " Uninstalling lightgbm-2.2.3:\n", 109 | " Successfully uninstalled lightgbm-2.2.3\n", 110 | " Found existing installation: tqdm 4.41.1\n", 111 | " Uninstalling tqdm-4.41.1:\n", 112 | " Successfully uninstalled tqdm-4.41.1\n", 113 | " Found existing installation: pluggy 0.7.1\n", 114 | " Uninstalling pluggy-0.7.1:\n", 115 | " Successfully uninstalled pluggy-0.7.1\n", 116 | " Found existing installation: pytest 3.6.4\n", 117 | " Uninstalling pytest-3.6.4:\n", 118 | " Successfully uninstalled pytest-3.6.4\n", 119 | " Found existing installation: PyYAML 3.13\n", 120 | " Uninstalling PyYAML-3.13:\n", 121 | " Successfully uninstalled PyYAML-3.13\n", 122 | " Found existing installation: xgboost 0.90\n", 123 | " Uninstalling xgboost-0.90:\n", 124 | " Successfully uninstalled xgboost-0.90\n", 125 | " Found existing installation: pandas 1.1.5\n", 126 | " Uninstalling pandas-1.1.5:\n", 127 | " Successfully uninstalled pandas-1.1.5\n", 128 | "Successfully installed PyYAML-5.3.1 joblib-1.0.0 lazypredict-0.2.9 lightgbm-2.3.1 numpy-1.19.1 pandas-1.0.5 pluggy-0.13.1 pytest-5.4.3 scikit-learn-0.23.1 scipy-1.5.4 threadpoolctl-2.1.0 tqdm-4.56.0 xgboost-1.1.1\n" 129 | ], 130 | "name": "stdout" 131 | }, 132 | { 133 | "output_type": "display_data", 134 | "data": { 135 | "application/vnd.colab-display-data+json": { 136 | "pip_warning": { 137 | "packages": [ 138 | "numpy", 139 | "pandas" 140 | ] 141 | } 142 | } 143 | }, 144 | "metadata": { 145 | "tags": [] 146 | } 147 | } 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "metadata": { 153 | "id": "mPO0zcqPy7fG" 154 | }, 155 | "source": [ 156 | "import lazypredict" 157 | ], 158 | "execution_count": 1, 159 | "outputs": [] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "metadata": { 164 | "id": "G6Vu7hMLy_DK" 165 | }, 166 
| "source": [ 167 | "from sklearn import datasets" 168 | ], 169 | "execution_count": 2, 170 | "outputs": [] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "metadata": { 175 | "colab": { 176 | "base_uri": "https://localhost:8080/" 177 | }, 178 | "id": "b2SCh4Y0zLi6", 179 | "outputId": "43bf944a-b93c-4e25-a643-bacf2b1a605a" 180 | }, 181 | "source": [ 182 | "dir(datasets)" 183 | ], 184 | "execution_count": 3, 185 | "outputs": [ 186 | { 187 | "output_type": "execute_result", 188 | "data": { 189 | "text/plain": [ 190 | "['__all__',\n", 191 | " '__builtins__',\n", 192 | " '__cached__',\n", 193 | " '__doc__',\n", 194 | " '__file__',\n", 195 | " '__loader__',\n", 196 | " '__name__',\n", 197 | " '__package__',\n", 198 | " '__path__',\n", 199 | " '__spec__',\n", 200 | " '_base',\n", 201 | " '_california_housing',\n", 202 | " '_covtype',\n", 203 | " '_kddcup99',\n", 204 | " '_lfw',\n", 205 | " '_olivetti_faces',\n", 206 | " '_openml',\n", 207 | " '_rcv1',\n", 208 | " '_samples_generator',\n", 209 | " '_species_distributions',\n", 210 | " '_svmlight_format_fast',\n", 211 | " '_svmlight_format_io',\n", 212 | " '_twenty_newsgroups',\n", 213 | " 'clear_data_home',\n", 214 | " 'dump_svmlight_file',\n", 215 | " 'fetch_20newsgroups',\n", 216 | " 'fetch_20newsgroups_vectorized',\n", 217 | " 'fetch_california_housing',\n", 218 | " 'fetch_covtype',\n", 219 | " 'fetch_kddcup99',\n", 220 | " 'fetch_lfw_pairs',\n", 221 | " 'fetch_lfw_people',\n", 222 | " 'fetch_olivetti_faces',\n", 223 | " 'fetch_openml',\n", 224 | " 'fetch_rcv1',\n", 225 | " 'fetch_species_distributions',\n", 226 | " 'get_data_home',\n", 227 | " 'load_boston',\n", 228 | " 'load_breast_cancer',\n", 229 | " 'load_diabetes',\n", 230 | " 'load_digits',\n", 231 | " 'load_files',\n", 232 | " 'load_iris',\n", 233 | " 'load_linnerud',\n", 234 | " 'load_sample_image',\n", 235 | " 'load_sample_images',\n", 236 | " 'load_svmlight_file',\n", 237 | " 'load_svmlight_files',\n", 238 | " 'load_wine',\n", 239 | " 'make_biclusters',\n", 240 | " 'make_blobs',\n", 241 | " 'make_checkerboard',\n", 242 | " 'make_circles',\n", 243 | " 'make_classification',\n", 244 | " 'make_friedman1',\n", 245 | " 'make_friedman2',\n", 246 | " 'make_friedman3',\n", 247 | " 'make_gaussian_quantiles',\n", 248 | " 'make_hastie_10_2',\n", 249 | " 'make_low_rank_matrix',\n", 250 | " 'make_moons',\n", 251 | " 'make_multilabel_classification',\n", 252 | " 'make_regression',\n", 253 | " 'make_s_curve',\n", 254 | " 'make_sparse_coded_signal',\n", 255 | " 'make_sparse_spd_matrix',\n", 256 | " 'make_sparse_uncorrelated',\n", 257 | " 'make_spd_matrix',\n", 258 | " 'make_swiss_roll']" 259 | ] 260 | }, 261 | "metadata": { 262 | "tags": [] 263 | }, 264 | "execution_count": 3 265 | } 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "metadata": { 271 | "id": "mQtWciUGzTTA" 272 | }, 273 | "source": [ 274 | "from sklearn.datasets import " 275 | ], 276 | "execution_count": null, 277 | "outputs": [] 278 | } 279 | ] 280 | } -------------------------------------------------------------------------------- /Lux.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Collecting lux-api\n", 13 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/64/32/ee46df189e978c21a040e01d0eb99067de07974fd4ef67c69190a5d1fdde/lux-api-0.2.1.tar.gz (76kB)\n", 14 | "\u001b[K 
|████████████████████████████████| 81kB 11kB/s eta 0:00:012\n", 15 | "\u001b[?25hCollecting scipy>=1.3.3 (from lux-api)\n", 16 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/dc/7e/8f6a79b102ca1ea928bae8998b05bf5dc24a90571db13cd119f275ba6252/scipy-1.5.4-cp37-cp37m-manylinux1_x86_64.whl (25.9MB)\n", 17 | "\u001b[K |████████████████████████████████| 25.9MB 52kB/s eta 0:00:013 |█▏ | 921kB 288kB/s eta 0:01:27 |███████████████████▎ | 15.6MB 476kB/s eta 0:00:22\n", 18 | "\u001b[?25hCollecting altair>=4.0.0 (from lux-api)\n", 19 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/01/55/0bb2226e34f21fa549c3f4557b4f154a5632f61132a969da17c95ca8eab9/altair-4.1.0-py3-none-any.whl (727kB)\n", 20 | "\u001b[K |████████████████████████████████| 737kB 673kB/s eta 0:00:01\n", 21 | "\u001b[?25hCollecting pandas>=1.1.0 (from lux-api)\n", 22 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/bf/4c/cb7da76f3a5e077e545f9cf8575b8f488a4e8ad60490838f89c5cdd5bb57/pandas-1.1.4-cp37-cp37m-manylinux1_x86_64.whl (9.5MB)\n", 23 | "\u001b[K |████████████████████████████████| 9.5MB 89kB/s eta 0:00:012 |██████████ | 3.0MB 280kB/s eta 0:00:24\n", 24 | "\u001b[?25hRequirement already satisfied: scikit-learn>=0.22 in /home/akshay/anaconda3/lib/python3.7/site-packages (from lux-api) (0.22.2.post1)\n", 25 | "Collecting lux-widget>=0.1.1 (from lux-api)\n", 26 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/3e/94/ac2ab4f1a28496c3aacdb52ba13d7df2ad7f51af9138c4cc076815cbb618/lux-widget-0.1.1.tar.gz (1.9MB)\n", 27 | "\u001b[K |████████████████████████████████| 1.9MB 907kB/s eta 0:00:01\n", 28 | "\u001b[?25hRequirement already satisfied: numpy>=1.14.5 in /home/akshay/anaconda3/lib/python3.7/site-packages (from scipy>=1.3.3->lux-api) (1.17.2)\n", 29 | "Requirement already satisfied: jsonschema in /home/akshay/anaconda3/lib/python3.7/site-packages (from altair>=4.0.0->lux-api) (3.0.2)\n", 30 | "Requirement already satisfied: entrypoints in /home/akshay/anaconda3/lib/python3.7/site-packages (from altair>=4.0.0->lux-api) (0.3)\n", 31 | "Requirement already satisfied: jinja2 in /home/akshay/anaconda3/lib/python3.7/site-packages (from altair>=4.0.0->lux-api) (2.10.3)\n", 32 | "Requirement already satisfied: toolz in /home/akshay/anaconda3/lib/python3.7/site-packages (from altair>=4.0.0->lux-api) (0.10.0)\n", 33 | "Requirement already satisfied: python-dateutil>=2.7.3 in /home/akshay/anaconda3/lib/python3.7/site-packages (from pandas>=1.1.0->lux-api) (2.8.0)\n", 34 | "Requirement already satisfied: pytz>=2017.2 in /home/akshay/anaconda3/lib/python3.7/site-packages (from pandas>=1.1.0->lux-api) (2019.3)\n", 35 | "Requirement already satisfied: joblib>=0.11 in /home/akshay/anaconda3/lib/python3.7/site-packages (from scikit-learn>=0.22->lux-api) (0.13.2)\n", 36 | "Requirement already satisfied: jupyter in /home/akshay/anaconda3/lib/python3.7/site-packages (from lux-widget>=0.1.1->lux-api) (1.0.0)\n", 37 | "Requirement already satisfied: notebook>=4.0.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from lux-widget>=0.1.1->lux-api) (6.0.1)\n", 38 | "Requirement already satisfied: ipywidgets>=7.0.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from lux-widget>=0.1.1->lux-api) (7.5.1)\n", 39 | "Requirement already satisfied: pyrsistent>=0.14.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jsonschema->altair>=4.0.0->lux-api) (0.15.4)\n", 40 | "Requirement already satisfied: setuptools in /home/akshay/anaconda3/lib/python3.7/site-packages (from 
jsonschema->altair>=4.0.0->lux-api) (41.4.0)\n", 41 | "Requirement already satisfied: attrs>=17.4.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jsonschema->altair>=4.0.0->lux-api) (19.2.0)\n", 42 | "Requirement already satisfied: six>=1.11.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jsonschema->altair>=4.0.0->lux-api) (1.15.0)\n", 43 | "Requirement already satisfied: MarkupSafe>=0.23 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jinja2->altair>=4.0.0->lux-api) (1.1.1)\n", 44 | "Requirement already satisfied: qtconsole in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter->lux-widget>=0.1.1->lux-api) (4.5.5)\n", 45 | "Requirement already satisfied: nbconvert in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter->lux-widget>=0.1.1->lux-api) (5.6.0)\n", 46 | "Requirement already satisfied: ipykernel in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter->lux-widget>=0.1.1->lux-api) (5.1.2)\n", 47 | "Requirement already satisfied: jupyter-console in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter->lux-widget>=0.1.1->lux-api) (6.0.0)\n", 48 | "Requirement already satisfied: Send2Trash in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (1.5.0)\n", 49 | "Requirement already satisfied: traitlets>=4.2.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (4.3.3)\n", 50 | "Requirement already satisfied: terminado>=0.8.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (0.8.2)\n", 51 | "Requirement already satisfied: ipython-genutils in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (0.2.0)\n", 52 | "Requirement already satisfied: jupyter-core>=4.4.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (4.5.0)\n", 53 | "Requirement already satisfied: pyzmq>=17 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (18.1.0)\n", 54 | "Requirement already satisfied: prometheus-client in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (0.7.1)\n", 55 | "Requirement already satisfied: tornado>=5.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (6.0.3)\n", 56 | "Requirement already satisfied: jupyter-client>=5.3.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (5.3.3)\n", 57 | "Requirement already satisfied: nbformat in /home/akshay/anaconda3/lib/python3.7/site-packages (from notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (4.4.0)\n", 58 | "Requirement already satisfied: ipython>=4.0.0; python_version >= \"3.3\" in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (7.8.0)\n", 59 | "Requirement already satisfied: widgetsnbextension~=3.5.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (3.5.1)\n", 60 | "Requirement already satisfied: pygments in /home/akshay/anaconda3/lib/python3.7/site-packages (from qtconsole->jupyter->lux-widget>=0.1.1->lux-api) (2.4.2)\n", 61 | "Requirement already satisfied: mistune<2,>=0.8.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from 
nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (0.8.4)\n", 62 | "Requirement already satisfied: pandocfilters>=1.4.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (1.4.2)\n", 63 | "Requirement already satisfied: bleach in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (3.1.0)\n", 64 | "Requirement already satisfied: testpath in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (0.4.2)\n", 65 | "Requirement already satisfied: defusedxml in /home/akshay/anaconda3/lib/python3.7/site-packages (from nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (0.6.0)\n", 66 | "Requirement already satisfied: prompt_toolkit<2.1.0,>=2.0.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jupyter-console->jupyter->lux-widget>=0.1.1->lux-api) (2.0.10)\n", 67 | "Requirement already satisfied: decorator in /home/akshay/anaconda3/lib/python3.7/site-packages (from traitlets>=4.2.1->notebook>=4.0.0->lux-widget>=0.1.1->lux-api) (4.4.0)\n", 68 | "Requirement already satisfied: pexpect; sys_platform != \"win32\" in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (4.7.0)\n", 69 | "Requirement already satisfied: pickleshare in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.7.5)\n", 70 | "Requirement already satisfied: jedi>=0.10 in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.15.1)\n" 71 | ] 72 | }, 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "Requirement already satisfied: backcall in /home/akshay/anaconda3/lib/python3.7/site-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.1.0)\n", 78 | "Requirement already satisfied: webencodings in /home/akshay/anaconda3/lib/python3.7/site-packages (from bleach->nbconvert->jupyter->lux-widget>=0.1.1->lux-api) (0.5.1)\n", 79 | "Requirement already satisfied: wcwidth in /home/akshay/anaconda3/lib/python3.7/site-packages (from prompt_toolkit<2.1.0,>=2.0.0->jupyter-console->jupyter->lux-widget>=0.1.1->lux-api) (0.1.7)\n", 80 | "Requirement already satisfied: ptyprocess>=0.5 in /home/akshay/anaconda3/lib/python3.7/site-packages (from pexpect; sys_platform != \"win32\"->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.6.0)\n", 81 | "Requirement already satisfied: parso>=0.5.0 in /home/akshay/anaconda3/lib/python3.7/site-packages (from jedi>=0.10->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0->lux-widget>=0.1.1->lux-api) (0.5.1)\n", 82 | "Building wheels for collected packages: lux-api, lux-widget\n", 83 | " Building wheel for lux-api (setup.py) ... \u001b[?25ldone\n", 84 | "\u001b[?25h Created wheel for lux-api: filename=lux_api-0.2.1-cp37-none-any.whl size=117593 sha256=a48b36e6b3179bf23acd9d6d8e45060266a472282d61a9cb783284d3f9e43381\n", 85 | " Stored in directory: /home/akshay/.cache/pip/wheels/7d/f2/72/6a08419c87357fcf47378e68faeabff55715615601b677a353\n", 86 | " Building wheel for lux-widget (setup.py) ... 
\u001b[?25ldone\n", 87 | "\u001b[?25h Created wheel for lux-widget: filename=lux_widget-0.1.1-cp37-none-any.whl size=1935816 sha256=0572df60c3e6921d3441bef7f99adb09b4b693afae1a8127628a6093f8cb83de\n", 88 | " Stored in directory: /home/akshay/.cache/pip/wheels/69/1c/5d/bc38f4976953e3cb58842423d04ed2aa0d6579b6b5857fc444\n", 89 | "Successfully built lux-api lux-widget\n", 90 | "Installing collected packages: scipy, pandas, altair, lux-widget, lux-api\n", 91 | " Found existing installation: scipy 1.3.1\n", 92 | " Uninstalling scipy-1.3.1:\n", 93 | " Successfully uninstalled scipy-1.3.1\n", 94 | " Found existing installation: pandas 0.25.1\n", 95 | " Uninstalling pandas-0.25.1:\n", 96 | " Successfully uninstalled pandas-0.25.1\n", 97 | "Successfully installed altair-4.1.0 lux-api-0.2.1 lux-widget-0.1.1 pandas-1.1.4 scipy-1.5.4\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "! pip install lux-api" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 9, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "import lux\n", 112 | "import pandas as pd" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 10, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "df = pd.read_csv('titanic.csv')" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 11, 127 | "metadata": { 128 | "scrolled": true 129 | }, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "application/vnd.jupyter.widget-view+json": { 134 | "model_id": "b8542711f1e54d9bbc352a50e9ccbc11", 135 | "version_major": 2, 136 | "version_minor": 0 137 | }, 138 | "text/plain": [ 139 | "Button(description='Toggle Pandas/Lux', layout=Layout(top='5px', width='140px'), style=ButtonStyle())" 140 | ] 141 | }, 142 | "metadata": {}, 143 | "output_type": "display_data" 144 | }, 145 | { 146 | "data": { 147 | "application/vnd.jupyter.widget-view+json": { 148 | "model_id": "af00684885a74f29b0c86ca4c7934499", 149 | "version_major": 2, 150 | "version_minor": 0 151 | }, 152 | "text/plain": [ 153 | "Output()" 154 | ] 155 | }, 156 | "metadata": {}, 157 | "output_type": "display_data" 158 | }, 159 | { 160 | "data": { 161 | "text/plain": [] 162 | }, 163 | "execution_count": 11, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "\n", 170 | "df" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 12, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "name": "stderr", 180 | "output_type": "stream", 181 | "text": [ 182 | "/home/akshay/anaconda3/lib/python3.7/site-packages/lux/vis/VisList.py:305: UserWarning:\n", 183 | "The following issues are ecountered when validating the parsed intent:\n", 184 | "- The input value 'nan' does not exist for the attribute 'Embarked' for the DataFrame.\n" 185 | ] 186 | }, 187 | { 188 | "data": { 189 | "application/vnd.jupyter.widget-view+json": { 190 | "model_id": "fb5c244b1bfb4e09961690fd6523a01a", 191 | "version_major": 2, 192 | "version_minor": 0 193 | }, 194 | "text/plain": [ 195 | "Button(description='Toggle Pandas/Lux', layout=Layout(top='5px', width='140px'), style=ButtonStyle())" 196 | ] 197 | }, 198 | "metadata": {}, 199 | "output_type": "display_data" 200 | }, 201 | { 202 | "data": { 203 | "application/vnd.jupyter.widget-view+json": { 204 | "model_id": "d743879ef24144329970ca103a7bd204", 205 | "version_major": 2, 206 | "version_minor": 0 207 | }, 208 | "text/plain": [ 209 | "Output()" 210 | ] 211 | }, 212 | "metadata": {}, 213 | "output_type": 
"display_data" 214 | }, 215 | { 216 | "data": { 217 | "text/plain": [] 218 | }, 219 | "execution_count": 12, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "df.intent = [\"Age\",\"Fare\"]\n", 226 | "df" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 13, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "vis = df.exported" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 14, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "application/vnd.jupyter.widget-view+json": { 246 | "model_id": "917ad753df8347ffb91f9ffd73d41903", 247 | "version_major": 2, 248 | "version_minor": 0 249 | }, 250 | "text/plain": [ 251 | "LuxWidget(recommendations=[{'action': 'Vis List', 'description': 'Shows a vis list defined by the intent', 'vs…" 252 | ] 253 | }, 254 | "metadata": {}, 255 | "output_type": "display_data" 256 | }, 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "[]" 261 | ] 262 | }, 263 | "execution_count": 14, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "vis" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 15, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "application/vnd.jupyter.widget-view+json": { 280 | "model_id": "1fceb05044d9481f886233375de6c4b5", 281 | "version_major": 2, 282 | "version_minor": 0 283 | }, 284 | "text/plain": [ 285 | "LuxWidget(current_vis={'config': {'view': {'continuousWidth': 400, 'continuousHeight': 300}, 'axis': {'labelCo…" 286 | ] 287 | }, 288 | "metadata": {}, 289 | "output_type": "display_data" 290 | }, 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "" 295 | ] 296 | }, 297 | "execution_count": 15, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "from lux.vis.Vis import Vis\n", 304 | "Vis([\"Embarked=C\",\"Fare\"],df)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 16, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "application/vnd.jupyter.widget-view+json": { 315 | "model_id": "2f8d03f01a4844debf438535fb3af585", 316 | "version_major": 2, 317 | "version_minor": 0 318 | }, 319 | "text/plain": [ 320 | "LuxWidget(recommendations=[{'action': 'Vis List', 'description': 'Shows a vis list defined by the intent', 'vs…" 321 | ] 322 | }, 323 | "metadata": {}, 324 | "output_type": "display_data" 325 | }, 326 | { 327 | "data": { 328 | "text/plain": [ 329 | "[,\n", 330 | " ,\n", 331 | " ,\n", 332 | " ]" 333 | ] 334 | }, 335 | "execution_count": 16, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "from lux.vis.VisList import VisList\n", 342 | "VisList([\"Embarked=?\",\"Pclass\"],df)" 343 | ] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 3", 349 | "language": "python", 350 | "name": "python3" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 3 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython3", 362 | "version": "3.7.4" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 2 367 | } 368 | -------------------------------------------------------------------------------- /OCR .ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import cv2\n", 11 | "import easyocr\n", 12 | "from pylab import rcParams\n", 13 | "from IPython.display import Image\n", 14 | "rcParams['figure.figsize'] = 8, 16" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 12, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stderr", 24 | "output_type": "stream", 25 | "text": [ 26 | "CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.\n", 27 | "Downloading detection model, please wait\n" 28 | ] 29 | }, 30 | { 31 | "ename": "HTTPError", 32 | "evalue": "HTTP Error 403: Forbidden", 33 | "output_type": "error", 34 | "traceback": [ 35 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 36 | "\u001b[0;31mHTTPError\u001b[0m Traceback (most recent call last)", 37 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0measyocr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mreader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0measyocr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'en'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 38 | "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/site-packages/easyocr/easyocr.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, lang_list, gpu)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0murlretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_url\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'detector'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mDETECTOR_PATH\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mcalculate_md5\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDETECTOR_PATH\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mmodel_url\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'detector'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCORRUPT_MSG\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0meprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Download complete'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mcalculate_md5\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDETECTOR_PATH\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mmodel_url\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'detector'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0meprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCORRUPT_MSG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 39 | "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36murlretrieve\u001b[0;34m(url, filename, 
reporthook, data)\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0murl_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_splittype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 247\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mcontextlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclosing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murlopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 248\u001b[0m \u001b[0mheaders\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 40 | "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(url, data, timeout, cafile, capath, cadefault, context)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0mopener\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_opener\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mopener\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 223\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minstall_opener\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 41 | "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, fullurl, data, timeout)\u001b[0m\n\u001b[1;32m 529\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mprocessor\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_response\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0mmeth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprocessor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeth_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 531\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmeth\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 532\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 533\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 42 | 
"\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36mhttp_response\u001b[0;34m(self, request, response)\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;31m# request was successfully received, understood, and accepted.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 639\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m200\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0mcode\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m300\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 640\u001b[0;31m response = self.parent.error(\n\u001b[0m\u001b[1;32m 641\u001b[0m 'http', request, response, code, msg, hdrs)\n\u001b[1;32m 642\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 43 | "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36merror\u001b[0;34m(self, proto, *args)\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhttp_err\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'default'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'http_error_default'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0morig_args\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 569\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_chain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 570\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 571\u001b[0m \u001b[0;31m# XXX probably also want an abstract factory that knows when it makes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 44 | "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36m_call_chain\u001b[0;34m(self, chain, kind, meth_name, *args)\u001b[0m\n\u001b[1;32m 500\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhandler\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mhandlers\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 501\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandler\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeth_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 502\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 503\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 504\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 45 | "\u001b[0;32m~/anaconda3/envs/OpenCV/lib/python3.8/urllib/request.py\u001b[0m in \u001b[0;36mhttp_error_default\u001b[0;34m(self, req, fp, code, msg, hdrs)\u001b[0m\n\u001b[1;32m 647\u001b[0m \u001b[0;32mclass\u001b[0m 
\u001b[0mHTTPDefaultErrorHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBaseHandler\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 648\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mhttp_error_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhdrs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 649\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mHTTPError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhdrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 650\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 651\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mHTTPRedirectHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBaseHandler\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 46 | "\u001b[0;31mHTTPError\u001b[0m: HTTP Error 403: Forbidden" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "import easyocr\n", 52 | "reader = easyocr.Reader(['en'])" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "None\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "import os\n", 70 | "print(os.environ.get(\"MODULE_PATH\"))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 8, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "['_MutableMapping__marker',\n", 82 | " '__abstractmethods__',\n", 83 | " '__class__',\n", 84 | " '__contains__',\n", 85 | " '__delattr__',\n", 86 | " '__delitem__',\n", 87 | " '__dict__',\n", 88 | " '__dir__',\n", 89 | " '__doc__',\n", 90 | " '__eq__',\n", 91 | " '__format__',\n", 92 | " '__ge__',\n", 93 | " '__getattribute__',\n", 94 | " '__getitem__',\n", 95 | " '__gt__',\n", 96 | " '__hash__',\n", 97 | " '__init__',\n", 98 | " '__init_subclass__',\n", 99 | " '__iter__',\n", 100 | " '__le__',\n", 101 | " '__len__',\n", 102 | " '__lt__',\n", 103 | " '__module__',\n", 104 | " '__ne__',\n", 105 | " '__new__',\n", 106 | " '__reduce__',\n", 107 | " '__reduce_ex__',\n", 108 | " '__repr__',\n", 109 | " '__reversed__',\n", 110 | " '__setattr__',\n", 111 | " '__setitem__',\n", 112 | " '__sizeof__',\n", 113 | " '__slots__',\n", 114 | " '__str__',\n", 115 | " '__subclasshook__',\n", 116 | " '__weakref__',\n", 117 | " '_abc_impl',\n", 118 | " '_data',\n", 119 | " 'clear',\n", 120 | " 'copy',\n", 121 | " 'decodekey',\n", 122 | " 'decodevalue',\n", 123 | " 'encodekey',\n", 124 | " 'encodevalue',\n", 125 | " 'get',\n", 126 | " 'items',\n", 127 | " 'keys',\n", 128 | " 'pop',\n", 129 | " 'popitem',\n", 130 | " 'putenv',\n", 131 | " 'setdefault',\n", 132 | " 'unsetenv',\n", 133 | " 'update',\n", 134 | " 'values']" 135 | ] 136 | }, 137 | "execution_count": 8, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "dir(os.environ)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 
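The easyocr.Reader call above failed while downloading the detection model (HTTP 403: Forbidden), so the notebook never reaches the OCR step itself. For reference, a minimal hedged sketch of the usual EasyOCR flow once the model download succeeds (EasyOCR caches models under ~/.EasyOCR by default); 'sample.jpg' is a placeholder path, not a file from this repo:

# Hedged sketch: intended EasyOCR usage once the model download succeeds.
# 'sample.jpg' is an assumed placeholder image path, not part of this repo.
import easyocr

reader = easyocr.Reader(['en'], gpu=False)  # fetches models on first use
results = reader.readtext('sample.jpg')     # list of (bbox, text, confidence)

for bbox, text, confidence in results:
    print(f"{text!r} (confidence {confidence:.2f})")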
| "execution_count": 1, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "import re" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 3, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "str1 = '5,8'" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 5, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "import pandas as pd\n", 171 | "df = pd.DataFrame(['5,8','5,9'])\n" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 6, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/html": [ 182 | "
[pandas HTML table output stripped during extraction; the same df.head() table (column '0', rows '5,8' and '5,9') is preserved in the text/plain output below]\n", 214 | "
" 215 | ], 216 | "text/plain": [ 217 | " 0\n", 218 | "0 5,8\n", 219 | "1 5,9" 220 | ] 221 | }, 222 | "execution_count": 6, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "df.head()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 7, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | " 0\n", 241 | "0 5.8\n", 242 | "1 5.9\n" 243 | ] 244 | } 245 | ], 246 | "source": [ 247 | "print(df.replace(to_replace =',', value = '.', regex = True)) " 248 | ] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python 3", 254 | "language": "python", 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.7.4" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 2 272 | } 273 | -------------------------------------------------------------------------------- /PassiveActive.py: -------------------------------------------------------------------------------- 1 | import streamlit as st1 2 | from styleformer import Styleformer 3 | import torch 4 | sf1 = Styleformer(style = 3) 5 | st1.title('Passive Voice to Active Voice Converter') 6 | st1.write("Please enter your sentence in passive voice") 7 | text1 = st1.text_input('Entered Text') 8 | if st1.button('Convert Passive to Active'): 9 | target_sentence1 = sf1.transfer(text1) 10 | st1.write(target_sentence1) 11 | else: 12 | pass 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-Science 2 | This repo contains Data Science code snippet 3 | 4 | # YouTube channel link : 5 | https://www.youtube.com/@aslearning4818 6 | -------------------------------------------------------------------------------- /Readability.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Collecting readability\n", 13 | " Downloading https://files.pythonhosted.org/packages/26/70/6f8750066255d4d2b82b813dd2550e0bd2bee99d026d14088a7b977cd0fc/readability-0.3.1.tar.gz\n", 14 | "Building wheels for collected packages: readability\n", 15 | " Building wheel for readability (setup.py) ... \u001b[?25ldone\n", 16 | "\u001b[?25h Created wheel for readability: filename=readability-0.3.1-cp37-none-any.whl size=35464 sha256=b432dd772e5a327736e14ea876f8fe36914ef541eba12aed8baf35cb92581908\n", 17 | " Stored in directory: /home/akshay/.cache/pip/wheels/36/3f/65/bc327f4cdd5bff9ff510834e07522f94389e28858311b33b41\n", 18 | "Successfully built readability\n", 19 | "Installing collected packages: readability\n", 20 | "Successfully installed readability-0.3.1\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "! 
pip install readability" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import readability" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 14, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "text = ('Ram is eating mango')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 15, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "results = readability.getmeasures(text, lang='en')" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 16, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "75.87500000000001\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "print(results['readability grades']['FleschReadingEase'])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 17, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "OrderedDict([('readability grades',\n", 81 | " OrderedDict([('Kincaid', 3.6700000000000017),\n", 82 | " ('ARI', -0.5899999999999999),\n", 83 | " ('Coleman-Liau', 0.32178000000000395),\n", 84 | " ('FleschReadingEase', 75.87500000000001),\n", 85 | " ('GunningFogIndex', 1.6),\n", 86 | " ('LIX', 4.0),\n", 87 | " ('SMOGIndex', 3.0),\n", 88 | " ('RIX', 0.0),\n", 89 | " ('DaleChallIndex', 11.7299)])),\n", 90 | " ('sentence info',\n", 91 | " OrderedDict([('characters_per_word', 4.0),\n", 92 | " ('syll_per_word', 1.5),\n", 93 | " ('words_per_sentence', 4.0),\n", 94 | " ('sentences_per_paragraph', 1.0),\n", 95 | " ('type_token_ratio', 1.0),\n", 96 | " ('characters', 16),\n", 97 | " ('syllables', 6),\n", 98 | " ('words', 4),\n", 99 | " ('wordtypes', 4),\n", 100 | " ('sentences', 1),\n", 101 | " ('paragraphs', 1),\n", 102 | " ('long_words', 0),\n", 103 | " ('complex_words', 0),\n", 104 | " ('complex_words_dc', 2)])),\n", 105 | " ('word usage',\n", 106 | " OrderedDict([('tobeverb', 1),\n", 107 | " ('auxverb', 0),\n", 108 | " ('conjunction', 0),\n", 109 | " ('pronoun', 0),\n", 110 | " ('preposition', 0),\n", 111 | " ('nominalization', 0)])),\n", 112 | " ('sentence beginnings',\n", 113 | " OrderedDict([('pronoun', 0),\n", 114 | " ('interrogative', 0),\n", 115 | " ('article', 0),\n", 116 | " ('subordination', 0),\n", 117 | " ('conjunction', 0),\n", 118 | " ('preposition', 0)]))])" 119 | ] 120 | }, 121 | "execution_count": 17, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "results" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 18, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | "-47.979999999999976\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "text2 = ('Asasadas')\n", 145 | "results = readability.getmeasures(text2, lang='en')\n", 146 | "print(results['readability grades']['FleschReadingEase'])" 147 | ] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "Python 3", 153 | "language": "python", 154 | "name": "python3" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.7.4" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 2 171 | } 172 | 
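To consolidate the readability API shown above: the 'readability grades' keys used below come straight from the OrderedDict the notebook printed, so this sketch only rearranges what is already demonstrated, comparing a few grade metrics across texts.

# Sketch: compare several readability grades across texts, using the same
# readability.getmeasures() call and result keys seen in the output above.
import readability

texts = {
    "simple": "Ram is eating mango",
    "gibberish": "Asasadas",
}

for name, text in texts.items():
    grades = readability.getmeasures(text, lang='en')['readability grades']
    print(name,
          "| FleschReadingEase:", round(grades['FleschReadingEase'], 2),
          "| Kincaid:", round(grades['Kincaid'], 2),
          "| GunningFogIndex:", round(grades['GunningFogIndex'], 2))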
-------------------------------------------------------------------------------- /Sentiment Analysis using VADER.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SENTIMENT ANALYSIS DEFINITION" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In sentiment analysis we classify the polarity of a given text at the document, sentence, or feature level. It tells us whether the opinion expressed is positive, negative, or neutral. Going beyond plain polarity, we can also target emotional states such as anger, sadness, and happiness." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# INSTALLING VADER" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "Collecting vaderSentiment\n", 34 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/76/fc/310e16254683c1ed35eeb97386986d6c00bc29df17ce280aed64d55537e9/vaderSentiment-3.3.2-py2.py3-none-any.whl (125kB)\n", 35 | "\u001b[K |████████████████████████████████| 133kB 776kB/s eta 0:00:01\n", 36 | "\u001b[?25hRequirement already satisfied: requests in /home/akshay/anaconda3/lib/python3.7/site-packages (from vaderSentiment) (2.22.0)\n", 37 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/akshay/anaconda3/lib/python3.7/site-packages (from requests->vaderSentiment) (1.24.2)\n", 38 | "Requirement already satisfied: idna<2.9,>=2.5 in /home/akshay/anaconda3/lib/python3.7/site-packages (from requests->vaderSentiment) (2.8)\n", 39 | "Requirement already satisfied: certifi>=2017.4.17 in /home/akshay/anaconda3/lib/python3.7/site-packages (from requests->vaderSentiment) (2019.9.11)\n", 40 | "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /home/akshay/anaconda3/lib/python3.7/site-packages (from requests->vaderSentiment) (3.0.4)\n", 41 | "Installing collected packages: vaderSentiment\n", 42 | "Successfully installed vaderSentiment-3.3.2\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "! pip install vaderSentiment" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "# What is VADER?" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "VADER stands for Valence Aware Dictionary and sEntiment Reasoner. It is a rule-based sentiment analyzer. It consists of a list of lexical features (e.g.
words) which are generally labeled as per their semantic orientation as positive or negative.\n", 62 | "\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 7, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "obj = SentimentIntensityAnalyzer()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 32, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "{'neg': 0.0, 'neu': 0.484, 'pos': 0.516, 'compound': 0.4927}\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "sentence = \"Ram is really good \"\n", 98 | "sentiment_dict = obj.polarity_scores(sentence)\n", 99 | "print(sentiment_dict)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 33, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "{'neg': 0.558, 'neu': 0.442, 'pos': 0.0, 'compound': -0.5849}\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "sentence = \"Rahul is really bad\"\n", 117 | "sentiment_dict = obj.polarity_scores(sentence)\n", 118 | "print(sentiment_dict)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "# Things which manipulate the Sentiment score" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "# 1) PUNCTUATIONS : It increases the intensity" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 34, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}\n", 145 | "{'neg': 0.0, 'neu': 0.484, 'pos': 0.516, 'compound': 0.4926}\n", 146 | "{'neg': 0.0, 'neu': 0.463, 'pos': 0.537, 'compound': 0.5399}\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "print(obj.polarity_scores('Ram is good boy'))\n", 152 | "print(obj.polarity_scores('Ram is good boy!'))\n", 153 | "print(obj.polarity_scores('Ram is good boy!!'))" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "# 2) CAPITALIZATION : It increases the intensity" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 35, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}\n", 173 | "{'neg': 0.0, 'neu': 0.355, 'pos': 0.645, 'compound': 0.5622}\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "print(obj.polarity_scores('Ram is good'))\n", 179 | "print(obj.polarity_scores('Ram is GOOD'))\n" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "# 3) DEGREE MODIFIERS" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 28, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}\n", 199 | "{'neg': 0.0, 'neu': 0.484, 'pos': 0.516, 'compound': 0.4927}\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "print(obj.polarity_scores('Ram is good'))\n", 205 | "print(obj.polarity_scores('Ram is 
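Every dictionary printed in this notebook hinges on the 'compound' score. Below is a small sketch of mapping that score to a label using the +/-0.05 thresholds suggested in the VADER documentation (the thresholds are a documented convention, not something defined in this notebook):

# Sketch: map VADER's compound score to a polarity label using the
# +/-0.05 cutoffs recommended in the VADER documentation.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

def vader_label(sentence: str) -> str:
    compound = analyzer.polarity_scores(sentence)['compound']
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'

print(vader_label('Ram is really good'))   # positive
print(vader_label('Rahul is really bad'))  # negative
print(vader_label('That Hotel'))           # neutral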
very good'))" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "# 4) CONJUNCTIONS" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 29, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}\n", 225 | "{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "print(obj.polarity_scores('Ram is good'))\n", 231 | "print(obj.polarity_scores('Ram is good,but he is also naughty sometimes'))" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "# DOES VADER HANDLE SLANG AND EMOTICONS?" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 24, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}\n", 251 | "{'neg': 0.618, 'neu': 0.382, 'pos': 0.0, 'compound': -0.4995}\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "print(obj.polarity_scores(\"That Hotel\"))\n", 257 | "print(obj.polarity_scores(\"That Hotel SUX\"))" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 25, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "name": "stdout", 267 | "output_type": "stream", 268 | "text": [ 269 | "{'neg': 0.0, 'neu': 0.556, 'pos': 0.444, 'compound': 0.8016}\n", 270 | "{'neg': 0.0, 'neu': 0.574, 'pos': 0.426, 'compound': 0.7713}\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "print(obj.polarity_scores(\"Your :) is the most beautiful thing I have ever seen\"))\n", 276 | "print(obj.polarity_scores(\"Your smile is the most beautiful thing I have ever seen\"))" 277 | ] 278 | } 279 | ], 280 | "metadata": { 281 | "kernelspec": { 282 | "display_name": "Python 3", 283 | "language": "python", 284 | "name": "python3" 285 | }, 286 | "language_info": { 287 | "codemirror_mode": { 288 | "name": "ipython", 289 | "version": 3 290 | }, 291 | "file_extension": ".py", 292 | "mimetype": "text/x-python", 293 | "name": "python", 294 | "nbconvert_exporter": "python", 295 | "pygments_lexer": "ipython3", 296 | "version": "3.7.4" 297 | } 298 | }, 299 | "nbformat": 4, 300 | "nbformat_minor": 2 301 | } 302 | -------------------------------------------------------------------------------- /Speechtotext.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Speechtotext.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | } 13 | }, 14 | "cells": [ 15 | { 16 | "cell_type": "code", 17 | "metadata": { 18 | "id": "vU_eMPLMbV2b", 19 | "outputId": "19885023-778c-422f-986b-11dbc76c2e2d", 20 | "colab": { 21 | "base_uri": "https://localhost:8080/" 22 | } 23 | }, 24 | "source": [ 25 | "! 
pip3 install SpeechRecognition pydub" 26 | ], 27 | "execution_count": null, 28 | "outputs": [ 29 | { 30 | "output_type": "stream", 31 | "text": [ 32 | "Requirement already satisfied: SpeechRecognition in /usr/local/lib/python3.6/dist-packages (3.8.1)\n", 33 | "Requirement already satisfied: pydub in /usr/local/lib/python3.6/dist-packages (0.24.1)\n" 34 | ], 35 | "name": "stdout" 36 | } 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "id": "ibpYddfDd7Us" 43 | }, 44 | "source": [ 45 | "1) CMU Sphinx (offline)\n", 46 | "\n", 47 | "2) Google Speech Recognition\n", 48 | "\n", 49 | "3) Google Cloud Speech API\n", 50 | "\n", 51 | "4) Wit.ai\n", 52 | "\n", 53 | "5) Microsoft Bing Voice Recognition\n", 54 | "\n", 55 | "6) Houndify API\n", 56 | "\n", 57 | "7) IBM Speech To Text\n", 58 | "\n", 59 | "8) Snowboy Hotword Detection (offline)\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "metadata": { 65 | "id": "GuhAgOzrcKG1" 66 | }, 67 | "source": [ 68 | "import speech_recognition as sr" 69 | ], 70 | "execution_count": null, 71 | "outputs": [] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "metadata": { 76 | "id": "ArV_Nw3OcNe4" 77 | }, 78 | "source": [ 79 | "r = sr.Recognizer()" 80 | ], 81 | "execution_count": null, 82 | "outputs": [] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "id": "03wcn-_De8mD" 88 | }, 89 | "source": [ 90 | "filename = \"bcd.wav\"" 91 | ], 92 | "execution_count": null, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "metadata": { 98 | "id": "EkEY34uucfiF", 99 | "outputId": "ca9e64e0-6275-4e48-b722-40ee661b2470", 100 | "colab": { 101 | "base_uri": "https://localhost:8080/" 102 | } 103 | }, 104 | "source": [ 105 | "\n", 106 | "with sr.AudioFile(filename) as source:\n", 107 | " audio_data = r.record(source)\n", 108 | " text = r.recognize_google(audio_data)\n", 109 | " print(text)" 110 | ], 111 | "execution_count": null, 112 | "outputs": [ 113 | { 114 | "output_type": "stream", 115 | "text": [ 116 | "kids are talkin by the door\n" 117 | ], 118 | "name": "stdout" 119 | } 120 | ] 121 | } 122 | ] 123 | } -------------------------------------------------------------------------------- /Whisper.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaytheau/Data-Science/e5a1a3510f9afa9e04c9809f12e513cb6eec9188/Whisper.pptx -------------------------------------------------------------------------------- /YouTube_recommendation_pinecone.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "id": "whr9pySjG8yd", 8 | "collapsed": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "! pip install pandas\n", 13 | "! pip install pytube\n", 14 | "! pip install numpy\n", 15 | "! pip install pinecone-client\n", 16 | "! 
pip install git+https://github.com/openai/whisper.git" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "id": "tfXF3-S-IU_L" 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "# Import the modules\n", 28 | "import os\n", 29 | "import torch\n", 30 | "import whisper\n", 31 | "import pinecone\n", 32 | "import numpy as np\n", 33 | "import pandas as pd\n", 34 | "from pytube import YouTube" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "id": "nRJaOc6-IaOV" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "def video_to_audio(video_url, destination):\n", 46 | "\n", 47 | " # Get the video\n", 48 | " video = YouTube(video_url)\n", 49 | "\n", 50 | " # Convert video to Audio\n", 51 | " audio = video.streams.filter(only_audio=True).first()\n", 52 | "\n", 53 | " # Save to destination\n", 54 | " output = audio.download(output_path = destination)\n", 55 | "\n", 56 | " name, ext = os.path.splitext(output)\n", 57 | " new_file = name + '.mp3'\n", 58 | "\n", 59 | " # Replace spaces with \"_\"\n", 60 | " new_file = new_file.replace(\" \", \"_\")\n", 61 | "\n", 62 | " # Change the name of the file\n", 63 | " os.rename(output, new_file)\n", 64 | "\n", 65 | " return new_file\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "id": "4n13nfbQIfZq" 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "%%bash\n", 77 | "mkdir \"audio_data\"" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "id": "O-hoBEi8InCL" 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "# Create URL column\n", 89 | "audio_path = \"audio_data\"\n", 90 | "\n", 91 | "# Have just provided a sample of links for experimentation purpose\n", 92 | "list_videos = [\"https://www.youtube.com/watch?v=IdTMDpizis8\",\n", 93 | " \"https://www.youtube.com/watch?v=fLeJJPxua3E\",\n", 94 | " \"https://www.youtube.com/watch?v=z3FA2kALScU\"]\n", 95 | "# Create dataframe\n", 96 | "transcription_df = pd.DataFrame(list_videos, columns=['URLs'])" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "id": "yeuLSMbkIoND" 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "\n", 108 | "transcription_df.head()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "id": "jbGxPPwfIrFQ" 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "# Create the files_name\n", 120 | "transcription_df[\"file_name\"] = transcription_df[\"URLs\"].apply(lambda url: video_to_audio(url, audio_path))\n", 121 | "transcription_df.head()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "id": "uuFeg71oIt4e" 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "# Set the device\n", 133 | "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 134 | "\n", 135 | "# Load the model\n", 136 | "whisper_model = whisper.load_model(\"base\", device=device)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "id": "490_i7XAOIf6" 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "def audio_to_text(audio_file):\n", 148 | "\n", 149 | " return whisper_model.transcribe(audio_file)[\"text\"]" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "id": "DelG9fXBOLLf" 157 | }, 158 | "outputs": [], 159 | 
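The audio_to_text helper above is a thin wrapper over Whisper's transcribe call. In isolation, the call looks like the sketch below; 'sample.mp3' is a placeholder path, not one of the files downloaded by this notebook:

# Hedged sketch of the bare Whisper call wrapped by audio_to_text() above.
# Requires ffmpeg on PATH; 'sample.mp3' is an assumed placeholder path.
import torch
import whisper

device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("base", device=device)

result = model.transcribe("sample.mp3")
print(result["text"])      # full transcript
print(result["language"])  # detected language code, e.g. 'en'

for seg in result["segments"][:3]:  # per-segment timestamps
    print(round(seg["start"], 1), round(seg["end"], 1), seg["text"])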
"source": [ 160 | "# Apply the function to all the audio files\n", 161 | "transcription_df[\"transcriptions\"] = transcription_df[\"file_name\"].apply(lambda f_name: audio_to_text(f_name))\n", 162 | "\n", 163 | "\n", 164 | "# Show the first five rows\n", 165 | "transcription_df.head()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "id": "zQ0ixNobOUTS" 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "transcription_df.head()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "source": [ 182 | "model_id = \"sentence-transformers/all-MiniLM-L6-v2\"" 183 | ], 184 | "metadata": { 185 | "id": "DSfRz_vU0c3j" 186 | }, 187 | "execution_count": null, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "source": [ 193 | "from getpass import getpass\n", 194 | "os.environ[\"HUGGING_FACE_TOKEN\"] = getpass('Enter Hugging Face token: ')\n", 195 | "hf_token = os.getenv('HUGGING_FACE_TOKEN')" 196 | ], 197 | "metadata": { 198 | "id": "DrhHaFbzfl2J" 199 | }, 200 | "execution_count": null, 201 | "outputs": [] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "source": [ 206 | "import requests\n", 207 | "\n", 208 | "api_url = f\"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}\"\n", 209 | "headers = {\"Authorization\": f\"Bearer {hf_token}\"}" 210 | ], 211 | "metadata": { 212 | "id": "BH9hR2JV0pKc" 213 | }, 214 | "execution_count": null, 215 | "outputs": [] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "source": [ 220 | "def query(texts):\n", 221 | " response = requests.post(api_url, headers=headers, json={\"inputs\": texts, \"options\":{\"wait_for_model\":True}})\n", 222 | " return response.json()" 223 | ], 224 | "metadata": { 225 | "id": "sC9otQxe0u_0" 226 | }, 227 | "execution_count": null, 228 | "outputs": [] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "source": [ 233 | "transcription_df[\"embedding\"] = transcription_df[\"transcriptions\"].astype(str).apply(query)" 234 | ], 235 | "metadata": { 236 | "id": "bM-kZru01LuN" 237 | }, 238 | "execution_count": null, 239 | "outputs": [] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "id": "XXxw58woOggL" 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "transcription_df.head()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "id": "YZQveIHvOi2d" 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "vector_dim = transcription_df.iloc[2].embedding\n", 261 | "len(vector_dim)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "source": [ 267 | "import os\n", 268 | "\n", 269 | "\n", 270 | "os.environ[\"PINECONE_API_KEY\"] = getpass('Enter your Pinecone API Key: ')" 271 | ], 272 | "metadata": { 273 | "id": "qKfNJ0AB3DKG" 274 | }, 275 | "execution_count": null, 276 | "outputs": [] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "source": [ 281 | "os.environ[\"PINECONE_ENVIRONMENT\"] = getpass('Enter your Pinecone Environment: ')" 282 | ], 283 | "metadata": { 284 | "id": "1-v9xbae3Hpm" 285 | }, 286 | "execution_count": null, 287 | "outputs": [] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "source": [ 292 | "# find API key in console at app.pinecone.io\n", 293 | "api_key = os.getenv('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n", 294 | "# find ENV (cloud region) next to API key in console\n", 295 | "env = os.getenv('PINECONE_ENVIRONMENT') or 'PINECONE_ENVIRONMENT'\n", 296 | "\n", 297 | "# Initialize connection to 
pinecone\n", 298 | "pinecone.init(\n", 299 | " api_key=api_key,\n", 300 | " environment=env\n", 301 | ")" 302 | ], 303 | "metadata": { 304 | "id": "8iseMCvslEgA" 305 | }, 306 | "execution_count": null, 307 | "outputs": [] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": { 313 | "id": "iJFDt2ayOlKa" 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | "\n", 318 | "\n", 319 | "# Index params\n", 320 | "my_index_name = \"audio-search\"\n", 321 | "vector_dim = len(transcription_df.iloc[0].embedding)\n", 322 | "\n", 323 | "if my_index_name not in pinecone.list_indexes():\n", 324 | " # Create the index\n", 325 | " pinecone.create_index(name = my_index_name,\n", 326 | " dimension=vector_dim,\n", 327 | " metric=\"cosine\", shards=1,\n", 328 | " pod_type='s1.x1')\n", 329 | "# Connect to the index\n", 330 | "my_index = pinecone.Index(index_name = my_index_name)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "id": "ytD9KY83OrEI" 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "transcription_df[\"vector_id\"] = transcription_df.index\n", 342 | "transcription_df[\"vector_id\"] = transcription_df[\"vector_id\"].apply(str)\n", 343 | "\n", 344 | "# Get all the metadata\n", 345 | "final_metadata = []\n", 346 | "\n", 347 | "for index in range(len(transcription_df)):\n", 348 | " final_metadata.append({\n", 349 | " 'ID': index,\n", 350 | " 'url': transcription_df.iloc[index].URLs,\n", 351 | " 'transcription': transcription_df.iloc[index].transcriptions\n", 352 | " })\n", 353 | "\n", 354 | "audio_IDs = transcription_df.vector_id.tolist()\n", 355 | "audio_embeddings = [arr for arr in transcription_df.embedding]\n", 356 | "\n", 357 | "# Create the single list of dictionary format to insert\n", 358 | "data_to_upsert = list(zip(audio_IDs, audio_embeddings, final_metadata))\n", 359 | "\n", 360 | "# Upload the final data\n", 361 | "my_index.upsert(vectors = data_to_upsert)\n", 362 | "\n", 363 | "# Show information about the vector index\n", 364 | "my_index.describe_index_stats()" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "id": "dTAJpC5JOte6" 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "N = 2\n", 376 | "my_query_embedding = transcription_df.embedding[0]\n", 377 | "\n", 378 | "# Run the Query Search\n", 379 | "my_index.query(my_query_embedding, top_k=N, include_metadata=True)" 380 | ] 381 | } 382 | ], 383 | "metadata": { 384 | "accelerator": "GPU", 385 | "colab": { 386 | "provenance": [] 387 | }, 388 | "kernelspec": { 389 | "display_name": "Python 3", 390 | "name": "python3" 391 | }, 392 | "language_info": { 393 | "name": "python" 394 | } 395 | }, 396 | "nbformat": 4, 397 | "nbformat_minor": 0 398 | } -------------------------------------------------------------------------------- /YoutubeComments.csv: -------------------------------------------------------------------------------- 1 | Comments 2 | Super explanations so good 3 | Good 4 | Thankyou!! 5 | Excellent explanation... 6 | Nice 7 | Wowwww 8 | It was very clear and useful ..... 9 | Very helpful and easy to understand 10 | Super video bhai 11 | Thank you so much bro. Excellent explanation. Absolutely superb. keep it up 12 | ek number explanation 13 | I like your teaching skill and use 3 paper for each concept if any help for YouTube views then send me mail 14 | Thanking you sir 15 | Love you thinks bor 16 | you saved my semester thnx :D 17 | good job bro..... 
18 | Ek number explanation 19 | Wrong 20 | "Very raw , need improvement" 21 | The best ever seen 22 | This just too good 23 | Wow you are the best 24 | Simple and elegant explanation 25 | Grt explanation 26 | nice 27 | good job bro..... 28 | Very good 29 | Too grt 30 | Nice video 31 | Keept it up 32 | -------------------------------------------------------------------------------- /face-mask-detector-project.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaytheau/Data-Science/e5a1a3510f9afa9e04c9809f12e513cb6eec9188/face-mask-detector-project.zip -------------------------------------------------------------------------------- /medspacydemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7c58cc64", 6 | "metadata": {}, 7 | "source": [ 8 | "Clinical NLP is a specialization of NLP that allows computers to understand the rich meaning that lies behind a doctor’s written analysis of a patient.\n", 9 | "\n", 10 | "Normal NLP engines use large corpora of text, usually books or other written documents, to determine how language is structured and how grammar is formed." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "ca8b9bde", 16 | "metadata": {}, 17 | "source": [ 18 | "Entity extraction: to surface relevant clinical concepts from unstructured data.\n", 19 | "\n", 20 | "Contextualization: to decipher the doctor’s meaning when they mention a concept. For example, when doctors deny a patient has a condition or talk about a patient’s history.\n", 21 | "\n", 22 | "Knowledge graph: to understand how clinical concepts are interrelated, like the fact that both fentanyl and hydrocodone are opiates." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "id": "2433023c", 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "Collecting medspacy\n", 36 | " Using cached medspacy-1.0.0-py3-none-any.whl (146 kB)\n", 37 | "Requirement already satisfied: jsonschema in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy) (4.16.0)\n", 38 | "Collecting PyRuSH>=1.0.6\n", 39 | " Using cached PyRuSH-1.0.8-cp39-cp39-macosx_10_9_x86_64.whl (63 kB)\n", 40 | "Collecting pysbd==0.3.4\n", 41 | " Using cached pysbd-0.3.4-py3-none-any.whl (71 kB)\n", 42 | "Collecting medspacy-quickumls==2.6\n", 43 | " Using cached medspacy_quickumls-2.6-py3-none-any.whl (31 kB)\n", 44 | "Collecting spacy>=3.4.1\n", 45 | " Using cached spacy-3.4.2-cp39-cp39-macosx_10_9_x86_64.whl (6.7 MB)\n", 46 | "Requirement already satisfied: numpy>=1.8.2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (1.21.5)\n", 47 | "Collecting leveldb>=0.193\n", 48 | " Using cached leveldb-0.201.tar.gz (236 kB)\n", 49 | " Preparing metadata (setup.py) ... \u001b[?25ldone\n", 50 | "\u001b[?25hRequirement already satisfied: pytest>=6 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (7.1.2)\n", 51 | "Requirement already satisfied: nltk>=3.3 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (3.7)\n", 52 | "Collecting unqlite>=0.8.1\n", 53 | " Using cached unqlite-0.9.3.tar.gz (575 kB)\n", 54 | " Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", 55 | "\u001b[?25hRequirement already satisfied: unidecode>=0.4.19 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (1.2.0)\n", 56 | "Requirement already satisfied: six in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from medspacy-quickumls==2.6->medspacy) (1.16.0)\n", 57 | "Collecting medspacy-simstring>=2.1\n", 58 | " Using cached medspacy_simstring-2.1.tar.gz (68 kB)\n", 59 | " Preparing metadata (setup.py) ... \u001b[?25ldone\n", 60 | "\u001b[?25hRequirement already satisfied: Cython<3.0,>=0.25 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from PyRuSH>=1.0.6->medspacy) (0.29.32)\n", 61 | "Collecting PyFastNER>=1.0.8\n", 62 | " Using cached PyFastNER-1.0.8-py3-none-any.whl (18 kB)\n", 63 | "Requirement already satisfied: setuptools in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from PyRuSH>=1.0.6->medspacy) (63.4.1)\n", 64 | "Collecting quicksectx>=0.3.5\n", 65 | " Using cached quicksectx-0.3.5-cp39-cp39-macosx_10_9_x86_64.whl (283 kB)\n", 66 | "Collecting thinc<8.2.0,>=8.1.0\n", 67 | " Using cached thinc-8.1.5-cp39-cp39-macosx_10_9_x86_64.whl (768 kB)\n", 68 | "Collecting cymem<2.1.0,>=2.0.2\n", 69 | " Using cached cymem-2.0.7-cp39-cp39-macosx_10_9_x86_64.whl (32 kB)\n", 70 | "Collecting typer<0.5.0,>=0.3.0\n", 71 | " Using cached typer-0.4.2-py3-none-any.whl (27 kB)\n", 72 | "Collecting spacy-legacy<3.1.0,>=3.0.10\n", 73 | " Using cached spacy_legacy-3.0.10-py2.py3-none-any.whl (21 kB)\n", 74 | "Collecting langcodes<4.0.0,>=3.2.0\n", 75 | " Using cached langcodes-3.3.0-py3-none-any.whl (181 kB)\n", 76 | "Collecting catalogue<2.1.0,>=2.0.6\n", 77 | " Using cached catalogue-2.0.8-py3-none-any.whl (17 kB)\n", 78 | "Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (0.10.1)\n", 79 | "Requirement already satisfied: jinja2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (2.11.3)\n", 80 | "Collecting srsly<3.0.0,>=2.4.3\n", 81 | " Using cached srsly-2.4.5-cp39-cp39-macosx_10_9_x86_64.whl (491 kB)\n", 82 | "Collecting pathy>=0.3.5\n", 83 | " Using cached pathy-0.6.2-py3-none-any.whl (42 kB)\n", 84 | "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (4.64.1)\n", 85 | "Collecting spacy-loggers<2.0.0,>=1.0.0\n", 86 | " Using cached spacy_loggers-1.0.3-py3-none-any.whl (9.3 kB)\n", 87 | "Collecting pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4\n", 88 | " Using cached pydantic-1.10.2-cp39-cp39-macosx_10_9_x86_64.whl (3.2 MB)\n", 89 | "Collecting murmurhash<1.1.0,>=0.28.0\n", 90 | " Using cached murmurhash-1.0.9-cp39-cp39-macosx_10_9_x86_64.whl (18 kB)\n", 91 | "Collecting preshed<3.1.0,>=3.0.2\n", 92 | " Using cached preshed-3.0.8-cp39-cp39-macosx_10_9_x86_64.whl (107 kB)\n", 93 | "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (2.28.1)\n", 94 | "Requirement already satisfied: packaging>=20.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from spacy>=3.4.1->medspacy) (21.3)\n", 95 | "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from jsonschema->medspacy) (0.18.0)\n", 96 | "Requirement already satisfied: attrs>=17.4.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from jsonschema->medspacy) 
(21.4.0)\n", 97 | "Requirement already satisfied: regex>=2021.8.3 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from nltk>=3.3->medspacy-quickumls==2.6->medspacy) (2022.7.9)\n", 98 | "Requirement already satisfied: joblib in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from nltk>=3.3->medspacy-quickumls==2.6->medspacy) (1.1.0)\n", 99 | "Requirement already satisfied: click in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from nltk>=3.3->medspacy-quickumls==2.6->medspacy) (8.0.4)\n", 100 | "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from packaging>=20.0->spacy>=3.4.1->medspacy) (3.0.9)\n", 101 | "Requirement already satisfied: smart-open<6.0.0,>=5.2.1 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pathy>=0.3.5->spacy>=3.4.1->medspacy) (5.2.1)\n", 102 | "Requirement already satisfied: typing-extensions>=4.1.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4->spacy>=3.4.1->medspacy) (4.3.0)\n", 103 | "Requirement already satisfied: iniconfig in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pytest>=6->medspacy-quickumls==2.6->medspacy) (1.1.1)\n", 104 | "Requirement already satisfied: pluggy<2.0,>=0.12 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pytest>=6->medspacy-quickumls==2.6->medspacy) (1.0.0)\n", 105 | "Requirement already satisfied: py>=1.8.2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pytest>=6->medspacy-quickumls==2.6->medspacy) (1.11.0)\n", 106 | "Requirement already satisfied: tomli>=1.0.0 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from pytest>=6->medspacy-quickumls==2.6->medspacy) (2.0.1)\n", 107 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy>=3.4.1->medspacy) (1.26.11)\n", 108 | "Requirement already satisfied: idna<4,>=2.5 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy>=3.4.1->medspacy) (3.3)\n", 109 | "Requirement already satisfied: charset-normalizer<3,>=2 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy>=3.4.1->medspacy) (2.0.4)\n", 110 | "Requirement already satisfied: certifi>=2017.4.17 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy>=3.4.1->medspacy) (2022.9.24)\n", 111 | "Collecting confection<1.0.0,>=0.0.1\n", 112 | " Using cached confection-0.0.3-py3-none-any.whl (32 kB)\n", 113 | "Collecting blis<0.8.0,>=0.7.8\n", 114 | " Using cached blis-0.7.9-cp39-cp39-macosx_10_9_x86_64.whl (6.1 MB)\n", 115 | "Requirement already satisfied: MarkupSafe>=0.23 in /Users/ark/opt/anaconda3/lib/python3.9/site-packages (from jinja2->spacy>=3.4.1->medspacy) (2.0.1)\n", 116 | "Building wheels for collected packages: leveldb, medspacy-simstring, unqlite\n", 117 | " Building wheel for leveldb (setup.py) ... \u001b[?25ldone\n", 118 | "\u001b[?25h Created wheel for leveldb: filename=leveldb-0.201-cp39-cp39-macosx_10_9_x86_64.whl size=159743 sha256=4ceb82328451b65c3a130b5ff48412bffa38a6a02949d6190066279643d4f0bd\n", 119 | " Stored in directory: /Users/ark/Library/Caches/pip/wheels/c7/5d/9f/043268a081577de68513e3a0c1919162406abd711f58962e24\n", 120 | " Building wheel for medspacy-simstring (setup.py) ... 
\u001b[?25ldone\n", 121 | "\u001b[?25h Created wheel for medspacy-simstring: filename=medspacy_simstring-2.1-cp39-cp39-macosx_10_9_x86_64.whl size=100820 sha256=de56fe7f8c0a1f0b2d189e3b64eacbb7449d609a85e19d5031a327b00a57ad5e\n", 122 | " Stored in directory: /Users/ark/Library/Caches/pip/wheels/99/f4/d1/233bc6c0bd14c4102a06bf21fc03a116e9d830db4f6595d109\n", 123 | " Building wheel for unqlite (setup.py) ... \u001b[?25ldone\n", 124 | "\u001b[?25h Created wheel for unqlite: filename=unqlite-0.9.3-cp39-cp39-macosx_10_9_x86_64.whl size=323498 sha256=054aff9da5bf5825522d7e518beff4d659c11fb3ea7166b487828d7bf1b1748f\n", 125 | " Stored in directory: /Users/ark/Library/Caches/pip/wheels/cd/9c/52/fb16aba35ce11954e9742a1f1b7dfa5e878dd94c198d3cf254\n", 126 | "Successfully built leveldb medspacy-simstring unqlite\n" 127 | ] 128 | }, 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "Installing collected packages: medspacy-simstring, leveldb, cymem, unqlite, typer, spacy-loggers, spacy-legacy, quicksectx, pysbd, pydantic, murmurhash, langcodes, catalogue, blis, srsly, PyFastNER, preshed, pathy, confection, thinc, spacy, PyRuSH, medspacy-quickumls, medspacy\n", 134 | "Successfully installed PyFastNER-1.0.8 PyRuSH-1.0.8 blis-0.7.9 catalogue-2.0.8 confection-0.0.3 cymem-2.0.7 langcodes-3.3.0 leveldb-0.201 medspacy-1.0.0 medspacy-quickumls-2.6 medspacy-simstring-2.1 murmurhash-1.0.9 pathy-0.6.2 preshed-3.0.8 pydantic-1.10.2 pysbd-0.3.4 quicksectx-0.3.5 spacy-3.4.2 spacy-legacy-3.0.10 spacy-loggers-1.0.3 srsly-2.4.5 thinc-8.1.5 typer-0.4.2 unqlite-0.9.3\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "! pip install medspacy" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 3, 145 | "id": "060c8922", 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "['medspacy_pyrush', 'medspacy_target_matcher', 'medspacy_context']\n" 153 | ] 154 | }, 155 | { 156 | "data": { 157 | "text/html": [ 158 | "

Past Medical History:
1. \n", 159 | "\n", 160 | " Atrial fibrillation\n", 161 | " PROBLEM\n", 162 | "\n", 163 | "
2. \n", 164 | "\n", 165 | " Type II Diabetes Mellitus\n", 166 | " PROBLEM\n", 167 | "\n", 168 | "

Assessment and Plan:
There is \n", 169 | "\n", 170 | " no evidence of\n", 171 | " NEGATED_EXISTENCE\n", 172 | "\n", 173 | " \n", 174 | "\n", 175 | " pneumonia\n", 176 | " PROBLEM\n", 177 | "\n", 178 | ". Continue \n", 179 | "\n", 180 | " warfarin\n", 181 | " MEDICATION\n", 182 | "\n", 183 | " for \n", 184 | "\n", 185 | " Afib\n", 186 | " PROBLEM\n", 187 | "\n", 188 | ". Follow up for management of \n", 189 | "\n", 190 | " type 2 DM\n", 191 | " PROBLEM\n", 192 | "\n", 193 | ".
" 194 | ], 195 | "text/plain": [ 196 | "" 197 | ] 198 | }, 199 | "metadata": {}, 200 | "output_type": "display_data" 201 | } 202 | ], 203 | "source": [ 204 | "import medspacy\n", 205 | "from medspacy.ner import TargetRule\n", 206 | "from medspacy.visualization import visualize_ent\n", 207 | "\n", 208 | "nlp = medspacy.load()\n", 209 | "print(nlp.pipe_names)\n", 210 | "\n", 211 | "text = \"\"\"\n", 212 | "Past Medical History:\n", 213 | "1. Atrial fibrillation\n", 214 | "2. Type II Diabetes Mellitus\n", 215 | "\n", 216 | "Assessment and Plan:\n", 217 | "There is no evidence of pneumonia. Continue warfarin for Afib. Follow up for management of type 2 DM.\n", 218 | "\"\"\"\n", 219 | "\n", 220 | "target_matcher = nlp.get_pipe(\"medspacy_target_matcher\")\n", 221 | "target_rules = [\n", 222 | " TargetRule(\"atrial fibrillation\", \"PROBLEM\"),\n", 223 | " TargetRule(\"atrial fibrillation\", \"PROBLEM\", pattern=[{\"LOWER\": \"afib\"}]),\n", 224 | " TargetRule(\"pneumonia\", \"PROBLEM\"),\n", 225 | " TargetRule(\"Type II Diabetes Mellitus\", \"PROBLEM\", \n", 226 | " pattern=[\n", 227 | " {\"LOWER\": \"type\"},\n", 228 | " {\"LOWER\": {\"IN\": [\"2\", \"ii\", \"two\"]}},\n", 229 | " {\"LOWER\": {\"IN\": [\"dm\", \"diabetes\"]}},\n", 230 | " {\"LOWER\": \"mellitus\", \"OP\": \"?\"}\n", 231 | " ]),\n", 232 | " TargetRule(\"warfarin\", \"MEDICATION\")\n", 233 | "]\n", 234 | "target_matcher.add(target_rules)\n", 235 | "\n", 236 | "doc = nlp(text)\n", 237 | "visualize_ent(doc)" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | "kernelspec": { 243 | "display_name": "Python 3 (ipykernel)", 244 | "language": "python", 245 | "name": "python3" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython3", 257 | "version": "3.9.13" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 5 262 | } 263 | -------------------------------------------------------------------------------- /sentimentanalysis_usingbert.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """SentimentAnalysis usingBERT.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1Und9XiLKRld1ELczxS1Ht-no-c70kYsh 8 | """ 9 | 10 | from transformers import pipeline 11 | st = f"What a great guy he is good and awesome." 12 | seq = pipeline(task="text-classification", model='nlptown/bert-base-multilingual-uncased-sentiment') 13 | print(f"Result: { seq(st) }") 14 | 15 | ! 
pip install transformers  # Colab shell magic: run this install before the imports above, or from a terminal
--------------------------------------------------------------------------------
/test script.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | from keras.models import load_model
 4 | 
 5 | model = load_model("model2-001.h5")  # pre-trained mask classifier; expects 150x150 RGB crops
 6 | results = {0: 'without mask', 1: 'mask'}
 7 | GR_dict = {0: (0, 0, 255), 1: (0, 255, 0)}  # BGR: red box for "without mask", green for "mask"
 8 | rect_size = 4  # downscale factor used to speed up face detection
 9 | cap = cv2.VideoCapture(0)
10 | # Use the cascade file bundled with opencv-python rather than a hard-coded per-user path
11 | haarcascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
12 | 
13 | def main():
14 |     while True:
15 |         rval, im = cap.read()
16 |         if not rval:
17 |             break
18 |         im = cv2.flip(im, 1)  # mirror the frame
19 | 
20 |         # Detect faces on a downscaled copy, then scale the boxes back up
21 |         rerect_size = cv2.resize(im, (im.shape[1] // rect_size, im.shape[0] // rect_size))
22 |         faces = haarcascade.detectMultiScale(rerect_size)
23 |         for f in faces:
24 |             (x, y, w, h) = [v * rect_size for v in f]
25 | 
26 |             face_img = im[y:y+h, x:x+w]
27 |             resized = cv2.resize(face_img, (150, 150))
28 |             normalized = resized / 255.0
29 |             reshaped = np.reshape(normalized, (1, 150, 150, 3))
30 |             result = model.predict(reshaped)
31 | 
32 |             label = np.argmax(result, axis=1)[0]
33 | 
34 |             cv2.rectangle(im, (x, y), (x+w, y+h), GR_dict[label], 2)
35 |             cv2.rectangle(im, (x, y-40), (x+w, y), GR_dict[label], -1)
36 |             cv2.putText(im, results[label], (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
37 |         cv2.imshow('LIVE', im)
38 |         key = cv2.waitKey(10)
39 | 
40 |         if key == 27:  # Esc to quit
41 |             break
42 |     cap.release()
43 |     cv2.destroyAllWindows()
44 | 
45 | if __name__ == "__main__":
46 |     main()
47 | 
--------------------------------------------------------------------------------
/text_summarization.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Text Summarization.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1uxDowTb9Au_kgXioUxUs8E19ZjD4phkZ
 8 | """
 9 | 
10 | # ! pip install transformers  (Colab shell magic; install from a terminal when running as a plain script)
11 | 
12 | from transformers import BartTokenizer, BartForConditionalGeneration
13 | 
14 | model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
15 | tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
16 | 
17 | ARTICLE_TO_SUMMARIZE = "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."
18 | 
19 | inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors='pt')
20 | 
21 | # early_stopping only takes effect with beam search, so enable beams explicitly
22 | summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=500, early_stopping=True)
23 | 
24 | print([tokenizer.decode(g, skip_special_tokens=True) for g in summary_ids])
25 | 
--------------------------------------------------------------------------------
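A minimal follow-up sketch for the medspacy demo notebook above, assuming the default pipeline that medspacy.load() builds (the medspacy_pyrush, medspacy_target_matcher, and medspacy_context components printed in the notebook); it is not a cell from that notebook, and it shows how the negation that visualize_ent renders can also be read programmatically:

import medspacy
from medspacy.ner import TargetRule

nlp = medspacy.load()
nlp.get_pipe("medspacy_target_matcher").add([TargetRule("pneumonia", "PROBLEM")])

doc = nlp("There is no evidence of pneumonia.")
for ent in doc.ents:
    # the medspacy_context pipe sets assertion attributes on each entity
    print(ent.text, ent.label_, "negated:", ent._.is_negated)
# expected output: pneumonia PROBLEM negated: True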
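A note on sentimentanalysis_usingbert.py above: the nlptown checkpoint is a one-to-five star rating model, so the pipeline returns labels such as "5 stars" rather than POSITIVE/NEGATIVE. A minimal sketch of unpacking the result:

from transformers import pipeline

clf = pipeline(task="text-classification",
               model="nlptown/bert-base-multilingual-uncased-sentiment")
result = clf("What a great guy he is good and awesome.")[0]
print(result["label"], round(result["score"], 3))  # e.g. "5 stars" plus a confidence score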
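As a cross-check on text_summarization.py above, the same facebook/bart-large-cnn checkpoint can also be driven through the high-level summarization pipeline, which handles tokenization and decoding internally; this is an illustrative sketch, and the max_length/min_length values are assumptions rather than parameters taken from the script:

from transformers import pipeline

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
article = ("The tower is 324 metres (1,063 ft) tall, about the same height "
           "as an 81-storey building, and the tallest structure in Paris.")
summary = summarizer(article, max_length=60, min_length=10, do_sample=False)
print(summary[0]["summary_text"])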