├── .gitignore ├── 1. Text Representation.ipynb ├── 2. Topic Modeling.ipynb ├── 3. Sentiment Analysis.ipynb ├── 4. Applications.ipynb ├── LICENSE ├── README.md ├── d4sci.mplstyle ├── data ├── Apple-Twitter-Sentiment-DFE.csv ├── D4Sci_logo_ball.png ├── D4Sci_logo_full.png ├── googlebooks-eng-all-1gram-20120701-a.gz ├── mary.pickle ├── negative-words.txt ├── nltk_stopwords.txt ├── polyglot-en.pkl ├── positive-words.txt ├── questions-words.txt ├── table_langs.dat ├── text8.gz └── vader_lexicon.txt ├── requirements.txt └── slides └── NLP.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | *.key 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /3. Sentiment Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
Bruno Gonçalves
\n",
13 | " www.data4sci.com
\n",
14 | " @bgoncalves, @data4sci
\n", 580 | " | 0 | \n", 581 | "1 | \n", 582 | "2 | \n", 583 | "3 | \n", 584 | "
---|---|---|---|---|
0 | \n", 589 | "$: | \n", 590 | "-1.5 | \n", 591 | "0.80623 | \n", 592 | "[-1, -1, -1, -1, -3, -1, -3, -1, -2, -1] | \n", 593 | "
1 | \n", 596 | "%) | \n", 597 | "-0.4 | \n", 598 | "1.01980 | \n", 599 | "[-1, 0, -1, 0, 0, -2, -1, 2, -1, 0] | \n", 600 | "
2 | \n", 603 | "%-) | \n", 604 | "-1.5 | \n", 605 | "1.43178 | \n", 606 | "[-2, 0, -2, -2, -1, 2, -2, -3, -2, -3] | \n", 607 | "
3 | \n", 610 | "&-: | \n", 611 | "-0.4 | \n", 612 | "1.42829 | \n", 613 | "[-3, -1, 0, 0, -1, -1, -1, 2, -1, 2] | \n", 614 | "
4 | \n", 617 | "&: | \n", 618 | "-0.7 | \n", 619 | "0.64031 | \n", 620 | "[0, -1, -1, -1, 1, -1, -1, -1, -1, -1] | \n", 621 | "
\n", 670 | " | 0 | \n", 671 | "1 | \n", 672 | "2 | \n", 673 | "3 | \n", 674 | "
---|---|---|---|---|
7512 | \n", 679 | "}: | \n", 680 | "-2.1 | \n", 681 | "0.83066 | \n", 682 | "[-1, -1, -3, -2, -3, -2, -2, -1, -3, -3] | \n", 683 | "
7513 | \n", 686 | "}:( | \n", 687 | "-2.0 | \n", 688 | "0.63246 | \n", 689 | "[-3, -1, -2, -1, -3, -2, -2, -2, -2, -2] | \n", 690 | "
7514 | \n", 693 | "}:) | \n", 694 | "0.4 | \n", 695 | "1.42829 | \n", 696 | "[1, 1, -2, 1, 2, -2, 1, -1, 2, 1] | \n", 697 | "
7515 | \n", 700 | "}:-( | \n", 701 | "-2.1 | \n", 702 | "0.70000 | \n", 703 | "[-2, -1, -2, -2, -2, -4, -2, -2, -2, -2] | \n", 704 | "
7516 | \n", 707 | "}:-) | \n", 708 | "0.3 | \n", 709 | "1.61555 | \n", 710 | "[1, 1, -2, 1, -1, -3, 2, 2, 1, 1] | \n", 711 | "