├── .bumpversion.cfg
├── .coveragerc
├── .github
    └── FUNDING.yml
├── .gitignore
├── .readthedocs.yaml
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── SECURITY.md
├── assets
    ├── example1.png
    ├── logo.png
    └── logo.svg
├── chats
    └── whatsapp
    │   ├── lorem-2000.txt
    │   ├── lorem-merge-part1.txt
    │   ├── lorem-merge-part2.txt
    │   ├── lorem.txt
    │   └── pokemon.txt
├── docs
    ├── Makefile
    ├── _static
    │   ├── css
    │   │   └── custom.css
    │   ├── favicon.png
    │   ├── html
    │   │   ├── boxplot.html
    │   │   ├── custom_interventions_vs_length.html
    │   │   ├── interventions_count_date.html
    │   │   ├── interventions_count_date_all.html
    │   │   ├── interventions_count_date_cum.html
    │   │   ├── interventions_count_date_length.html
    │   │   ├── interventions_count_date_length_cum.html
    │   │   ├── interventions_count_hours.html
    │   │   ├── interventions_count_months.html
    │   │   ├── interventions_count_weekday.html
    │   │   ├── user_message_responses_flow.html
    │   │   └── user_message_responses_heatmap.html
    │   └── images
    │   │   ├── WhatsAppChat.from_source.png
    │   │   ├── WhatsAppChat.from_sources.png
    │   │   ├── chat-export-android9-wp2.20.123.gif
    │   │   └── chat-export-ios17-wp24.5.75.gif
    ├── _templates
    │   ├── autosummary
    │   │   └── modules.rst
    │   ├── layout.html
    │   ├── modules.rst
    │   └── versioning.html
    ├── assets
    │   └── style.css
    ├── conf.py
    ├── index.rst
    ├── make.bat
    └── source
    │   ├── about.rst
    │   ├── api
    │       ├── cmd
    │       │   ├── cmd_chat_gen.rst
    │       │   ├── cmd_graph.rst
    │       │   └── cmd_to_csv.rst
    │       ├── index.rst
    │       ├── whatstk.FigureBuilder.rst
    │       ├── whatstk.WhatsAppChat.rst
    │       ├── whatstk._chat.rst
    │       ├── whatstk.analysis.rst
    │       ├── whatstk.data.rst
    │       ├── whatstk.graph.rst
    │       ├── whatstk.utils.rst
    │       └── whatstk.whatsapp.rst
    │   ├── changelog.rst
    │   ├── code_examples
    │       ├── custom.rst
    │       ├── index.rst
    │       ├── interventions_count.rst
    │       ├── load_chat.rst
    │       ├── load_chat_gdrive.rst
    │       ├── load_chat_hformat.rst
    │       ├── load_chat_multiple.rst
    │       ├── message_length_boxplot.rst
    │       └── user_interaction.rst
    │   ├── community.rst
    │   ├── contribute.rst
    │   ├── developer_guide
    │       └── index.rst
    │   ├── getting_started
    │       ├── auto_header.rst
    │       ├── command_line.rst
    │       ├── export_chat.rst
    │       ├── hformat.rst
    │       ├── index.rst
    │       ├── library-available-chats.rst
    │       └── load_chat.rst
    │   ├── modules.rst
    │   ├── whatstk.analysis.rst
    │   ├── whatstk.graph.figures.rst
    │   ├── whatstk.graph.rst
    │   ├── whatstk.rst
    │   ├── whatstk.utils.rst
    │   ├── whatstk.whatsapp.rst
    │   └── why_whatstk.rst
├── requirements-docs.txt
├── requirements-flake.txt
├── requirements-test.txt
├── requirements.txt
├── run-tests.sh
├── setup.py
├── tests
    ├── __init__.py
    ├── analysis
    │   ├── __init__.py
    │   ├── test_interventions.py
    │   └── test_responses.py
    ├── graph
    │   ├── __init__.py
    │   └── test_figures.py
    ├── test_chat.py
    ├── test_data.py
    ├── utils
    │   ├── __init__.py
    │   ├── test_chat_merge.py
    │   ├── test_gdrive.py
    │   └── test_utils.py
    └── whatsapp
    │   ├── __init__.py
    │   ├── test_auto_header.py
    │   ├── test_generation.py
    │   ├── test_hformat.py
    │   ├── test_objects.py
    │   └── test_parser.py
└── whatstk
    ├── __init__.py
    ├── _chat.py
    ├── analysis
        ├── __init__.py
        ├── interventions.py
        └── responses.py
    ├── data.py
    ├── graph
        ├── __init__.py
        ├── base.py
        └── figures
        │   ├── __init__.py
        │   ├── boxplot.py
        │   ├── heatmap.py
        │   ├── sankey.py
        │   ├── scatter.py
        │   └── utils.py
    ├── scripts
        ├── __init__.py
        ├── generate_chats.py
        ├── graph.py
        └── txt_to_csv.py
    ├── utils
        ├── __init__.py
        ├── chat_merge.py
        ├── exceptions.py
        ├── gdrive.py
        └── utils.py
    └── whatsapp
        ├── __init__.py
        ├── assets
            ├── __init__.py
            └── header_format_support.json
        ├── auto_header.py
        ├── generation.py
        ├── hformat.py
        ├── objects.py
        └── parser.py


/.bumpversion.cfg:
--------------------------------------------------------------------------------
 1 | [bumpversion]
 2 | current_version = 0.7.1
 3 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(.(?P<pre>[a-z]+)(?P<prenum>\d+))?
 4 | serialize = 
 5 | 	{major}.{minor}.{patch}.{pre}{prenum}
 6 | 	{major}.{minor}.{patch}
 7 | 
 8 | [bumpversion:part:pre]
 9 | optional_value = stable
10 | values = 
11 | 	dev
12 | 	a
13 | 	b
14 | 	rc
15 | 	stable
16 | 
17 | [bumpversion:file:setup.py]
18 | 
19 | [bumpversion:file:README.md]
20 | 
21 | [bumpversion:file:whatstk/__init__.py]
22 | 
23 | [bumpversion:file:docs/conf.py]
24 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | omit = 
4 |         whatstk/tests/*
5 |         whatstk/scripts/*
6 | 
7 | [report]
8 | fail_under=80


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: [lucasrodes]
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | test_data/*
  3 | ### Windows template
  4 | # Windows image file caches
  5 | Thumbs.db
  6 | ehthumbs.db
  7 | 
  8 | # Folder config file
  9 | Desktop.ini
 10 | 
 11 | # Recycle Bin used on file shares
 12 | $RECYCLE.BIN/
 13 | 
 14 | # Windows Installer files
 15 | *.cab
 16 | *.msi
 17 | *.msm
 18 | *.msp
 19 | 
 20 | # Windows shortcuts
 21 | *.lnk
 22 | ### macOS template
 23 | *.DS_Store
 24 | .AppleDouble
 25 | .LSOverride
 26 | 
 27 | # Icon must end with two \r
 28 | Icon
 29 | 
 30 | 
 31 | # Thumbnails
 32 | ._*
 33 | 
 34 | # Files that might appear in the root of a volume
 35 | .DocumentRevisions-V100
 36 | .fseventsd
 37 | .Spotlight-V100
 38 | .TemporaryItems
 39 | .Trashes
 40 | .VolumeIcon.icns
 41 | .com.apple.timemachine.donotpresent
 42 | 
 43 | # Directories potentially created on remote AFP share
 44 | .AppleDB
 45 | .AppleDesktop
 46 | Network Trash Folder
 47 | Temporary Items
 48 | .apdisk
 49 | ### VirtualEnv template
 50 | # Virtualenv
 51 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
 52 | .Python
 53 | [Bb]in
 54 | [Ii]nclude
 55 | [Ll]ib
 56 | [Ll]ib64
 57 | [Ll]ocal
 58 | pyvenv.cfg
 59 | .venv
 60 | pip-selfcheck.json
 61 | ### Vim template
 62 | # swap
 63 | [._]*.s[a-w][a-z]
 64 | [._]s[a-w][a-z]
 65 | # session
 66 | Session.vim
 67 | # temporary
 68 | .netrwhist
 69 | *~
 70 | # auto-generated tag files
 71 | tags
 72 | ### Linux template
 73 | 
 74 | # temporary files which can be created if a process still has a handle open of a deleted file
 75 | .fuse_hidden*
 76 | 
 77 | # KDE directory preferences
 78 | .directory
 79 | 
 80 | # Linux trash folder which might appear on any partition or disk
 81 | .Trash-*
 82 | 
 83 | # .nfs files are created when an open file is removed but is still being accessed
 84 | .nfs*
 85 | ### Python template
 86 | # Byte-compiled / optimized / DLL files
 87 | __pycache__/
 88 | *.py[cod]
 89 | *$py.class
 90 | 
 91 | # C extensions
 92 | *.so
 93 | 
 94 | # Distribution / packaging
 95 | env/
 96 | build/
 97 | develop-eggs/
 98 | dist/
 99 | downloads/
100 | eggs/
101 | .eggs/
102 | lib/
103 | lib64/
104 | parts/
105 | sdist/
106 | var/
107 | *.egg-info/
108 | .installed.cfg
109 | *.egg
110 | 
111 | # PyInstaller
112 | #  Usually these files are written by a python script from a template
113 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
114 | *.manifest
115 | *.spec
116 | 
117 | # Installer logs
118 | pip-log.txt
119 | pip-delete-this-directory.txt
120 | 
121 | # Unit test / coverage reports
122 | htmlcov/
123 | .tox/
124 | .coverage
125 | .coverage.*
126 | .cache
127 | nosetests.xml
128 | coverage.xml
129 | cov.xml
130 | *,cover
131 | .hypothesis/
132 | .vscode
133 | 
134 | # Translations
135 | *.mo
136 | *.pot
137 | 
138 | # Django stuff:
139 | *.log
140 | local_settings.py
141 | 
142 | # Flask stuff:
143 | instance/
144 | .webassets-cache
145 | 
146 | # Scrapy stuff:
147 | .scrapy
148 | 
149 | # Sphinx documentation
150 | #docs/_build/
151 | docs/_build/doctrees
152 | 
153 | # PyBuilder
154 | target/
155 | 
156 | # Jupyter Notebook
157 | .ipynb_checkpoints
158 | 
159 | # pyenv
160 | .python-version
161 | 
162 | # celery beat schedule file
163 | celerybeat-schedule
164 | 
165 | # dotenv
166 | .env
167 | 
168 | # virtualenv
169 | .venv/
170 | venv/
171 | ENV/
172 | 
173 | # Spyder project settings
174 | .spyderproject
175 | 
176 | # Rope project settings
177 | .ropeproject
178 | ### SublimeText template
179 | # cache files for sublime text
180 | *.tmlanguage.cache
181 | *.tmPreferences.cache
182 | *.stTheme.cache
183 | 
184 | # workspace files are user-specific
185 | *.sublime-workspace
186 | 
187 | # project files should be checked into the repository, unless a significant
188 | # proportion of contributors will probably not be using SublimeText
189 | # *.sublime-project
190 | 
191 | # sftp configuration file
192 | sftp-config.json
193 | 
194 | # Package control specific files
195 | Package Control.last-run
196 | Package Control.ca-list
197 | Package Control.ca-bundle
198 | Package Control.system-ca-bundle
199 | Package Control.cache/
200 | Package Control.ca-certs/
201 | bh_unicode_properties.cache
202 | 
203 | # Sublime-github package stores a github token in this file
204 | # https://packagecontrol.io/packages/sublime-github
205 | GitHub.sublime-settings
206 | ### JetBrains template
207 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
208 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
209 | .idea/*
210 | # User-specific stuff:
211 | .idea/workspace.xml
212 | .idea/tasks.xml
213 | 
214 | # Sensitive or high-churn files:
215 | .idea/dataSources/
216 | .idea/dataSources.ids
217 | .idea/dataSources.xml
218 | .idea/dataSources.local.xml
219 | .idea/sqlDataSources.xml
220 | .idea/dynamic.xml
221 | .idea/uiDesigner.xml
222 | 
223 | # Gradle:
224 | .idea/gradle.xml
225 | .idea/libraries
226 | 
227 | # Mongo Explorer plugin:
228 | .idea/mongoSettings.xml
229 | 
230 | ## File-based project format:
231 | *.iws
232 | 
233 | ## Plugin-specific files:
234 | 
235 | # IntelliJ
236 | /out/
237 | 
238 | # mpeltonen/sbt-idea plugin
239 | .idea_modules/
240 | 
241 | # JIRA plugin
242 | atlassian-ide-plugin.xml
243 | 
244 | # Crashlytics plugin (for Android Studio and IntelliJ)
245 | com_crashlytics_export_strings.xml
246 | crashlytics.properties
247 | crashlytics-build.properties
248 | fabric.properties
249 | 
250 | # Chats and results
251 | mychats/
252 | notebooks/Untitled.ipynb
253 | .whatstk
254 | todos.md
255 | examples
256 | 
257 | 
258 | #tox stuff
259 | tox.ini
260 | #.coveragerc
261 | setup.cfg
262 | testreport.html
263 | testreport.xml
264 | version-info.cfg
265 | 
266 | docs2
267 | version-info.cfg
268 | py37
269 | py38
270 | py39
271 | *.ipynb
272 | 
273 | .pypirc
274 | learn/
275 | assets/style.css
276 | tests/chats/*
277 | flake-report
278 | 
279 | notebooks
280 | reports
281 | 
282 | 
283 | docs/_build/
284 | package-lock.json
285 | version-changes
286 | 
287 | client_secrets.json
288 | settings.yaml
289 | credentials.json


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yaml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Set the version of Python and other tools you might need
 9 | build:
10 |   os: ubuntu-22.04
11 |   tools:
12 |     python: "3.10"
13 | 
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 |   configuration: docs/conf.py
17 | 
18 | # We recommend specifying your dependencies to enable reproducible builds:
19 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
20 | python:
21 |   install:
22 |     - method: pip
23 |       path: .
24 |       extra_requirements:
25 |         - full
26 |     - requirements: requirements-docs.txt
27 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: focal
 2 | language: python
 3 | python:
 4 |   - 3.9
 5 |   - "3.10"
 6 |   - "3.11"
 7 |   - "3.12"
 8 | git:
 9 |   depth: false
10 | before_install:
11 |   - pip install --upgrade pip
12 | install:
13 |   # - git fetch --tags
14 |   #
15 |   - pip install -r requirements-test.txt
16 |   - pip install -r requirements-flake.txt
17 |   - ls -l
18 |   - pip uninstall whatstk
19 |   - pip install .[full]
20 |   # - ls -l /home/travis/virtualenv/python3.7.1/lib/python3.7/site-packages/whatstk/whatsapp/assets/
21 |   - cat MANIFEST.in
22 |   - mkdir -p tests/chats/hformats tests/chats/merge
23 |   - whatstk-generate-chat --size 500 -z --output-path tests/chats/hformats/ # Generate chats for hformat checks
24 |   - whatstk-generate-chat --size 300 --last-timestamp 2019-09-01 --hformats '%Y-%m-%d, %H:%M - %name:' --output-path tests/chats/merge/ --filenames file1.txt
25 |   - whatstk-generate-chat --size 300 --last-timestamp 2020-01-01 --hformats '%Y-%m-%d, %H:%M - %name:' --output-path tests/chats/merge/ --filenames file2.txt
26 | #pip install -r requirements.txt
27 | script:
28 |   - flake8 --max-complexity=10 --docstring-convention=google --max-line-length=120 --ignore=ANN101,ANN102,ANN401 whatstk
29 |   - pytest --cov-report term --cov=whatstk tests
30 | after_success:
31 |   - codecov # submit coverage
32 | 
33 | jobs:
34 |   include:
35 |     # perform a linux build
36 |     # - services: docker
37 |     # and a windows build
38 |     - os: windows
39 |       language: shell
40 |       before_install:
41 |         - choco upgrade python -y --version 3.12.2
42 |         - export PATH="/c/Python312:/c/Python312/Scripts:$PATH"
43 |         # make sure it's on PATH as 'python3'
44 |         - ln -s /c/Python312/python.exe /c/Python312/python3.exe
45 |     - stage: deploy
46 |       python: 3.11
47 |       deploy:
48 |         - provider: pypi
49 |           user: $USER_PYPI
50 |           password: $PWD_PYPI
51 |           on:
52 |             tags: true
53 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | We are very open to having collaborators. You can freely fork and issue a pull request with your updates!
 4 | For other issues/bugs/suggestions, please report them in the [issues section](https://github.com/lucasrodes/whatstk/issues).
 5 | 
 6 | ## Pull Requests
 7 | 
 8 | Pull requests to branch `develop` are accepted. Please link your forks to specific issues (you may want to open an
 9 | issue). 
10 | 
11 | 
12 | Make sure to test your code before issuing a pull request:
13 | 
14 | 1. Install the library in develop mode:
15 | 
16 | ```bash
17 | pip install -e .
18 | ```
19 | 
20 | 2. Run the test script:
21 | 
22 | ```bash
23 | sh run-tests.sh
24 | ```
25 | 
26 | However, pull requests will trigger the Travis CI pipeline, which will run the tests as well.
27 | 
28 | ## Join the community
29 | 
30 | Join us on [Gitter](https://gitter.im/sociepy/whatstk).


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include *.md
 2 | include LICENSE
 3 | include requirements.txt
 4 | include requirements-test.txt
 5 | include requirements-flake.txt
 6 | include requirements-docs.txt
 7 | include whatstk/whatsapp/assets/header_format_support.json
 8 | include .coveragerc
 9 | 
10 | recursive-include altair *.py *.json *.ipynb *.html
11 | global-exclude *.py[co] __pycache__


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="left">
  2 |   <img src="https://raw.githubusercontent.com/lucasrodes/whatstk/develop/assets/logo.svg" width="70%">
  3 | </div>
  4 | <h1 align="left" style="border-bottom: none;"> whatstk: analyze WhatsApp chats with python
  5 | </h1>
  6 | <p align="left">
  7 |   <a href="#">
  8 |     <img alt="Package version" src="https://img.shields.io/badge/pypi-0.7.1-blue.svg?&color=25D366&logo=whatsapp&">
  9 |   </a>
 10 | </p>
 11 | <!-- style=for-the-badge -->
 12 | 
 13 | <p align="left">
 14 |   <a href="https://app.travis-ci.com/lucasrodes/whatstk">
 15 |     <img alt="Build Status" src="https://app.travis-ci.com/lucasrodes/whatstk.svg?&branch=main">
 16 |   </a>
 17 |   <a href="https://codecov.io/gh/lucasrodes/whatstk">
 18 |     <img alt="codecov" src="https://codecov.io/gh/lucasrodes/whatstk/branch/master/graph/badge.svg">
 19 |   </a>
 20 |   <a href='https://whatstk.readthedocs.io/en/stable/?badge=stable'>
 21 |     <img src='https://readthedocs.org/projects/whatstk/badge/?version=stable' alt='Documentation Status' />
 22 |   </a>
 23 |   <a href="https://towardsdatascience.com/analyzing-whatsapp-chats-with-python-20d62ce7fe2d">
 24 |     <img alt="Tutorial" src="https://img.shields.io/badge/tutorial-on_medium-1a8917.svg?&logo=medium&logoColor=white">
 25 |   </a>
 26 |   <a href="https://www.python.org/downloads/release/python-3/">
 27 |     <img alt="Python 3" src="https://img.shields.io/badge/python-3.9|3.10|3.11|3.12-blue.svg?&logo=python&logoColor=yellow">
 28 |   </a>
 29 |   <a href="https://pepy.tech/badge/whatstk">
 30 |     <img alt="Number of downloads" src="https://pepy.tech/badge/whatstk">
 31 |   </a>
 32 |   <a href="https://github.com/lucasrodes/whatstk/blob/master/LICENSE">
 33 |     <img alt="GitHub license" src="https://img.shields.io/github/license/lucasrodes/whatstk.svg?">
 34 |   </a>
 35 | </p>
 36 | 
 37 | ---
 38 | 
 39 | **Try the [live demo parser](https://whatstk.streamlit.app/) to convert your chats to CSV**
 40 | 
 41 | ---
 42 | 
 43 | <!-- [![Downloads](https://pepy.tech/badge/whatstk)](https://pepy.tech/project/whatstk) -->
 44 | <!-- > [Get the Desktop App](https://lcsrg.me/whatstk-gui) -->
 45 | 
 46 | **whatstk** is a Python package, developed under the **[sociepy](https://sociepy.org)** project, that provides tools
 47 | to parse, analyze and visualise WhatsApp chats. Easily convert your chats to CSV or simply visualise some stats using
 48 | the provided command-line tools or Python. The package uses [pandas](https://github.com/pandas-dev/pandas) to process
 49 | the data and [plotly](https://github.com/plotly/plotly.py) to visualise it.
 50 | 
 51 | It is distributed under the GPL-3.0 license.
 52 | 
 53 | ⭐ If you find our project interesting, please **star** it to **give us some dopamine** 😄!
 54 | 
 55 | ### Content
 56 | 
 57 | - [Installation](#installation)
 58 | - [Getting Started](#getting-started)
 59 | - [Documentation](https://whatstk.readthedocs.io/en/stable/)
 60 | - [Contribute](#contribute)
 61 | - [Covered in](#covered-in)
 62 | - [Citation](#citation)
 63 | 
 64 | ## Installation
 65 | 
 66 | ```
 67 | pip install whatstk
 68 | ```
 69 | 
 70 | Install the development version (not stable):
 71 | 
 72 | ```
 73 | pip install git+https://github.com/lucasrodes/whatstk.git@develop
 74 | ```
 75 | 
 76 | _More details [here](https://whatstk.readthedocs.io/en/stable/source/about.html#installation-compatibility)_
 77 | 
 78 | ## Getting Started
 79 | 
 80 | For a rapid introduction, check this [tutorial on Medium](https://towardsdatascience.com/analyzing-whatsapp-chats-with-python-20d62ce7fe2d).
 81 | 
 82 | #### Export your chat using your phone:
 83 | 
 84 | _See [instructions](https://whatstk.readthedocs.io/en/stable/source/getting_started/export_chat.html)._
 85 | 
 86 | #### Load chat as a DataFrame
 87 | 
 88 | ```python
 89 | from whatstk import df_from_whatsapp
 90 | df = df_from_whatsapp("path/to/chat.txt")
 91 | ```
 92 | 
 93 | **NOTE:** You can now also load directly from a zip chat (iOS export).
 94 | 
 95 | #### Convert chat to csv
 96 | 
 97 | ```bash
 98 | $ whatstk-to-csv [input_filename] [output_filename]
 99 | ```
100 | 
101 | #### More examples
102 | 
103 | _See more in sections [getting started](https://whatstk.readthedocs.io/en/stable/source/getting_started/index.html) and
104 | [examples](https://whatstk.readthedocs.io/en/stable/source/code_examples/index.html)._
105 | 
106 | ## Documentation
107 | 
108 | _See [official documentation](https://whatstk.readthedocs.io/en/stable/)._
109 | 
110 | ## Contribute
111 | 
112 | _See [contribute section](https://whatstk.readthedocs.io/en/stable/source/contribute.html)._
113 | 
114 | ## License
115 | 
116 | [GPL-3.0](LICENSE)
117 | 
118 | ## Citation
119 | 
120 | Lucas Rodés-Guirao. "whatstk, WhatsApp analysis and parsing toolkit", https://github.com/lucasrodes/whatstk
121 | 
122 | ## Covered in
123 | 
124 | - [Your Whatsapp Chat History in Cool Graphs](https://deepnote.com/@batmanscode/Your-Whatsapp-Chat-History-in-Cool-Graphs-mQoSsYjUSw29D4nZDs_KwA), by [@batmanscode](https://github.com/batmanscode).
125 | - [WhatsAppening to the news](https://whatsappening.joltetn.eu/), by [@enric1994](https://github.com/enric1994)
126 | - [whatsappening source code](https://github.com/enric1994/whatsappening), by [@enric1994](https://github.com/enric1994)
127 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | 
 3 | ## Supported Versions
 4 | 
 5 | We release patches for security vulnerabilities for some project versions. Check below which versions are supported:
 6 | 
 7 | | Version | Supported          |
 8 | | ------- | ------------------ |
 9 | | 0.2.x   | :white_check_mark: |
10 | | 0.1.x   | :x:                |
11 | 
12 | ## Reporting a Vulnerability
13 | 
14 | Please report (suspected) security vulnerabilities to the [issues section](https://github.com/lucasrodes/whatstk/issues). We will analyze the report and, if the issue is confirmed, we will release a patch as soon as possible.
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/assets/example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/assets/example1.png


--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/assets/logo.png


--------------------------------------------------------------------------------
/assets/logo.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1433.19 310"><defs><style>.cls-1{fill:#25d366;}</style></defs><g id="Layer_2" data-name="Layer 2"><g id="Layer_1-2" data-name="Layer 1"><path d="M50,280.4,10,108.8q15.6-6.39,28.2-6.4t18.4,5.4q5.79,5.4,7.8,16.6L79.6,198q8,39.21,11.6,64.4c.53,2.14,1.6,3.2,3.2,3.2L122,107.2q12.39-2.4,31.2-2.4t30.4,2.4l27.6,155.2c.26,2.14,1.33,3.2,3.2,3.2q7.2-41.6,12.4-67.6l20-92q8.4-2.4,23.8-2.4t24.2,4.4l1.6,3.2-48,193.2q-13.2,2-34.2,2t-28.6-4.8q-7.6-4.8-10-20.4L166.8,232q-8-46-12.8-79.2h-2.4q-2.81,20.8-13.2,78.8l-13.6,72.8a253.16,253.16,0,0,1-34.8,2q-20.81,0-28.6-5.2T50,280.4Z"/><path class="cls-1" d="M40,270.4,0,98.8q15.6-6.39,28.2-6.4t18.4,5.4q5.79,5.4,7.8,16.6L69.6,188q8,39.21,11.6,64.4c.53,2.14,1.6,3.2,3.2,3.2L112,97.2q12.39-2.4,31.2-2.4t30.4,2.4l27.6,155.2c.26,2.14,1.33,3.2,3.2,3.2Q211.6,214,216.8,188l20-92q8.4-2.4,23.8-2.4T284.8,98l1.6,3.2-48,193.2q-13.2,2-34.2,2t-28.6-4.8q-7.6-4.8-10-20.4L156.8,222q-8-46-12.8-79.2h-2.4q-2.81,20.8-13.2,78.8l-13.6,72.8a253.16,253.16,0,0,1-34.8,2q-20.81,0-28.6-5.2T40,270.4Z"/><path d="M500,170.8V258q0,25.61,8.4,38-12.81,11.21-30.8,11.2-17.21,0-23.6-7.8t-6.4-24.6V181.6q0-18-4.4-25.2t-16.4-7.2q-21.21,0-39.6,19.2v136A93.14,93.14,0,0,1,375,306q-6.21.39-13,.4-6.4,0-12.8-.4a105.32,105.32,0,0,1-12.8-1.6V12.8l2.4-2.8h20.4q15.6,0,21.8,7.2t6.2,25.2v84.4q28-24.39,56.4-24.4,28.8,0,42.6,18.6T500,170.8Z"/><path class="cls-1" d="M490,160.8V248q0,25.61,8.4,38-12.81,11.21-30.8,11.2-17.21,0-23.6-7.8t-6.4-24.6V171.6q0-18-4.4-25.2t-16.4-7.2q-21.21,0-39.6,19.2v136A93.14,93.14,0,0,1,365,296q-6.21.39-13,.4-6.4,0-12.8-.4a105.32,105.32,0,0,1-12.8-1.6V2.8L328.8,0h20.4Q364.8,0,371,7.2t6.2,25.2v84.4q28-24.39,56.4-24.4,28.8,0,42.6,18.6T490,160.8Z"/><path 
d="M723.2,280.4a55.93,55.93,0,0,1-7.2,15.2,48.17,48.17,0,0,1-10.4,12q-26-3.21-38-22.8-26,25.2-56.8,25.2-15.6,0-27.4-5a56.17,56.17,0,0,1-19.6-13.4,54.47,54.47,0,0,1-11.6-19.2,66.8,66.8,0,0,1-3.8-22.4q0-16.8,5.8-29.4a56.43,56.43,0,0,1,16.4-21,75.45,75.45,0,0,1,24.8-12.8,104.38,104.38,0,0,1,31-4.4q21.6,0,33.6,1.2V172.8q0-30.39-29.6-30.4-20,0-60.8,14Q558.4,144,556,119.2a222,222,0,0,1,87.6-18q30,0,49.2,16.8T712,168.8v82Q712,272,723.2,280.4Zm-100.8-10q20,0,37.6-17.6V218q-6.4-.39-12.4-.8c-4-.26-7.87-.4-11.6-.4q-17.61,0-26.8,6.6T600,245.2q0,12,6,18.6T622.4,270.4Z"/><path class="cls-1" d="M713.2,270.4a55.93,55.93,0,0,1-7.2,15.2,48.17,48.17,0,0,1-10.4,12q-26-3.21-38-22.8-26,25.2-56.8,25.2-15.6,0-27.4-5a56.17,56.17,0,0,1-19.6-13.4,54.47,54.47,0,0,1-11.6-19.2,66.8,66.8,0,0,1-3.8-22.4q0-16.8,5.8-29.4a56.43,56.43,0,0,1,16.4-21,75.45,75.45,0,0,1,24.8-12.8,104.38,104.38,0,0,1,31-4.4q21.6,0,33.6,1.2V162.8q0-30.39-29.6-30.4-20,0-60.8,14Q548.4,134,546,109.2a222,222,0,0,1,87.6-18q30,0,49.2,16.8T702,158.8v82Q702,262,713.2,270.4Zm-100.8-10q20,0,37.6-17.6V208q-6.4-.39-12.4-.8c-4-.26-7.87-.4-11.6-.4q-17.61,0-26.8,6.6T590,235.2q0,12,6,18.6T612.4,260.4Z"/><path d="M877.2,105.6a74.83,74.83,0,0,1,0,37.6l-48.4-.8V247.6Q828.8,266,846,266h24.8a72.32,72.32,0,0,1,4,22.8q0,10.8-1.2,14a328.64,328.64,0,0,1-48,3.6q-48,0-48-48.8V142.8l-30.4.4a77.79,77.79,0,0,1-2-18.8,77.84,77.84,0,0,1,2-18.8l30.4.4V77.2q0-17.6,6.4-24.8t22-7.2h20l2.8,2.4v58.8Z"/><path class="cls-1" d="M867.2,95.6a74.83,74.83,0,0,1,0,37.6l-48.4-.8V237.6Q818.8,256,836,256h24.8a72.32,72.32,0,0,1,4,22.8q0,10.8-1.2,14a328.64,328.64,0,0,1-48,3.6q-48,0-48-48.8V132.8l-30.4.4a77.79,77.79,0,0,1-2-18.8,77.84,77.84,0,0,1,2-18.8l30.4.4V67.2q0-17.6,6.4-24.8t22-7.2h20l2.8,2.4V96.4Z"/><path 
d="M905.59,290.4q.4-8.79,5-19.4t10.6-16.6q31.6,17.21,55.6,17.2,13.2,0,21.4-5.2t8.2-14q0-14-21.6-22.4l-22.4-8.4Q912,203.21,912,162.8a59.55,59.55,0,0,1,5.39-25.8,57.44,57.44,0,0,1,15.2-19.6A69.51,69.51,0,0,1,956,104.8a98.46,98.46,0,0,1,30.39-4.4,135.28,135.28,0,0,1,17,1.2q9.4,1.2,19,3.4t18.4,5a107.22,107.22,0,0,1,15.21,6,59.93,59.93,0,0,1-4,20.8q-4,10.8-10.81,16-31.59-14-54.8-14-10.39,0-16.39,5a16.14,16.14,0,0,0-6,13q0,12.4,20,19.6l24.39,8.8q26.42,9.21,39.2,25.2a57.79,57.79,0,0,1,12.8,37.2q0,28.41-21.2,45.4t-60.8,17A143.39,143.39,0,0,1,905.59,290.4Z"/><path class="cls-1" d="M895.59,280.4q.4-8.79,5-19.4t10.6-16.6q31.6,17.21,55.6,17.2,13.2,0,21.4-5.2t8.2-14q0-14-21.6-22.4l-22.4-8.4Q902,193.21,902,152.8a59.55,59.55,0,0,1,5.39-25.8,57.44,57.44,0,0,1,15.2-19.6A69.51,69.51,0,0,1,946,94.8a98.46,98.46,0,0,1,30.39-4.4,135.28,135.28,0,0,1,17,1.2q9.4,1.2,19,3.4t18.4,5a107.22,107.22,0,0,1,15.21,6,59.93,59.93,0,0,1-4,20.8q-4,10.8-10.81,16-31.59-14-54.8-14-10.39,0-16.39,5a16.14,16.14,0,0,0-6,13q0,12.4,20,19.6l24.39,8.8q26.42,9.21,39.2,25.2a57.79,57.79,0,0,1,12.8,37.2q0,28.41-21.2,45.4t-60.8,17A143.39,143.39,0,0,1,895.59,280.4Z"/><path d="M1214.39,105.6a74.83,74.83,0,0,1,0,37.6l-48.4-.8V247.6q0,18.41,17.2,18.4H1208a72.32,72.32,0,0,1,4,22.8q0,10.8-1.2,14a328.64,328.64,0,0,1-48,3.6q-48,0-48-48.8V142.8l-30.4.4a77.79,77.79,0,0,1-2-18.8,77.84,77.84,0,0,1,2-18.8l30.4.4V77.2q0-17.6,6.4-24.8t22-7.2h20l2.8,2.4v58.8Z"/><path class="cls-1" d="M1204.39,95.6a74.83,74.83,0,0,1,0,37.6l-48.4-.8V237.6q0,18.41,17.2,18.4H1198a72.32,72.32,0,0,1,4,22.8q0,10.8-1.2,14a328.64,328.64,0,0,1-48,3.6q-48,0-48-48.8V132.8l-30.4.4a77.79,77.79,0,0,1-2-18.8,77.84,77.84,0,0,1,2-18.8l30.4.4V67.2q0-17.6,6.4-24.8t22-7.2h20l2.8,2.4V96.4Z"/><path 
d="M1402.39,227.28l11.6,31.17q8.79,25.19,19.2,32.77-14,16-35.2,16-12,0-18-5.77t-11.2-19.71l-13.6-36.63q-5.2-13.53-11.4-18.31a23.21,23.21,0,0,0-14.6-4.78q-8.4,0-18,.8v81.6a162.29,162.29,0,0,1-50.8,0V12.8l2.4-2.8h20.4q15.6,0,21.8,7.4t6.2,25V178.8l10-.4q6.8,0,10.4-6.4l27.6-47.6q10.8-20,31.2-20,9.6,0,28.8.8l2.4,3.21-37.2,64.08a52.79,52.79,0,0,1-16.8,19.63Q1392,200.11,1402.39,227.28Z"/><path class="cls-1" d="M1392.39,217.28l11.6,31.17q8.79,25.19,19.2,32.77-14,16-35.2,16-12,0-18-5.77t-11.2-19.71l-13.6-36.63q-5.2-13.53-11.4-18.31a23.21,23.21,0,0,0-14.6-4.78q-8.4,0-18,.8v81.6a162.29,162.29,0,0,1-50.8,0V2.8l2.4-2.8h20.4q15.6,0,21.8,7.4t6.2,25V168.8l10-.4q6.8,0,10.4-6.4l27.6-47.6q10.8-20,31.2-20,9.6,0,28.8.8l2.4,3.21-37.2,64.08a52.79,52.79,0,0,1-16.8,19.63Q1382,190.11,1392.39,217.28Z"/></g></g></svg>


--------------------------------------------------------------------------------
/chats/whatsapp/pokemon.txt:
--------------------------------------------------------------------------------
 1 | 15.04.2016, 15:04 - Pokemon Chat: Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them.
 2 | 06.08.2016, 13:18 - Messages you send to this group are now secured with end-to-end encryption. Tap for more info.
 3 | 06.08.2016, 13:23 - Ash Ketchum: Hey guys!
 4 | 06.08.2016, 13:25 - Brock: Hey Ash, good to have a common group!
 5 | 06.08.2016, 13:30 - Misty: Hey guys! Long time since heard anything from you
 6 | 06.08.2016, 13:45 - Ash Ketchum: Indeed. I think having a WhatsApp group nowadays is a good idea
 7 | 06.08.2016, 14:30 - Misty: Definitely
 8 | 06.08.2016, 17:25 - Brock: I totally agree
 9 | 07.08.2016, 11:45 - Prof. Oak: Kids, shall I design a smart Pokeball?
10 | 07.08.2016, 18:45 - Ash Ketchum: I don't mind Prof. I quit capturing Pokemon.
11 | 07.08.2016, 19:30 - Misty: Was a great time, but had enough also.
12 | 07.08.2016, 23:25 - Brock: Guys, I am still in the first gym. No one is playing Pokemon, they went crazy with pokemon Go.
13 | 10.08.2016, 09:45 - Jessie & James: Hey, thanks for adding us. Wanna meet soon? Just for the old times.
14 | 10.08.2016, 11:25 - Raichu: I am in!
15 | 10.08.2016, 13:23 - Ash Ketchum: FFS, no way, Pikachu did you evolve?
16 | 10.08.2016, 15:23 - Raichu: Yes... Weird to have a different body!
17 | 11.08.2016, 19:30 - Misty: Gotta see that.
18 | 11.09.2016, 20:25 - Meowth: Hey people, I was on holiday in Sinnoh. Crazy region.
19 | 31.10.2016, 11:45 - Prof. Oak: Smart-pokeball is created.
20 | 31.10.2016, 12:23 - Wobbuffet: Wo-bbu-ffet
21 | 
22 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | # gitchangelog > source/changelog.rst
20 | 
21 | %: Makefile
22 | 	# gitchangelog > source/changelog.rst
23 | 	# auto-changelog --output source/changelog.md -u
24 | 	# auto-changelog --repo .. --output source/changelog.md -u
25 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
26 | 


--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
  1 | .wy-side-nav-search { 
  2 |     /* background-color: #25D366; */
  3 |     background-color: white;
  4 |     /* color: red; */
  5 | }
  6 | 
  7 | .wy-nav-top{
  8 |     background: #3B9B5E;
  9 | }
 10 | 
 11 | 
 12 | .wy-side-nav-search>div.version{
 13 |     color: black;
 14 | }
 15 | 
 16 | .wy-side-nav-search>a { 
 17 |     color: black;
 18 | }
 19 | 
 20 | a {
 21 |     color: #46ba71;
 22 | }
 23 | a:hover {
 24 |     color: #25D366;
 25 | }
 26 | 
 27 | .wy-side-nav-search input[type=text] {
 28 |     border-color: #25D366;
 29 |     border-radius: 10px;
 30 | }
 31 | /* a:visited{
 32 |     color: #d32593;
 33 | } */
 34 | 
 35 | 
 36 | .rst-content dl:not(.docutils) dt {
 37 |     color: #3B9B5E;
 38 |     border-top: solid 3px #3B9B5E;
 39 |     background: #D9FCE6;
 40 | }
 41 | 
 42 | .rst-content .viewcode-link {
 43 |     color: #2980B9
 44 | }
 45 | 
 46 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal {
 47 |     color: #3B9B5E;
 48 | }
 49 | 
 50 | code.py-func:hover, code.py-class:hover, code.py-obj:hover, code.py-mod:hover{
 51 |     background-color: #3B9B5E;
 52 |     color: white;
 53 | }
 54 | 
 55 | code, .rst-content tt, .rst-content code {
 56 |     white-space: nowrap;
 57 |     max-width: 100%;
 58 |     background: transparent;
 59 |     border-width: 0px;
 60 |     font-size: 85%;
 61 | }
 62 | 
 63 | .rst-content dl:not(.docutils) dl dt {
 64 |     border-left: solid 0px white;
 65 |     background: transparent;
 66 |     font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace;
 67 |     color: #3B9B5E;
 68 |     border-left: solid 3px #3B9B5E;
 69 | }
 70 | 
 71 | .wy-alert.wy-alert-info, .rst-content .note, .rst-content .wy-alert-info.attention, .rst-content .wy-alert-info.caution, .rst-content .wy-alert-info.danger, .rst-content .wy-alert-info.error, .rst-content .wy-alert-info.hint, .rst-content .wy-alert-info.important, .rst-content .wy-alert-info.tip, .rst-content .wy-alert-info.warning, .rst-content .seealso, .rst-content .wy-alert-info.admonition-todo, .rst-content .wy-alert-info.admonition {
 72 |     background: white;
 73 |     border-left: solid 3px #007bff;
 74 | }
 75 | 
 76 | .wy-alert.wy-alert-info .wy-alert-title, .rst-content .note .wy-alert-title, .rst-content .wy-alert-info.attention .wy-alert-title, .rst-content .wy-alert-info.caution .wy-alert-title, .rst-content .wy-alert-info.danger .wy-alert-title, .rst-content .wy-alert-info.error .wy-alert-title, .rst-content .wy-alert-info.hint .wy-alert-title, .rst-content .wy-alert-info.important .wy-alert-title, .rst-content .wy-alert-info.tip .wy-alert-title, .rst-content .wy-alert-info.warning .wy-alert-title, .rst-content .seealso .wy-alert-title, .rst-content .wy-alert-info.admonition-todo .wy-alert-title, .rst-content .wy-alert-info.admonition .wy-alert-title, .wy-alert.wy-alert-info .rst-content .admonition-title, .rst-content .wy-alert.wy-alert-info .admonition-title, .rst-content .note .admonition-title, .rst-content .wy-alert-info.attention .admonition-title, .rst-content .wy-alert-info.caution .admonition-title, .rst-content .wy-alert-info.danger .admonition-title, .rst-content .wy-alert-info.error .admonition-title, .rst-content .wy-alert-info.hint .admonition-title, .rst-content .wy-alert-info.important .admonition-title, .rst-content .wy-alert-info.tip .admonition-title, .rst-content .wy-alert-info.warning .admonition-title, .rst-content .seealso .admonition-title, .rst-content .wy-alert-info.admonition-todo .admonition-title, .rst-content .wy-alert-info.admonition .admonition-title {
 77 |     background: #E0EDFD;
 78 | }
 79 | 
 80 | .admonition .admonition-title::before {
 81 |     color: #007BFF;
 82 | }
 83 | 
 84 | .admonition .admonition-title {
 85 |     color: black;
 86 | }
 87 | 
 88 | 
 89 | .admonition {
 90 |     box-shadow: 2px 2px 7px 0px rgba(0,0,0,0.25);
 91 | }
 92 | 
 93 | .rst-content div[class^='highlight'] {
 94 |     border-left-width: 0px;
 95 |     border-bottom-width: 0px;
 96 |     border-top-width: 0px;
 97 |     border-right: 3px solid #e88a50;
 98 | }
 99 | 
100 | .rst-content dl:not(.docutils) dt:first-child {
101 |     margin-top: 0;
102 |     width: 100%;
103 | }
104 | 


--------------------------------------------------------------------------------
/docs/_static/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/favicon.png


--------------------------------------------------------------------------------
/docs/_static/images/WhatsAppChat.from_source.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/images/WhatsAppChat.from_source.png


--------------------------------------------------------------------------------
/docs/_static/images/WhatsAppChat.from_sources.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/images/WhatsAppChat.from_sources.png


--------------------------------------------------------------------------------
/docs/_static/images/chat-export-android9-wp2.20.123.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/images/chat-export-android9-wp2.20.123.gif


--------------------------------------------------------------------------------
/docs/_static/images/chat-export-ios17-wp24.5.75.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/images/chat-export-ios17-wp24.5.75.gif


--------------------------------------------------------------------------------
/docs/_templates/autosummary/modules.rst:
--------------------------------------------------------------------------------
 1 | {{ fullname }}
 2 | {{ underline }}
 3 | 
 4 | .. contents::
 5 |     :local:
 6 | 
 7 | .. automodule:: {{fullname}}
 8 | 
 9 |     Members
10 |     =======


--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends '!layout.html' %}
2 | {% block document %}
3 | {{super()}}
4 | {% endblock %}
5 | 


--------------------------------------------------------------------------------
/docs/_templates/modules.rst:
--------------------------------------------------------------------------------
 1 | {{ fullname }}
 2 | {{ underline }}
 3 | 
 4 | .. contents::
 5 |     :local:
 6 | 
 7 | .. automodule:: {{fullname}}
 8 | 
 9 |     Members
10 |     =======


--------------------------------------------------------------------------------
/docs/_templates/versioning.html:
--------------------------------------------------------------------------------
1 | {% if versions %}
2 | <h3>{{ _('Versions') }}</h3>
3 | <ul>
4 |   {%- for item in versions %}
5 |   <li><a href="{{ item.url }}">{{ item.name }}</a></li>
6 |   {%- endfor %}
7 | </ul>
8 | {% endif %}


--------------------------------------------------------------------------------
/docs/assets/style.css:
--------------------------------------------------------------------------------
  1 | body {
  2 | 	font-family: Helvetica, Arial, sans-serif;
  3 | 	font-size: 12px;
  4 | 	/* do not increase min-width as some may use split screens */
  5 | 	min-width: 800px;
  6 | 	color: #999;
  7 | }
  8 | 
  9 | h1 {
 10 | 	font-size: 24px;
 11 | 	color: black;
 12 | }
 13 | 
 14 | h2 {
 15 | 	font-size: 16px;
 16 | 	color: black;
 17 | }
 18 | 
 19 | p {
 20 |     color: black;
 21 | }
 22 | 
 23 | a {
 24 | 	color: #999;
 25 | }
 26 | 
 27 | table {
 28 | 	border-collapse: collapse;
 29 | }
 30 | 
 31 | /******************************
 32 |  * SUMMARY INFORMATION
 33 |  ******************************/
 34 | 
 35 | #environment td {
 36 | 	padding: 5px;
 37 | 	border: 1px solid #E6E6E6;
 38 | }
 39 | 
 40 | #environment tr:nth-child(odd) {
 41 | 	background-color: #f6f6f6;
 42 | }
 43 | 
 44 | /******************************
 45 |  * TEST RESULT COLORS
 46 |  ******************************/
 47 | span.passed, .passed .col-result {
 48 | 	color: green;
 49 | }
 50 | span.skipped, span.xfailed, span.rerun, .skipped .col-result, .xfailed .col-result, .rerun .col-result {
 51 | 	color: orange;
 52 | }
 53 | span.error, span.failed, span.xpassed, .error .col-result, .failed .col-result, .xpassed .col-result  {
 54 | 	color: red;
 55 | }
 56 | 
 57 | 
 58 | /******************************
 59 |  * RESULTS TABLE
 60 |  *
 61 |  * 1. Table Layout
 62 |  * 2. Extra
 63 |  * 3. Sorting items
 64 |  *
 65 |  ******************************/
 66 | 
 67 | /*------------------
 68 |  * 1. Table Layout
 69 |  *------------------*/
 70 | 
 71 | #results-table {
 72 | 	border: 1px solid #e6e6e6;
 73 | 	color: #999;
 74 | 	font-size: 12px;
 75 | 	width: 100%
 76 | }
 77 | 
 78 | #results-table th, #results-table td {
 79 | 	padding: 5px;
 80 | 	border: 1px solid #E6E6E6;
 81 | 	text-align: left
 82 | }
 83 | #results-table th {
 84 | 	font-weight: bold
 85 | }
 86 | 
 87 | /*------------------
 88 |  * 2. Extra
 89 |  *------------------*/
 90 | 
 91 | .log:only-child {
 92 | 	height: inherit
 93 | }
 94 | .log {
 95 | 	background-color: #e6e6e6;
 96 | 	border: 1px solid #e6e6e6;
 97 | 	color: black;
 98 | 	display: block;
 99 | 	font-family: "Courier New", Courier, monospace;
100 | 	height: 230px;
101 | 	overflow-y: scroll;
102 | 	padding: 5px;
103 | 	white-space: pre-wrap
104 | }
105 | div.image {
106 | 	border: 1px solid #e6e6e6;
107 | 	float: right;
108 | 	height: 240px;
109 | 	margin-left: 5px;
110 | 	overflow: hidden;
111 | 	width: 320px
112 | }
113 | div.image img {
114 | 	width: 320px
115 | }
116 | div.video {
117 | 	border: 1px solid #e6e6e6;
118 | 	float: right;
119 | 	height: 240px;
120 | 	margin-left: 5px;
121 | 	overflow: hidden;
122 | 	width: 320px
123 | }
124 | div.video video {
125 | 	overflow: hidden;
126 | 	width: 320px;
127 |     height: 240px;
128 | }
129 | .collapsed {
130 | 	display: none;
131 | }
132 | .expander::after {
133 | 	content: " (show details)";
134 | 	color: #BBB;
135 | 	font-style: italic;
136 | 	cursor: pointer;
137 | }
138 | .collapser::after {
139 | 	content: " (hide details)";
140 | 	color: #BBB;
141 | 	font-style: italic;
142 | 	cursor: pointer;
143 | }
144 | 
145 | /*------------------
146 |  * 3. Sorting items
147 |  *------------------*/
148 | .sortable {
149 | 	cursor: pointer;
150 | }
151 | 
152 | .sort-icon {
153 | 	font-size: 0px;
154 | 	float: left;
155 | 	margin-right: 5px;
156 | 	margin-top: 5px;
157 | 	/*triangle*/
158 | 	width: 0;
159 | 	height: 0;
160 | 	border-left: 8px solid transparent;
161 | 	border-right: 8px solid transparent;
162 | }
163 | 
164 | .inactive .sort-icon {
165 | 	/*finish triangle*/
166 | 	border-top: 8px solid #E6E6E6;
167 | }
168 | 
169 | .asc.active .sort-icon {
170 | 	/*finish triangle*/
171 | 	border-bottom: 8px solid #999;
172 | }
173 | 
174 | .desc.active .sort-icon {
175 | 	/*finish triangle*/
176 | 	border-top: 8px solid #999;
177 | }
178 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # Configuration file for the Sphinx documentation builder.
  2 | #
  3 | # This file only contains a selection of the most common options. For a full
  4 | # list see the documentation:
  5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
  6 | 
  7 | # -- Path setup --------------------------------------------------------------
  8 | 
  9 | # If extensions (or modules to document with autodoc) are in another directory,
 10 | # add these directories to sys.path here. If the directory is relative to the
 11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 12 | #
 13 | import os
 14 | import sys
 15 | sys.path.insert(0, os.path.abspath('..'))
 16 | # sys.path.insert(0, os.path.abspath('_ext'))
 17 | 
 18 | from sphinx.ext.autosummary import Autosummary
 19 | from sphinx.ext.autosummary import get_documenter
 20 | from docutils.parsers.rst import directives
 21 | from sphinx.util.inspect import safe_getattr
 22 | from datetime import datetime
 23 | 
 24 | 
 25 | # -- Project information -----------------------------------------------------
 26 | 
 27 | project = 'whatstk'
 28 | copy_right = f'{datetime.now().year}, sociepy'
 29 | author = 'lucasrodes'
 30 | 
 31 | # The full version, including alpha/beta/rc tags
 32 | version = 'v0.7.1'
 33 | 
 34 | 
 35 | # -- General configuration ---------------------------------------------------
 36 | 
 37 | # Add any Sphinx extension module names here, as strings. They can be
 38 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 39 | # ones.
 40 | extensions = [
 41 |     'sphinx.ext.autodoc',
 42 |     'sphinx.ext.napoleon',
 43 |     'sphinx.ext.viewcode',
 44 |     'sphinx.ext.todo',
 45 |     'sphinx.ext.githubpages',
 46 |     'sphinx.ext.autosummary',
 47 |     'sphinx_rtd_theme',
 48 |     'sphinx_copybutton',
 49 |     'sphinx.ext.autosectionlabel',
 50 |     'sphinx_git',
 51 |     'autodocsumm',
 52 |     'sphinx.ext.mathjax',
 53 |     'recommonmark'
 54 |     # "sphinx_multiversion",
 55 |     # 'sphinx_gallery.gen_gallery'
 56 | ]
 57 | 
 58 | # The name of the entry point, without the ".rst" extension.
 59 | # By convention this will be "index"
 60 | master_doc = "index"
 61 | 
 62 | # Add any paths that contain templates here, relative to this directory.
 63 | templates_path = ['_templates']
 64 | 
 65 | # List of patterns, relative to source directory, that match files and
 66 | # directories to ignore when looking for source files.
 67 | # This pattern also affects html_static_path and html_extra_path.
 68 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '../../setup.py']
 69 | EXCLUDE_PATTERN = ['../setup.py']
 70 | 
 71 | # -- Options for HTML output -------------------------------------------------
 72 | 
 73 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 74 | # a list of builtin themes.
 75 | #
 76 | html_theme = 'sphinx_rtd_theme'
 77 | # html_theme = 'python_docs_theme'
 78 | # html_theme = 'alabaster'
 79 | 
 80 | # Add any paths that contain custom static files (such as style sheets) here,
 81 | # relative to this directory. They are copied after the builtin static files,
 82 | # so a file named "default.css" will overwrite the builtin "default.css".
 83 | html_static_path = ['_static']
 84 | 
 85 | # -- Copybutton ---------------------------------------------------------------
 86 | copybutton_prompt_text = ">>> "
 87 | 
 88 | 
 89 | # -- autoautosummary ----------------------------------------------------------
 90 | class AutoAutoSummary(Autosummary):
 91 |     option_spec = {
 92 |         'methods': directives.unchanged,
 93 |         'attributes': directives.unchanged
 94 |     }
 95 | 
 96 |     required_arguments = 1
 97 | 
 98 |     @staticmethod
 99 |     def get_members(obj, typ, include_public=None):
100 |         if not include_public:
101 |             include_public = []
102 |         items = []
103 |         for name in dir(obj):
104 |             try:
105 |                 documenter = get_documenter(safe_getattr(obj, name), obj)
106 |             except AttributeError:
107 |                 continue
108 |             if documenter.objtype == typ:
109 |                 items.append(name)
110 |         public = [x for x in items if x in include_public or not x.startswith('_')]
111 |         return public, items
112 | 
113 |     def run(self):
114 |         clazz = str(self.arguments[0])
115 |         try:
116 |             (module_name, class_name) = clazz.rsplit('.', 1)
117 |             m = __import__(module_name, globals(), locals(), [class_name])
118 |             c = getattr(m, class_name)
119 |             if 'methods' in self.options:
120 |                 _, methods = self.get_members(c, 'method', ['__init__'])
121 | 
122 |                 self.content = ["~%s.%s" % (clazz, method) for method in methods if not method.startswith('_')]
123 |             if 'attributes' in self.options:
124 |                 _, attribs = self.get_members(c, 'attribute')
125 |                 self.content = ["~%s.%s" % (clazz, attrib) for attrib in attribs if not attrib.startswith('_')]
126 |         finally:
127 |             return super(AutoAutoSummary, self).run()
128 | 
129 | # -- Theme --------------------------------------------------------------------
130 | def setup(app):
131 |     app.add_css_file('css/custom.css')
132 |     app.add_directive('autoautosummary', AutoAutoSummary)
133 | 
134 | 
135 | html_title = "WhatsApp Analysis Toolkit"
136 | html_logo = "../assets/logo.png"
137 | html_favicon = "_static/favicon.png"
138 | 
139 | html_show_sourcelink = False
140 | html_copy_source = True
141 | 
142 | github_url = 'https://github.com/lucasrodes/whatstk'
143 | 
144 | html_theme_options = {
145 |     'logo_only': True,
146 |     'navigation_depth': 4,
147 |     'display_version': True,
148 |     'collapse_navigation': False,
149 |     'sticky_navigation': False,
150 |     'github_banner': True,
151 | }
152 | 
153 | # -- Args ---------------------------------------------------------------------
154 | # html4_writer = True
155 | napoleon_use_rtype = False
156 | autosummary_generate = True
157 | 
158 | 
159 | # Autodocsum
160 | autodoc_default_options = {
161 |     'autosummary': True,
162 | }
163 | 
164 | # Sphinx gallery
165 | # from plotly.io._sg_scraper import plotly_sg_scraper
166 | # image_scrapers = ('matplotlib', plotly_sg_scraper,)
167 | 
168 | # sphinx_gallery_conf = {
169 | #      'examples_dirs': '_static/examples_py',   # path to your example scripts
170 | #      'gallery_dirs': 'source/gallery',  # path to where to save gallery generated output
171 | #      'reference_url': {'plotly': None,
172 | #       },
173 | #      'image_scrapers': image_scrapers,
174 | # }
175 | 
176 | # html_sidebars = {'**': ['versioning.html']}
177 | # smv_tag_whitelist = r'^(3.0.0.dev0)'
178 | # smv_branch_whitelist = 'feature/documentation'
179 | # smv_tag_whitelist = r'^.*$'
180 | # smv_remote_whitelist = '^.*$'
181 | # smv_branch_whitelist = r'^(feature/documentation)$'
182 | # smv_released_pattern = r'^tags/.*$'
183 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. include:: source/about.rst
 2 | 
 3 | ----
 4 | 
 5 | Content:
 6 | ========
 7 | 
 8 | .. toctree::
 9 |    :maxdepth: 2
10 | 
11 |    About whatstk <source/about>
12 |    Getting started <source/getting_started/index>
13 |    Code examples <source/code_examples/index>
14 |    API Reference <source/api/index>
15 |    Why choose whatstk? <source/why_whatstk>
16 |    Community & Governance <source/community>
17 |    Contribute <source/contribute>
18 |    Changelog <source/changelog>
19 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/about.rst:
--------------------------------------------------------------------------------
  1 | About whatstk
  2 | =============
  3 | 
  4 | **whatstk**  is a python package providing tools to parse, analyze and visualize WhatsApp chats developed by
  5 | `Lucas Rodés-Guirao <https://lcsrg.me>`_. Easily convert your chats to csv or simply visualize statistics
  6 | using the python library. The package uses `pandas <https://github.com/pandas-dev/pandas>`_ to
  7 | process the data and `plotly <https://github.com/plotly/plotly.py>`_ to visualise it.
  8 | 
  9 | You can also `try a live demo <https://whatstk.streamlit.app/>`_.
 10 | 
 11 | 
 12 | The project is distributed under the `GPL-3.0 license <https://github.com/lucasrodes/whatstk/blob/master/LICENSE>`_
 13 | and is available on `GitHub <https://github.com/lucasrodes/whatstk>`_.
 14 | 
 15 | ----
 16 | 
 17 | First contact with whatstk
 18 | --------------------------
 19 | **whatstk** is built around the :class:`BaseChat <whatstk._chat.BaseChat>` object interface, which requires the class
 20 | method :func:`from_source <whatstk._chat.BaseChat.from_source>` to be implemented. This method loads and parses the
 21 | source chat file into a pandas.DataFrame.
 22 | 
 23 | Below, we use method :func:`df_from_whatsapp <whatstk.whatsapp.parser.df_from_whatsapp>` to load `LOREM chat
 24 | <http://raw.githubusercontent.com/lucasrodes/whatstk/develop/chats/whatsapp/lorem.txt>`_. To test it with your own 
 25 | chat, simply :ref:`export it as a txt file<Export chat>` to your computer and then use class argument ``filepath``, as
 26 | shown in the following example.
 27 | 
 28 | 
 29 | .. code-block:: python
 30 | 
 31 |     >>> from whatstk import df_from_whatsapp
 32 |     >>> from whatstk.data import whatsapp_urls
 33 |     >>> df = df_from_whatsapp(filepath=whatsapp_urls.LOREM)
 34 |     >>> df.head(5)
 35 |                      date        username                                            message
 36 |     0 2020-01-15 02:22:56            Mary                     Nostrud exercitation magna id.
 37 |     1 2020-01-15 03:33:01            Mary     Non elit irure irure pariatur exercitation. 🇩🇰
 38 |     2 2020-01-15 04:18:42  +1 123 456 789  Exercitation esse lorem reprehenderit ut ex ve...
 39 |     3 2020-01-15 06:05:14        Giuseppe  Aliquip dolor reprehenderit voluptate dolore e...
 40 |     4 2020-01-15 06:56:00            Mary              Ullamco duis et commodo exercitation.
 41 | 
 42 | ----
 43 | 
 44 | Installation & compatibility
 45 | ----------------------------
 46 | This project is on `PyPI <https://pypi.org/project/whatstk/>`_, install it with pip:
 47 | 
 48 | .. code-block:: bash
 49 | 
 50 |     pip install whatstk
 51 | 
 52 | Project has been tested in Python>=3.7.
 53 | 
 54 | From source
 55 | ^^^^^^^^^^^
 56 | Clone the project from the `official repository <https://github.com/lucasrodes/whatstk/>`_ and install it locally:
 57 | 
 58 | .. code-block:: bash
 59 | 
 60 |     git clone https://github.com/lucasrodes/whatstk.git
 61 |     cd whatstk
 62 |     pip install .
 63 | 
 64 | Extensions
 65 | ^^^^^^^^^^
 66 | To use :ref:`Google Drive <Load WhatsApp chat from Google Drive>` or Chat Generation support, install the library along with the corresponding extensions:
 67 | 
 68 | .. code-block:: bash
 69 | 
 70 |     pip install whatstk[gdrive]
 71 | 
 72 | .. code-block:: bash
 73 | 
 74 |     pip install whatstk[generate]
 75 | 
 76 | Or install the full suite:
 77 | 
 78 | .. code-block:: bash
 79 | 
 80 |     pip install whatstk[full]
 81 | 
 82 | 
 83 | Develop
 84 | ^^^^^^^
 85 | You can also install the version in development directly from github
 86 | `develop <https://github.com/lucasrodes/whatstk/tree/develop>`_ branch. 
 87 | 
 88 | .. code-block:: bash
 89 | 
 90 |     pip install git+https://github.com/lucasrodes/whatstk.git@develop
 91 | 
 92 | Note: It requires `git <https://git-scm.com/>`_ to be installed.
 93 | 
 94 | ----
 95 | 
 96 | Support
 97 | -------
 98 | You can ask questions and join the development discussion on `GitHub <https://github.com/lucasrodes/whatstk>`_. Use the
 99 | `GitHub issues <https://github.com/lucasrodes/whatstk/issues>`_ section to report bugs or request features and `GitHub discussions <https://github.com/lucasrodes/whatstk/discussions>`_ to open up broader discussions. You can also check the `project roadmap <https://github.com/lucasrodes/whatstk/projects/3>`_.
100 | 
101 | For more details, refer to the :ref:`contribute section <Contribute>`.
102 | 
103 | ----
104 | 
105 | Why this name, whatstk?
106 | -----------------------
107 | whatstk stands for "WhatsApp Toolkit", since the project was initially conceived as a python library to read and process WhatsApp chats. It currently only supports WhatsApp chats, but this might be extended in the future.
108 | 


--------------------------------------------------------------------------------
/docs/source/api/cmd/cmd_chat_gen.rst:
--------------------------------------------------------------------------------
 1 | ``whatstk-generate-chat``
 2 | =========================
 3 | 
 4 | .. warning::
 5 | 
 6 |     To use the chat generation functionalities, install the library with the corresponding extension (ignore the
 7 |     ``--upgrade`` option if you haven't installed the library):
 8 | 
 9 |     .. code-block::
10 | 
11 |         pip install whatstk[generate] --upgrade
12 | 
13 | Generate random WhatsApp chat.
14 | 
15 | .. code-block:: bash
16 | 
17 |     whatstk-generate-chat --help
18 |     usage: Generate chat. Make sure to install the library with required extension: pip install whatstk[generate]
19 |     --upgrade
20 |            [-h] -o OUTPUT_PATH
21 |                           [--filenames FILENAMES [FILENAMES ...]] [-s SIZE]
22 |                           [-f HFORMATS [HFORMATS ...]]
23 |                           [--last-timestamp LAST_TIMESTAMP] [-v]
24 | 
25 |     optional arguments:
26 |     -h, --help            show this help message and exit
27 |     -o OUTPUT_PATH, --output-path OUTPUT_PATH
28 |                             Path where to store generated chats. Must exist.
29 |     --filenames FILENAMES [FILENAMES ...]
30 |                             Filenames. Must be equal length of --hformats.
31 |     -s SIZE, --size SIZE  Number of messages to create per chat. Defaults to
32 |                             500.
33 |     -f HFORMATS [HFORMATS ...], --hformats HFORMATS [HFORMATS ...]
34 |                             Header format. If None, defaults to all supported
35 |                             hformats. List formats as 'format 1' 'format 2' ...
36 |     --last-timestamp LAST_TIMESTAMP
37 |                             Timestamp of last message. Format YYYY-mm-dd
38 |     -v, --verbose         Verbosity.
39 | 


--------------------------------------------------------------------------------
/docs/source/api/cmd/cmd_graph.rst:
--------------------------------------------------------------------------------
 1 | ``whatstk-graph``
 2 | =================
 3 | 
 4 | Get graph from your WhatsApp txt file.
 5 | 
 6 | .. code-block:: bash
 7 | 
 8 |     usage: whatstk-graph [-h] [-o OUTPUT_FILENAME]
 9 |                          [-t {interventions_count,msg_length}]
10 |                          [-id {date,hour,weekday,month}] [-ic] [-il] [-f HFORMAT]
11 |                          input_filename
12 | 
13 |     Visualise a WhatsApp chat. For advance settings, see package
14 |     librarydocumentation
15 | 
16 |     positional arguments:
17 |     input_filename        Input txt file.
18 | 
19 |     optional arguments:
20 |     -h, --help            show this help message and exit
21 |     -o OUTPUT_FILENAME, --output_filename OUTPUT_FILENAME
22 |                             Graph generated can be stored as an HTMLfile.
23 |     -t {interventions_count,msg_length}, --type {interventions_count,msg_length}
24 |                             Type of graph.
25 |     -id {date,hour,weekday,month}, --icount-date-mode {date,hour,weekday,month}
26 |                             Select date mode. Only valid for
27 |                             --type=interventions_count.
28 |     -ic, --icount-cumulative
29 |                             Show values in a cumulative fashion. Only valid for
30 |                             --type=interventions_count.
31 |     -il, --icount-msg-length
32 |                             Count an intervention with its number of characters.
33 |                             Otherwise an intervention is count as one.Only valid
34 |                             for --type=interventions_count.
35 |     -f HFORMAT, --hformat HFORMAT
36 |                             By default, auto-header detection isattempted. If does
37 |                             not work, you can specify it manually using this
38 |                             argument.
39 | 


--------------------------------------------------------------------------------
/docs/source/api/cmd/cmd_to_csv.rst:
--------------------------------------------------------------------------------
 1 | ``whatstk-to-csv``
 2 | ==================
 3 | 
 4 | Convert a WhatsApp txt file to csv.
 5 | 
 6 | .. code-block:: bash
 7 | 
 8 |     usage: whatstk-to-csv [-h] [-f HFORMAT] input_filename output_filename
 9 | 
10 |     Convert a Whatsapp chat from csv to txt.
11 | 
12 |     positional arguments:
13 |     input_filename        Input txt file.
14 |     output_filename       Name of output csv file.
15 | 
16 |     optional arguments:
17 |     -h, --help            show this help message and exit
18 |     -f HFORMAT, --hformat HFORMAT
19 |                             By default, auto-header detection isattempted. If does
20 |                             not work, you can specify it manually using this
21 |                             argument.
22 | 


--------------------------------------------------------------------------------
/docs/source/api/index.rst:
--------------------------------------------------------------------------------
 1 | API Reference
 2 | ===============
 3 | 
 4 | Main objects
 5 | ------------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 1
 9 | 
10 |    WhatsAppChat <whatstk.WhatsAppChat>
11 |    FigureBuilder <whatstk.FigureBuilder>
12 | 
13 | Core API
14 | --------
15 | 
16 | .. toctree::
17 |    :maxdepth: 4
18 | 
19 |    whatstk.whatsapp <whatstk.whatsapp>
20 |    whatstk.analysis <whatstk.analysis> 
21 |    whatstk.graph <whatstk.graph>
22 |    whatstk.utils <whatstk.utils>
23 |    whatstk.data <whatstk.data>
24 |    whatstk._chat <whatstk._chat>
25 | 
26 | Command line tools
27 | ------------------
28 | 
29 | .. toctree::
30 |    :maxdepth: 4
31 | 
32 |    whatstk-to-csv <cmd/cmd_to_csv>
33 |    whatstk-graph <cmd/cmd_graph> 
34 |    whatstk-generate-chat <cmd/cmd_chat_gen>
35 | 


--------------------------------------------------------------------------------
/docs/source/api/whatstk.FigureBuilder.rst:
--------------------------------------------------------------------------------
 1 | FigureBuilder
 2 | =============
 3 | **whatstk** provides this object to ease the generation of insightful plots from your chat. :class:`FigureBuilder
 4 | <whatstk.FigureBuilder>` contains several methods to generate different plots. It assigns a unique color to each user,
 5 | so that a user can be easily identified in all plots.
 6 | 
 7 | To instantiate it, you just need to provide the chat (as pandas.DataFrame or :class:`BaseChat <whatstk._chat.BaseChat>`-API-compliant object).
 8 | 
 9 | 
10 | 
11 | .. autoclass:: whatstk.FigureBuilder
12 |     :members:
13 |     :undoc-members:
14 |     :show-inheritance:
15 |     :inherited-members:
16 | 


--------------------------------------------------------------------------------
/docs/source/api/whatstk.WhatsAppChat.rst:
--------------------------------------------------------------------------------
 1 | WhatsAppChat
 2 | ============
 3 | 
 4 | Object :class:`WhatsAppChat <whatstk.WhatsAppChat>` works as a bridge between the python code and the whatsapp chat text
 5 | file. Easily load a chat from a text file and work with it using all the power of
 6 | `pandas <https://pandas.pydata.org/>`_. 
 7 | 
 8 | A chat can be loaded from a single source file using :func:`WhatsAppChat.from_source <whatstk.WhatsAppChat.from_source>`
 9 | 
10 | .. image:: ../../_static/images/WhatsAppChat.from_source.png
11 |     :width: 1000
12 |     :alt: Concept diagram of WhatsAppChat.from_source
13 | 
14 | 
15 | or multiple source files using :func:`WhatsAppChat.from_sources <whatstk.WhatsAppChat.from_sources>`
16 | 
17 | .. image:: ../../_static/images/WhatsAppChat.from_sources.png
18 |     :width: 1000
19 |     :alt: Concept diagram of WhatsAppChat.from_sources
20 | 
21 |     
22 | .. autoclass:: whatstk.WhatsAppChat
23 |     :members:
24 |     :undoc-members:
25 |     :show-inheritance:
26 |     :inherited-members:
27 | 


--------------------------------------------------------------------------------
/docs/source/api/whatstk._chat.rst:
--------------------------------------------------------------------------------
1 | whatstk._chat
2 | ================
3 | 
4 | 
5 | .. automodule:: whatstk._chat
6 |    :members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/source/api/whatstk.analysis.rst:
--------------------------------------------------------------------------------
1 | whatstk.analysis
2 | ================
3 | 
4 | .. automodule:: whatstk.analysis
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api/whatstk.data.rst:
--------------------------------------------------------------------------------
1 | whatstk.data
2 | ============
3 | 
4 | .. automodule:: whatstk.data
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api/whatstk.graph.rst:
--------------------------------------------------------------------------------
 1 | whatstk.graph
 2 | =====================
 3 | 
 4 | Plot tools using plotly.
 5 | 
 6 | 
 7 | Import `plot <https://github.com/plotly/plotly.py/blob/d1914585bfe747218e95218b2744898d0242de9b/packages/python/
 8 | plotly/plotly/offline/offline.py#L402>`_ (by plotly) to plot figures.
 9 | 
10 | .. code-block:: python
11 |    
12 |       >>> from whatstk.graph import plot
13 | 
14 | 
15 | whatstk.graph.base
16 | ------------------
17 | 
18 | .. automodule:: whatstk.graph.base
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | 
24 | whatstk.graph.figures
25 | ---------------------
26 | 
27 | .. automodule:: whatstk.graph.figures
28 |    :members:
29 |    :undoc-members:
30 |    :show-inheritance:
31 | 
32 | whatstk.graph.figures.boxplot
33 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
34 | 
35 | .. automodule:: whatstk.graph.figures.boxplot
36 |    :members:
37 |    :undoc-members:
38 |    :show-inheritance:
39 | 
40 | whatstk.graph.figures.heatmap
41 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
42 | 
43 | .. automodule:: whatstk.graph.figures.heatmap
44 |    :members:
45 |    :undoc-members:
46 |    :show-inheritance:
47 | 
48 | whatstk.graph.figures.sankey
49 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
50 | 
51 | .. automodule:: whatstk.graph.figures.sankey
52 |    :members:
53 |    :undoc-members:
54 |    :show-inheritance:
55 | 
56 | whatstk.graph.figures.scatter
57 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
58 | 
59 | .. automodule:: whatstk.graph.figures.scatter
60 |    :members:
61 |    :undoc-members:
62 |    :show-inheritance:
63 | 
64 | whatstk.graph.figures.utils
65 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
66 | 
67 | .. automodule:: whatstk.graph.figures.utils
68 |    :members:
69 |    :undoc-members:
70 |    :show-inheritance:
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/docs/source/api/whatstk.utils.rst:
--------------------------------------------------------------------------------
 1 | whatstk.utils
 2 | =============
 3 | 
 4 | 
 5 | .. automodule:: whatstk.utils
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 | 
10 | ----
11 | 
12 | whatstk.utils.chat\_merge
13 | -------------------------
14 | 
15 | .. automodule:: whatstk.utils.chat_merge
16 |    :members:
17 |    :undoc-members:
18 |    :show-inheritance:
19 | 
20 | ----
21 | 
22 | whatstk.utils.gdrive
23 | ------------------------
24 | 
25 | .. automodule:: whatstk.utils.gdrive
26 |    :members:
27 |    :undoc-members:
28 |    :show-inheritance:
29 | 
30 | ----
31 | 
32 | whatstk.utils.exceptions
33 | ------------------------
34 | 
35 | .. automodule:: whatstk.utils.exceptions
36 |    :members:
37 |    :undoc-members:
38 |    :show-inheritance:
39 | 
40 | ----
41 | 
42 | whatstk.utils.utils
43 | -------------------
44 | 
45 | .. automodule:: whatstk.utils.utils
46 |    :members:
47 |    :undoc-members:
48 |    :show-inheritance:
49 | 
50 | 
51 | 


--------------------------------------------------------------------------------
/docs/source/api/whatstk.whatsapp.rst:
--------------------------------------------------------------------------------
 1 | whatstk.whatsapp
 2 | ================
 3 | 
 4 | 
 5 | .. automodule:: whatstk.whatsapp
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 | 
10 | ----
11 | 
12 | 
13 | whatstk.whatsapp.objects
14 | ------------------------
15 | 
16 | .. automodule:: whatstk.whatsapp.objects
17 |    :members:
18 |    :undoc-members:
19 |    :show-inheritance:
20 |    :inherited-members:
21 | 
22 | ----
23 | 
24 | whatstk.whatsapp.parser
25 | -----------------------
26 | 
27 | .. automodule:: whatstk.whatsapp.parser
28 |    :members:
29 |    :undoc-members:
30 |    :show-inheritance:
31 | 
32 | ----
33 | 
34 | whatstk.whatsapp.auto\_header
35 | ---------------------------------
36 | 
37 | .. automodule:: whatstk.whatsapp.auto_header
38 |    :members:
39 |    :undoc-members:
40 |    :show-inheritance:
41 | 
42 | ----
43 | 
44 | whatstk.whatsapp.generation
45 | ------------------------------
46 | 
47 | .. automodule:: whatstk.whatsapp.generation
48 |    :members:
49 |    :undoc-members:
50 |    :show-inheritance:
51 | 
52 | ----
53 | 
54 | whatstk.whatsapp.hformat
55 | -------------------------
56 | 
57 | .. automodule:: whatstk.whatsapp.hformat
58 |    :members:
59 |    :undoc-members:
60 |    :show-inheritance:
61 | 


--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
 1 | Changelog
 2 | =========
 3 | 
 4 | 
 5 | Unreleased
 6 | ----------
 7 | .. git_changelog::
 8 |     :rev-list: v0.6.2..HEAD
 9 | 
10 | 
11 | v0.6.x
12 | ----------
13 | .. git_changelog::
14 |     :rev-list: v0.5.0..v0.6.2
15 |     
16 | v0.5.x
17 | ----------
18 | .. git_changelog::
19 |     :rev-list: v0.4.0..v0.5.0
20 | 
21 | v0.4.x
22 | ----------
23 | .. git_changelog::
24 |     :rev-list: v0.3.0..v0.4.0
25 | 
26 | v0.3.x
27 | ----------
28 | .. git_changelog::
29 |     :rev-list: 0.2.0..v0.3.0
30 | 
31 | v0.2.x
32 | ----------
33 | .. git_changelog::
34 |     :rev-list: 0.1.10..0.2.0
35 | 


--------------------------------------------------------------------------------
/docs/source/code_examples/custom.rst:
--------------------------------------------------------------------------------
  1 | Custom plot
  2 | ===========
  3 | 
  4 | :class:`FigureBuilder <whatstk.FigureBuilder>` provides some tools to easily visualize your chat. However, the possible
  5 | visualizations are infinite. Here, we provide some examples of a custom visualization using some library tools together
  6 | with pandas and plotly.
  7 | 
  8 | 
  9 | Number of messages vs. Number of characters sent
 10 | ------------------------------------------------
 11 | For each user, we will obtain a 2D scatter plot measuring the number of messages and characters sent in a day. That is,
 12 | for a given user we will have `N` points, where `N` is the number of days that the user has sent at least one message.
 13 | Each point therefore corresponds to a specific day, where the x-axis and the y-axis measure the number of messages sent
 14 | and the average number of characters per message in that day, respectively.
 15 | 
 16 | 
 17 | First of all, let's instantiate objects :class:`WhatsAppChat <whatstk.WhatsAppChat>` (chat loading) and
 18 | :class:`FigureBuilder <whatstk.FigureBuilder>` (figure coloring).
 19 | 
 20 | .. code-block:: python
 21 | 
 22 |     >>> from whatstk import WhatsAppChat, FigureBuilder
 23 |     >>> from whatstk.data import whatsapp_urls
 24 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000)
 25 |     >>> fb = FigureBuilder(chat=chat)
 26 | 
 27 | Next, we obtain the number of messages and number of characters sent per user per day.
 28 | 
 29 | .. code-block:: python
 30 | 
 31 |     >>> from whatstk.analysis import get_interventions_count
 32 |     >>> counts_interv = get_interventions_count(chat=chat, date_mode='date', msg_length=False, cumulative=False)
 33 |     >>> counts_len = get_interventions_count(chat=chat, date_mode='date', msg_length=True, cumulative=False)
 34 | 
 35 | Time to process the data a bit. We obtain a DataFrame with five columns: *username*, *date*, *num_characters*,
 36 | *num_interventions* and *avg_characters*.
 37 | 
 38 | .. code-block:: python
 39 | 
 40 |     >>> import pandas as pd
 41 |     >>> counts_len = pd.DataFrame(counts_len.unstack(), columns=['num_characters'])
 42 |     >>> counts_interv = pd.DataFrame(counts_interv.unstack(), columns=['num_interventions'])
 43 |     >>> counts = counts_len.merge(counts_interv, left_index=True, right_index=True)
 44 |     >>> # Remove all zero entries and get average number of characters
 45 |     >>> counts = counts[~(counts['num_interventions'] == 0)].reset_index()
 46 |     >>> counts['avg_characters'] = counts['num_characters']/counts['num_interventions']
 47 |     >>> counts.head(5)
 48 |              username       date  num_characters  num_interventions  avg_characters
 49 |     0  +1 123 456 789 2019-04-16              40                  1       40.000000
 50 |     1  +1 123 456 789 2019-04-17              21                  1       21.000000
 51 |     2  +1 123 456 789 2019-04-21              90                  2       45.000000
 52 |     3  +1 123 456 789 2019-04-25             127                  3       42.333333
 53 |     4  +1 123 456 789 2019-04-26              33                  1       33.000000
 54 | 
 55 |     [5 rows x 5 columns]
 56 | 
 57 | So far we have obtained a dataframe ``counts``, whose rows correspond to a specific message. However, in this example we
 58 | are interested in the aggregated values per day. Hence, we group this dataframe by user and date and re-calculate the
 59 | number of messages sent and average number of characters sent per day.
 60 | 
 61 | .. code-block:: python
 62 | 
 63 |     >>> agg_operations = {'avg_characters': 'mean','num_interventions': 'mean'}
 64 |     >>> counts = counts.groupby(['username', counts.date.dt.date]).agg(agg_operations)
 65 |     >>> counts = counts.rename_axis(index=['username', 'date'])
 66 |     >>> counts = counts.reset_index()
 67 |     >>> counts.head(5)
 68 |               username	      date	avg_characters	num_interventions
 69 |     0	+1 123 456 789	2019-04-16	     40.000000	                1
 70 |     1	+1 123 456 789	2019-04-17	     21.000000	                1
 71 |     2	+1 123 456 789	2019-04-21	     45.000000	                2
 72 |     3	+1 123 456 789	2019-04-25	     42.333333	                3
 73 |     4	+1 123 456 789	2019-04-26	     33.000000	                1
 74 | 
 75 | Once the dataframe is obtained, we generate a plot using `Histogram2dContour
 76 | <https://plotly.com/python/2d-histogram-contour/>`_ by plotly.
 77 | 
 78 | .. code-block:: python
 79 | 
 80 |     >>> from whatstk.graph import plot
 81 |     >>> import plotly.graph_objs as go
 82 |     >>> traces = []
 83 |     >>> for username in fb.usernames:
 84 |     >>>     counts_user = counts[counts['username']==username]
 85 |     >>>     traces.append(
 86 |     >>>         go.Histogram2dContour(
 87 |     >>>             contours={'coloring': 'none'},
 88 |     >>>             x=counts_user['num_interventions'],
 89 |     >>>             y=counts_user['avg_characters'],
 90 |     >>>             # mode='markers',
 91 |     >>>             # marker=dict(color=fb.user_color_mapping[username], opacity=0.2),
 92 |     >>>             name=username,
 93 |     >>>             showlegend=True,
 94 |     >>>             line={'color': fb.user_color_mapping[username]},
 95 |     >>>             nbinsx=10, nbinsy=20
 96 |     >>>         )
 97 |     >>>     )
 98 | 
 99 | 
100 | .. code-block:: python
101 | 
102 |     >>> layout = {
103 |     >>>     'title': 'Average number of characters sent in a day vs Interventions per day',
104 |     >>>     'yaxis_title': 'avg characters',
105 |     >>>     'xaxis_title': 'num interventions',
106 |     >>> }
107 |     >>> fig = go.Figure(data=traces, layout=layout)
108 |     >>> plot(fig)
109 | 
110 | .. raw:: html
111 |     :file: ../../_static/html/custom_interventions_vs_length.html
112 | 
113 | 


--------------------------------------------------------------------------------
/docs/source/code_examples/index.rst:
--------------------------------------------------------------------------------
 1 | Code examples
 2 | ===============
 3 | 
 4 | Basic examples
 5 | --------------
 6 | 
 7 | .. toctree:: 
 8 |    :maxdepth: 1
 9 | 
10 |    Load WhatsApp chat <load_chat>
11 |    Load WhatsApp chat from multiple sources <load_chat_multiple>
12 |    Load WhatsApp chat from Google Drive  <load_chat_gdrive>
13 |    Load WhatsApp chat with specific hformat  <load_chat_hformat>
14 |    Rename usernames <load_chat_multiple>
15 | 
16 | Visualisations
17 | --------------
18 | 
19 | With :class:`FigureBuilder <whatstk.FigureBuilder>` you can get great insights from your chat. Below we provide some
20 | examples on the visualizations that you can get with this library with the help of `plotly <https://github.com/plotly/
21 | plotly.py>`_.
22 | 
23 | 
24 | .. toctree::
25 |    :maxdepth: 1
26 | 
27 |    Count of user interventions <interventions_count>
28 |    Message length boxplot <message_length_boxplot>
29 |    User interaction <user_interaction>
30 |    Custom plot example <custom>
31 | 
32 | ----
33 | 
34 | If you think that something is missing please `raise an issue <https://github.com/lucasrodes/whatstk/issues>`_.


--------------------------------------------------------------------------------
/docs/source/code_examples/interventions_count.rst:
--------------------------------------------------------------------------------
  1 | Counting user interventions
  2 | ===========================
  3 | 
  4 | Counting the user interventions can give relevant insights on which users "dominate" the conversation, even more in a
  5 | group chat. To this end, object :class:`FigureBuilder <whatstk.FigureBuilder>` has the method
  6 | :func:`user_interventions_count_linechart <whatstk.FigureBuilder.user_interventions_count_linechart>`, which generates a
  7 | plotly figure with the count of user interventions.
  8 | 
  9 | First of all, we load a chat and create an instance of :class:`FigureBuilder <whatstk.FigureBuilder>`.
 10 | 
 11 | .. code-block:: python
 12 | 
 13 |     >>> from whatstk import WhatsAppChat, FigureBuilder
 14 |     >>> from whatstk.graph import plot
 15 |     >>> from whatstk.data import whatsapp_urls
 16 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000)
 17 |     >>> fb = FigureBuilder(chat=chat)
 18 | 
 19 | Count of user interventions
 20 | ---------------------------
 21 | 
 22 | Default call of the aforementioned method displays the number of interventions sent by each user per day.
 23 | 
 24 | .. code-block:: python
 25 | 
 26 |     >>> fig = fb.user_interventions_count_linechart()
 27 |     >>> plot(fig)
 28 | 
 29 | 
 30 | .. raw:: html
 31 |     :file: ../../_static/html/interventions_count_date.html
 32 | 
 33 | 
 34 | As seen in the previous plot, the number of messages sent per user in a day tends to oscillate quite a lot
 35 | from day to day, which might hinder a good visualisation of the data. Hence, we can use ``cumulative=True`` to
 36 | illustrate the cumulative count of interventions instead.
 37 | 
 38 | .. code-block:: python
 39 | 
 40 |     >>> fig = fb.user_interventions_count_linechart(cumulative=True, title='User inteventions count (cumulative)')
 41 |     >>> plot(fig)
 42 | 
 43 | 
 44 | .. raw:: html
 45 |     :file: ../../_static/html/interventions_count_date_cum.html
 46 | 
 47 | 
 48 | Additionally, we can obtain the counts for all users combined using ``all_users=True``:
 49 | 
 50 | .. code-block:: python
 51 | 
 52 |     >>> fig = fb.user_interventions_count_linechart(cumulative=True, all_users=True, title='Inteventions count (cumulative)')
 53 |     >>> plot(fig)
 54 | 
 55 | 
 56 | .. raw:: html
 57 |     :file: ../../_static/html/interventions_count_date_all.html
 58 | 
 59 | 
 60 | Count of characters sent per user
 61 | ---------------------------------
 62 | 
 63 | Now, instead of counting the number of interventions we might want to explore the number of characters sent. Note that a
 64 | user might send tons of messages with few words, whereas another user might send few messages with tons of words.
 65 | Depending on your analysis you might prefer exploring interventions or number of characters. Getting the number of
 66 | characters sent per user can be done using ``msg_length=True`` when calling function
 67 | :func:`user_interventions_count_linechart <whatstk.FigureBuilder.user_interventions_count_linechart>`.
 68 | 
 69 | In the following we explore the cumulative number of characters sent per user.
 70 | 
 71 | .. code-block:: python
 72 | 
 73 |     >>> fig = fb.user_interventions_count_linechart(msg_length=True, cumulative=True, title='Characters sent by user (cumulative)')
 74 |     >>> plot(fig)
 75 | 
 76 | 
 77 | .. raw:: html
 78 |     :file: ../../_static/html/interventions_count_date_length_cum.html
 79 | 
 80 | 
 81 | 
 82 | Other insights
 83 | --------------
 84 | 
 85 | Method :func:`user_interventions_count_linechart <whatstk.FigureBuilder.user_interventions_count_linechart>` has the
 86 | argument ``date_mode``, which allows for several types of count-grouping methods. By default, the method obtains the
 87 | counts per date (what has been used in previous examples).
 88 | 
 89 | 
 90 | Using ``date_mode=hour`` illustrates the distribution of user interventions over the 24 hours in a day. In this example,
 91 | for instance, Giuseppe has their interventions peak in hour ranges [01:00, 02:00] and [20:00, 21:00], with 21
 92 | interventions in each. 
 93 | 
 94 | .. code-block:: python
 95 | 
 96 |     >>> fig = fb.user_interventions_count_linechart(date_mode='hour', title='User interventions count (hour)',
 97 |     ...     xlabel='Hour')
 98 |     >>> plot(fig)
 99 | 
100 | .. raw:: html
101 |     :file: ../../_static/html/interventions_count_hours.html
102 | 
103 | Using ``date_mode=weekday`` illustrates the distribution of user interventions over the 7 days of the week. In this
104 | example, for instance, we see that Monday and Sunday are the days with the most interventions.
105 | 
106 | .. code-block:: python
107 | 
108 |     >>> fig = fb.user_interventions_count_linechart(date_mode='weekday', title='User interventions count (weekly)',
109 |     ...     xlabel='Week day')
110 |     >>> plot(fig)
111 | 
112 | .. raw:: html
113 |     :file: ../../_static/html/interventions_count_weekday.html
114 | 
115 | 
116 | Using ``date_mode=month`` illustrates the distribution of user interventions over the 12 months of the year. In this
117 | example, for instance, we observe that all users have their interventions peak in June (except for Giuseppe, who has
118 | their peak in July). Maybe summer calling?
119 | 
120 | .. code-block:: python
121 | 
122 |     >>> fig = fb.user_interventions_count_linechart(date_mode='month', title='User interventions count (yearly)', xlabel='Month')
123 |     >>> plot(fig)
124 | 
125 | .. raw:: html
126 |     :file: ../../_static/html/interventions_count_months.html
127 | 


--------------------------------------------------------------------------------
/docs/source/code_examples/load_chat.rst:
--------------------------------------------------------------------------------
 1 | Load WhatsApp chat
 2 | ==================
 3 | 
 4 | Once you have :doc:`exported <../getting_started/export_chat>` a chat it is time to load it in python.
 5 | 
 6 | In this example we load the example `LOREM chat <http://raw.githubusercontent.com/lucasrodes/whatstk/
 7 | main/chats/whatsapp/lorem.txt>`_, which is available online, using library class :class:`WhatsAppChat 
 8 | <whatstk.WhatsAppChat>`.
 9 | 
10 | .. code-block:: python
11 | 
12 |     >>> from whatstk import WhatsAppChat
13 |     >>> from whatstk.data import whatsapp_urls
14 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM)
15 | 
16 | Once loaded, we can check some of the chat messages by accessing its attribute :func:`df <whatstk.WhatsAppChat.df>`,
17 | which is a pandas.DataFrame with columns `date` index (timestamp of message), `username` (name of user sending the
18 | message) and `message` (message sent).
19 | 
20 | .. code-block:: python
21 | 
22 |     >>> chat.df.head(5)
23 |                          date        username                                            message
24 |         0 2020-01-15 02:22:56            Mary                     Nostrud exercitation magna id.
25 |         1 2020-01-15 03:33:01            Mary     Non elit irure irure pariatur exercitation. 🇩🇰
26 |         2 2020-01-15 04:18:42  +1 123 456 789  Exercitation esse lorem reprehenderit ut ex ve...
27 |         3 2020-01-15 06:05:14        Giuseppe  Aliquip dolor reprehenderit voluptate dolore e...
28 |         4 2020-01-15 06:56:00            Mary              Ullamco duis et commodo exercitation.
29 | 
30 | Getting the start and end date of the chat can give us a good overview of the chat content.
31 | 
32 | .. code-block:: python
33 | 
34 |     >>> print(f"Start date: {chat.start_date}\nEnd date: {chat.end_date}")
35 |     Start date: 2020-01-15 02:22:56
36 |     End date: 2020-05-11 22:32:48
37 | 
38 | Also, getting a list with the chat members is simple
39 | 
40 | .. code-block:: python
41 | 
42 |     >>> chat.users
43 |     ['+1 123 456 789', 'Giuseppe', 'John', 'Mary']
44 | 
45 | ----
46 | 
47 | .. seealso::
48 | 
49 |     * :ref:`Load WhatsApp chat from multiple sources <Load WhatsApp chat from multiple sources>`
50 |     * :ref:`Load WhatsApp chat from Google Drive <Load WhatsApp chat from Google Drive>`
51 |     * :ref:`Load WhatsApp chat with specific hformat <Load WhatsApp chat with specific hformat>`


--------------------------------------------------------------------------------
/docs/source/code_examples/load_chat_gdrive.rst:
--------------------------------------------------------------------------------
 1 | Load WhatsApp chat from Google Drive
 2 | ====================================
 3 | 
 4 | .. warning::
 5 | 
 6 |     To load chats from google drive, install the library with the corresponding extension (ignore the
 7 |     ``--upgrade`` option if you haven't installed the library):
 8 | 
 9 |     .. code-block::
10 | 
11 |         pip install whatstk[gdrive] --upgrade
12 | 
13 | You can also load a file saved in your Google Drive. Note that in order to do so, you need first to configure the
14 | credentials to interact with Google Drive.
15 | 
16 | Configure credentials
17 | ---------------------
18 | 
19 | In particular, you need the client secret JSON file. This can be downloaded from the Google Console. To get this file, we recommend following `this tutorial
20 | <https://medium.com/analytics-vidhya/how-to-connect-google-drive-to-python-using-pydrive-9681b2a14f20>`_, which is
21 | inspired by `PyDrive2 documentation <https://iterative.github.io/PyDrive2/docs/build/html/quickstart.html>`_. Some
22 | important additions to the previous tutorials are:
23 | 
24 | - Make sure to add yourself in Test users, as noted in `this thread <https://stackoverflow.com/questions/65980758/pydrive-quickstart-and-error-403-access-denied>`_
25 | - Select Desktop App instead of Web Application as the application type when creating the OAuth Client ID.
26 | 
27 | Once you have downloaded the client secrets, run :func:`gdrive_init <whatstk.utils.gdrive.gdrive_init>`, which will
28 | guide you through the authentication process. You will need to access a link via your browser and copy-paste a
29 | verification code.
30 | 
31 | .. code-block:: python
32 | 
33 |     >>> from whatstk.utils import gdrive_init
34 |     >>> gdrive_init("path/to/client_secrets.json")
35 |     Go to the following link in your browser:
36 | 
37 |     https://accounts.google.com/...
38 | 
39 |     Enter verification code: 
40 | 
41 | This should only be run the first time to correctly configure your Google credentials.
42 | 
43 | 
44 | Load a file from Google Drive
45 | -----------------------------
46 | 
47 | You can pass a file reference to :class:`WhatsAppChat <whatstk.WhatsAppChat>` by means of its ID. All files in Google
48 | Drive have a unique ID. To obtain it, create a `shareable link
49 | <https://support.google.com/drive/answer/7166529?co=GENIE.Platform%3DDesktop&hl=en>`_, which will have the following format:
50 | 
51 | .. code-block::
52 | 
53 |     https://drive.google.com/file/d/[FILE-ID]/view?usp=sharing
54 | 
55 | 
56 | Now, simply copy ``[FILE-ID]`` and run:
57 | 
58 | .. code-block:: python
59 | 
60 |     >>> from whatstk import WhatsAppChat
61 |     >>> chat = WhatsAppChat.from_source("gdrive://[FILE-ID]")
62 | 
63 | Note that Google Drive file IDs are passed with prefix `gdrive://`.
64 | 


--------------------------------------------------------------------------------
/docs/source/code_examples/load_chat_hformat.rst:
--------------------------------------------------------------------------------
 1 | Load WhatsApp chat with specific hformat
 2 | ========================================
 3 | 
 4 | If ``auto_header`` option fails, you can still load your chat manually specifying the ``hformat``. In the example below,
 5 | we have that the ``hformat='%d.%m.%y, %H:%M - %name:'``.
 6 | 
 7 | .. code-block:: python
 8 | 
 9 |     >>> from whatstk.whatsapp.objects import WhatsAppChat
10 |     >>> from whatstk.data import whatsapp_urls
11 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON, hformat='%d.%m.%y, %H:%M - %name:')
12 |     >>> chat.df.head(5)
13 |                      date     username                                            message
14 |     0 2016-08-06 13:23:00  Ash Ketchum                                          Hey guys!
15 |     1 2016-08-06 13:25:00        Brock              Hey Ash, good to have a common group!
16 |     2 2016-08-06 13:30:00        Misty  Hey guys! Long time haven't heard anything fro...
17 |     3 2016-08-06 13:45:00  Ash Ketchum  Indeed. I think having a whatsapp group nowada...
18 |     4 2016-08-06 14:30:00        Misty                                          Definetly
19 | 
20 | ----
21 | 
22 | .. seealso::
23 | 
24 |     * :ref:`The header format <The header format>`
25 |     * :ref:`Load WhatsApp chat <Load WhatsApp chat>`
26 |     * :ref:`Load WhatsApp chat from Google Drive <Load WhatsApp chat from Google Drive>`
27 |     * :ref:`Load WhatsApp chat from multiple sources <Load WhatsApp chat from multiple sources>`
28 | 


--------------------------------------------------------------------------------
/docs/source/code_examples/load_chat_multiple.rst:
--------------------------------------------------------------------------------
 1 | Load WhatsApp chat from multiple sources
 2 | ========================================
 3 | 
 4 | You can also load a chat using multiple source files. You might want to use this when several files have been exported
 5 | from the same chat over the years. 
 6 | 
 7 | In the example below, we load chats
 8 | `LOREM1 <http://raw.githubusercontent.com/lucasrodes/whatstk/main/chats/whatsapp/lorem-merge-part1.txt>`_ and `LOREM2 <http://raw.githubusercontent.com/lucasrodes/whatstk/main/chats/whatsapp/lorem-merge-part2.txt>`_.
 9 | 
10 | .. code-block:: python
11 | 
12 |     >>> from whatstk import WhatsAppChat
13 |     >>> from whatstk.data import whatsapp_urls
14 |     >>> chat = WhatsAppChat.from_sources(filepaths=[whatsapp_urls.LOREM1, whatsapp_urls.LOREM2])
15 | 
16 | Rename usernames
17 | ----------------
18 | 
19 | In the example here, chat `LOREM1
20 | <http://raw.githubusercontent.com/lucasrodes/whatstk/main/chats/whatsapp/lorem-merge-part1.txt>`_ and chat `LOREM2
21 | <http://raw.githubusercontent.com/lucasrodes/whatstk/main/chats/whatsapp/lorem-merge-part2.txt>`_ contain slightly
22 | different usernames. In particular, in chat LOREM2, user *Mary* appears as *Maria* and *Maria2*:
23 | 
24 | .. code-block:: python
25 | 
26 |     >>> WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM1).users
27 |     ['+1 123 456 789', 'Giuseppe', 'John', 'Mary']
28 |     >>> WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM2).users
29 |     ['+1 123 456 789', 'Giuseppe', 'John', 'Maria', 'Maria2']
30 |     >>> chat.users
31 |     ['+1 123 456 789', 'Giuseppe', 'John', 'Maria', 'Maria2', 'Mary']
32 | 
33 | To draw some conclusions based on user behaviour we would like to group *Mary*, *Maria* and *Maria2* under the same
34 | username. To fix this, we rename *Maria* and *Maria2* as *Mary*:
35 | 
36 | .. code-block:: python
37 | 
38 |     
39 |     >>> chat = chat.rename_users({'Mary': ['Maria', 'Maria2']})
40 |     >>> chat.users
41 |     ['+1 123 456 789', 'Giuseppe', 'John', 'Mary']
42 | 
43 | 
44 | ----
45 | 
46 | .. seealso::
47 | 
48 |     * :ref:`Load WhatsApp chat <Load WhatsApp chat>`
49 |     * :ref:`Load WhatsApp chat from Google Drive <Load WhatsApp chat from Google Drive>`
50 |     * :ref:`Load WhatsApp chat with specific hformat <Load WhatsApp chat with specific hformat>`


--------------------------------------------------------------------------------
/docs/source/code_examples/message_length_boxplot.rst:
--------------------------------------------------------------------------------
 1 | Message length boxplot
 2 | ======================
 3 | 
 4 | Different users send different sorts of messages. In particular, the length of the messages (number of characters) can
 5 | substantially vary depending on the user sending the message.
 6 | 
 7 | In this example, we explore the statistics behind the length of user messages. To this end, we can use method
 8 | :func:`user_msg_length_boxplot <whatstk.FigureBuilder.user_msg_length_boxplot>`, which illustrates the length of each
 9 | user's messages by means of `box plots <https://en.wikipedia.org/wiki/Box_plot>`_.
10 | 
11 | 
12 | .. code-block:: python
13 | 
14 |     >>> from whatstk import WhatsAppChat, FigureBuilder
15 |     >>> from whatstk.graph import plot
16 |     >>> from whatstk.data import whatsapp_urls
17 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000)
18 |     >>> fig = FigureBuilder(chat=chat).user_msg_length_boxplot()
19 |     >>> plot(fig)
20 | 
21 | 
22 | .. raw:: html
23 |     :file: ../../_static/html/boxplot.html
24 | 


--------------------------------------------------------------------------------
/docs/source/code_examples/user_interaction.rst:
--------------------------------------------------------------------------------
 1 | User interaction
 2 | ================
 3 | 
 4 | The user interaction can shed some light on the different kinds of conversations that occur in a chat group. For
 5 | instance, when a certain topic appears some users might intervene and others will not, forming *user clusters*. To this
 6 | end, a first approach in detecting such clusters resides in which users respond to which users.
 7 | 
 8 | User interaction heatmap
 9 | ------------------------
10 | 
11 | In the following we visualize the *response matrix*, which tells us the number of messages sent by a certain user to the
12 | rest of users.
13 | 
14 | 
15 | For instance, in this specific example we observe that user *Giuseppe* sends 153 messages to *+1 123 456 789* and that
16 | *Mary* receives 122 messages from *John*.
17 | 
18 | .. code-block:: python
19 | 
20 |     >>> from whatstk import WhatsAppChat, FigureBuilder
21 |     >>> from whatstk.graph import plot
22 |     >>> from whatstk.data import whatsapp_urls
23 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000)
24 |     >>> fig = FigureBuilder(chat=chat).user_message_responses_heatmap()
25 |     >>> plot(fig)
26 | 
27 | 
28 | .. raw:: html
29 |     :file: ../../_static/html/user_message_responses_heatmap.html
30 | 
31 | .. seealso::
32 | 
33 |         * :func:`user_message_responses_heatmap <whatstk.FigureBuilder.user_message_responses_heatmap>`
34 | 
35 | User interaction flow
36 | ---------------------
37 | 
38 | A good way to visualize responses between users is with `Sankey diagrams <https://en.wikipedia.org/wiki/Sankey_diagram>`_.
39 | The information conveyed by the graph below is the same as the one in the previous section, but the way it is presented is
40 | slightly different (Sankey diagram instead of a heatmap).
41 | 
42 | .. code-block:: python
43 | 
44 |     >>> from whatstk import WhatsAppChat, FigureBuilder
45 |     >>> from whatstk.graph import plot
46 |     >>> from whatstk.data import whatsapp_urls
47 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000)
48 |     >>> fig = FigureBuilder(chat=chat).user_message_responses_flow()
49 |     >>> plot(fig)
50 | 
51 | 
52 | .. raw:: html
53 |     :file: ../../_static/html/user_message_responses_flow.html
54 | 
55 | .. seealso::
56 | 
57 |         * :func:`user_message_responses_flow <whatstk.FigureBuilder.user_message_responses_flow>`
58 | 


--------------------------------------------------------------------------------
/docs/source/community.rst:
--------------------------------------------------------------------------------
 1 | Community &  Governance
 2 | =======================
 3 | 
 4 | **whatstk** is a fully open-source project done for and by the community. It is primarily developed at sociepy by the 
 5 | whatstk team, with the help of open-source developers.
 6 | 
 7 | For library discussions, consider joining `gitter group <https://gitter.im/whatstk/>`_.
 8 | 
 9 | ----
10 | 
11 | Leadership
12 | ----------
13 | 
14 | BDFL
15 | ^^^^
16 | Role: final call in decisions related to the API.
17 | 
18 | - `Lucas Rodés-Guirao <https://lcsrg.me>`_
19 | 
20 | Community Contributors
21 | ^^^^^^^^^^^^^^^^^^^^^^
22 | 
23 | - `Albert Aparicio Isarn <https://github.com/albertaparicio>`_
24 | - `Kolmar Kafran <https://github.com/kafran>`_
25 | - `Clara Sáez Calabuig <https://www.linkedin.com/in/clara-saez-calabuig-6a59351a1>`_ (project logo)
26 | 


--------------------------------------------------------------------------------
/docs/source/contribute.rst:
--------------------------------------------------------------------------------
 1 | Contribute
 2 | ==========
 3 | 
 4 | We are really open to your thoughts and feedback!
 5 | 
 6 | ----
 7 | 
 8 | Bug reporting
 9 | -------------
10 | Please report any bug that you may find to the `issues <https://github.com/lucasrodes/whatstk/issues>`_ section.
11 | 
12 | ----
13 | 
14 | Requesting a Feature
15 | --------------------
16 | If you find a new feature could be useful for the community, please try to add it in the
17 | `issues <https://github.com/lucasrodes/whatstk/issues>`_ section with a clear description.
18 | 
19 | ----
20 | 
21 | Submitting a Pull Request
22 | -------------------------
23 | - Start by forking the `develop <https://github.com/lucasrodes/whatstk/tree/develop>`_ branch.
24 | - Add your code to the project!
25 | - Test your code by running the script `run-tests.sh <https://github.com/lucasrodes/whatstk/blob/master/run-tests.sh>`_.
26 |   This script checks the code style (flake8) and the logic of your code (pytest). Note: Make sure to open and read it. The first time you will need to run steps 1.1, 1.2 and 1.3.
27 | 
28 | .. code-block:: bash
29 | 
30 |     sh ./run-tests.sh
31 | 
32 | This script generates three HTML files which are placed within a created folder `reports`.
33 | 
34 | - Once your code has successfully passed the tests, you can submit a pull request and wait for its approval.
35 | 
36 | 
37 | .. todo::
38 | 
39 |     Use `tox <https://tox.readthedocs.io/en/latest/>`_
40 | 
41 | Approval of pull request
42 | ^^^^^^^^^^^^^^^^^^^^^^^^
43 | 
44 | A pull request will be accepted if:
45 | 
46 | - Adds new functionalities of interest.
47 | - Does not decrease the overall project code `coverage <https://codecov.io/gh/lucasrodes/whatstk>`_. 
48 | 
49 | Note: You will need to add tests for your code. For this, you can check the current `tests <https://github.com/lucasrodes/whatstk/tree/master/tests>`_.
50 | 
51 | ----
52 | 
53 | Adding new examples
54 | -------------------
55 | To add new examples, consider editing an ``rst`` file yourself in the ``docs/source/`` directory of the repository. For
56 | questions or doubts, use `GitHub discussions <https://github.com/lucasrodes/whatstk/discussions>`_.
57 | 
58 | ----
59 | 
60 | API discussions
61 | ---------------
62 | Consider posting your questions or suggestions on `GitHub discussions <https://github.com/lucasrodes/whatstk/discussions>`_ or `GitHub issues <https://github.com/lucasrodes/whatstk/issues>`_.
63 | 
64 | ----
65 | 
66 | Doubts?
67 | -------
68 | 
69 | Feel free to `contact me <https://lcsrg.me/pages/contact>`_ :)
70 | 


--------------------------------------------------------------------------------
/docs/source/developer_guide/index.rst:
--------------------------------------------------------------------------------
 1 | Developer Guide
 2 | ===============
 3 | In this section
 4 | 
 5 | .. toctree::
 6 |    .. :hidden:
 7 |    :maxdepth: 2
 8 | 
 9 |    Changelog <changelog>
10 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/auto_header.rst:
--------------------------------------------------------------------------------
1 | Auto header
2 | ===========
3 | 
4 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/command_line.rst:
--------------------------------------------------------------------------------
 1 | Command line
 2 | ============
 3 | 
 4 | **whatstk** provides a set of command line tools to obtain quick results using the command line. To use these, make sure
 5 | that you have previously :ref:`installed the library <Installation & compatibility>`.
 6 | 
 7 | For instance, convert a WhatsApp text file to a CSV file using 
 8 | 
 9 | .. code-block::
10 | 
11 |     whatstk-to-csv [input_filename] [output_filename]
12 | 
13 | 
14 | For more details, check the :ref:`command line tools documentation <Command line tools>`.
15 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/export_chat.rst:
--------------------------------------------------------------------------------
 1 | Export a WhatsApp chat
 2 | ======================
 3 | 
 4 | Exporting a WhatsApp chat can be easily done from your Android or iOS device. It is done on a chat basis, so if you want
 5 | to export several chats you will have to export them individually. **When exporting, make sure to select the "Without Media" option**. Once generated, you can send it via email, so you
 6 | can save it on your computer.
 7 | 
 8 | Android
 9 | --------
10 | The export on Android might include several files. We are only interested in the text file (i.e. ``txt`` extension
11 | file).
12 | 
13 | .. figure:: ../../_static/images/chat-export-android9-wp2.20.123.gif
14 |     :width: 300
15 |     :alt: Exporting a WhatsApp chat on Android
16 |     :align: center
17 |     :figclass: align-center
18 | 
19 |     Android 9, WhatsApp v2.20.123
20 | 
21 | For more details, refer to `official website <https://faq.whatsapp.com/android/chats/how-to-save-your-chat-history/>`_.
22 | 
23 | iOS
24 | ---
25 | The chat is exported as a `zip <https://en.wikipedia.org/wiki/Zip_(file_format)>`_, which can be easily unzipped in
26 | your computer.
27 | 
28 | .. figure:: ../../_static/images/chat-export-ios17-wp24.5.75.gif
29 |     :width: 300
30 |     :alt: Exporting a WhatsApp chat on iOS
31 |     :align: center
32 |     :figclass: align-center
33 | 
34 |     iOS 17.3.1, WhatsApp v24.5.75
35 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/hformat.rst:
--------------------------------------------------------------------------------
 1 | The header format
 2 | =================
 3 | 
 4 | In WhatsApp, a chat file syntax can differ between devices, OS and language settings, which makes it hard to correctly
 5 | parse the data for all formats.
 6 | 
 7 | The header appears for each message sent in the chat and contains the timestamp and name of the user that sent the message.
 8 | 
 9 | See it for yourself and open :ref:`an exported chat file <Export a WhatsApp chat>`. You will find that the messages have a format similar to the one below:
10 | 
11 | .. code-block::
12 | 
13 |     15.04.2016, 15:04 - You created group “Sample Group”
14 |     06.08.2016, 13:18 - Messages you send to this group are now secured with end-to-end encryption. Tap for more info.
15 |     06.08.2016, 13:23 - Ash Ketchum: Hey guys!
16 |     06.08.2016, 13:25 - Brock: Hey Ash, good to have a common group!
17 |     06.08.2016, 13:30 - Misty: Hey guys! Long time haven't heard anything from you
18 |     06.08.2016, 13:45 - Ash Ketchum: Indeed. I think having a whatsapp group nowadays is a good idea
19 |     06.08.2016, 14:30 - Misty: Definetly
20 |     06.08.2016, 17:25 - Brock: I totally agree
21 |     07.08.2016, 11:45 - Prof. Oak: Kids, shall I design a smart poke-ball?
22 | 
23 | In this example, the header is **day.month.year, hour:minutes - username:** which corresponds to the header format
24 | (i.e. **hformat**) ``'%d.%m.%y, %H:%M - %name:'``. However, in your case it may be slightly different depending on 
25 | your phone settings. 
26 | 
27 | Check the table below to see the codes for each header format unit:
28 | 
29 | 
30 | .. csv-table:: header format units
31 |    :header: "Date unit code", "Description"
32 |    :widths: 50, 50
33 |    :align: center
34 | 
35 |     ``'%y'`` (or ``'%Y'``), Year
36 |     ``'%m'``,	Month of the year (1-12)
37 |     ``'%d'``,	Day of the month (1-31)
38 |     ``'%H'``,	Hour 24h-clock (0-23)
39 |     ``'%I'``,	Hour 12h-clock (1-12)
40 |     ``'%p'`` (or ``'%P'``),	"AM/PM", "am/pm", "A.M/P.M", "a.m/p.m" characters
41 |     ``'%M'``,	Minutes (0-59)
42 |     ``'%S'``,	Seconds (0-59)
43 |     ``'%name'``,	Name of user
44 | 
45 | .. seealso::
46 |     :ref:`Load WhatsApp chat with specific hformat <Load WhatsApp chat with specific hformat>`
47 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/index.rst:
--------------------------------------------------------------------------------
 1 | Getting started
 2 | ===============
 3 | 
 4 | Getting started with the library is fairly easy.
 5 | 
 6 | .. toctree:: Contents
 7 |    :maxdepth: 1
 8 | 
 9 |    Export a WhatsApp chat from your phone <export_chat>
10 |    Load a WhatsApp chat <load_chat>
11 |    Command line tools <command_line>
12 |    The Header format <hformat>
13 |    Library available chats <library-available-chats>
14 | 
15 | For examples refer to :ref:`code examples <Code examples>` section.
16 | 
17 | For a rapid introduction, check this `tutorial on Medium <https://towardsdatascience.com/analyzing-whatsapp-chats-with-python-20d62ce7fe2d>`_.
18 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/library-available-chats.rst:
--------------------------------------------------------------------------------
  1 | Library available chats
  2 | =======================
  3 | 
  4 | For the purpose of showcasing code examples and benchmarking different implementations, we have created a pool of chats,
  5 | hosted in the `official repository page <https://github.com/lucasrodes/whatstk/tree/master/chats>`_. If you want to test
  6 | the library with one of your own chats, check the :ref:`code examples <Load WhatsApp chat>`.
  7 | 
  8 | The chats are available via their corresponding URLs, which are listed in source code :mod:`whatstk.data`.
  9 | 
 10 | .. contents:: Contents
 11 |     :depth: 3
 12 | 
 13 | WhatsApp
 14 | --------
 15 | 
 16 | Object ``whatsapp_urls`` contains all URLs for WhatsApp chats.
 17 | 
 18 | .. code-block:: python
 19 | 
 20 |     >>> from whatstk.data import whatsapp_urls
 21 | 
 22 | POKEMON
 23 | ^^^^^^^
 24 | 
 25 | Brief fictional chat with Pokemon characters, which was manually designed by  `@lucasrodes
 26 | <https://github.com/lucasrodes>`_ in `commit 666d6ea9cc030c4322fbe44ae64b8f1a0fdb5169
 27 | <https://github.com/lucasrodes/whatstk/commit/666d6ea9cc030c4322fbe44ae64b8f1a0fdb5169>`_.
 28 | 
 29 | .. code-block:: python
 30 | 
 31 |     >>> from whatstk.data import whatsapp_urls
 32 |     >>> from whatstk import WhatsAppChat
 33 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON)
 34 |     >>> chat.df.head(5)
 35 |                      date     username                                            message
 36 |     0 2016-08-06 13:23:00  Ash Ketchum                                          Hey guys!
 37 |     1 2016-08-06 13:25:00        Brock              Hey Ash, good to have a common group!
 38 |     2 2016-08-06 13:30:00        Misty  Hey guys! Long time haven't heard anything fro...
 39 |     3 2016-08-06 13:45:00  Ash Ketchum  Indeed. I think having a whatsapp group nowada...
 40 |     4 2016-08-06 14:30:00        Misty                                          Definetly
 41 | 
 42 | 
 43 | .. seealso:: 
 44 |     `Chat file <http://raw.githubusercontent.com/lucasrodes/whatstk/develop/chats/whatsapp/pokemon.txt>`_
 45 | 
 46 | 
 47 | LOREM
 48 | ^^^^^
 49 | Chat with 500 interventions of fictional users, generated using `python-lorem <https://lorem.jarryshaw.me/en/latest/>`_
 50 | library.
 51 | 
 52 | 
 53 | .. code-block:: python
 54 | 
 55 |     >>> from whatstk.data import whatsapp_urls
 56 |     >>> from whatstk import WhatsAppChat
 57 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM)
 58 |     >>> chat.df.head(5)
 59 |                      date        username                                            message
 60 |     0 2020-01-15 02:22:56            Mary                     Nostrud exercitation magna id.
 61 |     1 2020-01-15 03:33:01            Mary     Non elit irure irure pariatur exercitation. 🇩🇰
 62 |     2 2020-01-15 04:18:42  +1 123 456 789  Exercitation esse lorem reprehenderit ut ex ve...
 63 |     3 2020-01-15 06:05:14        Giuseppe  Aliquip dolor reprehenderit voluptate dolore e...
 64 |     4 2020-01-15 06:56:00            Mary              Ullamco duis et commodo exercitation.
 65 | 
 66 | .. seealso::
 67 |     `Chat file <http://raw.githubusercontent.com/lucasrodes/whatstk/develop/chats/whatsapp/lorem.txt>`_
 68 | 
 69 | LOREM1
 70 | ^^^^^^
 71 | Chat with 300 interventions of fictional users, generated using `python-lorem <https://lorem.jarryshaw.me/en/latest/>`_.
 72 | 
 73 | .. code-block:: python
 74 | 
 75 |     >>> from whatstk.data import whatsapp_urls
 76 |     >>> from whatstk import WhatsAppChat
 77 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM1)
 78 |     >>> chat.df.head(5)
 79 |                      date        username                                            message
 80 |     0 2019-10-20 10:16:00            John        Laborum sed excepteur id eu cillum sunt ut.
 81 |     1 2019-10-20 11:15:00            Mary  Ad aliquip reprehenderit proident est irure mo...
 82 |     2 2019-10-20 12:16:00  +1 123 456 789  Nostrud adipiscing ex enim reprehenderit minim...
 83 |     3 2019-10-20 12:57:00  +1 123 456 789  Deserunt proident laborum exercitation ex temp...
 84 |     4 2019-10-20 17:28:00            John                Do ex dolor consequat tempor et ex.
 85 | 
 86 | .. seealso::
 87 |     `Chat file <http://raw.githubusercontent.com/lucasrodes/whatstk/develop/chats/whatsapp/lorem-merge-part1.txt>`_
 88 | 
 89 | LOREM2
 90 | ^^^^^^
 91 | Chat with 300 interventions of fictional users, generated using `python-lorem <https://lorem.jarryshaw.me/en/latest/>`_.
 92 | 
 93 | Can be used along with **LOREM1** to test :func:`chat merging functionalities <whatstk.WhatsAppChat.merge>` or :ref:`multiple-source loading <Load WhatsApp chat from multiple sources>`.
 94 | 
 95 | .. code-block:: python
 96 | 
 97 |     >>> from whatstk.data import whatsapp_urls
 98 |     >>> from whatstk import WhatsAppChat
 99 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM2)
100 |     >>> chat.df.head(5)
101 |                      date        username                                            message
102 |     0 2020-06-20 10:16:00            John                 Elit incididunt lorem sed nostrud.
103 |     1 2020-06-20 11:15:00           Maria        Esse do irure dolor tempor ipsum fugiat. 🇩🇰
104 |     2 2020-06-20 12:16:00  +1 123 456 789  Cillum anim non eu deserunt consectetur dolor ...
105 |     3 2020-06-20 12:57:00  +1 123 456 789                  Non ipsum proident veniam est. 🏊🏻
106 |     4 2020-06-20 17:28:00            John                      Dolore in cupidatat proident.
107 | 
108 | .. seealso::
109 |     `Chat file <http://raw.githubusercontent.com/lucasrodes/whatstk/develop/chats/whatsapp/lorem-merge-part2.txt>`_
110 | 
111 | LOREM_2000
112 | ^^^^^^^^^^
113 | Chat with 2000 interventions of fictional users, generated using `python-lorem <https://lorem.jarryshaw.me/en/latest/>`_.
114 | 
115 | .. code-block:: python
116 | 
117 |     >>> from whatstk.data import whatsapp_urls
118 |     >>> from whatstk import WhatsAppChat
119 |     >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000)
120 |     >>> chat.df.head(5)
121 |                      date        username                                            message
122 |     0 2019-04-16 02:09:00  +1 123 456 789           Et labore proident laboris do labore ex.
123 |     1 2019-04-16 03:01:00            Mary  Reprehenderit id aute consectetur aliquip nost...
124 |     2 2019-04-17 12:56:00            John  Amet magna officia ullamco pariatur ipsum cupi...
125 |     3 2019-04-17 13:30:00            Mary  Cillum aute et cupidatat ipsum, occaecat lorem...
126 |     4 2019-04-17 15:09:00            John  Eiusmod irure laboris dolore anim, velit velit...
127 | 
128 | .. seealso::
129 |     `Chat file <http://raw.githubusercontent.com/lucasrodes/whatstk/develop/chats/whatsapp/lorem-2000.txt>`_
130 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/load_chat.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../code_examples/load_chat.rst
2 | 


--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | whatstk
2 | =======
3 | 
4 | .. toctree::
5 |    :maxdepth: 4
6 | 
7 |    whatstk
8 | 


--------------------------------------------------------------------------------
/docs/source/whatstk.analysis.rst:
--------------------------------------------------------------------------------
 1 | whatstk.analysis package
 2 | ========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | whatstk.analysis.interventions module
 8 | -------------------------------------
 9 | 
10 | .. automodule:: whatstk.analysis.interventions
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | whatstk.analysis.responses module
16 | ---------------------------------
17 | 
18 | .. automodule:: whatstk.analysis.responses
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: whatstk.analysis
28 |    :members:
29 |    :undoc-members:
30 |    :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/source/whatstk.graph.figures.rst:
--------------------------------------------------------------------------------
 1 | whatstk.graph.figures package
 2 | =============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | whatstk.graph.figures.boxplot module
 8 | ------------------------------------
 9 | 
10 | .. automodule:: whatstk.graph.figures.boxplot
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | whatstk.graph.figures.heatmap module
16 | ------------------------------------
17 | 
18 | .. automodule:: whatstk.graph.figures.heatmap
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | whatstk.graph.figures.sankey module
24 | -----------------------------------
25 | 
26 | .. automodule:: whatstk.graph.figures.sankey
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 | 
31 | whatstk.graph.figures.scatter module
32 | ------------------------------------
33 | 
34 | .. automodule:: whatstk.graph.figures.scatter
35 |    :members:
36 |    :undoc-members:
37 |    :show-inheritance:
38 | 
39 | whatstk.graph.figures.utils module
40 | ----------------------------------
41 | 
42 | .. automodule:: whatstk.graph.figures.utils
43 |    :members:
44 |    :undoc-members:
45 |    :show-inheritance:
46 | 
47 | 
48 | Module contents
49 | ---------------
50 | 
51 | .. automodule:: whatstk.graph.figures
52 |    :members:
53 |    :undoc-members:
54 |    :show-inheritance:
55 | 


--------------------------------------------------------------------------------
/docs/source/whatstk.graph.rst:
--------------------------------------------------------------------------------
 1 | whatstk.graph package
 2 | =====================
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 4
 9 | 
10 |    whatstk.graph.figures
11 | 
12 | Submodules
13 | ----------
14 | 
15 | whatstk.graph.base module
16 | -------------------------
17 | 
18 | .. automodule:: whatstk.graph.base
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: whatstk.graph
28 |    :members:
29 |    :undoc-members:
30 |    :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/source/whatstk.rst:
--------------------------------------------------------------------------------
 1 | whatstk package
 2 | ===============
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 4
 9 | 
10 |    whatstk.analysis
11 |    whatstk.graph
12 |    whatstk.utils
13 |    whatstk.whatsapp
14 | 
15 | Submodules
16 | ----------
17 | 
18 | whatstk.data module
19 | -------------------
20 | 
21 | .. automodule:: whatstk.data
22 |    :members:
23 |    :undoc-members:
24 |    :show-inheritance:
25 | 
26 | 
27 | Module contents
28 | ---------------
29 | 
30 | .. automodule:: whatstk
31 |    :members:
32 |    :undoc-members:
33 |    :show-inheritance:
34 | 


--------------------------------------------------------------------------------
/docs/source/whatstk.utils.rst:
--------------------------------------------------------------------------------
 1 | whatstk.utils package
 2 | =====================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | whatstk.utils.chat\_merge module
 8 | --------------------------------
 9 | 
10 | .. automodule:: whatstk.utils.chat_merge
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | whatstk.utils.exceptions module
16 | -------------------------------
17 | 
18 | .. automodule:: whatstk.utils.exceptions
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | whatstk.utils.gdrive module
24 | ---------------------------
25 | 
26 | .. automodule:: whatstk.utils.gdrive
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 |       
31 | whatstk.utils.utils module
32 | --------------------------
33 | 
34 | .. automodule:: whatstk.utils.utils
35 |    :members:
36 |    :undoc-members:
37 |    :show-inheritance:
38 | 
39 | 
40 | Module contents
41 | ---------------
42 | 
43 | .. automodule:: whatstk.utils
44 |    :members:
45 |    :undoc-members:
46 |    :show-inheritance:
47 | 


--------------------------------------------------------------------------------
/docs/source/whatstk.whatsapp.rst:
--------------------------------------------------------------------------------
 1 | whatstk.whatsapp package
 2 | ========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | whatstk.whatsapp.auto\_header module
 8 | ------------------------------------
 9 | 
10 | .. automodule:: whatstk.whatsapp.auto_header
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | whatstk.whatsapp.generation module
16 | ----------------------------------
17 | 
18 | .. automodule:: whatstk.whatsapp.generation
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | whatstk.whatsapp.hformat module
24 | -------------------------------
25 | 
26 | .. automodule:: whatstk.whatsapp.hformat
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 | 
31 | whatstk.whatsapp.objects module
32 | -------------------------------
33 | 
34 | .. automodule:: whatstk.whatsapp.objects
35 |    :members:
36 |    :undoc-members:
37 |    :show-inheritance:
38 | 
39 | whatstk.whatsapp.parser module
40 | ------------------------------
41 | 
42 | .. automodule:: whatstk.whatsapp.parser
43 |    :members:
44 |    :undoc-members:
45 |    :show-inheritance:
46 | 
47 | 
48 | Module contents
49 | ---------------
50 | 
51 | .. automodule:: whatstk.whatsapp
52 |    :members:
53 |    :undoc-members:
54 |    :show-inheritance:
55 | 


--------------------------------------------------------------------------------
/docs/source/why_whatstk.rst:
--------------------------------------------------------------------------------
 1 | Why choose whatstk?
 2 | ===================
 3 | 
 4 | There are many python libraries to deal with WhatsApp and other platform chat files. Why should you choose **whatstk**
 5 | over these?
 6 | 
 7 | Automatic parser
 8 | ----------------
 9 | In WhatsApp, the chat might be exported in :ref:`different formats <The header format>` depending on your phone
10 | configuration, which adds complexity when parsing the chat. **whatstk** incorporates a reliable and powerful
11 | :mod:`parser <whatstk.whatsapp.parser>` to correctly infer the structure of most of the chats. In the rare and
12 | improbable case that the automatic parser does not work for a certain chat, you can still use
13 | `hformat <code_examples/load_chat_hformat.html>`_.
14 | 
15 | The power of pandas and plotly
16 | ------------------------------
17 | **whatstk** relies on well-established and maintained Python libraries: `pandas <https://github.com/pandas-dev/pandas>`_ to
18 | process the data and `plotly <https://github.com/plotly/plotly.py>`_ to create figures. It exploits their potential to
19 | process chats and build visualizations efficiently.
20 | 
21 | Open source and Community oriented
22 | ----------------------------------
23 | The project is distributed under the `GPL-3.0 license <https://github.com/lucasrodes/whatstk/blob/master/LICENSE>`_,
24 | available on `GitHub <http://github.com/lucasrodes/whatstk>`_ and open for `user contributions <contribute.html>`_.
25 | 
26 | The project has been maintained since 2016 by `@lucasrodes <https://github.com/lucasrodes>`_.
27 | 


--------------------------------------------------------------------------------
/requirements-docs.txt:
--------------------------------------------------------------------------------
 1 | recommonmark==0.6.0
 2 | Sphinx~=5.3.0
 3 | sphinx-rtd-theme==0.4.3
 4 | sphinx-copybutton~=0.5.2
 5 | sphinx-git==11.0.0
 6 | autodocsumm~=0.2.11
 7 | # gitchangelog==3.0.4
 8 | auto-changelog~=0.6.0
 9 | # pillow, psutil, sphinx-gallery
10 | docutils==0.16
11 | 


--------------------------------------------------------------------------------
/requirements-flake.txt:
--------------------------------------------------------------------------------
1 | #
2 | flake8~=6.0.0
3 | flake8-docstrings~=1.7.0
4 | flake8-bugbear~=23.3.0
5 | flake8-builtins~=2.1.0
6 | flake8-bandit~=4.1.0
7 | flake8-mutable~=1.2.0
8 | flake8-annotations~=3.0.0
9 | flake8-html~=0.4.3


--------------------------------------------------------------------------------
/requirements-test.txt:
--------------------------------------------------------------------------------
1 | pytest~=7.2.0
2 | pytest-cov~=4.0.0
3 | coverage~=7.2.2
4 | codecov~=2.1.0
5 | pytest-html~=3.2.0
6 | pytest-mock~=3.10.0


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | emoji~=2.10.1
2 | numpy~=1.26.4
3 | pandas~=2.2.1
4 | plotly~=5.20.0
5 | seaborn~=0.13.2
6 | 


--------------------------------------------------------------------------------
/run-tests.sh:
--------------------------------------------------------------------------------
 1 | ## (1) Test dependencies (run this part only the first time).
 2 | # If using zsh, install using '\', i.e. `pip install -e .\[full\]`
 3 | # pip install -e .[full]
 4 | 
 5 | ## (1.1) Install pytest dependencies
 6 | # pip install -r requirements-test.txt
 7 | 
 8 | ## (1.2) Install flake8 dependencies
 9 | # pip install -r requirements-flake.txt
10 | 
11 | ## (1.3) Generate chats for test
12 | # mkdir -p tests/chats/hformats tests/chats/merge
13 | # whatstk-generate-chat --size 500 -z --output-path tests/chats/hformats/
14 | # whatstk-generate-chat --size 300 -z --last-timestamp 2019-09-01 \
15 | #                         --hformats '%Y-%m-%d, %H:%M - %name:' \
16 | #                         --output-path tests/chats/merge/ --filenames file1.txt
17 | # whatstk-generate-chat --size 300 -z --last-timestamp 2020-01-01 \
18 | #                         --hformats '%Y-%m-%d, %H:%M - %name:' \
19 | #                         --output-path tests/chats/merge/ --filenames file2.txt
20 | 
21 | 
22 | ## (2) Run flake8 on the package (HTML report in reports/flake-report; ANN101, ANN102 and ANN401 are ignored)
23 | flake8 \
24 |     --max-complexity 10 \
25 |     --docstring-convention=google \
26 |     --format=html --htmldir=reports/flake-report \
27 |     --max-line-length=120 \
28 |     --ignore=ANN101,ANN102,ANN401 \
29 |     whatstk
30 | 
31 | ## (3) Run tests (HTML test report, coverage as HTML, terminal and XML); `pytest` replaces the legacy `py.test` alias
32 | pytest \
33 |     --html=reports/testreport.html \
34 |     --cov-report html:reports/htmlcov \
35 |     --cov-report term \
36 |     --cov-report xml:reports/cov.xml \
37 |     --cov=whatstk tests
38 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """setup script."""
 2 | 
 3 | 
 4 | from setuptools import setup, find_packages
 5 | import os
 6 | import glob
 7 | 
 8 | 
 9 | this_directory = os.path.abspath(os.path.dirname(__file__))
10 | with open(os.path.join(this_directory, 'README.md'), encoding='utf8') as f:
11 |     long_description = f.read()
12 | 
13 | with open(os.path.join(this_directory, 'requirements.txt')) as f:
14 |     requirements = f.readlines()
15 | 
16 | with open(os.path.join(this_directory, 'requirements-test.txt')) as f:
17 |     requirements_test = f.readlines()
18 | 
19 | with open(os.path.join(this_directory, 'requirements-flake.txt')) as f:
20 |     requirements_flake = f.readlines()
21 | 
22 | with open(os.path.join(this_directory, 'requirements-docs.txt')) as f:
23 |     requirements_docs = f.readlines()
24 | 
25 | requirements_gdrive = [
26 |     "PyDrive2~=1.15.0",
27 |     "PyYAML~=6.0",
28 | ]
29 | 
30 | requirements_generate = [
31 |     "scipy~=1.12.0",
32 |     "python-lorem==1.2.0",
33 | ]
34 | 
35 | requirements_full = requirements_gdrive + requirements_generate
36 | 
37 | 
38 | extras_require = {
39 |     "gdrive": requirements_gdrive,
40 |     "generate": requirements_generate,
41 |     "full": requirements_full,
42 |     "dev": requirements_test + requirements_flake + requirements_docs,
43 | }
44 | 
45 | 
46 | setup(
47 |     name='whatstk',
48 |     version="0.7.1",
49 |     description="Parser and analytics tools for WhatsApp group chats",
50 |     long_description=long_description,
51 |     long_description_content_type='text/markdown',
52 |     url='http://github.com/lucasrodes/whatstk',
53 |     author='Lucas Rodes-Guirao',
54 |     license='GPL-v3',
55 |     install_requires=requirements,
56 |     packages=find_packages('.'),
57 |     package_dir={'': '.'},
58 |     py_modules=[os.path.splitext(os.path.basename(path))[0] for path in glob.glob('./*.py')],
59 |     include_package_data=True,
60 |     zip_safe=False,
61 |     classifiers=[
62 |         "Development Status :: 4 - Beta",
63 |         "Programming Language :: Python",
64 |         "Programming Language :: Python :: 3 :: Only",
65 |         "Programming Language :: Python :: 3.9",
66 |         "Programming Language :: Python :: 3.10",
67 |         "Programming Language :: Python :: 3.11",
68 |         "Programming Language :: Python :: 3.12",
69 |         "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
70 |         "Operating System :: OS Independent",
71 |     ],
72 |     keywords='whatsapp analysis parser chat',
73 |     project_urls={
74 |         'Documentation': 'https://whatstk.readthedocs.io/en/stable/',
75 |         'Github': 'http://github.com/lucasrodes/whatstk',
76 |         'Bug Tracker': 'https://github.com/lucasrodes/whatstk/issues',
77 |     },
78 |     python_requires='>=3.7',
79 |     entry_points={
80 |         'console_scripts': [
81 |             'whatstk-generate-chat=whatstk.scripts.generate_chats:main',
82 |             'whatstk-to-csv=whatstk.scripts.txt_to_csv:main',
83 |             'whatstk-graph=whatstk.scripts.graph:main'
84 |         ]
85 |     },
86 |     package_data = {
87 |         'whatstk': ['whatsapp/assets/header_format_support.json'],
88 |     },
89 |     extras_require=extras_require,
90 | )
91 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/__init__.py


--------------------------------------------------------------------------------
/tests/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/analysis/__init__.py


--------------------------------------------------------------------------------
/tests/analysis/test_interventions.py:
--------------------------------------------------------------------------------
  1 | #  TODO: Assert number of columns equals number of users
  2 | from whatstk.analysis.interventions import get_interventions_count
  3 | from whatstk.whatsapp.objects import WhatsAppChat
  4 | from whatstk.utils.utils import COLNAMES_DF, _map_hformat_filename
  5 | import pandas as pd
  6 | import pytest
  7 | 
  8 | hformat = "[%d.%m.%y_%I:%M:%S_%p]_%name:"
  9 | filename = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt"
 10 | 
 11 | 
 12 | def test_interventions_date_all():
 13 |     chat = WhatsAppChat.from_source(filename)
 14 |     counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False, all_users=True)
 15 | 
 16 |     assert(isinstance(counts, pd.DataFrame))
 17 |     # Asswert chat df and counts df have same users
 18 |     assert(len(counts.columns) == 1)
 19 |     assert(counts.columns == ['interventions count'])
 20 | 
 21 |     # Assert chat df and counts df have same date window
 22 |     assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date())
 23 |     assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date())
 24 |     
 25 | 
 26 | def test_interventions_date():
 27 |     chat = WhatsAppChat.from_source(filename)
 28 |     counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False)
 29 | 
 30 |     assert(isinstance(counts, pd.DataFrame))
 31 |     # Asswert chat df and counts df have same users
 32 |     assert(set(chat.users) == set(counts.columns))
 33 |     assert(len(chat.users) == counts.shape[1])
 34 | 
 35 |     # Assert chat df and counts df have same date window
 36 |     assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date())
 37 |     assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date())
 38 | 
 39 | 
 40 | def test_interventions_date_2():
 41 |     chat = WhatsAppChat.from_source(filename)
 42 |     counts = get_interventions_count(df=chat.df, date_mode='date', msg_length=False)
 43 | 
 44 |     assert(isinstance(counts, pd.DataFrame))
 45 |     # Asswert chat df and counts df have same users
 46 |     assert(set(chat.users) == set(counts.columns))
 47 |     assert(len(chat.users) == counts.shape[1])
 48 | 
 49 |     # Assert chat df and counts df have same date window
 50 |     assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date())
 51 |     assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date())
 52 | 
 53 | 
 54 | def test_interventions_date_msg_length():
 55 |     chat = WhatsAppChat.from_source(filename)
 56 |     counts = get_interventions_count(chat=chat, date_mode='date', msg_length=True)
 57 | 
 58 |     assert(isinstance(counts, pd.DataFrame))
 59 |     # Asswert chat df and counts df have same users
 60 |     assert(set(chat.users) == set(counts.columns))
 61 |     assert(len(chat.users) == counts.shape[1])
 62 | 
 63 |     # Assert chat df and counts df have same date window
 64 |     assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date())
 65 |     assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date())
 66 | 
 67 | 
 68 | def test_interventions_hour():
 69 |     chat = WhatsAppChat.from_source(filename)
 70 |     counts = get_interventions_count(chat=chat, date_mode='hour', msg_length=False)
 71 | 
 72 |     assert(isinstance(counts, pd.DataFrame))
 73 |     # Asswert chat df and counts df have same users
 74 |     assert(set(chat.users) == set(counts.columns))
 75 |     assert(len(chat.users) == counts.shape[1])
 76 | 
 77 |     # Check range hours
 78 |     assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.hour.max())
 79 |     assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.hour.min())
 80 | 
 81 | 
 82 | def test_interventions_hour_msg_length():
 83 |     chat = WhatsAppChat.from_source(filename)
 84 |     counts = get_interventions_count(chat=chat, date_mode='hour', msg_length=True)
 85 | 
 86 |     assert(isinstance(counts, pd.DataFrame))
 87 |     # Asswert chat df and counts df have same users
 88 |     assert(set(chat.users) == set(counts.columns))
 89 |     assert(len(chat.users) == counts.shape[1])
 90 | 
 91 |     # Check range hours
 92 |     assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.hour.max())
 93 |     assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.hour.min())
 94 | 
 95 | 
 96 | def test_interventions_month():
 97 |     chat = WhatsAppChat.from_source(filename)
 98 |     counts = get_interventions_count(chat=chat, date_mode='month', msg_length=False)
 99 | 
100 |     assert(isinstance(counts, pd.DataFrame))
101 |     # Asswert chat df and counts df have same users
102 |     assert(set(chat.users) == set(counts.columns))
103 |     assert(len(chat.users) == counts.shape[1])
104 | 
105 |     # Check range months
106 |     assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.month.max())
107 |     assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.month.min())
108 | 
109 | 
110 | def test_interventions_month_msg_length():
111 |     chat = WhatsAppChat.from_source(filename)
112 |     counts = get_interventions_count(chat=chat, date_mode='month', msg_length=False)
113 | 
114 |     assert(isinstance(counts, pd.DataFrame))
115 |     # Asswert chat df and counts df have same users
116 |     assert(set(chat.users) == set(counts.columns))
117 |     assert(len(chat.users) == counts.shape[1])
118 | 
119 |     # Check range months
120 |     assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.month.max())
121 |     assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.month.min())
122 | 
123 | 
124 | def test_interventions_weekday():
125 |     chat = WhatsAppChat.from_source(filename)
126 |     counts = get_interventions_count(chat=chat, date_mode='weekday', msg_length=False)
127 | 
128 |     assert(isinstance(counts, pd.DataFrame))
129 |     # Asswert chat df and counts df have same users
130 |     assert(set(chat.users) == set(counts.columns))
131 |     assert(len(chat.users) == counts.shape[1])
132 | 
133 |     # Check range weekdays
134 |     assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.weekday.max())
135 |     assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.weekday.min())
136 | 
137 | 
138 | def test_interventions_weekday_msg_length():
139 |     chat = WhatsAppChat.from_source(filename)
140 |     counts = get_interventions_count(chat=chat, date_mode='weekday', msg_length=True)
141 | 
142 |     assert(isinstance(counts, pd.DataFrame))
143 |     # Asswert chat df and counts df have same users
144 |     assert(set(chat.users) == set(counts.columns))
145 |     assert(len(chat.users) == counts.shape[1])
146 | 
147 |     # Check range weekdays
148 |     assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.weekday.max())
149 |     assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.weekday.min())
150 | 
151 | 
152 | def test_interventions_hourweekday():
153 |     chat = WhatsAppChat.from_source(filename)
154 |     counts = get_interventions_count(chat=chat, date_mode='hourweekday', msg_length=False)
155 | 
156 |     assert(isinstance(counts, pd.DataFrame))
157 |     # Asswert chat df and counts df have same users
158 |     assert(set(chat.users) == set(counts.columns))
159 |     assert(len(chat.users) == counts.shape[1])
160 | 
161 |     # Check range weekdays
162 |     assert(counts.index.levels[0].max() == chat.df[COLNAMES_DF.DATE].dt.weekday.max())
163 |     assert(counts.index.levels[0].min() == chat.df[COLNAMES_DF.DATE].dt.weekday.min())
164 | 
165 |     # Check range hours
166 |     assert(counts.index.levels[1].max() == chat.df[COLNAMES_DF.DATE].dt.hour.max())
167 |     assert(counts.index.levels[1].min() == chat.df[COLNAMES_DF.DATE].dt.hour.min())
168 | 
169 | 
170 | def test_interventions_hourweekday_msg_length():
171 |     chat = WhatsAppChat.from_source(filename)
172 |     counts = get_interventions_count(chat=chat, date_mode='hourweekday', msg_length=True)
173 | 
174 |     assert(isinstance(counts, pd.DataFrame))
175 |     # Assert chat df and counts df have same users
176 |     assert(set(chat.users) == set(counts.columns))
177 |     assert(len(chat.users) == counts.shape[1])
178 | 
179 |     # Check range weekdays
180 |     assert(counts.index.levels[0].max() == chat.df[COLNAMES_DF.DATE].dt.weekday.max())
181 |     assert(counts.index.levels[0].min() == chat.df[COLNAMES_DF.DATE].dt.weekday.min())
182 | 
183 |     # Check range hours
184 |     assert(counts.index.levels[1].max() == chat.df[COLNAMES_DF.DATE].dt.hour.max())
185 |     assert(counts.index.levels[1].min() == chat.df[COLNAMES_DF.DATE].dt.hour.min())
186 | 
187 | 
188 | def test_interventions_error_1():
189 |     chat = WhatsAppChat.from_source(filename)
190 |     with pytest.raises(ValueError):
191 |         _ = get_interventions_count(chat=chat, date_mode='error', msg_length=False)
192 |     with pytest.raises(ValueError):
193 |         _ = get_interventions_count(chat=chat, date_mode='error', msg_length=True)
194 | 
195 | 
196 | def test_interventions_error_2():
197 |     with pytest.raises(ValueError):
198 |         _ = get_interventions_count(date_mode='hour', msg_length=False)
199 | 
200 | 
201 | def test_interventions_date_cumsum():
202 |     chat = WhatsAppChat.from_source(filename)
203 |     counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False, cumulative=True)
204 | 
205 |     assert(isinstance(counts, pd.DataFrame))
206 |     # Asswert chat df and counts df have same users
207 |     assert(set(chat.users) == set(counts.columns))
208 |     assert(len(chat.users) == counts.shape[1])
209 | 
210 |     # Assert chat df and counts df have same date window
211 |     assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date())
212 |     assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date())
213 | 
214 |     assert(isinstance(counts, pd.DataFrame))
215 |     # Asswert chat df and counts df have same users
216 |     assert(set(chat.users) == set(counts.columns))
217 |     assert(len(chat.users) == counts.shape[1])
218 | 
219 |     # Assert chat df and counts df have same date window
220 |     assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date())
221 |     assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date())
222 | 


--------------------------------------------------------------------------------
/tests/analysis/test_responses.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import pytest
 3 | from whatstk.whatsapp.objects import WhatsAppChat
 4 | from whatstk.analysis.responses import get_response_matrix
 5 | from whatstk.utils.utils import _map_hformat_filename
 6 | 
 7 | hformat = "[%d.%m.%y_%I:%M:%S_%p]_%name:"
 8 | filename = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt"
 9 | 
10 | 
11 | def test_get_response_matrix_1():
12 |     chat = WhatsAppChat.from_source(filename)
13 |     df_resp = get_response_matrix(chat=chat, zero_own=True)
14 | 
15 |     # Check shape and colnames of returned dataframe
16 |     n_users = len(chat.users)
17 |     assert(df_resp.shape == (n_users, n_users))
18 |     assert(set(chat.users) == set(df_resp.columns))
19 | 
20 |     # Check diagonal of returned dataframe is zero
21 |     assert(all([df_resp.loc[user, user] == 0 for user in df_resp.columns]))
22 | 
23 | 
24 | def test_get_response_matrix_2():
25 |     chat = WhatsAppChat.from_source(filename)
26 |     df_resp = get_response_matrix(chat=chat, zero_own=False)
27 | 
28 |     # Check shape and colnames of returned dataframe
29 |     n_users = len(chat.users)
30 |     assert(df_resp.shape == (n_users, n_users))
31 |     assert(set(chat.users) == set(df_resp.columns))
32 | 
33 | 
34 | def test_get_response_matrix_3():
35 |     chat = WhatsAppChat.from_source(filename)
36 |     df_resp = get_response_matrix(chat=chat, norm='joint')
37 | 
38 |     # Check shape and colnames of returned dataframe
39 |     n_users = len(chat.users)
40 |     assert(df_resp.shape == (n_users, n_users))
41 |     assert(set(chat.users) == set(df_resp.columns))
42 | 
43 |     # Check scaling has been done correct
44 |     assert(math.isclose(df_resp.sum().sum(), 1))
45 | 
46 | 
47 | def test_get_response_matrix_4():
48 |     chat = WhatsAppChat.from_source(filename)
49 |     df_resp = get_response_matrix(chat=chat, norm='sender')
50 | 
51 |     # Check shape and colnames of returned dataframe
52 |     n_users = len(chat.users)
53 |     assert(df_resp.shape == (n_users, n_users))
54 |     assert(set(chat.users) == set(df_resp.columns))
55 | 
56 |     # Check scaling has been done correct
57 |     assert(all([math.isclose(x, 1) for x in df_resp.sum(axis=1)]))
58 | 
59 | 
60 | def test_get_response_matrix_5():
61 |     chat = WhatsAppChat.from_source(filename)
62 |     df_resp = get_response_matrix(chat=chat, norm='receiver')
63 | 
64 |     # Check shape and colnames of returned dataframe
65 |     n_users = len(chat.users)
66 |     assert(df_resp.shape == (n_users, n_users))
67 |     assert(set(chat.users) == set(df_resp.columns))
68 | 
69 |     # Check scaling has been done correct
70 |     assert(all([math.isclose(x, 1) for x in df_resp.sum(axis=0)]))
71 | 
72 | 
73 | def test_get_response_matrix_error():
74 |     chat = WhatsAppChat.from_source(filename)
75 |     with pytest.raises(ValueError):
76 |         _ = get_response_matrix(chat=chat, norm='error')
77 | 


--------------------------------------------------------------------------------
/tests/graph/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/graph/__init__.py


--------------------------------------------------------------------------------
/tests/graph/test_figures.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import plotly.graph_objs as go
 3 | 
 4 | from whatstk.graph.base import FigureBuilder
 5 | from whatstk.whatsapp.objects import WhatsAppChat
 6 | from whatstk.utils.utils import _map_hformat_filename
 7 | 
 8 | 
 9 | hformat = "[%d.%m.%y_%I:%M:%S_%p]_%name:"
10 | filename = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt"
11 | 
12 | 
13 | def load_chat_as_df():
14 |     return WhatsAppChat.from_source(filename).df  # only the `df` attribute of the parsed sample chat
15 | 
16 | 
17 | def load_chat():
18 |     return WhatsAppChat.from_source(filename)  # the parsed sample chat as a WhatsAppChat object
19 | 
20 | 
21 | def test_init():
22 |     df = load_chat_as_df()
23 |     _ = FigureBuilder(df=df)
24 |     chat = load_chat()
25 |     _ = FigureBuilder(chat=chat)
26 |     with pytest.raises(ValueError):
27 |         _ = FigureBuilder()
28 | 
29 | 
30 | def test_init_mapping_dict_1():
31 |     df = load_chat_as_df()
32 |     fb = FigureBuilder(df=df)
33 |     mapping = fb.user_color_mapping
34 |     assert(isinstance(fb.user_color_mapping, dict))
35 |     assert(len(mapping) == df['username'].nunique())
36 | 
37 | 
38 | def test_init_mapping_dict_2():
39 |     df = load_chat_as_df()
40 |     fb = FigureBuilder(df=df)
41 |     value = {'a': 'b'}
42 |     fb.user_color_mapping = value
43 |     assert(fb.user_color_mapping == value)
44 | 
45 | 
46 | def test_user_msg_length_boxplot():
47 |     df = load_chat_as_df()
48 |     fb = FigureBuilder(df=df)
49 |     fig = fb.user_msg_length_boxplot()
50 |     assert isinstance(fig, go.Figure)
51 |     assert ('data' in fig and 'layout' in fig)
52 | 
53 | 
54 | def test_user_interventions_count_linechart():
55 |     df = load_chat_as_df()
56 |     fb = FigureBuilder(df=df)
57 |     fig = fb.user_interventions_count_linechart()
58 |     assert isinstance(fig, go.Figure)
59 |     assert ('data' in fig and 'layout' in fig)
60 | 
61 | def test_user_interventions_count_linechart_2():
62 |     df = load_chat_as_df()
63 |     fb = FigureBuilder(df=df)
64 |     fig = fb.user_interventions_count_linechart(all_users=True)
65 |     assert isinstance(fig, go.Figure)
66 |     assert ('data' in fig and 'layout' in fig)
67 | 
68 | 
69 | def test_user_message_responses_flow():
70 |     df = load_chat_as_df()
71 |     fb = FigureBuilder(df=df)
72 |     fig = fb.user_message_responses_flow()
73 |     assert isinstance(fig, go.Figure)
74 |     assert ('data' in fig and 'layout' in fig)
75 | 
76 | 
77 | def test_user_message_responses_heatmap():
78 |     df = load_chat_as_df()
79 |     fb = FigureBuilder(df=df)
80 |     fig = fb.user_message_responses_heatmap()
81 |     assert isinstance(fig, go.Figure)
82 |     assert ('data' in fig and 'layout' in fig)
83 | 


--------------------------------------------------------------------------------
/tests/test_chat.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | import pandas as pd
 3 | import pytest
 4 | 
 5 | from whatstk.whatsapp.objects import WhatsAppChat
 6 | from whatstk._chat import BaseChat
 7 | from whatstk.utils.utils import _map_hformat_filename
 8 | from whatstk.utils.utils import COLNAMES_DF
 9 | 
10 | hformat = "[%d.%m.%y_%I:%M:%S_%p]_%name:"
11 | filepath = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt"
12 | 
13 | 
14 | def test_properties():
15 |     chat = WhatsAppChat.from_source(filepath)
16 | 
17 |     assert(isinstance(chat.start_date, datetime))
18 |     assert(isinstance(chat.end_date, datetime))
19 | 
20 | def test_from_source():
21 |     with pytest.raises(NotImplementedError):
22 |         _ = BaseChat.from_source(filepath=filepath)
23 | 
24 | 
25 | def test_from_source_2():
26 |     chat = WhatsAppChat.from_source(filepath)
27 |     df = chat.df
28 | 
29 |     # Fake system column
30 |     data = {
31 |         COLNAMES_DF.DATE: ["2020-11-21 03:02:06"],
32 |         COLNAMES_DF.USERNAME: ["chat_name"],
33 |         COLNAMES_DF.MESSAGE: ["chat was created"],
34 |         COLNAMES_DF.MESSAGE_TYPE: ["system"]
35 |     }
36 |     df_system = pd.DataFrame(data)
37 |     df[COLNAMES_DF.MESSAGE_TYPE] = "user"
38 |     # Add fake row to main df
39 |     df = pd.concat([df_system, df])
40 |     # Ensure type of datetime
41 |     df[COLNAMES_DF.DATE] = pd.to_datetime(df[COLNAMES_DF.DATE])
42 |     
43 |     chat = WhatsAppChat(df)
44 |     assert isinstance(chat.start_date, datetime)
45 |     assert isinstance(chat.end_date, datetime)
46 |     assert isinstance(chat.df, pd.DataFrame)
47 |     assert isinstance(chat.df_system, pd.DataFrame)
48 |     assert chat.is_group
49 | 


--------------------------------------------------------------------------------
/tests/test_data.py:
--------------------------------------------------------------------------------
1 | from whatstk.data import whatsapp_urls
2 | 
3 | 
4 | def test_urls():
5 |     url = whatsapp_urls.POKEMON
6 |     assert(isinstance(url, str))
7 |     assert(url.startswith('http'))
8 | 


--------------------------------------------------------------------------------
/tests/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/utils/__init__.py


--------------------------------------------------------------------------------
/tests/utils/test_chat_merge.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | from whatstk.utils.chat_merge import _merge_two_chats
 4 | from whatstk.whatsapp.parser import df_from_whatsapp
 5 | 
 6 | 
 7 | chats_merge_path = './tests/chats/merge/'
 8 | filename1 = os.path.join(chats_merge_path, 'file1.txt')
 9 | filename2 = os.path.join(chats_merge_path, 'file2.txt')
10 | 
11 | 
12 | def test_merge_two_chats():
13 |     df1 = df_from_whatsapp(filename1)
14 |     df2 = df_from_whatsapp(filename2)
15 |     df = _merge_two_chats(df1, df2)
16 |     assert(isinstance(df, pd.DataFrame))
17 |     df = _merge_two_chats(df2, df1)
18 |     assert(isinstance(df, pd.DataFrame))
19 | 


--------------------------------------------------------------------------------
/tests/utils/test_gdrive.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | 
 4 | import pytest
 5 | from pydrive2.files import ApiRequestError
 6 | 
 7 | from whatstk.utils.gdrive import gdrive_init, _check_gdrive_config, _load_str_from_file_id
 8 | import whatstk
 9 | 
10 | 
11 | def test_init_1(tmpdir, mocker):
12 |     # Create tmp secrets
13 |     client_secret = {"field": 1}
14 |     client_secret_file = tmpdir.join("client_secrets.json")
15 |     with open(client_secret_file, 'w') as f:
16 |         json.dump(client_secret, f)
17 |     # Mock 1
18 |     CONFIG_PATH = tmpdir.mkdir(".config")
19 |     mocker.patch.object(whatstk.utils.gdrive, "CONFIG_PATH", CONFIG_PATH)
20 |     mocker.patch.object(whatstk.utils.gdrive, "CLIENT_SECRETS_PATH", os.path.join(CONFIG_PATH, "client_secrets.json"))
21 |     mocker.patch.object(whatstk.utils.gdrive, "SETTINGS_PATH", os.path.join(CONFIG_PATH, "settings.yaml"))
22 |     mocker.patch.object(whatstk.utils.gdrive, "CREDENTIALS_PATH", os.path.join(CONFIG_PATH, "credentials.json"))
23 |     mocker.patch("pydrive2.auth.GoogleAuth.CommandLineAuth", return_value=True)
24 |     gdrive_init(client_secret_file)
25 | 
26 | 
27 | def test_init_2(tmpdir, mocker):
28 |     # Create tmp secrets
29 |     client_secret = {"field": 1}
30 |     client_secret_file = tmpdir.join("client_secrets.json")
31 |     with open(client_secret_file, 'w') as f:
32 |         json.dump(client_secret, f)
33 |     # Mock 2
34 |     CONFIG_PATH = tmpdir.join(".config2")
35 |     mocker.patch.object(whatstk.utils.gdrive, "CONFIG_PATH", CONFIG_PATH)
36 |     mocker.patch.object(whatstk.utils.gdrive, "CLIENT_SECRETS_PATH", os.path.join(CONFIG_PATH, "client_secrets.json"))
37 |     mocker.patch.object(whatstk.utils.gdrive, "SETTINGS_PATH", os.path.join(CONFIG_PATH, "settings.yaml"))
38 |     mocker.patch.object(whatstk.utils.gdrive, "CREDENTIALS_PATH", os.path.join(CONFIG_PATH, "credentials.json"))
39 |     mocker.patch("pydrive2.auth.GoogleAuth.CommandLineAuth", return_value=True)
40 |     gdrive_init(client_secret_file)
41 | 
42 | 
43 | def test_check(tmpdir, mocker):
44 |     with pytest.raises(ValueError):
45 |         mocker.patch("os.path.isdir", return_value=False)
46 |         _check_gdrive_config()
47 |     with pytest.raises(ValueError):
48 |         mocker.patch("os.path.isdir", return_value=True)
49 |         mocker.patch("os.path.isfile", return_value=False)
50 |         _check_gdrive_config()
51 |     mocker.patch("os.path.isdir", return_value=True)
52 |     mocker.patch("os.path.isfile", return_value=True)
53 |     _check_gdrive_config()
54 | 
55 | 
56 | def test_load_2(mocker):
57 |     mocker.patch("whatstk.utils.gdrive._check_gdrive_config", return_value=True)
58 |     mocker.patch("pydrive2.auth.GoogleAuth", return_value=True)
59 |     mocker.patch("pydrive2.drive.GoogleDrive", return_value=True)
60 |     mocker.patch("pydrive2.drive.GoogleDrive.CreateFile", return_value=True)
61 |     mocker.patch("pydrive2.files.GoogleDriveFile.FetchMetadata", return_value=True)
62 |     mocker.patch("pydrive2.files.GoogleDriveFile.GetContentString", return_value="mock text")
63 |     text = _load_str_from_file_id("some-id")
64 |     assert isinstance(text, str)
65 | 


--------------------------------------------------------------------------------
/tests/utils/test_utils.py:
--------------------------------------------------------------------------------
1 | from whatstk.utils.utils import COLNAMES_DF
2 | 
3 | def test_colnames():
4 |     assert COLNAMES_DF.DATE == "date"
5 |     assert COLNAMES_DF.USERNAME == "username"
6 |     assert COLNAMES_DF.MESSAGE == "message"
7 |     assert COLNAMES_DF.MESSAGE_LENGTH == "message_length"
8 |     assert COLNAMES_DF.MESSAGE_TYPE == "message_type"
9 | 


--------------------------------------------------------------------------------
/tests/whatsapp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/whatsapp/__init__.py


--------------------------------------------------------------------------------
/tests/whatsapp/test_auto_header.py:
--------------------------------------------------------------------------------
 1 | from whatstk.whatsapp.auto_header import extract_header_from_text, _extract_elements_template_from_lines
 2 | 
 3 | 
 4 | def test_extract_header_from_text():
 5 |     _ = extract_header_from_text("bla bla bla")
 6 |     assert _ is None
 7 | 
 8 | 
 9 | def test_extract_elements_template_from_lines():
10 |     elements_list, template_list = _extract_elements_template_from_lines(["testing"])
11 |     assert elements_list == []
12 |     assert template_list == []
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/tests/whatsapp/test_generation.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from datetime import datetime
 4 | from whatstk.whatsapp.objects import WhatsAppChat
 5 | from whatstk.whatsapp.generation import ChatGenerator, generate_chats_hformats
 6 | 
 7 | 
 8 | USERS = ['laurent', 'anna', 'lua', 'miquel']
 9 | 
10 | 
11 | def test_generate_messages():
12 |     cg = ChatGenerator(size=10, users=USERS)
13 |     messages = cg._generate_messages()
14 |     assert(isinstance(messages, (list, np.ndarray)))
15 |     assert(all([isinstance(m, str) for m in messages]))
16 | 
17 | 
18 | def test_generate_emojis():
19 |     cg = ChatGenerator(size=10, users=USERS)
20 |     emojis = cg._generate_emojis()
21 |     assert(isinstance(emojis, (list, np.ndarray)))
22 |     assert(all([isinstance(e, str) for e in emojis]))
23 | 
24 | 
def test_generate_timestamps_1():
    """Timestamps generated with default arguments are datetime objects."""
    generator = ChatGenerator(size=10, users=USERS)
    generated = generator._generate_timestamps()
    assert isinstance(generated, (list, np.ndarray))
    assert all(isinstance(stamp, datetime) for stamp in generated)
30 | 
31 | 
def test_generate_timestamps_2():
    """Timestamps generated with an explicit ``last`` date are datetime objects."""
    generator = ChatGenerator(size=10, users=USERS)
    generated = generator._generate_timestamps(last=datetime.now())
    assert isinstance(generated, (list, np.ndarray))
    assert all(isinstance(stamp, datetime) for stamp in generated)
37 | 
38 | 
def test_generate_users():
    """Generated usernames come as a list/array of strings."""
    generator = ChatGenerator(size=10, users=USERS)
    generated = generator._generate_users()
    assert isinstance(generated, (list, np.ndarray))
    assert all(isinstance(username, str) for username in generated)
44 | 
45 | 
def test_generate_df():
    """The generator builds its chat as a pandas DataFrame."""
    generator = ChatGenerator(size=10, users=USERS)
    frame = generator._generate_df()
    assert isinstance(frame, pd.DataFrame)
50 | 
51 | 
def test_generate_1():
    """``generate`` with default arguments returns a WhatsAppChat."""
    generator = ChatGenerator(size=10, users=USERS)
    generated_chat = generator.generate()
    assert isinstance(generated_chat, WhatsAppChat)
56 | 
57 | 
def test_generate_2():
    """``generate`` with an explicit header format returns a WhatsAppChat.

    The header format previously started with a bare ``y`` (missing ``%``), so
    the year directive was treated as a literal character instead of a
    two-digit year.
    """
    cg = ChatGenerator(size=10, users=USERS)
    chat = cg.generate(hformat='%y-%m-%d, %H:%M - %name:')
    assert isinstance(chat, WhatsAppChat)
62 | 
63 | 
def test_generate_3(tmpdir):
    """``generate`` with a ``filepath`` still returns a WhatsAppChat."""
    generator = ChatGenerator(size=10, users=USERS)
    export_path = tmpdir.join("export.txt")
    generated_chat = generator.generate(filepath=str(export_path))
    assert isinstance(generated_chat, WhatsAppChat)
69 | 
70 | 
def test_generate_chats_hformats(tmpdir):
    """Generating chats with default header formats runs without errors."""
    target_dir = tmpdir.mkdir("output")
    generate_chats_hformats(target_dir, size=2, verbose=False)
74 | 
75 | 
def test_generate_chats_hformats_2(tmpdir):
    """Generating a zipped chat for one explicit header format runs without errors."""
    target_dir = tmpdir.mkdir("output")
    options = {
        "size": 2,
        "hformats": ['%Y-%m-%d, %H:%M - %name:'],
        "filepaths": ['file.txt'],
        "export_as_zip": True,
        "verbose": False,
    }
    generate_chats_hformats(target_dir, **options)
87 | 


--------------------------------------------------------------------------------
/tests/whatsapp/test_hformat.py:
--------------------------------------------------------------------------------
 1 | from whatstk.whatsapp.hformat import is_supported, is_supported_verbose, get_supported_hformats_as_list
 2 | 
 3 | 
 4 | def test_is_supported_1():
 5 |     hformat = '%y-%m-%d, %H:%M - %name:'
 6 |     support, autoh_support = is_supported(hformat)
 7 |     assert(isinstance(support, bool))
 8 |     assert(isinstance(autoh_support, bool))
 9 | 
10 | 
def test_is_supported_2():
    """``is_supported`` returns two booleans for a 12-hour (AM/PM) header format."""
    flags = is_supported('%y-%m-%d, %I:%M %p - %name:')
    supported, auto_supported = flags
    assert isinstance(supported, bool)
    assert isinstance(auto_supported, bool)
16 | 
17 | 
def test_is_supported_verbose():
    """``is_supported_verbose`` describes the support status as a string."""
    message = is_supported_verbose('%y-%m-%d, %I:%M %p - %name:')
    assert isinstance(message, str)
22 | 
23 | 
def test_get_supported_hformats_as_list():
    """Supported header formats are exposed as a list of strings."""
    formats = get_supported_hformats_as_list()
    assert isinstance(formats, list)
    assert all(isinstance(fmt, str) for fmt in formats)
28 | 


--------------------------------------------------------------------------------
/tests/whatsapp/test_objects.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import pandas as pd
  4 | import pytest
  5 | 
  6 | from whatstk.whatsapp.objects import WhatsAppChat
  7 | from whatstk.utils.exceptions import HFormatError
  8 | from whatstk.utils.utils import _map_hformat_filename
  9 | 
 10 | 
 11 | hformat = "[%d.%m.%y %I:%M:%S %p] %name:"
 12 | filename = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt"
 13 | 
 14 | chats_merge_path = 'tests/chats/merge/'
 15 | filename1 = os.path.join(chats_merge_path, 'file1.txt')
 16 | filename2 = os.path.join(chats_merge_path, 'file2.txt')
 17 | hformat_merge = '%y-%m-%d, %H:%M - %name:'
 18 | 
 19 | 
 20 | def test_object_auto():
 21 |     chat = WhatsAppChat.from_source(filename)
 22 |     assert(isinstance(chat.df, pd.DataFrame))
 23 | 
 24 | 
 25 | def test_object_hformat():
 26 |     chat = WhatsAppChat.from_source(filename)
 27 |     assert(isinstance(chat.df, pd.DataFrame))
 28 | 
 29 |     chat = WhatsAppChat.from_source(filename)
 30 |     assert(isinstance(chat.df, pd.DataFrame))
 31 | 
 32 | 
 33 | def test_object_error():
 34 |     with pytest.raises(ValueError):
 35 |         _ = WhatsAppChat.from_source(filename, auto_header=False)
 36 | 
 37 | 
 38 | def test_object_to_csv_1(tmpdir):
 39 |     chat = WhatsAppChat.from_source(filename)
 40 |     filename_ = tmpdir.join("export.csv")
 41 |     chat.to_csv(filepath=str(filename_))
 42 | 
 43 | 
 44 | def test_object_to_csv_2(tmpdir):
 45 |     chat = WhatsAppChat.from_source(filename)
 46 |     filename_ = tmpdir.join("export")
 47 |     with pytest.raises(ValueError):
 48 |         chat.to_csv(filepath=str(filename_))
 49 | 
 50 | 
 51 | def test_object_to_txt(tmpdir):
 52 |     chat = WhatsAppChat.from_source(filename)
 53 |     filename_ = tmpdir.join("export")
 54 |     with pytest.raises(ValueError):
 55 |         chat.to_txt(filepath=str(filename_))
 56 | 
 57 | 
 58 | def test_object_to_zip(tmpdir):
 59 |     chat = WhatsAppChat.from_source(filename)
 60 |     filename_ = tmpdir.join("export")
 61 |     print(filename_)
 62 |     with pytest.raises(ValueError):
 63 |         chat.to_zip(filepath=str(filename_))
 64 | 
 65 | 
 66 | def test_object_from_source_error(tmpdir):
 67 |     with pytest.raises((HFormatError, KeyError)):
 68 |         _ = WhatsAppChat.from_source(filename, hformat="%y%name")
 69 | 
 70 | 
 71 | def test_object_from_sources(tmpdir):
 72 |     chat = WhatsAppChat.from_sources([filename1, filename2])
 73 |     assert(isinstance(chat.df, pd.DataFrame))
 74 |     chat = WhatsAppChat.from_sources([filename2, filename1])
 75 |     assert(isinstance(chat.df, pd.DataFrame))
 76 |     chat = WhatsAppChat.from_sources([filename2, filename1], auto_header=True)
 77 |     assert(isinstance(chat.df, pd.DataFrame))
 78 |     hformat = [hformat_merge, hformat_merge]
 79 |     chat = WhatsAppChat.from_sources([filename2, filename1], auto_header=False, hformat=hformat)
 80 |     assert(isinstance(chat.df, pd.DataFrame))
 81 | 
 82 | 
 83 | def test_merge():
 84 |     chat1 = WhatsAppChat.from_source(filename1)
 85 |     chat2 = WhatsAppChat.from_source(filename2)
 86 |     chat = chat1.merge(chat2)
 87 |     assert(isinstance(chat.df, pd.DataFrame))
 88 |     chat = chat1.merge(chat2, rename_users={'J': ['John']})
 89 |     assert(isinstance(chat.df, pd.DataFrame))
 90 | 
 91 | 
 92 | def test_rename_users():
 93 |     chat = WhatsAppChat.from_source(filename)
 94 |     chat = chat.rename_users(mapping={'J': ['John']})
 95 |     assert(isinstance(chat.df, pd.DataFrame))
 96 | 
 97 | 
 98 | def test_rename_users_error():
 99 |     chat = WhatsAppChat.from_source(filename)
100 |     with pytest.raises(ValueError):
101 |         chat = chat.rename_users(mapping={'J': 'John'})
102 | 
103 | 
def test_len():
    """``len(chat)`` is an integer."""
    loaded = WhatsAppChat.from_source(filename)
    n_messages = len(loaded)
    assert isinstance(n_messages, int)
107 | 


--------------------------------------------------------------------------------
/tests/whatsapp/test_parser.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pandas as pd
  3 | import pytest
  4 | from whatstk.whatsapp.parser import df_from_whatsapp
  5 | from whatstk.whatsapp.hformat import get_supported_hformats_as_dict
  6 | from whatstk.utils.exceptions import HFormatError
  7 | from whatstk.utils.utils import COLNAMES_DF, _map_hformat_filename
  8 | 
  9 | 
 10 | # Generate chats
 11 | output_folder = "./tests/chats/hformats"
 12 | # generate_chats_hformats(output_folder, 500, verbose=True)
 13 | filenames = [os.path.join(output_folder, f) for f in os.listdir(output_folder) if f.endswith('.txt')]
 14 | # Chats for multiple txt loading
 15 | chats_merge_path = 'tests/chats/merge/'
 16 | filename1 = os.path.join(chats_merge_path, 'file1.txt')
 17 | filename2 = os.path.join(chats_merge_path, 'file2.txt')
 18 | # TODO: Message type chats
 19 | chats_merge_path = 'chats/whatsapp/pokemon.txt'
 20 | file_type_1 = os.path.abspath(chats_merge_path)
 21 | 
 22 | # Chat hosted on repo
 23 | # filepath_url = "http://raw.githubusercontent.com/lucasrodes/whatstk/master/chats/example.txt"
 24 | filepath_url = "http://raw.githubusercontent.com/lucasrodes/whatstk/master/chats/whatsapp/pokemon.txt"
 25 | 
 26 | 
 27 | def test_df_from_whatsapp():
 28 |     """This test checks most of the logic of the library.
 29 | 
 30 |     - Generates tests in all formats to be supported (according to JSON)
 31 |     - Loads them using manual and auto_header approaches (checks they are equivalent).
 32 |     - Checks that all chats (from different hformats) are equivalent.
 33 | 
 34 |     """
 35 |     info_dix = get_supported_hformats_as_dict()
 36 |     all_chats = []
 37 |     hformats = []
 38 |     for elem in info_dix:
 39 |         chats = []
 40 |         hformat = elem['format']
 41 |         auto_header = bool(elem['auto_header'])
 42 |         filename_base = _map_hformat_filename(hformat)
 43 |         filename = os.path.join(output_folder, '{}.txt'.format(filename_base))
 44 |         filename_zip = os.path.join(output_folder, '{}.zip'.format(filename_base))
 45 | 
 46 |         # Auto
 47 |         if auto_header:
 48 |             chat = df_from_whatsapp(filename)
 49 |             chats.append(chat)
 50 |         # Manual
 51 |         chat = df_from_whatsapp(filename, hformat=hformat)
 52 |         chats.append(chat)
 53 | 
 54 |         # ZIP
 55 |         # Auto
 56 |         if auto_header:
 57 |             chat_zip = df_from_whatsapp(filename_zip)
 58 |             assert chat_zip.equals(chat)
 59 |         # Manual
 60 |         chat_zip = df_from_whatsapp(filename_zip, hformat=hformat)
 61 |         assert chat_zip.equals(chat)
 62 | 
 63 |         # Check manual and auto chats are equal
 64 |         assert(chats[0].equals(chats[1]))  # TODO: Assumes there are always two elements in list chats!
 65 | 
 66 |         all_chats.append(chat)
 67 |         hformats.append(hformat)
 68 | 
 69 |     records = []
 70 |     for i in range(len(all_chats)):
 71 |         record = {'chat': i}
 72 |         for j in range(i, len(all_chats)):
 73 |             if (all_chats[i][COLNAMES_DF.DATE].dt.second.nunique() == 1) & (all_chats[j][COLNAMES_DF.DATE].dt.second.nunique() != 1):
 74 |                 all_chats[j][COLNAMES_DF.DATE] = all_chats[j][COLNAMES_DF.DATE].map(lambda x: x.replace(second=0))
 75 |             elif (all_chats[j][COLNAMES_DF.DATE].dt.second.nunique() == 1) & (all_chats[i][COLNAMES_DF.DATE].dt.second.nunique() != 1):
 76 |                 all_chats[i][COLNAMES_DF.DATE] = all_chats[i][COLNAMES_DF.DATE].map(lambda x: x.replace(second=0))
 77 |             record[j] = all_chats[i].equals(all_chats[j])
 78 |         records.append(record)
 79 |     df = pd.DataFrame.from_records(records, index="chat")
 80 |     assert((df == False).sum().sum() == 0)
 81 | 
 82 | 
 83 | def test_df_from_whatsapp_2():
 84 |     with pytest.raises(HFormatError):
 85 |         _ = df_from_whatsapp(filename1, hformat='%y')
 86 | 
 87 | 
 88 | def test_df_from_whatsapp_3():
 89 |     with pytest.raises(ValueError):
 90 |         _ = df_from_whatsapp(filename1, auto_header=False)
 91 | 
 92 | 
 93 | def test_df_from_whatsapp_url():
 94 |     df = df_from_whatsapp(filepath_url)
 95 |     assert(isinstance(df, pd.DataFrame))
 96 | 
 97 | 
 98 | def test_df_from_whatsapp_gdrive(mocker):
 99 |     gdrive_url = "gdrive://456456456-ewgwegegw"
100 |     with open(filename1, "r", encoding='utf8') as f:
101 |         mock_text = f.read()
102 |     # mocker.patch('whatstk.utils.gdrive._load_str_from_file_id', return_value="bla bla")
103 |     mocker.patch("pydrive2.files.GoogleDriveFile.FetchMetadata", return_value=True)
104 |     mocker.patch("pydrive2.files.GoogleDriveFile.GetContentString", return_value=mock_text)
105 |     mocker.patch("whatstk.utils.gdrive._check_gdrive_config", return_value=None)
106 |     df = df_from_whatsapp(gdrive_url)
107 |     assert(isinstance(df, pd.DataFrame))
108 | 
109 | 
def test_df_from_whatsapp_error():
    """A path that does not exist raises FileNotFoundError."""
    missing_path = 'grger'
    with pytest.raises(FileNotFoundError):
        df_from_whatsapp(missing_path)
113 | 
114 | 
def test_df_message_type_true():
    """With ``message_type=True``, rows attributed to the group name are typed 'system'."""
    loaded = df_from_whatsapp(file_type_1, message_type=True)
    assert isinstance(loaded, pd.DataFrame)

    # Check group name
    from_group = loaded["username"] == "Pokemon Chat"
    group_message_types = set(loaded.loc[from_group, COLNAMES_DF.MESSAGE_TYPE])
    assert group_message_types == {"system"}
122 | 


--------------------------------------------------------------------------------
/whatstk/__init__.py:
--------------------------------------------------------------------------------
 1 | """Python wrapper and analysis tools for WhatsApp chats.
 2 | 
 3 | This library provides a powerful wrapper for multiple Languages and OS. In addition, analytics tools are provided.
 4 | 
 5 | """
 6 | 
 7 | 
 8 | from whatstk.whatsapp.objects import WhatsAppChat
 9 | from whatstk.graph import FigureBuilder
10 | from whatstk.whatsapp.parser import df_from_txt_whatsapp, df_from_whatsapp
11 | 
12 | 
# Package name.
name = "whatstk"

# Package version string.
__version__ = "0.7.1"

# Public names exported by ``from whatstk import *``.
__all__ = [
    "WhatsAppChat",
    "df_from_txt_whatsapp",
    "df_from_whatsapp",
    "FigureBuilder",
]
23 | 


--------------------------------------------------------------------------------
/whatstk/_chat.py:
--------------------------------------------------------------------------------
  1 | """Library objects."""
  2 | 
  3 | 
  4 | from copy import deepcopy
  5 | import pandas as pd
  6 | from typing import Optional, List, Union, Dict, Any, Tuple
  7 | from datetime import datetime
  8 | 
  9 | from whatstk.utils.chat_merge import merge_chats
 10 | from whatstk.utils.utils import COLNAMES_DF
 11 | 
 12 | 
 13 | class BaseChat:
 14 |     """Base chat object.
 15 | 
 16 |     Attributes:
 17 |         df: Chat as pandas.DataFrame.
 18 | 
 19 |     ..  seealso::
 20 | 
 21 |         * :func:`WhatsAppChat <whatstk.whatsapp.objects.WhatsAppChat>`
 22 | 
 23 |     """
 24 | 
 25 |     def __init__(self, df: pd.DataFrame, platform: Optional[str] = None) -> None:
 26 |         """Constructor.
 27 | 
 28 |         Args:
 29 |             df (pandas.DataFrame): Chat.
 30 |             platform (str): Name of the platform, e.g. 'whatsapp'.
 31 | 
 32 |         """
 33 |         self._df_raw = df
 34 |         self._df, self._df_system, self._name = self._build_dfs(df.copy())
 35 |         self._platform = platform
 36 | 
 37 |     def _build_dfs(self, df_raw: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, str]:
 38 |         if (COLNAMES_DF.MESSAGE_TYPE in df_raw.columns) and self.is_group:
 39 |             mask = df_raw[COLNAMES_DF.MESSAGE_TYPE] == "system"
 40 |             # Get chat only with user messages
 41 |             df = df_raw.loc[~mask].drop(columns=COLNAMES_DF.MESSAGE_TYPE)
 42 |             # Get chat only with system messages
 43 |             df_system = df_raw.loc[mask].drop(columns=COLNAMES_DF.MESSAGE_TYPE)
 44 |             # Get system messages dataframe
 45 |             if len(set(df_system[COLNAMES_DF.USERNAME])) != 1:
 46 |                 raise ValueError("System messages dataframe must contain only one username.")
 47 |             chat_name = df_system[COLNAMES_DF.USERNAME].iloc[0]
 48 |             # Drop 'username' from system dataframe
 49 |             df_system = df_system.drop(columns=COLNAMES_DF.USERNAME)
 50 |             return df, df_system, chat_name
 51 |         if (COLNAMES_DF.MESSAGE_TYPE in df_raw.columns) and not self.is_group:
 52 |             df_raw = df_raw.drop(columns=COLNAMES_DF.MESSAGE_TYPE)
 53 |         return df_raw, pd.DataFrame(), ""
 54 | 
 55 |     @property
 56 |     def df(self) -> pd.DataFrame:
 57 |         """Chat as DataFrame.
 58 | 
 59 |         Returns:
 60 |             pandas.DataFrame
 61 |         """
 62 |         return self._df
 63 | 
 64 |     @property
 65 |     def df_system(self) -> pd.DataFrame:
 66 |         """Chat as DataFrame.
 67 | 
 68 |         Returns:
 69 |             pandas.DataFrame
 70 |         """
 71 |         return self._df_system
 72 | 
 73 |     @property
 74 |     def is_group(self) -> bool:
 75 |         """True if the chart is a group.
 76 | 
 77 |         A chat is detected as a group if it has more than 2 users (including the 'system').
 78 |         Groups with one person will not be detected as groups.
 79 | 
 80 |         Returns:
 81 |             bool
 82 |         """
 83 |         if len(set(self._df_raw[COLNAMES_DF.USERNAME])) > 2:
 84 |             return True
 85 |         return False
 86 | 
 87 |     @property
 88 |     def users(self) -> List[str]:
 89 |         """List with users.
 90 | 
 91 |         Returns:
 92 |             list
 93 |         """
 94 |         return sorted(list(self.df[COLNAMES_DF.USERNAME].unique()))
 95 | 
 96 |     @property
 97 |     def name(self) -> Optional[str]:
 98 |         """Name of the chat.
 99 | 
100 |         Returns None if no name could be found. The name is extracted from the username of with
101 |         the first system message in the chat.
102 | 
103 |         Returns:
104 |             list
105 |         """
106 |         return self._name
107 | 
108 |     @property
109 |     def start_date(self) -> Union[str, datetime]:
110 |         """Chat starting date.
111 | 
112 |         Returns:
113 |             datetime
114 | 
115 |         """
116 |         return self._df_raw[COLNAMES_DF.DATE].min()
117 | 
118 |     @property
119 |     def end_date(self) -> Union[str, datetime]:
120 |         """Chat end date.
121 | 
122 |         Returns:
123 |             datetime
124 | 
125 |         """
126 |         return self._df_raw[COLNAMES_DF.DATE].max()
127 | 
128 |     @classmethod
129 |     def from_source(cls, **kwargs: Any) -> None:  # noqa: ANN401
130 |         """Load chat.
131 | 
132 |         Args:
133 |             kwargs: Specific to the child class.
134 | 
135 |         Raises:
136 |             NotImplementedError: Must be implemented in children.
137 | 
138 |         ..  seealso::
139 | 
140 |             * :func:`WhatsAppChat.from_source <whatstk.WhatsAppChat.from_source>`
141 | 
142 |         """
143 |         raise NotImplementedError
144 | 
145 |     def merge(self, chat: "BaseChat", rename_users: Optional[Dict[str, str]] = None) -> "BaseChat":
146 |         """Merge current instance with ``chat``.
147 | 
148 |         Args:
149 |             chat (WhatsAppChat): Another chat.
150 |             rename_users (dict): Dictionary mapping old names to new names. Example: {'John':['Jon', 'J'], 'Ray':
151 |                                  ['Raymond']} will map 'Jon' and 'J' to 'John', and 'Raymond' to 'Ray'. Note that old
152 |                                  names must come as list (even if there is only one).
153 | 
154 |         Returns:
155 |             BaseChat: Merged chat.
156 | 
157 |         ..  seealso::
158 | 
159 |             * :func:`rename_users <whatstk.whatsapp.objects.WhatsAppChat.rename_users>`
160 |             * :func:`merge_chats <whatstk.utils.chat_merge.merge_chats>`
161 | 
162 |         Example:
163 |             Merging two chats can become handy when you have exported a chat in different times with your phone and
164 |             hence each exported file might contain data that is unique to that file.
165 | 
166 |             In this example however, we merge files from different chats.
167 | 
168 |             ..  code-block:: python
169 | 
170 |                 >>> from whatstk.whatsapp.objects import WhatsAppChat
171 |                 >>> from whatstk.data import whatsapp_urls
172 |                 >>> filepath_1 = whatsapp_urls.LOREM1
173 |                 >>> filepath_2 = whatsapp_urls.LOREM2
174 |                 >>> chat_1 = WhatsAppChat.from_source(filepath=filepath_1)
175 |                 >>> chat_2 = WhatsAppChat.from_source(filepath=filepath_2)
176 |                 >>> chat = chat_1.merge(chat_2)
177 | 
178 |         """
179 |         # Can only merge from same platform
180 |         if self._platform != chat._platform:
181 |             raise ValueError("Both chats must come from the same platform.")
182 |         # Merge
183 |         self_ = deepcopy(self)
184 |         self_._df_raw = merge_chats([self._df_raw, chat._df_raw])
185 |         self_._df = merge_chats([self.df, chat.df])
186 |         if (not self.df_system.empty) and (not chat.df_system.empty):
187 |             self_._df_system = merge_chats([self.df_system, chat.df_system])
188 |         if rename_users:
189 |             self_ = self_.rename_users(mapping=rename_users)
190 |         return self_
191 | 
192 |     def rename_users(self, mapping: Dict[str, str]) -> "BaseChat":
193 |         """Rename users.
194 | 
195 |         This might be needed in multiple occations:
196 | 
197 |             - Change typos in user names stored in phone.
198 |             - If a user appears multiple times with different usernames, group these under the same name (this might
199 |                 happen when multiple chats are merged).
200 | 
201 |         Args:
202 |             mapping (dict): Dictionary mapping old names to new names, example:
203 |                             {'John': ['Jon', 'J'], 'Ray': ['Raymond']} will map 'Jon' and 'J' to 'John', and 'Raymond'
204 |                             to 'Ray'. Note that old names must come as list (even if there is only one).
205 | 
206 |         Returns:
207 |             pandas.DataFrame: DataFrame with users renamed according to `mapping`.
208 | 
209 |         Raises:
210 |             ValueError: Raised if mapping is not correct.
211 | 
212 |         Examples:
213 |             Load LOREM2 chat and rename users `Maria` and `Maria2` to `Mary`.
214 | 
215 |             ..  code-block:: python
216 | 
217 |                 >>> from whatstk.whatsapp.objects import WhatsAppChat
218 |                 >>> from whatstk.data import whatsapp_urls
219 |                 >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM2)
220 |                 >>> chat.users
221 |                 ['+1 123 456 789', 'Giuseppe', 'John', 'Maria', 'Maria2']
222 |                 >>> chat = chat.rename_users(mapping={'Mary': ['Maria', 'Maria2']})
223 |                 >>> chat.users
224 |                 ['+1 123 456 789', 'Giuseppe', 'John', 'Mary']
225 | 
226 |         """
227 |         self_ = deepcopy(self)
228 |         for new_name, old_names in mapping.items():
229 |             if not isinstance(old_names, list):
230 |                 raise ValueError("Old names must come as a list of str (even if there is only one).")
231 |             for old_name in old_names:
232 |                 self_.df[COLNAMES_DF.USERNAME][self_.df[COLNAMES_DF.USERNAME] == old_name] = new_name
233 |         return self_
234 | 
235 |     def to_csv(self, filepath: str) -> None:
236 |         """Save chat as csv.
237 | 
238 |         Args:
239 |             filepath (str): Name of file.
240 | 
241 |         """
242 |         if not filepath.endswith(".csv"):
243 |             raise ValueError("filepath must end with .csv")
244 |         self.df.to_csv(filepath, index=False)
245 | 
246 |     def __len__(self) -> int:
247 |         """Number of messages.
248 | 
249 |         Returns:
250 |             int: Instance length, defined as number of samples.
251 | 
252 |         """
253 |         return len(self.df)
254 | 


--------------------------------------------------------------------------------
/whatstk/analysis/__init__.py:
--------------------------------------------------------------------------------
1 | """Analysis tools."""
2 | 
3 | 
4 | from whatstk.analysis.interventions import get_interventions_count
5 | from whatstk.analysis.responses import get_response_matrix
6 | 
7 | 
8 | __all__ = ["get_interventions_count", "get_response_matrix"]
9 | 


--------------------------------------------------------------------------------
/whatstk/analysis/interventions.py:
--------------------------------------------------------------------------------
  1 | """Base analysis tools."""
  2 | 
  3 | from typing import TYPE_CHECKING, List
  4 | 
  5 | import pandas as pd
  6 | from whatstk.utils.utils import COLNAMES_DF, _get_df
  7 | 
  8 | if TYPE_CHECKING:  # pragma: no cover
  9 |     from whatstk._chat import BaseChat  # pragma: no cover
 10 | 
 11 | 
 12 | def get_interventions_count(
 13 |     df: pd.DataFrame = None,
 14 |     chat: "BaseChat" = None,
 15 |     date_mode: str = "date",
 16 |     msg_length: bool = False,
 17 |     cumulative: bool = False,
 18 |     all_users: bool = False,
 19 | ) -> pd.DataFrame:
 20 |     """Get number of interventions per user per unit of time.
 21 | 
 22 |     The unit of time can be chosen by means of argument ``date_mode``.
 23 | 
 24 |     **Note**: Either ``df`` or ``chat`` must be provided.
 25 | 
 26 |     Args:
 27 |         df (pandas.DataFrame, optional): Chat data. Atribute `df` of a chat loaded using Chat. If a value is given,
 28 |                                             ``chat`` is ignored.
 29 |         chat (Chat, optional): Chat data. Object obtained when chat loaded using Chat. Required if ``df`` is None.
 30 |         date_mode (str, optional): Choose mode to group interventions by.
 31 |                                     Defaults to ``date_mode=date``. Available modes are:
 32 | 
 33 |                                     - ``'date'``: Grouped by particular date (year, month and day).
 34 |                                     - ``'hour'``: Grouped by day hours (24 hours).
 35 |                                     - ``'month'``: Grouped by months (12 months).
 36 |                                     - ``'weekday'``: Grouped by weekday (i.e. monday, tuesday, ..., sunday).
 37 |                                     - ``'hourweekday'``: Grouped by weekday and hour.
 38 |         msg_length (bool, optional): Set to True to count the number of characters instead of number of messages sent.
 39 |         cumulative (bool, optional): Set to True to obtain commulative counts.
 40 |         all_users (bool, optional): Obtain number of interventions of all users combined. Defaults to False.
 41 | 
 42 |     Returns:
 43 |         pandas.DataFrame: DataFrame with shape *NxU*, where *N*: number of time-slots and *U*: number of users.
 44 | 
 45 |     Raises:
 46 |         ValueError: if ``date_mode`` value is not supported.
 47 | 
 48 |     Example:
 49 |             Get number of interventions per user from `POKEMON chat
 50 |             <http://raw.githubusercontent.com/lucasrodes/whatstk/develop/chats/whatsapp/pokemon.txt>`_. The counts are
 51 |             represented as a `NxU` matrix, where `N`: number of time-slots and `U`: number of users.
 52 | 
 53 |             ..  code-block:: python
 54 | 
 55 |                 >>> from whatstk import WhatsAppChat
 56 |                 >>> from whatstk.analysis import get_interventions_count
 57 |                 >>> from whatstk.data import whatsapp_urls
 58 |                 >>> filepath = whatsapp_urls.POKEMON
 59 |                 >>> chat = WhatsAppChat.from_source(filepath)
 60 |                 >>> counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False)
 61 |                 >>> counts.head(5)
 62 |                 username    Ash Ketchum  Brock  Jessie & James  ...  Prof. Oak  Raichu  Wobbuffet
 63 |                 date                                            ...
 64 |                 2016-08-06            2      2               0  ...          0       0          0
 65 |                 2016-08-07            1      1               0  ...          1       0          0
 66 |                 2016-08-10            1      0               1  ...          0       2          0
 67 |                 2016-08-11            0      0               0  ...          0       0          0
 68 |                 2016-09-11            0      0               0  ...          0       0          0
 69 | 
 70 |                 [5 rows x 8 columns]
 71 | 
 72 |     """
 73 |     df = _get_df(df=df, chat=chat)
 74 | 
 75 |     if date_mode == "date":
 76 |         n_interventions = _interventions(df, [df[COLNAMES_DF.DATE].dt.date], msg_length)
 77 |         n_interventions.index = pd.to_datetime(n_interventions.index)
 78 |         # print(n_interventions.shape)
 79 |     elif date_mode == "hour":
 80 |         n_interventions = _interventions(df, [df[COLNAMES_DF.DATE].dt.hour], msg_length)
 81 |     elif date_mode == "weekday":
 82 |         n_interventions = _interventions(df, [df[COLNAMES_DF.DATE].dt.weekday], msg_length)
 83 |     elif date_mode == "hourweekday":
 84 |         n_interventions = _interventions(
 85 |             df, [df[COLNAMES_DF.DATE].dt.weekday, df[COLNAMES_DF.DATE].dt.hour], msg_length
 86 |         )
 87 |     elif date_mode == "month":
 88 |         n_interventions = _interventions(df, [df[COLNAMES_DF.DATE].dt.month], msg_length)
 89 |     else:
 90 |         raise ValueError(
 91 |             "Mode {} is not implemented. Valid modes are 'date', 'hour', 'weekday', "
 92 |             "'hourweekday' and 'month'.".format(date_mode)
 93 |         )
 94 | 
 95 |     if date_mode == "hourweekday":
 96 |         n_interventions.index = n_interventions.index.set_names(["weekday", "hour"])
 97 |     else:
 98 |         n_interventions.index.name = date_mode
 99 |     n_interventions.columns = n_interventions.columns.get_level_values(COLNAMES_DF.USERNAME)
100 | 
101 |     if all_users:
102 |         n_interventions = pd.DataFrame(n_interventions.sum(axis=1), columns=["interventions count"])
103 |     if cumulative:
104 |         n_interventions = n_interventions.cumsum()
105 | 
106 |     return n_interventions
107 | 
108 | 
def _interventions(df: pd.DataFrame, series_tf: List[pd.Series], msg_length: bool) -> pd.DataFrame:
    """Get number of interventions per date per user.

    Args:
        df (pandas.DataFrame): Chat as DataFrame.
        series_tf (list): List of pandas series with the date transformations applied, so we can group by, e.g., month.
        msg_length (bool): Set to True to count the number of characters instead of number of messages sent.

    Returns:
        pandas.DataFrame: Table with interventions per time-slot (rows) per user (columns). Combinations without
                          any intervention are filled with 0.

    """
    group_keys = series_tf + [COLNAMES_DF.USERNAME]
    if msg_length:
        # Work on a copy so that the caller's dataframe does not get the extra length column
        counts_ = df.copy()
        counts_[COLNAMES_DF.MESSAGE_LENGTH] = counts_[COLNAMES_DF.MESSAGE].apply(len)
        counts = counts_.groupby(by=group_keys).agg({COLNAMES_DF.MESSAGE_LENGTH: "sum"})
    else:
        # Use the shared column-name constant (rather than a hard-coded "message"), as in the branch above
        counts = df.groupby(by=group_keys).agg({COLNAMES_DF.MESSAGE: "count"})
    # Move usernames from the index to the columns
    return counts.unstack(fill_value=0)
132 | 


--------------------------------------------------------------------------------
/whatstk/analysis/responses.py:
--------------------------------------------------------------------------------
  1 | """Get infor regarding responses between users."""
  2 | 
  3 | 
  4 | from collections import namedtuple
  5 | from typing import TYPE_CHECKING, Optional
  6 | 
  7 | import pandas as pd
  8 | from whatstk.whatsapp.objects import WhatsAppChat
  9 | from whatstk.utils.utils import _get_df, COLNAMES_DF
 10 | 
 11 | if TYPE_CHECKING:  # pragma: no cover
 12 |     from whatstk._chat import BaseChat  # pragma: no cover
 13 | 
# Normalisation modes accepted by the ``norm`` argument of ``get_response_matrix``. Use the ``NORMS`` instance to
# refer to a mode (e.g. ``NORMS.JOINT``) instead of typing its string value.
Norms = namedtuple("Norms", ["ABSOLUTE", "JOINT", "SENDER", "RECEIVER"])
NORMS = Norms(ABSOLUTE="absolute", JOINT="joint", SENDER="sender", RECEIVER="receiver")
 16 | 
 17 | 
 18 | def get_response_matrix(
 19 |     df: Optional[pd.DataFrame] = None,
 20 |     chat: Optional["BaseChat"] = None,
 21 |     zero_own: bool = True,
 22 |     norm: str = NORMS.ABSOLUTE,
 23 | ) -> pd.DataFrame:
 24 |     """Get response matrix for given chat.
 25 | 
 26 |     Obtains a DataFrame of shape `[n_users, n_users]` counting the number of responses between members. Responses can
 27 |     be counted in different ways, e.g. using absolute values or normalised values. Responses are counted based solely
 28 |     on consecutive messages. That is, if :math:`user_i` sends a message right after :math:`user_j`, it will be counted
 29 |     as a response from :math:`user_i` to :math:`user_j`.
 30 | 
 31 |     Axis 0 lists senders and axis 1 lists receivers. That is, the value in cell (i, j) denotes the number of times
 32 |     :math:`user_i` responded to a message from :math:`user_j`.
 33 | 
 34 |     **Note**: Either ``df`` or ``chat`` must be provided.
 35 | 
 36 |     Args:
 37 |         df (pandas.DataFrame, optional): Chat data. Atribute `df` of a chat loaded using Chat. If a value is given,
 38 |                                             ``chat`` is ignored.
 39 |         chat (Chat, optional): Chat data. Object obtained when chat loaded using Chat. Required if ``df`` is None.
 40 |         zero_own (bool, optional): Set to True to avoid counting own responses. Defaults to True.
 41 |         norm (str, optional): Specifies the type of normalization used for reponse count. Can be:
 42 | 
 43 |                                 - ``'absolute'``: Absolute count of messages.
 44 |                                 - ``'joint'``: Normalized by total number of messages sent by all users.
 45 |                                 - ``'sender'``: Normalized per sender by total number of messages sent by user.
 46 |                                 - ``'receiver'``: Normalized per receiver by total number of messages sent by user.
 47 | 
 48 |     Returns:
 49 |         pandas.DataFrame: Response matrix.
 50 | 
 51 |     Example:
 52 |             Get absolute count on responses (consecutive messages) between users.
 53 | 
 54 |             ..  code-block:: python
 55 | 
 56 |                 >>> from whatstk import WhatsAppChat
 57 |                 >>> from whatstk.analysis import get_response_matrix
 58 |                 >>> from whatstk.data import whatsapp_urls
 59 |                 >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON)
 60 |                 >>> responses = get_response_matrix(chat=chat)
 61 |                 >>> responses
 62 |                                 Ash Ketchum  Brock  ...  Raichu  Wobbuffet
 63 |                 Ash Ketchum               0      0  ...       1          0
 64 |                 Brock                     1      0  ...       0          0
 65 |                 Jessie & James            0      1  ...       0          0
 66 |                 Meowth                    0      0  ...       0          0
 67 |                 Misty                     2      1  ...       1          0
 68 |                 Prof. Oak                 0      1  ...       0          0
 69 |                 Raichu                    1      0  ...       0          0
 70 |                 Wobbuffet                 0      0  ...       0          0
 71 | 
 72 |     """
 73 |     # Get chat df and users
 74 |     df = _get_df(df=df, chat=chat)
 75 |     users = WhatsAppChat(df).users
 76 |     # Get list of username transitions and initialize dicitonary with counts
 77 |     user_transitions = df[COLNAMES_DF.USERNAME].tolist()
 78 |     responses = {user: dict(zip(users, [0] * len(users))) for user in users}
 79 |     # Fill count dictionary
 80 |     for i in range(1, len(user_transitions)):
 81 |         sender = user_transitions[i]
 82 |         receiver = user_transitions[i - 1]
 83 |         if zero_own and (sender != receiver):
 84 |             responses[sender][receiver] += 1
 85 |         elif not zero_own:
 86 |             responses[sender][receiver] += 1
 87 |     responses = pd.DataFrame.from_dict(responses, orient="index")
 88 | 
 89 |     # Normalize
 90 |     if norm not in [NORMS.ABSOLUTE, NORMS.JOINT, NORMS.RECEIVER, NORMS.SENDER]:
 91 |         raise ValueError("norm not valid. See NORMS variable in whatstk.analysis.resposes")
 92 |     else:
 93 |         if norm == NORMS.JOINT:
 94 |             responses /= responses.sum().sum()
 95 |         elif norm == NORMS.RECEIVER:
 96 |             responses /= responses.sum(axis=0)
 97 |         elif norm == NORMS.SENDER:
 98 |             responses = responses.divide(responses.sum(axis=1), axis=0)
 99 |     return responses
100 | 


--------------------------------------------------------------------------------
/whatstk/data.py:
--------------------------------------------------------------------------------
 1 | """Load sample chats.
 2 | 
 3 | This module contains the links to currently online-available chats. For more details, please refer
 4 | to the source code.
 5 | 
 6 | """
 7 | # pip install --upgrade certifi
 8 | 
 9 | 
10 | import os
11 | from collections import namedtuple
12 | 
13 | 
Urls = namedtuple("Urls", ["POKEMON", "LOREM", "LOREM1", "LOREM2", "LOREM_2000"])

# Branch of the repository from which the sample chats are served
branch = "develop"
chats_folder = f"http://raw.githubusercontent.com/lucasrodes/whatstk/{branch}/chats"  # noqa: E231


def _whatsapp_chat_url(filename: str) -> str:
    """Build the URL of a sample WhatsApp chat.

    URLs always use ``/`` as separator. ``os.path.join`` must not be used here, since it inserts the separator of the
    local file system (a backslash on Windows).

    Args:
        filename (str): Name of the chat file, e.g. ``"pokemon.txt"``.

    Returns:
        str: URL to the chat file.
    """
    return "/".join([chats_folder, "whatsapp", filename])


whatsapp_urls = Urls(
    POKEMON=_whatsapp_chat_url("pokemon.txt"),
    LOREM=_whatsapp_chat_url("lorem.txt"),
    LOREM1=_whatsapp_chat_url("lorem-merge-part1.txt"),
    LOREM2=_whatsapp_chat_url("lorem-merge-part2.txt"),
    LOREM_2000=_whatsapp_chat_url("lorem-2000.txt"),
)
26 | 


--------------------------------------------------------------------------------
/whatstk/graph/__init__.py:
--------------------------------------------------------------------------------
 1 | """Plot tools using plotly.
 2 | 
 3 | Import :func:`plot <whatstk.graph.plot>` to plot figures.
 4 | 
 5 | ..  code-block:: python
 6 |         >>> from whatstk.graph import plot
 7 | 
 8 | """
 9 | 
10 | 
11 | from plotly.offline import plot
12 | import plotly.io as pio
13 | from whatstk.graph.base import FigureBuilder
14 | 
15 | 
# Module-level side effect: importing ``whatstk.graph`` sets "plotly_white" as plotly's default template.
pio.templates.default = "plotly_white"


# Names exposed by ``from whatstk.graph import *``
__all__ = ["plot", "FigureBuilder"]
20 | 


--------------------------------------------------------------------------------
/whatstk/graph/figures/__init__.py:
--------------------------------------------------------------------------------
1 | """Build Plotly compatible Figures."""
2 | 


--------------------------------------------------------------------------------
/whatstk/graph/figures/boxplot.py:
--------------------------------------------------------------------------------
 1 | """Boxplot figures."""
 2 | 
 3 | from typing import Dict, Optional
 4 | 
 5 | import plotly.graph_objs as go
 6 | import pandas as pd
 7 | 
 8 | from whatstk.utils.utils import COLNAMES_DF
 9 | 
10 | 
def fig_boxplot_msglen(
    df: pd.DataFrame,
    username_to_color: Optional[Dict[str, str]] = None,
    title: str = "",
    xlabel: Optional[str] = None,
) -> go.Figure:
    """Visualize the distribution of message lengths per user as box plots.

    One box is drawn per user. Users are sorted by their median message length, in descending order.

    Args:
        df (pandas.DataFrame): Chat data.
        username_to_color (dict, optional). Dictionary mapping username to color. Defaults to None.
        title (str, optional): Title for plot. Defaults to "".
        xlabel (str, optional): x-axis label title. Defaults to None.

    Returns:
        plotly.graph_objs.Figure

    """
    # Work with standalone series: the input ``df`` is neither modified nor copied as a whole
    usernames = df[COLNAMES_DF.USERNAME]
    msg_lengths = df[COLNAMES_DF.MESSAGE].apply(len)
    # Sort users by median
    medians = msg_lengths.groupby(usernames).median().sort_values(ascending=False)

    # Create a list of traces, one per user
    data = [
        go.Box(
            y=msg_lengths[usernames == username].values,
            showlegend=True,
            name=username,
            boxpoints="outliers",
            marker_color=username_to_color[username] if username_to_color else None,
        )
        for username in medians.index
    ]

    layout = dict(title=title, xaxis=dict(title=xlabel))

    return go.Figure(data=data, layout=layout)
55 | 


--------------------------------------------------------------------------------
/whatstk/graph/figures/heatmap.py:
--------------------------------------------------------------------------------
 1 | """Heatmap plot figures."""
 2 | 
 3 | 
 4 | import plotly.graph_objs as go
 5 | import pandas as pd
 6 | 
 7 | 
 8 | def fig_heatmap(df_matrix: pd.DataFrame, title: str = "") -> go.Figure:
 9 |     """Generate heatmap figure from NxN matrix.
10 | 
11 |     Args:
12 |         df_matrix (pandas.DataFrame): Matrix as DataFrame. Index values and column values must be equal.
13 |         title (str): Title of plot. Defaults to "".
14 | 
15 |     Returns:
16 |         plotly.graph_objs.Figure
17 | 
18 |     """
19 |     trace = go.Heatmap(
20 |         z=df_matrix,
21 |         x=df_matrix.columns,
22 |         y=df_matrix.index,
23 |         hovertemplate="%{y} ---> %{x}<extra>%{z}</extra>",
24 |         colorscale="Greens",
25 |     )
26 |     data = [trace]
27 |     layout = {"title": {"text": title}, "xaxis": {"title": "Receiver"}, "yaxis": {"title": "Sender"}}
28 | 
29 |     fig = go.Figure(data=data, layout=layout)
30 |     return fig
31 | 


--------------------------------------------------------------------------------
/whatstk/graph/figures/sankey.py:
--------------------------------------------------------------------------------
 1 | """Sankey plot figures."""
 2 | 
 3 | from typing import List
 4 | 
 5 | import plotly.graph_objs as go
 6 | 
 7 | 
 8 | def fig_sankey(
 9 |     label: List[str], color: List[str], source: List[str], target: List[str], value: List[int], title: str = ""
10 | ) -> go.Figure:
11 |     """Generate sankey image.
12 | 
13 |     Args:
14 |         label (list): List with node labels.
15 |         color (list): List with node colors.
16 |         source (list): List with link source id.
17 |         target (list): List with linke target id.
18 |         value (list): List with link value.
19 |         title (str, optional): Title. Defaults to "".
20 | 
21 |     Returns:
22 |         plotly.graph_objs.Figure
23 | 
24 |     """
25 |     trace = go.Sankey(
26 |         arrangement="fixed",
27 |         orientation="v",
28 |         valueformat=".0f",
29 |         node=dict(
30 |             pad=20,
31 |             thickness=40,
32 |             line=dict(color="black", width=0),
33 |             label=label,
34 |             color=color,
35 |             hovertemplate="%{label}<br>Number of messages: %{value}<extra></extra>",
36 |             # x=x,
37 |             # y=y
38 |         ),
39 |         link=dict(
40 |             source=source,
41 |             target=target,
42 |             value=value,
43 |             hovertemplate="%{source.label} ---> %{target.label}<extra>%{value}</extra>",
44 |         ),
45 |     )
46 |     data = [trace]
47 | 
48 |     layout = {
49 |         "title": dict(text=title),
50 |         "annotations": [
51 |             {
52 |                 "text": "Senders",
53 |                 "font": {
54 |                     "size": 13,
55 |                     "color": "rgb(116, 101, 130)",
56 |                 },
57 |                 "showarrow": False,
58 |                 "align": "center",
59 |                 "x": 0.5,
60 |                 "y": 1.1,
61 |                 "xref": "paper",
62 |                 "yref": "paper",
63 |             },
64 |             {
65 |                 "text": "Receivers",
66 |                 "font": {
67 |                     "size": 13,
68 |                     "color": "rgb(116, 101, 130)",
69 |                 },
70 |                 "showarrow": False,
71 |                 "align": "center",
72 |                 "x": 0.5,
73 |                 "y": -0.1,
74 |                 "xref": "paper",
75 |                 "yref": "paper",
76 |             },
77 |         ],
78 |     }
79 | 
80 |     fig = go.Figure(data=data, layout=layout)
81 | 
82 |     return fig
83 | 


--------------------------------------------------------------------------------
/whatstk/graph/figures/scatter.py:
--------------------------------------------------------------------------------
 1 | """Scatter plot figures."""
 2 | 
 3 | from typing import Optional, Dict
 4 | 
 5 | import pandas as pd
 6 | import plotly.graph_objs as go
 7 | 
 8 | 
 9 | def fig_scatter_time(
10 |     user_data: pd.DataFrame,
11 |     username_to_color: Optional[Dict[str, str]] = None,
12 |     title: str = "",
13 |     xlabel: Optional[str] = None,
14 | ) -> go.Figure:
15 |     """Obtain Figure to plot using plotly.
16 | 
17 |     ``user_data`` must be a pandas.DataFrame with timestamps as index and a column for each user. You can easily
18 |     generate suitable ``user_data`` using the function
19 |     :func:`get_interventions_count <whatstk.analysis.get_interventions_count>` (disclaimer: not compatible with
20 |     ``date_mode='hourweekday'``).
21 | 
22 |     Args:
23 |         user_data (pandas.DataFrame): Input data. Shape nrows x ncols, where nrows = number of timestaps and
24 |                                       ncols = number of users.
25 |         username_to_color (dict, optional). Dictionary mapping username to color. Defaults to None.
26 |         title (str, optional): Title of figure. Defaults to "".
27 |         xlabel (str, optional): x-axis label title. Defaults to None.
28 | 
29 |     Returns:
30 |         plotly.graph_objs.Figure
31 | 
32 |     ..  seealso::
33 | 
34 |             * :func:`get_interventions_count <whatstk.analysis.get_interventions_count>`
35 | 
36 |     """
37 |     # Create a trace
38 |     data = []
39 | 
40 |     for username in user_data:
41 |         trace = go.Scatter(
42 |             x=user_data.index,
43 |             y=user_data[username],
44 |             showlegend=True,
45 |             name=username,
46 |             text=user_data.index,
47 |             line=dict(color=username_to_color[username]) if username_to_color is not None else None,
48 |         )
49 |         data.append(trace)
50 | 
51 |     layout = dict(title=title, xaxis=dict(title=xlabel))
52 | 
53 |     fig = go.Figure(data=data, layout=layout)
54 | 
55 |     return fig
56 | 


--------------------------------------------------------------------------------
/whatstk/graph/figures/utils.py:
--------------------------------------------------------------------------------
 1 | """Utils for library plots."""
 2 | 
 3 | 
 4 | import seaborn as sns
 5 | from typing import List
 6 | 
 7 | 
 8 | def hex_color_palette(n_colors: int) -> List[str]:
 9 |     """Get palette of `n_colors` color hexadecimal codes.
10 | 
11 |     Args:
12 |         n_colors (int): Size of the color palette.
13 | 
14 |     """
15 |     palette = "hls"  # ref: https://seaborn.pydata.org/tutorial/color_palettes.html
16 |     rgb = sns.color_palette(palette=palette, n_colors=n_colors)
17 |     color_codes = ["#" + "".join("%02X" % int(round(i * 255)) for i in r) for r in rgb]
18 |     return color_codes
19 | 


--------------------------------------------------------------------------------
/whatstk/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | """Library generic scripts."""
2 | 


--------------------------------------------------------------------------------
/whatstk/scripts/generate_chats.py:
--------------------------------------------------------------------------------
 1 | """Generate chats in all hformats with `size` number of messages and export them to a given `output_path`."""
 2 | 
 3 | 
 4 | import argparse
 5 | from datetime import datetime
 6 | from whatstk.whatsapp.generation import generate_chats_hformats
 7 | 
 8 | 
 9 | def _parse_args() -> None:
10 |     parser = argparse.ArgumentParser(
11 |         "Generate chat. Make sure to install the library with required extension: pip install whatstk[generate] "
12 |         "--upgrade"
13 |     )
14 |     parser.add_argument(
15 |         "-o", "--output-path", type=str, required=True, help=("Path where to store generated chats. Must exist.")
16 |     )
17 |     parser.add_argument("--filenames", default=None, nargs="+", help="Filenames. Must be equal length of --hformats.")
18 |     parser.add_argument(
19 |         "-s", "--size", type=int, default=500, help="Number of messages to create per chat. Defaults to 500."
20 |     )
21 |     parser.add_argument(
22 |         "-f",
23 |         "--hformats",
24 |         default=None,
25 |         nargs="+",
26 |         help="Header format. If None, defaults to all supported hformats. List formats as 'format 1' 'format 2' ...",
27 |     )
28 |     parser.add_argument(
29 |         "--last-timestamp",
30 |         type=lambda s: datetime.strptime(s, "%Y-%m-%d"),
31 |         default=None,
32 |         help="Timestamp of last message. Format YYYY-mm-dd",
33 |     )
34 |     parser.add_argument(
35 |         "-z",
36 |         "--export-as-zip",
37 |         default=False,
38 |         action="store_true",
39 |         help="Export chat as ZIP (additionally)",
40 |     )
41 |     parser.add_argument("-v", "--verbose", action="store_true", help="Verbosity.")
42 |     args = parser.parse_args()
43 |     return args
44 | 
45 | 
def main() -> None:
    """Parse the command line arguments and generate the chats accordingly."""
    cli = _parse_args()
    # Map the command line arguments to the keyword arguments of the generator (``--filenames`` -> ``filepaths``)
    options = {
        "output_path": cli.output_path,
        "size": cli.size,
        "hformats": cli.hformats,
        "last_timestamp": cli.last_timestamp,
        "filepaths": cli.filenames,
        "export_as_zip": cli.export_as_zip,
    }
    generate_chats_hformats(**options)
57 | 


--------------------------------------------------------------------------------
/whatstk/scripts/graph.py:
--------------------------------------------------------------------------------
 1 | """Generate multiple graphics for your chat using plotly."""
 2 | 
 3 | 
 4 | import argparse
 5 | from whatstk.whatsapp.objects import WhatsAppChat
 6 | from whatstk.graph import plot, FigureBuilder
 7 | 
 8 | 
 9 | def _parse_args() -> None:
10 |     parser = argparse.ArgumentParser(
11 |         description="Visualise a WhatsApp chat. For advance settings, see package library" "documentation"
12 |     )
13 |     parser.add_argument("input_filename", type=str, default=None, help="Input txt file.")
14 |     parser.add_argument(
15 |         "-o",
16 |         "--output_filename",
17 |         type=str,
18 |         default="output.html",
19 |         help="Graph generated can be stored as an HTML" " file. Defaults to 'output.html'.",
20 |     )
21 |     parser.add_argument(
22 |         "-t",
23 |         "--type",
24 |         type=str,
25 |         default="interventions_count",
26 |         choices=["interventions_count", "msg_length"],
27 |         help="Type of graph. Defualts to 'interventions_count'.",
28 |     )
29 |     parser.add_argument(
30 |         "-id",
31 |         "--icount-date-mode",
32 |         type=str,
33 |         default="date",
34 |         choices=["date", "hour", "weekday", "month"],
35 |         help="Select date mode. Only valid for --type=interventions_count. Defaults to 'date'.",
36 |     )
37 |     parser.add_argument(
38 |         "-ic",
39 |         "--icount-cumulative",
40 |         action="store_true",
41 |         help="Show values in a cumulative fashion. Only valid for --type=interventions_count.",
42 |     )
43 |     parser.add_argument(
44 |         "-il",
45 |         "--icount-msg-length",
46 |         action="store_true",
47 |         help="Count an intervention with its number of characters. Otherwise an intervention is count as one."
48 |         "Only valid for --type=interventions_count.",
49 |     )
50 |     parser.add_argument(
51 |         "-f",
52 |         "--hformat",
53 |         type=str,
54 |         default=None,
55 |         help="By default, auto-header detection is"
56 |         "attempted. If does not work, you can specify it manually using this argument.",
57 |     )
58 |     args = parser.parse_args()
59 |     return args
60 | 
61 | 
def main() -> None:
    """Load the chat given in the command line, build the requested figure and plot it to the output file."""
    args = _parse_args()
    chat = WhatsAppChat.from_source(filepath=args.input_filename, hformat=args.hformat)
    builder = FigureBuilder(chat=chat)

    if args.type == "interventions_count":
        fig = builder.user_interventions_count_linechart(
            date_mode=args.icount_date_mode,
            # Honour -il/--icount-msg-length instead of always counting messages
            msg_length=args.icount_msg_length,
            cumulative=args.icount_cumulative,
        )
    else:
        # Only "msg_length" can reach this branch: `--type` is restricted through argparse `choices`
        fig = builder.user_msg_length_boxplot()
    plot(fig, filename=args.output_filename)
74 | 


--------------------------------------------------------------------------------
/whatstk/scripts/txt_to_csv.py:
--------------------------------------------------------------------------------
  1 | """Convert a WhatsApp chat from txt to csv."""
 2 | 
 3 | 
 4 | import argparse
 5 | from whatstk.whatsapp.objects import WhatsAppChat
 6 | 
 7 | 
 8 | def _parse_args() -> None:
 9 |     parser = argparse.ArgumentParser(description="Convert a Whatsapp chat from csv to txt.")
10 |     parser.add_argument("input_filename", type=str, help="Input txt file.")
11 |     parser.add_argument("output_filename", type=str, help="Name of output csv file.")
12 |     parser.add_argument(
13 |         "-f",
14 |         "--hformat",
15 |         type=str,
16 |         default=None,
17 |         help="By default, auto-header detection is"
18 |         "attempted. If does not work, you can specify it manually using this argument.",
19 |     )
20 |     args = parser.parse_args()
21 |     return args
22 | 
23 | 
def main() -> None:
    """Load the chat from the input file and export it as csv to the output file."""
    cli = _parse_args()
    WhatsAppChat.from_source(filepath=cli.input_filename, hformat=cli.hformat).to_csv(cli.output_filename)
29 | 


--------------------------------------------------------------------------------
/whatstk/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Library generic utils."""
2 | 


--------------------------------------------------------------------------------
/whatstk/utils/chat_merge.py:
--------------------------------------------------------------------------------
 1 | """Merging chats."""
 2 | 
 3 | from typing import List
 4 | 
 5 | import pandas as pd
 6 | from whatstk.utils.utils import COLNAMES_DF
 7 | 
 8 | 
 9 | def _merge_two_chats(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
10 |     if df1[COLNAMES_DF.DATE].min() <= df2[COLNAMES_DF.DATE].min():
11 |         df = pd.concat([df1, df2[df2[COLNAMES_DF.DATE] > df1[COLNAMES_DF.DATE].max()]])
12 |     else:
13 |         df = pd.concat([df2, df1[df1[COLNAMES_DF.DATE] > df2[COLNAMES_DF.DATE].max()]])
14 |     return df
15 | 
16 | 
def merge_chats(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Merge several chats into a single one.

    Can come in handy when you have old exports and new ones, and both have relevant data.

    **Note:** The dataframes must have a date column (``COLNAMES_DF.DATE``) with the timestamps of the messages, as
    this is required to correctly sort and merge the chats.

    Args:
        dfs (List[pandas.DataFrame]): List with the chats as DataFrames. Must not be empty.

    Returns:
        pandas.DataFrame: Merged chat.

    """
    # Sort from oldest, using the same date column that the pairwise merge relies on (and not the index, which does
    # not necessarily hold the timestamps)
    dfs = sorted(dfs, key=lambda x: x[COLNAMES_DF.DATE].min())
    # Merge the chats one by one into the oldest one
    df = dfs[0]
    for df_next in dfs[1:]:
        df = _merge_two_chats(df, df_next)
    return df
39 | 


--------------------------------------------------------------------------------
/whatstk/utils/exceptions.py:
--------------------------------------------------------------------------------
 1 | """Library exceptions."""
 2 | 
 3 | 
 4 | class RegexError(Exception):
 5 |     """Raised when regex match is not possible."""
 6 | 
 7 |     pass
 8 | 
 9 | 
class HFormatError(Exception):
    """Error signalling that the hformat could not be found."""
14 | 


--------------------------------------------------------------------------------
/whatstk/utils/gdrive.py:
--------------------------------------------------------------------------------
  1 | """Google Drive utils.
  2 | 
  3 | .. warning::
  4 | 
  5 |     To load chats from google drive, install the library with the corresponding extension (ignore the
  6 |     ``--upgrade`` option if you haven't installed the library):
  7 | 
  8 |     .. code-block::
  9 | 
 10 |         pip install whatstk[gdrive] --upgrade
 11 | """
 12 | 
 13 | 
 14 | from shutil import copyfile
 15 | import os
 16 | 
 17 | try:
 18 |     from pydrive2.auth import GoogleAuth
 19 |     from pydrive2.drive import GoogleDrive
 20 |     from pydrive2.files import ApiRequestError
 21 |     import yaml
 22 | except ImportError as e:  # pragma: no cover
 23 |     msg = (  # pragma: no cover
 24 |         "whatstk Google Drive requirements are not installed.\n\n"
 25 |         "Please pip install as follows:\n\n"
 26 |         '  python -m pip install "whatstk[gdrive]" --upgrade  # or python -m pip install'
 27 |     )  # pragma: no cover
 28 |     raise ImportError(msg) from e  # pragma: no cover
 29 | 
 30 | 
# Paths used by the Google Drive integration, all located under ``~/.config/whatstk/gdrive``. Nothing is created
# here: the folder and its files are written by ``gdrive_init``.
CONFIG_PATH = os.path.join(os.path.expanduser("~"), ".config", "whatstk", "gdrive")
# Copy of the client secrets file given to ``gdrive_init``
CLIENT_SECRETS_PATH = os.path.join(CONFIG_PATH, "client_secrets.json")
# Settings file passed to ``GoogleAuth``
SETTINGS_PATH = os.path.join(CONFIG_PATH, "settings.yaml")
# File configured in the settings as ``save_credentials_file``
CREDENTIALS_PATH = os.path.join(CONFIG_PATH, "credentials.json")
 36 | 
 37 | 
 38 | def gdrive_init(client_secret_file: str, encoding: str = "utf8") -> None:
 39 |     """Initialize GDrive credentials.
 40 | 
 41 |     This should only run once before reading a file from Google Drive the first time. Subsequent executions should run
 42 |     seamlessly.
 43 | 
 44 |     To obtain `client_secret_file`, follow the instructions from:
 45 |     https://medium.com/analytics-vidhya/how-to-connect-google-drive-to-python-using-pydrive-9681b2a14f20
 46 | 
 47 |     Notes:
 48 |         - Additionally, make sure to add yourself in Test users, as noted in:
 49 |           https://stackoverflow.com/questions/65980758/pydrive-quickstart-and-error-403-access-denied
 50 |         - Select Desktop App instead of Web Application as the application type.
 51 | 
 52 |     Args:
 53 |         client_secret_file (str): Path to clien_secret.json file (Created in Google Console).
 54 |         encoding (str): Encoding to use for UTF when reading/writing (ex. ‘utf-8’).
 55 |                              `List of Python standard encodings
 56 |                              <https://docs.python.org/3/library/codecs.html#standard-encodings>`_.
 57 |     """
 58 |     if not os.path.isdir(CONFIG_PATH):
 59 |         os.makedirs(CONFIG_PATH, exist_ok=True)
 60 | 
 61 |     # Copy credentials to config folder
 62 |     copyfile(client_secret_file, CLIENT_SECRETS_PATH)
 63 | 
 64 |     # Create settings.yaml file
 65 |     dix = {
 66 |         "client_config_backend": "file",
 67 |         "client_config_file": CLIENT_SECRETS_PATH,
 68 |         "save_credentials": True,
 69 |         "save_credentials_backend": "file",
 70 |         "save_credentials_file": CREDENTIALS_PATH,
 71 |         "get_refresh_token": True,
 72 |         "oauth_scope": [
 73 |             "https://www.googleapis.com/auth/drive",
 74 |             "https://www.googleapis.com/auth/drive.install",
 75 |         ],
 76 |     }
 77 |     with open(SETTINGS_PATH, "w", encoding=encoding) as f:
 78 |         yaml.dump(dix, f)
 79 | 
 80 |     # credentials.json
 81 |     gauth = GoogleAuth(settings_file=SETTINGS_PATH)
 82 |     gauth.CommandLineAuth()
 83 | 
 84 | 
 85 | def _check_gdrive_config() -> None:
 86 |     error_msg = (
 87 |         "Google Drive not correctly configured. Run `gdrive_init(client_secret_file)` (from whatstk.utils.gdrive)."
 88 |     )
 89 |     if not os.path.isdir(CONFIG_PATH):
 90 |         raise ValueError(error_msg)
 91 |     for f in [CLIENT_SECRETS_PATH, SETTINGS_PATH]:
 92 |         if not os.path.isfile(f):
 93 |             raise ValueError(error_msg)
 94 | 
 95 | 
 96 | def _load_str_from_file_id(file_id: int) -> str:
 97 |     _check_gdrive_config()
 98 |     gauth = GoogleAuth(settings_file=SETTINGS_PATH)
 99 |     drive = GoogleDrive(gauth)
100 |     # Load file using id
101 |     try:
102 |         file_obj = drive.CreateFile({"id": file_id})
103 |         file_obj.FetchMetadata()
104 |     except ApiRequestError:
105 |         raise ValueError(
106 |             f"File ID {file_id} not valid. Please use a valid File ID. You can find it in the shareable file link."
107 |         )
108 |     # Get raw file content as str
109 |     txt = file_obj.GetContentString()
110 |     return txt
111 | 


--------------------------------------------------------------------------------
/whatstk/utils/utils.py:
--------------------------------------------------------------------------------
 1 | """Utils."""
 2 | 
 3 | from typing import TYPE_CHECKING
 4 | import pandas as pd
 5 | 
 6 | if TYPE_CHECKING:  # pragma: no cover
 7 |     from whatstk._chat import BaseChat  # pragma: no cover
 8 | 
 9 | 
class ColnamesDf:
    """Names of the columns of a chat DataFrame, defined as class constants.

    Access class constants using variable ``whatstk.utils.utils.COLNAMES_DF``.

    Example:
            Access constant ``COLNAMES_DF.DATE``:

            ..  code-block:: python

                >>> from whatstk.utils.utils import COLNAMES_DF
                >>> COLNAMES_DF.DATE
                'date'

    """

    DATE = "date"
    """Date column"""

    USERNAME = "username"
    """Username column"""

    MESSAGE = "message"
    """Message column"""

    MESSAGE_LENGTH = "message_length"
    """Message length column"""

    MESSAGE_TYPE = "message_type"
    """Message type column"""


# Module-level instance through which the column names are accessed
COLNAMES_DF = ColnamesDf()
41 | 
42 | 
43 | def _get_df(df: pd.DataFrame, chat: "BaseChat") -> pd.DataFrame:
44 |     if (df is None) & (chat is None):
45 |         raise ValueError("Please provide a chat, using either argument `df` or `chat`.")
46 |     if (df is None) and (chat is not None):
47 |         df = chat.df
48 |     return df
49 | 
50 | 
51 | def _map_hformat_filename(filename: str) -> str:
52 |     """Map hformat to valid filename (Linux, MacOS, Win).
53 | 
54 |     Args:
55 |         filename (str): Header format.
56 | 
57 |     Returns:
58 |         str: Mapped header format.
59 |     """
60 |     filename = filename.replace(" ", "_").replace("/", "--").replace(":", ";")
61 |     return filename
62 | 


--------------------------------------------------------------------------------
/whatstk/whatsapp/__init__.py:
--------------------------------------------------------------------------------
1 | """WhatsApp parser."""
2 | 


--------------------------------------------------------------------------------
/whatstk/whatsapp/assets/__init__.py:
--------------------------------------------------------------------------------
1 | """Static assets."""
2 | 


--------------------------------------------------------------------------------
/whatstk/whatsapp/auto_header.py:
--------------------------------------------------------------------------------
  1 | """Detect header from chat."""
  2 | 
  3 | 
  4 | import logging
  5 | import re
  6 | from typing import List, Tuple, Optional
  7 | import pandas as pd
  8 | 
  9 | from whatstk.utils.exceptions import RegexError
 10 | 
 11 | 
# Punctuation characters that may appear between header components.
# NOTE(review): not referenced by any function in this module — confirm whether
# it is used elsewhere before removing.
separators = {".", ",", "-", "/", ":", "[", "]"}
 13 | 
 14 | 
 15 | def extract_header_from_text(text: str, encoding: str = "utf-8") -> Optional[str]:
 16 |     """Extract header from text.
 17 | 
 18 |     Args:
 19 |         text (str): Loaded chat as string (whole text).
 20 |         encoding (str): Encoding to use for UTF when reading/writing (ex. ‘utf-8’).
 21 |                              `List of Python standard encodings
 22 |                              <https://docs.python.org/3/library/codecs.html#standard-encodings>`_.
 23 | 
 24 |     Returns:
 25 |         str: Format extracted. None if no header was extracted.
 26 | 
 27 |     Example:
 28 |             Load a chat using two text files. In this example, we use sample chats (available online, see urls in
 29 |             source code :mod:`whatstk.data <whatstk.data>`).
 30 | 
 31 |             ..  code-block:: python
 32 | 
 33 |                 >>> from whatstk.whatsapp.parser import extract_header_from_text
 34 |                 >>> from urllib.request import urlopen
 35 |                 >>> from whatstk.data import whatsapp_urls
 36 |                 >>> filepath_1 = whatsapp_urls.POKEMON
 37 |                 >>> with urlopen(filepath_1) as f:
 38 |                 ...     text = f.read().decode('utf-8')
 39 |                 >>> extract_header_from_text(text)
 40 |                 '%d.%m.%y, %H:%M - %name:
 41 |     """
 42 |     # Split lines
 43 |     lines = text.split("\n")
 44 | 
 45 |     # Get format auto
 46 |     try:
 47 |         hformat = _extract_header_format_from_lines(lines)
 48 |         logging.info("Format found was %s", hformat)
 49 |         return hformat
 50 |     except Exception as err:  # noqa
 51 |         logging.info("Format not found.")
 52 |     return None
 53 | 
 54 | 
 55 | def _extract_header_format_from_lines(lines: List[str]) -> str:
 56 |     """Extract header from list of lines.
 57 | 
 58 |     Args:
 59 |         lines (list): List of str, each element is a line of the loaded chat.
 60 | 
 61 |     Returns:
 62 |         str: Format of the header.
 63 | 
 64 |     """
 65 |     # Obtain header format from list of lines
 66 |     elements_list, template_list = _extract_elements_template_from_lines(lines)
 67 |     return _extract_header_format_from_components(elements_list, template_list)
 68 | 
 69 | 
 70 | def _extract_elements_template_from_lines(lines: List[str]) -> Tuple[List[List[int]], List[str]]:
 71 |     """Get elements_list and template_list from lines.
 72 | 
 73 |     Args:
 74 |         lines (list): List with messages.
 75 | 
 76 |     Returns:
 77 |         tuple: elements_list (list), template_list (list)
 78 | 
 79 |     """
 80 |     # Obtain header format from list of lines
 81 |     elements_list = []
 82 |     template_list = []
 83 |     for line in lines:
 84 |         header = _extract_possible_header_from_line(line)
 85 |         if header:
 86 |             try:
 87 |                 elements, template = _extract_header_parts(header)
 88 |             except RegexError:
 89 |                 continue
 90 |             elements_list.append(elements)
 91 |             template_list.append(template)
 92 |     return elements_list, template_list
 93 | 
 94 | 
 95 | def _extract_possible_header_from_line(line: str) -> str:
 96 |     """Given a `line` extract possible header. Uses ':' as separator.
 97 | 
 98 |     Args:
 99 |         line (str): Line containing header and message body.
100 | 
101 |     Returns:
102 |         str: Possible header.
103 | 
104 |     """
105 |     # Extract possible header from line
106 |     line_split = line.split(": ")
107 |     if len(line_split) >= 2:
108 |         # possible header
109 |         header = line_split[0]
110 |         if not header.isprintable():
111 |             print("""
112 |                   There is some unprintable character in the header.
113 |                   Please report this in https://github.com/lucasrodes/whatstk.
114 |             """)
115 |         if header[-1] != ":":
116 |             header += ":"
117 |         return header
118 |     return None
119 | 
120 | 
121 | def _extract_header_parts(header: str) -> Tuple[List[int], str]:
122 |     """Extract all parts from header (i.e. date elements and name).
123 | 
124 |     Args:
125 |         header (str): Header.
126 | 
127 |     Returns:
128 |         tuple: Contains two elements, (i) list with components and (ii) string template which specifies the formatting
129 |                 of the components.
130 | 
131 |     """
132 | 
133 |     def _get_last_idx_digit(v: str, i: int) -> int:
134 |         if i + 1 < len(v):
135 |             if v[i + 1].isdigit():
136 |                 return _get_last_idx_digit(v, i + 1)
137 |         return i
138 | 
139 |     # def get_last_idx_alpha(v, i):
140 |     #     if i+1 < len(v):
141 |     #         if v[i+1].isalpha():
142 |     #             return get_last_idx_alpha(v, i+1)
143 |     #         elif i+2 < len(v):
144 |     #             if v[i+1].isspace() and v[i+2].isalpha():
145 |     #                 return get_last_idx_alpha(v, i+2)
146 |     #     return i
147 | 
148 |     hformat_elements = []
149 |     hformat_template = ""
150 |     i = 0
151 |     while i < len(header):
152 |         if header[i].isdigit():
153 |             j = _get_last_idx_digit(header, i)
154 |             hformat_elements.append(int(header[i: j + 1]))
155 |             hformat_template += "{}"
156 |             i = j
157 |         else:
158 |             if header[i] in ["[", "]"]:
159 |                 hformat_template += "\\" + header[i]
160 |             else:
161 |                 hformat_template += header[i]
162 |         i += 1
163 |     items = re.findall(r"[-|\]]\s[^:]*:", hformat_template)
164 |     if len(items) != 1:
165 |         raise RegexError(
166 |             "Username match was not possible. Check that header (%s) is of format '... - %name:' or '[...] %name:'",
167 |             hformat_template,
168 |         )
169 |     hformat_template = hformat_template.replace(items[0][2:-1], "%name")
170 |     code = " %p"
171 |     hformat_template = (
172 |         hformat_template.replace(" PM", code)
173 |         .replace(" AM", code)
174 |         .replace(" A.M.", code)
175 |         .replace(" P.M.", code)
176 |         .replace(" am", code)
177 |         .replace(" pm", code)
178 |         .replace(" a.m.", code)
179 |         .replace(" p.m.", code)
180 |     )
181 |     return hformat_elements, hformat_template
182 | 
183 | 
184 | def _extract_header_format_from_components(elements_list: List[List[int]], template_list: List[int]) -> str:
185 |     """Extract header format from list containing elements and list containing templates.
186 | 
187 |     Args:
188 |         elements_list (list): List with component list.
189 |         template_list (list): List with template strings.
190 | 
191 |     Returns:
192 |         str: Header format.
193 | 
194 |     """
195 |     # Remove outliers
196 |     elements_list_ = []
197 |     template_list_ = []
198 |     lengths = [len(e) for e in elements_list]
199 |     types = ["".join([str(type(ee).__name__) for ee in e]) for e in elements_list]
200 |     len_mode = max(set(lengths), key=lengths.count)
201 |     type_mode = max(set(types), key=types.count)
202 |     for e, t in zip(elements_list, template_list):
203 |         if (len(e) == len_mode) and ("".join([str(type(ee).__name__) for ee in e]) == type_mode):
204 |             elements_list_.append(e)
205 |             template_list_.append(t)
206 |     # Get positions
207 |     df = pd.DataFrame(elements_list_)
208 |     # dates_df = df.select_dtypes(int)
209 |     dates_df = df.select_dtypes("number")
210 |     template = template_list[0]
211 | 
212 |     if "%p" in template:
213 |         hour_code = "%I"
214 |     else:
215 |         hour_code = "%H"
216 | 
217 |     # day
218 |     day_pos = ((dates_df.max() > 27) & (dates_df.max() < 32)).idxmax()
219 |     dates_df = dates_df.drop(columns=[day_pos])
220 |     # year
221 |     # year_pos = dates_df.std().idxmin()
222 |     pos = [0, 1, 2]
223 |     pos.remove(day_pos)
224 |     year_pos = dates_df[pos].max().idxmax()  # Only consider positions 0,1,2
225 |     dates_df = dates_df.drop(columns=[year_pos])
226 |     # Month
227 |     month_pos = dates_df.columns.min()
228 |     dates_df = dates_df.drop(columns=[month_pos])
229 |     # Hour
230 |     hour_pos = 3
231 |     dates_df = dates_df.drop(columns=[hour_pos])
232 |     # Minute
233 |     minutes_pos = 4
234 |     dates_df = dates_df.drop(columns=[minutes_pos])
235 |     # Dictionary with positions and date element code
236 |     dates_pos = {day_pos: "%d", year_pos: "%y", month_pos: "%m", hour_pos: hour_code, minutes_pos: "%M"}
237 |     # Seconds
238 |     if dates_df.shape[1] > 0:
239 |         seconds_pos = 5
240 |         dates_pos[seconds_pos] = "%S"
241 | 
242 |     keys_ordered = sorted(dates_pos.keys())
243 |     dates_codes = [dates_pos[k] for k in keys_ordered]
244 | 
245 |     codes = dates_codes + ["%name"]
246 |     # print(codes)
247 |     # print(template)
248 |     # print(template)
249 |     # print(codes)
250 |     code_template = template.format(*codes)
251 |     # print(code_template)
252 |     # print('---------------')
253 |     # print(code_template)
254 |     return code_template
255 | 


--------------------------------------------------------------------------------
/whatstk/whatsapp/generation.py:
--------------------------------------------------------------------------------
  1 | """Automatic generation of chat using Lorem Ipsum text and time series statistics."""
  2 | 
  3 | 
  4 | import os
  5 | from datetime import datetime, timedelta
  6 | import itertools
  7 | from typing import Optional, List
  8 | 
  9 | import numpy as np
 10 | import pandas as pd
 11 | from emoji.unicode_codes import EMOJI_DATA
 12 | from scipy.stats import lomax
 13 | 
 14 | from whatstk.whatsapp.objects import WhatsAppChat
 15 | from whatstk.whatsapp.hformat import get_supported_hformats_as_list
 16 | from whatstk.utils.utils import COLNAMES_DF, _map_hformat_filename
 17 | 
 18 | 
# ``lorem`` is only needed for chat generation; fail at import time with
# installation instructions when it is missing.
try:
    from lorem import sentence
except ImportError as e:
    msg = (
        "whatstk ChatGenerator requirements are not installed.\n\n"
        "Please pip install as follows:\n\n"
        '  python -m pip install "whatstk[generate]" --upgrade  # or python -m pip install'
    )
    raise ImportError(msg) from e


# Default usernames used by ChatGenerator when no list of users is given.
USERS = ["John", "Mary", "Giuseppe", "+1 123 456 789"]
 31 | 
 32 | 
 33 | class ChatGenerator:
 34 |     """Generate a chat.
 35 | 
 36 |     Args:
 37 |         size (int): Number of messages to generate.
 38 |         users (list, optional): List with names of the users. Defaults to module variable USERS.
 39 |         seed (int, optional): Seed for random processes. Defaults to 100.
 40 | 
 41 |     Examples:
 42 |         This simple example loads a chat using :func:`WhatsAppChat <whatstk.whatsapp.objects.WhatsAppChat>`. Once
 43 |         loaded, we can access its attribute ``df``, which contains the loaded chat as a DataFrame.
 44 | 
 45 |         ..  code-block:: python
 46 | 
 47 |             >>> from whatstk.whatsapp.generation import ChatGenerator
 48 |             >>> from datetime import datetime
 49 |             >>> from whatstk.data import whatsapp_urls
 50 |             >>> chat = ChatGenerator(size=10).generate(last_timestamp=datetime(2020, 1, 1, 0, 0))
 51 |             >>> chat.df.head(5)
 52 |                                     date  username                                            message
 53 |             0 2019-12-31 09:43:04.000525  Giuseppe                               Nisi ad esse cillum.
 54 |             1 2019-12-31 10:19:21.980039  Giuseppe      Tempor dolore sint in eu lorem veniam veniam.
 55 |             2 2019-12-31 13:56:45.575426  Giuseppe  Do quis fugiat sint ut ut, do anim eu est qui ...
 56 |             3 2019-12-31 15:47:29.995420  Giuseppe  Do qui qui elit ea in sed culpa, aliqua magna ...
 57 |             4 2019-12-31 16:23:00.348542      Mary  Sunt excepteur mollit voluptate dolor sint occ...
 58 | 
 59 |     """
 60 | 
 61 |     def __init__(self, size: int, users: Optional[List[str]] = None, seed: int = 100) -> None:
 62 |         """Instantiate ChatGenerator class.
 63 | 
 64 |         Args:
 65 |             size (int): Number of messages to generate.
 66 |             users (list, optional): List with names of the users. Defaults to module variable USERS.
 67 |             seed (int, optional): Seed for random processes. Defaults to 100.
 68 | 
 69 |         """
 70 |         self.size = size
 71 |         self.users = USERS if not users else users
 72 |         self.seed = seed
 73 |         np.random.seed(seed=self.seed)
 74 | 
 75 |     def _generate_messages(self) -> List[str]:
 76 |         """Generate list of messages.
 77 | 
 78 |         To generate sentences, Lorem Ipsum is used.
 79 | 
 80 |         Returns:
 81 |             list: List with messages (as strings).
 82 | 
 83 |         """
 84 |         emojis = self._generate_emojis()
 85 |         s = sentence(count=self.size, comma=(0, 2), word_range=(4, 8))
 86 |         sentences = list(itertools.islice(s, self.size))
 87 |         messages = [sentences[i] + " " + emojis[i] for i in range(self.size)]
 88 |         return messages
 89 | 
 90 |     def _generate_emojis(self, k: int = 1) -> str:
 91 |         """Generate random list of emojis.
 92 | 
 93 |         Emojis are sampled from a list of `n` emojis and `k*n` empty strings.
 94 | 
 95 |         Args:
 96 |             k (int, optional): Defaults to 20.
 97 | 
 98 |         Returns:
 99 |             list: List with emojis
100 | 
101 |         """
102 |         emojis = list(EMOJI_DATA.keys())
103 |         n = len(emojis)
104 |         emojis = emojis + [""] * k * n
105 |         return np.random.choice(emojis, self.size)
106 | 
107 |     def _generate_timestamps(self, last: Optional[datetime] = None) -> List[datetime]:
108 |         """Generate list of timestamps.
109 | 
110 |         Args:
111 |             last (datetime, optional): Datetime of last message. If ``None``, defaults to current date.
112 | 
113 |         Returns:
114 |             list: List with timestamps.
115 | 
116 |         """
117 |         if not last:
118 |             last = datetime.now()
119 |             last = last.replace(microsecond=0)
120 |         c = 1.0065
121 |         scale = 40.06
122 |         loc = 30
123 |         ts_ = [0] + lomax.rvs(c=c, loc=loc, scale=scale, size=self.size - 1, random_state=self.seed).cumsum().tolist()
124 |         ts = [last - timedelta(seconds=t * 60) for t in ts_]
125 |         return ts[::-1]
126 | 
127 |     def _generate_users(self) -> str:
128 |         """Generate list of users.
129 | 
130 |         Returns:
131 |             list: List of name of the users sending the messages.
132 | 
133 |         """
134 |         return np.random.choice(self.users, self.size)
135 | 
136 |     def _generate_df(self, last_timestamp: Optional[datetime] = None) -> pd.DataFrame:
137 |         """Generate random chat as DataFrame.
138 | 
139 |         Args:
140 |             last_timestamp (datetime, optional): Datetime of last message. If ``None``, defaults to current date.
141 | 
142 |         Returns:
143 |             pandas.DataFrame: DataFrame with random messages.
144 | 
145 |         """
146 |         messages = self._generate_messages()
147 |         timestamps = self._generate_timestamps(last=last_timestamp)
148 |         users = self._generate_users()
149 |         df = pd.DataFrame.from_dict(
150 |             {COLNAMES_DF.DATE: timestamps, COLNAMES_DF.USERNAME: users, COLNAMES_DF.MESSAGE: messages}
151 |         )
152 |         return df
153 | 
154 |     def generate(
155 |         self, filepath: Optional[str] = None, hformat: Optional[str] = None, last_timestamp: Optional[datetime] = None
156 |     ) -> str:
157 |         """Generate random chat as :func:`WhatsAppChat <whatstk.whatsapp.objects.WhatsAppChat>`.
158 | 
159 |         Args:
160 |             filepath (str): If given, generated chat is saved with name ``filepath`` (must be a local path).
161 |             hformat (str, optional): :ref:`Format of the header <The header format>`, e.g.
162 |                                     ``'[%y-%m-%d %H:%M:%S] - %name:'``.
163 |             last_timestamp (datetime, optional): Datetime of last message. If `None`, defaults to current date.
164 | 
165 |         Returns:
166 |             WhatsAppChat: Chat with random messages.
167 | 
168 |         ..  seealso::
169 | 
170 |             * :func:`WhatsAppChat.to_txt <whatstk.whatsapp.objects.WhatsAppChat.to_txt>`
171 | 
172 |         """
173 |         df = self._generate_df(last_timestamp=last_timestamp)
174 |         chat = WhatsAppChat(df)
175 |         if filepath:
176 |             chat.to_txt(filepath=filepath, hformat=hformat)
177 |         return chat
178 | 
179 | 
def generate_chats_hformats(
    output_path: str,
    size: int = 2000,
    hformats: Optional[List[str]] = None,
    filepaths: Optional[List[str]] = None,
    last_timestamp: Optional[datetime] = None,
    seed: int = 100,
    verbose: bool = False,
    export_as_zip: bool = False,
) -> None:
    r"""Generate a chat and export using given header format.

    If no hformat specified, chat is generated & exported using all supported header formats.
    A single chat is generated; it is then exported once per header format.

    Args:
        output_path (str): Path to directory to export all generated chats as txt.
        size (int, optional): Number of messages of the chat. Defaults to 2000.
        hformats (list, optional): List of header formats to use when exporting chat. If None,
                                    defaults to all supported header formats.
        filepaths (list, optional): List with filepaths (only txt files), one per header format. If None,
                                    each file is named after its header format, using
                                    `whatstk.utils.utils._map_hformat_filename(hformat)`.
        last_timestamp (datetime, optional): Datetime of last message. If `None`, defaults to current date.
        seed (int, optional): Seed for random processes. Defaults to 100.
        verbose (bool): Set to True to print runtime messages.
        export_as_zip (bool): Set to True to export the chat(s) zipped, additionally.

    Raises:
        ValueError: If ``filepaths`` is given and its length differs from that of ``hformats``.

    ..  seealso::

            * :func:`ChatGenerator <ChatGenerator>`
            * :func:`ChatGenerator.generate <ChatGenerator.generate>`

    """
    if not hformats:
        hformats = get_supported_hformats_as_list()

    # Sanity check
    if filepaths and len(filepaths) != len(hformats):
        raise ValueError("Length of filepaths must be equal to length of hformats.")

    # Generate chat
    chat = ChatGenerator(size=size, seed=seed).generate(last_timestamp=last_timestamp)
    for idx, hformat in enumerate(hformats):
        if verbose:
            print("Exporting format: {}".format(hformat))
        if filepaths:
            filename = filepaths[idx]
        else:
            filename = "{}.txt".format(_map_hformat_filename(hformat))
        filepath = os.path.join(output_path, filename)
        chat.to_txt(filepath=filepath, hformat=hformat)
        if export_as_zip:
            # Swap only the file extension: a plain string replace would also
            # rewrite '.txt' inside directory names, and would leave a path
            # without '.txt' unchanged (the zip would then overwrite the txt).
            zip_filepath = os.path.splitext(filepath)[0] + ".zip"
            chat.to_zip(zip_filepath, hformat)


--------------------------------------------------------------------------------
/whatstk/whatsapp/hformat.py:
--------------------------------------------------------------------------------
  1 | """Header format utils.
  2 | 
  3 | Example: Check if header is available.
  4 | 
  5 |     ..  code-block:: python
  6 | 
  7 |         >>> from whatstk.whatsapp.hformat import is_supported
  8 |         >>> is_supported('%y-%m-%d, %H:%M:%S - %name:')
  9 |         (True, True)
 10 | 
 11 | """
 12 | 
 13 | 
 14 | import os
 15 | import json
 16 | from typing import Tuple, List, Dict
 17 | 
 18 | 
# Absolute path of the JSON file listing the supported header formats; it is
# located in the ``assets`` folder next to this module.
this_directory = os.path.abspath(os.path.dirname(__file__))
assets_folder = "assets"
hformat_support_filename = "header_format_support.json"
hformat_support_filepath = os.path.join(this_directory, assets_folder, hformat_support_filename)
 23 | 
 24 | 
 25 | def is_supported(hformat: str, encoding: str = "utf8") -> Tuple[bool, bool]:
 26 |     """Check if header `hformat` is currently supported.
 27 | 
 28 |     Args:
 29 |         hformat (str): Header format.
 30 |         encoding (str, optional): Encoding to use for UTF when reading/writing (ex. ‘utf-8’).
 31 |                              `List of Python standard encodings
 32 |                              <https://docs.python.org/3/library/codecs.html#standard-encodings>`_.
 33 | 
 34 |     Returns:
 35 |         tuple:
 36 |                 * bool: True if header is supported.
 37 |                 * bool: True if header is supported with `auto_header` feature.
 38 | 
 39 |     """
 40 |     with open(hformat_support_filepath, "r", encoding=encoding) as f:
 41 |         h = json.load(f)
 42 | 
 43 |     if "%P" in hformat or "%p" in hformat:
 44 |         hformat = hformat.replace("%P", "%p").replace("%H", "%I")
 45 |     hformat = hformat.replace("%Y", "%y")
 46 |     auto_header_support = 0
 47 |     support = 0
 48 |     for hh in h:
 49 |         if hformat == hh["format"]:
 50 |             support = 1
 51 |             auto_header_support = hh["auto_header"]
 52 | 
 53 |     return bool(support), bool(auto_header_support)
 54 | 
 55 | 
 56 | def is_supported_verbose(hformat: str) -> str:
 57 |     """Check if header `hformat` is currently supported (both manually and using `auto_header`).
 58 | 
 59 |     Result is shown as a string.
 60 | 
 61 |     Args:
 62 |         hformat (str): Information message.
 63 | 
 64 | 
 65 |     Example:
 66 |             Check if format ``'%y-%m-%d, %H:%M - %name:'`` is supported.
 67 | 
 68 |             .. code-block:: python
 69 | 
 70 |                 >>> from whatstk.whatsapp.hformat import is_supported_verbose
 71 |                 >>> is_supported_verbose('%y-%m-%d, %H:%M - %name:')
 72 |                 "The header '%y-%m-%d, %H:%M - %name:' is supported. `auto_header` for this header is supported."
 73 | 
 74 |     """
 75 |     support, auto_header_support = is_supported(hformat)
 76 | 
 77 |     msg = "The header '{}' is {}supported. `auto_header` for this header is {}supported.".format(
 78 |         hformat,
 79 |         "not " if not support else "",
 80 |         "not " if not auto_header_support else "",
 81 |     )
 82 |     return msg
 83 | 
 84 | 
 85 | def get_supported_hformats_as_list(encoding: str = "utf8") -> List[str]:
 86 |     """Get list of supported formats.
 87 | 
 88 |     Returns:
 89 |         list: List with supported formats (as str).
 90 |         encoding (str, optional): Encoding to use for UTF when reading/writing (ex. ‘utf-8’).
 91 |             `List of Python standard encodings <https://docs.python.org/3/library/codecs.html#standard-encodings>`_.
 92 |     """
 93 |     with open(hformat_support_filepath, "r", encoding=encoding) as f:
 94 |         h = json.load(f)
 95 |     return [hh["format"] for hh in h]
 96 | 
 97 | 
 98 | def get_supported_hformats_as_dict(encoding: str = "utf8") -> Dict[str, int]:
 99 |     """Get dictionary with supported formats and relevant info.
100 | 
101 |     Args:
102 |         encoding (str, optional): Encoding to use for UTF when reading/writing (ex. ‘utf-8’).
103 |                              `List of Python standard encodings
104 |                              <https://docs.python.org/3/library/codecs.html#standard-encodings>`_.
105 | 
106 |     Returns:
107 |         dict: Dict with two elements:
108 |                 * ``format``: Header format. All formats appearing are supported.
109 |                 * ``auto_header``: 1 if auto_header is supported), 0 otherwise.
110 | 
111 |     """
112 |     with open(hformat_support_filepath, "r", encoding=encoding) as f:
113 |         headers = json.load(f)
114 |     return headers
115 | 


--------------------------------------------------------------------------------