├── .bumpversion.cfg ├── .coveragerc ├── .github └── FUNDING.yml ├── .gitignore ├── .readthedocs.yaml ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── assets ├── example1.png ├── logo.png └── logo.svg ├── chats └── whatsapp │ ├── lorem-2000.txt │ ├── lorem-merge-part1.txt │ ├── lorem-merge-part2.txt │ ├── lorem.txt │ └── pokemon.txt ├── docs ├── Makefile ├── _static │ ├── css │ │ └── custom.css │ ├── favicon.png │ ├── html │ │ ├── boxplot.html │ │ ├── custom_interventions_vs_length.html │ │ ├── interventions_count_date.html │ │ ├── interventions_count_date_all.html │ │ ├── interventions_count_date_cum.html │ │ ├── interventions_count_date_length.html │ │ ├── interventions_count_date_length_cum.html │ │ ├── interventions_count_hours.html │ │ ├── interventions_count_months.html │ │ ├── interventions_count_weekday.html │ │ ├── user_message_responses_flow.html │ │ └── user_message_responses_heatmap.html │ └── images │ │ ├── WhatsAppChat.from_source.png │ │ ├── WhatsAppChat.from_sources.png │ │ ├── chat-export-android9-wp2.20.123.gif │ │ └── chat-export-ios17-wp24.5.75.gif ├── _templates │ ├── autosummary │ │ └── modules.rst │ ├── layout.html │ ├── modules.rst │ └── versioning.html ├── assets │ └── style.css ├── conf.py ├── index.rst ├── make.bat └── source │ ├── about.rst │ ├── api │ ├── cmd │ │ ├── cmd_chat_gen.rst │ │ ├── cmd_graph.rst │ │ └── cmd_to_csv.rst │ ├── index.rst │ ├── whatstk.FigureBuilder.rst │ ├── whatstk.WhatsAppChat.rst │ ├── whatstk._chat.rst │ ├── whatstk.analysis.rst │ ├── whatstk.data.rst │ ├── whatstk.graph.rst │ ├── whatstk.utils.rst │ └── whatstk.whatsapp.rst │ ├── changelog.rst │ ├── code_examples │ ├── custom.rst │ ├── index.rst │ ├── interventions_count.rst │ ├── load_chat.rst │ ├── load_chat_gdrive.rst │ ├── load_chat_hformat.rst │ ├── load_chat_multiple.rst │ ├── message_length_boxplot.rst │ └── user_interaction.rst │ ├── community.rst │ ├── contribute.rst │ ├── developer_guide │ └── 
index.rst │ ├── getting_started │ ├── auto_header.rst │ ├── command_line.rst │ ├── export_chat.rst │ ├── hformat.rst │ ├── index.rst │ ├── library-available-chats.rst │ └── load_chat.rst │ ├── modules.rst │ ├── whatstk.analysis.rst │ ├── whatstk.graph.figures.rst │ ├── whatstk.graph.rst │ ├── whatstk.rst │ ├── whatstk.utils.rst │ ├── whatstk.whatsapp.rst │ └── why_whatstk.rst ├── requirements-docs.txt ├── requirements-flake.txt ├── requirements-test.txt ├── requirements.txt ├── run-tests.sh ├── setup.py ├── tests ├── __init__.py ├── analysis │ ├── __init__.py │ ├── test_interventions.py │ └── test_responses.py ├── graph │ ├── __init__.py │ └── test_figures.py ├── test_chat.py ├── test_data.py ├── utils │ ├── __init__.py │ ├── test_chat_merge.py │ ├── test_gdrive.py │ └── test_utils.py └── whatsapp │ ├── __init__.py │ ├── test_auto_header.py │ ├── test_generation.py │ ├── test_hformat.py │ ├── test_objects.py │ └── test_parser.py └── whatstk ├── __init__.py ├── _chat.py ├── analysis ├── __init__.py ├── interventions.py └── responses.py ├── data.py ├── graph ├── __init__.py ├── base.py └── figures │ ├── __init__.py │ ├── boxplot.py │ ├── heatmap.py │ ├── sankey.py │ ├── scatter.py │ └── utils.py ├── scripts ├── __init__.py ├── generate_chats.py ├── graph.py └── txt_to_csv.py ├── utils ├── __init__.py ├── chat_merge.py ├── exceptions.py ├── gdrive.py └── utils.py └── whatsapp ├── __init__.py ├── assets ├── __init__.py └── header_format_support.json ├── auto_header.py ├── generation.py ├── hformat.py ├── objects.py └── parser.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.7.1 3 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(.(?P<pre>[a-z]+)(?P<prenum>\d+))?
 4 | serialize = 
 5 | 	{major}.{minor}.{patch}.{pre}{prenum}
 6 | 	{major}.{minor}.{patch}
 7 | 
 8 | [bumpversion:part:pre]
 9 | optional_value = stable
10 | values = 
11 | 	dev
12 | 	a
13 | 	b
14 | 	rc
15 | 	stable
16 | 
17 | [bumpversion:file:setup.py]
18 | 
19 | [bumpversion:file:README.md]
20 | 
21 | [bumpversion:file:whatstk/__init__.py]
22 | 
23 | [bumpversion:file:docs/conf.py]
24 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | omit = 
4 |         whatstk/tests/*
5 |         whatstk/scripts/*
6 | 
7 | [report]
8 | fail_under=80


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: [lucasrodes]
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | test_data/*
  3 | ### Windows template
  4 | # Windows image file caches
  5 | Thumbs.db
  6 | ehthumbs.db
  7 | 
  8 | # Folder config file
  9 | Desktop.ini
 10 | 
 11 | # Recycle Bin used on file shares
 12 | $RECYCLE.BIN/
 13 | 
 14 | # Windows Installer files
 15 | *.cab
 16 | *.msi
 17 | *.msm
 18 | *.msp
 19 | 
 20 | # Windows shortcuts
 21 | *.lnk
 22 | ### macOS template
 23 | *.DS_Store
 24 | .AppleDouble
 25 | .LSOverride
 26 | 
 27 | # Icon must end with two \r
 28 | Icon
 29 | 
 30 | 
 31 | # Thumbnails
 32 | ._*
 33 | 
 34 | # Files that might appear in the root of a volume
 35 | .DocumentRevisions-V100
 36 | .fseventsd
 37 | .Spotlight-V100
 38 | .TemporaryItems
 39 | .Trashes
 40 | .VolumeIcon.icns
 41 | .com.apple.timemachine.donotpresent
 42 | 
 43 | # Directories potentially created on remote AFP share
 44 | .AppleDB
 45 | .AppleDesktop
 46 | Network Trash Folder
 47 | Temporary Items
 48 | .apdisk
 49 | ### VirtualEnv template
 50 | # Virtualenv
 51 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
 52 | .Python
 53 | [Bb]in
 54 | [Ii]nclude
 55 | [Ll]ib
 56 | [Ll]ib64
 57 | [Ll]ocal
 58 | pyvenv.cfg
 59 | .venv
 60 | pip-selfcheck.json
 61 | ### Vim template
 62 | # swap
 63 | [._]*.s[a-w][a-z]
 64 | [._]s[a-w][a-z]
 65 | # session
 66 | Session.vim
 67 | # temporary
 68 | .netrwhist
 69 | *~
 70 | # auto-generated tag files
 71 | tags
 72 | ### Linux template
 73 | 
 74 | # temporary files which can be created if a process still has a handle open of a deleted file
 75 | .fuse_hidden*
 76 | 
 77 | # KDE directory preferences
 78 | .directory
 79 | 
 80 | # Linux trash folder which might appear on any partition or disk
 81 | .Trash-*
 82 | 
 83 | # .nfs files are created when an open file is removed but is still being accessed
 84 | .nfs*
 85 | ### Python template
 86 | # Byte-compiled / optimized / DLL files
 87 | __pycache__/
 88 | *.py[cod]
 89 | *$py.class
 90 | 
 91 | # C extensions
 92 | *.so
 93 | 
 94 | # Distribution / packaging
 95 | env/
 96 | build/
 97 | develop-eggs/
 98 | dist/
 99 | downloads/
100 | eggs/
101 | .eggs/
102 | lib/
103 | lib64/
104 | parts/
105 | sdist/
106 | var/
107 | *.egg-info/
108 | .installed.cfg
109 | *.egg
110 | 
111 | # PyInstaller
112 | #  Usually these files are written by a python script from a template
113 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
114 | *.manifest
115 | *.spec
116 | 
117 | # Installer logs
118 | pip-log.txt
119 | pip-delete-this-directory.txt
120 | 
121 | # Unit test / coverage reports
122 | htmlcov/
123 | .tox/
124 | .coverage
125 | .coverage.*
126 | .cache
127 | nosetests.xml
128 | coverage.xml
129 | cov.xml
130 | *,cover
131 | .hypothesis/
132 | .vscode
133 | 
134 | # Translations
135 | *.mo
136 | *.pot
137 | 
138 | # Django stuff:
139 | *.log
140 | local_settings.py
141 | 
142 | # Flask stuff:
143 | instance/
144 | .webassets-cache
145 | 
146 | # Scrapy stuff:
147 | .scrapy
148 | 
149 | # Sphinx documentation
150 | #docs/_build/
151 | docs/_build/doctrees
152 | 
153 | # PyBuilder
154 | target/
155 | 
156 | # Jupyter Notebook
157 | .ipynb_checkpoints
158 | 
159 | # pyenv
160 | .python-version
161 | 
162 | # celery beat schedule file
163 | celerybeat-schedule
164 | 
165 | # dotenv
166 | .env
167 | 
168 | # virtualenv
169 | .venv/
170 | venv/
171 | ENV/
172 | 
173 | # Spyder project settings
174 | .spyderproject
175 | 
176 | # Rope project settings
177 | .ropeproject
178 | ### SublimeText template
179 | # cache files for sublime text
180 | *.tmlanguage.cache
181 | *.tmPreferences.cache
182 | *.stTheme.cache
183 | 
184 | # workspace files are user-specific
185 | *.sublime-workspace
186 | 
187 | # project files should be checked into the repository, unless a significant
188 | # proportion of contributors will probably not be using SublimeText
189 | # *.sublime-project
190 | 
191 | # sftp configuration file
192 | sftp-config.json
193 | 
194 | # Package control specific files
195 | Package Control.last-run
196 | Package Control.ca-list
197 | Package Control.ca-bundle
198 | Package Control.system-ca-bundle
199 | Package Control.cache/
200 | Package Control.ca-certs/
201 | bh_unicode_properties.cache
202 | 
203 | # Sublime-github package stores a github token in this file
204 | # https://packagecontrol.io/packages/sublime-github
205 | GitHub.sublime-settings
206 | ### JetBrains template
207 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
208 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
209 | .idea/*
210 | # User-specific stuff:
211 | .idea/workspace.xml
212 | .idea/tasks.xml
213 | 
214 | # Sensitive or high-churn files:
215 | .idea/dataSources/
216 | .idea/dataSources.ids
217 | .idea/dataSources.xml
218 | .idea/dataSources.local.xml
219 | .idea/sqlDataSources.xml
220 | .idea/dynamic.xml
221 | .idea/uiDesigner.xml
222 | 
223 | # Gradle:
224 | .idea/gradle.xml
225 | .idea/libraries
226 | 
227 | # Mongo Explorer plugin:
228 | .idea/mongoSettings.xml
229 | 
230 | ## File-based project format:
231 | *.iws
232 | 
233 | ## Plugin-specific files:
234 | 
235 | # IntelliJ
236 | /out/
237 | 
238 | # mpeltonen/sbt-idea plugin
239 | .idea_modules/
240 | 
241 | # JIRA plugin
242 | atlassian-ide-plugin.xml
243 | 
244 | # Crashlytics plugin (for Android Studio and IntelliJ)
245 | com_crashlytics_export_strings.xml
246 | crashlytics.properties
247 | crashlytics-build.properties
248 | fabric.properties
249 | 
250 | # Chats and results
251 | mychats/
252 | notebooks/Untitled.ipynb
253 | .whatstk
254 | todos.md
255 | examples
256 | 
257 | 
258 | #tox stuff
259 | tox.ini
260 | #.coveragerc
261 | setup.cfg
262 | testreport.html
263 | testreport.xml
264 | version-info.cfg
265 | 
266 | docs2
267 | version-info.cfg
268 | py37
269 | py38
270 | py39
271 | *.ipynb
272 | 
273 | .pypirc
274 | learn/
275 | assets/style.css
276 | tests/chats/*
277 | flake-report
278 | 
279 | notebooks
280 | reports
281 | 
282 | 
283 | docs/_build/
284 | package-lock.json
285 | version-changes
286 | 
287 | client_secrets.json
288 | settings.yaml
289 | credentials.json


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yaml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Set the version of Python and other tools you might need
 9 | build:
10 |   os: ubuntu-22.04
11 |   tools:
12 |     python: "3.10"
13 | 
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 |   configuration: docs/conf.py
17 | 
18 | # We recommend specifying your dependencies to enable reproducible builds:
19 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
20 | python:
21 |   install:
22 |     - method: pip
23 |       path: .
24 |       extra_requirements:
25 |         - full
26 |     - requirements: requirements-docs.txt
27 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: focal
 2 | language: python
 3 | python:
 4 |   - 3.9
 5 |   - "3.10"
 6 |   - "3.11"
 7 |   - "3.12"
 8 | git:
 9 |   depth: false
10 | before_install:
11 |   - pip install --upgrade pip
12 | install:
13 |   # - git fetch --tags
14 |   #
15 |   - pip install -r requirements-test.txt
16 |   - pip install -r requirements-flake.txt
17 |   - ls -l
18 |   - pip uninstall whatstk
19 |   - pip install .[full]
20 |   # - ls -l /home/travis/virtualenv/python3.7.1/lib/python3.7/site-packages/whatstk/whatsapp/assets/
21 |   - cat MANIFEST.in
22 |   - mkdir -p tests/chats/hformats tests/chats/merge
23 |   - whatstk-generate-chat --size 500 -z --output-path tests/chats/hformats/ # Generate chats for hformat checks
24 |   - whatstk-generate-chat --size 300 --last-timestamp 2019-09-01 --hformats '%Y-%m-%d, %H:%M - %name:' --output-path tests/chats/merge/ --filenames file1.txt
25 |   - whatstk-generate-chat --size 300 --last-timestamp 2020-01-01 --hformats '%Y-%m-%d, %H:%M - %name:' --output-path tests/chats/merge/ --filenames file2.txt
26 | #pip install -r requirements.txt
27 | script:
28 |   - flake8 --max-complexity=10 --docstring-convention=google --max-line-length=120 --ignore=ANN101,ANN102,ANN401 whatstk
29 |   - pytest --cov-report term --cov=whatstk tests
30 | after_success:
31 |   - codecov # submit coverage
32 | 
33 | jobs:
34 |   include:
35 |     # perform a linux build
36 |     # - services: docker
37 |     # and a windows build
38 |     - os: windows
39 |       language: shell
40 |       before_install:
41 |         - choco upgrade python -y --version 3.12.2
42 |         - export PATH="/c/Python312:/c/Python312/Scripts:$PATH"
43 |         # make sure it's on PATH as 'python3'
44 |         - ln -s /c/Python312/python.exe /c/Python312/python3.exe
45 |     - stage: deploy
46 |       python: 3.11
47 |       deploy:
48 |         - provider: pypi
49 |           user: $USER_PYPI
50 |           password: $PWD_PYPI
51 |           on:
52 |             tags: true
53 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | We are very open to having collaborators. You can freely fork the repository and open a pull request with your updates!
 4 | For other issues, bugs or suggestions, please report them in the [issues section](https://github.com/lucasrodes/whatstk/issues).
 5 | 
 6 | ## Pull Requests
 7 | 
 8 | Pull requests to the `develop` branch are accepted. Please link your pull requests to specific issues (you may want to
 9 | open an issue first).
10 | 
11 | 
12 | Make sure to test your code before issuing a pull request:
13 | 
14 | 1. Install the library in development mode:
15 | 
16 | ```bash
17 | pip install -e .
18 | ```
19 | 
20 | 2. Run the test script:
21 | 
22 | ```bash
23 | sh run-tests.sh
24 | ```
25 | 
26 | However, pull requests will trigger the Travis CI pipeline, which will run the tests as well.
27 | 
28 | ## Join the community
29 | 
30 | Join us on [Gitter](https://gitter.im/sociepy/whatstk).


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include *.md
 2 | include LICENSE
 3 | include requirements.txt
 4 | include requirements-test.txt
 5 | include requirements-flake.txt
 6 | include requirements-docs.txt
 7 | include whatstk/whatsapp/assets/header_format_support.json
 8 | include .coveragerc
 9 | 
10 | recursive-include altair *.py *.json *.ipynb *.html
11 | global-exclude *.py[co] __pycache__


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
2 | 3 |
4 |

whatstk: analyze WhatsApp chats with python 5 |

6 |

7 | 8 | Package version 9 | 10 |

11 | 12 | 13 |

14 | 15 | Build Status 16 | 17 | 18 | codecov 19 | 20 | 21 | Documentation Status 22 | 23 | 24 | Tutorial 25 | 26 | 27 | Python 3 28 | 29 | 30 | Number of downloads 31 | 32 | 33 | GitHub license 34 | 35 |

36 | 37 | --- 38 | 39 | **Try the [live demo parser](https://whatstk.streamlit.app/) to convert your chats to CSV** 40 | 41 | --- 42 | 43 | 44 | 45 | 46 | **whatstk** is a python package providing tools to parse, analyze and visualise WhatsApp chats developed under the 47 | **[sociepy](https://sociepy.org)** project. Easily convert your chats to csv or simply visualise some stats using 48 | the provided command-line tools or python. The package uses [pandas](https://github.com/pandas-dev/pandas) to process 49 | the data and [plotly](https://github.com/plotly/plotly.py) to visualise it. 50 | 51 | It is distributed under the GPL-3.0 license. 52 | 53 | ⭐ Please **star** our project if you found it interesting to **give us some dopamine** 😄! 54 | 55 | ### Content 56 | 57 | - [Installation](#installation) 58 | - [Getting Started](#getting-started) 59 | - [Documentation](https://whatstk.readthedocs.io/en/stable/) 60 | - [Contribute](#contribute) 61 | - [Covered in](#covered-in) 62 | - [Citation](#citation) 63 | 64 | ## Installation 65 | 66 | ``` 67 | pip install whatstk 68 | ``` 69 | 70 | Install develop version (not stable): 71 | 72 | ``` 73 | pip install git+https://github.com/lucasrodes/whatstk.git@develop 74 | ``` 75 | 76 | _More details [here](https://whatstk.readthedocs.io/en/stable/source/about.html#installation-compatibility)_ 77 | 78 | ## Getting Started 79 | 80 | For a rapid introduction, check this [tutorial on Medium](https://towardsdatascience.com/analyzing-whatsapp-chats-with-python-20d62ce7fe2d). 81 | 82 | #### Export your chat using your phone: 83 | 84 | _See [instructions](https://whatstk.readthedocs.io/en/stable/source/getting_started/export_chat.html)._ 85 | 86 | #### Load chat as a DataFrame 87 | 88 | ```python 89 | from whatstk import df_from_whatsapp 90 | df = df_from_whatsapp("path/to/chat.txt") 91 | ``` 92 | 93 | **NOTE:** You can now also load directly from a zip chat (iOS export). 
94 | 95 | #### Convert chat to csv 96 | 97 | ```bash 98 | $ whatstk-to-csv [input_filename] [output_filename] 99 | ``` 100 | 101 | #### More examples 102 | 103 | _See more in sections [getting started](https://whatstk.readthedocs.io/en/stable/source/getting_started/index.html) and 104 | [examples](https://whatstk.readthedocs.io/en/stable/source/code_examples/index.html)._ 105 | 106 | ## Documentation 107 | 108 | _See [official documentation](https://whatstk.readthedocs.io/en/stable/)._ 109 | 110 | ## Contribute 111 | 112 | _See [contribute section](https://whatstk.readthedocs.io/en/stable/source/contribute.html)._ 113 | 114 | ## License 115 | 116 | [GPL-3.0](LICENSE) 117 | 118 | ## Citation 119 | 120 | Lucas Rodés-Guirao. "whatstk, WhatsApp analysis and parsing toolkit", https://github.com/lucasrodes/whatstk 121 | 122 | ## Covered in 123 | 124 | - [Your Whatsapp Chat History in Cool Graphs](https://deepnote.com/@batmanscode/Your-Whatsapp-Chat-History-in-Cool-Graphs-mQoSsYjUSw29D4nZDs_KwA), by [@batmanscode](https://github.com/batmanscode). 125 | - [WhatsAppening to the news](https://whatsappening.joltetn.eu/), by [@enric1994](https://github.com/enric1994) 126 | - [whatsappening source code](https://github.com/enric1994/whatsappening), by [@enric1994](https://github.com/enric1994) 127 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We release patches for security vulnerabilities only for some versions of the project. The table below shows which versions are supported: 6 | 7 | | Version | Supported | 8 | | ------- | ------------------ | 9 | | 0.2.x | :white_check_mark: | 10 | | 0.1.x | :x: | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Please report (suspected) security vulnerabilities to the [issues section](https://github.com/lucasrodes/whatstk/issues). 
We will analyze it and if the issue is confirmed, we will release a patch as soon as possible. 15 | 16 | 17 | -------------------------------------------------------------------------------- /assets/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/assets/example1.png -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/assets/logo.png -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /chats/whatsapp/pokemon.txt: -------------------------------------------------------------------------------- 1 | 15.04.2016, 15:04 - Pokemon Chat: Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them. 2 | 06.08.2016, 13:18 - Messages you send to this group are now secured with end-to-end encryption. Tap for more info. 3 | 06.08.2016, 13:23 - Ash Ketchum: Hey guys! 4 | 06.08.2016, 13:25 - Brock: Hey Ash, good to have a common group! 5 | 06.08.2016, 13:30 - Misty: Hey guys! Long time since heard anything from you 6 | 06.08.2016, 13:45 - Ash Ketchum: Indeed. I think having a WhatsApp group nowadays is a good idea 7 | 06.08.2016, 14:30 - Misty: Definitely 8 | 06.08.2016, 17:25 - Brock: I totally agree 9 | 07.08.2016, 11:45 - Prof. Oak: Kids, shall I design a smart Pokeball? 10 | 07.08.2016, 18:45 - Ash Ketchum: I don't mind Prof. I quit capturing Pokemon. 11 | 07.08.2016, 19:30 - Misty: Was a great time, but had enough also. 
12 | 07.08.2016, 23:25 - Brock: Guys, I am still in the first gym. No one is playing Pokemon, they went crazy with pokemon Go. 13 | 10.08.2016, 09:45 - Jessie & James: Hey, thanks for adding us. Wanna meet soon? Just for the old times. 14 | 10.08.2016, 11:25 - Raichu: I am in! 15 | 10.08.2016, 13:23 - Ash Ketchum: FFS, no way, Pikachu did you evolve? 16 | 10.08.2016, 15:23 - Raichu: Yes... Weird to have a different body! 17 | 11.08.2016, 19:30 - Misty: Gotta see that. 18 | 11.09.2016, 20:25 - Meowth: Hey people, I was on holiday in Sinnoh. Crazy region. 19 | 31.10.2016, 11:45 - Prof. Oak: Smart-pokeball is created. 20 | 31.10.2016, 12:23 - Wobbuffet: Wo-bbu-ffet 21 | 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | # gitchangelog > source/changelog.rst 20 | 21 | %: Makefile 22 | # gitchangelog > source/changelog.rst 23 | # auto-changelog --output source/changelog.md -u 24 | # auto-changelog --repo .. 
--output source/changelog.md -u 25 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 26 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .wy-side-nav-search { 2 | /* background-color: #25D366; */ 3 | background-color: white; 4 | /* color: red; */ 5 | } 6 | 7 | .wy-nav-top{ 8 | background: #3B9B5E; 9 | } 10 | 11 | 12 | .wy-side-nav-search>div.version{ 13 | color: black; 14 | } 15 | 16 | .wy-side-nav-search>a { 17 | color: black; 18 | } 19 | 20 | a { 21 | color: #46ba71; 22 | } 23 | a:hover { 24 | color: #25D366; 25 | } 26 | 27 | .wy-side-nav-search input[type=text] { 28 | border-color: #25D366; 29 | border-radius: 10px; 30 | } 31 | /* a:visited{ 32 | color: #d32593; 33 | } */ 34 | 35 | 36 | .rst-content dl:not(.docutils) dt { 37 | color: #3B9B5E; 38 | border-top: solid 3px #3B9B5E; 39 | background: #D9FCE6; 40 | } 41 | 42 | .rst-content .viewcode-link { 43 | color: #2980B9 44 | } 45 | 46 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal { 47 | color: #3B9B5E; 48 | } 49 | 50 | code.py-func:hover, code.py-class:hover, code.py-obj:hover, code.py-mod:hover{ 51 | background-color: #3B9B5E; 52 | color: white; 53 | } 54 | 55 | code, .rst-content tt, .rst-content code { 56 | white-space: nowrap; 57 | max-width: 100%; 58 | background: transparent; 59 | border-width: 0px; 60 | font-size: 85%; 61 | } 62 | 63 | .rst-content dl:not(.docutils) dl dt { 64 | border-left: solid 0px white; 65 | background: transparent; 66 | font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace; 67 | color: #3B9B5E; 68 | border-left: solid 3px #3B9B5E; 69 | } 70 | 71 | .wy-alert.wy-alert-info, .rst-content .note, .rst-content .wy-alert-info.attention, .rst-content .wy-alert-info.caution, .rst-content .wy-alert-info.danger, .rst-content .wy-alert-info.error, 
.rst-content .wy-alert-info.hint, .rst-content .wy-alert-info.important, .rst-content .wy-alert-info.tip, .rst-content .wy-alert-info.warning, .rst-content .seealso, .rst-content .wy-alert-info.admonition-todo, .rst-content .wy-alert-info.admonition { 72 | background: white; 73 | border-left: solid 3px #007bff; 74 | } 75 | 76 | .wy-alert.wy-alert-info .wy-alert-title, .rst-content .note .wy-alert-title, .rst-content .wy-alert-info.attention .wy-alert-title, .rst-content .wy-alert-info.caution .wy-alert-title, .rst-content .wy-alert-info.danger .wy-alert-title, .rst-content .wy-alert-info.error .wy-alert-title, .rst-content .wy-alert-info.hint .wy-alert-title, .rst-content .wy-alert-info.important .wy-alert-title, .rst-content .wy-alert-info.tip .wy-alert-title, .rst-content .wy-alert-info.warning .wy-alert-title, .rst-content .seealso .wy-alert-title, .rst-content .wy-alert-info.admonition-todo .wy-alert-title, .rst-content .wy-alert-info.admonition .wy-alert-title, .wy-alert.wy-alert-info .rst-content .admonition-title, .rst-content .wy-alert.wy-alert-info .admonition-title, .rst-content .note .admonition-title, .rst-content .wy-alert-info.attention .admonition-title, .rst-content .wy-alert-info.caution .admonition-title, .rst-content .wy-alert-info.danger .admonition-title, .rst-content .wy-alert-info.error .admonition-title, .rst-content .wy-alert-info.hint .admonition-title, .rst-content .wy-alert-info.important .admonition-title, .rst-content .wy-alert-info.tip .admonition-title, .rst-content .wy-alert-info.warning .admonition-title, .rst-content .seealso .admonition-title, .rst-content .wy-alert-info.admonition-todo .admonition-title, .rst-content .wy-alert-info.admonition .admonition-title { 77 | background: #E0EDFD; 78 | } 79 | 80 | .admonition .admonition-title::before { 81 | color: #007BFF; 82 | } 83 | 84 | .admonition .admonition-title { 85 | color: black; 86 | } 87 | 88 | 89 | .admonition { 90 | box-shadow: 2px 2px 7px 0px rgba(0,0,0,0.25); 91 | } 92 | 
93 | .rst-content div[class^='highlight'] { 94 | border-left-width: 0px; 95 | border-bottom-width: 0px; 96 | border-top-width: 0px; 97 | border-right: 3px solid #e88a50; 98 | } 99 | 100 | .rst-content dl:not(.docutils) dt:first-child { 101 | margin-top: 0; 102 | width: 100%; 103 | } 104 | -------------------------------------------------------------------------------- /docs/_static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/favicon.png -------------------------------------------------------------------------------- /docs/_static/images/WhatsAppChat.from_source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/images/WhatsAppChat.from_source.png -------------------------------------------------------------------------------- /docs/_static/images/WhatsAppChat.from_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/images/WhatsAppChat.from_sources.png -------------------------------------------------------------------------------- /docs/_static/images/chat-export-android9-wp2.20.123.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/images/chat-export-android9-wp2.20.123.gif -------------------------------------------------------------------------------- /docs/_static/images/chat-export-ios17-wp24.5.75.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/docs/_static/images/chat-export-ios17-wp24.5.75.gif -------------------------------------------------------------------------------- /docs/_templates/autosummary/modules.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. contents:: 5 | :local: 6 | 7 | .. automodule:: {{fullname}} 8 | 9 | Members 10 | ======= -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends '!layout.html' %} 2 | {% block document %} 3 | {{super()}} 4 | {% endblock %} 5 | -------------------------------------------------------------------------------- /docs/_templates/modules.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. contents:: 5 | :local: 6 | 7 | .. automodule:: {{fullname}} 8 | 9 | Members 10 | ======= -------------------------------------------------------------------------------- /docs/_templates/versioning.html: -------------------------------------------------------------------------------- 1 | {% if versions %} 2 |

{{ _('Versions') }}

3 |
    4 | {%- for item in versions %} 5 |
  • {{ item.name }}
  • 6 | {%- endfor %} 7 |
8 | {% endif %} -------------------------------------------------------------------------------- /docs/assets/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Helvetica, Arial, sans-serif; 3 | font-size: 12px; 4 | /* do not increase min-width as some may use split screens */ 5 | min-width: 800px; 6 | color: #999; 7 | } 8 | 9 | h1 { 10 | font-size: 24px; 11 | color: black; 12 | } 13 | 14 | h2 { 15 | font-size: 16px; 16 | color: black; 17 | } 18 | 19 | p { 20 | color: black; 21 | } 22 | 23 | a { 24 | color: #999; 25 | } 26 | 27 | table { 28 | border-collapse: collapse; 29 | } 30 | 31 | /****************************** 32 | * SUMMARY INFORMATION 33 | ******************************/ 34 | 35 | #environment td { 36 | padding: 5px; 37 | border: 1px solid #E6E6E6; 38 | } 39 | 40 | #environment tr:nth-child(odd) { 41 | background-color: #f6f6f6; 42 | } 43 | 44 | /****************************** 45 | * TEST RESULT COLORS 46 | ******************************/ 47 | span.passed, .passed .col-result { 48 | color: green; 49 | } 50 | span.skipped, span.xfailed, span.rerun, .skipped .col-result, .xfailed .col-result, .rerun .col-result { 51 | color: orange; 52 | } 53 | span.error, span.failed, span.xpassed, .error .col-result, .failed .col-result, .xpassed .col-result { 54 | color: red; 55 | } 56 | 57 | 58 | /****************************** 59 | * RESULTS TABLE 60 | * 61 | * 1. Table Layout 62 | * 2. Extra 63 | * 3. Sorting items 64 | * 65 | ******************************/ 66 | 67 | /*------------------ 68 | * 1. Table Layout 69 | *------------------*/ 70 | 71 | #results-table { 72 | border: 1px solid #e6e6e6; 73 | color: #999; 74 | font-size: 12px; 75 | width: 100% 76 | } 77 | 78 | #results-table th, #results-table td { 79 | padding: 5px; 80 | border: 1px solid #E6E6E6; 81 | text-align: left 82 | } 83 | #results-table th { 84 | font-weight: bold 85 | } 86 | 87 | /*------------------ 88 | * 2. 
Extra 89 | *------------------*/ 90 | 91 | .log:only-child { 92 | height: inherit 93 | } 94 | .log { 95 | background-color: #e6e6e6; 96 | border: 1px solid #e6e6e6; 97 | color: black; 98 | display: block; 99 | font-family: "Courier New", Courier, monospace; 100 | height: 230px; 101 | overflow-y: scroll; 102 | padding: 5px; 103 | white-space: pre-wrap 104 | } 105 | div.image { 106 | border: 1px solid #e6e6e6; 107 | float: right; 108 | height: 240px; 109 | margin-left: 5px; 110 | overflow: hidden; 111 | width: 320px 112 | } 113 | div.image img { 114 | width: 320px 115 | } 116 | div.video { 117 | border: 1px solid #e6e6e6; 118 | float: right; 119 | height: 240px; 120 | margin-left: 5px; 121 | overflow: hidden; 122 | width: 320px 123 | } 124 | div.video video { 125 | overflow: hidden; 126 | width: 320px; 127 | height: 240px; 128 | } 129 | .collapsed { 130 | display: none; 131 | } 132 | .expander::after { 133 | content: " (show details)"; 134 | color: #BBB; 135 | font-style: italic; 136 | cursor: pointer; 137 | } 138 | .collapser::after { 139 | content: " (hide details)"; 140 | color: #BBB; 141 | font-style: italic; 142 | cursor: pointer; 143 | } 144 | 145 | /*------------------ 146 | * 3. 
Sorting items 147 | *------------------*/ 148 | .sortable { 149 | cursor: pointer; 150 | } 151 | 152 | .sort-icon { 153 | font-size: 0px; 154 | float: left; 155 | margin-right: 5px; 156 | margin-top: 5px; 157 | /*triangle*/ 158 | width: 0; 159 | height: 0; 160 | border-left: 8px solid transparent; 161 | border-right: 8px solid transparent; 162 | } 163 | 164 | .inactive .sort-icon { 165 | /*finish triangle*/ 166 | border-top: 8px solid #E6E6E6; 167 | } 168 | 169 | .asc.active .sort-icon { 170 | /*finish triangle*/ 171 | border-bottom: 8px solid #999; 172 | } 173 | 174 | .desc.active .sort-icon { 175 | /*finish triangle*/ 176 | border-top: 8px solid #999; 177 | } 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
#
import os
import sys
# Make the repository root importable so autodoc can import ``whatstk``.
sys.path.insert(0, os.path.abspath('..'))
# sys.path.insert(0, os.path.abspath('_ext'))

from sphinx.ext.autosummary import Autosummary
from sphinx.ext.autosummary import get_documenter
from docutils.parsers.rst import directives
from sphinx.util.inspect import safe_getattr
from datetime import datetime


# -- Project information -----------------------------------------------------

project = 'whatstk'
copy_right = f'{datetime.now().year}, sociepy'
# Sphinx reads the copyright notice from the ``copyright`` configuration value;
# ``copy_right`` on its own is ignored by Sphinx, so expose the notice under the
# expected name as well (the original name is kept for backward compatibility).
copyright = copy_right  # noqa: A001 - name mandated by Sphinx
author = 'lucasrodes'

# The full version, including alpha/beta/rc tags
version = 'v0.7.1'


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx.ext.todo',
    'sphinx.ext.githubpages',
    'sphinx.ext.autosummary',
    'sphinx_rtd_theme',
    'sphinx_copybutton',
    'sphinx.ext.autosectionlabel',
    'sphinx_git',
    'autodocsumm',
    'sphinx.ext.mathjax',
    'recommonmark'
    # "sphinx_multiversion",
    # 'sphinx_gallery.gen_gallery'
]

# The name of the entry point, without the ".rst" extension.
# By convention this will be "index"
master_doc = "index"

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '../../setup.py']
# NOTE(review): this upper-case name does not look like a Sphinx configuration
# value and nothing in this file reads it - confirm whether it is still needed.
EXCLUDE_PATTERN = ['../setup.py']

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# html_theme = 'python_docs_theme'
# html_theme = 'alabaster'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# -- Copybutton ---------------------------------------------------------------
copybutton_prompt_text = ">>> "


# -- autoautosummary ----------------------------------------------------------
class AutoAutoSummary(Autosummary):
    """Autosummary directive that fills in its own content from a class.

    The single required argument is the dotted path of a class
    (``package.module.ClassName``). When the ``:methods:`` and/or
    ``:attributes:`` option is present, the directive content is replaced by
    the non-underscore methods/attributes of that class before the regular
    ``Autosummary`` machinery runs.
    """

    option_spec = {
        'methods': directives.unchanged,
        'attributes': directives.unchanged
    }

    required_arguments = 1

    @staticmethod
    def get_members(obj, typ, include_public=None):
        """Collect the names of the members of ``obj`` documented as ``typ``.

        Args:
            obj: Object whose ``dir()`` entries are inspected.
            typ (str): Documenter ``objtype`` to keep (e.g. ``'method'``,
                ``'attribute'``).
            include_public (list, optional): Names to report as public even
                though they start with an underscore.

        Returns:
            tuple: ``(public, items)`` where ``items`` holds every matching
            name and ``public`` the subset that either does not start with an
            underscore or is listed in ``include_public``.
        """
        if not include_public:
            include_public = []
        items = []
        for name in dir(obj):
            try:
                # NOTE(review): newer Sphinx releases appear to expect the
                # application object as first argument of ``get_documenter`` -
                # confirm against the Sphinx version pinned for the docs build.
                documenter = get_documenter(safe_getattr(obj, name), obj)
            except AttributeError:
                # Member cannot be retrieved; skip it.
                continue
            if documenter.objtype == typ:
                items.append(name)
        public = [x for x in items if x in include_public or not x.startswith('_')]
        return public, items

    def run(self):
        """Populate ``self.content`` with the class members and render them.

        Member collection is best-effort: if the class cannot be imported or
        inspected, a warning is logged and the directive is rendered with
        whatever content it already had, so the docs build is not aborted.

        Returns:
            list: Nodes produced by ``Autosummary.run``.
        """
        clazz = str(self.arguments[0])
        try:
            module_name, class_name = clazz.rsplit('.', 1)
            module = __import__(module_name, globals(), locals(), [class_name])
            cls = getattr(module, class_name)
            # Accumulate entries so that requesting both ``:methods:`` and
            # ``:attributes:`` lists both instead of the latter overwriting
            # the former.
            entries = []
            if 'methods' in self.options:
                _, methods = self.get_members(cls, 'method', ['__init__'])
                entries.extend(
                    "~%s.%s" % (clazz, method) for method in methods if not method.startswith('_')
                )
                self.content = list(entries)
            if 'attributes' in self.options:
                _, attribs = self.get_members(cls, 'attribute')
                entries.extend(
                    "~%s.%s" % (clazz, attrib) for attrib in attribs if not attrib.startswith('_')
                )
                self.content = list(entries)
        except Exception as err:
            # Previously a ``return`` inside ``finally`` discarded every
            # exception (including KeyboardInterrupt/SystemExit) without a
            # trace. Keep the build going, but make the failure visible.
            from sphinx.util import logging as sphinx_logging

            sphinx_logging.getLogger('autoautosummary').warning(
                "autoautosummary: could not collect members of %r: %s", clazz, err
            )
        return super(AutoAutoSummary, self).run()


# -- Theme --------------------------------------------------------------------
def setup(app):
    """Sphinx extension hook: register the custom stylesheet and directive."""
    app.add_css_file('css/custom.css')
    app.add_directive('autoautosummary', AutoAutoSummary)


html_title = "WhatsApp Analysis Toolkit"
html_logo = "../assets/logo.png"
html_favicon = "_static/favicon.png"

html_show_sourcelink = False
html_copy_source = True

github_url = 'https://github.com/lucasrodes/whatstk'

html_theme_options = {
    'logo_only': True,
    'navigation_depth': 4,
    'display_version': True,
    'collapse_navigation': False,
    'sticky_navigation': False,
    'github_banner': True,
}

# -- Args ---------------------------------------------------------------------
# html4_writer = True
napoleon_use_rtype = False
autosummary_generate = True


# Autodocsum
autodoc_default_options = {
    'autosummary': True,
}

# Sphinx gallery
# from plotly.io._sg_scraper import plotly_sg_scraper
# image_scrapers = ('matplotlib', plotly_sg_scraper,)

# sphinx_gallery_conf = {
#     'examples_dirs': '_static/examples_py',   # path to your example scripts
#     'gallery_dirs': 'source/gallery',  # path to where to save gallery generated output
#     'reference_url': {'plotly': None,
#       },
#     'image_scrapers': image_scrapers,
# }

# html_sidebars = {'**': ['versioning.html']}
# smv_tag_whitelist = r'^(3.0.0.dev0)'
# smv_branch_whitelist = 'feature/documentation'
# smv_tag_whitelist = r'^.*$'
# smv_remote_whitelist = '^.*$'
| # smv_branch_whitelist = r'^(feature/documentation)$' 182 | # smv_released_pattern = r'^tags/.*$' 183 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: source/about.rst 2 | 3 | ---- 4 | 5 | Content: 6 | ======== 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | About whatstk 12 | Getting started 13 | Code examples 14 | API Reference 15 | Why choose whatstk? 16 | Community & Governance 17 | Contribute 18 | Changelog 19 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/about.rst: -------------------------------------------------------------------------------- 1 | About whatstk 2 | ============= 3 | 4 | **whatstk** is a python package providing tools to parse, analyze and visualize WhatsApp chats developed by 5 | `Lucas Rodés-Guirao `_. Easily convert your chats to csv or simply visualize statistics 6 | using the python library. The package uses `pandas `_ to 7 | process the data and `plotly `_ to visualise it. 8 | 9 | You can also `try a live demo `_. 10 | 11 | 12 | The project is distributed under the `GPL-3.0 license `_ 13 | and is available on `GitHub `_. 14 | 15 | ---- 16 | 17 | First contact with whatstk 18 | -------------------------- 19 | **whatstk** is built around :func:`BaseChat ` object interface, which requires class method 20 | :func:`from_source ` to be implemented. This method loads and parses the source 21 | chat file into a pandas.DataFrame. 22 | 23 | Below, we use method :func:`df_from_whatsapp ` to load `LOREM chat 24 | `_. To test it with your own 25 | chat, simply :ref:`export it as a txt file` to your computer and then use class argument ``filepath``, as 26 | shown in the following example. 27 | 28 | 29 | .. code-block:: python 30 | 31 | >>> from whatstk import df_from_whatsapp 32 | >>> from whatstk.data import whatsapp_urls 33 | >>> df = df_from_whatsapp(filepath=whatsapp_urls.LOREM) 34 | >>> df.head(5) 35 | date username message 36 | 0 2020-01-15 02:22:56 Mary Nostrud exercitation magna id. 37 | 1 2020-01-15 03:33:01 Mary Non elit irure irure pariatur exercitation. 
🇩🇰 38 | 2 2020-01-15 04:18:42 +1 123 456 789 Exercitation esse lorem reprehenderit ut ex ve... 39 | 3 2020-01-15 06:05:14 Giuseppe Aliquip dolor reprehenderit voluptate dolore e... 40 | 4 2020-01-15 06:56:00 Mary Ullamco duis et commodo exercitation. 41 | 42 | ---- 43 | 44 | Installation & compatibility 45 | ---------------------------- 46 | This project is on `PyPI `_, install it with pip: 47 | 48 | .. code-block:: bash 49 | 50 | pip install whatstk 51 | 52 | Project has been tested in Python>=3.7. 53 | 54 | From source 55 | ^^^^^^^^^^^ 56 | Clone the project from the `official repository `_ and install it locally 57 | 58 | .. code-block:: bash 59 | 60 | git clone https://github.com/lucasrodes/whatstk.git 61 | cd whatstk 62 | pip install . 63 | 64 | Extensions 65 | ^^^^^^^^^^ 66 | To use :ref:`Google Drive ` or Chat Generation support, install the library along with the corresponding extensions: 67 | 68 | .. code-block:: bash 69 | 70 | pip install whatstk[gdrive] 71 | 72 | .. code-block:: bash 73 | 74 | pip install whatstk[generate] 75 | 76 | Or install the full suite: 77 | 78 | .. code-block:: bash 79 | 80 | pip install whatstk[full] 81 | 82 | 83 | Develop 84 | ^^^^^^^ 85 | You can also install the version in development directly from github 86 | `develop `_ branch. 87 | 88 | .. code-block:: bash 89 | 90 | pip install git+https://github.com/lucasrodes/whatstk.git@develop 91 | 92 | Note: It requires `git `_ to be installed. 93 | 94 | ---- 95 | 96 | Support 97 | ------- 98 | You can ask questions and join the development discussion on `GitHub `_. Use the 99 | `GitHub issues `_ section to report bugs or request features and `GitHub discussions `_ to open up broader discussions. You can also check the `project roadmap `_. 100 | 101 | For more details, refer to the :ref:`contribute section `. 102 | 103 | ---- 104 | 105 | Why this name, whatstk? 
106 | ----------------------- 107 | whatstk stands for "WhatsApp Toolkit", since the project was initially conceived as a python library to read and process WhatsApp chats. It currently only supports WhatsApp chats, but this might be extended in the future. 108 | -------------------------------------------------------------------------------- /docs/source/api/cmd/cmd_chat_gen.rst: -------------------------------------------------------------------------------- 1 | ``whatstk-generate-chat`` 2 | ========================= 3 | 4 | .. warning:: 5 | 6 | To use the chat generation functionalities, install the library with the corresponding extension (ignore the 7 | ``--upgrade`` option if you haven't installed the library): 8 | 9 | .. code-block:: 10 | 11 | pip install whatstk[generate] --upgrade 12 | 13 | Generate random WhatsApp chat. 14 | 15 | .. code-block:: bash 16 | 17 | whatstk-generate-chat --help 18 | usage: Generate chat. Make sure to install the library with required extension: pip install whatstk[generate] 19 | --upgrade 20 | [-h] -o OUTPUT_PATH 21 | [--filenames FILENAMES [FILENAMES ...]] [-s SIZE] 22 | [-f HFORMATS [HFORMATS ...]] 23 | [--last-timestamp LAST_TIMESTAMP] [-v] 24 | 25 | optional arguments: 26 | -h, --help show this help message and exit 27 | -o OUTPUT_PATH, --output-path OUTPUT_PATH 28 | Path where to store generated chats. Must exist. 29 | --filenames FILENAMES [FILENAMES ...] 30 | Filenames. Must be equal length of --hformats. 31 | -s SIZE, --size SIZE Number of messages to create per chat. Defaults to 32 | 500. 33 | -f HFORMATS [HFORMATS ...], --hformats HFORMATS [HFORMATS ...] 34 | Header format. If None, defaults to all supported 35 | hformats. List formats as 'format 1' 'format 2' ... 36 | --last-timestamp LAST_TIMESTAMP 37 | Timestamp of last message. Format YYYY-mm-dd 38 | -v, --verbose Verbosity. 
39 | -------------------------------------------------------------------------------- /docs/source/api/cmd/cmd_graph.rst: -------------------------------------------------------------------------------- 1 | ``whatstk-graph`` 2 | ================= 3 | 4 | Get graph from your WhatsApp txt file. 5 | 6 | .. code-block:: bash 7 | 8 | usage: whatstk-graph [-h] [-o OUTPUT_FILENAME] 9 | [-t {interventions_count,msg_length}] 10 | [-id {date,hour,weekday,month}] [-ic] [-il] [-f HFORMAT] 11 | input_filename 12 | 13 | Visualise a WhatsApp chat. For advance settings, see package 14 | librarydocumentation 15 | 16 | positional arguments: 17 | input_filename Input txt file. 18 | 19 | optional arguments: 20 | -h, --help show this help message and exit 21 | -o OUTPUT_FILENAME, --output_filename OUTPUT_FILENAME 22 | Graph generated can be stored as an HTMLfile. 23 | -t {interventions_count,msg_length}, --type {interventions_count,msg_length} 24 | Type of graph. 25 | -id {date,hour,weekday,month}, --icount-date-mode {date,hour,weekday,month} 26 | Select date mode. Only valid for 27 | --type=interventions_count. 28 | -ic, --icount-cumulative 29 | Show values in a cumulative fashion. Only valid for 30 | --type=interventions_count. 31 | -il, --icount-msg-length 32 | Count an intervention with its number of characters. 33 | Otherwise an intervention is count as one.Only valid 34 | for --type=interventions_count. 35 | -f HFORMAT, --hformat HFORMAT 36 | By default, auto-header detection isattempted. If does 37 | not work, you can specify it manually using this 38 | argument. 39 | -------------------------------------------------------------------------------- /docs/source/api/cmd/cmd_to_csv.rst: -------------------------------------------------------------------------------- 1 | ``whatstk-to-csv`` 2 | ================== 3 | 4 | Convert a WhatsApp txt file to csv. 5 | 6 | ..
code-block:: bash 7 | 8 | usage: whatstk-to-csv [-h] [-f HFORMAT] input_filename output_filename 9 | 10 | Convert a Whatsapp chat from csv to txt. 11 | 12 | positional arguments: 13 | input_filename Input txt file. 14 | output_filename Name of output csv file. 15 | 16 | optional arguments: 17 | -h, --help show this help message and exit 18 | -f HFORMAT, --hformat HFORMAT 19 | By default, auto-header detection isattempted. If does 20 | not work, you can specify it manually using this 21 | argument. 22 | -------------------------------------------------------------------------------- /docs/source/api/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | =============== 3 | 4 | Main objects 5 | ------------ 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | WhatsAppChat 11 | FigureBuilder 12 | 13 | Core API 14 | -------- 15 | 16 | .. toctree:: 17 | :maxdepth: 4 18 | 19 | whatstk.whatsapp 20 | whatstk.analysis 21 | whatstk.graph 22 | whatstk.utils 23 | whatstk.data 24 | whatstk._chat 25 | 26 | Command line tools 27 | ------------------ 28 | 29 | .. toctree:: 30 | :maxdepth: 4 31 | 32 | whatstk-to-csv 33 | whatstk-graph 34 | whatstk-generate-chat 35 | -------------------------------------------------------------------------------- /docs/source/api/whatstk.FigureBuilder.rst: -------------------------------------------------------------------------------- 1 | FigureBuilder 2 | ============= 3 | **whatstk** provides this object to ease the generation of insightful plots from your chat. :class:`FigureBuilder 4 | ` contains several methods to generate different plots. It assigns a unique color to each user, 5 | so that a user can be easily identified in all plots. 6 | 7 | To instantiate it, you just need to provide the chat (as pandas.DataFrame or :class:`BaseChat `-API-compliant object). 8 | 9 | 10 | 11 | ..
autoclass:: whatstk.FigureBuilder 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: 15 | :inherited-members: 16 | -------------------------------------------------------------------------------- /docs/source/api/whatstk.WhatsAppChat.rst: -------------------------------------------------------------------------------- 1 | WhatsAppChat 2 | ============ 3 | 4 | Object :class:`WhatsAppChat ` works as a bridge between the python code and the whatsapp chat text 5 | file. Easily load a chat from a text file and work with it using all the power of 6 | `pandas `_. 7 | 8 | A chat can be loaded from a single source file using :func:`WhatsAppChat.from_source ` 9 | 10 | .. image:: ../../_static/images/WhatsAppChat.from_source.png 11 | :width: 1000 12 | :alt: Concept diagram of WhatsAppChat.from_source 13 | 14 | 15 | or multiple source files using :func:`WhatsAppChat.from_sources ` 16 | 17 | .. image:: ../../_static/images/WhatsAppChat.from_sources.png 18 | :width: 1000 19 | :alt: Concept diagram of WhatsAppChat.from_sources 20 | 21 | 22 | .. autoclass:: whatstk.WhatsAppChat 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | :inherited-members: 27 | -------------------------------------------------------------------------------- /docs/source/api/whatstk._chat.rst: -------------------------------------------------------------------------------- 1 | whatstk._chat 2 | ================ 3 | 4 | 5 | .. automodule:: whatstk._chat 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/api/whatstk.analysis.rst: -------------------------------------------------------------------------------- 1 | whatstk.analysis 2 | ================ 3 | 4 | .. 
automodule:: whatstk.analysis 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/whatstk.data.rst: -------------------------------------------------------------------------------- 1 | whatstk.data 2 | ============ 3 | 4 | .. automodule:: whatstk.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/whatstk.graph.rst: -------------------------------------------------------------------------------- 1 | whatstk.graph 2 | ===================== 3 | 4 | Plot tools using plotly. 5 | 6 | 7 | Import `plot `_ (by plotly) to plot figures. 9 | 10 | .. code-block:: python 11 | 12 | >>> from whatstk.graph import plot 13 | 14 | 15 | whatstk.graph.base 16 | ------------------ 17 | 18 | .. automodule:: whatstk.graph.base 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | whatstk.graph.figures 25 | --------------------- 26 | 27 | .. automodule:: whatstk.graph.figures 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | whatstk.graph.figures.boxplot 33 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 34 | 35 | .. automodule:: whatstk.graph.figures.boxplot 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | whatstk.graph.figures.heatmap 41 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 42 | 43 | .. automodule:: whatstk.graph.figures.heatmap 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | whatstk.graph.figures.sankey 49 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 50 | 51 | .. automodule:: whatstk.graph.figures.sankey 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | whatstk.graph.figures.scatter 57 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 58 | 59 | .. 
automodule:: whatstk.graph.figures.scatter 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | whatstk.graph.figures.utils 65 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 66 | 67 | .. automodule:: whatstk.graph.figures.utils 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | 72 | 73 | -------------------------------------------------------------------------------- /docs/source/api/whatstk.utils.rst: -------------------------------------------------------------------------------- 1 | whatstk.utils 2 | ============= 3 | 4 | 5 | .. automodule:: whatstk.utils 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | ---- 11 | 12 | whatstk.utils.chat\_merge 13 | ------------------------- 14 | 15 | .. automodule:: whatstk.utils.chat_merge 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | ---- 21 | 22 | whatstk.utils.gdrive 23 | ------------------------ 24 | 25 | .. automodule:: whatstk.utils.gdrive 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | ---- 31 | 32 | whatstk.utils.exceptions 33 | ------------------------ 34 | 35 | .. automodule:: whatstk.utils.exceptions 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | ---- 41 | 42 | whatstk.utils.utils 43 | ------------------- 44 | 45 | .. automodule:: whatstk.utils.utils 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | 51 | -------------------------------------------------------------------------------- /docs/source/api/whatstk.whatsapp.rst: -------------------------------------------------------------------------------- 1 | whatstk.whatsapp 2 | ================ 3 | 4 | 5 | .. automodule:: whatstk.whatsapp 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | ---- 11 | 12 | 13 | whatstk.whatsapp.objects 14 | ------------------------ 15 | 16 | .. 
automodule:: whatstk.whatsapp.objects 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | :inherited-members: 21 | 22 | ---- 23 | 24 | whatstk.whatsapp.parser 25 | ----------------------- 26 | 27 | .. automodule:: whatstk.whatsapp.parser 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | ---- 33 | 34 | whatstk.whatsapp.auto\_header 35 | --------------------------------- 36 | 37 | .. automodule:: whatstk.whatsapp.auto_header 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | ---- 43 | 44 | whatstk.whatsapp.generation 45 | ------------------------------ 46 | 47 | .. automodule:: whatstk.whatsapp.generation 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | ---- 53 | 54 | whatstk.whatsapp.hformat 55 | ------------------------- 56 | 57 | .. automodule:: whatstk.whatsapp.hformat 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 5 | Unreleased 6 | ---------- 7 | .. git_changelog:: 8 | :rev-list: v0.6.2..HEAD 9 | 10 | 11 | v0.6.x 12 | ---------- 13 | .. git_changelog:: 14 | :rev-list: v0.5.0..v0.6.2 15 | 16 | v0.5.x 17 | ---------- 18 | .. git_changelog:: 19 | :rev-list: v0.4.0..v0.5.0 20 | 21 | v0.4.x 22 | ---------- 23 | .. git_changelog:: 24 | :rev-list: v0.3.0..v0.4.0 25 | 26 | v0.3.x 27 | ---------- 28 | .. git_changelog:: 29 | :rev-list: 0.2.0..v0.3.0 30 | 31 | v0.2.x 32 | ---------- 33 | .. git_changelog:: 34 | :rev-list: 0.1.10..0.2.0 35 | -------------------------------------------------------------------------------- /docs/source/code_examples/custom.rst: -------------------------------------------------------------------------------- 1 | Custom plot 2 | =========== 3 | 4 | :class:`FigureBuilder ` provides some tools to easily visualize your chat. 
However, the possible 5 | visualizations are infinite. Here, we provide some examples of a custom visualization using some library tools together 6 | with pandas and plotly. 7 | 8 | 9 | Number of messages vs. Number of characters sent 10 | ------------------------------------------------ 11 | For each user, we will obtain a 2D scatter plot measuring the number of messages and characters sent in a day. That is, 12 | for a given user we will have `N` points, where `N` is the number of days that the user has sent at least one message. 13 | Each point therefore corresponds to a specific day, where the x-axis and the y-axis measure the number of messages sent 14 | and the average number of characters per message in that day, respectively. 15 | 16 | 17 | First of all, let's instantiate objects :class:`WhatsAppChat` (chat loading) and 18 | :class:`FigureBuilder ` (figure coloring). 19 | 20 | .. code-block:: python 21 | 22 | >>> from whatstk import WhatsAppChat, FigureBuilder 23 | >>> from whatstk.data import whatsapp_urls 24 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000) 25 | >>> fb = FigureBuilder(chat=chat) 26 | 27 | Next, we obtain the number of messages and number of characters sent per user per day. 28 | 29 | .. code-block:: python 30 | 31 | >>> from whatstk.analysis import get_interventions_count 32 | >>> counts_interv = get_interventions_count(chat=chat, date_mode='date', msg_length=False, cumulative=False) 33 | >>> counts_len = get_interventions_count(chat=chat, date_mode='date', msg_length=True, cumulative=False) 34 | 35 | Time to process the data a bit. We obtain a DataFrame with five columns: *username*, *date*, *num_characters*, 36 | *num_interventions* and *avg_characters*. 37 | 38 | ..
code-block:: python 39 | 40 | >>> import pandas as pd 41 | >>> counts_len = pd.DataFrame(counts_len.unstack(), columns=['num_characters']) 42 | >>> counts_interv = pd.DataFrame(counts_interv.unstack(), columns=['num_interventions']) 43 | >>> counts = counts_len.merge(counts_interv, left_index=True, right_index=True) 44 | >>> # Remove all zero entries and get average number of characters 45 | >>> counts = counts[~(counts['num_interventions'] == 0)].reset_index() 46 | >>> counts['avg_characters'] = counts['num_characters']/counts['num_interventions'] 47 | >>> counts.head(5) 48 | username date num_characters num_interventions avg_characters 49 | 0 +1 123 456 789 2019-04-16 40 1 40.000000 50 | 1 +1 123 456 789 2019-04-17 21 1 21.000000 51 | 2 +1 123 456 789 2019-04-21 90 2 45.000000 52 | 3 +1 123 456 789 2019-04-25 127 3 42.333333 53 | 4 +1 123 456 789 2019-04-26 33 1 33.000000 54 | 55 | [5 rows x 5 columns] 56 | 57 | So far we have obtained a dataframe ``counts``, whose rows correspond to a specific message. However, in this example we 58 | are interested in the aggregated values per day. Hence, we group this dataframe by user and date and re-calculate the 59 | number of messages sent and average number of characters sent per day. 60 | 61 | .. code-block:: python 62 | 63 | >>> agg_operations = {'avg_characters': 'mean','num_interventions': 'mean'} 64 | >>> counts = counts.groupby(['username', counts.date.dt.date]).agg(agg_operations) 65 | >>> counts = counts.rename_axis(index=['username', 'date']) 66 | >>> counts = counts.reset_index() 67 | >>> counts.head(5) 68 | username date avg_characters num_interventions 69 | 0 +1 123 456 789 2019-04-16 40.000000 1 70 | 1 +1 123 456 789 2019-04-17 21.000000 1 71 | 2 +1 123 456 789 2019-04-21 45.000000 2 72 | 3 +1 123 456 789 2019-04-25 42.333333 3 73 | 4 +1 123 456 789 2019-04-26 33.000000 1 74 | 75 | Once the dataframe is obtained, we generate a plot using `Histogram2dContour 76 | `_ by plotly. 77 | 78 | .. 
code-block:: python 79 | 80 | >>> from whatstk.graph import plot 81 | >>> import plotly.graph_objs as go 82 | >>> traces = [] 83 | >>> for username in fb.usernames: 84 | >>> counts_user = counts[counts['username']==username] 85 | >>> traces.append( 86 | >>> go.Histogram2dContour( 87 | >>> contours={'coloring': 'none'}, 88 | >>> x=counts_user['num_interventions'], 89 | >>> y=counts_user['avg_characters'], 90 | >>> # mode='markers', 91 | >>> # marker=dict(color=fb.user_color_mapping[username], opacity=0.2), 92 | >>> name=username, 93 | >>> showlegend=True, 94 | >>> line={'color': fb.user_color_mapping[username]}, 95 | >>> nbinsx=10, nbinsy=20 96 | >>> ) 97 | >>> ) 98 | 99 | 100 | .. code-block:: python 101 | 102 | >>> layout = { 103 | >>> 'title': 'Average number of characters sent in a day vs Interventions per day', 104 | >>> 'yaxis_title': 'avg characters', 105 | >>> 'xaxis_title': 'num interventions', 106 | >>> } 107 | >>> fig = go.Figure(data=traces, layout=layout) 108 | >>> plot(fig) 109 | 110 | .. raw:: html 111 | :file: ../../_static/html/custom_interventions_vs_length.html 112 | 113 | -------------------------------------------------------------------------------- /docs/source/code_examples/index.rst: -------------------------------------------------------------------------------- 1 | Code examples 2 | =============== 3 | 4 | Basic examples 5 | -------------- 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | Load WhatsApp chat 11 | Load WhatsApp chat from multiple sources 12 | Load WhatsApp chat from Google Drive 13 | Load WhatsApp chat with specific hformat 14 | Rename usernames 15 | 16 | Visualisations 17 | -------------- 18 | 19 | With :class:`FigureBuilder ` you can get great insights from your chat. Below we provide some 20 | examples on the visualizations that you can get with this library with the help of `plotly `_. 22 | 23 | 24 | .. 
toctree:: 25 | :maxdepth: 1 26 | 27 | Count of user interventions 28 | Message length boxplot 29 | User interaction 30 | Custom plot example 31 | 32 | ---- 33 | 34 | If you think that something is missing please `raise an issue `_. -------------------------------------------------------------------------------- /docs/source/code_examples/interventions_count.rst: -------------------------------------------------------------------------------- 1 | Counting user interventions 2 | =========================== 3 | 4 | Counting the user interventions can give relevant insights on which users "dominate" the conversation, even more in a 5 | group chat. To this end, object :class:`FigureBuilder ` has the method 6 | :func:`user_interventions_count_linechart `, which generates a 7 | plotly figure with the count of user interventions. 8 | 9 | First of all, we load a chat and create an instance of :class:`FigureBuilder `. 10 | 11 | .. code-block:: python 12 | 13 | >>> from whatstk import WhatsAppChat, FigureBuilder 14 | >>> from whatstk.graph import plot 15 | >>> from whatstk.data import whatsapp_urls 16 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000) 17 | >>> fb = FigureBuilder(chat=chat) 18 | 19 | Count of user interventions 20 | --------------------------- 21 | 22 | Default call of the aforementioned method displays the number of interventions sent by each user per day. 23 | 24 | .. code-block:: python 25 | 26 | >>> fig = fb.user_interventions_count_linechart() 27 | >>> plot(fig) 28 | 29 | 30 | .. raw:: html 31 | :file: ../../_static/html/interventions_count_date.html 32 | 33 | 34 | As seen in the previous plot, the number of messages sent per user in a day tends to oscillate quite a lot 35 | from day to day, which might hinder a good visualisation of the data. Hence, we can use ``cumulative=True`` to 36 | illustrate the cumulative count of interventions instead. 37 | 38 | .. 
code-block:: python 39 | 40 | >>> fig = fb.user_interventions_count_linechart(cumulative=True, title='User inteventions count (cumulative)') 41 | >>> plot(fig) 42 | 43 | 44 | .. raw:: html 45 | :file: ../../_static/html/interventions_count_date_cum.html 46 | 47 | 48 | Additionally, we can obtain the counts for all users combined using ``all_users=True``: 49 | 50 | .. code-block:: python 51 | 52 | >>> fig = fb.user_interventions_count_linechart(cumulative=True, all_users=True, title='Inteventions count (cumulative)') 53 | >>> plot(fig) 54 | 55 | 56 | .. raw:: html 57 | :file: ../../_static/html/interventions_count_date_all.html 58 | 59 | 60 | Count of characters sent per user 61 | --------------------------------- 62 | 63 | Now, instead of counting the number of interventions we might want to explore the number of characters sent. Note that a 64 | user might send tons of messages with few words, whereas another user might send few messages with tons of words. 65 | Depending on your analysis you might prefer exploring interventions or number of characters. Getting the number of 66 | characters sent per user can be done using ``msg_length=True`` when calling function 67 | :func:`user_interventions_count_linechart `. 68 | 69 | In the following we explore the cumulative number of characters sent per user. 70 | 71 | .. code-block:: python 72 | 73 | >>> fig = fb.user_interventions_count_linechart(msg_length=True, cumulative=True, title='Characters sent by user (cumulative)') 74 | >>> plot(fig) 75 | 76 | 77 | .. raw:: html 78 | :file: ../../_static/html/interventions_count_date_length_cum.html 79 | 80 | 81 | 82 | Other insights 83 | -------------- 84 | 85 | Method :func:`user_interventions_count_linechart ` has the 86 | argument ``date_mode``, which allows for several types of count-grouping methods. By default, the method obtains the 87 | counts per date (what has been used in previous examples). 
88 | 89 | 90 | Using ``date_mode=hour`` illustrates the distribution of user interventions over the 24 hours in a day. In this example, 91 | for instance, Giuseppe has their interventions peak in hour ranges [01:00, 02:00] and [20:00, 21:00], with 21 92 | interventions in each. 93 | 94 | .. code-block:: python 95 | 96 | >>> fig = fb.user_interventions_count_linechart(date_mode='hour', title='User interventions count (hour)', 97 | xlabel='Hour') 98 | >>> plot(fig) 99 | 100 | .. raw:: html 101 | :file: ../../_static/html/interventions_count_hours.html 102 | 103 | Using ``date_mode=weekday`` illustrates the distribution of user interventions over the 7 days of the week. In this 104 | example, for instance, we see that Monday and Sunday are the days with the most interventions. 105 | 106 | .. code-block:: python 107 | 108 | >>> fig = fb.user_interventions_count_linechart(date_mode='weekday', title='User interventions count (weekly)', 109 | xlabel='Week day') 110 | >>> plot(fig) 111 | 112 | .. raw:: html 113 | :file: ../../_static/html/interventions_count_weekday.html 114 | 115 | 116 | Using ``date_mode=month`` illustrates the distribution of user interventions over the 12 months of the year. In this 117 | example, for instance, we observe that all users have their interventions peak in June (except for Giuseppe, which has 118 | their peak in July). Maybe summer calling? 119 | 120 | .. code-block:: python 121 | 122 | >>> fig = fb.user_interventions_count_linechart(date_mode='month', title='User interventions count (yearly)', xlabel='Month') 123 | >>> plot(fig) 124 | 125 | .. 
raw:: html 126 | :file: ../../_static/html/interventions_count_months.html 127 | -------------------------------------------------------------------------------- /docs/source/code_examples/load_chat.rst: -------------------------------------------------------------------------------- 1 | Load WhatsApp chat 2 | ================== 3 | 4 | Once you have :doc:`exported <../getting_started/export_chat>` a chat it is time to load it in python. 5 | 6 | In this example we load the example `LOREM chat `_, which is available online, using library class :class:`WhatsAppChat 8 | `. 9 | 10 | .. code-block:: python 11 | 12 | >>> from whatstk import WhatsAppChat 13 | >>> from whatstk.data import whatsapp_urls 14 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM) 15 | 16 | Once loaded, we can check some of the chat messages by accessing its attribute :func:`df `, 17 | which is a pandas.DataFrame with columns `date` index (timestamp of message), `username` (name of user sending the 18 | message) and `message` (message sent). 19 | 20 | .. code-block:: python 21 | 22 | >>> chat.df.head(5) 23 | date username message 24 | 0 2020-01-15 02:22:56 Mary Nostrud exercitation magna id. 25 | 1 2020-01-15 03:33:01 Mary Non elit irure irure pariatur exercitation. 🇩🇰 26 | 2 2020-01-15 04:18:42 +1 123 456 789 Exercitation esse lorem reprehenderit ut ex ve... 27 | 3 2020-01-15 06:05:14 Giuseppe Aliquip dolor reprehenderit voluptate dolore e... 28 | 4 2020-01-15 06:56:00 Mary Ullamco duis et commodo exercitation. 29 | 30 | Getting the start and end date of the chat can give us a good overview of the chat content. 31 | 32 | .. code-block:: python 33 | 34 | >>> print(f"Start date: {chat.start_date}\nEnd date: {chat.end_date}") 35 | Start date: 2020-01-15 02:22:56 36 | End date: 2020-05-11 22:32:48 37 | 38 | Also, getting a list with the chat members is simple 39 | 40 | .. 
code-block:: python 41 | 42 | >>> chat.users 43 | ['+1 123 456 789', 'Giuseppe', 'John', 'Mary'] 44 | 45 | ---- 46 | 47 | .. seealso:: 48 | 49 | * :ref:`Load WhatsApp chat from multiple sources ` 50 | * :ref:`Load WhatsApp chat from Google Drive ` 51 | * :ref:`Load WhatsApp chat with specific hformat ` -------------------------------------------------------------------------------- /docs/source/code_examples/load_chat_gdrive.rst: -------------------------------------------------------------------------------- 1 | Load WhatsApp chat from Google Drive 2 | ==================================== 3 | 4 | .. warning:: 5 | 6 | To load chats from Google Drive, install the library with the corresponding extension (ignore the 7 | ``--upgrade`` option if you haven't installed the library): 8 | 9 | .. code-block:: 10 | 11 | pip install whatstk[gdrive] --upgrade 12 | 13 | You can also load a file saved in your Google Drive. Note that in order to do so, you need first to configure the 14 | credentials to interact with Google Drive. 15 | 16 | Configure credentials 17 | --------------------- 18 | 19 | In particular, you need the client secret JSON file. This can be downloaded from the Google Console. To get this file, we recommend following `this tutorial 20 | `_, which is 21 | inspired by `PyDrive2 documentation `_. Some 22 | important additions to previous tutorials are: 23 | 24 | - Make sure to add yourself in Test users, as noted in `this thread `_ 25 | - Select Desktop App instead of Web Application as the application type when creating the OAuth Client ID. 26 | 27 | Once you have downloaded the client secrets, run :func:`gdrive_init `, which will 28 | guide you through the authentication process. You will need to access a link via your browser and copy and paste a 29 | verification code. 30 | 31 | .. 
code-block:: python 32 | 33 | >>> from whatstk.utils import gdrive_init 34 | >>> gdrive_init("path/to/client_secrets.json") 35 | Go to the following link in your browser: 36 | 37 | https://accounts.google.com/... 38 | 39 | Enter verification code: 40 | 41 | This should only be run the first time to correctly configure your Google credentials. 42 | 43 | 44 | Load a file from Google Drive 45 | ----------------------------- 46 | 47 | You can pass a file reference to :class:`WhatsAppChat ` by means of its ID. All files in Google 48 | Drive have a unique ID. To obtain it, create a `shareable link 49 | `_, which will have the following format: 50 | 51 | .. code-block:: 52 | 53 | https://drive.google.com/file/d/[FILE-ID]/view?usp=sharing 54 | 55 | 56 | Now, simply copy ``[FILE-ID]`` and run: 57 | 58 | .. code-block:: python 59 | 60 | >>> from whatstk import WhatsAppChat 61 | >>> chat = WhatsAppChat.from_source("gdrive://[FILE-ID]") 62 | 63 | Note that Google Drive file IDs are passed with prefix `gdrive://`. 64 | -------------------------------------------------------------------------------- /docs/source/code_examples/load_chat_hformat.rst: -------------------------------------------------------------------------------- 1 | Load WhatsApp chat with specific hformat 2 | ======================================== 3 | 4 | If ``auto_header`` option fails, you can still load your chat manually specifying the ``hformat``. In the example below, 5 | we have that the ``hformat='%d.%m.%y, %H:%M - %name:'``. 6 | 7 | .. code-block:: python 8 | 9 | >>> from whatstk.whatsapp.objects import WhatsAppChat 10 | >>> from whatstk.data import whatsapp_urls 11 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON, hformat='%d.%m.%y, %H:%M - %name:') 12 | >>> chat.df.head(5) 13 | date username message 14 | 0 2016-08-06 13:23:00 Ash Ketchum Hey guys! 15 | 1 2016-08-06 13:25:00 Brock Hey Ash, good to have a common group! 16 | 2 2016-08-06 13:30:00 Misty Hey guys! 
Long time haven't heard anything fro... 17 | 3 2016-08-06 13:45:00 Ash Ketchum Indeed. I think having a whatsapp group nowada... 18 | 4 2016-08-06 14:30:00 Misty Definetly 19 | 20 | ---- 21 | 22 | .. seealso:: 23 | 24 | * :ref:`The header format ` 25 | * :ref:`Load WhatsApp chat ` 26 | * :ref:`Load WhatsApp chat from Google Drive ` 27 | * :ref:`Load WhatsApp chat with specific hformat ` 28 | -------------------------------------------------------------------------------- /docs/source/code_examples/load_chat_multiple.rst: -------------------------------------------------------------------------------- 1 | Load WhatsApp chat from multiple sources 2 | ======================================== 3 | 4 | You can also load a chat using multiple source files. You might want to use this when several files have been exported 5 | from the same chat over the years. 6 | 7 | In the example below, we load chats 8 | `LOREM1 `_ and `LOREM2 `_. 9 | 10 | .. code-block:: python 11 | 12 | >>> from whatstk import WhatsAppChat 13 | >>> from whatstk.data import whatsapp_urls 14 | >>> chat = WhatsAppChat.from_sources(filepaths=[whatsapp_urls.LOREM1, whatsapp_urls.LOREM2]) 15 | 16 | Rename usernames 17 | ---------------- 18 | 19 | In the example here, chat `LOREM1 20 | `_ and chat `LOREM2 21 | `_ contain slightly 22 | different usernames. In particular, in chat LOREM2, user *Mary* appears as *Maria* and *Maria2*: 23 | 24 | .. code-block:: python 25 | 26 | >>> WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM1).users 27 | ['+1 123 456 789', 'Giuseppe', 'John', 'Mary'] 28 | >>> WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM2).users 29 | ['+1 123 456 789', 'Giuseppe', 'John', 'Maria', 'Maria2'] 30 | >>> chat.users 31 | ['+1 123 456 789', 'Giuseppe', 'John', 'Maria', 'Maria2', 'Mary'] 32 | 33 | To draw some conclusions based on user behaviour we would like to group *Mary*, *Maria* and *Maria2* under the same 34 | username. 
To fix this, we rename *Maria* and *Maria2* as *Mary*: 35 | 36 | .. code-block:: python 37 | 38 | 39 | >>> chat = chat.rename_users({'Mary': ['Maria', 'Maria2']}) 40 | >>> chat.users 41 | ['+1 123 456 789', 'Giuseppe', 'John', 'Mary'] 42 | 43 | 44 | ---- 45 | 46 | .. seealso:: 47 | 48 | * :ref:`Load WhatsApp chat ` 49 | * :ref:`Load WhatsApp chat from Google Drive ` 50 | * :ref:`Load WhatsApp chat with specific hformat ` -------------------------------------------------------------------------------- /docs/source/code_examples/message_length_boxplot.rst: -------------------------------------------------------------------------------- 1 | Message length boxplot 2 | ====================== 3 | 4 | Different users send different sorts of messages. In particular, the length of the messages (number of characters) can 5 | vary substantially depending on the user sending the message. 6 | 7 | In this example, we explore the statistics behind the length of user messages. To this end, we can use method 8 | :func:`user_msg_length_boxplot `, which illustrates the length of each 9 | user's messages by means of `box plots `_. 10 | 11 | 12 | .. code-block:: python 13 | 14 | >>> from whatstk import WhatsAppChat, FigureBuilder 15 | >>> from whatstk.graph import plot 16 | >>> from whatstk.data import whatsapp_urls 17 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000) 18 | >>> fig = FigureBuilder(chat=chat).user_msg_length_boxplot() 19 | >>> plot(fig) 20 | 21 | 22 | .. raw:: html 23 | :file: ../../_static/html/boxplot.html 24 | -------------------------------------------------------------------------------- /docs/source/code_examples/user_interaction.rst: -------------------------------------------------------------------------------- 1 | User interaction 2 | ================ 3 | 4 | The user interaction can shed some light on the different kinds of conversations that occur in a chat group. 
For 5 | instance, when a certain topic appears some users might intervene and others will not, forming *user clusters*. To this 6 | end, a first approach in detecting such clusters resides in which users respond to which users. 7 | 8 | User interaction heatmap 9 | ------------------------ 10 | 11 | In the following we visualize the *response matrix*, which tells us the number of messages sent by a certain user to the 12 | rest of users. 13 | 14 | 15 | For instance, in this specific example we observe that user *Giuseppe* sends 153 messages to *+1 123 456 789* and that 16 | *Mary* receives 122 messages from *John*. 17 | 18 | .. code-block:: python 19 | 20 | >>> from whatstk import WhatsAppChat, FigureBuilder 21 | >>> from whatstk.graph import plot 22 | >>> from whatstk.data import whatsapp_urls 23 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000) 24 | >>> fig = FigureBuilder(chat=chat).user_message_responses_heatmap() 25 | >>> plot(fig) 26 | 27 | 28 | .. raw:: html 29 | :file: ../../_static/html/user_message_responses_heatmap.html 30 | 31 | .. seealso:: 32 | 33 | * :func:`user_message_responses_heatmap ` 34 | 35 | User interaction flow 36 | --------------------- 37 | 38 | A good way to visualize responses between users is `Sankey diagrams `_. 39 | The information conveyed by the graph below is the same as the one in the previous section, but the way it is done is 40 | slightly different (Sankey diagram instead of a heatmap). 41 | 42 | .. code-block:: python 43 | 44 | >>> from whatstk import WhatsAppChat, FigureBuilder 45 | >>> from whatstk.graph import plot 46 | >>> from whatstk.data import whatsapp_urls 47 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000) 48 | >>> fig = FigureBuilder(chat=chat).user_message_responses_flow() 49 | >>> plot(fig) 50 | 51 | 52 | .. raw:: html 53 | :file: ../../_static/html/user_message_responses_flow.html 54 | 55 | .. 
seealso:: 56 | 57 | * :func:`user_message_responses_flow ` 58 | -------------------------------------------------------------------------------- /docs/source/community.rst: -------------------------------------------------------------------------------- 1 | Community & Governance 2 | ======================= 3 | 4 | **whatstk** is a fully open-source project done for and by the community. It is primarily developed at sociepy by the 5 | whatstk team, with the help of open-source developers. 6 | 7 | For library discussions, consider joining `gitter group `_. 8 | 9 | ---- 10 | 11 | Leadership 12 | ---------- 13 | 14 | BDFL 15 | ^^^^ 16 | Role: final call in decisions related to the API. 17 | 18 | - `Lucas Rodés-Guirao `_ 19 | 20 | Community Contributors 21 | ^^^^^^^^^^^^^^^^^^^^^^ 22 | 23 | - `Albert Aparicio Isarn `_ 24 | - `Kolmar Kafran `_ 25 | - `Clara Sáez Calabuig `_ (project logo) 26 | -------------------------------------------------------------------------------- /docs/source/contribute.rst: -------------------------------------------------------------------------------- 1 | Contribute 2 | ========== 3 | 4 | We are really open to your thoughts and feedback! 5 | 6 | ---- 7 | 8 | Bug reporting 9 | ------------- 10 | Please report any bug that you may find to the `issues `_ section. 11 | 12 | ---- 13 | 14 | Requesting a Feature 15 | -------------------- 16 | If you find a new feature could be useful for the community, please try to add it in the 17 | `issues `_ section with a clear description. 18 | 19 | ---- 20 | 21 | Submitting a Pull Request 22 | ------------------------- 23 | - Start by forking the `develop `_ branch. 24 | - Add your code to the project! 25 | - Test your code running script `run-tests.sh `_. 26 | This script checks the code style (flake8) and the logic of your code (pytest). Note: Make sure to open and read it. The first time you will need to run steps 1.1, 1.2 and 1.3. 27 | 28 | .. 
code-block:: bash 29 | 30 | sh ./run-tests.sh 31 | 32 | This script generates three HTML files which are placed within a created folder `reports`. 33 | 34 | - Once your code has successfully passed the tests, you can submit a pull request and wait for its approval. 35 | 36 | 37 | .. todo:: 38 | 39 | Use `tox `_ 40 | 41 | Approval of pull request 42 | ^^^^^^^^^^^^^^^^^^^^^^^^ 43 | 44 | A pull request will be accepted if it: 45 | 46 | - Adds new functionalities of interest. 47 | - Does not decrease the overall project code `coverage `_. 48 | 49 | Note: You will need to add tests for your code. For this, you can check the current `tests `_. 50 | 51 | ---- 52 | 53 | Adding new examples 54 | ------------------- 55 | To add new examples, consider editing a ``rst`` file yourself in the ``docs/source/`` directory of the repository. For 56 | questions or doubts, use `GitHub discussions `_. 57 | 58 | ---- 59 | 60 | API discussions 61 | --------------- 62 | Consider posting your questions or suggestions on `GitHub discussions `_ or `GitHub issues `_. 63 | 64 | ---- 65 | 66 | Doubts? 67 | ------- 68 | 69 | Feel free to `contact me `_ :) 70 | -------------------------------------------------------------------------------- /docs/source/developer_guide/index.rst: -------------------------------------------------------------------------------- 1 | Developer Guide 2 | =============== 3 | In this section 4 | 5 | .. toctree:: 6 | .. 
:hidden: 7 | :maxdepth: 2 8 | 9 | Changelog 10 | -------------------------------------------------------------------------------- /docs/source/getting_started/auto_header.rst: -------------------------------------------------------------------------------- 1 | Auto header 2 | =========== 3 | 4 | -------------------------------------------------------------------------------- /docs/source/getting_started/command_line.rst: -------------------------------------------------------------------------------- 1 | Command line 2 | ============ 3 | 4 | **whatstk** provides a set of command line tools to obtain quick results using the command line. To use these, make sure 5 | that you have previously :ref:`installed the library `. 6 | 7 | For instance, convert a WhatsApp text file to a CSV file using 8 | 9 | .. code-block:: 10 | 11 | whatstk-to-csv [input_filename] [output_filename] 12 | 13 | 14 | For more details, check the :ref:`command line tools documentation `. 15 | -------------------------------------------------------------------------------- /docs/source/getting_started/export_chat.rst: -------------------------------------------------------------------------------- 1 | Export a WhatsApp chat 2 | ====================== 3 | 4 | Exporting a WhatsApp chat can be easily done from your Android or iOS device. It is done on a chat basis, so if you want 5 | to export several chats you will have to export them individually. **When exporting, make sure to select the chats Without Media option**. Once generated, you can send it via mail, so you 6 | can save it in your computer. 7 | 8 | Android 9 | -------- 10 | The export on Android might include several files. We are only interested in the text file (i.e. ``txt`` extension 11 | file). 12 | 13 | .. 
figure:: ../../_static/images/chat-export-android9-wp2.20.123.gif 14 | :width: 300 15 | :alt: Concept diagram of WhatsAppChat.from_source 16 | :align: center 17 | :figclass: align-center 18 | 19 | Android 9, WhatsApp v2.20.123 20 | 21 | For more details, refer to `official website `_. 22 | 23 | iOS 24 | --- 25 | The chat is exported as a `zip `_, which can be easily unzipped in 26 | your computer. 27 | 28 | .. figure:: ../../_static/images/chat-export-ios17-wp24.5.75.gif 29 | :width: 300 30 | :alt: Concept diagram of WhatsAppChat.from_source 31 | :align: center 32 | :figclass: align-center 33 | 34 | iOS 17.3.1, WhatsApp v24.5.75 35 | -------------------------------------------------------------------------------- /docs/source/getting_started/hformat.rst: -------------------------------------------------------------------------------- 1 | The header format 2 | ================= 3 | 4 | In WhatsApp, a chat file syntax can differ between devices, OS and language settings, which makes it hard to correctly 5 | parse the data for all formats. 6 | 7 | The header appears for each message sent in the chat and contains the timestamp and name of the user that sent the message. 8 | 9 | See it for yourself and open :ref:`an exported chat file `. You will find that the messages have a similar format like the one below: 10 | 11 | .. code-block:: 12 | 13 | 15.04.2016, 15:04 - You created group “Sample Group” 14 | 06.08.2016, 13:18 - Messages you send to this group are now secured with end-to-end encryption. Tap for more info. 15 | 06.08.2016, 13:23 - Ash Ketchum: Hey guys! 16 | 06.08.2016, 13:25 - Brock: Hey Ash, good to have a common group! 17 | 06.08.2016, 13:30 - Misty: Hey guys! Long time haven't heard anything from you 18 | 06.08.2016, 13:45 - Ash Ketchum: Indeed. I think having a whatsapp group nowadays is a good idea 19 | 06.08.2016, 14:30 - Misty: Definetly 20 | 06.08.2016, 17:25 - Brock: I totally agree 21 | 07.08.2016, 11:45 - Prof. 
Oak: Kids, shall I design a smart poke-ball? 22 | 23 | In this example, the header is **day.month.year, hour:minutes - username:** which corresponds to the header format 24 | (i.e. **hformat**) ``'%d.%m.%y, %H:%M - %name:'``. However, in your case it may be slightly different depending on 25 | your phone settings. 26 | 27 | Check the table below to see the codes for each header format unit: 28 | 29 | 30 | .. csv-table:: header format units 31 | :header: "Date unit code", "Description" 32 | :widths: 50, 50 33 | :align: center 34 | 35 | ``'%y'`` (or ``'%Y'``), Year 36 | ``'%m'``, Month of the year (1-12) 37 | ``'%d'``, Day of the month (1-31) 38 | ``'%H'``, Hour 24h-clock (0-23) 39 | ``'%I'``, Hour 12h-clock (1-12) 40 | ``'%p'`` (or ``'%P'``), "AM/PM", "am/pm", "A.M/P.M", "a.m/p.m" characters 41 | ``'%M'``, Minutes (0-59) 42 | ``'%S'``, Seconds (0-59) 43 | ``'%name'``, Name of user 44 | 45 | .. seealso:: 46 | :ref:`Load WhatsApp chat with specific hformat ` 47 | -------------------------------------------------------------------------------- /docs/source/getting_started/index.rst: -------------------------------------------------------------------------------- 1 | Getting started 2 | =============== 3 | 4 | Getting started with the library is fairly easy. 5 | 6 | .. toctree:: Contents 7 | :maxdepth: 1 8 | 9 | Export a WhatsApp chat from your phone 10 | Load a WhatsApp chat 11 | Command line tools 12 | The Header format 13 | Library available chats 14 | 15 | For examples refer to the :ref:`code examples ` section. 16 | 17 | For a rapid introduction, check this `tutorial on Medium `_. 
18 | -------------------------------------------------------------------------------- /docs/source/getting_started/library-available-chats.rst: -------------------------------------------------------------------------------- 1 | Library available chats 2 | ======================= 3 | 4 | For the purpose of showcasing code examples and benchmarking different implementations, we have created a pool of chats, 5 | hosted in the `official repository page `_. If you want to test 6 | the library with one of your own tests, check in the :ref:`code examples `. 7 | 8 | The chats are available via their corresponding URLs, which are listed in source code :mod:`whatstk.data`. 9 | 10 | .. contents:: Contents 11 | :depth: 3 12 | 13 | WhatsApp 14 | -------- 15 | 16 | Object ``whatsapp_urls`` contains all URLs for WhatsApp chats. 17 | 18 | .. code-block:: python 19 | 20 | >>> from whatstk.data import whatsapp_urls 21 | 22 | POKEMON 23 | ^^^^^^^ 24 | 25 | Brief fictional chat with Pokemon characters, which was manually designed by `@lucasrodes 26 | `_ in `commit 666d6ea9cc030c4322fbe44ae64b8f1a0fdb5169 27 | `_. 28 | 29 | .. code-block:: python 30 | 31 | >>> from whatstk.data import whatsapp_urls 32 | >>> from whatstk import WhatsAppChat 33 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON) 34 | >>> chat.df.head(5) 35 | date username message 36 | 0 2016-08-06 13:23:00 Ash Ketchum Hey guys! 37 | 1 2016-08-06 13:25:00 Brock Hey Ash, good to have a common group! 38 | 2 2016-08-06 13:30:00 Misty Hey guys! Long time haven't heard anything fro... 39 | 3 2016-08-06 13:45:00 Ash Ketchum Indeed. I think having a whatsapp group nowada... 40 | 4 2016-08-06 14:30:00 Misty Definetly 41 | 42 | 43 | .. seealso:: 44 | `Chat file `_ 45 | 46 | 47 | LOREM 48 | ^^^^^ 49 | Chat with 500 interventions of fictional users, generated using `python-lorem `_ 50 | library. 51 | 52 | 53 | .. 
code-block:: python 54 | 55 | >>> from whatstk.data import whatsapp_urls 56 | >>> from whatstk import WhatsAppChat 57 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM) 58 | >>> chat.df.head(5) 59 | date username message 60 | 0 2020-01-15 02:22:56 Mary Nostrud exercitation magna id. 61 | 1 2020-01-15 03:33:01 Mary Non elit irure irure pariatur exercitation. 🇩🇰 62 | 2 2020-01-15 04:18:42 +1 123 456 789 Exercitation esse lorem reprehenderit ut ex ve... 63 | 3 2020-01-15 06:05:14 Giuseppe Aliquip dolor reprehenderit voluptate dolore e... 64 | 4 2020-01-15 06:56:00 Mary Ullamco duis et commodo exercitation. 65 | 66 | .. seealso:: 67 | `Chat file `_ 68 | 69 | LOREM1 70 | ^^^^^^ 71 | Chat with 300 interventions of fictional users, generated using `python-lorem `_. 72 | 73 | .. code-block:: python 74 | 75 | >>> from whatstk.data import whatsapp_urls 76 | >>> from whatstk import WhatsAppChat 77 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM1) 78 | >>> chat.df.head(5) 79 | date username message 80 | 0 2019-10-20 10:16:00 John Laborum sed excepteur id eu cillum sunt ut. 81 | 1 2019-10-20 11:15:00 Mary Ad aliquip reprehenderit proident est irure mo... 82 | 2 2019-10-20 12:16:00 +1 123 456 789 Nostrud adipiscing ex enim reprehenderit minim... 83 | 3 2019-10-20 12:57:00 +1 123 456 789 Deserunt proident laborum exercitation ex temp... 84 | 4 2019-10-20 17:28:00 John Do ex dolor consequat tempor et ex. 85 | 86 | .. seealso:: 87 | `Chat file `_ 88 | 89 | LOREM2 90 | ^^^^^^ 91 | Chat with 300 interventions of fictional users, generated using `python-lorem `_. 92 | 93 | Can be used along with **LOREM1** to test :func:`chat merging functionalities ` or :ref:`multiple-source loading `. 94 | 95 | .. 
code-block:: python 96 | 97 | >>> from whatstk.data import whatsapp_urls 98 | >>> from whatstk import WhatsAppChat 99 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM2) 100 | >>> chat.df.head(5) 101 | date username message 102 | 0 2020-06-20 10:16:00 John Elit incididunt lorem sed nostrud. 103 | 1 2020-06-20 11:15:00 Maria Esse do irure dolor tempor ipsum fugiat. 🇩🇰 104 | 2 2020-06-20 12:16:00 +1 123 456 789 Cillum anim non eu deserunt consectetur dolor ... 105 | 3 2020-06-20 12:57:00 +1 123 456 789 Non ipsum proident veniam est. 🏊🏻 106 | 4 2020-06-20 17:28:00 John Dolore in cupidatat proident. 107 | 108 | .. seealso:: 109 | `Chat file `_ 110 | 111 | LOREM_2000 112 | ^^^^^^^^^^ 113 | Chat with 2000 interventions of fictional users, generated using `python-lorem `_. 114 | 115 | .. code-block:: python 116 | 117 | >>> from whatstk.data import whatsapp_urls 118 | >>> from whatstk import WhatsAppChat 119 | >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM_2000) 120 | >>> chat.df.head(5) 121 | date username message 122 | 0 2019-04-16 02:09:00 +1 123 456 789 Et labore proident laboris do labore ex. 123 | 1 2019-04-16 03:01:00 Mary Reprehenderit id aute consectetur aliquip nost... 124 | 2 2019-04-17 12:56:00 John Amet magna officia ullamco pariatur ipsum cupi... 125 | 3 2019-04-17 13:30:00 Mary Cillum aute et cupidatat ipsum, occaecat lorem... 126 | 4 2019-04-17 15:09:00 John Eiusmod irure laboris dolore anim, velit velit... 127 | 128 | .. seealso:: 129 | `Chat file `_ 130 | -------------------------------------------------------------------------------- /docs/source/getting_started/load_chat.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../code_examples/load_chat.rst 2 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | whatstk 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | whatstk 8 | -------------------------------------------------------------------------------- /docs/source/whatstk.analysis.rst: -------------------------------------------------------------------------------- 1 | whatstk.analysis package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | whatstk.analysis.interventions module 8 | ------------------------------------- 9 | 10 | .. automodule:: whatstk.analysis.interventions 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | whatstk.analysis.responses module 16 | --------------------------------- 17 | 18 | .. automodule:: whatstk.analysis.responses 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: whatstk.analysis 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/whatstk.graph.figures.rst: -------------------------------------------------------------------------------- 1 | whatstk.graph.figures package 2 | ============================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | whatstk.graph.figures.boxplot module 8 | ------------------------------------ 9 | 10 | .. automodule:: whatstk.graph.figures.boxplot 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | whatstk.graph.figures.heatmap module 16 | ------------------------------------ 17 | 18 | .. automodule:: whatstk.graph.figures.heatmap 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | whatstk.graph.figures.sankey module 24 | ----------------------------------- 25 | 26 | .. 
automodule:: whatstk.graph.figures.sankey 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | whatstk.graph.figures.scatter module 32 | ------------------------------------ 33 | 34 | .. automodule:: whatstk.graph.figures.scatter 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | whatstk.graph.figures.utils module 40 | ---------------------------------- 41 | 42 | .. automodule:: whatstk.graph.figures.utils 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: whatstk.graph.figures 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/source/whatstk.graph.rst: -------------------------------------------------------------------------------- 1 | whatstk.graph package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | whatstk.graph.figures 11 | 12 | Submodules 13 | ---------- 14 | 15 | whatstk.graph.base module 16 | ------------------------- 17 | 18 | .. automodule:: whatstk.graph.base 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: whatstk.graph 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/whatstk.rst: -------------------------------------------------------------------------------- 1 | whatstk package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | whatstk.analysis 11 | whatstk.graph 12 | whatstk.utils 13 | whatstk.whatsapp 14 | 15 | Submodules 16 | ---------- 17 | 18 | whatstk.data module 19 | ------------------- 20 | 21 | .. 
automodule:: whatstk.data 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | 27 | Module contents 28 | --------------- 29 | 30 | .. automodule:: whatstk 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | -------------------------------------------------------------------------------- /docs/source/whatstk.utils.rst: -------------------------------------------------------------------------------- 1 | whatstk.utils package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | whatstk.utils.chat\_merge module 8 | -------------------------------- 9 | 10 | .. automodule:: whatstk.utils.chat_merge 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | whatstk.utils.exceptions module 16 | ------------------------------- 17 | 18 | .. automodule:: whatstk.utils.exceptions 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | whatstk.utils.gdrive module 24 | --------------------------- 25 | 26 | .. automodule:: whatstk.utils.gdrive 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | whatstk.utils.utils module 32 | -------------------------- 33 | 34 | .. automodule:: whatstk.utils.utils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: whatstk.utils 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/source/whatstk.whatsapp.rst: -------------------------------------------------------------------------------- 1 | whatstk.whatsapp package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | whatstk.whatsapp.auto\_header module 8 | ------------------------------------ 9 | 10 | .. automodule:: whatstk.whatsapp.auto_header 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | whatstk.whatsapp.generation module 16 | ---------------------------------- 17 | 18 | ..
automodule:: whatstk.whatsapp.generation 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | whatstk.whatsapp.hformat module 24 | ------------------------------- 25 | 26 | .. automodule:: whatstk.whatsapp.hformat 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | whatstk.whatsapp.objects module 32 | ------------------------------- 33 | 34 | .. automodule:: whatstk.whatsapp.objects 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | whatstk.whatsapp.parser module 40 | ------------------------------ 41 | 42 | .. automodule:: whatstk.whatsapp.parser 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: whatstk.whatsapp 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/source/why_whatstk.rst: -------------------------------------------------------------------------------- 1 | Why choose whatstk? 2 | =================== 3 | 4 | There are many python libraries to deal with WhatsApp and other platform chat files. Why should you choose **whatstk** 5 | over these? 6 | 7 | Automatic parser 8 | ---------------- 9 | In WhatsApp, the chat might be exported in :ref:`different formats ` depending on your phone 10 | configuration, which adds complexity when parsing the chat. **whatstk** incorporates a reliable and powerful 11 | :mod:`parser ` to correctly infer the structure of most of the chats. In the rare and 12 | improbable case that the automatic parser does not work for a certain chat, you can still use 13 | `hformat `_. 14 | 15 | The power of pandas and plotly 16 | ------------------------------ 17 | **whatstk** uses the well-established and maintained Python libraries `pandas `_ to 18 | process the data and `plotly `_ to create figures, exploiting their potential to do both 19 | efficiently.
20 | 21 | Open source and Community oriented 22 | ---------------------------------- 23 | The project is distributed under the `GPL-3.0 license `_, 24 | available on `GitHub `_ and open for `user contributions `_. 25 | 26 | The project has been maintained since 2016 by `@lucasrodes `_. 27 | -------------------------------------------------------------------------------- /requirements-docs.txt: -------------------------------------------------------------------------------- 1 | recommonmark==0.6.0 2 | Sphinx~=5.3.0 3 | sphinx-rtd-theme==0.4.3 4 | sphinx-copybutton~=0.5.2 5 | sphinx-git==11.0.0 6 | autodocsumm~=0.2.11 7 | # gitchangelog==3.0.4 8 | auto-changelog~=0.6.0 9 | # pillow, psutil, sphinx-gallery 10 | docutils==0.16 11 | -------------------------------------------------------------------------------- /requirements-flake.txt: -------------------------------------------------------------------------------- 1 | # 2 | flake8~=6.0.0 3 | flake8-docstrings~=1.7.0 4 | flake8-bugbear~=23.3.0 5 | flake8-builtins~=2.1.0 6 | flake8-bandit~=4.1.0 7 | flake8-mutable~=1.2.0 8 | flake8-annotations~=3.0.0 9 | flake8-html~=0.4.3 -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | pytest~=7.2.0 2 | pytest-cov~=4.0.0 3 | coverage~=7.2.2 4 | codecov~=2.1.0 5 | pytest-html~=3.2.0 6 | pytest-mock~=3.10.0 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | emoji~=2.10.1 2 | numpy~=1.26.4 3 | pandas~=2.2.1 4 | plotly~=5.20.0 5 | seaborn~=0.13.2 6 | -------------------------------------------------------------------------------- /run-tests.sh: -------------------------------------------------------------------------------- 1 | ## (1) Test dependencies (run this part only the first time).
2 | # If using zsh, install using '\', i.e. `pip install -e .\[full\]` 3 | # pip install -e .[full] 4 | 5 | ## (1.1) Install pytest dependencies 6 | # pip install -r requirements-test.txt 7 | 8 | ## (1.2) Install flake8 dependencies 9 | # pip install -r requirements-flake.txt 10 | 11 | ## (1.3) Generate chats for test 12 | # mkdir -p tests/chats/hformats tests/chats/merge 13 | # whatstk-generate-chat --size 500 -z --output-path tests/chats/hformats/ 14 | # whatstk-generate-chat --size 300 -z --last-timestamp 2019-09-01 \ 15 | # --hformats '%Y-%m-%d, %H:%M - %name:' \ 16 | # --output-path tests/chats/merge/ --filenames file1.txt 17 | # whatstk-generate-chat --size 300 -z --last-timestamp 2020-01-01 \ 18 | # --hformats '%Y-%m-%d, %H:%M - %name:' \ 19 | # --output-path tests/chats/merge/ --filenames file2.txt 20 | 21 | 22 | ## (2) Run flake 23 | flake8 \ 24 | --max-complexity 10\ 25 | --docstring-convention=google\ 26 | --format=html --htmldir=reports/flake-report\ 27 | --max-line-length=120\ 28 | --ignore=ANN101,ANN102,ANN401\ 29 | whatstk 30 | 31 | ## (3) Run tests 32 | py.test \ 33 | --html=reports/testreport.html\ 34 | --cov-report html:reports/htmlcov\ 35 | --cov-report term\ 36 | --cov-report xml:reports/cov.xml\ 37 | --cov=whatstk tests 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """setup script.""" 2 | 3 | 4 | from setuptools import setup, find_packages 5 | import os 6 | import glob 7 | 8 | 9 | this_directory = os.path.abspath(os.path.dirname(__file__)) 10 | with open(os.path.join(this_directory, 'README.md'), encoding='utf8') as f: 11 | long_description = f.read() 12 | 13 | with open(os.path.join(this_directory, 'requirements.txt')) as f: 14 | requirements = f.readlines() 15 | 16 | with open(os.path.join(this_directory, 'requirements-test.txt')) as f: 17 | requirements_test = f.readlines() 18 | 19 | with
open(os.path.join(this_directory, 'requirements-flake.txt')) as f: 20 | requirements_flake = f.readlines() 21 | 22 | with open(os.path.join(this_directory, 'requirements-docs.txt')) as f: 23 | requirements_docs = f.readlines() 24 | 25 | requirements_gdrive = [ 26 | "PyDrive2~=1.15.0", 27 | "PyYAML~=6.0", 28 | ] 29 | 30 | requirements_generate = [ 31 | "scipy~=1.12.0", 32 | "python-lorem==1.2.0", 33 | ] 34 | 35 | requirements_full = requirements_gdrive + requirements_generate 36 | 37 | 38 | extras_require = { 39 | "gdrive": requirements_gdrive, 40 | "generate": requirements_generate, 41 | "full": requirements_full, 42 | "dev": requirements_test + requirements_flake + requirements_docs, 43 | } 44 | 45 | 46 | setup( 47 | name='whatstk', 48 | version="0.7.1", 49 | description="Parser and analytics tools for WhatsApp group chats", 50 | long_description=long_description, 51 | long_description_content_type='text/markdown', 52 | url='http://github.com/lucasrodes/whatstk', 53 | author='Lucas Rodes-Guirao', 54 | license='GPL-v3', 55 | install_requires=requirements, 56 | packages=find_packages('.'), 57 | package_dir={'': '.'}, 58 | py_modules=[os.path.splitext(os.path.basename(path))[0] for path in glob.glob('./*.py')], 59 | include_package_data=True, 60 | zip_safe=False, 61 | classifiers=[ 62 | "Development Status :: 4 - Beta", 63 | "Programming Language :: Python", 64 | "Programming Language :: Python :: 3 :: Only", 65 | "Programming Language :: Python :: 3.9", 66 | "Programming Language :: Python :: 3.10", 67 | "Programming Language :: Python :: 3.11", 68 | "Programming Language :: Python :: 3.12", 69 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 70 | "Operating System :: OS Independent", 71 | ], 72 | keywords='whatsapp analysis parser chat', 73 | project_urls={ 74 | 'Documentation': 'https://whatstk.readthedocs.io/en/stable/', 75 | 'Github': 'http://github.com/lucasrodes/whatstk', 76 | 'Bug Tracker': 'https://github.com/lucasrodes/whatstk/issues', 
77 | }, 78 | python_requires='>=3.7', 79 | entry_points={ 80 | 'console_scripts': [ 81 | 'whatstk-generate-chat=whatstk.scripts.generate_chats:main', 82 | 'whatstk-to-csv=whatstk.scripts.txt_to_csv:main', 83 | 'whatstk-graph=whatstk.scripts.graph:main' 84 | ] 85 | }, 86 | package_data = { 87 | 'whatstk': ['whatsapp/assets/header_format_support.json'], 88 | }, 89 | extras_require=extras_require, 90 | ) 91 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/__init__.py -------------------------------------------------------------------------------- /tests/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/analysis/__init__.py -------------------------------------------------------------------------------- /tests/analysis/test_interventions.py: -------------------------------------------------------------------------------- 1 | # TODO: Assert number of columns equals number of users 2 | from whatstk.analysis.interventions import get_interventions_count 3 | from whatstk.whatsapp.objects import WhatsAppChat 4 | from whatstk.utils.utils import COLNAMES_DF, _map_hformat_filename 5 | import pandas as pd 6 | import pytest 7 | 8 | hformat = "[%d.%m.%y_%I:%M:%S_%p]_%name:" 9 | filename = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt" 10 | 11 | 12 | def test_interventions_date_all(): 13 | chat = WhatsAppChat.from_source(filename) 14 | counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False, all_users=True) 15 | 16 | assert(isinstance(counts, pd.DataFrame)) 17 | # Asswert chat df and counts df have same users 18 | assert(len(counts.columns) == 1) 19 | 
assert(counts.columns == ['interventions count']) 20 | 21 | # Assert chat df and counts df have same date window 22 | assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date()) 23 | assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date()) 24 | 25 | 26 | def test_interventions_date(): 27 | chat = WhatsAppChat.from_source(filename) 28 | counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False) 29 | 30 | assert(isinstance(counts, pd.DataFrame)) 31 | # Asswert chat df and counts df have same users 32 | assert(set(chat.users) == set(counts.columns)) 33 | assert(len(chat.users) == counts.shape[1]) 34 | 35 | # Assert chat df and counts df have same date window 36 | assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date()) 37 | assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date()) 38 | 39 | 40 | def test_interventions_date_2(): 41 | chat = WhatsAppChat.from_source(filename) 42 | counts = get_interventions_count(df=chat.df, date_mode='date', msg_length=False) 43 | 44 | assert(isinstance(counts, pd.DataFrame)) 45 | # Asswert chat df and counts df have same users 46 | assert(set(chat.users) == set(counts.columns)) 47 | assert(len(chat.users) == counts.shape[1]) 48 | 49 | # Assert chat df and counts df have same date window 50 | assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date()) 51 | assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date()) 52 | 53 | 54 | def test_interventions_date_msg_length(): 55 | chat = WhatsAppChat.from_source(filename) 56 | counts = get_interventions_count(chat=chat, date_mode='date', msg_length=True) 57 | 58 | assert(isinstance(counts, pd.DataFrame)) 59 | # Asswert chat df and counts df have same users 60 | assert(set(chat.users) == set(counts.columns)) 61 | assert(len(chat.users) == counts.shape[1]) 62 | 63 | # Assert chat df and counts df have same date window 64 | assert(chat.df[COLNAMES_DF.DATE].max().date() 
== counts.index.max().date()) 65 | assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date()) 66 | 67 | 68 | def test_interventions_hour(): 69 | chat = WhatsAppChat.from_source(filename) 70 | counts = get_interventions_count(chat=chat, date_mode='hour', msg_length=False) 71 | 72 | assert(isinstance(counts, pd.DataFrame)) 73 | # Asswert chat df and counts df have same users 74 | assert(set(chat.users) == set(counts.columns)) 75 | assert(len(chat.users) == counts.shape[1]) 76 | 77 | # Check range hours 78 | assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.hour.max()) 79 | assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.hour.min()) 80 | 81 | 82 | def test_interventions_hour_msg_length(): 83 | chat = WhatsAppChat.from_source(filename) 84 | counts = get_interventions_count(chat=chat, date_mode='hour', msg_length=True) 85 | 86 | assert(isinstance(counts, pd.DataFrame)) 87 | # Asswert chat df and counts df have same users 88 | assert(set(chat.users) == set(counts.columns)) 89 | assert(len(chat.users) == counts.shape[1]) 90 | 91 | # Check range hours 92 | assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.hour.max()) 93 | assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.hour.min()) 94 | 95 | 96 | def test_interventions_month(): 97 | chat = WhatsAppChat.from_source(filename) 98 | counts = get_interventions_count(chat=chat, date_mode='month', msg_length=False) 99 | 100 | assert(isinstance(counts, pd.DataFrame)) 101 | # Asswert chat df and counts df have same users 102 | assert(set(chat.users) == set(counts.columns)) 103 | assert(len(chat.users) == counts.shape[1]) 104 | 105 | # Check range months 106 | assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.month.max()) 107 | assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.month.min()) 108 | 109 | 110 | def test_interventions_month_msg_length(): 111 | chat = WhatsAppChat.from_source(filename) 112 | counts = get_interventions_count(chat=chat, date_mode='month', 
msg_length=False) 113 | 114 | assert(isinstance(counts, pd.DataFrame)) 115 | # Asswert chat df and counts df have same users 116 | assert(set(chat.users) == set(counts.columns)) 117 | assert(len(chat.users) == counts.shape[1]) 118 | 119 | # Check range months 120 | assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.month.max()) 121 | assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.month.min()) 122 | 123 | 124 | def test_interventions_weekday(): 125 | chat = WhatsAppChat.from_source(filename) 126 | counts = get_interventions_count(chat=chat, date_mode='weekday', msg_length=False) 127 | 128 | assert(isinstance(counts, pd.DataFrame)) 129 | # Asswert chat df and counts df have same users 130 | assert(set(chat.users) == set(counts.columns)) 131 | assert(len(chat.users) == counts.shape[1]) 132 | 133 | # Check range weekdays 134 | assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.weekday.max()) 135 | assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.weekday.min()) 136 | 137 | 138 | def test_interventions_weekday_msg_length(): 139 | chat = WhatsAppChat.from_source(filename) 140 | counts = get_interventions_count(chat=chat, date_mode='weekday', msg_length=True) 141 | 142 | assert(isinstance(counts, pd.DataFrame)) 143 | # Asswert chat df and counts df have same users 144 | assert(set(chat.users) == set(counts.columns)) 145 | assert(len(chat.users) == counts.shape[1]) 146 | 147 | # Check range weekdays 148 | assert(counts.index.max() == chat.df[COLNAMES_DF.DATE].dt.weekday.max()) 149 | assert(counts.index.min() == chat.df[COLNAMES_DF.DATE].dt.weekday.min()) 150 | 151 | 152 | def test_interventions_hourweekday(): 153 | chat = WhatsAppChat.from_source(filename) 154 | counts = get_interventions_count(chat=chat, date_mode='hourweekday', msg_length=False) 155 | 156 | assert(isinstance(counts, pd.DataFrame)) 157 | # Asswert chat df and counts df have same users 158 | assert(set(chat.users) == set(counts.columns)) 159 | assert(len(chat.users) == 
counts.shape[1]) 160 | 161 | # Check range weekdays 162 | assert(counts.index.levels[0].max() == chat.df[COLNAMES_DF.DATE].dt.weekday.max()) 163 | assert(counts.index.levels[0].min() == chat.df[COLNAMES_DF.DATE].dt.weekday.min()) 164 | 165 | # Check range hours 166 | assert(counts.index.levels[1].max() == chat.df[COLNAMES_DF.DATE].dt.hour.max()) 167 | assert(counts.index.levels[1].min() == chat.df[COLNAMES_DF.DATE].dt.hour.min()) 168 | 169 | 170 | def test_interventions_hourweekday_msg_length(): 171 | chat = WhatsAppChat.from_source(filename) 172 | counts = get_interventions_count(chat=chat, date_mode='hourweekday', msg_length=True) 173 | 174 | assert(isinstance(counts, pd.DataFrame)) 175 | # Assert chat df and counts df have same users 176 | assert(set(chat.users) == set(counts.columns)) 177 | assert(len(chat.users) == counts.shape[1]) 178 | 179 | # Check range weekdays 180 | assert(counts.index.levels[0].max() == chat.df[COLNAMES_DF.DATE].dt.weekday.max()) 181 | assert(counts.index.levels[0].min() == chat.df[COLNAMES_DF.DATE].dt.weekday.min()) 182 | 183 | # Check range hours 184 | assert(counts.index.levels[1].max() == chat.df[COLNAMES_DF.DATE].dt.hour.max()) 185 | assert(counts.index.levels[1].min() == chat.df[COLNAMES_DF.DATE].dt.hour.min()) 186 | 187 | 188 | def test_interventions_error_1(): 189 | chat = WhatsAppChat.from_source(filename) 190 | with pytest.raises(ValueError): 191 | _ = get_interventions_count(chat=chat, date_mode='error', msg_length=False) 192 | with pytest.raises(ValueError): 193 | _ = get_interventions_count(chat=chat, date_mode='error', msg_length=True) 194 | 195 | 196 | def test_interventions_error_2(): 197 | with pytest.raises(ValueError): 198 | _ = get_interventions_count(date_mode='hour', msg_length=False) 199 | 200 | 201 | def test_interventions_date_cumsum(): 202 | chat = WhatsAppChat.from_source(filename) 203 | counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False, cumulative=True) 204 | 205 | 
assert(isinstance(counts, pd.DataFrame)) 206 | # Asswert chat df and counts df have same users 207 | assert(set(chat.users) == set(counts.columns)) 208 | assert(len(chat.users) == counts.shape[1]) 209 | 210 | # Assert chat df and counts df have same date window 211 | assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date()) 212 | assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date()) 213 | 214 | assert(isinstance(counts, pd.DataFrame)) 215 | # Asswert chat df and counts df have same users 216 | assert(set(chat.users) == set(counts.columns)) 217 | assert(len(chat.users) == counts.shape[1]) 218 | 219 | # Assert chat df and counts df have same date window 220 | assert(chat.df[COLNAMES_DF.DATE].max().date() == counts.index.max().date()) 221 | assert(chat.df[COLNAMES_DF.DATE].min().date() == counts.index.min().date()) 222 | -------------------------------------------------------------------------------- /tests/analysis/test_responses.py: -------------------------------------------------------------------------------- 1 | import math 2 | import pytest 3 | from whatstk.whatsapp.objects import WhatsAppChat 4 | from whatstk.analysis.responses import get_response_matrix 5 | from whatstk.utils.utils import _map_hformat_filename 6 | 7 | hformat = "[%d.%m.%y_%I:%M:%S_%p]_%name:" 8 | filename = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt" 9 | 10 | 11 | def test_get_response_matrix_1(): 12 | chat = WhatsAppChat.from_source(filename) 13 | df_resp = get_response_matrix(chat=chat, zero_own=True) 14 | 15 | # Check shape and colnames of returned dataframe 16 | n_users = len(chat.users) 17 | assert(df_resp.shape == (n_users, n_users)) 18 | assert(set(chat.users) == set(df_resp.columns)) 19 | 20 | # Check diagonal of returned dataframe is zero 21 | assert(all([df_resp.loc[user, user] == 0 for user in df_resp.columns])) 22 | 23 | 24 | def test_get_response_matrix_2(): 25 | chat = WhatsAppChat.from_source(filename) 26 | df_resp = 
get_response_matrix(chat=chat, zero_own=False) 27 | 28 | # Check shape and colnames of returned dataframe 29 | n_users = len(chat.users) 30 | assert(df_resp.shape == (n_users, n_users)) 31 | assert(set(chat.users) == set(df_resp.columns)) 32 | 33 | 34 | def test_get_response_matrix_3(): 35 | chat = WhatsAppChat.from_source(filename) 36 | df_resp = get_response_matrix(chat=chat, norm='joint') 37 | 38 | # Check shape and colnames of returned dataframe 39 | n_users = len(chat.users) 40 | assert(df_resp.shape == (n_users, n_users)) 41 | assert(set(chat.users) == set(df_resp.columns)) 42 | 43 | # Check scaling has been done correct 44 | assert(math.isclose(df_resp.sum().sum(), 1)) 45 | 46 | 47 | def test_get_response_matrix_4(): 48 | chat = WhatsAppChat.from_source(filename) 49 | df_resp = get_response_matrix(chat=chat, norm='sender') 50 | 51 | # Check shape and colnames of returned dataframe 52 | n_users = len(chat.users) 53 | assert(df_resp.shape == (n_users, n_users)) 54 | assert(set(chat.users) == set(df_resp.columns)) 55 | 56 | # Check scaling has been done correct 57 | assert(all([math.isclose(x, 1) for x in df_resp.sum(axis=1)])) 58 | 59 | 60 | def test_get_response_matrix_5(): 61 | chat = WhatsAppChat.from_source(filename) 62 | df_resp = get_response_matrix(chat=chat, norm='receiver') 63 | 64 | # Check shape and colnames of returned dataframe 65 | n_users = len(chat.users) 66 | assert(df_resp.shape == (n_users, n_users)) 67 | assert(set(chat.users) == set(df_resp.columns)) 68 | 69 | # Check scaling has been done correct 70 | assert(all([math.isclose(x, 1) for x in df_resp.sum(axis=0)])) 71 | 72 | 73 | def test_get_response_matrix_error(): 74 | chat = WhatsAppChat.from_source(filename) 75 | with pytest.raises(ValueError): 76 | _ = get_response_matrix(chat=chat, norm='error') 77 | -------------------------------------------------------------------------------- /tests/graph/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/graph/__init__.py -------------------------------------------------------------------------------- /tests/graph/test_figures.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import plotly.graph_objs as go 3 | 4 | from whatstk.graph.base import FigureBuilder 5 | from whatstk.whatsapp.objects import WhatsAppChat 6 | from whatstk.utils.utils import _map_hformat_filename 7 | 8 | 9 | hformat = "[%d.%m.%y_%I:%M:%S_%p]_%name:" 10 | filename = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt" 11 | 12 | 13 | def load_chat_as_df(): 14 | return WhatsAppChat.from_source(filename).df 15 | 16 | 17 | def load_chat(): 18 | return WhatsAppChat.from_source(filename) 19 | 20 | 21 | def test_init(): 22 | df = load_chat_as_df() 23 | _ = FigureBuilder(df=df) 24 | chat = load_chat() 25 | _ = FigureBuilder(chat=chat) 26 | with pytest.raises(ValueError): 27 | _ = FigureBuilder() 28 | 29 | 30 | def test_init_mapping_dict_1(): 31 | df = load_chat_as_df() 32 | fb = FigureBuilder(df=df) 33 | mapping = fb.user_color_mapping 34 | assert(isinstance(fb.user_color_mapping, dict)) 35 | assert(len(mapping) == df['username'].nunique()) 36 | 37 | 38 | def test_init_mapping_dict_2(): 39 | df = load_chat_as_df() 40 | fb = FigureBuilder(df=df) 41 | value = {'a': 'b'} 42 | fb.user_color_mapping = value 43 | assert(fb.user_color_mapping == value) 44 | 45 | 46 | def test_user_msg_length_boxplot(): 47 | df = load_chat_as_df() 48 | fb = FigureBuilder(df=df) 49 | fig = fb.user_msg_length_boxplot() 50 | assert isinstance(fig, go.Figure) 51 | assert ('data' in fig and 'layout' in fig) 52 | 53 | 54 | def test_user_interventions_count_linechart(): 55 | df = load_chat_as_df() 56 | fb = FigureBuilder(df=df) 57 | fig = fb.user_interventions_count_linechart() 58 | assert 
isinstance(fig, go.Figure) 59 | assert ('data' in fig and 'layout' in fig) 60 | 61 | def test_user_interventions_count_linechart_2(): 62 | df = load_chat_as_df() 63 | fb = FigureBuilder(df=df) 64 | fig = fb.user_interventions_count_linechart(all_users=True) 65 | assert isinstance(fig, go.Figure) 66 | assert ('data' in fig and 'layout' in fig) 67 | 68 | 69 | def test_user_message_responses_flow(): 70 | df = load_chat_as_df() 71 | fb = FigureBuilder(df=df) 72 | fig = fb.user_message_responses_flow() 73 | assert isinstance(fig, go.Figure) 74 | assert ('data' in fig and 'layout' in fig) 75 | 76 | 77 | def test_user_message_responses_heatmap(): 78 | df = load_chat_as_df() 79 | fb = FigureBuilder(df=df) 80 | fig = fb.user_message_responses_heatmap() 81 | assert isinstance(fig, go.Figure) 82 | assert ('data' in fig and 'layout' in fig) 83 | -------------------------------------------------------------------------------- /tests/test_chat.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import pandas as pd 3 | import pytest 4 | 5 | from whatstk.whatsapp.objects import WhatsAppChat 6 | from whatstk._chat import BaseChat 7 | from whatstk.utils.utils import _map_hformat_filename 8 | from whatstk.utils.utils import COLNAMES_DF 9 | 10 | hformat = "[%d.%m.%y_%I:%M:%S_%p]_%name:" 11 | filepath = f"./tests/chats/hformats/{_map_hformat_filename(hformat)}.txt" 12 | 13 | 14 | def test_properties(): 15 | chat = WhatsAppChat.from_source(filepath) 16 | 17 | assert(isinstance(chat.start_date, datetime)) 18 | assert(isinstance(chat.end_date, datetime)) 19 | 20 | def test_from_source(): 21 | with pytest.raises(NotImplementedError): 22 | _ = BaseChat.from_source(filepath=filepath) 23 | 24 | 25 | def test_from_source_2(): 26 | chat = WhatsAppChat.from_source(filepath) 27 | df = chat.df 28 | 29 | # Fake system column 30 | data = { 31 | COLNAMES_DF.DATE: ["2020-11-21 03:02:06"], 32 | COLNAMES_DF.USERNAME: ["chat_name"], 
33 | COLNAMES_DF.MESSAGE: ["chat was created"], 34 | COLNAMES_DF.MESSAGE_TYPE: ["system"] 35 | } 36 | df_system = pd.DataFrame(data) 37 | df[COLNAMES_DF.MESSAGE_TYPE] = "user" 38 | # Add fake row to main df 39 | df = pd.concat([df_system, df]) 40 | # Ensure type of datetime 41 | df[COLNAMES_DF.DATE] = pd.to_datetime(df[COLNAMES_DF.DATE]) 42 | 43 | chat = WhatsAppChat(df) 44 | assert isinstance(chat.start_date, datetime) 45 | assert isinstance(chat.end_date, datetime) 46 | assert isinstance(chat.df, pd.DataFrame) 47 | assert isinstance(chat.df_system, pd.DataFrame) 48 | assert chat.is_group 49 | -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- 1 | from whatstk.data import whatsapp_urls 2 | 3 | 4 | def test_urls(): 5 | url = whatsapp_urls.POKEMON 6 | assert(isinstance(url, str)) 7 | assert(url.startswith('http')) 8 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasrodes/whatstk/85344f3a316c3ebed36a51a6ffc2ae651dda3a1a/tests/utils/__init__.py -------------------------------------------------------------------------------- /tests/utils/test_chat_merge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from whatstk.utils.chat_merge import _merge_two_chats 4 | from whatstk.whatsapp.parser import df_from_whatsapp 5 | 6 | 7 | chats_merge_path = './tests/chats/merge/' 8 | filename1 = os.path.join(chats_merge_path, 'file1.txt') 9 | filename2 = os.path.join(chats_merge_path, 'file2.txt') 10 | 11 | 12 | def test_merge_two_chats(): 13 | df1 = df_from_whatsapp(filename1) 14 | df2 = df_from_whatsapp(filename2) 15 | df = _merge_two_chats(df1, df2) 16 | assert(isinstance(df, pd.DataFrame)) 17 | df = 
_merge_two_chats(df2, df1) 18 | assert(isinstance(df, pd.DataFrame)) 19 | -------------------------------------------------------------------------------- /tests/utils/test_gdrive.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import pytest 5 | from pydrive2.files import ApiRequestError 6 | 7 | from whatstk.utils.gdrive import gdrive_init, _check_gdrive_config, _load_str_from_file_id 8 | import whatstk 9 | 10 | 11 | def test_init_1(tmpdir, mocker): 12 | # Create tmp secrets 13 | client_secret = {"field": 1} 14 | client_secret_file = tmpdir.join("client_secrets.json") 15 | with open(client_secret_file, 'w') as f: 16 | json.dump(client_secret, f) 17 | # Mock 1 18 | CONFIG_PATH = tmpdir.mkdir(".config") 19 | mocker.patch.object(whatstk.utils.gdrive, "CONFIG_PATH", CONFIG_PATH) 20 | mocker.patch.object(whatstk.utils.gdrive, "CLIENT_SECRETS_PATH", os.path.join(CONFIG_PATH, "client_secrets.json")) 21 | mocker.patch.object(whatstk.utils.gdrive, "SETTINGS_PATH", os.path.join(CONFIG_PATH, "settings.yaml")) 22 | mocker.patch.object(whatstk.utils.gdrive, "CREDENTIALS_PATH", os.path.join(CONFIG_PATH, "credentials.json")) 23 | mocker.patch("pydrive2.auth.GoogleAuth.CommandLineAuth", return_value=True) 24 | gdrive_init(client_secret_file) 25 | 26 | 27 | def test_init_2(tmpdir, mocker): 28 | # Create tmp secrets 29 | client_secret = {"field": 1} 30 | client_secret_file = tmpdir.join("client_secrets.json") 31 | with open(client_secret_file, 'w') as f: 32 | json.dump(client_secret, f) 33 | # Mock 2 34 | CONFIG_PATH = tmpdir.join(".config2") 35 | mocker.patch.object(whatstk.utils.gdrive, "CONFIG_PATH", CONFIG_PATH) 36 | mocker.patch.object(whatstk.utils.gdrive, "CLIENT_SECRETS_PATH", os.path.join(CONFIG_PATH, "client_secrets.json")) 37 | mocker.patch.object(whatstk.utils.gdrive, "SETTINGS_PATH", os.path.join(CONFIG_PATH, "settings.yaml")) 38 | mocker.patch.object(whatstk.utils.gdrive, "CREDENTIALS_PATH", 
def test_check(tmpdir, mocker):
    """``_check_gdrive_config`` raises ``ValueError`` unless both path checks succeed.

    The mocks are installed *outside* the ``pytest.raises`` blocks so that only the call under
    test can satisfy the expected exception; a ``ValueError`` raised by the setup itself would
    otherwise make the test pass for the wrong reason.
    """
    # ``os.path.isdir`` reports False (presumably: configuration directory missing).
    mocker.patch("os.path.isdir", return_value=False)
    with pytest.raises(ValueError):
        _check_gdrive_config()

    # Directory check passes but ``os.path.isfile`` reports False.
    mocker.patch("os.path.isdir", return_value=True)
    mocker.patch("os.path.isfile", return_value=False)
    with pytest.raises(ValueError):
        _check_gdrive_config()

    # Both checks pass: no exception expected.
    mocker.patch("os.path.isdir", return_value=True)
    mocker.patch("os.path.isfile", return_value=True)
    _check_gdrive_config()
def test_extract_header_from_text():
    """Text that does not look like a chat export yields no header (``None``)."""
    header = extract_header_from_text("bla bla bla")
    assert header is None
def test_generate_2():
    """Generating a chat while passing an explicit header format returns a ``WhatsAppChat``."""
    cg = ChatGenerator(size=10, users=USERS)
    # The year directive needs its leading '%' ('%y'), as in the other header formats used
    # across the test-suite; the previous literal 'y-...' was not a valid directive.
    chat = cg.generate(hformat='%y-%m-%d, %H:%M - %name:')
    assert isinstance(chat, WhatsAppChat)
def test_object_hformat():
    """Load the chat with an explicitly provided header format.

    ``filename`` is derived from the module-level ``hformat``, so that format is the one to
    pass. Previously the header format was never passed, so this test was a copy of
    ``test_object_auto`` and the manual-hformat path of ``from_source`` was not exercised.
    """
    # Explicit header format, default auto-header setting.
    chat = WhatsAppChat.from_source(filename, hformat=hformat)
    assert isinstance(chat.df, pd.DataFrame)

    # Explicit header format with auto-header detection disabled.
    chat = WhatsAppChat.from_source(filename, auto_header=False, hformat=hformat)
    assert isinstance(chat.df, pd.DataFrame)
def test_object_to_zip(tmpdir):
    """``to_zip`` raises ``ValueError`` for the extension-less path ``export``."""
    chat = WhatsAppChat.from_source(filename)
    filename_ = tmpdir.join("export")
    # (A leftover debugging ``print`` of the path was removed here.)
    with pytest.raises(ValueError):
        chat.to_zip(filepath=str(filename_))
def test_df_from_whatsapp():
    """This test checks most of the logic of the library.

    - Loads the pre-generated chats for every supported header format (according to JSON).
    - Loads them using manual and, where supported, auto_header approaches, from both the
      ``.txt`` and the ``.zip`` export, and checks that all variants are equivalent.
    - Checks that all chats (from different hformats) are equivalent.

    """
    def _has_seconds(df):
        # Only one distinct value in the 'second' component means the header format did not
        # carry seconds (they all collapse to the same value).
        return df[COLNAMES_DF.DATE].dt.second.nunique() != 1

    def _drop_seconds(df):
        # In-place, so later comparisons see the truncated timestamps as well.
        df[COLNAMES_DF.DATE] = df[COLNAMES_DF.DATE].map(lambda x: x.replace(second=0))

    all_chats = []
    hformats = []
    for elem in get_supported_hformats_as_dict():
        hformat = elem['format']
        auto_header = bool(elem['auto_header'])
        filename_base = _map_hformat_filename(hformat)
        filename = os.path.join(output_folder, '{}.txt'.format(filename_base))
        filename_zip = os.path.join(output_folder, '{}.zip'.format(filename_base))

        # Manual header: always available and used as the reference for this format.
        chat = df_from_whatsapp(filename, hformat=hformat)

        # Auto header: only compared when the format supports it. (The previous version
        # indexed a two-element list unconditionally and raised IndexError otherwise.)
        if auto_header:
            chat_auto = df_from_whatsapp(filename)
            assert chat_auto.equals(chat), "auto/manual mismatch for hformat {!r}".format(hformat)

        # ZIP export, auto and manual.
        if auto_header:
            chat_zip = df_from_whatsapp(filename_zip)
            assert chat_zip.equals(chat), "zip (auto) mismatch for hformat {!r}".format(hformat)
        chat_zip = df_from_whatsapp(filename_zip, hformat=hformat)
        assert chat_zip.equals(chat), "zip (manual) mismatch for hformat {!r}".format(hformat)

        all_chats.append(chat)
        hformats.append(hformat)

    # Pairwise comparison across header formats. When only one chat of the pair carries
    # seconds, the seconds of the other one are zeroed so that both are comparable.
    for i, (chat_i, hformat_i) in enumerate(zip(all_chats, hformats)):
        for chat_j, hformat_j in zip(all_chats[i:], hformats[i:]):
            if not _has_seconds(chat_i) and _has_seconds(chat_j):
                _drop_seconds(chat_j)
            elif not _has_seconds(chat_j) and _has_seconds(chat_i):
                _drop_seconds(chat_i)
            assert chat_i.equals(chat_j), (
                "chats differ between hformats {!r} and {!r}".format(hformat_i, hformat_j)
            )
def test_df_message_type_true():
    """With ``message_type=True`` every row authored by the group name is typed 'system'."""
    chat_df = df_from_whatsapp(file_type_1, message_type=True)
    assert isinstance(chat_df, pd.DataFrame)

    # Rows whose username is the group name must all be system messages
    from_group = chat_df["username"] == "Pokemon Chat"
    message_types = set(chat_df.loc[from_group, COLNAMES_DF.MESSAGE_TYPE])
    assert message_types == {"system"}
def _build_dfs(self, df_raw: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, str]:
    """Split the raw chat into user messages, system messages and chat name.

    Args:
        df_raw (pandas.DataFrame): Chat, optionally including the message-type column.

    Returns:
        tuple: ``(df, df_system, name)``. For chats without message-type information, or
        chats not detected as groups, ``df_system`` is an empty DataFrame and ``name`` is
        an empty string.

    Raises:
        ValueError: If the system messages of a group chat do not share a single username.

    """
    type_col = COLNAMES_DF.MESSAGE_TYPE
    user_col = COLNAMES_DF.USERNAME

    # No message-type information: nothing to split.
    if type_col not in df_raw.columns:
        return df_raw, pd.DataFrame(), ""
    # Not a group: only discard the message-type column.
    if not self.is_group:
        return df_raw.drop(columns=type_col), pd.DataFrame(), ""

    is_system = df_raw[type_col] == "system"
    user_messages = df_raw.loc[~is_system].drop(columns=type_col)
    system_messages = df_raw.loc[is_system].drop(columns=type_col)

    # The single username attached to the system messages is taken as the chat name.
    if len(set(system_messages[user_col])) != 1:
        raise ValueError("System messages dataframe must contain only one username.")
    chat_name = system_messages[user_col].iloc[0]

    return user_messages, system_messages.drop(columns=user_col), chat_name
@property
def name(self) -> Optional[str]:
    """Name of the chat.

    The name is taken from the username attached to the system messages of the chat. When
    the chat has no message-type information, or is not detected as a group, the name is an
    empty string.

    Returns:
        str
    """
    return self._name
def rename_users(self, mapping: Dict[str, List[str]]) -> "BaseChat":
    """Rename users.

    This might be needed on multiple occasions:

    - Change typos in user names stored in phone.
    - If a user appears multiple times with different usernames, group these under the same name (this might
      happen when multiple chats are merged).

    The current instance is left untouched; the renaming is applied to a deep copy.

    Args:
        mapping (dict): Dictionary mapping new names to lists of old names, example:
                        {'John': ['Jon', 'J'], 'Ray': ['Raymond']} will map 'Jon' and 'J' to 'John', and 'Raymond'
                        to 'Ray'. Note that old names must come as list (even if there is only one).

    Returns:
        BaseChat: Copy of the chat with users renamed according to `mapping`.

    Raises:
        ValueError: Raised if mapping is not correct.

    Examples:
        Load LOREM2 chat and rename users `Maria` and `Maria2` to `Mary`.

        .. code-block:: python

            >>> from whatstk.whatsapp.objects import WhatsAppChat
            >>> from whatstk.data import whatsapp_urls
            >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.LOREM2)
            >>> chat.users
            ['+1 123 456 789', 'Giuseppe', 'John', 'Maria', 'Maria2']
            >>> chat = chat.rename_users(mapping={'Mary': ['Maria', 'Maria2']})
            >>> chat.users
            ['+1 123 456 789', 'Giuseppe', 'John', 'Mary']

    """
    self_ = deepcopy(self)
    for new_name, old_names in mapping.items():
        if not isinstance(old_names, list):
            raise ValueError("Old names must come as a list of str (even if there is only one).")
        for old_name in old_names:
            # Single ``.loc`` assignment instead of chained indexing (``df[col][mask] = ...``):
            # chained assignment may write to a temporary copy and is a no-op under pandas
            # Copy-on-Write.
            mask = self_.df[COLNAMES_DF.USERNAME] == old_name
            self_.df.loc[mask, COLNAMES_DF.USERNAME] = new_name
    return self_
def get_interventions_count(
    df: pd.DataFrame = None,
    chat: "BaseChat" = None,
    date_mode: str = "date",
    msg_length: bool = False,
    cumulative: bool = False,
    all_users: bool = False,
) -> pd.DataFrame:
    """Count interventions per user and per unit of time.

    The unit of time is selected with ``date_mode``.

    **Note**: Either ``df`` or ``chat`` must be provided.

    Args:
        df (pandas.DataFrame, optional): Chat data, i.e. attribute `df` of a loaded chat. If a value is given,
                                            ``chat`` is ignored.
        chat (Chat, optional): Loaded chat object. Required if ``df`` is None.
        date_mode (str, optional): Grouping mode. Defaults to ``'date'``. Available modes are:

                            - ``'date'``: Grouped by particular date (year, month and day).
                            - ``'hour'``: Grouped by day hours (24 hours).
                            - ``'month'``: Grouped by months (12 months).
                            - ``'weekday'``: Grouped by weekday (i.e. monday, tuesday, ..., sunday).
                            - ``'hourweekday'``: Grouped by weekday and hour.
        msg_length (bool, optional): Set to True to count the number of characters instead of number of messages sent.
        cumulative (bool, optional): Set to True to obtain cumulative counts.
        all_users (bool, optional): Obtain number of interventions of all users combined. Defaults to False.

    Returns:
        pandas.DataFrame: DataFrame with shape *NxU*, where *N*: number of time-slots and *U*: number of users.

    Raises:
        ValueError: if ``date_mode`` value is not supported.

    Example:
        Get number of interventions per user and date from the POKEMON chat.

        .. code-block:: python

            >>> from whatstk import WhatsAppChat
            >>> from whatstk.analysis import get_interventions_count
            >>> from whatstk.data import whatsapp_urls
            >>> filepath = whatsapp_urls.POKEMON
            >>> chat = WhatsAppChat.from_source(filepath)
            >>> counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False)

    """
    df = _get_df(df=df, chat=chat)

    # Datetime components to group by, for each supported mode.
    components_by_mode = {
        "date": ("date",),
        "hour": ("hour",),
        "weekday": ("weekday",),
        "hourweekday": ("weekday", "hour"),
        "month": ("month",),
    }
    if date_mode not in components_by_mode:
        raise ValueError(
            "Mode {} is not implemented. Valid modes are 'date', 'hour', 'weekday', "
            "'hourweekday' and 'month'.".format(date_mode)
        )

    date_accessor = df[COLNAMES_DF.DATE].dt
    group_keys = [getattr(date_accessor, component) for component in components_by_mode[date_mode]]
    counts = _interventions(df, group_keys, msg_length)

    # Index post-processing
    if date_mode == "date":
        counts.index = pd.to_datetime(counts.index)
    if date_mode == "hourweekday":
        counts.index = counts.index.set_names(["weekday", "hour"])
    else:
        counts.index.name = date_mode
    counts.columns = counts.columns.get_level_values(COLNAMES_DF.USERNAME)

    # Optional aggregations
    if all_users:
        counts = pd.DataFrame(counts.sum(axis=1), columns=["interventions count"])
    if cumulative:
        counts = counts.cumsum()

    return counts
def get_response_matrix(
    df: Optional[pd.DataFrame] = None,
    chat: Optional["BaseChat"] = None,
    zero_own: bool = True,
    norm: str = NORMS.ABSOLUTE,
) -> pd.DataFrame:
    """Get response matrix for given chat.

    Obtains a DataFrame of shape `[n_users, n_users]` counting the number of responses between members. Responses can
    be counted in different ways, e.g. using absolute values or normalised values. Responses are counted based solely
    on consecutive messages. That is, if :math:`user_i` sends a message right after :math:`user_j`, it will be counted
    as a response from :math:`user_i` to :math:`user_j`.

    Axis 0 lists senders and axis 1 lists receivers. That is, the value in cell (i, j) denotes the number of times
    :math:`user_i` responded to a message from :math:`user_j`.

    **Note**: Either ``df`` or ``chat`` must be provided.

    Args:
        df (pandas.DataFrame, optional): Chat data. Attribute `df` of a chat loaded using Chat. If a value is given,
                                            ``chat`` is ignored.
        chat (Chat, optional): Chat data. Object obtained when chat loaded using Chat. Required if ``df`` is None.
        zero_own (bool, optional): Set to True to avoid counting own responses. Defaults to True.
        norm (str, optional): Specifies the type of normalization used for response count. Can be:

                                - ``'absolute'``: Absolute count of responses.
                                - ``'joint'``: Counts divided by the total number of counted responses.
                                - ``'sender'``: Each row (sender) divided by the total of that row.
                                - ``'receiver'``: Each column (receiver) divided by the total of that column.

    Returns:
        pandas.DataFrame: Response matrix.

    Raises:
        ValueError: If ``norm`` is not one of the values in ``NORMS``.

    Example:
        Get absolute count on responses (consecutive messages) between users.

        .. code-block:: python

            >>> from whatstk import WhatsAppChat
            >>> from whatstk.analysis import get_response_matrix
            >>> from whatstk.data import whatsapp_urls
            >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON)
            >>> responses = get_response_matrix(chat=chat)

    """
    # Fail fast on an invalid normalization, before doing any counting work.
    if norm not in (NORMS.ABSOLUTE, NORMS.JOINT, NORMS.RECEIVER, NORMS.SENDER):
        raise ValueError("norm not valid. See NORMS variable in whatstk.analysis.responses")

    # Get chat df and users
    df = _get_df(df=df, chat=chat)
    users = WhatsAppChat(df).users

    # Initialize the count dictionary: responses[sender][receiver]
    user_transitions = df[COLNAMES_DF.USERNAME].tolist()
    responses = {user: dict.fromkeys(users, 0) for user in users}

    # Each pair of consecutive messages is one response: the author of the later message
    # (sender) responds to the author of the earlier one (receiver).
    for receiver, sender in zip(user_transitions, user_transitions[1:]):
        if not zero_own or sender != receiver:
            responses[sender][receiver] += 1
    responses = pd.DataFrame.from_dict(responses, orient="index")

    # Normalize
    if norm == NORMS.JOINT:
        responses /= responses.sum().sum()
    elif norm == NORMS.RECEIVER:
        responses /= responses.sum(axis=0)
    elif norm == NORMS.SENDER:
        responses = responses.divide(responses.sum(axis=1), axis=0)
    return responses
Urls = namedtuple("Urls", ["POKEMON", "LOREM", "LOREM1", "LOREM2", "LOREM_2000"])

branch = "develop"
chats_folder = f"http://raw.githubusercontent.com/lucasrodes/whatstk/{branch}/chats"  # noqa: E231

# URLs always use forward slashes. ``os.path.join`` joins with the platform separator (a backslash on
# Windows), which would produce invalid URLs there, so the parts are joined explicitly with "/".
_whatsapp_folder = f"{chats_folder}/whatsapp"

# Links to the sample WhatsApp chats hosted in the project repository.
whatsapp_urls = Urls(
    POKEMON=f"{_whatsapp_folder}/pokemon.txt",
    LOREM=f"{_whatsapp_folder}/lorem.txt",
    LOREM1=f"{_whatsapp_folder}/lorem-merge-part1.txt",
    LOREM2=f"{_whatsapp_folder}/lorem-merge-part2.txt",
    LOREM_2000=f"{_whatsapp_folder}/lorem-2000.txt",
)
def fig_boxplot_msglen(
    df: pd.DataFrame,
    username_to_color: Optional[Dict[str, str]] = None,
    title: str = "",
    xlabel: Optional[str] = None,
) -> go.Figure:
    """Visualize the message length of each user as a boxplot.

    One box is drawn per user; users are ordered by decreasing median message length.

    Args:
        df (pandas.DataFrame): Chat data. The username and message columns are read; the input is not modified.
        username_to_color (dict, optional): Dictionary mapping username to color. Defaults to None.
        title (str, optional): Title for plot. Defaults to "".
        xlabel (str, optional): x-axis label title. Defaults to None.

    Returns:
        plotly.graph_objs.Figure

    """
    # Work on a copy so that the caller's DataFrame does not get the helper column
    df = df.copy()
    # Get message lengths
    df[COLNAMES_DF.MESSAGE_LENGTH] = df[COLNAMES_DF.MESSAGE].apply(len)
    # Sort users by median
    user_stats = (
        df.groupby(COLNAMES_DF.USERNAME)
        .aggregate({COLNAMES_DF.MESSAGE_LENGTH: "median"})[COLNAMES_DF.MESSAGE_LENGTH]
        .sort_values(ascending=False)
    )

    # Create one trace per user, following the median-based order
    data = []
    for username in user_stats.index:
        user_lengths = df.loc[df[COLNAMES_DF.USERNAME] == username, COLNAMES_DF.MESSAGE_LENGTH]
        data.append(
            go.Box(
                y=user_lengths.values,
                showlegend=True,
                name=username,
                boxpoints="outliers",
                marker_color=username_to_color[username] if username_to_color else None,
            )
        )

    layout = dict(title=title, xaxis=dict(title=xlabel))
    return go.Figure(data=data, layout=layout)
14 | 15 | Returns: 16 | plotly.graph_objs.Figure 17 | 18 | """ 19 | trace = go.Heatmap( 20 | z=df_matrix, 21 | x=df_matrix.columns, 22 | y=df_matrix.index, 23 | hovertemplate="%{y} ---> %{x}%{z}", 24 | colorscale="Greens", 25 | ) 26 | data = [trace] 27 | layout = {"title": {"text": title}, "xaxis": {"title": "Receiver"}, "yaxis": {"title": "Sender"}} 28 | 29 | fig = go.Figure(data=data, layout=layout) 30 | return fig 31 | -------------------------------------------------------------------------------- /whatstk/graph/figures/sankey.py: -------------------------------------------------------------------------------- 1 | """Sankey plot figures.""" 2 | 3 | from typing import List 4 | 5 | import plotly.graph_objs as go 6 | 7 | 8 | def fig_sankey( 9 | label: List[str], color: List[str], source: List[str], target: List[str], value: List[int], title: str = "" 10 | ) -> go.Figure: 11 | """Generate sankey image. 12 | 13 | Args: 14 | label (list): List with node labels. 15 | color (list): List with node colors. 16 | source (list): List with link source id. 17 | target (list): List with linke target id. 18 | value (list): List with link value. 19 | title (str, optional): Title. Defaults to "". 20 | 21 | Returns: 22 | plotly.graph_objs.Figure 23 | 24 | """ 25 | trace = go.Sankey( 26 | arrangement="fixed", 27 | orientation="v", 28 | valueformat=".0f", 29 | node=dict( 30 | pad=20, 31 | thickness=40, 32 | line=dict(color="black", width=0), 33 | label=label, 34 | color=color, 35 | hovertemplate="%{label}
def fig_scatter_time(
    user_data: pd.DataFrame,
    username_to_color: Optional[Dict[str, str]] = None,
    title: str = "",
    xlabel: Optional[str] = None,
) -> go.Figure:
    """Build a plotly figure with one line per user.

    ``user_data`` must be a pandas.DataFrame with timestamps as index and a column for each user. Suitable
    ``user_data`` can be generated using the function ``get_interventions_count`` (disclaimer: not compatible with
    ``date_mode='hourweekday'``).

    Args:
        user_data (pandas.DataFrame): Input data. Shape nrows x ncols, where nrows = number of timestamps and
                                        ncols = number of users.
        username_to_color (dict, optional): Dictionary mapping username to color. Defaults to None.
        title (str, optional): Title of figure. Defaults to "".
        xlabel (str, optional): x-axis label title. Defaults to None.

    Returns:
        plotly.graph_objs.Figure

    """
    timestamps = user_data.index

    def _line_style(username: str) -> Optional[dict]:
        # No explicit style when no color mapping was given
        if username_to_color is None:
            return None
        return dict(color=username_to_color[username])

    # One trace per column (user) of the input table
    traces = [
        go.Scatter(
            x=timestamps,
            y=user_data[username],
            showlegend=True,
            name=username,
            text=timestamps,
            line=_line_style(username),
        )
        for username in user_data
    ]

    return go.Figure(data=traces, layout=dict(title=title, xaxis=dict(title=xlabel)))
def _parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the chat generation script.

    Returns:
        argparse.Namespace: Parsed arguments, with attributes ``output_path``, ``filenames``, ``size``, ``hformats``,
        ``last_timestamp``, ``export_as_zip`` and ``verbose``.

    """
    # The first positional parameter of ArgumentParser is `prog` (the program name shown in the usage line), so
    # the explanatory text has to be passed explicitly as `description`.
    parser = argparse.ArgumentParser(
        description=(
            "Generate chat. Make sure to install the library with required extension: pip install whatstk[generate] "
            "--upgrade"
        )
    )
    parser.add_argument(
        "-o", "--output-path", type=str, required=True, help=("Path where to store generated chats. Must exist.")
    )
    parser.add_argument("--filenames", default=None, nargs="+", help="Filenames. Must be equal length of --hformats.")
    parser.add_argument(
        "-s", "--size", type=int, default=500, help="Number of messages to create per chat. Defaults to 500."
    )
    parser.add_argument(
        "-f",
        "--hformats",
        default=None,
        nargs="+",
        help="Header format. If None, defaults to all supported hformats. List formats as 'format 1' 'format 2' ...",
    )
    parser.add_argument(
        "--last-timestamp",
        type=lambda s: datetime.strptime(s, "%Y-%m-%d"),
        default=None,
        help="Timestamp of last message. Format YYYY-mm-dd",
    )
    parser.add_argument(
        "-z",
        "--export-as-zip",
        default=False,
        action="store_true",
        help="Export chat as ZIP (additionally)",
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbosity.")
    return parser.parse_args()
def main() -> None:
    """Main script.

    Loads the chat given on the command line, builds the requested figure and writes it to the output HTML file.
    """
    args = _parse_args()
    chat = WhatsAppChat.from_source(filepath=args.input_filename, hformat=args.hformat)
    builder = FigureBuilder(chat=chat)

    if args.type == "interventions_count":
        fig = builder.user_interventions_count_linechart(
            date_mode=args.icount_date_mode,
            # Honour the --icount-msg-length flag (it used to be parsed but ignored)
            msg_length=args.icount_msg_length,
            cumulative=args.icount_cumulative,
        )
    elif args.type == "msg_length":
        fig = builder.user_msg_length_boxplot()
    # No other value is possible here: argparse restricts --type to the two choices handled above
    plot(fig, filename=args.output_filename)
def merge_chats(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Merge several chats into a single one.

    Can come in handy when you have old exports and new ones, and both have relevant data.

    **Note:** The dataframes must have a date column (``COLNAMES_DF.DATE``) with the timestamps of the messages, as
    this is required to correctly sort and merge the chats.

    Args:
        dfs (List[pandas.DataFrame]): List with the chats as DataFrames. Must contain at least one chat.

    Returns:
        pandas.DataFrame: Merged chat.

    """
    # Sort from oldest. The date column is used because it is what `_merge_two_chats` compares on; sorting by the
    # index leaves the input order untouched whenever the index is not the timestamp, and merging chats out of
    # chronological order can drop the messages of an intermediate export.
    dfs = sorted(dfs, key=lambda x: x[COLNAMES_DF.DATE].min())
    # Merge chats one by one, from oldest to newest
    df = dfs[0]
    for df_next in dfs[1:]:
        df = _merge_two_chats(df, df_next)
    return df
def gdrive_init(client_secret_file: str, encoding: str = "utf8") -> None:
    """Initialize GDrive credentials.

    Needs to be run only once, before a file is read from Google Drive for the first time. Later executions should
    run seamlessly.

    To obtain `client_secret_file`, follow the instructions from:
    https://medium.com/analytics-vidhya/how-to-connect-google-drive-to-python-using-pydrive-9681b2a14f20

    Notes:
        - Additionally, make sure to add yourself in Test users, as noted in:
          https://stackoverflow.com/questions/65980758/pydrive-quickstart-and-error-403-access-denied
        - Select Desktop App instead of Web Application as the application type.

    Args:
        client_secret_file (str): Path to client_secret.json file (Created in Google Console).
        encoding (str): Encoding used when writing the settings file (ex. 'utf-8'). See the list of Python standard
                        encodings.
    """
    # Make sure the configuration folder is there
    os.makedirs(CONFIG_PATH, exist_ok=True)

    # Keep a copy of the client secrets in the configuration folder
    copyfile(client_secret_file, CLIENT_SECRETS_PATH)

    # Write the settings.yaml file
    settings = {
        "client_config_backend": "file",
        "client_config_file": CLIENT_SECRETS_PATH,
        "save_credentials": True,
        "save_credentials_backend": "file",
        "save_credentials_file": CREDENTIALS_PATH,
        "get_refresh_token": True,
        "oauth_scope": [
            "https://www.googleapis.com/auth/drive",
            "https://www.googleapis.com/auth/drive.install",
        ],
    }
    with open(SETTINGS_PATH, "w", encoding=encoding) as settings_file:
        yaml.dump(settings, settings_file)

    # Run the command-line authentication flow using the settings just written
    GoogleAuth(settings_file=SETTINGS_PATH).CommandLineAuth()
def _map_hformat_filename(filename: str) -> str:
    """Turn a header format into a string that is a valid filename (Linux, MacOS, Win).

    Spaces become ``_``, slashes become ``--`` and colons become ``;``.

    Args:
        filename (str): Header format.

    Returns:
        str: Mapped header format.
    """
    substitutions = ((" ", "_"), ("/", "--"), (":", ";"))
    mapped = filename
    for old, new in substitutions:
        mapped = mapped.replace(old, new)
    return mapped
def _extract_possible_header_from_line(line: str) -> Optional[str]:
    """Given a `line` extract possible header. Uses ': ' as separator.

    The candidate header is the text before the first ``': '`` in the line, with a trailing ``':'`` appended if it
    does not already end with one.

    Args:
        line (str): Line containing header and message body.

    Returns:
        str: Possible header. None if the line does not contain the separator.

    """
    header, separator, _ = line.partition(": ")
    if not separator:
        # No separator in the line, hence no header
        return None
    if not header.isprintable():
        print("""
            There is some unprintable character in the header.
            Please report this in https://github.com/lucasrodes/whatstk.
            """)
    # `endswith` (rather than `header[-1]`) also copes with an empty candidate, which happens when the line starts
    # with the separator itself. Indexing would raise IndexError there and abort the whole header detection.
    if not header.endswith(":"):
        header += ":"
    return header
def _extract_header_format_from_components(elements_list: List[List[int]], template_list: List[str]) -> str:
    """Extract header format from list containing elements and list containing templates.

    Args:
        elements_list (list): List with component list (numeric elements found in each candidate header).
        template_list (list): List with template strings (one per candidate header, with ``{}`` placeholders).

    Returns:
        str: Header format.

    Raises:
        ValueError: If there are no candidate headers to infer the format from.

    """
    if not elements_list:
        raise ValueError("No header candidates available, header format cannot be inferred.")

    # Remove outliers: keep the candidates whose number and type of elements are the most frequent ones
    lengths = [len(e) for e in elements_list]
    types = ["".join(type(ee).__name__ for ee in e) for e in elements_list]
    len_mode = max(set(lengths), key=lengths.count)
    type_mode = max(set(types), key=types.count)
    elements_list_ = []
    template_list_ = []
    for e, t, length, type_ in zip(elements_list, template_list, lengths, types):
        if (length == len_mode) and (type_ == type_mode):
            elements_list_.append(e)
            template_list_.append(t)
    if not template_list_:
        raise ValueError("No consistent header candidates available, header format cannot be inferred.")

    # Get positions
    df = pd.DataFrame(elements_list_)
    dates_df = df.select_dtypes("number")
    # Use a template that survived the outlier removal, so that its placeholders match the codes found below
    # (the first raw candidate may itself be an outlier).
    template = template_list_[0]

    # A 12-hour clock is assumed whenever the template carries an AM/PM marker
    hour_code = "%I" if "%p" in template else "%H"

    # Day: first position whose maximum value is in the range 28-31
    day_pos = ((dates_df.max() > 27) & (dates_df.max() < 32)).idxmax()
    dates_df = dates_df.drop(columns=[day_pos])
    # Year: among the remaining date positions (only positions 0, 1, 2 are considered), the one with largest value
    pos = [0, 1, 2]
    pos.remove(day_pos)
    year_pos = dates_df[pos].max().idxmax()
    dates_df = dates_df.drop(columns=[year_pos])
    # Month: the remaining date position
    month_pos = dates_df.columns.min()
    dates_df = dates_df.drop(columns=[month_pos])
    # Hour
    hour_pos = 3
    dates_df = dates_df.drop(columns=[hour_pos])
    # Minute
    minutes_pos = 4
    dates_df = dates_df.drop(columns=[minutes_pos])
    # Dictionary with positions and date element code
    dates_pos = {day_pos: "%d", year_pos: "%y", month_pos: "%m", hour_pos: hour_code, minutes_pos: "%M"}
    # Seconds: only if there is still some element left
    if dates_df.shape[1] > 0:
        seconds_pos = 5
        dates_pos[seconds_pos] = "%S"

    # Fill the template placeholders with the codes, in positional order, followed by the username code
    dates_codes = [dates_pos[k] for k in sorted(dates_pos.keys())]
    codes = dates_codes + ["%name"]
    return template.format(*codes)
USERS = ["John", "Mary", "Giuseppe", "+1 123 456 789"]


class ChatGenerator:
    """Generate a chat.

    Args:
        size (int): Number of messages to generate.
        users (list, optional): List with names of the users. Defaults to module variable USERS.
        seed (int, optional): Seed for random processes. Defaults to 100.

    Examples:
        This simple example generates a chat with 10 messages. Once generated, we can access its attribute
        ``df``, which contains the chat as a DataFrame.

        .. code-block:: python

            >>> from whatstk.whatsapp.generation import ChatGenerator
            >>> from datetime import datetime
            >>> chat = ChatGenerator(size=10).generate(last_timestamp=datetime(2020, 1, 1, 0, 0))
            >>> chat.df.head(5)
                                    date  username                                            message
            0 2019-12-31 09:43:04.000525  Giuseppe                               Nisi ad esse cillum.
            1 2019-12-31 10:19:21.980039  Giuseppe  Tempor dolore sint in eu lorem veniam veniam.
            2 2019-12-31 13:56:45.575426  Giuseppe  Do quis fugiat sint ut ut, do anim eu est qui ...
            3 2019-12-31 15:47:29.995420  Giuseppe  Do qui qui elit ea in sed culpa, aliqua magna ...
            4 2019-12-31 16:23:00.348542      Mary  Sunt excepteur mollit voluptate dolor sint occ...

    """

    def __init__(self, size: int, users: Optional[List[str]] = None, seed: int = 100) -> None:
        """Instantiate ChatGenerator class.

        Note that this seeds numpy's global random state.

        Args:
            size (int): Number of messages to generate.
            users (list, optional): List with names of the users. Defaults to module variable USERS.
            seed (int, optional): Seed for random processes. Defaults to 100.

        """
        self.size = size
        self.users = USERS if not users else users
        self.seed = seed
        np.random.seed(seed=self.seed)

    def _generate_messages(self) -> List[str]:
        """Generate list of messages.

        To generate sentences, Lorem Ipsum is used. Each sentence is followed by a space and a randomly sampled
        emoji (which may be the empty string).

        Returns:
            list: List with messages (as strings).

        """
        emojis = self._generate_emojis()
        s = sentence(count=self.size, comma=(0, 2), word_range=(4, 8))
        sentences = itertools.islice(s, self.size)
        return [text + " " + emoji for text, emoji in zip(sentences, emojis)]

    def _generate_emojis(self, k: int = 1) -> np.ndarray:
        """Generate random array of emojis.

        Emojis are sampled from a list of `n` emojis and `k*n` empty strings, i.e. the larger `k` the more
        likely it is to sample an empty string.

        Args:
            k (int, optional): Number of empty strings per emoji in the sampling pool. Defaults to 1.

        Returns:
            numpy.ndarray: Array of length ``size`` with emojis (or empty strings).

        """
        emojis = list(EMOJI_DATA.keys())
        pool = emojis + [""] * k * len(emojis)
        return np.random.choice(pool, self.size)

    def _generate_timestamps(self, last: Optional[datetime] = None) -> List[datetime]:
        """Generate list of timestamps.

        Gaps between consecutive messages (in minutes) are sampled from a Lomax distribution, seeded with
        ``seed``.

        Args:
            last (datetime, optional): Datetime of last message. If ``None``, defaults to current date.

        Returns:
            list: List with timestamps, in ascending order. Last element is ``last`` (without microseconds).

        """
        if last is None:
            last = datetime.now()
        last = last.replace(microsecond=0)
        c = 1.0065
        scale = 40.06
        loc = 30
        gaps = lomax.rvs(c=c, loc=loc, scale=scale, size=self.size - 1, random_state=self.seed)
        # Minutes elapsed between each message and the last one
        minutes_before_last = [0] + gaps.cumsum().tolist()
        ts = [last - timedelta(seconds=minutes * 60) for minutes in minutes_before_last]
        return ts[::-1]

    def _generate_users(self) -> np.ndarray:
        """Generate array of users.

        Returns:
            numpy.ndarray: Array of length ``size`` with the name of the user sending each message.

        """
        return np.random.choice(self.users, self.size)

    def _generate_df(self, last_timestamp: Optional[datetime] = None) -> pd.DataFrame:
        """Generate random chat as DataFrame.

        Args:
            last_timestamp (datetime, optional): Datetime of last message. If ``None``, defaults to current date.

        Returns:
            pandas.DataFrame: DataFrame with random messages.

        """
        messages = self._generate_messages()
        timestamps = self._generate_timestamps(last=last_timestamp)
        users = self._generate_users()
        return pd.DataFrame.from_dict(
            {COLNAMES_DF.DATE: timestamps, COLNAMES_DF.USERNAME: users, COLNAMES_DF.MESSAGE: messages}
        )

    def generate(
        self, filepath: Optional[str] = None, hformat: Optional[str] = None, last_timestamp: Optional[datetime] = None
    ) -> "WhatsAppChat":
        """Generate random chat as ``WhatsAppChat``.

        Args:
            filepath (str): If given, generated chat is saved with name ``filepath`` (must be a local path).
            hformat (str, optional): Format of the header, e.g. ``'[%y-%m-%d %H:%M:%S] - %name:'``. Only used
                when ``filepath`` is given.
            last_timestamp (datetime, optional): Datetime of last message. If `None`, defaults to current date.

        Returns:
            WhatsAppChat: Chat with random messages.

        .. seealso::

            * ``WhatsAppChat.to_txt``

        """
        df = self._generate_df(last_timestamp=last_timestamp)
        chat = WhatsAppChat(df)
        if filepath:
            chat.to_txt(filepath=filepath, hformat=hformat)
        return chat


def generate_chats_hformats(
    output_path: str,
    size: int = 2000,
    hformats: Optional[List[str]] = None,
    filepaths: Optional[List[str]] = None,
    last_timestamp: Optional[datetime] = None,
    seed: int = 100,
    verbose: bool = False,
    export_as_zip: bool = False,
) -> None:
    r"""Generate a chat and export using given header format.

    If no hformat specified, chat is generated & exported using all supported header formats. The same chat is
    exported once per header format.

    Args:
        output_path (str): Path to directory to export all generated chats as txt.
        size (int, optional): Number of messages of the chat. Defaults to 2000.
        hformats (list, optional): List of header formats to use when exporting chat. If None,
            defaults to all supported header formats.
        filepaths (list, optional): List with filepaths (only txt files), one per header format and relative to
            ``output_path``. If None, defaults to `whatstk.utils.utils._map_hformat_filename(hformat)` with
            extension ``.txt``.
        last_timestamp (datetime, optional): Datetime of last message. If `None`, defaults to current date.
        seed (int, optional): Seed for random processes. Defaults to 100.
        verbose (bool): Set to True to print runtime messages.
        export_as_zip (bool): Set to True to export the chat(s) zipped, additionally.

    Raises:
        ValueError: If ``filepaths`` is given and its length differs from the number of header formats.

    .. seealso::

        * ``ChatGenerator``
        * ``ChatGenerator.generate``

    """
    if not hformats:
        hformats = get_supported_hformats_as_list()

    # Sanity check
    if filepaths and len(filepaths) != len(hformats):
        raise ValueError("Length of filepaths must be equal to length of hformats.")

    # Generate chat
    chat = ChatGenerator(size=size, seed=seed).generate(last_timestamp=last_timestamp)
    for i, hformat in enumerate(hformats):
        if verbose:
            print("Exporting format: {}".format(hformat))
        if filepaths:
            filepath = filepaths[i]
        else:
            filepath = "{}.txt".format(_map_hformat_filename(hformat))
        filepath = os.path.join(output_path, filepath)
        chat.to_txt(filepath=filepath, hformat=hformat)
        if export_as_zip:
            # Swap only the file extension; replacing every '.txt' would also alter directory names.
            zip_filepath = os.path.splitext(filepath)[0] + ".zip"
            chat.to_zip(zip_filepath, hformat)
def is_supported_verbose(hformat: str) -> str:
    """Describe, as a sentence, whether header `hformat` is currently supported.

    Both manual support and support through `auto_header` are reported.

    Args:
        hformat (str): Header format.

    Returns:
        str: Information message.

    Example:
        Check if format ``'%y-%m-%d, %H:%M - %name:'`` is supported.

        .. code-block:: python

            >>> from whatstk.whatsapp.hformat import is_supported_verbose
            >>> is_supported_verbose('%y-%m-%d, %H:%M - %name:')
            "The header '%y-%m-%d, %H:%M - %name:' is supported. `auto_header` for this header is supported."

    """
    manual_ok, auto_ok = is_supported(hformat)
    manual_negation = "" if manual_ok else "not "
    auto_negation = "" if auto_ok else "not "
    template = "The header '{}' is {}supported. `auto_header` for this header is {}supported."
    return template.format(hformat, manual_negation, auto_negation)
def get_supported_hformats_as_dict(encoding: str = "utf8") -> List[Dict[str, object]]:
    """Get supported formats and relevant info, as stored in the header format support file.

    Args:
        encoding (str, optional): Encoding to use for UTF when reading/writing (ex. 'utf-8').

    Returns:
        list: One dictionary per supported header format. Each dictionary contains, at least:
            * ``format``: Header format. All formats appearing are supported.
            * ``auto_header``: 1 if auto_header is supported, 0 otherwise.

    """
    with open(hformat_support_filepath, "r", encoding=encoding) as f:
        return json.load(f)