├── message_analyser
    ├── retriever
    │   ├── __init__.py
    │   ├── vkOpt.py
    │   └── telegram.py
    ├── __init__.py
    ├── misc.py
    ├── myMessage.py
    ├── storage.py
    ├── analyser.py
    ├── structure_tools.py
    ├── GUI.py
    └── plotter.py
├── .gitignore
├── examples
    ├── sample one
    │   ├── heat_map.png
    │   ├── wordcloud.png
    │   ├── barplot_emojis.png
    │   ├── lineplot_messages.png
    │   ├── barplot_messages_per_day.png
    │   ├── barplot_non_text_messages.png
    │   ├── distplot_messages_per_day.png
    │   ├── lineplot_message_length.png
    │   ├── pie_messages_per_author.png
    │   ├── distplot_messages_per_hour.png
    │   ├── distplot_messages_per_month.png
    │   ├── barplot_messages_per_minutes.png
    │   ├── barplot_messages_per_weekday.png
    │   └── stackplot_non_text_messages_percentage.png
    └── sample two
    │   ├── heat_map.png
    │   ├── barplot_emojis.png
    │   ├── lineplot_messages.png
    │   ├── barplot_messages_per_day.png
    │   ├── barplot_non_text_messages.png
    │   ├── distplot_messages_per_day.png
    │   ├── lineplot_message_length.png
    │   ├── pie_messages_per_author.png
    │   ├── distplot_messages_per_hour.png
    │   ├── distplot_messages_per_month.png
    │   ├── barplot_messages_per_minutes.png
    │   ├── barplot_messages_per_weekday.png
    │   └── stackplot_non_text_messages_percentage.png
├── requirements.txt
├── config.ini
├── config.example.ini
├── main.py
├── LICENSE
└── README.md


/message_analyser/retriever/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | __pycache__/
3 | *.session
4 | *.session-journal


--------------------------------------------------------------------------------
/examples/sample one/heat_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/heat_map.png


--------------------------------------------------------------------------------
/examples/sample one/wordcloud.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/wordcloud.png


--------------------------------------------------------------------------------
/examples/sample two/heat_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/heat_map.png


--------------------------------------------------------------------------------
/examples/sample one/barplot_emojis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/barplot_emojis.png


--------------------------------------------------------------------------------
/examples/sample two/barplot_emojis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/barplot_emojis.png


--------------------------------------------------------------------------------
/examples/sample one/lineplot_messages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/lineplot_messages.png


--------------------------------------------------------------------------------
/examples/sample two/lineplot_messages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/lineplot_messages.png


--------------------------------------------------------------------------------
/examples/sample one/barplot_messages_per_day.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/barplot_messages_per_day.png


--------------------------------------------------------------------------------
/examples/sample one/barplot_non_text_messages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/barplot_non_text_messages.png


--------------------------------------------------------------------------------
/examples/sample one/distplot_messages_per_day.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/distplot_messages_per_day.png


--------------------------------------------------------------------------------
/examples/sample one/lineplot_message_length.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/lineplot_message_length.png


--------------------------------------------------------------------------------
/examples/sample one/pie_messages_per_author.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/pie_messages_per_author.png


--------------------------------------------------------------------------------
/examples/sample two/barplot_messages_per_day.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/barplot_messages_per_day.png


--------------------------------------------------------------------------------
/examples/sample two/barplot_non_text_messages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/barplot_non_text_messages.png


--------------------------------------------------------------------------------
/examples/sample two/distplot_messages_per_day.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/distplot_messages_per_day.png


--------------------------------------------------------------------------------
/examples/sample two/lineplot_message_length.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/lineplot_message_length.png


--------------------------------------------------------------------------------
/examples/sample two/pie_messages_per_author.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/pie_messages_per_author.png


--------------------------------------------------------------------------------
/examples/sample one/distplot_messages_per_hour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/distplot_messages_per_hour.png


--------------------------------------------------------------------------------
/examples/sample one/distplot_messages_per_month.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/distplot_messages_per_month.png


--------------------------------------------------------------------------------
/examples/sample two/distplot_messages_per_hour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/distplot_messages_per_hour.png


--------------------------------------------------------------------------------
/examples/sample two/distplot_messages_per_month.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/distplot_messages_per_month.png


--------------------------------------------------------------------------------
/examples/sample one/barplot_messages_per_minutes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/barplot_messages_per_minutes.png


--------------------------------------------------------------------------------
/examples/sample one/barplot_messages_per_weekday.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/barplot_messages_per_weekday.png


--------------------------------------------------------------------------------
/examples/sample two/barplot_messages_per_minutes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/barplot_messages_per_minutes.png


--------------------------------------------------------------------------------
/examples/sample two/barplot_messages_per_weekday.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/barplot_messages_per_weekday.png


--------------------------------------------------------------------------------
/examples/sample one/stackplot_non_text_messages_percentage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample one/stackplot_non_text_messages_percentage.png


--------------------------------------------------------------------------------
/examples/sample two/stackplot_non_text_messages_percentage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vlajnaya-mol/message-analyser/HEAD/examples/sample two/stackplot_non_text_messages_percentage.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | python-dateutil==2.8.0
 2 | 
 3 | telethon==1.5.5
 4 | 
 5 | numpy==1.16.1
 6 | 
 7 | pandas==0.24.1
 8 | 
 9 | matplotlib==3.0.2
10 | 
11 | seaborn==0.9.0
12 | 
13 | wordcloud==1.5.0
14 | 
15 | emoji==0.5.1


--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
 1 | [telegram_secrets]
 2 | api_id = *Your API ID*
 3 | api_hash = *Your API HASH*
 4 | phone_number = *Your phone*
 5 | session_name = message analyser
 6 | 
 7 | [session_params]
 8 | dialog_id = 
 9 | vkopt_file = 
10 | words_file = 
11 | your_name = 
12 | target_name = 
13 | 
14 | 


--------------------------------------------------------------------------------
/config.example.ini:
--------------------------------------------------------------------------------
 1 | [telegram_secrets]
 2 | api_id = 123456
 3 | api_hash = d49161cade9d408c804a5d58b6ec0aef
 4 | phone_number = +79123456789
 5 | session_name = username1
 6 | 
 7 | [session_params]
 8 | dialog_id = 123456789
 9 | vkopt_file = 
10 | words_file = 
11 | your_name = username1
12 | target_name = username2
13 | 
14 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from message_analyser.GUI import start_gui
 3 | 
 4 | 
 5 | if __name__ == "__main__":
 6 |     aio_loop = asyncio.get_event_loop()
 7 |     try:
 8 |         aio_loop.run_until_complete(start_gui(aio_loop))
 9 |     finally:
10 |         if not aio_loop.is_closed():
11 |             aio_loop.close()


--------------------------------------------------------------------------------
/message_analyser/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | logger = logging.getLogger("message_analyser")
 4 | logger.setLevel(logging.INFO)
 5 | ch = logging.StreamHandler()
 6 | ch.setLevel(logging.INFO)
 7 | formatter = logging.Formatter("%(asctime)s - %(message)s")
 8 | ch.setFormatter(formatter)
 9 | logger.addHandler(ch)
10 | 


--------------------------------------------------------------------------------
/message_analyser/misc.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import logging
 3 | 
# NOTE(review): not used inside this module; looks like a pause length - confirm the unit at the call sites.
delay = 0.05
months_border = 2  # if the conversation is shorter than this value, then xticks will be weeks, not months.
 6 | 
 7 | 
 8 | def avg(l):
 9 |     if not l:
10 |         return 0
11 |     return sum(l) / len(l)
12 | 
13 | 
def log_line(*args):
    """Log all ``args`` as a single INFO record on the "message_analyser" logger.

    Every argument is converted with ``str``; the pieces are separated by
    single spaces and the record text ends with a newline character.
    """
    message = ' '.join(map(str, args)) + '\n'
    logging.getLogger("message_analyser").info(message)
16 | 
17 | 
def time_offset(date):
    """Return the local timezone's UTC offset, in hours, at the moment ``date``.

    Args:
        date: An object with a ``timetuple()`` method (e.g. a ``datetime``),
            interpreted as local time.

    Returns:
        float: Hours east of UTC (negative when the local zone is west of UTC).
    """
    local = time.localtime(int(time.mktime(date.timetuple())))
    # time.timezone / time.altzone are seconds WEST of UTC for the non-DST and
    # DST zone respectively, hence the sign flip at the end.
    if local.tm_isdst == 0:
        seconds_west = time.timezone
    else:
        seconds_west = time.altzone
    return seconds_west / 60 / 60 * -1
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Oleg Borovik
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # message-analyser
 2 | Statistical analysis of VKontakte and Telegram message history.
 3 | ![front example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/heat_map.png)
 4 | 
 5 | ### Dependencies
 6 | * [Telethon](https://github.com/LonamiWebs/Telethon)
 7 | * [seaborn](https://github.com/mwaskom/seaborn)
 8 | * [wordcloud](https://github.com/amueller/word_cloud)
 9 | 
10 | ### Installation
11 | * Use Python 3.6. Version 3.7 may not work properly.
12 | * `git clone https://github.com/vlajnaya-mol/message-analyser`
13 | * Install `requirements.txt`. (`pip install -r /path/to/requirements.txt`)
14 | 
15 | ### Usage
16 | #### Execution
17 | * Run `python main.py`
18 | * Follow GUI commands
19 | 
20 | #### Telegram messages
21 | * You need API Hash and API ID from [here](https://core.telegram.org/api/obtaining_api_id)
22 | * Be sure to use proxy if Telegram is blocked in your country.
23 | 
24 | #### VKontakte messages
25 | * Install [VkOpt extension](http://vkopt.net/)
26 | * Save your conversation as a .txt file using this extension
27 | 
28 |   Make sure you used the **default** format settings:
29 |   
30 |   ```
31 |   %username% (%date%):
32 |   %message%
33 |   
34 |   HH:MM:ss  dd/mm/yyyy
35 |   ```
36 | * You can concatenate two VkOpt files and use them as one
37 | * Include this file in the analysis process
38 | 
39 | #### Words
40 | * Write words You are interested in to a file
41 | * Be sure words are saved correctly. Cyrillic words are ruined by saving in ASCII format. 
42 | * Include this file in the analysis process
43 | 
44 | #### Manual analysis
45 | * Fill in the `config.ini` file and use `retrieve_and_analyse(loop)` instead of using the GUI.
46 | * Use `analyse_from_file(path)` function instead of redownloading messages
47 | 
48 | ### Examples
49 | * All examples can be found [here](examples/)
50 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/barplot_messages_per_day.png)	 
51 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/barplot_messages_per_minutes.png)	 
52 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/barplot_messages_per_weekday.png)	 
53 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/barplot_non_text_messages.png) 
54 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/distplot_messages_per_day.png)
55 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/lineplot_message_length.png)	 
56 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/lineplot_messages.png)	 
57 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/pie_messages_per_author.png)	 
58 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/stackplot_non_text_messages_percentage.png)	 
59 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/barplot_emojis.png)	 
60 | ![other example](https://github.com/vlajnaya-mol/message-analyser/blob/master/examples/sample%20one/wordcloud.png)
61 | 
62 | ### Potential project improvements
63 | - analysis of group chats.
64 | - improve tkinter theme.
65 | - add VkOpt stickers as emojis to messages.
66 | - add a plot of the correlation between the number of voice messages and the average message length.
67 | - add "first-to-write" and "response time (delay)" plots (lineplot).
68 | - add n-grams plot (lineplot).
69 | 


--------------------------------------------------------------------------------
/message_analyser/myMessage.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from datetime import datetime
 3 | 
 4 | 
 5 | def islink(string):
 6 |     # https://stackoverflow.com/a/7160778
 7 |     regex = re.compile(
 8 |         r'^(?:http|ftp)s?://'  # http:// or https://
 9 |         r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
10 |         r'localhost|'  # localhost...
11 |         r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
12 |         r'(?::\d+)?'  # optional port
13 |         r'(?:/?|[/?]\S+)$', re.IGNORECASE)
14 |     return re.match(regex, string) is not None
15 | 
16 | 
class MyMessage(dict):
    """Represents a message entity from some messenger.

    The message is a ``dict`` whose items can also be read as attributes
    (``msg.text`` is ``msg["text"]``). Assigning an attribute whose key is
    already present raises; assigning a new attribute stores it as an item.

    Attributes:
        See __init__ args.
    """

    def __init__(self, text, date, author,
                 is_forwarded=False,
                 document_id=None,
                 has_photo=False,
                 has_voice=False,
                 has_audio=False,
                 has_video=False,
                 has_sticker=False,
                 is_link=None):
        """Inits MyMessage class with all its attributes values.

        Notes:
            How to check a message for being a file:
                self.has_document = document_id is not None
                self.is_file = text == "" and (self.has_photo or self.has_document)
                # Because sometimes a photo is not considered as a document.

        Args:
            text (str): A raw content of the message.
            date (string ("%Y-%m-%d %H:%M:%S") date or datetime obj): A time when this message was sent.
            author (str): Author's name.
            is_forwarded (bool): True if the message is forwarded from another person.
            document_id (int): Integer id of the document (-1 for vkOpt messages, None for no document).
            has_photo (bool): True if the message has photo.
            has_voice (bool): True if the message has voice-message file attached.
            has_audio (bool): True if the message has audio file attached (NOT voice!).
            has_video (bool): True if the message has video-message file attached (not just a VIDEO!).
            has_sticker (bool): True if the message has sticker.
            is_link (bool): True if the whole text of the message is a link.
                When None, it is computed from ``text`` with ``islink``.

        Raises:
            ValueError: If ``date`` is not a datetime and its string form does
                not match "%Y-%m-%d %H:%M:%S".
        """
        super().__init__()
        attributes = {"text": text,
                      "date": date,
                      "author": author,
                      "is_forwarded": is_forwarded,
                      "document_id": document_id,
                      "has_photo": has_photo,
                      "has_voice": has_voice,
                      "has_audio": has_audio,
                      "has_video": has_video,
                      "has_sticker": has_sticker,
                      "is_link": is_link
                      }
        if not isinstance(date, datetime):
            attributes["date"] = datetime.strptime(str(date), "%Y-%m-%d %H:%M:%S")
        if is_link is None:
            attributes["is_link"] = islink(text)
        # dict.update bypasses __setattr__, so the initial fill is not blocked
        # by the "immutable" check below.
        self.update(attributes)

    def __str__(self):
        return (f"Author = {self.author}\n"
                f"Content = [{self.text[:100] + '[...]' if len(self.text) > 100 else self.text}]\n"
                f"Date = {self.date}\n"
                f"Contains document = {self.document_id is not None}\n"
                f"Has photo = {self.has_photo}\n"
                f"Is link = {self.is_link}\n"
                f"Is forwarded = {self.is_forwarded}\n")

    def __repr__(self):
        return str(self)

    def __getattr__(self, attr):
        """Expose dict items as attributes.

        Only called when normal attribute lookup fails. A missing key must
        surface as AttributeError (not KeyError), otherwise ``hasattr`` and
        ``getattr(obj, name, default)`` propagate the KeyError instead of
        reporting the attribute as absent.
        """
        try:
            return self[attr]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}") from None

    def __setattr__(self, key, value):
        """Store a new attribute as a dict item; refuse to overwrite an existing one."""
        if key in self:
            raise Exception("Can't mutate an Immutable: self.%s = %r" % (key, value))
        self[key] = value

    @staticmethod
    def from_dict(d):
        """Build a MyMessage from a dict whose keys are the __init__ argument names."""
        return MyMessage(**d)
96 | 


--------------------------------------------------------------------------------
/message_analyser/storage.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import json
 4 | import configparser
 5 | from message_analyser.myMessage import MyMessage
 6 | from message_analyser.misc import log_line
 7 | 
 8 | 
 9 | def _get_config_file_name():
10 |     return os.path.join(os.path.split(os.path.normpath(os.path.dirname(__file__)))[0], "config.ini")
11 | 
12 | 
def store_session_params(params):
    """Write the chosen session parameters to the [session_params] section of config.ini.

    Args:
        params (dict): Always read: "from_vk", "from_telegram", "plot_words",
            "your_name", "target_name". Read conditionally: "dialogue" (when
            "from_telegram" is truthy; must end with "(id=<digits>)"),
            "vkopt_file" (when "from_vk" is truthy) and "words_file" (when
            "plot_words" is truthy).

    Raises:
        AssertionError: If neither source is selected, or a name is empty.
        ValueError: If the Telegram dialogue string has no trailing "(id=...)".
    """
    config_file_name = _get_config_file_name()
    config_parser = configparser.ConfigParser()
    config_parser.read(config_file_name, encoding="utf-8-sig")
    # ConfigParser.set raises NoSectionError for an unknown section, e.g. when
    # config.ini is missing or has been trimmed by hand.
    if not config_parser.has_section("session_params"):
        config_parser.add_section("session_params")

    assert params["from_vk"] or params["from_telegram"]
    if params["from_telegram"]:
        # Raw string: "\(" in a plain literal is an invalid escape sequence.
        id_match = re.search(r"\(id=([0-9]+)\)$", params["dialogue"])
        if id_match is None:
            raise ValueError(f"Can't find a trailing '(id=...)' in the dialogue {params['dialogue']!r}")
        dialog_id = id_match.group(1)
    else:
        dialog_id = ""
    config_parser.set("session_params", "dialog_id", dialog_id)
    config_parser.set("session_params", "vkopt_file", params["vkopt_file"] if params["from_vk"] else "")
    config_parser.set("session_params", "words_file", params["words_file"] if params["plot_words"] else "")

    assert params["your_name"] and params["target_name"]
    config_parser.set("session_params", "your_name", params["your_name"])
    config_parser.set("session_params", "target_name", params["target_name"])
    with open(config_file_name, "w+", encoding="utf-8") as config_file:
        config_parser.write(config_file)
    log_line(f"Session parameters were stored in {config_file_name} file.")
30 | 
31 | 
def get_session_params():
    """Read the [session_params] section of config.ini.

    Returns:
        tuple: ``(dialog_id, vkopt_file, words_file, your_name, target_name)``.
        ``dialog_id`` is an int, -1 when the option is empty or absent; every
        other value is a string that defaults to "".
    """
    path = _get_config_file_name()
    parser = configparser.ConfigParser()
    parser.read(path, encoding="utf-8-sig")

    def option(name):
        # A missing section or option yields an empty string.
        return parser.get("session_params", name, fallback="")

    raw_dialog_id = option("dialog_id")
    dialog_id = -1 if not raw_dialog_id else int(raw_dialog_id)
    vkopt_file, words_file = option("vkopt_file"), option("words_file")
    your_name, target_name = option("your_name"), option("target_name")
    log_line(f"Session parameters were received from {path} file.")
    return dialog_id, vkopt_file, words_file, your_name, target_name
44 | 
45 | 
def store_telegram_secrets(api_id, api_hash, phone_number, session_name="Message retriever"):
    """Write the Telegram credentials to the [telegram_secrets] section of config.ini.

    Args:
        api_id: Telegram API id; stored as its string form.
        api_hash: Telegram API hash; stored as its string form.
        phone_number: Phone number of the account; stored as its string form.
        session_name: Value written to the "session_name" option.
    """
    config_file_name = _get_config_file_name()
    config_parser = configparser.ConfigParser()
    config_parser.read(config_file_name, encoding="utf-8-sig")
    # ConfigParser.set raises NoSectionError for an unknown section, e.g. when
    # config.ini does not exist yet.
    if not config_parser.has_section("telegram_secrets"):
        config_parser.add_section("telegram_secrets")
    # Option values must be strings; str() lets callers pass e.g. an int api_id.
    config_parser.set("telegram_secrets", "api_id", str(api_id))
    config_parser.set("telegram_secrets", "api_hash", str(api_hash))
    config_parser.set("telegram_secrets", "session_name", str(session_name))
    config_parser.set("telegram_secrets", "phone_number", str(phone_number))
    with open(config_file_name, "w+", encoding="utf-8") as config_file:
        config_parser.write(config_file)
    log_line(f"Telegram secrets were stored in {config_file_name} file.")
57 | 
58 | 
def get_telegram_secrets():
    """Read the [telegram_secrets] section of config.ini.

    Returns:
        tuple: ``(api_id, api_hash, phone_number, session_name)`` as strings;
        an option that is absent is returned as "".
    """
    path = _get_config_file_name()
    parser = configparser.ConfigParser()
    parser.read(path, encoding="utf-8-sig")
    api_id, api_hash, phone_number, session_name = (
        parser.get("telegram_secrets", option, fallback="")
        for option in ("api_id", "api_hash", "phone_number", "session_name")
    )
    log_line(f"Telegram secrets were received from {path} file.")
    return api_id, api_hash, phone_number, session_name
69 | 
70 | 
def store_msgs(file_path, msgs):
    """Dump ``msgs`` to ``file_path`` as JSON.

    Values the JSON encoder cannot serialise natively are written as their
    ``str()`` form.

    Args:
        file_path (str): Destination file; overwritten if it exists.
        msgs (list): Messages to store.
    """
    with open(file_path, 'w') as out:
        json.dump(msgs, out, default=str)
    stored = len(msgs)
    log_line(f"{stored} messages were stored in {file_path} file.")
75 | 
76 | 
def store_top_words_count(words, your_words_cnt, target_words_cnt, file_path):
    """Write a per-word usage table to ``file_path`` as comma-separated text.

    Args:
        words: Iterable of words, written in the given order.
        your_words_cnt: Mapping word -> how many times you sent it.
        target_words_cnt: Mapping word -> how many times the target sent it.
        file_path (str): Destination file (UTF-8); overwritten if it exists.
    """
    with open(file_path, 'w', encoding="utf-8") as table:
        table.write("Word, You sent, Target sent, Total\n")
        for word in words:
            yours = your_words_cnt[word]
            theirs = target_words_cnt[word]
            table.write(f"{word}, {yours}, {theirs}, {yours + theirs}\n")
83 | 
def get_msgs(file_path):
    """Load messages from a JSON file containing a list of message dicts.

    Args:
        file_path (str): File to read.

    Returns:
        list: One MyMessage per stored dict, in file order.
    """
    with open(file_path, 'r') as source:
        msgs = list(map(MyMessage.from_dict, json.load(source)))
    log_line(f"{len(msgs)} messages were received from {file_path} file.")
    return msgs
89 | 
90 | 
def get_words(file_path):
    """Read the words of interest from a text file, one word per line.

    A line is kept only if, once stripped, it is non-empty and consists solely
    of letters, apostrophes and backticks. Blank lines used to slip through
    (``all([])`` is True) and came back as empty "words"; they are now skipped.

    Args:
        file_path (str): File to read (UTF-8, an optional BOM is ignored).

    Returns:
        list of str: The accepted words, in file order.
    """
    words = []
    with open(file_path, 'r', encoding="utf-8-sig") as f:
        for line in f:
            word = line.strip()
            if word and all(ch.isalpha() or ch in "'`" for ch in word):
                words.append(word)
    log_line(f"{len(words)} words were received from {file_path} file.")
    return words
97 | 


--------------------------------------------------------------------------------
/message_analyser/retriever/vkOpt.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from datetime import datetime
  3 | from message_analyser.myMessage import MyMessage
  4 | from message_analyser.misc import log_line
  5 | 
  6 | 
  7 | def get_mymessages_from_file(your_name, target_name, opt_file_name):
  8 |     """Retrieves a list of MyMessage representations of messages from a file generated by VkOpt GChrome extension.
  9 | 
 10 |     Notes:
 11 |         You must firstly ensure that your_name and target_name are equal to the names in opt_file_name text file.
 12 | 
 13 |     Args:
 14 |         your_name (str): Your name.
 15 |         target_name (str): Target's name.
 16 |         opt_file_name (str): The name of the file to read.
 17 | 
 18 |     Returns:
 19 |         A list of MyMessage objects.
 20 |     """
 21 |     log_line("Start reading vkOpt messages")
 22 |     with open(opt_file_name, 'r', encoding="utf8") as f:
 23 |         lines = f.readlines()
 24 |     opt_message_list = _parse_lines(lines, your_name, target_name)
 25 |     msgs = [_opt_to_mymessage(msg) for msg in opt_message_list]
 26 |     log_line(len(opt_message_list), " vkOpt messages were received.")
 27 |     return msgs
 28 | 
 29 | 
 30 | def _opt_to_mymessage(msg):
 31 |     """Transforms dictionary representation of the VkOpt message to the MeMessage obj.
 32 | 
 33 |     Notes:
 34 |         Document id of a VkOpt message isn't parsed and may only be -1.
 35 |         Photos aren't documents (for some reason).
 36 |         Message is forwarded if only it has attached forwarded messages and doesn't contain any text.
 37 | 
 38 |     Args:
 39 |         msg (dict): Representation of a VkOpt message.
 40 | 
 41 |     Returns:
 42 |         MyMessage representation of vkOpt message
 43 |     """
 44 |     return MyMessage(text=msg["text"], date=msg["date"],
 45 |                      author=msg["author"],
 46 |                      has_sticker=msg["attachment"].startswith("{\"type\":\"sticker\""),
 47 |                      is_forwarded=msg["has_forwards"] and not msg["text"],
 48 |                      document_id=-1 if msg["attachment"] and not msg["attachment"].startswith(
 49 |                          "vk.com/photo") else None,
 50 |                      has_photo=msg["attachment"].startswith("vk.com/photo"),
 51 |                      has_voice=("audio_msg.opus" in msg["attachment"]) or
 52 |                                ("voice_message.webm" in msg["attachment"]) or
 53 |                                ("audiocomment.3gp" in msg["attachment"]),
 54 |                      has_audio=msg["attachment"].startswith("vk.com/audio"),
 55 |                      has_video=False,  # msg["attachment"].startswith("vk.com/video"),
 56 |                      # vk.com doesn't have video messages in the way Telegram does.
 57 |                      is_link=True if (msg["attachment"].startswith("{\"type\":\"wall\"") or
 58 |                                       msg["attachment"].startswith("{\"type\":\"link\"")) else None)
 59 | 
 60 | 
 61 | def _parse_lines(lines, your_name, target_name, num=1000000):
 62 |     """Parses given text lines and retrieves a message list.
 63 | 
 64 |     Notes:
 65 |         Parses messages from vkOpt GChrome extension with a DEFAULT message format.
 66 |         Appropriate message format is "%username% (%date%):
 67 |                                        %message%"
 68 |         Appropriate datetime format is "HH:MM:ss  dd/mm/yyyy".
 69 |         More than one nested forwarded messages are counted as ONE forwarded message.
 70 |         ... As well as a message with multiple photos counts as ONE photo.
 71 |         ... As well as a message with multiple audio files ... what the heck?
 72 | 
 73 |     Args:
 74 |         your_name (str): Your name.
 75 |         target_name (str): Target's name.
 76 |         lines (list of strings): Text lines of the file.
 77 |         num (int): Max number of the messages to retrieve.
 78 | 
 79 |     Returns:
 80 |         List of dictionaries such as:
 81 |             {
 82 |                 "text": text of the message (str),
 83 |                 "has_forwards": flag (bool),
 84 |                 "attachment": text (str) of the attachment (without first line)
 85 |             }
 86 |     """
 87 |     lines[0] = lines[0].replace('\ufeff', '')  # remove start character
 88 |     # assert lines[0].startswith(target_name) or lines[0].strip().startswith(your_name)
 89 |     date_pattern = "[0-2][0-9]:[0-5][0-9]:[0-5][0-9]  [0-3][0-9]/[0-1][0-9]/([0-9]{4})"
 90 |     date_regex = re.compile(date_pattern)
 91 |     title_ending_regex = re.compile(" \(" + date_pattern + "\):\n$")
 92 |     msg_title_regex = re.compile("^\t*(" + your_name + '|' + target_name + ") \(" + date_pattern + "\):\n$")
 93 |     msgs = []
 94 |     current_msg = {"text": "", "has_forwards": False, "attachment": ""}
 95 |     i = 0
 96 |     while i < len(lines) and len(msgs) <= num:
 97 |         line = lines[i]
 98 |         if line.startswith("Attachments:["):
 99 |             i += 1
100 |             current_msg["attachment"] = lines[i]
101 |         else:
102 |             search = title_ending_regex.search(line)
103 |             if search is not None and search.span()[1] == len(line):
104 |                 if line[0].isspace():
105 |                     current_msg["has_forwards"] = True
106 |                     i += 1
107 |                 else:
108 |                     if not msg_title_regex.match(line):
109 |                         log_line(f"[{line}] DOES NOT MATCH ANY SUGGESTED NAME! NO VK OPT MESSAGES WILL BE RECEIVED!")
110 |                         return []
111 |                     # removing redundant spaces after the message
112 |                     current_msg["text"] = current_msg["text"][:-3]
113 |                     msgs.append(current_msg)
114 |                     current_msg = {"text": "", "has_forwards": False, "attachment": ""}
115 |                     current_msg["date"] = datetime.strptime(date_regex.search(line).group(), "%H:%M:%S  %d/%m/%Y")
116 |                     current_msg["author"] = your_name if line.startswith(your_name) else target_name
117 |             elif not current_msg["has_forwards"]:
118 |                 if current_msg["attachment"]:
119 |                     current_msg["attachment"] += line
120 |                 else:
121 |                     current_msg["text"] += line
122 |         i += 1
123 |     if i > 0:
124 |         current_msg["text"] = current_msg["text"] if current_msg["attachment"] else current_msg["text"][:-3]
125 |         msgs.append(current_msg)
126 |     # first message is just a template and should be removed
127 |     return msgs[1:]
128 | 


--------------------------------------------------------------------------------
/message_analyser/retriever/telegram.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import message_analyser.storage as storage
  3 | from dateutil.relativedelta import relativedelta
  4 | from telethon import TelegramClient  # , sync
  5 | from telethon.tl.types import Message
  6 | from telethon.errors.rpcerrorlist import ApiIdInvalidError, PhoneNumberInvalidError, PhoneCodeInvalidError, \
  7 |     SessionPasswordNeededError, PasswordHashInvalidError, FloodWaitError
  8 | from message_analyser.myMessage import MyMessage
  9 | from message_analyser.misc import log_line, time_offset
 10 | 
 11 | 
 12 | async def get_str_dialogs(client=None, loop=None):
 13 |     """Retrieves a list with all user-dialogs of a current client.
 14 | 
 15 |     Args:
 16 |         client (TelegramClient object, optional): A client.
 17 |         loop (asyncio.windows_events._WindowsSelectorEventLoop, optional): An event loop.
 18 | 
 19 |     Returns:
 20 |         A list of strings. An example:
 21 | 
 22 |         ["Alex (id=00001)", "Kate (id=99990)"]
 23 | 
 24 |         Where Alex and Kate are names, 00001 and 99990 are IDs of their dialogs.
 25 |     """
 26 |     return [f"{dialog.name} (id={dialog.id})" for dialog in await _get_dialogs(client, loop)]
 27 | 
 28 | 
async def get_sign_in_results(api_id, api_hash, code, phone_number, password, session_name, loop=None):
    """Tries to sign-in in Telegram with given parameters.

    Notes: Automatically creates .session file for further sign-ins.

    Args:
        api_id (str/int): Telegram API id.
        api_hash (str): Telegram API hash.
        code (str/int): A confirmation code.
        phone_number (str): A phone number connected to such id/hash pair.
        password (str): 2FA password (if needed).
        session_name (str): A name of the current session.
        loop (asyncio.windows_events._WindowsSelectorEventLoop, optional): An event loop.

    Returns:
        A string describing the results of sign-in. One of: "success", "wrong api",
        "no internet", "need code", "need password", "need phone" or
        "need wait for <seconds>" (when a FloodWaitError was raised).
    """
    # Step 1: create the client and connect. Failures here are reported without
    # going through the disconnect logic of the second step.
    try:
        client = TelegramClient(session_name, api_id, api_hash, loop=loop)
        await client.connect()
    except (ApiIdInvalidError, ValueError):
        log_line("Unsuccessful sign-in! Wrong API.")
        return "wrong api"
    except OSError:
        log_line("No Internet connection.")
        return "no internet"
    # Step 2: authorize (only if the session is not authorized yet).
    try:
        if not await client.is_user_authorized():
            await client.send_code_request(phone_number)
            try:
                await client.sign_in(phone_number, code)
            except SessionPasswordNeededError:
                # The code was not enough: retry with the 2FA password.
                await client.sign_in(phone_number, password=password)
        if not await client.is_user_authorized():
            # Still not authorized: reuse the "need code" handler below.
            raise PhoneCodeInvalidError(request=None)
    except ApiIdInvalidError:
        log_line("Unsuccessful sign-in! Wrong API.")
        return "wrong api"
    except PhoneCodeInvalidError:
        log_line("Unsuccessful sign-in! Need code.")
        return "need code"
    except PasswordHashInvalidError:
        log_line("Unsuccessful sign-in! Need password.")
        return "need password"
    except (PhoneNumberInvalidError, TypeError):
        log_line("Unsuccessful sign-in! Need phone.")
        return "need phone"
    except FloodWaitError as err:
        log_line(f'Unsuccessful sign-in! {err.message}')
        return f'need wait for {err.seconds}'
    finally:
        # Runs on every path of step 2, including the early returns above.
        if client.is_connected():
            await client.disconnect()
    log_line("Successful sign-in.")
    return "success"
 84 | 
 85 | 
 86 | async def get_telegram_messages(your_name, target_name, loop=None, target_id=None, num=1000000):
 87 |     """Retrieves a list of messages from Telegram dialogue.
 88 | 
 89 |     Notes:
 90 |         Requires a ready-to-use Telegram secrets (id, hash etc).
 91 |         Asks for target's id in a case this parameter is None.
 92 |         Retrieves a photo album as distinct messages with photos.
 93 | 
 94 |     Args:
 95 |         your_name (str): Your name.
 96 |         target_name (str): Target's name.
 97 |         loop (asyncio.windows_events._WindowsSelectorEventLoop, optional): An event loop.
 98 |         target_id (int,optional):  Target's dialogue id.
 99 |         num (int,optional): No more than num NEWEST messages will be retrieved.
100 | 
101 |     Returns:
102 |         A list of MyMessage objects (from older messages to newer).
103 |     """
104 |     async with (await _get_client(loop=loop)) as client:
105 |         if target_id is None:
106 |             target_id = await _get_target_dialog_id(client)
107 |         target_entity = await client.get_entity(target_id)
108 |         log_line("Receiving Telegram messages...")
109 |         telethon_messages = await _retrieve_messages(client, target_entity, num)
110 |         messages = [_telethon_msg_to_mymessage(msg, target_id, your_name, target_name) for msg in telethon_messages]
111 |         log_line(f"{len(messages)} Telegram messages were received")
112 |         return messages
113 | 
114 | 
async def _retrieve_messages(client, target_entity, num):
    """Downloads the messages of target_entity batch by batch.

    Batches are requested from the newest messages towards the older ones; only
    telethon Message objects are kept. Every batch after the first one is awaited
    for no longer than 10 minutes: on timeout the messages collected so far are
    returned, while a ConnectionError is logged and re-raised.

    Returns:
        No more than num messages ordered from older to newer.
    """
    batch_size = min(3000, num)
    collected = []
    batch = await client.get_messages(target_entity, limit=batch_size)
    while len(batch) and len(collected) < num:
        # The next request continues right after the last message of this batch.
        last_id = batch[-1].id
        for item in batch:
            if isinstance(item, Message):
                collected.append(item)
        try:
            batch = await asyncio.wait_for(
                client.get_messages(target_entity,
                                    limit=min(batch_size, num - len(collected)),
                                    offset_id=last_id),
                10*60)
        except ConnectionError:
            log_line("Internet connection was lost.")
            raise
        except asyncio.TimeoutError:
            log_line("Telegram timeout error.")
            break
        else:
            received = len(collected[:num])
            if len(batch):
                log_line(f"{received} ({received/batch.total*100:.2f}%) messages received.")
            else:
                log_line(f"{received} (100%) messages received.")
    return collected[:num][::-1]
136 | 
137 | 
async def _get_dialogs(client=None, loop=None):
    """Returns the user dialogs of a client.

    When no client is given, a temporary one is created from the stored secrets
    and used as an async context manager for the duration of the request.
    """
    if client is not None:
        dialogs = await client.get_dialogs()
        return [dialog for dialog in dialogs if dialog.is_user]
    async with (await _get_client(loop)) as own_client:
        return await _get_dialogs(own_client)
143 | 
144 | 
async def _get_client(loop=None):
    """Creates and connects a Telegram client based on current Telegram secrets.

    If the session is not authorized yet, a confirmation code is requested and
    read from the console.

    Returns:
        TelegramClient object.
    """
    api_id, api_hash, phone_number, session_name = storage.get_telegram_secrets()
    # The loop is passed to the client only when one was actually given.
    loop_kwargs = {"loop": loop} if loop else {}
    client = TelegramClient(session_name, api_id, api_hash, **loop_kwargs)
    await client.connect()

    authorized = await client.is_user_authorized()
    if not authorized:
        await client.send_code_request(phone_number)
        confirmation_code = input("Please enter the code you received: ")
        await client.sign_in(phone_number, confirmation_code)
    return client
162 | 
163 | 
async def _get_target_dialog_id(client):
    """Prints all the user dialogues and asks the user for the id of the target's one.

    Returns:
        Integer value of target's dialogue id.
    """
    print("Here is a list of all your dialogues. Please find an id of a dialogue you want to analyse messages from.")
    dialog_descriptions = await get_str_dialogs(client)
    for description in dialog_descriptions:
        print(description)
    return int(input("Input target dialog ID :"))
175 | 
176 | 
def _telethon_msg_to_mymessage(msg, target_id, your_name, target_name):
    """Transforms telethon.tl.types.Message obj to MyMessage obj.

    Notes:
        An emoji representation of a sticker adds up to the message's text.
        The emoji is taken from the first sticker attribute that carries a non-empty
        `alt` value; nothing is added when there is no such attribute.
        Voice messages are audio documents with "audio/ogg" mime type; any other
        audio document counts as an audio file.

    Args:
        msg (telethon.tl.types.Message): A message.
        target_id (int): Target's dialogue id.
        your_name (str): Your name.
        target_name (str): Target's name.

    Returns:
        MyMessage obj.
    """
    sticker = msg.sticker
    sticker_emoji = ''
    if sticker is not None:
        # Do not rely on a fixed position of the sticker attribute in the list:
        # look for the attribute which actually has the emoji (`alt`).
        sticker_emoji = next((attr.alt for attr in sticker.attributes if getattr(attr, "alt", None)), '')
    document = msg.document
    return MyMessage(msg.message + sticker_emoji,
                     # Naive datetime shifted by the offset returned by time_offset().
                     msg.date.replace(tzinfo=None) + relativedelta(hours=time_offset(msg.date)),
                     target_name if msg.from_id == target_id else your_name,
                     is_forwarded=msg.forward is not None,
                     document_id=document.id if document is not None else None,
                     has_sticker=sticker is not None,
                     has_video=msg.video is not None,
                     has_voice=(msg.voice is not None and
                                msg.document.mime_type == "audio/ogg"),
                     has_audio=(msg.audio is not None and
                                msg.document.mime_type != "audio/ogg"),  # let audio != voice
                     has_photo=msg.photo is not None)
204 | 


--------------------------------------------------------------------------------
/message_analyser/analyser.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import asyncio
  3 | import datetime
  4 | import message_analyser.plotter as plt
  5 | import message_analyser.storage as storage
  6 | import message_analyser.retriever.vkOpt as vkOpt
  7 | import message_analyser.structure_tools as stools
  8 | import message_analyser.retriever.telegram as tlg
  9 | from message_analyser.misc import log_line, delay
 10 | 
 11 | 
 12 | async def save_scalar_info(msgs, your_name, target_name, dir_path):
 13 |     """Saves scalar information about messages into a file. Additionally prints all the info to console.
 14 | 
 15 |     Args:
 16 |         msgs (list of MyMessage objects): Messages.
 17 |         your_name (str): Your name.
 18 |         target_name (str): Target's name.
 19 |         dir_path (str): A path to the file to store info in.
 20 |     """
 21 |     with open(dir_path + "/scalar_info.csv", 'w', encoding="utf-8") as fp:
 22 |         day_messages = stools.get_messages_per_day(msgs)
 23 | 
 24 |         print_func = log_line
 25 | 
 26 |         fp.write(f"Start date:,{msgs[0].date}\n")
 27 |         print_func(f"{'Start date:'.ljust(25)}{msgs[0].date}")
 28 | 
 29 |         fp.write(f"Duration:,{str(msgs[-1].date - msgs[0].date).replace(',',' ')}\n")
 30 |         print_func(f"{'Duration:'.ljust(25)}{msgs[-1].date - msgs[0].date}")
 31 | 
 32 |         empty_days_num = len([day for day in day_messages if not day_messages[day]])
 33 |         fp.write(f"Days without messages:,{empty_days_num},\n")
 34 |         print_func(f"{'Days without messages:'.ljust(25)}{empty_days_num}")
 35 | 
 36 |         most_active = max(day_messages, key=lambda day: len(day_messages[day]))
 37 |         fp.write(f"Most active day:,{most_active} : {len(day_messages[most_active])} messages\n")
 38 |         print_func(f"{'Most active day:'.ljust(25)}{most_active} : {len(day_messages[most_active])} messages")
 39 | 
 40 |         average = len(msgs) / len(day_messages)
 41 |         fp.write(f"Average messages per day:,{average:.2f} messages\n")
 42 |         print_func(f"{'Average messages per day:'.ljust(25)}{average:.2f} messages")
 43 | 
 44 |         max_delta, start_pause, end_pause = stools.get_longest_pause(msgs)
 45 |         fp.write(f"Longest pause:,{str(max_delta).replace(',',' ')} From {start_pause} to {end_pause}\n")
 46 |         print_func(f"{'Longest pause:'.ljust(25)}{max_delta} From {start_pause} to {end_pause}")
 47 | 
 48 |         fp.write(f"\nINFO,TOTAL,{your_name},{target_name}\n")
 49 |         print_func(f"{'INFO'.ljust(20)}{'TOTAL'.ljust(15)}{your_name:<15s}{target_name:<15s}")
 50 | 
 51 |         total_num = len(msgs)
 52 |         target_num = len([msg for msg in msgs if msg.author == target_name])
 53 |         fp.write(f"All messages,{total_num},{total_num-target_num},{target_num}\n")
 54 |         print_func(f"{'All messages'.ljust(20)}{total_num:<15d}{total_num-target_num:<15d}{target_num:<15d}")
 55 | 
 56 |         msgs = stools.get_filtered(msgs, remove_forwards=True, remove_links=True, max_len=4095)
 57 | 
 58 |         total_chars = sum([len(msg.text) for msg in msgs if not msg.is_forwarded])
 59 |         target_chars = sum([len(msg.text) for msg in msgs if not msg.is_forwarded and msg.author == target_name])
 60 |         fp.write(f"Characters,{total_chars},{total_chars-target_chars},{target_chars}\n")
 61 |         print_func(f"{'Characters'.ljust(20)}{total_chars:<15d}{total_chars-target_chars:<15d}{target_chars:<15d}")
 62 | 
 63 |         total_photos = len([msg for msg in msgs if msg.has_photo])
 64 |         target_photos = len([msg for msg in msgs if msg.has_photo and msg.author == target_name])
 65 |         fp.write(f"Photos,{total_photos},{total_photos-target_photos},{target_photos}\n")
 66 |         print_func(f"{'Photos'.ljust(20)}{total_photos:<15d}{total_photos-target_photos:<15d}{target_photos:<15d}")
 67 | 
 68 |         total_stickers = len([msg for msg in msgs if msg.has_sticker])
 69 |         target_stickers = len([msg for msg in msgs if msg.has_sticker and msg.author == target_name])
 70 |         fp.write(f"Stickers,{total_stickers},{total_stickers-target_stickers},{target_stickers}\n")
 71 |         print_func((f"{'Stickers'.ljust(20)}{total_stickers:<15d}{total_stickers-target_stickers:<15d}"
 72 |                     f"{target_stickers:<15d}"))
 73 | 
 74 |         total_songs = len([msg for msg in msgs if msg.has_audio])
 75 |         target_songs = len([msg for msg in msgs if msg.has_audio and msg.author == target_name])
 76 |         fp.write(f"Songs (audio files),{total_songs},{total_songs-target_songs},{target_songs}\n")
 77 |         print_func((f"{'Songs (audio files)'.ljust(20)}{total_songs:<15d}{total_songs-target_songs:<15d}"
 78 |                     f"{target_songs:<15d}"))
 79 | 
 80 |         total_voice = len([msg for msg in msgs if msg.has_voice])
 81 |         target_voice = len([msg for msg in msgs if msg.has_voice and msg.author == target_name])
 82 |         fp.write(f"Voice messages,{total_voice},{total_voice-target_voice},{target_voice}\n")
 83 |         print_func(f"{'Voice messages'.ljust(20)}{total_voice:<15d}{total_voice-target_voice:<15d}{target_voice:<15d}")
 84 | 
 85 |         total_video = len([msg for msg in msgs if msg.has_video])
 86 |         target_video = len([msg for msg in msgs if msg.has_video and msg.author == target_name])
 87 |         fp.write(f"Video messages,{total_video},{total_video-target_video},{target_video}\n")
 88 |         print_func(f"{'Video messages'.ljust(20)}{total_video:<15d}{total_video-target_video:<15d}{target_video:<15d}")
 89 | 
 90 |     log_line(f"Scalar info was saved into {dir_path}/scalar_info.csv file.")
 91 | 
 92 | 
 93 | async def _plot_messages_distribution(msgs, your_name, target_name, results_directory):
 94 |     """Shows how messages are distributed."""
 95 |     plt.heat_map(msgs, results_directory)
 96 |     await asyncio.sleep(delay)
 97 |     plt.pie_messages_per_author(msgs, your_name, target_name, results_directory)
 98 |     await asyncio.sleep(delay)
 99 |     plt.stackplot_non_text_messages_percentage(msgs, results_directory)
100 |     await asyncio.sleep(delay)
101 |     plt.barplot_non_text_messages(msgs, results_directory)
102 |     await asyncio.sleep(delay)
103 |     plt.barplot_messages_per_weekday(msgs, your_name, target_name, results_directory)
104 |     await asyncio.sleep(delay)
105 |     plt.barplot_messages_per_day(msgs, results_directory)
106 |     await asyncio.sleep(delay)
107 |     plt.barplot_messages_per_minutes(msgs, results_directory)
108 |     await asyncio.sleep(delay)
109 |     plt.barplot_non_text_messages(msgs, results_directory)
110 |     await asyncio.sleep(delay)
111 |     plt.distplot_messages_per_hour(msgs, results_directory)
112 |     await asyncio.sleep(delay)
113 |     plt.distplot_messages_per_month(msgs, results_directory)
114 |     await asyncio.sleep(delay)
115 |     plt.distplot_messages_per_day(msgs, results_directory)
116 |     await asyncio.sleep(delay)
117 |     plt.lineplot_messages(msgs, your_name, target_name, results_directory)
118 |     await asyncio.sleep(delay)
119 |     log_line("Messages distribution was analysed.")
120 | 
121 | 
async def _plot_messages_distribution_content_based(msgs, your_name, target_name, results_directory):
    """Draws the plots based on the content of messages: message length and top 10 emojis."""
    content_plots = (
        lambda: plt.lineplot_message_length(msgs, your_name, target_name, results_directory),
        lambda: plt.barplot_emojis(msgs, your_name, target_name, 10, results_directory),
    )
    for draw_plot in content_plots:
        draw_plot()
        await asyncio.sleep(delay)
    log_line("Content based messages distribution was analysed.")
129 | 
130 | 
async def _plot_words_distribution(msgs, your_name, target_name, results_directory, words):
    """Draws a barplot and a word cloud showing how the given words are used by both users."""
    word_plots = (
        lambda: plt.barplot_words(msgs, your_name, target_name, words, 10, results_directory),
        lambda: plt.wordcloud(msgs, words, results_directory),
    )
    for draw_plot in word_plots:
        draw_plot()
        await asyncio.sleep(delay)
    log_line("Words distribution was analysed.")
138 | 
139 | 
async def _plot_all(msgs, your_name, target_name, results_directory, words_file):
    """Runs the whole reporting pipeline: scalar info, distribution plots, content plots, words plots.

    Content based and words plots use messages without forwards, links and empty texts.
    Words plots are drawn only when words_file is set and contains some words.
    """
    await save_scalar_info(msgs, your_name, target_name, results_directory)
    await asyncio.sleep(delay)
    await _plot_messages_distribution(msgs, your_name, target_name, results_directory)
    await asyncio.sleep(delay)

    text_msgs = stools.get_filtered(msgs, remove_forwards=True, remove_empty=True, remove_links=True, max_len=4095)

    await _plot_messages_distribution_content_based(text_msgs, your_name, target_name, results_directory)
    await asyncio.sleep(delay)
    if not words_file:
        return
    words = storage.get_words(words_file)
    if words:
        await _plot_words_distribution(text_msgs, your_name, target_name, results_directory, words)
    await asyncio.sleep(delay)
155 | 
156 | 
async def _get_all_messages(dialog, vkopt_file, your_name, target_name, loop):
    """Collects messages from a Telegram dialogue (if dialog is not -1) and a VkOpt file (if given).

    The resulting list is sorted by date only when a VkOpt file was read.
    """
    collected = []
    if dialog != -1:
        telegram_msgs = await tlg.get_telegram_messages(your_name, target_name, loop=loop, target_id=dialog)
        collected.extend(telegram_msgs)
    await asyncio.sleep(delay)
    if vkopt_file:
        vk_msgs = vkOpt.get_mymessages_from_file(your_name, target_name, vkopt_file)
        collected.extend(vk_msgs)
        await asyncio.sleep(delay)
        collected.sort(key=lambda message: message.date)
    await asyncio.sleep(delay)
    return collected
168 | 
169 | 
def _save_words(msgs, your_name, target_name, path):
    """Stores the 1000 most common words together with the per-author counters."""
    def words_of(author):
        return stools.get_words_countered([msg for msg in msgs if msg.author == author])

    overall_counter = stools.get_words_countered(msgs)
    top_words = [word for word, _ in overall_counter.most_common(1000)]
    your_counter = words_of(your_name)
    target_counter = words_of(target_name)
    storage.store_top_words_count(top_words, your_counter, target_counter, path)
176 | 
177 | 
async def _analyse(msgs, your_name, target_name, words_file, store_msgs=True, store_words=True):
    """Analyses the messages and stores all the results.

    Results are stored into the "results/<date>_<your_name>_<target_name>" directory which is
    located next to the package directory. Nothing is done when there are no messages.
    """
    log_line("Start messages analysis process.")

    if len(msgs) == 0:
        log_line("No messages were received.")
        return

    timestamp = datetime.datetime.today().strftime('%d-%m-%y %H-%M-%S')
    package_dir = os.path.normpath(os.path.dirname(__file__))
    project_dir = os.path.split(package_dir)[0]
    results_directory = os.path.join(project_dir, "results", f"{timestamp}_{your_name}_{target_name}")

    if not os.path.exists(results_directory):
        os.makedirs(results_directory)

    await asyncio.sleep(delay)

    if store_msgs:
        storage.store_msgs(os.path.join(results_directory, "messages.txt"), msgs)
    if store_words:
        _save_words(msgs, your_name, target_name, os.path.join(results_directory, "words.txt"))

    await asyncio.sleep(delay)

    await _plot_all(msgs, your_name, target_name, results_directory, words_file)

    log_line("Done.")
206 | 
207 | 
def analyse_from_file(path):
    """Analyses messages stored in a single file which was previously created by this program.

    The messages are not stored again.

    Notes:
        Requires all the necessary configuration parameters (config.ini file) to be set either by GUI or manually.
    """
    _dialog, _vkopt_file, words_file, your_name, target_name = storage.get_session_params()
    stored_msgs = storage.get_msgs(path)
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(_analyse(stored_msgs, your_name, target_name, words_file, store_msgs=False))
217 | 
218 | 
async def retrieve_and_analyse(loop):
    """(async) Retrieves messages from VkOpt file and/or Telegram dialogue and analyses them.

    Notes:
        Requires all the necessary configuration parameters (config.ini file) to be set either by GUI or manually.
    """
    session_params = storage.get_session_params()
    dialog, vkopt_file, words_file, your_name, target_name = session_params
    all_msgs = await _get_all_messages(dialog, vkopt_file, your_name, target_name, loop)
    await _analyse(all_msgs, your_name, target_name, words_file)
228 | 
229 | 
if __name__ == "__main__":
    # Script entry point: retrieve and analyse on the current event loop.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(retrieve_and_analyse(event_loop))
232 | 


--------------------------------------------------------------------------------
/message_analyser/structure_tools.py:
--------------------------------------------------------------------------------
  1 | import emoji
  2 | import datetime
  3 | import itertools
  4 | from collections import Counter
  5 | from dateutil.relativedelta import relativedelta
  6 | 
# Default value of the `max_len` argument of get_filtered (longer messages are skipped).
MAX_MSG_LEN = 4096
  8 | 
  9 | 
 10 | def count_months(msgs):
 11 |     """Returns the number of months between first and last messages (calendar months)."""
 12 |     r = relativedelta(msgs[-1].date, msgs[0].date)
 13 |     return r.months + 12 * r.years
 14 | 
 15 | 
 16 | def get_filtered(msgs,
 17 |                  remove_empty=False,
 18 |                  remove_links=False,
 19 |                  remove_forwards=False,
 20 |                  except_patterns=None,
 21 |                  except_samples=None,
 22 |                  min_len=0,
 23 |                  max_len=MAX_MSG_LEN
 24 |                  ):
 25 |     """Filters a list of messages by different parameters.
 26 | 
 27 |     Notes:
 28 |         Patterns and samples are lowered as well as the messages they are compared to.
 29 | 
 30 |     Args:
 31 |         msgs (list of MyMessage objects): Messages to sort.
 32 |         remove_empty (bool): Skips/keeps messages with empty text component.
 33 |         remove_links (bool): Skips/keeps messages which are links.
 34 |         remove_forwards (bool): Skips/keeps messages which are forwarded.
 35 |         except_patterns (list of sets of strings (characters)):
 36 |             Skips messages which are made ONLY from the characters from any set in this list.
 37 |         except_samples (list of strings):
 38 |             Skips messages which are equal to any string in this list.
 39 |         min_len (int): Skips/keeps messages shorter than min_len.
 40 |         max_len (int): Skips/keeps messages longer than max_len.
 41 | 
 42 |     Returns:
 43 |         A list of MyMessage objects.
 44 |     """
 45 |     if except_patterns is not None:
 46 |         except_patterns = set(pattern.lower() for pattern in except_patterns)
 47 |     if except_samples is not None:
 48 |         except_samples = list(sample.lower() for sample in except_samples)
 49 |     return list(filter(lambda msg:
 50 |                        (not remove_empty or msg.text != "")
 51 |                        and min_len <= len(msg.text) <= max_len
 52 |                        and not (remove_forwards and msg.is_forwarded)
 53 |                        and not (remove_links and msg.is_link)
 54 |                        and (except_patterns is None or not any(set(msg.text.lower()) == p for p in except_patterns))
 55 |                        and (except_samples is None or not any(sample == msg.text for sample in except_samples)),
 56 |                        msgs))
 57 | 
 58 | 
 59 | def get_non_text_messages_grouped(groups):
 60 |     """Filters and structures messages for each group and non-text message type.
 61 | 
 62 |     Args:
 63 |         groups (list of lists of MyMessage objects): Messages grouped.
 64 | 
 65 |     Returns:
 66 |         A list of message types grouped:
 67 |         [
 68 |             {
 69 |                 "groups": [list of numbers of specific messages in each group],
 70 |                 "type": string type of these messages.
 71 |             }
 72 |         ]
 73 |     """
 74 |     return [
 75 |         {"groups": [len(list(filter(lambda m: m.has_audio, group))) for group in groups],
 76 |          "type": "audio"},
 77 |         {"groups": [len(list(filter(lambda m: m.has_voice, group))) for group in groups],
 78 |          "type": "voice"},
 79 |         {"groups": [len(list(filter(lambda m: m.has_photo, group))) for group in groups],
 80 |          "type": "photo"},
 81 |         {"groups": [len(list(filter(lambda m: m.has_video, group))) for group in groups],
 82 |          "type": "video"},
 83 |         {"groups": [len(list(filter(lambda m: m.has_sticker, group))) for group in groups],
 84 |          "type": "sticker"},
 85 |         {"groups": [len(list(filter(lambda m: m.is_link, group))) for group in groups],
 86 |          "type": "link"}
 87 |     ]
 88 | 
 89 | 
 90 | def get_response_speed_per_timedelta(msgs, name):
 91 |     """Gets list of response time lengths of a certain person.
 92 | 
 93 |     Notes:
 94 |         This function is not used anywhere (at the time when this docstring was written) because it needs
 95 |         better algorithm for making decisions about message being a response or not.
 96 | 
 97 |     Args:
 98 |         msgs (list of MyMessage objects): Messages.
 99 |         name (str): The name of the person whose response time is calculated.
100 | 
101 |     Returns:
102 |         A a list of the person's (name) response time lengths.
103 |     """
104 |     res = []
105 |     i = 0
106 |     if msgs[0].author == name:
107 |         while i < len(msgs) and msgs[i].author == name:
108 |             i += 1
109 |     while i < len(msgs):
110 |         while i < len(msgs) and msgs[i].author != name:
111 |             i += 1
112 |         if i < len(msgs) and (msgs[i].date - msgs[i - 1].date).seconds <= 4 * 3600:  # because people sleep sometimes
113 |             res.append((msgs[i].date - msgs[i - 1].date).seconds / 60)
114 |         while i < len(msgs) and msgs[i].author == name:
115 |             i += 1
116 |     return res
117 | 
118 | 
def get_messages_per_timedelta(msgs, time_bin):
    """Splits messages into consecutive time intervals of time_bin days each. For example:
    with time_bin equal to 7 the messages are grouped by 7-day intervals counted from the date
    of the first message.

    Args:
        msgs (list of MyMessage objects): Messages.
        time_bin (int): The number of days in each bin (time interval).

    Returns:
        A dictionary such as:
            {
                day (datetime.date object): a list of messages within interval [day, day + time_bin)
            }
        Intervals without messages are present too (with empty lists).
    """
    first_day = msgs[0].date.date()
    last_day = msgs[-1].date.date()

    # Create every bin in advance, so that empty intervals are present in the result.
    bins = {}
    bin_start = first_day
    while bin_start <= last_day:
        bins[bin_start] = []
        bin_start += relativedelta(days=time_bin)

    for msg in msgs:
        days_passed = (msg.date.date() - first_day).days
        bin_offset = days_passed // time_bin * time_bin
        bins[first_day + relativedelta(days=bin_offset)].append(msg)
    return bins
143 | 
144 | 
def get_months(msgs):
    """Gets months (first day for each month) between the first and the last messages in a list.

    Notes:
        ATTENTION: datetime objects have day parameter set to 1 (first day of the month) for EACH month.
        Messages are expected to be in chronological order (the first and the last elements of the list
        define the covered range).

    Args:
        msgs (list of Mymessage objects): Messages.

    Returns:
        A list of datetime.date objects (empty if there are no messages).
    """
    if not msgs:
        return []
    start_d = msgs[0].date.date()
    end_d = msgs[-1].date.date()
    # Number the months consecutively (year * 12 + zero-based month) so the range is a plain integer range.
    first_month = start_d.year * 12 + start_d.month - 1
    last_month = end_d.year * 12 + end_d.month - 1
    return [datetime.date(index // 12, index % 12 + 1, 1) for index in range(first_month, last_month + 1)]
167 | 
168 | 
def get_weeks(msgs):
    """Gets weeks (first day for each week) between the first and last messages in a list.

    Notes:
        First "week" is 7-days full: it starts on the Monday of the week of the first message.
        This function returns calendar weeks, not just 7-days intervals.
        Messages are expected to be in chronological order (the first and the last elements of the list
        define the covered range).

    Args:
        msgs (list of Mymessage objects): Messages.

    Returns:
        A list of datetime.date objects (empty if there are no messages).
    """
    if not msgs:
        return []
    first_day = msgs[0].date.date()
    end_d = msgs[-1].date.date()
    # weekday() is 0 for Monday, so this moves back to the Monday of the first message's week.
    monday = first_day - datetime.timedelta(days=first_day.weekday())
    weeks_total = (end_d - monday).days // 7 + 1
    return [monday + datetime.timedelta(weeks=number) for number in range(weeks_total)]
190 | 
191 | 
def str_day(day):
    """Formats a day as a "%d/%m/%y" string (for example "07/03/20").

    Args:
        day (datetime/datetime.date): Day.

    Returns:
        A "%d/%m/%y" string representation of the day.
    """
    day_pattern = "%d/%m/%y"
    return day.strftime(day_pattern)
202 | 
203 | 
204 | def date_days_to_str_days(days):
205 |     """Transforms a list of datetime objects into a list of "%d/%m/%y" strings.
206 | 
207 |     Args:
208 |         days (list of datetime objects): Days.
209 | 
210 |     Returns:
211 |         A list of "%d/%m/%y" days representations.
212 |     """
213 |     return [str_day(day) for day in days]
214 | 
215 | 
def str_month(month):
    """Formats a month as a "%m/%y" string (for example "03/20").

    Args:
        month (datetime/datetime.date): Month.

    Returns:
        A "%m/%y" string representation of the month.
    """
    month_pattern = "%m/%y"
    return month.strftime(month_pattern)
226 | 
227 | 
228 | def date_months_to_str_months(months):
229 |     """Transforms a list of datetime objects into a list of "%m/%y" strings.
230 | 
231 |     Args:
232 |         months (list of datetime objects): Months.
233 | 
234 |     Returns:
235 |         A list of "%m/%y" months representations.
236 |     """
237 |     return [str_month(month) for month in months]
238 | 
239 | 
def get_messages_per_month(msgs):
    """Gets lists of messages for each month between the first and last message.

    Notes:
        Months keys are set to the first day of the month.
        Months without messages are present as well (with empty lists).
        Messages are expected to be in chronological order (the first and the last elements of the list
        define the covered range).

    Args:
        msgs (list of Mymessage objects): Messages.

    Returns:
        A dictionary such as:
            {
                month (datetime.date): list of messages within this month
            }
        An empty dictionary if there are no messages.
    """
    if not msgs:
        return {}
    first_day = msgs[0].date.date()
    last_day = msgs[-1].date.date()
    # Number the months consecutively (year * 12 + zero-based month) so the range is a plain integer range.
    first_month = first_day.year * 12 + first_day.month - 1
    last_month = last_day.year * 12 + last_day.month - 1
    res = {datetime.date(index // 12, index % 12 + 1, 1): [] for index in range(first_month, last_month + 1)}
    for msg in msgs:
        res[msg.date.date().replace(day=1)].append(msg)
    return res
265 | 
266 | 
def get_messages_per_week(msgs):
    """Gets lists of messages for each calendar week between the first and the last message.

    Notes:
        Weeks keys are set to the Monday of the week.
        Weeks without messages are present as well (with empty lists).
        Messages are expected to be in chronological order (the first and the last elements of the list
        define the covered range).

    Args:
        msgs (list of Mymessage objects): Messages.

    Returns:
        A dictionary such as:
            {
                week (datetime.date): list of messages within this week
            }
        An empty dictionary if there are no messages.
    """
    if not msgs:
        return {}
    one_week = datetime.timedelta(days=7)
    first_day = msgs[0].date.date()
    end_d = msgs[-1].date.date()
    # weekday() is 0 for Monday, so subtracting it gives the Monday of the same calendar week.
    current_date = first_day - datetime.timedelta(days=first_day.weekday())
    res = {}
    while current_date <= end_d:
        res[current_date] = []
        current_date += one_week

    for msg in msgs:
        day = msg.date.date()
        res[day - datetime.timedelta(days=day.weekday())].append(msg)
    return res
291 | 
292 | 
def get_messages_per_minutes(msgs, minutes):
    """Groups messages by the part of the day (an interval of a given length in minutes) they were sent in.

    Notes:
        Only the time of the day (hour and minute) of a message is used, its date is ignored.
        Intervals start at 00:00 and every interval is present in the result, even when it has no messages.

    Args:
        msgs (list of MyMessage objects): Messages.
        minutes (int): The number of minutes in one interval.

    Returns:
        A dictionary such as:
            {
                minute: list of all messages sent within interval [minute, minute + minutes).
            }
        where minute is the number of minutes from the midnight to the beginning of the interval.
    """
    minutes_in_day = 24 * 60
    buckets = {}
    for interval_start in range(0, minutes_in_day, minutes):
        buckets[interval_start] = []
    for msg in msgs:
        minute_of_day = msg.date.hour * 60 + msg.date.minute
        # Dropping the remainder rounds down to the beginning of the interval.
        buckets[minute_of_day - minute_of_day % minutes].append(msg)
    return buckets
310 | 
311 | 
def get_messages_per_weekday(msgs):
    """Groups messages by the day of the week they were sent on (7 lists in a dictionary total).

    Notes:
        Keys are the values of datetime.weekday(), so the week starts with Monday (0) and ends with Sunday (6).
        All 7 keys are always present.

    Args:
        msgs (list of MyMessage objects): Messages.

    Returns:
        A dictionary such as:
            {
                day_of_the_week (int 0-6): list of all messages sent on this day
            }
    """
    per_weekday = {weekday: [] for weekday in range(7)}
    for msg in msgs:
        per_weekday[msg.date.weekday()].append(msg)
    return per_weekday
331 | 
332 | 
def get_messages_per_day(msgs):
    """Gets lists of messages for each day between the first and the last message.

    Notes:
        Days are stored in a dictionary as datetime.date objects.
        Days without messages are present as well (with empty lists).
        Messages are expected to be in chronological order (the first and the last elements of the list
        define the covered range).

    Args:
        msgs (list of MyMessage objects): Messages.

    Returns:
        A dictionary such as:
            {
                day (datetime.date): list of messages sent this day
            }
        An empty dictionary if there are no messages.
    """
    if not msgs:
        return {}
    first_day = msgs[0].date.date()
    days_total = (msgs[-1].date.date() - first_day).days + 1
    res = {first_day + datetime.timedelta(days=shift): [] for shift in range(days_total)}
    for msg in msgs:
        res[msg.date.date()].append(msg)
    return res
358 | 
359 | 
def get_hours():
    """Gets a list of 24 hour labels ("%H:00" strings) from "00:00" to "23:00"."""
    return list(map("{:02d}:00".format, range(24)))
363 | 
364 | 
def get_messages_per_hour(msgs):
    """Groups messages by the hour of the day they were sent in (total 24 lists).

    Notes:
        Only the hour of a message is used, its date is ignored.
        The keys are the labels returned by get_hours(), so every label is present even without messages.

    Args:
        msgs (list of MyMessage objects): Messages.

    Returns:
        A dictionary such as:
            {
                hour (string "%H:00"): list of messages sent this hour (for all days)
            }
    """
    per_hour = {}
    for label in get_hours():
        per_hour[label] = []
    for msg in msgs:
        per_hour["{:02d}:00".format(msg.date.hour)].append(msg)
    return per_hour
381 | 
382 | 
def get_longest_pause(msgs):
    """Gets the longest time distance between two consecutive messages.

    Notes:
        If several pauses share the longest length, the earliest of them is returned.
        If no pause is longer than zero (for example, there is only one message), the pause length is zero and
        both its start and its end are the datetime of the first message.

    Args:
        msgs (list of MyMessage objects): Messages.

    Returns:
        A tuple such as:
            (timedelta of the longest pause in a dialogue, start datetime of the pause, end datetime of the pause).
        (timedelta(0), None, None) if there are no messages.
    """
    # An explicit zero: a difference of two today() calls is usually a tiny negative timedelta.
    max_delta = datetime.timedelta(0)
    if not msgs:
        return max_delta, None, None
    start_pause = end_pause = msgs[0].date
    for previous, current in zip(msgs, msgs[1:]):
        delta = current.date - previous.date
        if delta > max_delta:
            max_delta, start_pause, end_pause = delta, previous.date, current.date
    return max_delta, start_pause, end_pause
403 | 
404 | 
def _tokenize(text, stem=False, filters=None):
    """Splits a text into a list of lowered words.

    A word is a longest possible run of letters, apostrophes (') and backticks (`). Everything else
    (punctuation, digits, whitespace etc.) only separates words and is dropped.

    Notes:
        filters example: ["NOUN", "ADJF", "VERB", "ADVB"].
        Stemming and filtering are not implemented. They were planned to be done with a morphological
        analyser (pymorphy2 with its ukrainian dictionaries), which is currently out-of-use.

    Args:
        text (str): A text to tokenize.
        stem (bool): True value means the words will be stemmed (currently out-of-use).
        filters (list of strings): List of string types of words (currently out-of-use).

    Returns:
        A list of words (strings).

    Raises:
        NotImplementedError: If you try to filter or stem a text which has at least one word.
    """
    def is_word_char(char):
        return char.isalpha() or char in ("'", "`")

    words = [
        "".join(run).lower()
        for belongs_to_word, run in itertools.groupby(text, key=is_word_char)
        if belongs_to_word
    ]
    # A text without words never reaches stemming/filtering, so it is returned (as an empty list) in any case.
    if words and (stem or filters is not None):
        raise NotImplementedError
    return words
443 | 
444 | 
def get_words_countered(msgs, stem=False):
    """Counts how many times every word occurs in the messages' texts.

    Notes:
        The texts are split into words by _tokenize: punctuation and digits are removed, words are lowered.

    Args:
        msgs (list of MyMessage objects): Messages.
        stem (bool): True value means the words will be stemmed (currently out-of-use).

    Returns:
        collections.Counter of words.
    """
    words_counter = Counter()
    for msg in msgs:
        words_counter.update(_tokenize(msg.text, stem=stem))
    return words_counter
459 | 
460 | 
def get_emoji_countered(msgs):
    """Counts how many times every emoji occurs in the messages' texts.

    Notes:
        Texts are scanned one character at a time; a character is counted when it is a member of
        emoji.UNICODE_EMOJI.
        NOTE(review): this presumably relies on an emoji package version where UNICODE_EMOJI is keyed by the
        emoji characters themselves - confirm against the pinned version of the package.

    Args:
        msgs (list of MyMessage objects): Messages.

    Returns:
        collections.Counter of emojis.
    """
    return Counter(symbol for msg in msgs for symbol in msg.text if symbol in emoji.UNICODE_EMOJI)
476 | 
477 | 
def get_messages_lengths_countered(msgs):
    """Counts how many messages have each text length.

    Args:
        msgs (list of MyMessage objects): Messages.

    Returns:
        collections.Counter which maps a text length (in characters) to the number of messages of this length.
    """
    lengths_counter = Counter()
    lengths_counter.update(len(msg.text) for msg in msgs)
    return lengths_counter
488 | 


--------------------------------------------------------------------------------
/message_analyser/GUI.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import logging
  3 | import asyncio
  4 | import tkinter as tk
  5 | import message_analyser.retriever.telegram as tlg
  6 | import message_analyser.storage as storage
  7 | from message_analyser import analyser
  8 | from tkinter import filedialog
  9 | 
 10 | 
 11 | async def start_gui(loop):
 12 |     app = MessageAnalyserGUI(tk.Tk(), loop)
 13 |     try:
 14 |         while True:
 15 |             # We want to update the application but get back
 16 |             # to asyncio's event loop. For this we sleep a
 17 |             # short time so the event loop can run.
 18 |             #
 19 |             # https://www.reddit.com/r/Python/comments/33ecpl
 20 |             # print("UPDATED!")
 21 |             app.update()
 22 |             await asyncio.sleep(0.05)
 23 |     except KeyboardInterrupt:
 24 |         pass
 25 |     except tk.TclError as e:
 26 |         if "application has been destroyed" not in e.args[0]:
 27 |             raise
 28 | 
 29 | 
 30 | class LoggingToGUI(logging.Handler):
 31 |     """ Used to redirect logging output to the widget passed in parameters """
 32 | 
 33 |     # https://stackoverflow.com/a/18194597
 34 | 
 35 |     def __init__(self, console):
 36 |         logging.Handler.__init__(self)
 37 | 
 38 |         self.console = console  # Any text widget, you can use the class above or not
 39 | 
 40 |     def emit(self, message):  # Overwrites the default handler's emit method
 41 |         formatted_message = self.format(message)  # You can change the format here
 42 | 
 43 |         # Disabling states so no user can write in it
 44 |         self.console.configure(state=tk.NORMAL)
 45 |         self.console.insert(tk.END, formatted_message)  # Inserting the logger message in the widget
 46 |         self.console.configure(state=tk.DISABLED)
 47 |         self.console.see(tk.END)
 48 |         # print(message)  # You can just print to STDout in your overriden emit no need for black magic
 49 | 
 50 | 
 51 | class MessageAnalyserGUI(tk.Frame):
 52 |     """Represents a GUI for the message analyser app.
 53 | 
 54 |     Contains next frames:
 55 |         A frame with a greeting and choosing of the base analyser parameters (raise_start_frame).
 56 |         A frame to set analyser attributes based on previous frame results (raise_files_frame).
 57 |         A frame to make an initial sign-in into Telegram client (raise_telegram_auth_frame, optional).
 58 |         A frame to choose a Telegram dialogue to analyse messages from (raise_dialogs_select_frame, optional).
 59 |         A frame to show analysing process and results (raise_finish_frame).
 60 | 
 61 |     Attributes:
 62 |         parent (tk.Frame): A root frame (tk.Tk()) of a tkinter app.
 63 |         loop (asyncio.windows_events._WindowsSelectorEventLoop, optional): An event loop.
 64 |         x (int): A horizontal size of the window.
 65 |         y (int): A vertical size of the window.
 66 |         session_params (dict):
 67 |             A dictionary which contains all the message analyser parameters for their future processing. Looks like:
 68 |             {
 69 |                 "from_vk": (bool) True if some messages will be received from the VkOpt file.,
 70 |                 "from_telegram": (bool) True if some messages will be received from the Telegram.,
 71 |                 "plot_words": (bool) True if we need a file with words for future analysis of them.,
 72 |                 "dialogue": (str,optional) String representation ("dialog_name (id=dialog_id)") of a Telegram dialogue.,
 73 |                 "vkopt_file": (str,optional) A path to the file with VkOpt messages.,
 74 |                 "words_file": (str,optional) A path to the file with words.,
 75 |                 "your_name": (str) Your name.,
 76 |                 "target_name": (str) Target's name.
 77 |             }
 78 |     """
 79 | 
 80 |     def __init__(self, parent, loop, *args, **kwargs):
 81 |         """Inits MessageAnalyserGUI class with parent frame and basic attributes. Raises an initial frame of the GUI."""
 82 |         tk.Frame.__init__(self, parent, *args, **kwargs)
 83 |         self.parent = parent
 84 |         self.parent.title("Message analyser")
 85 |         self.x, self.y = 700, 500
 86 |         self.parent.geometry(f"{self.x}x{self.y}")
 87 |         self.parent.grid_columnconfigure(3, weight=8)
 88 |         self.parent.resizable(False, False)
 89 |         self.default_font_name = "Courier"
 90 |         self.default_font = (self.default_font_name, 11)
 91 |         self.button_background = "#ccccff"
 92 |         self.aio_loop = loop
 93 | 
 94 |         self.session_params = dict()
 95 | 
 96 |         self.raise_start_frame()
 97 | 
 98 |     def __set_file_path(self, label_text, file):
 99 |         """Stores file path in session parameters and changes the corresponding label text."""
100 |         self.session_params[file] = filedialog.askopenfilename(title=file, filetypes=[("Text files", ".txt")])
101 |         label_text.set("File :          " + os.path.split(self.session_params[file])[-1])
102 | 
    def raise_start_frame(self):
        """Chooses base analyser parameters (do or do not analyse Telegram messages/vk.com messages/words).

        Builds a greeting, three check boxes and a "Continue" button. A check box is pre-selected when its
        key is already set to True in session_params (this method is raised again by the "Back" button of
        the files frame).
        """
        labels_frame = tk.Frame()
        labels_frame.pack(side=tk.TOP)

        start_label = tk.Label(labels_frame, text="Hi!\nLet's get started",
                               height=2, width=35, font=(self.default_font_name, 20))
        start_label.pack()

        # The name is reused for the second (smaller) label, the first one is already packed.
        start_label = tk.Label(labels_frame, text="What do You want to analyse?",
                               height=2, width=35, font=(self.default_font_name, 15))
        start_label.pack()

        check_boxes_frame = tk.Frame()
        check_boxes_frame.pack(anchor=tk.W)
        from_telegram = tk.BooleanVar()
        telegram_check_button = tk.Checkbutton(check_boxes_frame, text="Messages from Telegram", variable=from_telegram,
                                               font=self.default_font)
        telegram_check_button.pack(anchor=tk.W)
        if "from_telegram" in self.session_params and self.session_params["from_telegram"]:
            telegram_check_button.select()

        from_vk = tk.BooleanVar()
        vk_check_button = tk.Checkbutton(check_boxes_frame, text="Messages from vkOpt text file", variable=from_vk,
                                         font=self.default_font)
        vk_check_button.pack(anchor=tk.W)
        if "from_vk" in self.session_params and self.session_params["from_vk"]:
            vk_check_button.select()

        plot_words = tk.BooleanVar()
        words_check_button = tk.Checkbutton(check_boxes_frame, text="Add file with words", variable=plot_words,
                                            font=self.default_font)
        words_check_button.pack(anchor=tk.W)
        if "plot_words" in self.session_params and self.session_params["plot_words"]:
            words_check_button.select()

        def set_data_and_continue():
            """Stores the check boxes values in session_params and raises the files frame.

            At least one source of messages (Telegram or vkOpt) must be chosen; otherwise nothing is stored
            and both sources check boxes are marked red.
            """
            if from_vk.get() or from_telegram.get():
                self.session_params["plot_words"] = plot_words.get()
                self.session_params["from_vk"] = from_vk.get()
                self.session_params["from_telegram"] = from_telegram.get()
                # All the frames of this screen are destroyed before the next one is raised.
                bottom_frame.destroy()
                labels_frame.destroy()
                check_boxes_frame.destroy()
                return self.raise_files_frame()
            telegram_check_button.config(fg="red")
            vk_check_button.config(fg="red")

        bottom_frame = tk.Frame()
        bottom_frame.pack(side=tk.BOTTOM)
        continue_button = tk.Button(bottom_frame, text="Continue", command=set_data_and_continue,
                                    padx=35, background=self.button_background, font=self.default_font)
        continue_button.pack(side=tk.BOTTOM)
        # The Return key does the same as the "Continue" button.
        self.parent.bind('<Return>', lambda _: set_data_and_continue())
157 | 
    def raise_files_frame(self):
        """Chooses a file with words and a file with VkOpt messages; assigns names.

        The file rows are shown only when the corresponding session parameter ("from_vk"/"plot_words") is
        set. The names entries are pre-filled with the values returned by storage.get_session_params().
        The "Back" button raises the start frame again; the "Continue" button validates the input and raises
        either the Telegram sign-in frame (if "from_telegram" is set) or the finish frame.
        """
        table_frame = tk.Frame()
        table_frame.pack(expand=True, fill="both")

        # cur_row is the grid row of the last placed widget; it is increased before each new row.
        cur_row = 0
        if self.session_params["from_vk"]:
            cur_row += 1
            vkopt_label = tk.Label(table_frame, text="Choose path to:", height=2, font=self.default_font)
            vkopt_label.grid(row=cur_row, column=1, sticky=tk.W)

            # vkopt_filename_label_text is defined below; the lambda looks it up only when the button is clicked.
            vkopt_button = tk.Button(table_frame, text="vkOpt file",
                                     command=lambda: self.__set_file_path(vkopt_filename_label_text, "vkopt_file"),
                                     font=self.default_font)
            vkopt_button.grid(row=cur_row, column=2, sticky=tk.W)

            cur_row += 1
            vkopt_filename_label_text = tk.StringVar()
            vkopt_filename_label_text.set("File :          ")
            vkopt_filename_label = tk.Label(table_frame, textvariable=vkopt_filename_label_text, height=2,
                                            font=self.default_font)
            vkopt_filename_label.grid(row=cur_row, column=1, sticky=tk.W, columnspan=30)

        if self.session_params["plot_words"]:
            cur_row += 1
            words_label = tk.Label(table_frame, text="Choose path to:", height=2, font=self.default_font)
            words_label.grid(row=cur_row, column=1, sticky=tk.W)

            # words_filename_label_text is defined below; the lambda looks it up only when the button is clicked.
            words_button = tk.Button(table_frame, text="words file",
                                     command=lambda: self.__set_file_path(words_filename_label_text, "words_file"),
                                     font=self.default_font)
            words_button.grid(row=cur_row, column=2, sticky=tk.W)

            cur_row += 1
            words_filename_label_text = tk.StringVar()
            words_filename_label_text.set("File :          ")
            words_filename_label = tk.Label(table_frame, textvariable=words_filename_label_text, height=2,
                                            font=self.default_font)
            words_filename_label.grid(row=cur_row, column=1, sticky=tk.W, columnspan=30)

        # Only the last two of the stored session parameters (the names) are needed here.
        _, _, _, your_name, target_name = storage.get_session_params()

        cur_row += 1
        your_name_label = tk.Label(table_frame, text="Your name:     ", height=2, font=self.default_font)
        your_name_label.grid(row=cur_row, column=1, sticky=tk.W)

        your_name_dir = tk.Entry(table_frame, width=40, font=self.default_font)
        your_name_dir.insert(tk.END, your_name)
        your_name_dir.grid(row=cur_row, column=2)

        cur_row += 1
        target_name_label = tk.Label(table_frame, text="Target's name: ", height=2, font=self.default_font)
        target_name_label.grid(row=cur_row, column=1, sticky=tk.W)

        target_name_dir = tk.Entry(table_frame, width=40, font=self.default_font)
        target_name_dir.insert(tk.END, target_name)
        target_name_dir.grid(row=cur_row, column=2)

        if self.session_params["from_vk"]:
            cur_row += 1
            names_label = tk.Label(table_frame, text=("Please be sure these names are equal to the names in the \n"
                                                      "vkOpt file. Otherwise vkOpt file will not be read correctly."),
                                   fg="red", height=2, font=self.default_font, justify="left")
            names_label.grid(row=cur_row, column=1, sticky=tk.W, columnspan=30)

        def set_data_and_continue():
            """Validates the input, stores the names in session_params and raises the next frame.

            The first invalid field stops the validation: its label is marked red and nothing is stored.
            """
            your_name_label.config(fg="black")
            target_name_label.config(fg="black")
            # A name must be neither empty nor made of whitespace only.
            if your_name_dir.get().isspace() or not your_name_dir.get():
                return your_name_label.config(fg="red")
            if target_name_dir.get().isspace() or not target_name_dir.get():
                return target_name_label.config(fg="red")

            # A file counts as chosen once its key is present in session_params.
            if self.session_params["from_vk"]:
                if "vkopt_file" not in self.session_params:
                    return vkopt_filename_label.config(fg="red")
                vkopt_filename_label.config(fg="black")

            if self.session_params["plot_words"]:
                if "words_file" not in self.session_params:
                    return words_filename_label.config(fg="red")
                words_filename_label.config(fg="black")

            self.session_params["your_name"] = your_name_dir.get()
            self.session_params["target_name"] = target_name_dir.get()
            bottom_frame.destroy()
            table_frame.destroy()
            if self.session_params["from_telegram"]:
                return self.raise_telegram_auth_frame()
            self.raise_finish_frame()

        def raise_start_frame():
            """Destroys this screen and raises the start frame again (the "Back" button)."""
            bottom_frame.destroy()
            table_frame.destroy()
            self.raise_start_frame()

        bottom_frame = tk.Frame()
        bottom_frame.pack(side=tk.BOTTOM)
        back_button = tk.Button(bottom_frame, text="Back", command=raise_start_frame,
                                    padx=35, background=self.button_background, font=self.default_font)
        back_button.pack(side=tk.LEFT)

        continue_button = tk.Button(bottom_frame, text="Continue", command=set_data_and_continue,
                                    padx=35, background=self.button_background, font=self.default_font)
        continue_button.pack(side=tk.RIGHT)
        # The Return key does the same as the "Continue" button.
        self.parent.bind('<Return>', lambda _: set_data_and_continue())
264 | 
    def raise_telegram_auth_frame(self):
        """Makes an initial sign-in into Telegram client.

        Shows entries for the API id, API hash, phone number, confirmation code and password. The first three
        are pre-filled with the values returned by storage.get_telegram_secrets(). Each "Continue" click (or
        Return key press) schedules a sign-in attempt on the event loop; its result either marks the field
        to fix or stores the secrets and raises the dialogs select frame.
        """
        table_frame = tk.Frame()
        table_frame.pack(expand=True, fill="both")

        # NOTE(review): assert statements are removed when Python runs with -O, so this check is debug-only.
        assert self.session_params["from_telegram"]

        api_id, api_hash, phone_number, _ = storage.get_telegram_secrets()

        # A text in labels should be 15 characters long in order to not shift entries.
        # Should make them more adaptive some day.
        api_id_label = tk.Label(table_frame, text="API id :       ", height=2, font=self.default_font)
        api_id_label.grid(row=1, column=1, sticky=tk.W)

        api_id_dir = tk.Entry(table_frame, width=46, font=self.default_font)
        api_id_dir.insert(tk.END, api_id)
        api_id_dir.grid(row=1, column=2, sticky=tk.W)

        api_hash_label = tk.Label(table_frame, text="API hash :     ", height=2, font=self.default_font)
        api_hash_label.grid(row=2, column=1, sticky=tk.W)

        api_hash_dir = tk.Entry(table_frame, width=46, font=self.default_font)
        api_hash_dir.insert(tk.END, api_hash)
        api_hash_dir.grid(row=2, column=2, sticky=tk.W)

        phone_number_label = tk.Label(table_frame, text="Phone number : ", height=2, font=self.default_font)
        phone_number_label.grid(row=3, column=1, sticky=tk.W)

        phone_number_dir = tk.Entry(table_frame, width=46, font=self.default_font)
        phone_number_dir.insert(tk.END, phone_number)
        phone_number_dir.grid(row=3, column=2, sticky=tk.W)

        code_label = tk.Label(table_frame, text="Code :         ", height=2, font=self.default_font)
        code_label.grid(row=4, column=1, sticky=tk.W)

        code_dir = tk.Entry(table_frame, width=46, font=self.default_font)
        code_dir.grid(row=4, column=2, sticky=tk.W)

        password_label = tk.Label(table_frame, text="Password :     ", height=2, font=self.default_font)
        password_label.grid(row=5, column=1, sticky=tk.W)

        # NOTE(review): the entry is created without the "show" option, so the password is presumably
        # displayed as it is typed - confirm whether it should be masked.
        password_dir = tk.Entry(table_frame, width=46, font=self.default_font)
        password_dir.grid(row=5, column=2, sticky=tk.W)

        # The text of the message label is replaced after every sign-in attempt.
        message_label_text = tk.StringVar()

        message_label_text.set(("Please be sure You have set the right API ID and key\n"
                                "They can be obtained from:\n"
                                "https://core.telegram.org/api/obtaining_api_id"))
        message_label = tk.Label(table_frame, textvariable=message_label_text, height=3,
                                 font=self.default_font, fg="red", justify="left")
        message_label.grid(row=6, column=1, sticky=tk.W, columnspan=2)

        async def try_sign_in_and_continue():
            """Tries to sign in with the entered values and reacts to the result.

            The result of tlg.get_sign_in_results is a string. Every handled failure marks the related label
            red (if there is one) and shows a hint; "success" stores the secrets and raises the next frame.
            """
            res = await tlg.get_sign_in_results(api_id_dir.get(),
                                                api_hash_dir.get(),
                                                code_dir.get(),
                                                phone_number_dir.get(),
                                                password_dir.get(),
                                                self.session_params["your_name"],
                                                loop=self.aio_loop)
            # Reset the labels colours left from the previous attempt.
            try:
                api_id_label.config(fg="black")
            except tk.TclError:  # too fast "continue" button clicks?
                # Presumably the widgets were already destroyed by another attempt which has succeeded.
                return
            api_hash_label.config(fg="black")
            phone_number_label.config(fg="black")
            code_label.config(fg="black")
            password_label.config(fg="black")
            if res == "wrong api":
                api_id_label.config(fg="red")
                api_hash_label.config(fg="red")
                return message_label_text.set("Please be sure You have set the right API ID and hash\n"
                                              "They can be obtained from:\n"
                                              "https://core.telegram.org/api/obtaining_api_id")
            elif res == "need phone":
                phone_number_label.config(fg="red")
                return message_label_text.set("Please carefully set Your phone number in order to   \n"
                                              "get a confirmation code.\n ")

            elif res == "need code":
                code_label.config(fg="red")
                return message_label_text.set("Please check Your private messages (or SMS) and      \n"
                                              "copypaste the right code.\n ")
            elif res == "need password":
                password_label.config(fg="red")
                return message_label_text.set("Please enter correct password.\n")
            elif res.startswith("need wait for "):
                # res[14:] is the rest of the result after the 14 characters long "need wait for " prefix.
                return message_label_text.set(f'Please wait. A wait of {res[14:]} seconds is required.\n')
            elif res == "no internet":
                return message_label_text.set("Please be sure You have stable Internet connection.\n\n")

            # NOTE(review): assert statements are removed when Python runs with -O; any result which is not
            # handled above would then be treated as a success.
            assert res == "success"
            storage.store_telegram_secrets(api_id_dir.get(), api_hash_dir.get(), phone_number_dir.get(),
                                           session_name=self.session_params["your_name"])
            bottom_frame.destroy()
            table_frame.destroy()
            # The next frame is a coroutine, so it is scheduled on the event loop instead of being called.
            self.aio_loop.create_task(self.raise_dialogs_select_frame())

        bottom_frame = tk.Frame()
        bottom_frame.pack(side=tk.BOTTOM)
        # Every click creates a new sign-in task on the event loop.
        continue_button = tk.Button(bottom_frame, text="Continue",
                                    command=lambda: self.aio_loop.create_task(try_sign_in_and_continue()),
                                    padx=35, background=self.button_background,
                                    font=self.default_font)
        continue_button.pack(side=tk.BOTTOM)
        # The Return key does the same as the "Continue" button.
        self.parent.bind('<Return>', lambda _: self.aio_loop.create_task(try_sign_in_and_continue()))
372 | 
373 |     async def raise_dialogs_select_frame(self):
374 |         """Chooses a Telegram dialogue to analyse messages from."""
375 |         table_frame = tk.Frame()
376 |         table_frame.pack(expand=True, fill="both")
377 | 
378 |         dialog_select_label = tk.Label(table_frame, text="Please select a dialog You want to analyse messages from :",
379 |                                        height=2, font=self.default_font)
380 |         dialog_select_label.grid(row=1, column=1, sticky=tk.W)
381 | 
382 |         dialogs = await tlg.get_str_dialogs(loop=self.aio_loop)
383 |         for i in range(len(dialogs)):
384 |             dialogs[i] = ''.join(char for char in dialogs[i] if char < u"\uffff")
385 | 
386 |         dialog_variable = tk.StringVar()
387 |         dialog_variable.set(dialogs[0])  # default value
388 |         dialog_selection_menu = tk.OptionMenu(table_frame, dialog_variable, *dialogs)
389 |         dialog_selection_menu.grid(row=2, column=1, sticky=tk.W)
390 | 
391 |         def select_dialog_and_continue():
392 |             self.session_params["dialogue"] = dialog_variable.get()
393 |             bottom_frame.destroy()
394 |             table_frame.destroy()
395 |             self.raise_finish_frame()
396 | 
397 |         bottom_frame = tk.Frame()
398 |         bottom_frame.pack(side=tk.BOTTOM)
399 |         continue_button = tk.Button(bottom_frame, text="Continue",
400 |                                     command=select_dialog_and_continue, padx=35, background=self.button_background,
401 |                                     font=self.default_font)
402 |         continue_button.pack(side=tk.BOTTOM)
403 |         self.parent.bind('<Return>', lambda _: select_dialog_and_continue())
404 | 
405 |     def raise_finish_frame(self):
406 |         """Shows analysis process and results."""
407 |         table_frame = tk.Frame()
408 |         table_frame.pack(expand=True, fill="both")
409 | 
410 |         finish_label = tk.Label(table_frame,
411 |                                 text=("Plots and other data will be saved in a 'results' folder.\n"
412 |                                       "Please, wait for the 'Done.' line. It takes some time..."),
413 |                                 height=2, justify="left")
414 |         finish_label.pack(anchor=tk.W)
415 | 
416 |         text_widget = tk.Text(table_frame)
417 |         text_widget.pack(expand=True, fill="both")
418 | 
419 |         logger = logging.getLogger("message_analyser")
420 |         logger.addHandler(LoggingToGUI(text_widget))
421 |         self.finalise()
422 | 
423 |     def finalise(self):
424 |         storage.store_session_params(self.session_params)
425 |         self.aio_loop.create_task(analyser.retrieve_and_analyse(self.aio_loop))
426 | 
427 | 
if __name__ == "__main__":
    # Script entry point: run the GUI coroutine to completion on the asyncio event loop.
    aio_loop = asyncio.get_event_loop()
    try:
        gui_coroutine = start_gui(aio_loop)
        aio_loop.run_until_complete(gui_coroutine)
    finally:
        # close the loop on the way out unless it has been closed already
        loop_still_open = not aio_loop.is_closed()
        if loop_still_open:
            aio_loop.close()
435 | 


--------------------------------------------------------------------------------
/message_analyser/plotter.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import emoji
  3 | import random
  4 | import operator
  5 | import numpy as np
  6 | import pandas as pd
  7 | import seaborn as sns
  8 | import wordcloud as wc
  9 | import matplotlib
 10 | 
 11 | matplotlib.use("TkAgg")
 12 | import matplotlib.cm as cm
 13 | import matplotlib.pyplot as plt
 14 | import matplotlib.colors as mpl_colors
 15 | import message_analyser.structure_tools as stools
 16 | from message_analyser.misc import avg, log_line, months_border
 17 | 
 18 | 
 19 | def _change_bar_width(ax, new_value):
 20 |     # https://stackoverflow.com/a/44542112
 21 |     for patch in ax.patches:
 22 |         current_width = patch.get_width()
 23 |         diff = current_width - new_value
 24 | 
 25 |         # we change the bar width
 26 |         patch.set_width(new_value)
 27 | 
 28 |         # we recenter the bar
 29 |         patch.set_x(patch.get_x() + diff * .5)
 30 | 
 31 | 
 32 | def heat_map(msgs, path_to_save, seasons=False):
 33 |     sns.set(style="whitegrid")
 34 | 
 35 |     messages_per_day = stools.get_messages_per_day(msgs)
 36 |     months = stools.date_months_to_str_months(stools.get_months(msgs))
 37 |     heat_calendar = {month: np.array([None] * 31, dtype=np.float64) for month in months}
 38 |     for day, d_msgs in messages_per_day.items():
 39 |         heat_calendar[stools.str_month(day)][day.day - 1] = len(d_msgs)
 40 | 
 41 |     # min_day = len(min(messages_per_day.values(), key=len))
 42 |     max_day = len(max(messages_per_day.values(), key=len))
 43 | 
 44 |     data = np.array(list(heat_calendar.values()))
 45 |     mask = np.array([np.array(arr, dtype=bool) for arr in data])
 46 | 
 47 |     cmap = cm.get_cmap("Purples")
 48 | 
 49 |     center = max_day * 0.4  # (avg([len(d) for d in messages_per_day.values()]) + (max_day - min_day) / 2) / 2
 50 | 
 51 |     ax = sns.heatmap(data=data, cmap=cmap, center=center, xticklabels=True, yticklabels=True,
 52 |                      square=True, linewidths=.2, cbar_kws={"shrink": .5})
 53 | 
 54 |     # builds a mask to highlight empty days
 55 |     sns.heatmap(data, mask=mask,
 56 |                 xticklabels=range(1, 32),
 57 |                 yticklabels=months,
 58 |                 linewidths=.2, cbar=False, cmap=mpl_colors.ListedColormap(["#ffffe6"]))
 59 | 
 60 |     if seasons:  # divides heatmap on seasons
 61 |         season_lines = [i for i, m in enumerate(months) if m.month % 3 == 0 and i != 0]
 62 |         ax.hlines(season_lines, *ax.get_xlim(), colors=["b"])
 63 |     ax.set(xlabel="day", ylabel="month")
 64 |     ax.margins(x=0)
 65 | 
 66 |     plt.tight_layout()
 67 |     fig = plt.gcf()
 68 |     fig.set_size_inches(11, 8)
 69 |     fig.savefig(os.path.join(path_to_save, heat_map.__name__ + ".png"), dpi=500)
 70 | 
 71 |     # plt.show()
 72 |     plt.close("all")
 73 |     log_line(f"{heat_map.__name__} was created.")
 74 | 
 75 | 
 76 | def pie_messages_per_author(msgs, your_name, target_name, path_to_save):
 77 |     forwarded = len([msg for msg in msgs if msg.is_forwarded])
 78 |     msgs = list(filter(lambda msg: not msg.is_forwarded, msgs))
 79 |     your_messages_len = len([msg for msg in msgs if msg.author == your_name])
 80 |     target_messages_len = len(msgs) - your_messages_len
 81 |     data = [your_messages_len, target_messages_len, forwarded]
 82 |     labels = [f"{your_name}\n({your_messages_len})",
 83 |               f"{target_name}\n({target_messages_len})",
 84 |               f"forwarded\n({forwarded})"]
 85 |     explode = (.0, .0, .2)
 86 | 
 87 |     fig, ax = plt.subplots(figsize=(13, 8), subplot_kw=dict(aspect="equal"))
 88 | 
 89 |     wedges, _, autotexts = ax.pie(x=data, explode=explode, colors=["#4982BB", "#5C6093", "#53B8D7"],
 90 |                                   autopct=lambda pct: f"{pct:.1f}%",
 91 |                                   wedgeprops={"edgecolor": "black", "alpha": 0.8})
 92 | 
 93 |     ax.legend(wedges, labels,
 94 |               loc="upper right",
 95 |               bbox_to_anchor=(1, 0, 0.5, 1))
 96 | 
 97 |     plt.setp(autotexts, size=10, weight="bold")
 98 | 
 99 |     fig.savefig(os.path.join(path_to_save, pie_messages_per_author.__name__ + ".png"), dpi=500)
100 |     # plt.show()
101 |     plt.close("all")
102 |     log_line(f"{pie_messages_per_author.__name__} was created.")
103 | 
104 | 
def _get_xticks(msgs, crop=True):
    """Builds x-axis tick positions and labels for plots over the whole message history.

    Ticks are placed per month when the history spans more than ``months_border``
    months and per week otherwise. Positions are day offsets from the first message.

    Args:
        msgs: Messages; the first one defines the start date.
        crop: If truthy, the first label is blanked when the second tick follows the
            start date too closely (less than 10 days for months, less than 3 days
            for weeks, the latter only with more than two week ticks).

    Returns:
        xticks, xticks_labels, xlabel (tuple):
            xticks is a list of day offsets (never negative).
            xticks_labels is the list of labels for these ticks.
            xlabel is "month" or "week".
    """
    start_date = msgs[0].date.date()
    if stools.count_months(msgs) > months_border:
        xlabel = "month"
        ticks = stools.get_months(msgs)
        xticks_labels = stools.date_months_to_str_months(ticks)
        hide_first_label = (ticks[1] - start_date).days < 10 and crop
    else:  # too short message history -> we split data by weeks, not months
        xlabel = "week"
        ticks = stools.get_weeks(msgs)
        xticks_labels = stools.date_days_to_str_days(ticks)
        hide_first_label = len(ticks) > 2 and (ticks[1] - start_date).days < 3 and crop

    if hide_first_label:
        xticks_labels[0] = ""  # remove first short period tick for better look

    # max() is needed because the start date is usually later than the first tick date
    xticks = [max(0, (tick - start_date).days) for tick in ticks]

    return xticks, xticks_labels, xlabel
129 | 
130 | 
def _get_plot_data(msgs):
    """Groups messages by period and computes the x position of every group.

    Messages are grouped per month when the history spans more than ``months_border``
    months and per week otherwise.

    Returns:
        x, y (tuple):
            x is a list of day offsets from the first message, one per group.
            y is a list of groups of messages (for y axis).
    """
    first_day = msgs[0].date.date()
    last_day = msgs[-1].date.date()

    if stools.count_months(msgs) > months_border:
        grouped = stools.get_messages_per_month(msgs)
    else:  # too short message history -> we split data by weeks, not months
        grouped = stools.get_messages_per_week(msgs)

    # max() is needed because the start date is usually later than the first period date
    starts = [max(0, (period_start - first_day).days) for period_start in grouped.keys()]
    y = list(grouped.values())

    # the first value sits on its own tick ...
    x = [starts[0]]
    # ... inner values sit in the middle between their tick and the next one ...
    x.extend((left + right) / 2 for left, right in zip(starts[1:-1], starts[2:]))
    # ... and the last one in the middle between the last tick and the end date
    if len(y) > 1:
        x.append((starts[-1] + (last_day - first_day).days) / 2)

    return x, y
166 | 
167 | 
def stackplot_non_text_messages_percentage(msgs, path_to_save):
    """Draws the share of every non-text message type over time as a stacked area plot.

    For each period the per-type counts are divided by their sum, so the stacks add up
    to 1 (a period with a zero sum is set to 0 for every type). The y axis is labelled
    in percent. The figure is written to
    ``stackplot_non_text_messages_percentage.png`` inside ``path_to_save``.

    Args:
        msgs: Messages, forwarded to ``_get_plot_data`` and ``_get_xticks``.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid", palette="muted")

    colors = ['y', 'b', 'c', 'r', 'g', 'm']

    x, y_total = _get_plot_data(msgs)
    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    stacks = stools.get_non_text_messages_grouped(y_total)

    # Normalize values period by period
    for period_idx in range(len(stacks[0]["groups"])):
        period_total = sum(stack["groups"][period_idx] for stack in stacks)
        for stack in stacks:
            groups = stack["groups"]
            groups[period_idx] = groups[period_idx] / period_total if period_total else 0

    series = [stack["groups"] for stack in stacks]
    series_names = [stack["type"] for stack in stacks]
    plt.stackplot(x, *series, labels=series_names, colors=colors, alpha=0.7)

    plt.margins(0, 0)
    plt.xticks(xticks, rotation=65)
    plt.yticks([tenth / 10 for tenth in range(0, 11, 2)])

    ax = plt.gca()
    ax.set_xticklabels(xticks_labels)
    ax.set_yticklabels([f"{percent}%" for percent in range(0, 101, 20)])
    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    ax.set(xlabel=xlabel, ylabel="non-text messages")

    # https://stackoverflow.com/a/4701285
    # Shrink current axis by 10% to make room for the legend on the right
    bounds = ax.get_position()
    ax.set_position([bounds.x0, bounds.y0, bounds.width * 0.9, bounds.height])
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))

    fig = plt.gcf()
    fig.set_size_inches(11, 8)

    target_file = os.path.join(path_to_save, stackplot_non_text_messages_percentage.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{stackplot_non_text_messages_percentage.__name__} was created.")
    plt.close("all")
213 | 
214 | 
def barplot_non_text_messages(msgs, path_to_save):
    """Draws a stacked bar plot of non-text message counts per period and type.

    Stacking is emulated with overlapping bars: the tallest bars (sum of all types)
    are drawn first, then one type after another is subtracted and the shorter bars
    are drawn on top. The figure is written to ``barplot_non_text_messages.png``
    inside ``path_to_save``.

    Args:
        msgs: Messages, forwarded to ``_get_plot_data`` and ``_get_xticks``.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid", palette="muted")

    colors = ['y', 'b', 'c', 'r', 'g', 'm']

    x, y_total = _get_plot_data(msgs)
    xticks, xticks_labels, xlabel = _get_xticks(msgs, crop=False)

    bars = stools.get_non_text_messages_grouped(y_total)

    # bars are overlapping, so firstly we need to sum up the all...
    remaining = [0] * len(y_total)
    for bar in bars:
        remaining = [acc + count for acc, count in zip(remaining, bar["groups"])]
    # ... plot and subtract one by one.
    for i, bar in enumerate(bars[:-1]):
        sns.barplot(x=xticks_labels, y=remaining, label=bar["type"], color=colors[i])
        remaining = [acc - count for acc, count in zip(remaining, bar["groups"])]
    ax = sns.barplot(x=xticks_labels, y=remaining, label=bars[-1]["type"], color=colors[-1])
    _change_bar_width(ax, 1.)

    # https://stackoverflow.com/a/4701285
    # Shrink current axis by 10% to make room for the legend on the right
    bounds = ax.get_position()
    ax.set_position([bounds.x0, bounds.y0, bounds.width * 0.9, bounds.height])
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))

    ax.set_xticklabels(xticks_labels, ha="right")
    ax.set(xlabel=xlabel, ylabel="messages")

    plt.xticks(rotation=65)
    fig = plt.gcf()
    fig.set_size_inches(16, 8)

    target_file = os.path.join(path_to_save, barplot_non_text_messages.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{barplot_non_text_messages.__name__} was created.")
    plt.close("all")
253 | 
254 | 
def barplot_messages_per_day(msgs, path_to_save):
    """Draws one bar per day with the number of messages of that day.

    Bars are coloured from a reversed "Greens_d" palette indexed by the day's
    message count. The figure is written to ``barplot_messages_per_day.png``
    inside ``path_to_save``.

    Args:
        msgs: Messages, forwarded to ``structure_tools`` and ``_get_xticks``.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid", palette="muted")
    sns.despine(top=True)

    per_day_groups = stools.get_messages_per_day(msgs).values()

    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    day_sizes = [len(day) for day in per_day_groups]
    min_day = min(day_sizes)
    max_day = max(day_sizes)
    # one colour per possible message count between the quietest and the busiest day
    pal = sns.color_palette("Greens_d", max_day - min_day + 1)[::-1]
    bar_colors = np.array(pal)[[size - min_day for size in day_sizes]]

    ax = sns.barplot(x=list(range(len(day_sizes))), y=day_sizes,
                     edgecolor="none", palette=bar_colors)
    _change_bar_width(ax, 1.)
    ax.set(xlabel=xlabel, ylabel="messages")
    ax.set_xticklabels(xticks_labels)

    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    plt.xticks(xticks, rotation=65)

    fig = plt.gcf()
    fig.set_size_inches(20, 10)
    target_file = os.path.join(path_to_save, barplot_messages_per_day.__name__ + ".png")
    fig.savefig(target_file, dpi=500)

    log_line(f"{barplot_messages_per_day.__name__} was created.")
    plt.close("all")
283 | 
284 | 
def barplot_messages_per_minutes(msgs, path_to_save, minutes=2):
    """Draws one bar per ``minutes``-long slot with the number of messages in that slot.

    Bars are coloured from a reversed "GnBu_d" palette indexed by the slot's message
    count; x ticks are placed at every full hour. The figure is written to
    ``barplot_messages_per_minutes.png`` inside ``path_to_save``.

    Args:
        msgs: Messages, forwarded to ``structure_tools``.
        path_to_save (str): Directory the PNG is written to.
        minutes (int): Length of one slot in minutes.
    """
    sns.set(style="whitegrid", palette="muted")
    sns.despine(top=True)

    messages_per_minutes = stools.get_messages_per_minutes(msgs, minutes)

    xticks_labels = stools.get_hours()
    # index of the slot every hour starts with
    xticks = [hour * 60 // minutes for hour in range(24)]

    slot_sizes = [len(slot) for slot in messages_per_minutes.values()]
    min_minutes = min(slot_sizes)
    max_minutes = max(slot_sizes)
    # one colour per possible message count between the quietest and the busiest slot
    pal = sns.color_palette("GnBu_d", max_minutes - min_minutes + 1)[::-1]
    bar_colors = np.array(pal)[[size - min_minutes for size in slot_sizes]]

    ax = sns.barplot(x=list(range(len(messages_per_minutes))), y=slot_sizes,
                     edgecolor="none", palette=bar_colors)
    _change_bar_width(ax, 1.)
    ax.set(xlabel="hour", ylabel="messages")
    ax.set_xticklabels(xticks_labels)

    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    plt.xticks(xticks, rotation=65)

    fig = plt.gcf()
    fig.set_size_inches(20, 10)

    target_file = os.path.join(path_to_save, barplot_messages_per_minutes.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{barplot_messages_per_minutes.__name__} was created.")
    plt.close("all")
315 | 
316 | 
def barplot_words(msgs, your_name, target_name, words, topn, path_to_save):
    """Draws grouped bars comparing how often both authors used the given words.

    Only the ``topn`` words with the highest combined count are shown. The figure
    is written to ``barplot_words.png`` inside ``path_to_save``.

    Args:
        msgs: Messages; each must provide ``author``.
        your_name (str): Author of the first bar group.
        target_name (str): Author of the second bar group.
        words (list): Words to compare. NOTE: the list is sorted in place.
        topn (int): Number of words to show.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid")

    your_words_cnt = stools.get_words_countered([msg for msg in msgs if msg.author == your_name])
    target_words_cnt = stools.get_words_countered([msg for msg in msgs if msg.author == target_name])

    # most used words (by both authors together) first
    words.sort(key=lambda w: your_words_cnt[w] + target_words_cnt[w], reverse=True)

    authors = ((your_name, your_words_cnt), (target_name, target_words_cnt))
    rows = {"name": [], "word": [], "num": []}
    for word in words[:topn]:
        for author, counter in authors:
            rows["word"].append(word)
            rows["name"].append(author)
            rows["num"].append(counter[word])

    ax = sns.barplot(x="word", y="num", hue="name", data=pd.DataFrame(rows), palette="PuBu")
    ax.legend(ncol=1, loc="upper right", frameon=True)
    ax.set(ylabel="messages", xlabel='')

    fig = plt.gcf()
    fig.set_size_inches(14, 8)

    target_file = os.path.join(path_to_save, barplot_words.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{barplot_words.__name__} was created.")
    plt.close("all")
346 | 
347 | 
def barplot_emojis(msgs, your_name, target_name, topn, path_to_save):
    """Draws horizontal grouped bars comparing both authors' use of the most common emojis.

    The ``topn`` most common emojis of the whole history are shown under their
    ``emoji.demojize`` names. Nothing is drawn when no emoji was found. The figure
    is written to ``barplot_emojis.png`` inside ``path_to_save``.

    Args:
        msgs: Messages; each must provide ``author``.
        your_name (str): Author of the first bar group.
        target_name (str): Author of the second bar group.
        topn (int): Number of emojis to show.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid")

    mc_emojis = stools.get_emoji_countered(msgs).most_common(topn)
    if not mc_emojis:
        return

    your_emojis_cnt = stools.get_emoji_countered([msg for msg in msgs if msg.author == your_name])
    target_emojis_cnt = stools.get_emoji_countered([msg for msg in msgs if msg.author == target_name])

    authors = ((your_name, your_emojis_cnt), (target_name, target_emojis_cnt))
    rows = {"name": [], "emoji": [], "num": []}
    for symbol, _ in mc_emojis:
        emoji_name = emoji.demojize(symbol)
        for author, counter in authors:
            rows["emoji"].append(emoji_name)
            rows["name"].append(author)
            rows["num"].append(counter[symbol])

    ax = sns.barplot(x="num", y="emoji", hue="name", data=pd.DataFrame(rows), palette="PuBu")
    ax.set(ylabel="emoji name", xlabel="emojis")
    ax.legend(ncol=1, loc="lower right", frameon=True)

    fig = plt.gcf()
    fig.set_size_inches(11, 8)
    plt.tight_layout()

    target_file = os.path.join(path_to_save, barplot_emojis.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{barplot_emojis.__name__} was created.")
    plt.close("all")
380 | 
381 | 
def barplot_messages_per_weekday(msgs, your_name, target_name, path_to_save):
    """Draws message counts per weekday as two overlapping bar series.

    The first series holds all messages of a weekday and is labelled with
    ``your_name``; the second one is drawn over it, holds only the messages authored
    by ``target_name`` and is labelled accordingly. The figure is written to
    ``barplot_messages_per_weekday.png`` inside ``path_to_save``.

    Args:
        msgs: Messages; each must provide ``author``.
        your_name (str): Legend label of the first series.
        target_name (str): Author counted in (and label of) the second series.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid", palette="pastel")

    weekday_groups = stools.get_messages_per_weekday(msgs)
    labels = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    all_counts = [len(day_msgs) for day_msgs in weekday_groups.values()]
    ax = sns.barplot(x=labels, y=all_counts, label=your_name, color="b")

    # same colour code "b", but resolved against the "muted" palette for the second series
    sns.set_color_codes("muted")
    target_counts = [sum(1 for msg in day_msgs if msg.author == target_name)
                     for day_msgs in weekday_groups.values()]
    sns.barplot(x=labels, y=target_counts, label=target_name, color="b")

    ax.legend(ncol=2, loc="lower right", frameon=True)
    ax.set(ylabel="messages")
    sns.despine(right=True, top=True)

    fig = plt.gcf()
    fig.set_size_inches(11, 8)

    target_file = os.path.join(path_to_save, barplot_messages_per_weekday.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{barplot_messages_per_weekday.__name__} was created.")
    plt.close("all")
407 | 
408 | 
def distplot_messages_per_hour(msgs, path_to_save):
    """Draws a histogram of messages by the hour of day they were sent at.

    The figure is written to ``distplot_messages_per_hour.png`` inside ``path_to_save``.

    Args:
        msgs: Messages; each must provide a ``date`` with an ``hour`` attribute.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid")

    hours = [msg.date.hour for msg in msgs]
    # 25 edges -> one bin per hour of the day
    ax = sns.distplot(hours, bins=range(25), color="m", kde=False)
    ax.set_xticklabels(stools.get_hours())
    ax.set(xlabel="hour", ylabel="messages")
    ax.margins(x=0)

    plt.xticks(range(24), rotation=65)
    plt.tight_layout()
    fig = plt.gcf()
    fig.set_size_inches(11, 8)

    target_file = os.path.join(path_to_save, distplot_messages_per_hour.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{distplot_messages_per_hour.__name__} was created.")
    plt.close("all")
426 | 
427 | 
def distplot_messages_per_day(msgs, path_to_save):
    """Draws a histogram of days by the number of messages sent on them.

    Bins are 50 messages wide; the last bin ends at the busiest day's count.
    The figure is written to ``distplot_messages_per_day.png`` inside ``path_to_save``.

    Args:
        msgs: Messages, forwarded to ``structure_tools``.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid")

    day_groups = stools.get_messages_per_day(msgs)
    day_sizes = [len(day) for day in day_groups.values()]

    max_day_len = max(day_sizes)
    bin_edges = list(range(0, max_day_len, 50))
    bin_edges.append(max_day_len)

    ax = sns.distplot(day_sizes, bins=bin_edges, color="m", kde=False)
    ax.set(xlabel="messages", ylabel="days")
    ax.margins(x=0)

    fig = plt.gcf()
    fig.set_size_inches(11, 8)

    target_file = os.path.join(path_to_save, distplot_messages_per_day.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{distplot_messages_per_day.__name__} was created.")
    plt.close("all")
446 | 
447 | 
def distplot_messages_per_month(msgs, path_to_save):
    """Draws a histogram of messages over time with one bin per tick period.

    Bin edges are the ticks of ``_get_xticks`` (months or weeks) plus the day offset
    of the last message. The figure is written to ``distplot_messages_per_month.png``
    inside ``path_to_save``.

    Args:
        msgs: Messages; the first and the last one define the time span.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid")

    start_date = msgs[0].date.date()
    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    # every message becomes its distance (in days) from the first message
    day_offsets = [(msg.date.date() - start_date).days for msg in msgs]
    last_offset = (msgs[-1].date.date() - start_date).days

    ax = sns.distplot(day_offsets, bins=xticks + [last_offset], color="m", kde=False)
    ax.set_xticklabels(xticks_labels)
    ax.set(xlabel=xlabel, ylabel="messages")
    ax.margins(x=0)

    plt.xticks(xticks, rotation=65)
    plt.tight_layout()
    fig = plt.gcf()
    fig.set_size_inches(11, 8)

    target_file = os.path.join(path_to_save, distplot_messages_per_month.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    log_line(f"{distplot_messages_per_month.__name__} was created.")
    plt.close("all")
469 | 
470 | 
def lineplot_message_length(msgs, your_name, target_name, path_to_save):
    """Draws the average message length (in characters) of both authors over time.

    One filled line per author, one point per period of ``_get_plot_data``.
    The figure is written to ``lineplot_message_length.png`` inside ``path_to_save``.

    Args:
        msgs: Messages; each must provide ``author`` and ``text``.
        your_name (str): Author of the first line.
        target_name (str): Author of the second line.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid")

    x, y_total = _get_plot_data(msgs)
    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    def average_lengths(author):
        # average text length of the author's messages, one value per period
        return [avg([len(msg.text) for msg in period if msg.author == author]) for period in y_total]

    y_your = average_lengths(your_name)
    y_target = average_lengths(target_name)

    plt.fill_between(x, y_your, alpha=0.3)
    ax = sns.lineplot(x=x, y=y_your, palette="denim blue", linewidth=2.5, label=your_name)
    plt.fill_between(x, y_target, alpha=0.3)
    sns.lineplot(x=x, y=y_target, linewidth=2.5, label=target_name)

    ax.set(xlabel=xlabel, ylabel="average message length (characters)")
    ax.set_xticklabels(xticks_labels)

    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    plt.xticks(xticks, rotation=65)
    ax.margins(x=0, y=0)

    fig = plt.gcf()
    fig.set_size_inches(13, 7)

    target_file = os.path.join(path_to_save, lineplot_message_length.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    plt.close("all")
    log_line(f"{lineplot_message_length.__name__} was created.")
499 | 
500 | 
def lineplot_messages(msgs, your_name, target_name, path_to_save):
    """Draws the number of messages of both authors over time.

    One filled line per author, one point per period of ``_get_plot_data``.
    The figure is written to ``lineplot_messages.png`` inside ``path_to_save``.

    Args:
        msgs: Messages; each must provide ``author``.
        your_name (str): Author of the first line.
        target_name (str): Author of the second line.
        path_to_save (str): Directory the PNG is written to.
    """
    sns.set(style="whitegrid")

    x, y_total = _get_plot_data(msgs)
    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    def message_counts(author):
        # number of the author's messages, one value per period
        return [sum(1 for msg in period if msg.author == author) for period in y_total]

    y_your = message_counts(your_name)
    y_target = message_counts(target_name)

    plt.fill_between(x, y_your, alpha=0.3)
    ax = sns.lineplot(x=x, y=y_your, palette="denim blue", linewidth=2.5, label=your_name)
    plt.fill_between(x, y_target, alpha=0.3)
    sns.lineplot(x=x, y=y_target, linewidth=2.5, label=target_name)

    ax.set(xlabel=xlabel, ylabel="messages")
    ax.set_xticklabels(xticks_labels)

    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    plt.xticks(xticks, rotation=65)
    ax.margins(x=0, y=0)

    fig = plt.gcf()
    fig.set_size_inches(13, 7)

    target_file = os.path.join(path_to_save, lineplot_messages.__name__ + ".png")
    fig.savefig(target_file, dpi=500)
    plt.close("all")
    log_line(f"{lineplot_messages.__name__} was created.")
529 | 
530 | 
def wordcloud(msgs, words, path_to_save):
    """Builds a circular word cloud of the given words and saves it as a PNG.

    Every word of ``words`` is weighted by the number of times it occurs in the
    messages. When none of the words occurs, a log line is written and no file is
    created. The image is written to ``wordcloud.png`` inside ``path_to_save``.

    Args:
        msgs: Messages, forwarded to ``structure_tools.get_words_countered``.
        words: Words to put into the cloud (duplicates are ignored).
        path_to_save (str): Directory the PNG is written to.
    """
    words_cnt = stools.get_words_countered(msgs)
    # we need to create a huge string which contains each word as many times as it encounters in messages.
    all_words_list = []
    for word in set(words):
        all_words_list.extend([word] * words_cnt[word])

    if not all_words_list:
        log_line("No such words were found in message history.")
        return

    # don't forget to shuffle! The optional second argument of random.shuffle was
    # removed in Python 3.11, so the one-argument form is used.
    random.shuffle(all_words_list)

    all_words_string = ' '.join(all_words_list)

    # the cloud will be a circle: everything outside of the radius is masked out (255).
    radius = 500
    x, y = np.ogrid[:2 * radius, :2 * radius]
    mask = (x - radius) ** 2 + (y - radius) ** 2 > radius ** 2
    mask = 255 * mask.astype(int)

    word_cloud = wc.WordCloud(background_color="white", repeat=False, mask=mask)
    word_cloud.generate(all_words_string)

    plt.axis("off")
    plt.imshow(word_cloud, interpolation="bilinear")

    word_cloud.to_file(os.path.join(path_to_save, wordcloud.__name__ + ".png"))
    plt.close("all")
    log_line(f"{wordcloud.__name__} was created.")
561 | 


--------------------------------------------------------------------------------