├── image
├── line_chat.png
├── line_logo.png
├── punch_card.jpg
├── punch_card.png
├── ramble_plot.png
├── chat_per_day.jpg
├── chat_per_day.png
├── response_time.jpg
├── response_time.png
├── chat_users_per_day.jpg
├── chat_users_per_day.png
└── ramble_plot_square.png
├── docs
├── README.md
├── needyone
│ ├── index.html
│ └── sketch.js
└── rambles
│ ├── index.html
│ └── sketch.js
├── line2midi.py
├── README.md
├── line-visualize.py
├── .gitignore
├── template
└── visualize.html
├── slides
└── index.html
└── line_utils.py
/image/line_chat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/line_chat.png
--------------------------------------------------------------------------------
/image/line_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/line_logo.png
--------------------------------------------------------------------------------
/image/punch_card.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/punch_card.jpg
--------------------------------------------------------------------------------
/image/punch_card.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/punch_card.png
--------------------------------------------------------------------------------
/image/ramble_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/ramble_plot.png
--------------------------------------------------------------------------------
/image/chat_per_day.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/chat_per_day.jpg
--------------------------------------------------------------------------------
/image/chat_per_day.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/chat_per_day.png
--------------------------------------------------------------------------------
/image/response_time.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/response_time.jpg
--------------------------------------------------------------------------------
/image/response_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/response_time.png
--------------------------------------------------------------------------------
/image/chat_users_per_day.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/chat_users_per_day.jpg
--------------------------------------------------------------------------------
/image/chat_users_per_day.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/chat_users_per_day.png
--------------------------------------------------------------------------------
/image/ramble_plot_square.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/ramble_plot_square.png
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | ## Visualize using RuneJS
2 |
3 | Based on the code from this [Medium blog post](https://medium.com/@wipaweeeeee/call-me-adele-f37162b6ffe5).
4 | See the examples in the following folders.
5 |
6 | - [needyone](needyone)
7 | - [rambles](rambles)
8 |
--------------------------------------------------------------------------------
/docs/needyone/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ramble
6 |
7 |
8 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docs/rambles/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ramble
6 |
7 |
8 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docs/needyone/sketch.js:
--------------------------------------------------------------------------------
// Rune.js sketch: draws one small mark per chat message on a 300x300 canvas.
// Messages with user_id == 1 become 1x10 rectangles, messages with
// user_id == 0 become 2x2 ellipses; each kind has its own cursor.
var r = new Rune({
  container: "#canvas",
  width: 300,
  height: 300,
  debug: false,
});


// Fetch the example chat JSON and draw once it has loaded.
// NOTE(review): presumably an array of objects carrying a `user_id` field;
// confirm against the gist.
$.ajax({
  type: 'GET',
  url: "https://gist.githubusercontent.com/titipata/33bd071df166c135bcd5d87a6b53c32c/raw/e662e0dab90dc37dd615020418770655d4fac522/example_bim.json",
  dataType: 'json',
  success: function(result){
    // (x, y) is the cursor for user 1's rectangles,
    // (xt, yt) the cursor for user 0's ellipses.
    var x = 10;
    var y = 0;
    var xt = 10;
    var yt = 5;

    for (var i = 0; i < result.length; i++) {
      if (result[i].user_id == 1){
        r.rect(x, y, 1, 10)
          .stroke(false)
          .fill(0)
        x++;
      }
      // Wrap the rectangle cursor: back to x = 10 and 13 units further down.
      // This branch draws a rectangle regardless of who sent message i.
      if( x > r.width - 10) {
        x = 10;
        y += 13;
        r.rect(x, y, 1, 10)
          .stroke(false)
          .fill(0)
        x++;
      }
      // The cursor advances for every message, so the other user's
      // messages show up as gaps in this row.
      x++;

      if (result[i].user_id == 0){
        r.ellipse(xt, yt, 2, 2)
          .stroke(false)
          .fill(0)
        xt++;
      }
      // Same wrapping rule for the ellipse cursor.
      if( xt > r.width - 10) {
        xt = 10;
        yt += 13;
        r.ellipse(xt, yt, 2, 2)
          .stroke(false)
          .fill(0)
        xt++;
      }
      xt++;
    }
    // Render everything that was queued above.
    r.draw();
  }
});
55 |
--------------------------------------------------------------------------------
/docs/rambles/sketch.js:
--------------------------------------------------------------------------------
// Rune.js sketch: draws every chat message as a 2-unit-wide bar whose height
// is the message's `char_length`, on a 380x380 canvas.
// user_id == 1 is drawn with fill(180), user_id == 0 with fill(0).
var r = new Rune({
  container: "#canvas",
  width: 380,
  height: 380,
  debug: false,
});

// Fetch the example chat JSON and draw once it has loaded.
$.ajax({
  type: 'GET',
  url: "https://gist.githubusercontent.com/titipata/33bd071df166c135bcd5d87a6b53c32c/raw/e662e0dab90dc37dd615020418770655d4fac522/example_bim.json",
  dataType: 'json',
  success: function(result){
    // (x, y) is the cursor for user 1's bars, (xt, yt) for user 0's bars.
    var x = 0;
    var y = 0;
    var xt = 0;
    var yt = 0;
    for( var i = 0; i < result.length; i++ ) {
      // NOTE(review): this guard reads `chat_length` while every other line
      // reads `char_length`. If the JSON has no `chat_length` field, `.length`
      // of undefined throws a TypeError -- confirm against the data.
      if ( result[i].chat_length.length != 0 ) {
        if( result[i].user_id == 1 ) {
          r.rect(x, y, 2, result[i].char_length)
            .stroke(false)
            .fill(180)

          // Past the bottom edge: move 3 units right, restart at the top and
          // draw the same bar again there.
          if( y > r.height ) {
            x += 3;
            y = 0;
            r.rect(x, y, 2, result[i].char_length)
              .stroke(false)
              .fill(180)
          }
        }

        if(result[i].user_id == 0) {
          r.rect(xt, yt, 2, result[i].char_length)
            .stroke(false)
            .fill(0)

          // Same wrapping rule for user 0's column.
          if( yt > r.height) {
            xt += 3;
            yt = 0;
            r.rect(xt, yt, 2, result[i].char_length)
              .stroke(false)
              .fill(0)
          }
        }
        // Both cursors move down for every message, whoever sent it.
        y += result[i].char_length;
        yt += result[i].char_length;
      }
    }
    // Render everything that was queued above.
    r.draw();
  }
});
53 |
--------------------------------------------------------------------------------
/line2midi.py:
--------------------------------------------------------------------------------
1 | import line_utils
2 | import numpy as np
3 | from midiutil.MidiFile import MIDIFile
4 |
5 |
def generate_midi_text(file_name, output_file_name='midi.mid'):
    """
    Convert a LINE chat export into a MIDI file made of three-note chords.

    One chord is written per chat message, for at most the first 100
    messages. The root pitch of each chord is
    ``60 + len(message) // 10 + noise`` where ``noise`` is a rounded Gaussian
    sample with a standard deviation of 10; the chord adds the notes 4 and 7
    semitones above the root.

    Parameters
    ----------
    file_name : path of the exported LINE chat text file
    output_file_name : path of the MIDI file to write (default ``midi.mid``)
    """
    chats_dict = line_utils.read_line_chat(file_name)
    users = line_utils.get_users(chats_dict)
    chats_split = [line_utils.split_chat(chat, users) for (date, chat) in chats_dict['chats']]
    chats = [chat for (t, u, chat) in chats_split if chat is not None]
    variations_round = np.array([len(c) // 10 for c in chats], dtype=float)

    # setting up MIDI
    track = 0
    time = 0
    channel = 0
    tempo = 120
    duration = 1
    volume = 100
    base_pitch = 60
    # adjust_origin receives the same falsy value (0) the original code passed
    midi = MIDIFile(numTracks=1, adjust_origin=time)
    midi.addTrackName(track, time, "testing")
    midi.addTempo(track, time, tempo)

    # Use at most 100 messages, and draw exactly as many noise samples as
    # there are messages: a fixed 100-sample noise vector cannot be added to
    # a shorter array.
    n_notes = min(len(variations_round), 100)
    noise = np.round(10 * np.random.randn(n_notes))
    pitches = base_pitch + (variations_round[:n_notes] + noise)
    # Keep the root low enough that root + 7 is still a valid MIDI pitch.
    pitches = np.clip(pitches, 0, 127 - 7)

    # chord stuff: root, +4 and +7 semitones, one chord per beat
    for t, v in enumerate(pitches):
        root = int(v)
        for interval in (0, 4, 7):
            midi.addNote(track, channel, root + interval, t + 1, duration, volume)

    with open(output_file_name, 'wb') as file:
        midi.writeFile(file)
    print('done converting to midi file!')
38 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Visualize LINE chat
2 |
3 | In [LINE chat](https://line.me/en/) (a chat app that is very popular in Asia),
4 | the chat history can be exported to a text file (Save chat).
5 | Use the following function to read the exported chat history file.
6 |
7 | ```python
8 | import line_utils
9 | chats_dict = line_utils.read_line_chat('chat_history.txt')
10 | ```
11 |
12 | `chats_dict` is a chat dictionary with the following keys: `count`, `chats`, `total_chats`
13 |
14 |
15 | **Plot total chats activities on given day**
16 |
17 | ```python
18 | line_utils.plot_chat_per_day(chats_dict)
19 | ```
20 |
21 |
22 |
23 |
24 |
25 | **Plot chats activities per users on given day**
26 |
27 | ```python
28 | line_utils.plot_chat_users_per_day(chats_dict)
29 | ```
30 |
31 | Here `users` is a list of username strings in the chat
32 |
33 |
34 |
35 |
36 |
37 | **Plot punch card activities**
38 |
39 | See at what times of the week you chat with your friends the most.
40 |
41 | ```python
42 | line_utils.plot_punch_card_activities(chats_dict)
43 | ```
44 |
45 |
46 |
47 |
48 |
49 | **Plot response rate and time**
50 |
51 | Plot and print the average time each user waits before replying
52 | to the previous chat.
53 |
54 | ```python
55 | line_utils.plot_response_rate(chats_dict)
56 | ```
57 |
58 |
59 |
60 |
61 |
62 | **Generate single HTML file to visualize report**
63 | ```
64 | python line-visualize.py -i INPUT_CHAT.txt -o output.html
65 | ```
66 |
67 | Example HTML visualize report:
68 | [https://goo.gl/tMmGix](https://goo.gl/tMmGix)
69 |
70 | **runejs example**
71 |
72 | We also provide an example of visualizing chats with `rune.js` in the `docs` folder, based on this [medium post](https://medium.com/@wipaweeeeee/call-me-adele-f37162b6ffe5).
73 | See example at [titipata.github.io/visualize_line_chat](https://titipata.github.io/visualize_line_chat/).
74 |
75 |
76 | **LINE to MIDI example**
77 |
78 | `line2midi.py` shows how to convert a chat to a MIDI file. This was done at the
79 | Stupid Hackathon.
80 |
--------------------------------------------------------------------------------
/line-visualize.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import jinja2
3 | import os
4 | import base64
5 | import shutil
6 | import line_utils
7 | import matplotlib.pyplot as plt
8 |
9 |
def generator(input, output):
    """
    Build a single self-contained HTML report from a LINE chat export.

    The four plots are rendered to PNG files in a private temporary
    directory, embedded into ``./template/visualize.html`` as base64 strings
    and the rendered page is written to ``output``.

    Parameters
    ----------
    input : path of the exported LINE chat text file
    output : path of the HTML file to write

    Raises
    ------
    SystemExit : when ``input`` is missing or does not exist
    """
    # Imported here so the block does not depend on a file-level import.
    import tempfile

    if input is None or not os.path.exists(input):
        print("File {} not exist".format(input))
        raise SystemExit(1)

    # A fresh private directory: a fixed "tmp" folder could already belong to
    # the user and would be deleted by the cleanup below.
    directory = tempfile.mkdtemp(prefix='line_visualize_')
    try:
        chats_dict = line_utils.read_line_chat(input)

        # Chat per day
        chat_per_day_path = os.path.join(directory, "chat_per_day.png")
        plt.figure(figsize=(10, 5))
        line_utils.plot_chat_per_day(chats_dict, chat_per_day_path)

        # User chat per day
        user_chat_per_day_path = os.path.join(directory, "user_chat_per_day.png")
        plt.figure(figsize=(10, 5))
        line_utils.plot_chat_users_per_day(chats_dict, user_chat_per_day_path)

        # Punch card activities
        punch_card_activities_path = os.path.join(directory, "punch_card_activities.png")
        plt.figure(figsize=(15, 10))
        line_utils.plot_punch_card_activities(chats_dict, punch_card_activities_path)

        # Response rate
        response_rate_path = os.path.join(directory, "response_rate.png")
        plt.figure(figsize=(15, 10))
        response_rate_users = line_utils.plot_response_rate(chats_dict, response_rate_path)

        data = {
            "chat_per_day": convert_img_to_base64(chat_per_day_path),
            "user_chat_per_day": convert_img_to_base64(user_chat_per_day_path),
            "punch_card_activities": convert_img_to_base64(punch_card_activities_path),
            "response_rate": convert_img_to_base64(response_rate_path),
            "response_rate_users": response_rate_users,
        }

        result = render('./template/visualize.html', data)

        # The template contains non-ASCII (Thai) text, so the encoding is
        # fixed instead of depending on the platform default.
        with open(output, "w", encoding="utf-8") as html_file:
            html_file.write(result)
    finally:
        # Remove the generated image directory, also when a step above failed
        shutil.rmtree(directory, ignore_errors=True)
58 |
def render(tpl_path, context):
    """
    Render the Jinja2 template stored at ``tpl_path`` with ``context``.

    The template is looked up through a file-system loader rooted at the
    template's directory ('./' when the path has no directory part) and the
    rendered text is returned.
    """
    template_dir, template_name = os.path.split(tpl_path)
    loader = jinja2.FileSystemLoader(template_dir or './')
    environment = jinja2.Environment(loader=loader)
    template = environment.get_template(template_name)
    return template.render(context)
64 |
def convert_img_to_base64(file):
    """
    Read the file at path ``file`` in binary mode and return its content
    as a base64 text string.
    """
    with open(file, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
69 |
if __name__ == '__main__':
    # Command-line entry point: parse the options and build the report.
    parser = argparse.ArgumentParser(
        description='Generate a single HTML report from a LINE chat export')
    parser.add_argument(
        '-i',
        '--input',
        # Required: without it generator() would receive None as the path.
        required=True,
        help='input chat file to visualize')

    parser.add_argument(
        '-o',
        '--output',
        help='HTML output file',
        default='output.html')

    args = parser.parse_args()
    # The option names match generator()'s parameter names (input, output).
    generator(**vars(args))
86 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Emacs ###
2 | # -*- mode: gitignore; -*-
3 | *~
4 | \#*\#
5 | /.emacs.desktop
6 | /.emacs.desktop.lock
7 | *.elc
8 | auto-save-list
9 | tramp
10 | .\#*
11 |
12 | # Org-mode
13 | .org-id-locations
14 | *_archive
15 |
16 | # flymake-mode
17 | *_flymake.*
18 |
19 | # eshell files
20 | /eshell/history
21 | /eshell/lastdir
22 |
23 | # elpa packages
24 | /elpa/
25 |
26 | # reftex files
27 | *.rel
28 |
29 | # AUCTeX auto folder
30 | /auto/
31 |
32 | # cask packages
33 | .cask/
34 | dist/
35 |
36 | # Flycheck
37 | flycheck_*.el
38 |
39 | # server auth directory
40 | /server/
41 |
42 | # projectiles files
43 | .projectile
44 |
45 | # directory configuration
46 | .dir-locals.el
47 |
48 | ### macOS ###
49 | *.DS_Store
50 | .AppleDouble
51 | .LSOverride
52 |
53 | # Icon must end with two \r
54 | Icon
55 |
56 |
57 | # Thumbnails
58 | ._*
59 |
60 | # Files that might appear in the root of a volume
61 | .DocumentRevisions-V100
62 | .fseventsd
63 | .Spotlight-V100
64 | .TemporaryItems
65 | .Trashes
66 | .VolumeIcon.icns
67 | .com.apple.timemachine.donotpresent
68 |
69 | # Directories potentially created on remote AFP share
70 | .AppleDB
71 | .AppleDesktop
72 | Network Trash Folder
73 | Temporary Items
74 | .apdisk
75 |
76 | ### Python ###
77 | # Byte-compiled / optimized / DLL files
78 | __pycache__/
79 | *.py[cod]
80 | *$py.class
81 |
82 | # C extensions
83 | *.so
84 |
85 | # Distribution / packaging
86 | .Python
87 | env/
88 | build/
89 | develop-eggs/
90 | downloads/
91 | eggs/
92 | .eggs/
93 | lib/
94 | lib64/
95 | parts/
96 | sdist/
97 | var/
98 | wheels/
99 | *.egg-info/
100 | .installed.cfg
101 | *.egg
102 |
103 | # PyInstaller
104 | # Usually these files are written by a python script from a template
105 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
106 | *.manifest
107 | *.spec
108 |
109 | # Installer logs
110 | pip-log.txt
111 | pip-delete-this-directory.txt
112 |
113 | # Unit test / coverage reports
114 | htmlcov/
115 | .tox/
116 | .coverage
117 | .coverage.*
118 | .cache
119 | nosetests.xml
120 | coverage.xml
121 | *,cover
122 | .hypothesis/
123 |
124 | # Translations
125 | *.mo
126 | *.pot
127 |
128 | # Django stuff:
129 | *.log
130 | local_settings.py
131 |
132 | # Flask stuff:
133 | instance/
134 | .webassets-cache
135 |
136 | # Scrapy stuff:
137 | .scrapy
138 |
139 | # Sphinx documentation
140 | docs/_build/
141 |
142 | # PyBuilder
143 | target/
144 |
145 | # Jupyter Notebook
146 | .ipynb_checkpoints
147 |
148 | # pyenv
149 | .python-version
150 |
151 | # celery beat schedule file
152 | celerybeat-schedule
153 |
154 | # SageMath parsed files
155 | *.sage.py
156 |
157 | # dotenv
158 | .env
159 |
160 | # virtualenv
161 | .venv
162 | venv/
163 | ENV/
164 |
165 | # Spyder project settings
166 | .spyderproject
167 |
168 | # Rope project settings
169 | .ropeproject
170 |
171 | # End of https://www.gitignore.io/api/python,emacs,macos
172 |
--------------------------------------------------------------------------------
/template/visualize.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Visualize LINE chat
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
24 |
25 |
26 |
27 |
28 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
จำนวนข้อความในแต่ละวัน
40 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
จำนวนข้อความในแต่ละวันแยกตามผู้ใช้งาน
53 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
กลุ่มวันและช่วงเวลาที่แชท
66 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
ค่าเฉลี่ยความเร็วในการตอบแชท (นาที)
79 |
80 |
81 |
82 |
83 | ชื่อ
84 | ความเร็วในการตอบแชท (นาที)
85 |
86 |
87 |
88 |
89 | {% for response_rate_user in response_rate_users %}
90 |
91 | {{response_rate_user.user}}
92 | {{response_rate_user.avg_rate}}
93 |
94 | {% endfor %}
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
จำนวนแชทกับช่วงเวลาที่ใช้ในการตอบ
106 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
127 |
128 |
129 |
130 |
131 |
132 |
144 |
145 |
146 |
147 |
--------------------------------------------------------------------------------
/slides/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Crash course in Python
5 |
6 |
23 |
24 |
25 |
214 |
215 |
216 |
238 |
239 |
240 |
--------------------------------------------------------------------------------
/line_utils.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import re
3 | import numpy as np
4 | from dateutil import parser
5 | from collections import defaultdict
6 | from itertools import groupby
7 | import datetime
8 | from operator import itemgetter
9 | from collections import Counter
10 | import scipy.sparse as sp
11 | import matplotlib.pyplot as plt
12 | import random
13 |
def read_line_chat(file_name):
    """
    Read an exported LINE chat history file.

    Returns a ``defaultdict(list)`` with the keys

    - ``chats``: list of ``[date_string, chat_line]`` pairs, one per
      non-empty line that follows the first date line
    - ``total_chats``: number of entries in ``chats``

    Lines before the first date line are skipped. A line is treated as a
    date line when it contains ``digits.digits.digits`` anywhere or starts
    with ``digits/digits/digits`` (Android format). A leading ``24:`` hour
    on a chat line is rewritten to ``00:``.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(file_name) as chat_file:
        rows = list(csv.reader(chat_file))
    # Only the first CSV column of every non-empty row is kept.
    chats = [row[0] for row in rows if len(row) > 0]

    chats_dict = defaultdict(list)
    current_date = None
    for chat in chats:
        # Empty line
        if len(chat.strip()) == 0:
            continue

        date = re.findall(r'\d+\.\d+\.\d+', chat)
        # Android date format
        if len(date) == 0:
            date = re.findall(r'^\d+/\d+/\d+', chat)

        if len(date) >= 1:
            current_date = date[0]
        elif current_date is not None:
            # Skip everything until the first date line has been seen
            chat = re.sub('^24:', '00:', chat)
            chats_dict['chats'].append([current_date, chat])
    chats_dict['total_chats'] = len(chats_dict['chats'])
    return chats_dict
47 |
def get_date_range(chats_dict):
    """
    Get every calendar day between the earliest and the latest chat date.

    Returns a list of datetimes, one per day, from the smallest to the
    largest date found in ``chats_dict['chats']`` (both inclusive), or an
    empty list when there are no chats.
    """
    unique_dates = set(c[0] for c in chats_dict['chats'])
    if not unique_dates:
        return []
    dates = [parser.parse(d) for d in unique_dates]
    # The original never defined these bounds and failed with a NameError.
    date_min, date_max = min(dates), max(dates)
    n_days = (date_max - date_min).days + 1
    return [date_min + datetime.timedelta(days=d) for d in range(n_days)]
58 |
def split_chat(chat, users):
    """Given list of users in conversation,
    split chat in to (time, user, chat text)

    The first whitespace-separated token must contain an ``H:M`` time,
    otherwise (None, None, None) is returned (also for blank input).
    The sender is the longest entry of ``users`` the remaining text starts
    with; it is removed from the front of the text. When no user matches,
    the user is returned as '' and the text is left unchanged.
    """
    tokens = chat.split()
    if not tokens or not re.findall(r'\d+:\d+', tokens[0]):
        return (None, None, None)

    time = tokens[0]
    # Whitespace (tabs included) is normalised to single spaces
    chat = ' '.join(tokens[1:])

    u = ''
    for user in users:
        if not user:
            continue
        # The sender name is a prefix of the line. Matching it anywhere in the
        # text would credit the message to a user who is only mentioned.
        is_sender = chat == user or chat.startswith(user + ' ')
        if is_sender and len(user) > len(u):
            u = user
    if u:
        # Plain slicing: user names may contain regex metacharacters
        chat = chat[len(u):].strip()
    return (time, u, chat)
76 |
def bin_time(t, n_bin=8):
    """bin time in the day to number of bins

    Returns the 0-based index of the bin that the hour of ``t`` falls into.
    Every bin spans ``int(24 / n_bin)`` hours (at least one hour); hours
    past the last full bin are put into the last bin instead of yielding
    None.

    Raises ValueError when ``n_bin`` is smaller than 1.
    """
    if n_bin < 1:
        raise ValueError('n_bin must be at least 1')
    bin_size = max(int(24. / n_bin), 1)
    h = parser.parse(t).hour
    # Clamp so that hours beyond n_bin * bin_size still get a valid bin.
    return min(h // bin_size, n_bin - 1)
85 |
def day_of_week(day, n_bin=8):
    """
    Turn date string into days of week index
    ['Sunday', 'Monday', ..., 'Saturday']

    ``n_bin`` is unused and only kept for backward compatibility.
    """
    # datetime.weekday() counts Monday as 0; shift by one so that Sunday is 0
    # as documented above.
    return (parser.parse(day).weekday() + 1) % 7
92 |
def gen_hex_colour_code():
    """
    Return a random colour as '#' followed by six upper-case hex digits
    """
    hex_digits = '0123456789ABCDEF'
    picked = (random.choice(hex_digits) for _ in range(6))
    return '#' + ''.join(picked)
98 |
def avg_response_rate(ls, wait_time=60):
    """
    Average response time over the replies that took at most ``wait_time``

    ``ls`` is a list of response times; values larger than ``wait_time``
    are ignored. Returns the mean of the remaining values as a float, or
    0.0 when no value qualifies.
    """
    # Filter on the response time itself. Counter(ls).items() yields
    # (response_time, count) pairs, and the original unpacked them the other
    # way round, so it thresholded and divided by the wrong quantity.
    within_limit = [r for r in ls if r <= wait_time]
    if not within_limit:
        return 0.0
    return float(sum(within_limit)) / len(within_limit)
110 |
def get_users(chats_dict, min_chats=10):
    """
    Get all users from chat dictionary

    Every ``H:M`` pattern is removed from each chat line; the first remaining
    token is taken as the sender's first name. Names that start more than
    ``min_chats`` lines count as users. A profile name is extended with the
    second and third token as long as that token is identical on all of the
    user's lines.

    Returns the list of profile names.
    """
    time_pattern = re.compile(r'\d+:\d+')
    tokenized = []
    for c in chats_dict['chats']:
        tokens = time_pattern.sub('', c[1]).split()
        # A line holding nothing but a time has no sender token
        if tokens:
            tokenized.append(tokens)

    count_user = Counter(tokens[0] for tokens in tokenized)
    users = [k for k, v in count_user.items() if v > min_chats]

    all_profile_names = []
    for user in users:
        user_chats = [tokens for tokens in tokenized if tokens[0] == user]
        profile_name = [user]
        for i in range(1, 3):
            # Stop at the first position that is missing on some line or is
            # not shared by all lines, so the name stays contiguous.
            if any(len(tokens) <= i for tokens in user_chats):
                break
            shared = set(tokens[i] for tokens in user_chats)
            if len(shared) != 1:
                break
            profile_name.append(shared.pop())
        all_profile_names.append(' '.join(profile_name))
    return all_profile_names
133 |
def plot_chat_per_day(chats_dict, save_file=None):
    """
    Bar chart of the number of chats on every day

    Consecutive chats with the same date string form one bar. The figure is
    shown without blocking and, when ``save_file`` is given, saved to it.
    """
    # one (date string, number of chats) pair per run of equal dates
    daily_counts = [
        (day, sum(1 for _ in entries))
        for day, entries in groupby(chats_dict['chats'], key=itemgetter(0))
    ]
    days = [parser.parse(day) for day, _ in daily_counts]
    counts = [count for _, count in daily_counts]

    axis = plt.subplot(111)
    axis.bar(days, counts, width=0.5)
    axis.xaxis_date()
    plt.xlabel('date')
    plt.xticks(rotation=60)
    plt.ylabel('number of chats')
    plt.show(block=False)

    if save_file is None:
        return
    plt.savefig(save_file, bbox_inches='tight')
158 |
def plot_chat_users_per_day(chats_dict, save_file=None):
    """
    Plot number of chats per users over time

    Draws one group of bars per user: for every day, the number of chat
    lines attributed to that user by ``split_chat``. The figure is shown
    without blocking and, when ``save_file`` is given, saved to it.
    """
    users = get_users(chats_dict)
    n_users = len(users)

    # split chat to time, user, chat string; keep only fully parsed lines
    chat_users = []
    for date, chatlog in chats_dict['chats']:
        time, u, chat = split_chat(chatlog, users)
        if all((time, u, chat)):
            chat_users.append({'date': date,
                               'time': time,
                               'user': u,
                               'chat': chat})

    # number of chats for every (date, user) pair, ordered by date then user
    n_chat_by_key = Counter((c['date'], c['user']) for c in chat_users)
    chats_per_day = sorted(n_chat_by_key.items())

    n_chats_all = []
    for user in users:
        # Exact comparison: a substring test would merge users whose names
        # contain one another (e.g. "Ann" and "Anna").
        n_chats = [[parser.parse(date), n_chat, user]
                   for (date, chat_user), n_chat in chats_per_day
                   if chat_user == user]
        n_chats_all.append(n_chats)

    colors = ['#007e8c', '#ffc0cb', '#488957']  # first set of colors
    colors.extend([gen_hex_colour_code() for i in range(n_users)])  # random

    ax = plt.subplot(111)
    for i, n_chat in enumerate(n_chats_all):
        # shift every user's bars within the day so they do not overlap
        date = [v[0] + datetime.timedelta(hours=(i * 24. / n_users)) for v in n_chat]
        n_chat_user = [v[1] for v in n_chat]
        ax.bar(date, n_chat_user, width=(1.0 / n_users), color=colors[i])
    ax.xaxis_date()
    plt.xlabel('date')
    plt.ylabel('number of chats')
    plt.xticks(rotation=60, ha='right')
    plt.legend(users)
    plt.show(block=False)

    if save_file is not None:
        plt.savefig(save_file, bbox_inches='tight')
208 |
def plot_punch_card_activities(chats_dict, save_file=None):
    """
    Punch card activities, plot in matrix format

    Counts the chat lines per (day of week, 3-hour time bin) and shows the
    7 x 8 count matrix as an image. The figure is shown without blocking
    and, when ``save_file`` is given, saved to it.
    """
    day_labels = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
    bin_labels = ['12 - 3 am', '3 - 6 am', '6 - 9 am', '9 am - 12 pm',
                  '12 - 3 pm', '3 - 6 pm', '6 - 9 pm', '9 pm - 12 am']

    users = get_users(chats_dict)

    # (day of week, time bin) of every fully parsed chat line
    slots = []
    for date, chatlog in chats_dict['chats']:
        time, u, chat = split_chat(chatlog, users)
        if all((time, u, chat)):
            slots.append((day_of_week(date), bin_time(time)))

    # Fixed 7 x 8 shape: a matrix sized from the data would shrink whenever
    # the last weekday or the last time bin has no chats, and the tick
    # labels would no longer line up. It also copes with zero chats.
    chat_time_bin = np.zeros((len(day_labels), len(bin_labels)), dtype=int)
    for (day, time_bin), n_chat in Counter(slots).items():
        chat_time_bin[day, time_bin] = n_chat

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.matshow(chat_time_bin, interpolation='nearest', cmap=plt.get_cmap('bone'))
    # Pin one tick per row/column instead of relying on default tick spacing.
    # The row labels assume day_of_week() yields 0 for Sunday, as its
    # docstring states.
    ax.set_yticks(range(len(day_labels)))
    ax.set_yticklabels(day_labels)
    ax.set_xticks(range(len(bin_labels)))
    ax.set_xticklabels(bin_labels, rotation=60, ha='left')
    plt.show(block=False)

    if save_file is not None:
        plt.savefig(save_file, bbox_inches='tight')
253 |
def plot_response_rate(chats_dict, save_file=None):
    """
    This only works for two users right now.
    Given chat dictionary, plot histogram of response rate
    and find weighted-average response rate of users

    A response is a chat line whose sender differs from the sender of the
    previous line on the same date; its response time is the number of
    whole minutes between the two lines.

    Returns a list of ``{"user": name, "avg_rate": text}`` dictionaries,
    one per user, where ``avg_rate`` is ``str(1 / avg_response_rate(...))``
    or ``'n/a'`` when that value cannot be computed.
    """
    users = get_users(chats_dict)
    n_users = len(users)

    colors = ['#007e8c', '#ffc0cb', '#488957']
    colors.extend([gen_hex_colour_code() for i in range(n_users)])

    chat_users = []
    for date, chatlog in chats_dict['chats']:
        time, u, chat = split_chat(chatlog, users)
        if all((time, u, chat)):
            chat_users.append({'date': date,
                               'time': time,
                               'user': u,
                               'chat': chat})

    # response times (minutes) of every user, collected day by day
    users_response_summary = defaultdict(list)
    grouper = itemgetter("date")
    for key, group in groupby(sorted(chat_users, key=grouper), grouper):
        responses = [[g['date'] + ' ' + g['time'], g['user']] for g in group]
        time_previous, user_previous = responses[0]
        for time_current, user_current in responses[1:]:
            if user_current != user_previous:
                waited = parser.parse(time_current) - parser.parse(time_previous)
                users_response_summary[user_current].append(int(waited.seconds / 60.))
            time_previous, user_previous = time_current, user_current

    ret_data = []
    # plot and print response rate of users
    for i, user in enumerate(users):
        resp = users_response_summary[user]
        plt.hist(resp, bins=range(0, 30), alpha=0.5, color=colors[i])
        try:
            avg_rate = str(1. / avg_response_rate(resp, wait_time=60))
        except ZeroDivisionError:
            # no qualifying response, or an average of zero minutes
            avg_rate = 'n/a'
        ret_data.append({"user": user, "avg_rate": avg_rate})
        print('response rate of %s = %s' % (user, avg_rate))
    plt.legend(users)
    plt.xlabel('response time (mins)')
    plt.ylabel('normalized number of chats ')
    plt.show(block=False)

    if save_file is not None:
        plt.savefig(save_file, bbox_inches='tight')

    return ret_data
314 |
--------------------------------------------------------------------------------