├── image ├── line_chat.png ├── line_logo.png ├── punch_card.jpg ├── punch_card.png ├── ramble_plot.png ├── chat_per_day.jpg ├── chat_per_day.png ├── response_time.jpg ├── response_time.png ├── chat_users_per_day.jpg ├── chat_users_per_day.png └── ramble_plot_square.png ├── docs ├── README.md ├── needyone │ ├── index.html │ └── sketch.js └── rambles │ ├── index.html │ └── sketch.js ├── line2midi.py ├── README.md ├── line-visualize.py ├── .gitignore ├── template └── visualize.html ├── slides └── index.html └── line_utils.py /image/line_chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/line_chat.png -------------------------------------------------------------------------------- /image/line_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/line_logo.png -------------------------------------------------------------------------------- /image/punch_card.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/punch_card.jpg -------------------------------------------------------------------------------- /image/punch_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/punch_card.png -------------------------------------------------------------------------------- /image/ramble_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/ramble_plot.png -------------------------------------------------------------------------------- /image/chat_per_day.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/chat_per_day.jpg -------------------------------------------------------------------------------- /image/chat_per_day.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/chat_per_day.png -------------------------------------------------------------------------------- /image/response_time.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/response_time.jpg -------------------------------------------------------------------------------- /image/response_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/response_time.png -------------------------------------------------------------------------------- /image/chat_users_per_day.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/chat_users_per_day.jpg -------------------------------------------------------------------------------- /image/chat_users_per_day.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/chat_users_per_day.png -------------------------------------------------------------------------------- /image/ramble_plot_square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tupleblog/visualize_line_chat/HEAD/image/ramble_plot_square.png -------------------------------------------------------------------------------- /docs/README.md: 
-------------------------------------------------------------------------------- 1 | ## Visualize using RuneJS 2 | 3 | Based on code by this [Medium blog post](https://medium.com/@wipaweeeeee/call-me-adele-f37162b6ffe5) 4 | See examples in these following folders. 5 | 6 | - [needyone](needyone) 7 | - [rambles](rambles) 8 | -------------------------------------------------------------------------------- /docs/needyone/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ramble 6 | 7 | 8 | 24 | 25 | 26 |
27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/rambles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ramble 6 | 7 | 8 | 24 | 25 | 26 |
27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/needyone/sketch.js: -------------------------------------------------------------------------------- 1 | var r = new Rune({ 2 | container: "#canvas", 3 | width: 300, 4 | height: 300, 5 | debug: false, 6 | }); 7 | 8 | 9 | $.ajax({ 10 | type: 'GET', 11 | url: "https://gist.githubusercontent.com/titipata/33bd071df166c135bcd5d87a6b53c32c/raw/e662e0dab90dc37dd615020418770655d4fac522/example_bim.json", 12 | dataType: 'json', 13 | success: function(result){ 14 | var x = 10; 15 | var y = 0; 16 | var xt = 10; 17 | var yt = 5; 18 | 19 | for (var i = 0; i < result.length; i++) { 20 | if (result[i].user_id == 1){ 21 | r.rect(x, y, 1, 10) 22 | .stroke(false) 23 | .fill(0) 24 | x++; 25 | } 26 | if( x > r.width - 10) { 27 | x = 10; 28 | y += 13; 29 | r.rect(x, y, 1, 10) 30 | .stroke(false) 31 | .fill(0) 32 | x++; 33 | } 34 | x++; 35 | 36 | if (result[i].user_id == 0){ 37 | r.ellipse(xt, yt, 2, 2) 38 | .stroke(false) 39 | .fill(0) 40 | xt++; 41 | } 42 | if( xt > r.width - 10) { 43 | xt = 10; 44 | yt += 13; 45 | r.ellipse(xt, yt, 2, 2) 46 | .stroke(false) 47 | .fill(0) 48 | xt++; 49 | } 50 | xt++; 51 | } 52 | r.draw(); 53 | } 54 | }); 55 | -------------------------------------------------------------------------------- /docs/rambles/sketch.js: -------------------------------------------------------------------------------- 1 | var r = new Rune({ 2 | container: "#canvas", 3 | width: 380, 4 | height: 380, 5 | debug: false, 6 | }); 7 | 8 | $.ajax({ 9 | type: 'GET', 10 | url: "https://gist.githubusercontent.com/titipata/33bd071df166c135bcd5d87a6b53c32c/raw/e662e0dab90dc37dd615020418770655d4fac522/example_bim.json", 11 | dataType: 'json', 12 | success: function(result){ 13 | var x = 0; 14 | var y = 0; 15 | var xt = 0; 16 | var yt = 0; 17 | for( var i = 0; i < result.length; i++ ) { 18 | if ( result[i].chat_length.length != 0 ) { 19 | if( result[i].user_id == 1 ) { 20 | 
r.rect(x, y, 2, result[i].char_length) 21 | .stroke(false) 22 | .fill(180) 23 | 24 | if( y > r.height ) { 25 | x += 3; 26 | y = 0; 27 | r.rect(x, y, 2, result[i].char_length) 28 | .stroke(false) 29 | .fill(180) 30 | } 31 | } 32 | 33 | if(result[i].user_id == 0) { 34 | r.rect(xt, yt, 2, result[i].char_length) 35 | .stroke(false) 36 | .fill(0) 37 | 38 | if( yt > r.height) { 39 | xt += 3; 40 | yt = 0; 41 | r.rect(xt, yt, 2, result[i].char_length) 42 | .stroke(false) 43 | .fill(0) 44 | } 45 | } 46 | y += result[i].char_length; 47 | yt += result[i].char_length; 48 | } 49 | } 50 | r.draw(); 51 | } 52 | }); 53 | -------------------------------------------------------------------------------- /line2midi.py: -------------------------------------------------------------------------------- 1 | import line_utils 2 | import numpy as np 3 | from midiutil.MidiFile import MIDIFile 4 | 5 | 6 | def generate_midi_text(file_name, output_file_name= 'midi.mid'): 7 | chats_dict = line_utils.read_line_chat(file_name) 8 | users = line_utils.get_users(chats_dict) 9 | chats_split = [line_utils.split_chat(chat, users) for (date, chat) in chats_dict['chats']] 10 | chats = [chat for (t, u, chat) in chats_split if chat is not None] 11 | chats_len = np.array([len(c) for c in chats]) 12 | variations_round = np.array([int(v/10) for v in chats_len]) 13 | 14 | # setting up MIDI 15 | track = 0 16 | time = 0 17 | channel = 0 18 | tempo = 120 19 | duration = 1 20 | volumn = 100 21 | base_pitch = 60 22 | midi = MIDIFile(numTracks=1, adjust_origin=time) 23 | midi.addTrackName(track, time, "testing") 24 | midi.addTempo(track, time, tempo) 25 | 26 | # add variation out of 27 | variations_round = base_pitch + (variations_round[0:100] + np.round(10 * np.random.randn(100))) 28 | 29 | # chord stuff 30 | for t, v in enumerate(list(variations_round)): 31 | midi.addNote(track, channel, int(v), t + 1, duration, volumn) 32 | midi.addNote(track, channel, int(v) + 4, t + 1, duration, volumn) 33 | midi.addNote(track, 
channel, int(v) + 7, t + 1, duration, volumn) 34 | 35 | with open(output_file_name, 'wb') as file: 36 | midi.writeFile(file) 37 | print('done converting to midi file!') 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visualize LINE chat 2 | 3 | In [LINE chat](https://line.me/en/) (very popular app chat for Asian folks), 4 | we can read the history from exported text file (Save chat). 5 | We can use the following function in order to read the chat history text file. 6 | 7 | ```python 8 | import line_utils 9 | chats_dict = line_utils.read_line_chat('chat_history.txt') 10 | ``` 11 | 12 | `chats_dict` is a chat dictionary which the following keys: `count`, `chats`, `total_chats` 13 | 14 | 15 | **Plot total chats activities on given day** 16 | 17 | ```python 18 | line_utils.plot_chat_per_day(chats_dict) 19 | ``` 20 | 21 |
22 | 23 |
24 | 25 | **Plot chat activity per user on a given day** 26 | 27 | ```python 28 | line_utils.plot_chat_users_per_day(chats_dict) 29 | ``` 30 | 31 | The users are detected automatically from the chat history (see `line_utils.get_users`). 32 | 33 |
34 | 35 |
36 | 37 | **Plot punch card activities** 38 | 39 | See at what times of the week you chat with your friends the most. 40 | 41 | ```python 42 | line_utils.plot_punch_card_activities(chats_dict) 43 | ``` 44 | 45 |
46 | 47 |
48 | 49 | **Plot response rate and time** 50 | 51 | Plot and print the average time each user takes to reply 52 | to the previous chat. 53 | 54 | ```python 55 | line_utils.plot_response_rate(chats_dict) 56 | ``` 57 | 58 |
59 | 60 |
61 | 62 | **Generate single HTML file to visualize report** 63 | ``` 64 | python line-visualize.py -i INPUT_CHAT.txt -o output.html 65 | ``` 66 | 67 | Example HTML visualize report: 68 | [https://goo.gl/tMmGix](https://goo.gl/tMmGix) 69 | 70 | **runejs example** 71 | 72 | We also put example to visualize using `rune.js` in `docs` folder based on [medium post](https://medium.com/@wipaweeeeee/call-me-adele-f37162b6ffe5). 73 | See example at [titipata.github.io/visualize_line_chat](https://titipata.github.io/visualize_line_chat/). 74 | 75 | 76 | **LINE to MIDI example** 77 | 78 | `line2midi.py` stores how to convert chat to MIDI file. This was done at 79 | Stupid Hackathon 80 | -------------------------------------------------------------------------------- /line-visualize.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import jinja2 3 | import os 4 | import base64 5 | import shutil 6 | import line_utils 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | def generator(input, output): 11 | 12 | if os.path.exists(input) == False: 13 | print("File {} not exist".format(input)) 14 | quit() 15 | 16 | directory = 'tmp' 17 | 18 | if not os.path.exists(directory): 19 | os.makedirs(directory) 20 | 21 | chats_dict = line_utils.read_line_chat(input) 22 | 23 | # Chat per day 24 | chat_per_day_path = directory + "/chat_per_day.png" 25 | plt.figure(figsize=(10, 5)) 26 | line_utils.plot_chat_per_day(chats_dict, chat_per_day_path) 27 | 28 | # User chat per day 29 | user_chat_per_day_path = directory + "/user_chat_per_day.png" 30 | plt.figure(figsize=(10, 5)) 31 | line_utils.plot_chat_users_per_day(chats_dict, user_chat_per_day_path) 32 | 33 | # Punch card activities 34 | punch_card_activities_path = directory + "/punch_card_activities.png" 35 | plt.figure(figsize=(15, 10)) 36 | line_utils.plot_punch_card_activities(chats_dict, punch_card_activities_path) 37 | 38 | # Response rate 39 | response_rate_path = directory + 
"/response_rate.png" 40 | plt.figure(figsize=(15, 10)) 41 | response_rate_users = line_utils.plot_response_rate(chats_dict, response_rate_path) 42 | 43 | data = { 44 | "chat_per_day": convert_img_to_base64(chat_per_day_path), 45 | "user_chat_per_day": convert_img_to_base64(user_chat_per_day_path), 46 | "punch_card_activities": convert_img_to_base64(punch_card_activities_path), 47 | "response_rate": convert_img_to_base64(response_rate_path), 48 | "response_rate_users": response_rate_users, 49 | } 50 | 51 | result = render('./template/visualize.html', data) 52 | 53 | with open(output, "w") as html_file: 54 | html_file.write(result) 55 | 56 | # Remove genrate image directory 57 | shutil.rmtree(directory) 58 | 59 | def render(tpl_path, context): 60 | path, filename = os.path.split(tpl_path) 61 | return jinja2.Environment( 62 | loader=jinja2.FileSystemLoader(path or './') 63 | ).get_template(filename).render(context) 64 | 65 | def convert_img_to_base64(file): 66 | with open(file, "rb") as image_file: 67 | encoded = base64.b64encode(image_file.read()) 68 | return encoded.decode("utf-8") 69 | 70 | if __name__ == '__main__': 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument( 73 | '-i', 74 | '--input', 75 | help='input chat file to visualize') 76 | 77 | parser.add_argument( 78 | '-o', 79 | '--output', 80 | help='HTML output file', 81 | default='output.html') 82 | 83 | args = parser.parse_args() 84 | arguments = args.__dict__ 85 | generator(**arguments) 86 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Emacs ### 2 | # -*- mode: gitignore; -*- 3 | *~ 4 | \#*\# 5 | /.emacs.desktop 6 | /.emacs.desktop.lock 7 | *.elc 8 | auto-save-list 9 | tramp 10 | .\#* 11 | 12 | # Org-mode 13 | .org-id-locations 14 | *_archive 15 | 16 | # flymake-mode 17 | *_flymake.* 18 | 19 | # eshell files 20 | /eshell/history 21 | /eshell/lastdir 22 | 23 
| # elpa packages 24 | /elpa/ 25 | 26 | # reftex files 27 | *.rel 28 | 29 | # AUCTeX auto folder 30 | /auto/ 31 | 32 | # cask packages 33 | .cask/ 34 | dist/ 35 | 36 | # Flycheck 37 | flycheck_*.el 38 | 39 | # server auth directory 40 | /server/ 41 | 42 | # projectiles files 43 | .projectile 44 | 45 | # directory configuration 46 | .dir-locals.el 47 | 48 | ### macOS ### 49 | *.DS_Store 50 | .AppleDouble 51 | .LSOverride 52 | 53 | # Icon must end with two \r 54 | Icon 55 | 56 | 57 | # Thumbnails 58 | ._* 59 | 60 | # Files that might appear in the root of a volume 61 | .DocumentRevisions-V100 62 | .fseventsd 63 | .Spotlight-V100 64 | .TemporaryItems 65 | .Trashes 66 | .VolumeIcon.icns 67 | .com.apple.timemachine.donotpresent 68 | 69 | # Directories potentially created on remote AFP share 70 | .AppleDB 71 | .AppleDesktop 72 | Network Trash Folder 73 | Temporary Items 74 | .apdisk 75 | 76 | ### Python ### 77 | # Byte-compiled / optimized / DLL files 78 | __pycache__/ 79 | *.py[cod] 80 | *$py.class 81 | 82 | # C extensions 83 | *.so 84 | 85 | # Distribution / packaging 86 | .Python 87 | env/ 88 | build/ 89 | develop-eggs/ 90 | downloads/ 91 | eggs/ 92 | .eggs/ 93 | lib/ 94 | lib64/ 95 | parts/ 96 | sdist/ 97 | var/ 98 | wheels/ 99 | *.egg-info/ 100 | .installed.cfg 101 | *.egg 102 | 103 | # PyInstaller 104 | # Usually these files are written by a python script from a template 105 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
106 | *.manifest 107 | *.spec 108 | 109 | # Installer logs 110 | pip-log.txt 111 | pip-delete-this-directory.txt 112 | 113 | # Unit test / coverage reports 114 | htmlcov/ 115 | .tox/ 116 | .coverage 117 | .coverage.* 118 | .cache 119 | nosetests.xml 120 | coverage.xml 121 | *,cover 122 | .hypothesis/ 123 | 124 | # Translations 125 | *.mo 126 | *.pot 127 | 128 | # Django stuff: 129 | *.log 130 | local_settings.py 131 | 132 | # Flask stuff: 133 | instance/ 134 | .webassets-cache 135 | 136 | # Scrapy stuff: 137 | .scrapy 138 | 139 | # Sphinx documentation 140 | docs/_build/ 141 | 142 | # PyBuilder 143 | target/ 144 | 145 | # Jupyter Notebook 146 | .ipynb_checkpoints 147 | 148 | # pyenv 149 | .python-version 150 | 151 | # celery beat schedule file 152 | celerybeat-schedule 153 | 154 | # SageMath parsed files 155 | *.sage.py 156 | 157 | # dotenv 158 | .env 159 | 160 | # virtualenv 161 | .venv 162 | venv/ 163 | ENV/ 164 | 165 | # Spyder project settings 166 | .spyderproject 167 | 168 | # Rope project settings 169 | .ropeproject 170 | 171 | # End of https://www.gitignore.io/api/python,emacs,macos 172 | -------------------------------------------------------------------------------- /template/visualize.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Visualize LINE chat 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 24 | 25 | 26 | 27 | 32 |
33 |
34 |

35 | 36 |
37 |
38 |
39 | จำนวนข้อความในแต่ละวัน 40 |
41 | 42 | 43 | 44 |
45 |
46 |
47 |
48 | 49 |
50 |
51 |
52 | จำนวนข้อความในแต่ละวันแยกตามผู้ใช้งาน 53 |
54 | 55 | 56 | 57 |
58 |
59 |
60 |
61 | 62 |
63 |
64 |
65 | กลุ่มวันและช่วงเวลาที่แชท 66 |
67 | 68 | 69 | 70 |
71 |
72 |
73 |
74 | 75 |
76 |
77 |
78 | ค่าเฉลี่ยความเร็วในการตอบแชท (นาที) 79 |
80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | {% for response_rate_user in response_rate_users %} 90 | 91 | 92 | 93 | 94 | {% endfor %} 95 | 96 |
ชื่อความเร็วในการตอบแชท (นาที)
{{response_rate_user.user}}{{response_rate_user.avg_rate}}
97 |
98 |
99 |
100 |
101 | 102 |
103 |
104 |
105 | จำนวนแชทกับช่วงเวลาที่ใช้ในการตอบ 106 |
107 | 108 | 109 | 110 |
111 |
112 |
113 |
114 | 115 |
116 |
# --- repository-dump residue that preceded this module (kept as comments) ---
# 117 | 118 | 127 | 128 | 129 | 130 | 131 | 132 | 144 | 145 | 146 | 147 |
# /slides/index.html:
# 1 | 2 | 3 | 4 | Crash course in Python 5 | 6 | 23 | 24 | 25 | 214 | 215 | 216 | 238 | 239 | 240 |
# /line_utils.py:
"""Helpers to parse an exported LINE chat history and plot statistics about it."""
import csv
import datetime
import random
import re
from collections import Counter
from collections import defaultdict
from itertools import groupby
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse as sp
from dateutil import parser

# Date header written as "2017.03.05" (searched anywhere in the line).
_DATE_DOT_RE = re.compile(r'\d+\.\d+\.\d+')
# Date header written as "2017/03/05" (Android export), anchored at line start.
_DATE_SLASH_RE = re.compile(r'^\d+/\d+/\d+')
# "HH:MM" time stamp.
_TIME_RE = re.compile(r'\d+:\d+')

# Fixed palette used for the first users; random colours are appended after it.
_BASE_COLORS = ['#007e8c', '#ffc0cb', '#488957']


def read_line_chat(file_name):
    """
    Read an exported LINE chat history.

    Returns a ``defaultdict(list)`` with the keys
    ``chats`` (list of ``[date_string, chat_line]``) and
    ``total_chats`` (number of entries in ``chats``).

    Only the first comma-separated column of every row is kept, and every
    line before the first date header is ignored.
    """
    # ``with`` guarantees the file handle is closed (the original leaked it).
    with open(file_name) as chat_file:
        rows = [row[0] for row in csv.reader(chat_file) if len(row) > 0]

    chats_dict = defaultdict(list)
    current_date = None  # date header the following chat lines belong to
    for chat in rows:
        date = _DATE_DOT_RE.findall(chat)
        if not date:
            # Android date format
            date = _DATE_SLASH_RE.findall(chat)

        if date:
            current_date = date[0]
            continue

        # Skip lines until the first date is found, and skip empty lines
        if current_date is None or not chat.strip():
            continue

        # Times past midnight are exported as "24:xx"; normalise to "00:xx"
        chat = re.sub(r'^24:', '00:', chat)
        chats_dict['chats'].append([current_date, chat])

    chats_dict['total_chats'] = len(chats_dict['chats'])
    return chats_dict


def get_date_range(chats_dict):
    """
    Return every day (as ``datetime``) from the first to the last chat date,
    inclusive. Returns an empty list when there are no chats.
    """
    dates = [parser.parse(d) for d in set(c[0] for c in chats_dict['chats'])]
    if not dates:
        return []
    # The original referenced date_min/date_max without ever defining them.
    date_min, date_max = min(dates), max(dates)
    n_days = (date_max - date_min).days + 1
    return [date_min + datetime.timedelta(days=d) for d in range(n_days)]


def _match_sender(text, users):
    """Return (user, matched_length) for the longest user name prefixing text."""
    sender, sender_len = '', 0
    for user in users:
        name = ' '.join(user.split())  # same whitespace normalisation as text
        if len(name) <= sender_len or not text.startswith(name):
            continue
        # Require a word boundary so "Ann" does not match "Anna ..."
        if len(text) == len(name) or text[len(name)] == ' ':
            sender, sender_len = user, len(name)
    return sender, sender_len


def split_chat(chat, users):
    """Given list of users in conversation,
    split chat in to (time, user, chat text)

    Returns ``(None, None, None)`` when the line does not start with a
    time stamp (or is blank). ``user`` is ``''`` when no known user name
    starts the line.
    """
    tokens = chat.split()
    if not tokens or _TIME_RE.search(tokens[0]) is None:
        return (None, None, None)

    time = tokens[0]
    text = ' '.join(tokens[1:])
    # The sender is the name at the *start* of the line. The original used
    # a substring test plus an unescaped ``re.sub``, which attributed a
    # message to any user merely mentioned in it and broke on names that
    # contain regex metacharacters.
    sender, sender_len = _match_sender(text, users)
    if sender:
        text = text[sender_len:].strip()
    return (time, sender, text)


def bin_time(t, n_bin=8):
    """bin time in the day to number of bins

    Returns an index in ``0 .. n_bin - 1``.
    """
    bin_size = max(1, int(24. / n_bin))
    hour = parser.parse(t).hour
    # Clamp so that late hours still land in the last bin when n_bin does
    # not divide 24 (the original returned None for them).
    return min(hour // bin_size, n_bin - 1)


def day_of_week(day, n_bin=8):
    """
    Turn date string into days of week index
    ['Sunday', 'Monday', ..., 'Saturday']

    ``n_bin`` is unused and kept only for backward compatibility.
    """
    # datetime.weekday() is Monday=0 .. Sunday=6; rotate to Sunday=0 so the
    # result matches the documented order and the punch-card axis labels.
    return (parser.parse(day).weekday() + 1) % 7


def gen_hex_colour_code():
    """
    Generate hex color
    """
    return '#' + ''.join([random.choice('0123456789ABCDEF') for x in range(6)])


def avg_response_rate(ls, wait_time=60):
    """
    Average response time (in minutes) over the replies that took at most
    ``wait_time`` minutes.

    Returns ``nan`` when no reply qualifies (the original raised
    ZeroDivisionError and, because it swapped value and count from
    ``Counter.items()``, divided by the sum of times instead of the number
    of replies).
    """
    quick = [r for r in ls if r <= wait_time]
    if not quick:
        return float('nan')
    return float(sum(quick)) / len(quick)


def get_users(chats_dict, min_chats=10):
    """
    Get all users from chat dictionary

    A user is the first word (after the time stamp) of more than
    ``min_chats`` lines. The name is extended with the second and third
    word when those are identical in every line of that user.
    """
    chats = [_TIME_RE.sub('', c[1]).strip() for c in chats_dict['chats']]
    # Lines holding nothing but a time stamp have no tokens; drop them
    # instead of raising IndexError.
    tokenized = [tokens for tokens in (c.split() for c in chats) if tokens]
    count_user = Counter(tokens[0] for tokens in tokenized)
    users = [k for k, v in count_user.items() if v > min_chats]

    all_profile_names = []
    for user in users:
        user_chats = [tokens for tokens in tokenized if tokens[0] == user]
        profile_name = [user]
        for i in range(1, 3):
            # Stop at the first position that is missing or not shared by
            # all lines (replaces the bare ``except: pass``).
            if any(len(tokens) <= i for tokens in user_chats):
                break
            if len(set(tokens[i] for tokens in user_chats)) != 1:
                break
            profile_name.append(user_chats[0][i])
        all_profile_names.append(' '.join(profile_name))
    return all_profile_names


def _collect_chat_users(chats_dict, users):
    """Split every chat line into a dict of date/time/user/chat, dropping incomplete ones."""
    chat_users = []
    for date, chatlog in chats_dict['chats']:
        time, u, chat = split_chat(chatlog, users)
        if all((time, u, chat)):
            chat_users.append({'date': date,
                               'time': time,
                               'user': u,
                               'chat': chat})
    return chat_users


def _save_and_show(save_file):
    """Save the current figure (if requested) and then show it without blocking."""
    # Save before show: some backends (e.g. inline) discard the figure on
    # show(), which made the original save an empty image.
    if save_file is not None:
        plt.savefig(save_file, bbox_inches='tight')
    plt.show(block=False)


def _user_colors(n_users):
    """Return the fixed palette followed by n_users random colours."""
    return _BASE_COLORS + [gen_hex_colour_code() for i in range(n_users)]


def plot_chat_per_day(chats_dict, save_file=None):
    """
    Plot the number of chat lines per day as a bar chart on the current
    figure; save it to ``save_file`` when given.
    """
    # Counter merges a date even if its lines are not contiguous.
    counts = Counter(c[0] for c in chats_dict['chats'])
    chats_per_day = sorted((parser.parse(d), n) for d, n in counts.items())

    ax = plt.subplot(111)
    ax.bar([c[0] for c in chats_per_day], [c[1] for c in chats_per_day], width=0.5)
    ax.xaxis_date()
    plt.xlabel('date')
    plt.xticks(rotation=60)
    plt.ylabel('number of chats')
    _save_and_show(save_file)


def plot_chat_users_per_day(chats_dict, save_file=None):
    """
    Plot number of chats per users over time
    """
    users = get_users(chats_dict)
    n_users = len(users)
    chat_users = _collect_chat_users(chats_dict, users)
    counts = Counter((c['date'], c['user']) for c in chat_users)
    colors = _user_colors(n_users)

    ax = plt.subplot(111)
    for i, user in enumerate(users):
        # Shift each user's bars within the day so they sit side by side
        offset = datetime.timedelta(hours=(i * 24. / n_users))
        # Exact match: the original substring test also counted "Ann Lee"
        # for user "Ann".
        per_day = sorted((parser.parse(date) + offset, n)
                         for (date, u), n in counts.items() if u == user)
        ax.bar([v[0] for v in per_day], [v[1] for v in per_day],
               width=(1.0 / n_users), color=colors[i])
    ax.xaxis_date()
    plt.xlabel('date')
    plt.ylabel('number of chats')
    plt.xticks(rotation=60, ha='right')
    plt.legend(users)
    _save_and_show(save_file)


def plot_punch_card_activities(chats_dict, save_file=None):
    """
    Punch card activities, plot in matrix format
    (rows: day of week, Sunday first; columns: 3-hour bins of the day)
    """
    n_bin = 8
    users = get_users(chats_dict)
    chat_users = _collect_chat_users(chats_dict, users)

    counts = Counter((day_of_week(c['date']), bin_time(c['time'], n_bin))
                     for c in chat_users)
    # Fixed 7 x n_bin shape so rows/columns always line up with the labels,
    # even when some days or time bins have no activity.
    chat_time_bin = np.zeros((7, n_bin), dtype=int)
    for (day, time_bin), n_chat in counts.items():
        chat_time_bin[day, time_bin] = n_chat

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.matshow(chat_time_bin, interpolation='nearest', cmap=plt.get_cmap('bone'))
    # Pin tick positions explicitly instead of relying on the default locator
    ax.set_yticks(list(range(7)))
    ax.set_yticklabels(['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'])
    ax.set_xticks(list(range(n_bin)))
    ax.set_xticklabels(['12 - 3 am', '3 - 6 am', '6 - 9 am', '9 am - 12 pm',
                        '12 - 3 pm', '3 - 6 pm', '6 - 9 pm', '9 pm - 12 am'],
                       rotation=60, ha='left')
    _save_and_show(save_file)


def plot_response_rate(chats_dict, save_file=None):
    """
    This only works for two users right now.
    Given chat dictionary, plot histogram of response time
    and find the average response rate of users.

    Returns a list of ``{"user": name, "avg_rate": str}`` where ``avg_rate``
    is 1 / (average response time in minutes).
    """
    users = get_users(chats_dict)
    n_users = len(users)
    colors = _user_colors(n_users)
    chat_users = _collect_chat_users(chats_dict, users)

    # Response time = minutes between a message and the next message from a
    # different user on the same date; credited to the replying user.
    grouper = itemgetter("date")
    users_response_summary = defaultdict(list)
    for key, group in groupby(sorted(chat_users, key=grouper), grouper):
        responses = [(g['date'] + ' ' + g['time'], g['user']) for g in group]
        time_previous, user_previous = responses[0]
        for stamp, user in responses[1:]:
            if user != user_previous:
                delta = parser.parse(stamp) - parser.parse(time_previous)
                users_response_summary[user].append(int(delta.seconds / 60.))
            time_previous, user_previous = stamp, user

    ret_data = []
    # plot and print response rate of users
    for i, user in enumerate(users):
        resp = users_response_summary[user]
        plt.hist(resp, bins=range(0, 30), alpha=0.5, color=colors[i])
        avg_time = avg_response_rate(resp, wait_time=60)
        if avg_time > 0:
            rate = 1. / avg_time
        elif avg_time == 0:
            rate = float('inf')  # every reply came within the same minute
        else:
            rate = float('nan')  # no reply within wait_time
        avg_rate = str(rate)
        ret_data.append({"user": user, "avg_rate": avg_rate})
        print('response rate of %s = %s' % (user, avg_rate))
    plt.legend(users)
    plt.xlabel('response time (mins)')
    plt.ylabel('normalized number of chats ')
    _save_and_show(save_file)

    return ret_data