├── .gitignore ├── LICENSE ├── README.md ├── Testing.ipynb ├── examples ├── gmail.md └── linkedin.md ├── monica ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── contact_field_types.cpython-36.pyc │ ├── contacts.cpython-36.pyc │ ├── conversations.cpython-36.pyc │ ├── monica.cpython-36.pyc │ └── utils.cpython-36.pyc ├── contact_field_types.py ├── contacts.py ├── conversations.py └── utils.py └── social ├── __pycache__ ├── gmail_data.cpython-36.pyc ├── gmail_data_cleaning.cpython-36.pyc ├── gmail_monica_client.cpython-36.pyc ├── gmail_preprocessing.cpython-36.pyc ├── linkedin_monica_client.cpython-36.pyc ├── linkedin_preprocessing.cpython-36.pyc └── utils.cpython-36.pyc ├── gmail_monica_client.py ├── gmail_preprocessing.py ├── linkedin_monica_client.py ├── linkedin_preprocessing.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # General 2 | .DS_Store 3 | .AppleDouble 4 | .LSOverride 5 | 6 | # Icon must end with two \r 7 | Icon 8 | 9 | # Thumbnails 10 | ._* 11 | 12 | # temporary 13 | *.ipynb 14 | 15 | # permonant to ensure personal messages does not get synced 16 | personal/ 17 | 18 | 19 | # Files that might appear in the root of a volume 20 | .DocumentRevisions-V100 21 | .fseventsd 22 | .Spotlight-V100 23 | .TemporaryItems 24 | .Trashes 25 | .VolumeIcon.icns 26 | .com.apple.timemachine.donotpresent 27 | 28 | # Directories potentially created on remote AFP share 29 | .AppleDB 30 | .AppleDesktop 31 | Network Trash Folder 32 | Temporary Items 33 | .apdisk -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Shahzeb Afroze 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # monica-python 2 | 3 | This Repo is work in progress. Future plan is to integrate data from other social media and emails. 4 | 5 | This is inspired from the this [repo](https://pypi.org/project/monica-client/). 6 | 7 | Contribution needed for: 8 | 1. Other APIs to be converted to Python structure 9 | 2. Social Media Integrations 10 | 11 | 12 | ## Contacts API 13 | 14 | 1. Initating the conversations API 15 | ```python 16 | from monica.contacts import Contacts 17 | import pandas as pd 18 | 19 | contacts = Contacts(access_token) 20 | ``` 21 | 22 | 2. Lists a few contacts from the database with criteria 23 | 24 | ```python 25 | 26 | limit=10 # default 27 | page=1 # default 28 | sort="updated_at" # default 29 | json_data = contacts.list_contacts(self, limit=limit, page=page, sort=sort) # returns json data 30 | 31 | pd.DataFrame(json_data['data']) 32 | 33 | ``` 34 | 35 | 3. List all contacts (Save it as csv file for other features) 36 | 37 | ```python 38 | 39 | sort = "updated_at" # default 40 | json_data = conversations.list_all_your_contacts(sort=sort) 41 | 42 | df = pd.DataFrame(json_data['data']) 43 | df.to_csv('contacts_from_monicahq.csv') 44 | 45 | ``` 46 | 47 | 48 | ## Conversations API 49 | 50 | 1. Initating the conversations API 51 | ```python 52 | from monica.conversations import Conversations 53 | import pandas as pd 54 | 55 | conversations = Conversations(access_token) 56 | 57 | ``` 58 | 59 | 2. Lists a few conversations from the database - Official API does not return all the conversations 60 | 61 | ```python 62 | 63 | conversations.list_conversations() 64 | 65 | ``` 66 | 67 | 3. Create conversation object 68 | 69 | ```python 70 | 71 | conversation_id = conversations.create_conversation_object(happened_at, contact_field_type_id, contact_id) 72 | 73 | ``` 74 | 75 | 4. Add message to a conversation object 76 | ```python 77 | 78 | conversations.add_message(written_at, written_by_me, content, contact_id, conversation_id) 79 | 80 | ``` 81 | 82 | 5. Lists all conversations of a contact 83 | ```python 84 | 85 | 86 | json_response = conversations.list_conversations_of_a_contact(contact_id) # returns the whole json response from API. 87 | 88 | 89 | pd.DataFrame(json_response['data']) # to see it properly in a dataframe format 90 | 91 | ``` 92 | 93 | 6. Delete conversation 94 | ```python 95 | 96 | conversations.delete_conversation(conversation_id) 97 | 98 | ``` 99 | 100 | 7. Delete all conversations of a contact 101 | ```python 102 | 103 | conversations.delete_all_conversations_of_a_contact(contact_id) 104 | 105 | ``` 106 | 107 | 8. Add multiple messages to contact 108 | 109 | ```python 110 | 111 | conversations.add_multiple_messages(contact_id, conversation_id, df) 112 | # df is pandas dataframe in a specific structure expected, will add later 113 | 114 | ``` 115 | 116 | ## Contact Field Types API 117 | 118 | 1. Initating the conversations API 119 | ```python 120 | from monica.contact_field_types import Contact_Field_Types 121 | import pandas as pd 122 | 123 | contact_field_types = Contact_Field_Types(access_token) 124 | 125 | ``` 126 | 127 | 2. Lists all field types on your monica 128 | 129 | ```python 130 | 131 | json_data = contact_field_types.list_all() 132 | df = pd.DataFrame[json_data['data']] 133 | df.head() 134 | 135 | ``` 136 | 137 | 138 | 3. Retreive ID of field type 139 | 140 | ```python 141 | 142 | field_type_id = contact_field_types.get_contact_field_type_id(object_name) 143 | 144 | ``` 145 | 146 | 147 | ## Gmail Monica Client API 148 | Check out how to upload gmail data [here](examples/gmail.md) 149 | 150 | ## Linkedin Monica Client API 151 | Check out how to upload linkedin data [here](examples/linkedin.md) 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /Testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "\"\"\"\n", 10 | "Quick understanding of how the API would work\n", 11 | "Can be used to make quick examples for documentation\n", 12 | "\n", 13 | "\"\"\"" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "from monica.contacts import Contacts" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "access_token = \"\"\n", 32 | "a = Contacts(access_token)\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "a.list_all_your_contacts()\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# swqswq" 51 | ] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "Python 3", 57 | "language": "python", 58 | "name": "python3" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.6.5" 71 | } 72 | }, 73 | "nbformat": 4, 74 | "nbformat_minor": 2 75 | } 76 | -------------------------------------------------------------------------------- /examples/gmail.md: -------------------------------------------------------------------------------- 1 | ## Syncing emails from Gmail 2 | 3 | ```python 4 | from social.gmail_preprocessing import Preprocessing 5 | from social.gmail_monica_client import Gmail_Monica_Client 6 | 7 | gmail_json_file_path="" 8 | monica_contacts_file_path="" 9 | 10 | gmail_preprocessing = Preprocessing(gmail_json_file_path, monica_contacts_file_path) 11 | df_preprocessed = gmail_preprocessing.prepare_dataframe() 12 | 13 | 14 | gmail_monica_client = Gmail_Monica_Client(access_token=access_token) 15 | gmail_monica_client.upload_emails_to_monica(df_preprocessed) 16 | 17 | 18 | ``` -------------------------------------------------------------------------------- /examples/linkedin.md: -------------------------------------------------------------------------------- 1 | ## Syncing conversations from Linkedin 2 | 3 | ```python 4 | from social.linkedin_preprocessing import Preprocessing 5 | from social.linkedin_monica_client import Linkedin_Monica_Client 6 | 7 | linkedin_messages_file_path="" 8 | monica_contacts_file_path="" 9 | 10 | linkedin_preprocessing = Preprocessing(linkedin_messages_file_path, monica_contacts_file_path) 11 | df_preprocessed = preprocessing.prepare_dataframe() 12 | 13 | 14 | linkedin_monica_client = Linkedin_Monica_Client(access_token=access_token) 15 | linkedin_monica_client.upload_conversations_to_monica(df_preprocessed) 16 | 17 | ``` -------------------------------------------------------------------------------- /monica/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | This will be used for making it easier to manage imports 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /monica/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /monica/__pycache__/contact_field_types.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/contact_field_types.cpython-36.pyc -------------------------------------------------------------------------------- /monica/__pycache__/contacts.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/contacts.cpython-36.pyc -------------------------------------------------------------------------------- /monica/__pycache__/conversations.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/conversations.cpython-36.pyc -------------------------------------------------------------------------------- /monica/__pycache__/monica.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/monica.cpython-36.pyc -------------------------------------------------------------------------------- /monica/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /monica/contact_field_types.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | Offical Documentation: https://www.monicahq.com/api/conversations 5 | 6 | Description: We will put all conversation related requests here 7 | 8 | Todo: 9 | - Need to add an efficient try except rule in all functions to make sure string is accepted for example 10 | - Need to add something to read the respone number and tell if there is a problem and if the post request was successful 11 | instead of sending json dict 12 | 13 | """ 14 | 15 | import time 16 | import pandas as pd 17 | import requests 18 | import json 19 | from monica.utils import Utils 20 | 21 | basic_api = 'https://app.monicahq.com/api' 22 | 23 | 24 | class Contact_Field_Types: 25 | def __init__(self, access_token, wait_time=1): 26 | """ 27 | Connect with monica Contact_Field_Types API found at https://www.monicahq.com/api/conversations 28 | 29 | Parameters: 30 | ----------- 31 | access_token: str 32 | token retreived from monica platform 33 | 34 | wait_time: int 35 | seconds to wait after every request sent 36 | 37 | """ 38 | headers = {'Authorization': f'Bearer {access_token}', 39 | 'Content-type': 'application/json', 40 | 'Accept': 'text/plain'} 41 | 42 | self.headers = headers 43 | self.basic_api = basic_api 44 | self.wait_time = wait_time 45 | self.utils = Utils() 46 | 47 | def list_all(self): 48 | """ 49 | Checkout monica API documentation for detailed description. 50 | 51 | Parameters: None 52 | ----------- 53 | 54 | 55 | Returns: 56 | ------- 57 | json_data: dict/json 58 | can be easily converted to pandas dataframe 59 | 60 | """ 61 | headers = self.headers 62 | wait_time = self.wait_time 63 | basic_api = self.basic_api 64 | 65 | api = f"{basic_api}/contactfieldtypes" 66 | 67 | response = requests.get(api, headers=headers) 68 | 69 | json_data = response.json() 70 | 71 | return json_data 72 | 73 | def get_contact_field_type_id(self, object_name): 74 | json_data = self.list_all() 75 | df = pd.DataFrame(json_data['data']) 76 | contact_field_type_id = df[df['name']==object_name]['id'].values[0] 77 | 78 | return contact_field_type_id 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /monica/contacts.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | Offical Documentation: https://www.monicahq.com/api/contacts 5 | 6 | Description: We will put all contacts related requests here 7 | 8 | Todo: 9 | - Will need to add an efficient try except rule in all functions to make sure string is accepted for example 10 | - Understand query parameter of get contacts 11 | 12 | """ 13 | 14 | import time 15 | import requests 16 | from monica.utils import Utils 17 | import json 18 | 19 | basic_api = 'https://app.monicahq.com/api' 20 | 21 | 22 | class Contacts: 23 | def __init__(self, access_token, wait_time=1): 24 | """ 25 | Connect with monica contacts API found at https://www.monicahq.com/api/contacts 26 | 27 | Parameters: 28 | ----------- 29 | access_token: str 30 | token retreived from monica platform 31 | 32 | wait_time: int 33 | time to wait after every request sent 34 | 35 | """ 36 | headers = {'Authorization': f'Bearer {access_token}', 'Content-type': 'application/json', 'Accept': 'text/plain'} 37 | 38 | self.headers = headers 39 | self.basic_api = basic_api 40 | self.wait_time = wait_time 41 | self.utils = Utils() 42 | 43 | def list_contacts(self, limit=10, page=1, sort="updated_at"): 44 | """ 45 | Gets the contacts from monica database with page and limit criteria. Checkout monica API documentation for detailed description. 46 | 47 | Parameters: 48 | ----------- 49 | limit: int 50 | Indicates the page size 51 | 52 | page: int 53 | Indicates the page to return 54 | 55 | sort: str 56 | Indicates how the query should be ordered by. 57 | Possible values: created_at, -created_at, updated_at, -updated_at 58 | 59 | Returns: 60 | ------- 61 | json_data: dict/json 62 | can be easily converted to pandas dataframe 63 | 64 | """ 65 | headers = self.headers 66 | basic_api = self.basic_api 67 | 68 | api = f"{basic_api}/contacts" 69 | 70 | payload = {'limit': limit, 71 | 'page': page, 72 | 'sort':sort} 73 | 74 | 75 | response = requests.get(api, params=payload, headers=headers) 76 | json_data = response.json() 77 | 78 | 79 | return json_data 80 | 81 | 82 | def list_all_your_contacts(self, sort="updated_at"): 83 | """ 84 | Gets ALL the contacts from monica database. Checkout monica API documentation for detailed description. 85 | 86 | Parameters: 87 | ----------- 88 | sort: str 89 | Indicates how the query should be ordered by. 90 | Possible values: created_at, -created_at, updated_at, -updated_at 91 | 92 | Returns: 93 | ------- 94 | json_data: dict/json 95 | can be easily converted to pandas dataframe 96 | 97 | """ 98 | utils = self.utils 99 | wait_time = self.wait_time 100 | 101 | json_orig = self.list_contacts(limit=100, page=1, sort=sort) # pull 1st page 102 | max_page = json_orig['meta']['last_page']+1 103 | for page_number in range(2, max_page): 104 | time.sleep(wait_time) 105 | json_new = self.list_contacts(limit=100, page=page_number, sort=sort) 106 | json_orig = utils.merge_json_data(json_orig, json_new) 107 | 108 | json_merged = json_orig.copy() # renmaing variable in the end of the loop 109 | 110 | return json_merged 111 | 112 | 113 | def create_contact(self, first_name, last_name, gender_id=3, is_birthdate_known=False, is_deceased=False, is_deceased_date_known=False): 114 | """ 115 | Create the contacts in monica database. Checkout monica API documentation for detailed description. 116 | 117 | """ 118 | 119 | headers = self.headers 120 | basic_api = self.basic_api 121 | 122 | api = f"{basic_api}/contacts/" 123 | 124 | payload_raw = {"first_name": first_name, 125 | "last_name": last_name, 126 | "gender_id": gender_id, 127 | "is_birthdate_known": is_birthdate_known, 128 | "is_deceased": is_deceased, 129 | "is_deceased_date_known": is_deceased_date_known 130 | } 131 | 132 | payload = json.dumps(payload_raw) # necessary for converting Boolean to json form e.g True to true 133 | response = requests.post(api, params=payload, headers=headers) 134 | json_data = response.json() 135 | 136 | 137 | return json_data 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /monica/conversations.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | Offical Documentation: https://www.monicahq.com/api/conversations 5 | 6 | Description: We will put all conversation related requests here 7 | 8 | Todo: 9 | - Need to add an efficient try except rule in all functions to make sure string is accepted for example 10 | - Need to add something to read the respone number and tell if there is a problem and if the post request was successful 11 | instead of sending json dict 12 | 13 | """ 14 | 15 | import time 16 | import requests 17 | import json 18 | from monica.utils import Utils 19 | from pprint import pprint 20 | import pandas as pd 21 | 22 | basic_api = 'https://app.monicahq.com/api' 23 | 24 | 25 | class Conversations: 26 | def __init__(self, access_token, wait_time=1): 27 | """ 28 | Connect with monica conversations API found at https://www.monicahq.com/api/conversations 29 | 30 | Parameters: 31 | ----------- 32 | access_token: str 33 | token retreived from monica platform 34 | 35 | wait_time: int 36 | seconds to wait after every request sent 37 | 38 | """ 39 | headers = {'Authorization': f'Bearer {access_token}', 40 | 'Content-type': 'application/json', 41 | 'Accept': 'text/plain'} 42 | 43 | self.headers = headers 44 | self.basic_api = basic_api 45 | self.wait_time = wait_time 46 | self.utils = Utils() 47 | 48 | def list_conversations(self): 49 | """ 50 | Gets the conversations from monica. Checkout monica API documentation for detailed description. 51 | 52 | Parameters: None 53 | ----------- 54 | 55 | 56 | Returns: 57 | ------- 58 | json_data: dict/json 59 | can be easily converted to pandas dataframe 60 | 61 | """ 62 | headers = self.headers 63 | wait_time = self.wait_time 64 | basic_api = self.basic_api 65 | 66 | api = f"{basic_api}/conversations" 67 | 68 | response = requests.get(api, headers=headers) 69 | print(response) 70 | 71 | json_data = response.json() 72 | 73 | return json_data 74 | 75 | 76 | def create_conversation_object(self, happened_at, contact_field_type_id, contact_id): 77 | """ 78 | Creating a conversation only creates the conversation itself. 79 | You will have to add messages one by one to populate it with actual content. 80 | Checkout monica API documentation for detailed description. 81 | 82 | Parameters: 83 | ----------- 84 | happened_at: str 85 | The date the conversation happened. Format: YYYY-MM-DD. 86 | 87 | contact_field_type_id: int 88 | The type of the contact field. Has to be a valid, existing contact field type ID. 89 | 90 | contact_id: int 91 | The ID of the contact that the conversation field is associated with. 92 | 93 | Returns: 94 | ------- 95 | conversation_id: str 96 | The ID of created conversation object 97 | 98 | """ 99 | headers = self.headers 100 | basic_api = self.basic_api 101 | 102 | api = f"{basic_api}/conversations" 103 | 104 | payload = {'happened_at': happened_at, 105 | 'contact_field_type_id': contact_field_type_id, 106 | 'contact_id':contact_id} 107 | 108 | response = requests.post(api, params=payload, headers=headers) 109 | 110 | json_data = response.json() 111 | 112 | conversation_id = json_data['data']['id'] 113 | 114 | return conversation_id 115 | 116 | def add_message(self, written_at, written_by_me, content, contact_id, conversation_id): 117 | """ 118 | Add a message to a conversation object. Checkout monica API documentation for detailed description. 119 | 120 | Parameters: 121 | ----------- 122 | written_at: str 123 | The date the conversation happened. Format: YYYY-MM-DD. 124 | 125 | written_by_me: Bool 126 | True if the user has written the message. False if the contact has written the message. 127 | 128 | content: str 129 | The actual message. 130 | 131 | contact_id: int 132 | The ID of the contact that the conversation is associated with. 133 | 134 | conversation_id: str 135 | The ID is retreived when conversation_id is created. 136 | 137 | Returns: 138 | ------- 139 | json_data: dict/json 140 | can be easily converted to pandas dataframe 141 | 142 | """ 143 | headers = self.headers 144 | basic_api = self.basic_api 145 | contact_id = int(contact_id) 146 | 147 | api = f"{basic_api}/conversations/{conversation_id}/messages" 148 | 149 | 150 | payload_raw = { 151 | "contact_id": contact_id, 152 | "written_at": written_at, 153 | "content": content, 154 | "written_by_me": written_by_me 155 | } 156 | 157 | payload = json.dumps(payload_raw) # necessary for converting Boolean to json form e.g True to true 158 | 159 | response = requests.post(api, json=payload_raw, headers=headers) 160 | print(response) 161 | json_data = response.json() 162 | 163 | return json_data 164 | 165 | 166 | def list_conversations_of_a_contact(self, contact_id): 167 | """ 168 | List all the conversations of a contact 169 | 170 | """ 171 | headers = self.headers 172 | basic_api = self.basic_api 173 | contact_id = int(contact_id) 174 | 175 | api = f"{basic_api}/contacts/{contact_id}/conversations" 176 | 177 | response = requests.get(api, headers=headers) 178 | print(response) 179 | json_data = response.json() 180 | 181 | return json_data 182 | 183 | 184 | def delete_conversation(self, conversation_id): 185 | """ 186 | 187 | 188 | """ 189 | headers = self.headers 190 | basic_api = self.basic_api 191 | 192 | api = f"{basic_api}/conversations/{conversation_id}" 193 | 194 | response = requests.delete(api, headers=headers) 195 | print(response) 196 | 197 | # json_data = response.json() 198 | # return json_data 199 | 200 | 201 | def delete_all_conversations_of_a_contact(self, contact_id): 202 | """ 203 | List all the conversations of a contact 204 | 205 | """ 206 | headers = self.headers 207 | basic_api = self.basic_api 208 | wait_time = self.wait_time 209 | 210 | all_conversations_json = self.list_conversations_of_a_contact(contact_id) 211 | try: 212 | all_conversations_df = pd.DataFrame(all_conversations_json['data']) 213 | except: 214 | print(f"Problem with {contact_id}") 215 | 216 | try: 217 | all_conversations_id = list(all_conversations_df['id']) 218 | for conversation_id in all_conversations_id: 219 | time.sleep(wait_time) 220 | self.delete_conversation(conversation_id) 221 | except: 222 | # no conversation with this contact 223 | pass 224 | # create upload all conversations func 225 | 226 | def add_multiple_messages(self, contact_id, conversation_id, df): 227 | wait_time = self.wait_time 228 | time.sleep(wait_time) 229 | 230 | # iterate over messages 231 | number_of_messages = len(df) 232 | 233 | for i in range(number_of_messages): 234 | written_by_me = int(df['written_by_me'].values[i]) # 1 & 0 instead of True & False, important because numpy.bool is created from pandas 235 | written_at = df['date'].values[i] 236 | content = df['text'].values[i] 237 | self.add_message(written_at, written_by_me, content, contact_id, conversation_id) 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | -------------------------------------------------------------------------------- /monica/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | Offical Documentation: https://www.monicahq.com/api/ 5 | 6 | Description: We will put all general functions to help the other classes 7 | 8 | """ 9 | import pandas as pd 10 | 11 | 12 | class Utils: 13 | """ 14 | Helper class for several classes 15 | 16 | """ 17 | 18 | def merge_json_data(self, json_orig, json_new): 19 | """ 20 | Merges json resonses to create 1 complete json file 21 | 22 | Parameters: 23 | ----------- 24 | json_orig: json/dict 25 | The previous json response from monica 26 | 27 | json_new: json/dict 28 | The most recent json response from monica 29 | 30 | Returns: 31 | ------- 32 | json_merged: dict/json 33 | Appending lists within json where ever possible and overwritting the information 34 | with the latest json_response where not possible. This will help in the json response 35 | staying consistent with how the orignal json file looks 36 | 37 | 38 | """ 39 | json_merged = {} 40 | 41 | all_keys = list(json_orig.keys()) 42 | 43 | for key in all_keys: 44 | try: 45 | new_data = json_new[key] 46 | orignal_data = json_orig[key] 47 | json_merged[key] = new_data + orignal_data 48 | except: 49 | # print(f"{key} will be overwritten with the latest dictionary") 50 | json_merged[key] = json_new[key] # this will only be "meta" dict for contacts 51 | 52 | return json_merged 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /social/__pycache__/gmail_data.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/gmail_data.cpython-36.pyc -------------------------------------------------------------------------------- /social/__pycache__/gmail_data_cleaning.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/gmail_data_cleaning.cpython-36.pyc -------------------------------------------------------------------------------- /social/__pycache__/gmail_monica_client.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/gmail_monica_client.cpython-36.pyc -------------------------------------------------------------------------------- /social/__pycache__/gmail_preprocessing.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/gmail_preprocessing.cpython-36.pyc -------------------------------------------------------------------------------- /social/__pycache__/linkedin_monica_client.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/linkedin_monica_client.cpython-36.pyc -------------------------------------------------------------------------------- /social/__pycache__/linkedin_preprocessing.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/linkedin_preprocessing.cpython-36.pyc -------------------------------------------------------------------------------- /social/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /social/gmail_monica_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | We will put functions to help us clean the data concerning gmail 5 | 6 | """ 7 | import re 8 | import time 9 | import pandas as pd 10 | from datetime import datetime 11 | from social.gmail_preprocessing import Preprocessing 12 | from social.utils import Utils 13 | from pprint import pprint 14 | 15 | # import sys 16 | # sys.path.append('..') 17 | 18 | from monica.conversations import Conversations 19 | from monica.contact_field_types import Contact_Field_Types 20 | 21 | class Gmail_Monica_Client: 22 | def __init__(self, access_token, wait_time=0.2): 23 | """ 24 | Preparing gmail data for monica conversations 25 | 26 | Parameters: 27 | ----------- 28 | 29 | """ 30 | 31 | self.conversations = Conversations(access_token=access_token, wait_time=wait_time) 32 | self.contact_field_types = Contact_Field_Types(access_token=access_token) 33 | 34 | 35 | def upload_emails_to_monica(self, df_preprocessed): 36 | conversations = self.conversations 37 | contact_field_types = self.contact_field_types 38 | 39 | df_preprocessed = df_preprocessed[df_preprocessed['contact_id']!=""] # remove contacts which are not on monica 40 | df_preprocessed = df_preprocessed[~df_preprocessed['key'].isna()] # remove nan for subjects 41 | 42 | contact_field_type_id = contact_field_types.get_contact_field_type_id("Email") 43 | 44 | unique_keys = list(df_preprocessed['key'].unique()) 45 | # unique_keys = [x for x in unique_keys if str(x) == 'permission432610'] # testing 46 | 47 | for key in unique_keys: 48 | print(key) 49 | print() 50 | subset = df_preprocessed[df_preprocessed['key']==key] 51 | subset.sort_values(by='date_time', inplace=True) 52 | contact_id = subset['contact_id'].values[0] 53 | happened_at = subset['date'].values[0] # started conv 54 | conversation_id = conversations.create_conversation_object(happened_at=happened_at, contact_field_type_id=contact_field_type_id, contact_id=contact_id) 55 | conversations.add_multiple_messages(contact_id=contact_id, conversation_id=conversation_id, df=subset) 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /social/gmail_preprocessing.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | We will put functions to help us clean the data concerning gmail 5 | 6 | # we can replace email by most popular email in dataframe!! 7 | 8 | """ 9 | import re 10 | import pandas as pd 11 | from datetime import datetime 12 | from social.utils import Utils 13 | 14 | class Preprocessing: 15 | def __init__(self, gmail_json_file_path, monica_contacts_file_path): 16 | """ 17 | Preparing gmail data for monica conversations 18 | 19 | Parameters: 20 | ----------- 21 | 22 | """ 23 | self.gmail_json_file_path = gmail_json_file_path 24 | self.monica_contacts_file_path = monica_contacts_file_path 25 | self.utils = Utils() 26 | 27 | def combine_monica_contacts_with_gmail_df(self, monica_contacts_df, gmail_df_cleaned, my_email): 28 | utils = self.utils 29 | contact_id_dict = utils.create_contact_id_dict(monica_contacts_df) 30 | gmail_df_cleaned = gmail_df_cleaned.copy() 31 | 32 | gmail_df_cleaned['contact_id_from'] = gmail_df_cleaned['from_dict_name'].apply(lambda x:utils.retreive_contact_id(full_name=x, contact_id_dict=contact_id_dict)) 33 | gmail_df_cleaned['contact_id_to'] = gmail_df_cleaned['to_dict_name'].apply(lambda x:utils.retreive_contact_id(full_name=x, contact_id_dict=contact_id_dict)) 34 | 35 | gmail_df_cleaned.loc[gmail_df_cleaned['from_dict_email']==my_email, 'contact_id_from'] = "" 36 | gmail_df_cleaned.loc[gmail_df_cleaned['to_dict_email']==my_email, 'contact_id_to'] = "" 37 | 38 | return gmail_df_cleaned 39 | 40 | def prepare_dataframe(self): 41 | utils = self.utils 42 | 43 | gmail_json_file_path = self.gmail_json_file_path 44 | monica_contacts_file_path = self.monica_contacts_file_path 45 | 46 | monica_contacts_df = pd.read_csv(monica_contacts_file_path) 47 | gmail_df_raw = pd.read_json(gmail_json_file_path) 48 | 49 | # gmail_df_cleaned = self.apply(gmail_df_raw).copy() # fixing copy error pandas 50 | gmail_df_cleaned = self.apply(gmail_df_raw) 51 | my_email = utils.find_my_info(col1='to_dict_email', col2='from_dict_email', df=gmail_df_cleaned) 52 | # print(my_email) 53 | 54 | merged_df = self.combine_monica_contacts_with_gmail_df(monica_contacts_df, gmail_df_cleaned, my_email) 55 | 56 | merged_df['written_by_me'] = False 57 | merged_df.loc[merged_df['from_dict_email']==my_email, 'written_by_me'] = True 58 | 59 | merged_df['contact_id'] = merged_df.apply(lambda x: max(str(x['contact_id_from']), str(x['contact_id_to'])), axis=1) 60 | 61 | # key identifier 62 | merged_df['key'] = merged_df['subject'] + merged_df['contact_id'] 63 | 64 | 65 | return merged_df 66 | 67 | 68 | def apply(self, df): 69 | 70 | 71 | df['from_dict'] = df['from'].apply(lambda x: self.extract_first_element(x)) 72 | df['from_dict_email'] = df['from_dict'].apply(lambda x: self.extract_email_address(x)) 73 | df['from_dict_name'] = df['from_dict'].apply(lambda x: self.extract_name(x)) 74 | 75 | df['to_dict'] = df['to'].apply(lambda x: self.extract_first_element(x)) 76 | df['to_dict_email'] = df['to_dict'].apply(lambda x: self.extract_email_address(x)) 77 | 78 | df['to_dict_name'] = df['to_dict'].apply(lambda x: self.extract_name(x)) 79 | 80 | df['subject'] = df['subject'].apply(lambda x: self.clean_subject_line(x)) 81 | df['date_time'] = df['date'] 82 | df['date'] = df['date'].apply(lambda x: x.strftime('%Y-%m-%d')) 83 | 84 | # remove addresses with 'no reply' 85 | df = df[~df['from_dict_email'].str.contains('reply', na=False)] 86 | 87 | # remove columns not needed for upload 88 | df.drop(columns=['from', 'to', 'from_dict', 'to_dict', 'receivedDate', 89 | 'html', 'cc', 'headers', 90 | 'priority', 'attachments', 'bcc', 'alternatives', 91 | 'references', 'inReplyTo', 'replyTo'], inplace=True) 92 | 93 | return df 94 | 95 | # credits to @jfs from stackoverflow - https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python 96 | def deEmojify(self, text): 97 | """ 98 | Removes emoji from text - sometimes used in subject lines 99 | 100 | Parameters: 101 | ----------- 102 | text: str 103 | text input could be either subject or content of email 104 | 105 | 106 | Returns: 107 | ------- 108 | clean_text: str 109 | text without emoji 110 | 111 | """ 112 | regrex_pattern = re.compile(pattern = "[" 113 | u"\U0001F600-\U0001F64F" # emoticons 114 | u"\U0001F300-\U0001F5FF" # symbols & pictographs 115 | u"\U0001F680-\U0001F6FF" # transport & map symbols 116 | u"\U0001F1E0-\U0001F1FF" # flags (iOS) 117 | "]+", flags = re.UNICODE) 118 | try: 119 | clean_text = regrex_pattern.sub(r'',text) 120 | except: 121 | clean_text = text 122 | 123 | return clean_text 124 | 125 | def clean_subject_line(self, subject): 126 | try: 127 | subject_lower = subject.lower() 128 | 129 | subject_clean = subject_lower.replace('fwd:','') 130 | subject_clean = subject_clean.replace('re:','') 131 | subject_clean = subject_clean.replace('fw:','') 132 | subject_clean = subject_clean.strip() 133 | 134 | subject_clean_no_emoji = self.deEmojify(subject_clean) 135 | 136 | return subject_clean_no_emoji 137 | 138 | except: 139 | return subject 140 | 141 | 142 | def extract_first_element(self, list_of_emails): 143 | """ 144 | Extracts the first email from the list 145 | 146 | Parameters: 147 | ----------- 148 | list_of_emails: list 149 | list of emails 150 | 151 | 152 | Returns: 153 | ------- 154 | email: str 155 | first email in the list for example if the email has been sent to multiple to several people, 156 | only first email address will be taken into account. 157 | 158 | """ 159 | try: 160 | return list_of_emails[0] 161 | except: 162 | return "" 163 | 164 | def extract_name(self, gmail_dict): 165 | """ 166 | Extracts the first name from the dict 167 | 168 | Parameters: 169 | ----------- 170 | gmail_dict: dict 171 | dict holding the recipient or sender's information 172 | 173 | 174 | Returns: 175 | ------- 176 | name: str 177 | name of the recipient or sender of email 178 | 179 | """ 180 | try: 181 | return gmail_dict['name'].lower() 182 | except: 183 | return "" 184 | 185 | 186 | def extract_email_address(self, gmail_dict): 187 | """ 188 | Extracts the email address from the dict 189 | 190 | Parameters: 191 | ----------- 192 | gmail_dict: dict 193 | dict holding the recipient or sender's information 194 | 195 | 196 | Returns: 197 | ------- 198 | email: str 199 | email of the recipient or sender depending on gmail dict 200 | 201 | """ 202 | try: 203 | return gmail_dict['address'].lower() 204 | except: 205 | return "" 206 | 207 | def extract_date(self, date_time): 208 | """ 209 | Extracts the date from receivedDate key in dict and parse in a format for monica api 210 | 211 | 212 | Parameters: 213 | ----------- 214 | date_time: datetime object 215 | datetime object from the gmail data 216 | 217 | 218 | Returns: 219 | ------- 220 | date: str 221 | Returns date in Format: YYYY-MM-DD. 222 | 223 | """ 224 | 225 | try: 226 | return date_time.strftime('%Y-%m-%d') 227 | except: 228 | return "" 229 | 230 | 231 | 232 | 233 | 234 | -------------------------------------------------------------------------------- /social/linkedin_monica_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | We will put functions to help us clean the data concerning linkedin 5 | 6 | """ 7 | import re 8 | import time 9 | import pandas as pd 10 | from datetime import datetime 11 | from social.linkedin_preprocessing import Preprocessing 12 | from social.utils import Utils 13 | from pprint import pprint 14 | 15 | # import sys 16 | # sys.path.append('..') 17 | 18 | from monica.conversations import Conversations 19 | from monica.contact_field_types import Contact_Field_Types 20 | 21 | class Linkedin_Monica_Client: 22 | def __init__(self, access_token, wait_time=0.2): 23 | """ 24 | Preparing linkedin data for monica conversations 25 | 26 | Parameters: 27 | ----------- 28 | 29 | """ 30 | 31 | self.conversations = Conversations(access_token=access_token, wait_time=wait_time) 32 | self.contact_field_types = Contact_Field_Types(access_token=access_token) 33 | 34 | 35 | def upload_conversations_to_monica(self, df_preprocessed): 36 | conversations = self.conversations 37 | contact_field_types = self.contact_field_types 38 | 39 | df_preprocessed = df_preprocessed[df_preprocessed['contact_id']!=""] # remove contacts which are not on monica 40 | df_preprocessed = df_preprocessed[~df_preprocessed['key'].isna()] # remove nan for subjects 41 | 42 | contact_field_type_id = contact_field_types.get_contact_field_type_id("LinkedIn") 43 | 44 | unique_keys = list(df_preprocessed['key'].unique()) 45 | # unique_keys = [x for x in unique_keys if str(x) == 'permission432610'] # testing 46 | # unique_keys = [unique_keys[0]] 47 | # pprint(df_preprocessed) 48 | 49 | for key in unique_keys: 50 | subset = df_preprocessed[df_preprocessed['key']==key] 51 | subset.sort_values(by='date_time', inplace=True) 52 | contact_id = subset['contact_id'].values[0] 53 | happened_at = subset['date'].values[0] # started conv 54 | conversation_id = conversations.create_conversation_object(happened_at=happened_at, contact_field_type_id=contact_field_type_id, contact_id=contact_id) 55 | conversations.add_multiple_messages(contact_id=contact_id, conversation_id=conversation_id, df=subset) 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /social/linkedin_preprocessing.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | We will put functions to help us clean the data concerning gmail 5 | 6 | """ 7 | import re 8 | import pandas as pd 9 | from datetime import datetime 10 | from social.utils import Utils 11 | import jellyfish 12 | 13 | class Preprocessing: 14 | def __init__(self, linkedin_messages_file_path, monica_contacts_file_path): 15 | """ 16 | Preparing gmail data for monica conversations 17 | 18 | Parameters: 19 | ----------- 20 | 21 | """ 22 | self.linkedin_messages_file_path = linkedin_messages_file_path 23 | self.monica_contacts_file_path = monica_contacts_file_path 24 | self.utils = Utils() 25 | 26 | 27 | def prepare_dataframe(self): 28 | utils = self.utils 29 | linkedin_messages_file_path = self.linkedin_messages_file_path 30 | monica_contacts_file_path = self.monica_contacts_file_path 31 | 32 | monica_contacts_df = pd.read_csv(monica_contacts_file_path) 33 | monica_contacts_df['complete_name'] = monica_contacts_df['complete_name'].str.lower() 34 | 35 | linkedin_df_raw = pd.read_csv(linkedin_messages_file_path) 36 | 37 | # linkedin_df_cleaned = self.apply(linkedin_df_raw).copy() # fixing copy error pandas 38 | linkedin_df_cleaned = self.apply(linkedin_df_raw) 39 | my_name = utils.find_my_info(col1='FROM', col2='TO', df=linkedin_df_cleaned) 40 | 41 | merged_df = self.combine_monica_contacts_with_linkedin_df(monica_contacts_df, linkedin_df_cleaned, my_name) 42 | 43 | merged_df['written_by_me'] = False 44 | merged_df.loc[merged_df['FROM']==my_name, 'written_by_me'] = True 45 | 46 | merged_df['contact_id'] = merged_df.apply(lambda x: max(str(x['contact_id_from']), str(x['contact_id_to'])), axis=1) 47 | 48 | # key identifier 49 | merged_df['key'] = merged_df['CONVERSATION ID'] 50 | 51 | 52 | return merged_df 53 | 54 | 55 | def apply(self, df): 56 | 57 | df = df[~df['FROM'].isna()] 58 | df = df[~df['TO'].isna()] 59 | 60 | df['FROM'] = df['FROM'].str.lower() 61 | df['TO'] = df['TO'].str.lower() 62 | 63 | df['FROM'] = df['FROM'].str.strip() 64 | df['TO'] = df['TO'].str.strip() 65 | 66 | df["date_time"] = pd.to_datetime(df['DATE']) 67 | 68 | df['date'] = df['date_time'].apply(lambda x: x.strftime('%Y-%m-%d')) 69 | df["text"] = df["CONTENT"].str.replace(" ", " ") 70 | # df["CONTENT"] = df["CONTENT"].apply(lambda x: self.deEmojify(x)) 71 | 72 | 73 | # remove columns not needed for upload 74 | df.drop(columns=['CONVERSATION TITLE', 'FOLDER', 'SUBJECT', 'CONTENT', 'DATE'], inplace=True) 75 | 76 | return df 77 | 78 | def prepare_fuzzy_dataframe(self, monica_contacts_df, linkedin_df_cleaned, my_name): 79 | utils = self.utils 80 | 81 | temp_df = pd.DataFrame() 82 | temp_df['FROM']= linkedin_df_cleaned.drop_duplicates(subset=['FROM'])['FROM'] 83 | temp_df['TO']= linkedin_df_cleaned.drop_duplicates(subset=['TO'])['TO'] 84 | 85 | to_list = list(linkedin_df_cleaned.drop_duplicates(subset=['TO'])['TO'].reset_index(drop=True)) 86 | from_list = list(linkedin_df_cleaned.drop_duplicates(subset=['FROM'])['FROM'].reset_index(drop=True)) 87 | 88 | all_correspondence = from_list + to_list 89 | df = pd.DataFrame(all_correspondence, columns=['linkedin_name']) 90 | df.drop_duplicates(subset='linkedin_name') 91 | 92 | df['linkedin_name'] = df['linkedin_name'].str.lower() 93 | df = df[~df['linkedin_name'].isna()] 94 | 95 | monica_contact_list = list(monica_contacts_df["complete_name"].str.lower()) 96 | 97 | # connect names of monica 98 | df['monica_name'] = df['linkedin_name'].apply(lambda x:utils.fuzzy_contact_name_match(search_name=x, monica_contact_list=monica_contact_list, my_name=my_name, benchmark=0.8)) 99 | 100 | return df 101 | 102 | 103 | def combine_monica_contacts_with_linkedin_df(self, monica_contacts_df, linkedin_df_cleaned, my_name): 104 | utils = self.utils 105 | fuzzy_df = self.prepare_fuzzy_dataframe(monica_contacts_df, linkedin_df_cleaned, my_name) 106 | fuzzy_monica_linkedin_dict = utils.create_fuzzy_monica_linkedin_dict(fuzzy_df) 107 | 108 | # connect names of monica 109 | linkedin_df_cleaned['monica_from_full_name'] = linkedin_df_cleaned['FROM'].apply(lambda x:utils.retreive_fuzzy_monica_name(linkedin_name=x, fuzzy_monica_linkedin_dict=fuzzy_monica_linkedin_dict)) 110 | linkedin_df_cleaned['monica_to_full_name'] = linkedin_df_cleaned['TO'].apply(lambda x:utils.retreive_fuzzy_monica_name(linkedin_name=x, fuzzy_monica_linkedin_dict=fuzzy_monica_linkedin_dict)) 111 | 112 | contact_id_dict = utils.create_contact_id_dict(monica_contacts_df) 113 | linkedin_df_cleaned = linkedin_df_cleaned.copy() 114 | 115 | linkedin_df_cleaned['contact_id_from'] = linkedin_df_cleaned['monica_from_full_name'].apply(lambda x:utils.retreive_contact_id(full_name=x, contact_id_dict=contact_id_dict)) 116 | linkedin_df_cleaned['contact_id_to'] = linkedin_df_cleaned['monica_to_full_name'].apply(lambda x:utils.retreive_contact_id(full_name=x, contact_id_dict=contact_id_dict)) 117 | 118 | linkedin_df_cleaned.loc[linkedin_df_cleaned['monica_from_full_name']==my_name, 'contact_id_from'] = "" 119 | linkedin_df_cleaned.loc[linkedin_df_cleaned['monica_to_full_name']==my_name, 'contact_id_to'] = "" 120 | 121 | return linkedin_df_cleaned 122 | 123 | 124 | # credits to @jfs from stackoverflow - https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python 125 | def deEmojify(self, text): 126 | """ 127 | Removes emoji from text - sometimes used in subject lines 128 | 129 | Parameters: 130 | ----------- 131 | text: str 132 | text input could be either subject or content of email 133 | 134 | 135 | Returns: 136 | ------- 137 | clean_text: str 138 | text without emoji 139 | 140 | """ 141 | regrex_pattern = re.compile(pattern = "[" 142 | u"\U0001F600-\U0001F64F" # emoticons 143 | u"\U0001F300-\U0001F5FF" # symbols & pictographs 144 | u"\U0001F680-\U0001F6FF" # transport & map symbols 145 | u"\U0001F1E0-\U0001F1FF" # flags (iOS) 146 | "]+", flags = re.UNICODE) 147 | try: 148 | clean_text = regrex_pattern.sub(r'',text) 149 | except: 150 | clean_text = text 151 | 152 | return clean_text 153 | 154 | def clean_subject_line(self, subject): 155 | try: 156 | subject_lower = subject.lower() 157 | 158 | subject_clean = subject_lower.replace('fwd:','') 159 | subject_clean = subject_clean.replace('re:','') 160 | subject_clean = subject_clean.replace('fw:','') 161 | subject_clean = subject_clean.strip() 162 | 163 | subject_clean_no_emoji = self.deEmojify(subject_clean) 164 | 165 | return subject_clean_no_emoji 166 | 167 | except: 168 | return subject 169 | 170 | -------------------------------------------------------------------------------- /social/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Shahzeb Afroze 3 | 4 | Offical Documentation: https://www.monicahq.com/api/ 5 | 6 | Description: We will put all general functions to help the other classes 7 | 8 | """ 9 | import pandas as pd 10 | from monica.conversations import Conversations 11 | import jellyfish 12 | 13 | basic_api = 'https://app.monicahq.com/api' 14 | 15 | class Utils: 16 | def __init__(self, wait_time=1): 17 | """ 18 | Helper class for several classes 19 | 20 | Parameters: 21 | ----------- 22 | wait_time: int 23 | seconds to wait after every request sent 24 | 25 | """ 26 | 27 | self.wait_time = wait_time 28 | 29 | 30 | def retreive_contact_id(self, contact_id_dict, full_name): 31 | """ 32 | Merges json resonses to create 1 complete json file 33 | 34 | Parameters: 35 | ----------- 36 | json_orig: json/dict 37 | The previous json response from monica 38 | 39 | json_new: json/dict 40 | The most recent json response from monica 41 | 42 | Returns: 43 | ------- 44 | json_merged: dict/json 45 | Appending lists within json where ever possible and overwritting the information 46 | with the latest json_response where not possible. This will help in the json response 47 | staying consistent with how the orignal json file looks 48 | 49 | 50 | """ 51 | # we can later apply fuzzy search if later needed 52 | 53 | 54 | try: 55 | contact_id = contact_id_dict[full_name] 56 | except: 57 | contact_id = '' 58 | 59 | 60 | return contact_id 61 | 62 | def create_contact_id_dict(self, monica_contacts_df): 63 | monica_contacts_df.sort_values(by="complete_name", inplace=True) 64 | monica_contacts_df["complete_name"] = monica_contacts_df["complete_name"].str.lower() 65 | monica_contacts_df.drop_duplicates(subset=["complete_name"], inplace=True) 66 | 67 | contact_id_dict = pd.Series(monica_contacts_df.id.values,index=monica_contacts_df.complete_name).to_dict() 68 | 69 | return contact_id_dict 70 | 71 | def create_fuzzy_monica_linkedin_dict(self, fuzzy_df): 72 | 73 | fuzzy_monica_linkedin_dict = pd.Series(fuzzy_df.linkedin_name.values,index=fuzzy_df.monica_name).to_dict() 74 | 75 | return fuzzy_monica_linkedin_dict 76 | 77 | def retreive_fuzzy_monica_name(self, fuzzy_monica_linkedin_dict, linkedin_name): 78 | try: 79 | monica_name = fuzzy_monica_linkedin_dict[linkedin_name] 80 | except: 81 | monica_name = '' 82 | 83 | 84 | return monica_name 85 | 86 | 87 | def most_frequent(self, List): 88 | return max(set(List), key = List.count) 89 | 90 | # to be used for converting dataframe into columns and applying it to most frequent 91 | def find_my_info(self, col1, col2, df): 92 | list_1=list(df[col1].values) 93 | list_2=list(df[col2].values) 94 | 95 | all_info=list_1 + list_2 96 | my_info = self.most_frequent(all_info) 97 | 98 | return my_info 99 | 100 | 101 | def fuzzy_contact_name_match(self, search_name, monica_contact_list, my_name, benchmark=0.85): 102 | all_score = [] 103 | if search_name!=my_name: 104 | for monica_contact in monica_contact_list: 105 | score = jellyfish.jaro_winkler_similarity(search_name, monica_contact) 106 | all_score.append(score) 107 | 108 | name_matched = self.find_max_score_name(monica_contact_list, all_score, benchmark) 109 | return name_matched 110 | 111 | 112 | def find_max_score_name(self, monica_contact_list, all_score, benchmark): 113 | max_score = max(all_score) 114 | if max_score>=benchmark: 115 | index_of_max_score = all_score.index(max_score) 116 | name_with_highest_similarity = monica_contact_list[index_of_max_score] 117 | return name_with_highest_similarity 118 | 119 | 120 | --------------------------------------------------------------------------------