├── .gitignore
├── LICENSE
├── README.md
├── Testing.ipynb
├── examples
    ├── gmail.md
    └── linkedin.md
├── monica
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-36.pyc
    │   ├── contact_field_types.cpython-36.pyc
    │   ├── contacts.cpython-36.pyc
    │   ├── conversations.cpython-36.pyc
    │   ├── monica.cpython-36.pyc
    │   └── utils.cpython-36.pyc
    ├── contact_field_types.py
    ├── contacts.py
    ├── conversations.py
    └── utils.py
└── social
    ├── __pycache__
        ├── gmail_data.cpython-36.pyc
        ├── gmail_data_cleaning.cpython-36.pyc
        ├── gmail_monica_client.cpython-36.pyc
        ├── gmail_preprocessing.cpython-36.pyc
        ├── linkedin_monica_client.cpython-36.pyc
        ├── linkedin_preprocessing.cpython-36.pyc
        └── utils.cpython-36.pyc
    ├── gmail_monica_client.py
    ├── gmail_preprocessing.py
    ├── linkedin_monica_client.py
    ├── linkedin_preprocessing.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # General
 2 | .DS_Store
 3 | .AppleDouble
 4 | .LSOverride
 5 | 
 6 | # Icon must end with two \r
 7 | Icon
 8 | 
 9 | # Thumbnails
10 | ._*
11 | 
12 | # temporary
13 | *.ipynb
14 | 
 15 | # permanent: ensures personal messages do not get synced
16 | personal/
17 | 
18 | 
19 | # Files that might appear in the root of a volume
20 | .DocumentRevisions-V100
21 | .fseventsd
22 | .Spotlight-V100
23 | .TemporaryItems
24 | .Trashes
25 | .VolumeIcon.icns
26 | .com.apple.timemachine.donotpresent
27 | 
28 | # Directories potentially created on remote AFP share
29 | .AppleDB
30 | .AppleDesktop
31 | Network Trash Folder
32 | Temporary Items
33 | .apdisk


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Shahzeb Afroze
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # monica-python
  2 | 
  3 | This repo is a work in progress. The future plan is to integrate data from other social media and emails.
  4 | 
  5 | This is inspired by the [monica-client](https://pypi.org/project/monica-client/) package.
  6 | 
  7 | Contribution needed for:
  8 | 1. Other APIs to be converted to Python structure
  9 | 2. Social Media Integrations
 10 | 
 11 | 
 12 | ## Contacts API
 13 | 
 14 | 1. Initiating the contacts API
 15 | ```python
 16 | from monica.contacts import Contacts
 17 | import pandas as pd
 18 | 
 19 | contacts = Contacts(access_token)
 20 | ```
 21 | 
 22 | 2. Lists a few contacts from the database with criteria
 23 | 
 24 | ```python
 25 | 
 26 | limit=10 # default
 27 | page=1 # default
 28 | sort="updated_at" # default
 29 | json_data = contacts.list_contacts(limit=limit, page=page, sort=sort) # returns json data
 30 | 
 31 | pd.DataFrame(json_data['data'])
 32 | 
 33 | ``` 
 34 | 
 35 | 3. List all contacts (Save it as csv file for other features)
 36 | 
 37 | ```python
 38 | 
 39 | sort = "updated_at" # default
 40 | json_data = contacts.list_all_your_contacts(sort=sort)
 41 | 
 42 | df = pd.DataFrame(json_data['data'])
 43 | df.to_csv('contacts_from_monicahq.csv')
 44 | 
 45 | ```
 46 | 
 47 | 
 48 | ## Conversations API
 49 | 
 50 | 1. Initiating the conversations API
 51 | ```python
 52 | from monica.conversations import Conversations
 53 | import pandas as pd
 54 | 
 55 | conversations = Conversations(access_token)
 56 | 
 57 | ```
 58 | 
 59 | 2. Lists a few conversations from the database - Official API does not return all the conversations
 60 | 
 61 | ```python
 62 | 
 63 | conversations.list_conversations()
 64 | 
 65 | ```
 66 | 
 67 | 3. Create conversation object
 68 | 
 69 | ```python
 70 | 
 71 | conversation_id = conversations.create_conversation_object(happened_at, contact_field_type_id, contact_id)
 72 | 
 73 | ```
 74 | 
 75 | 4. Add message to a conversation object
 76 | ```python
 77 | 
 78 | conversations.add_message(written_at, written_by_me, content, contact_id, conversation_id)
 79 | 
 80 | ```
 81 | 
 82 | 5. Lists all conversations of a contact
 83 | ```python
 84 | 
 85 | 
 86 | json_response = conversations.list_conversations_of_a_contact(contact_id) # returns the whole json response from API.
 87 | 
 88 | 
 89 | pd.DataFrame(json_response['data']) # to see it properly in a dataframe format
 90 | 
 91 | ```
 92 | 
 93 | 6. Delete conversation
 94 | ```python
 95 | 
 96 | conversations.delete_conversation(conversation_id)
 97 | 
 98 | ```
 99 | 
100 | 7. Delete all conversations of a contact
101 | ```python
102 | 
103 | conversations.delete_all_conversations_of_a_contact(contact_id)
104 | 
105 | ```
106 | 
107 | 8. Add multiple messages to contact
108 | 
109 | ```python
110 | 
111 | conversations.add_multiple_messages(contact_id, conversation_id, df) 
112 | # df is pandas dataframe in a specific structure expected, will add later
113 | 
114 | ```
115 | 
116 | ## Contact Field Types API
117 | 
118 | 1. Initiating the contact field types API
119 | ```python
120 | from monica.contact_field_types import Contact_Field_Types
121 | import pandas as pd
122 | 
123 | contact_field_types = Contact_Field_Types(access_token)
124 | 
125 | ```
126 | 
127 | 2. Lists all field types on your monica 
128 | 
129 | ```python
130 | 
131 | json_data = contact_field_types.list_all()
132 | df = pd.DataFrame(json_data['data'])
133 | df.head()
134 | 
135 | ```
136 | 
137 | 
138 | 3. Retrieve the ID of a field type
139 | 
140 | ```python
141 | 
142 | field_type_id = contact_field_types.get_contact_field_type_id(object_name)
143 | 
144 | ```
145 | 
146 | 
147 | ## Gmail Monica Client API
148 | Check out how to upload gmail data [here](examples/gmail.md)
149 | 
150 | ## Linkedin Monica Client API
151 | Check out how to upload linkedin data [here](examples/linkedin.md)
152 | 
153 | 
154 | 
155 | 
156 | 


--------------------------------------------------------------------------------
/Testing.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "\"\"\"\n",
10 |     "Quick understanding of how the API would work\n",
11 |     "Can be used to make quick examples for documentation\n",
12 |     "\n",
13 |     "\"\"\""
14 |    ]
15 |   },
16 |   {
17 |    "cell_type": "code",
18 |    "execution_count": null,
19 |    "metadata": {},
20 |    "outputs": [],
21 |    "source": [
22 |     "from monica.contacts import Contacts"
23 |    ]
24 |   },
25 |   {
26 |    "cell_type": "code",
27 |    "execution_count": null,
28 |    "metadata": {},
29 |    "outputs": [],
30 |    "source": [
31 |     "access_token = \"\"\n",
32 |     "a = Contacts(access_token)\n"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "code",
37 |    "execution_count": null,
38 |    "metadata": {},
39 |    "outputs": [],
40 |    "source": [
41 |     "a.list_all_your_contacts()\n"
42 |    ]
43 |   },
44 |   {
45 |    "cell_type": "code",
46 |    "execution_count": null,
47 |    "metadata": {},
48 |    "outputs": [],
49 |    "source": [
50 |     "# swqswq"
51 |    ]
52 |   }
53 |  ],
54 |  "metadata": {
55 |   "kernelspec": {
56 |    "display_name": "Python 3",
57 |    "language": "python",
58 |    "name": "python3"
59 |   },
60 |   "language_info": {
61 |    "codemirror_mode": {
62 |     "name": "ipython",
63 |     "version": 3
64 |    },
65 |    "file_extension": ".py",
66 |    "mimetype": "text/x-python",
67 |    "name": "python",
68 |    "nbconvert_exporter": "python",
69 |    "pygments_lexer": "ipython3",
70 |    "version": "3.6.5"
71 |   }
72 |  },
73 |  "nbformat": 4,
74 |  "nbformat_minor": 2
75 | }
76 | 


--------------------------------------------------------------------------------
/examples/gmail.md:
--------------------------------------------------------------------------------
 1 | ## Syncing emails from Gmail
 2 | 
 3 | ```python
 4 | from social.gmail_preprocessing import Preprocessing
 5 | from social.gmail_monica_client import Gmail_Monica_Client
 6 | 
 7 | gmail_json_file_path=""
 8 | monica_contacts_file_path=""
 9 | 
10 | gmail_preprocessing = Preprocessing(gmail_json_file_path, monica_contacts_file_path)
11 | df_preprocessed = gmail_preprocessing.prepare_dataframe()
12 | 
13 | 
14 | gmail_monica_client = Gmail_Monica_Client(access_token=access_token)
15 | gmail_monica_client.upload_emails_to_monica(df_preprocessed)
16 | 
17 | 
18 | ```


--------------------------------------------------------------------------------
/examples/linkedin.md:
--------------------------------------------------------------------------------
 1 | ## Syncing conversations from Linkedin
 2 | 
 3 | ```python
 4 | from social.linkedin_preprocessing import Preprocessing
 5 | from social.linkedin_monica_client import Linkedin_Monica_Client
 6 | 
 7 | linkedin_messages_file_path=""
 8 | monica_contacts_file_path=""
 9 | 
10 | linkedin_preprocessing = Preprocessing(linkedin_messages_file_path, monica_contacts_file_path)
 11 | df_preprocessed = linkedin_preprocessing.prepare_dataframe()
12 | 
13 | 
14 | linkedin_monica_client = Linkedin_Monica_Client(access_token=access_token)
15 | linkedin_monica_client.upload_conversations_to_monica(df_preprocessed)
16 | 
17 | ```


--------------------------------------------------------------------------------
/monica/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @author: Shahzeb Afroze
3 | This will be used for making it easier to manage imports
4 | 
5 | """
6 | 


--------------------------------------------------------------------------------
/monica/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/monica/__pycache__/contact_field_types.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/contact_field_types.cpython-36.pyc


--------------------------------------------------------------------------------
/monica/__pycache__/contacts.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/contacts.cpython-36.pyc


--------------------------------------------------------------------------------
/monica/__pycache__/conversations.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/conversations.cpython-36.pyc


--------------------------------------------------------------------------------
/monica/__pycache__/monica.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/monica.cpython-36.pyc


--------------------------------------------------------------------------------
/monica/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/monica/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/monica/contact_field_types.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @author: Shahzeb Afroze
 3 | 
  4 | Official Documentation: https://www.monicahq.com/api/contactfieldtypes
  5 | 
  6 | Description: We will put all contact field type related requests here
 7 | 
 8 | Todo: 
 9 |  - Need to add an efficient try except rule in all functions to make sure string is accepted for example
10 |  - Need to add something to read the respone number and tell if there is a problem and if the post request was successful
11 |  	instead of sending json dict
12 | 
13 | """
14 | 
15 | import time
16 | import pandas as pd
17 | import requests
18 | import json
19 | from monica.utils import Utils
20 | 
21 | basic_api = 'https://app.monicahq.com/api'
22 | 
23 | 
class Contact_Field_Types:
	"""
	Client for the monica contact field types endpoint (``/contactfieldtypes``).

	"""

	def __init__(self, access_token, wait_time=1):
		"""
		Store the request headers and settings used by the other methods.

		Parameters:
		-----------
			access_token: str
				token retrieved from monica platform, sent as a Bearer token

			wait_time: int
				seconds to wait after every request sent; stored on the instance
				but not read by any method of this class

		"""
		self.headers = {'Authorization': f'Bearer {access_token}',
						'Content-type': 'application/json',
						'Accept': 'text/plain'}
		self.basic_api = basic_api
		self.wait_time = wait_time
		self.utils = Utils()

	def list_all(self):
		"""
		Send a GET request to ``{basic_api}/contactfieldtypes``.
		Checkout monica API documentation for detailed description.

		Parameters: None
		-----------

		Returns:
		-------
		json_data: dict/json
			decoded JSON body of the response; ``json_data['data']`` can be
			easily converted to pandas dataframe

		"""
		api = f"{self.basic_api}/contactfieldtypes"
		response = requests.get(api, headers=self.headers)

		return response.json()

	def get_contact_field_type_id(self, object_name):
		"""
		Look up the ID of the contact field type called ``object_name``.

		Parameters:
		-----------
		object_name: str
			value of the ``name`` field to look for, e.g. "Email"

		Returns:
		-------
		contact_field_type_id:
			``id`` of the first entry returned by ``list_all`` whose ``name``
			equals ``object_name``

		Raises:
		-------
		IndexError
			when no contact field type has that name

		"""
		json_data = self.list_all()
		df = pd.DataFrame(json_data['data'])
		matching_ids = df.loc[df['name'] == object_name, 'id']

		if matching_ids.empty:
			# Same exception type the unguarded ``.values[0]`` lookup raised,
			# but with a message that names the missing field type.
			raise IndexError(f"No contact field type named {object_name!r}")

		return matching_ids.values[0]
79 | 
80 | 
81 | 
82 | 
83 | 	


--------------------------------------------------------------------------------
/monica/contacts.py:
--------------------------------------------------------------------------------
  1 | """
  2 | @author: Shahzeb Afroze
  3 | 
  4 | Offical Documentation: https://www.monicahq.com/api/contacts
  5 | 
  6 | Description: We will put all contacts related requests here
  7 | 
  8 | Todo: 
  9 |  - Will need to add an efficient try except rule in all functions to make sure string is accepted for example
 10 |  - Understand query parameter of get contacts
 11 | 
 12 | """
 13 | 
 14 | import time
 15 | import requests
 16 | from monica.utils import Utils
 17 | import json
 18 | 
 19 | basic_api = 'https://app.monicahq.com/api'
 20 | 
 21 | 
 22 | class Contacts:
 23 | 	def __init__(self, access_token, wait_time=1):
 24 | 		"""
 25 | 		Connect with monica contacts API found at https://www.monicahq.com/api/contacts
 26 | 
 27 | 		Parameters: 
 28 | 		-----------
 29 | 		access_token: str
 30 | 		token retreived from monica platform
 31 | 
 32 | 		wait_time: int
 33 | 		time to wait after every request sent
 34 | 
 35 | 		"""
 36 | 		headers = {'Authorization': f'Bearer {access_token}', 'Content-type': 'application/json', 'Accept': 'text/plain'}
 37 | 
 38 | 		self.headers = headers	
 39 | 		self.basic_api = basic_api
 40 | 		self.wait_time = wait_time
 41 | 		self.utils = Utils()
 42 | 
 43 | 	def list_contacts(self, limit=10, page=1, sort="updated_at"):
 44 | 		"""
 45 | 		Gets the contacts from monica database with page and limit criteria. Checkout monica API documentation for detailed description.
 46 | 
 47 | 		Parameters: 
 48 | 		-----------
 49 | 		limit: int
 50 | 			Indicates the page size
 51 | 
 52 | 		page: int
 53 | 			Indicates the page to return
 54 | 
 55 | 		sort: str
 56 | 			Indicates how the query should be ordered by. 
 57 | 			Possible values: created_at, -created_at, updated_at, -updated_at           
 58 | 		
 59 | 		Returns: 
 60 | 		-------
 61 | 		json_data: dict/json
 62 | 			can be easily converted to pandas dataframe	
 63 | 
 64 | 		"""
 65 | 		headers = self.headers
 66 | 		basic_api = self.basic_api		
 67 | 
 68 | 		api = f"{basic_api}/contacts"
 69 | 
 70 | 		payload = {'limit': limit, 
 71 | 					'page': page,
 72 | 					'sort':sort}
 73 | 
 74 | 
 75 | 		response = requests.get(api,  params=payload, headers=headers)    
 76 | 		json_data = response.json()
 77 | 
 78 | 
 79 | 		return json_data
 80 | 
 81 | 
 82 | 	def list_all_your_contacts(self, sort="updated_at"):
 83 | 		"""
 84 | 		Gets ALL the contacts from monica database. Checkout monica API documentation for detailed description.
 85 | 
 86 | 		Parameters: 
 87 | 		-----------
 88 | 		sort: str
 89 | 			Indicates how the query should be ordered by. 
 90 | 			Possible values: created_at, -created_at, updated_at, -updated_at           
 91 | 		
 92 | 		Returns: 
 93 | 		-------
 94 | 		json_data: dict/json
 95 | 			can be easily converted to pandas dataframe	
 96 | 
 97 | 		"""
 98 | 		utils = self.utils
 99 | 		wait_time = self.wait_time
100 | 
101 | 		json_orig = self.list_contacts(limit=100, page=1, sort=sort) # pull 1st page
102 | 		max_page = json_orig['meta']['last_page']+1
103 | 		for page_number in range(2, max_page):
104 | 			time.sleep(wait_time)
105 | 			json_new = self.list_contacts(limit=100, page=page_number, sort=sort)
106 | 			json_orig = utils.merge_json_data(json_orig, json_new)
107 | 
108 | 		json_merged = json_orig.copy() # renmaing variable in the end of the loop
109 | 
110 | 		return json_merged
111 | 
112 | 
113 | 	def create_contact(self, first_name, last_name, gender_id=3, is_birthdate_known=False, is_deceased=False, is_deceased_date_known=False):
114 | 		"""
115 | 		Create the contacts in monica database. Checkout monica API documentation for detailed description.
116 | 		
117 | 		"""
118 | 
119 | 		headers = self.headers
120 | 		basic_api = self.basic_api		
121 | 
122 | 		api = f"{basic_api}/contacts/"
123 | 
124 | 		payload_raw = {"first_name": first_name,
125 | 						"last_name": last_name,
126 | 						"gender_id": gender_id,
127 | 						"is_birthdate_known": is_birthdate_known,
128 | 						"is_deceased": is_deceased,
129 | 						"is_deceased_date_known": is_deceased_date_known
130 | 						}
131 | 
132 | 		payload = json.dumps(payload_raw) # necessary for converting Boolean to json form e.g True to true
133 | 		response = requests.post(api,  params=payload, headers=headers)    
134 | 		json_data = response.json()
135 | 
136 | 
137 | 		return json_data
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 


--------------------------------------------------------------------------------
/monica/conversations.py:
--------------------------------------------------------------------------------
  1 | """
  2 | @author: Shahzeb Afroze
  3 | 
  4 | Offical Documentation: https://www.monicahq.com/api/conversations
  5 | 
  6 | Description: We will put all conversation related requests here
  7 | 
  8 | Todo: 
  9 |  - Need to add an efficient try except rule in all functions to make sure string is accepted for example
 10 |  - Need to add something to read the respone number and tell if there is a problem and if the post request was successful
 11 |  	instead of sending json dict
 12 | 
 13 | """
 14 | 
 15 | import time
 16 | import requests
 17 | import json
 18 | from monica.utils import Utils
 19 | from pprint import pprint
 20 | import pandas as pd
 21 | 
 22 | basic_api = 'https://app.monicahq.com/api'
 23 | 
 24 | 
 25 | class Conversations:
 26 | 	def __init__(self, access_token, wait_time=1):
 27 | 		"""
 28 | 		Connect with monica conversations API found at https://www.monicahq.com/api/conversations
 29 | 
 30 | 		Parameters: 
 31 | 		-----------
 32 | 			access_token: str
 33 | 				token retreived from monica platform
 34 | 
 35 | 			wait_time: int
 36 | 				seconds to wait after every request sent
 37 | 
 38 | 		"""
 39 | 		headers = {'Authorization': f'Bearer {access_token}', 
 40 | 					'Content-type': 'application/json', 
 41 | 					'Accept': 'text/plain'}
 42 | 		
 43 | 		self.headers = headers	
 44 | 		self.basic_api = basic_api
 45 | 		self.wait_time = wait_time
 46 | 		self.utils = Utils()
 47 | 
 48 | 	def list_conversations(self):
 49 | 		"""
 50 | 		Gets the conversations from monica. Checkout monica API documentation for detailed description.
 51 | 
 52 | 		Parameters: None
 53 | 		-----------
 54 | 
 55 | 		
 56 | 		Returns: 
 57 | 		-------
 58 | 		json_data: dict/json
 59 | 			can be easily converted to pandas dataframe	
 60 | 
 61 | 		"""
 62 | 		headers = self.headers
 63 | 		wait_time = self.wait_time
 64 | 		basic_api = self.basic_api		
 65 | 
 66 | 		api = f"{basic_api}/conversations"
 67 | 
 68 | 		response = requests.get(api, headers=headers)    
 69 | 		print(response)
 70 | 
 71 | 		json_data = response.json()
 72 | 
 73 | 		return json_data
 74 | 
 75 | 
 76 | 	def create_conversation_object(self, happened_at, contact_field_type_id, contact_id):
 77 | 		"""
 78 | 		Creating a conversation only creates the conversation itself. 
 79 | 		You will have to add messages one by one to populate it with actual content. 
 80 | 		Checkout monica API documentation for detailed description.
 81 | 
 82 | 		Parameters: 
 83 | 		-----------
 84 | 		happened_at: str
 85 | 			The date the conversation happened. Format: YYYY-MM-DD.
 86 | 
 87 | 		contact_field_type_id: int
 88 | 			The type of the contact field. Has to be a valid, existing contact field type ID. 			
 89 | 
 90 | 		contact_id: int
 91 | 			The ID of the contact that the conversation field is associated with.
 92 | 		
 93 | 		Returns: 
 94 | 		-------
 95 | 		conversation_id: str
 96 | 			The ID of created conversation object
 97 | 
 98 | 		"""
 99 | 		headers = self.headers
100 | 		basic_api = self.basic_api		
101 | 
102 | 		api = f"{basic_api}/conversations"
103 | 
104 | 		payload = {'happened_at': happened_at, 
105 | 					'contact_field_type_id': contact_field_type_id,
106 | 					'contact_id':contact_id}
107 | 
108 | 		response = requests.post(api,  params=payload, headers=headers)    
109 | 
110 | 		json_data = response.json()
111 | 
112 | 		conversation_id =  json_data['data']['id']
113 | 
114 | 		return conversation_id
115 | 
116 | 	def add_message(self, written_at, written_by_me, content, contact_id, conversation_id):
117 | 		"""
118 | 		Add a message to a conversation object. Checkout monica API documentation for detailed description.
119 | 
120 | 		Parameters: 
121 | 		-----------
122 | 		written_at: str
123 | 			The date the conversation happened. Format: YYYY-MM-DD.
124 | 
125 | 		written_by_me: Bool
126 | 			True if the user has written the message. False if the contact has written the message.
127 | 
128 | 		content: str
129 | 			The actual message.
130 | 		
131 | 		contact_id: int
132 | 			The ID of the contact that the conversation is associated with.
133 | 
134 | 		conversation_id: str
135 | 			The ID is retreived when conversation_id is created.
136 | 
137 | 		Returns: 
138 | 		-------
139 | 		json_data: dict/json
140 | 			can be easily converted to pandas dataframe	
141 | 
142 | 		"""
143 | 		headers = self.headers
144 | 		basic_api = self.basic_api		
145 | 		contact_id = int(contact_id)
146 | 
147 | 		api = f"{basic_api}/conversations/{conversation_id}/messages"
148 | 
149 | 
150 | 		payload_raw = {
151 | 					"contact_id": contact_id,
152 | 					"written_at": written_at,          
153 | 					"content": content,
154 | 					"written_by_me": written_by_me
155 | 					}
156 | 
157 | 		payload = json.dumps(payload_raw) # necessary for converting Boolean to json form e.g True to true
158 | 
159 | 		response = requests.post(api,  json=payload_raw, headers=headers) 
160 | 		print(response)
161 | 		json_data = response.json()
162 | 
163 | 		return json_data
164 | 
165 | 
166 | 	def list_conversations_of_a_contact(self, contact_id):
167 | 		"""
168 | 		List all the conversations of a contact
169 | 
170 | 		"""
171 | 		headers = self.headers
172 | 		basic_api = self.basic_api		
173 | 		contact_id = int(contact_id)
174 | 
175 | 		api = f"{basic_api}/contacts/{contact_id}/conversations"			
176 | 
177 | 		response = requests.get(api, headers=headers) 
178 | 		print(response)
179 | 		json_data = response.json()
180 | 
181 | 		return json_data
182 | 
183 | 	
184 | 	def delete_conversation(self, conversation_id):
185 | 		"""
186 | 		
187 | 
188 | 		"""
189 | 		headers = self.headers
190 | 		basic_api = self.basic_api		
191 | 
192 | 		api = f"{basic_api}/conversations/{conversation_id}"
193 | 
194 | 		response = requests.delete(api, headers=headers) 
195 | 		print(response)
196 | 
197 | 		# json_data = response.json()
198 | 		# return json_data
199 | 
200 | 
201 | 	def delete_all_conversations_of_a_contact(self, contact_id):
202 | 		"""
203 | 		List all the conversations of a contact
204 | 
205 | 		"""
206 | 		headers = self.headers
207 | 		basic_api = self.basic_api	
208 | 		wait_time = self.wait_time
209 | 			
210 | 		all_conversations_json = self.list_conversations_of_a_contact(contact_id)
211 | 		try:
212 | 			all_conversations_df = pd.DataFrame(all_conversations_json['data'])		
213 | 		except:
214 | 			print(f"Problem with {contact_id}")
215 | 
216 | 		try:
217 | 			all_conversations_id = list(all_conversations_df['id'])
218 | 			for conversation_id in all_conversations_id:
219 | 				time.sleep(wait_time)
220 | 				self.delete_conversation(conversation_id)
221 | 		except:			
222 | 			# no conversation with this contact
223 | 			pass
224 | 	# create upload all conversations func
225 | 
226 | 	def add_multiple_messages(self, contact_id, conversation_id, df):
227 | 		wait_time = self.wait_time
228 | 		time.sleep(wait_time)
229 | 
230 | 		# iterate over messages
231 | 		number_of_messages = len(df)
232 | 
233 | 		for i in range(number_of_messages):
234 | 			written_by_me = int(df['written_by_me'].values[i]) # 1 & 0 instead of True & False, important because numpy.bool is created from pandas
235 | 			written_at = df['date'].values[i]
236 | 			content = df['text'].values[i]
237 | 			self.add_message(written_at, written_by_me, content, contact_id, conversation_id)
238 | 
239 | 
240 | 
241 | 
242 | 
243 | 
244 | 
245 | 
246 | 
247 | 
248 | 
249 | 
250 | 
251 | 
252 | 
253 | 
254 | 
255 | 
256 | 
257 | 
258 | 
259 | 
260 | 
261 | 
262 | 
263 | 
264 | 
265 | 
266 | 


--------------------------------------------------------------------------------
/monica/utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @author: Shahzeb Afroze
 3 | 
 4 | Offical Documentation: https://www.monicahq.com/api/
 5 | 
 6 | Description: We will put all general functions to help the other classes
 7 | 
 8 | """
 9 | import pandas as pd
10 | 
11 | 
class Utils:
	"""
	Helper class for several classes

	"""

	def merge_json_data(self, json_orig, json_new):
		"""
		Merges json responses to create 1 complete json file

		Parameters:
		-----------
		json_orig: json/dict
			The previous json response from monica

		json_new: json/dict
			The most recent json response from monica

		Returns:
		-------
		json_merged: dict/json
			A new dict with the keys of ``json_orig``:
			 - when both values are lists, the new list is appended after the
			   original one, so earlier pages stay in front of later pages
			 - any other value is overwritten with the one from ``json_new``
			   (e.g. the "meta" dict for contacts)
			 - a key that ``json_new`` does not have keeps its original value
			Keys that only exist in ``json_new`` are left out, which keeps the
			result consistent with how the original json looks. Neither input
			is modified.

		"""
		json_merged = {}

		for key, original_data in json_orig.items():
			if key not in json_new:
				json_merged[key] = original_data
				continue

			new_data = json_new[key]
			if isinstance(original_data, list) and isinstance(new_data, list):
				# Only lists are concatenated; ``+`` on strings or numbers
				# would silently join or sum unrelated values.
				json_merged[key] = original_data + new_data
			else:
				json_merged[key] = new_data

		return json_merged
53 | 
54 | 
55 | 		
56 | 
57 | 
58 | 
59 | 	


--------------------------------------------------------------------------------
/social/__pycache__/gmail_data.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/gmail_data.cpython-36.pyc


--------------------------------------------------------------------------------
/social/__pycache__/gmail_data_cleaning.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/gmail_data_cleaning.cpython-36.pyc


--------------------------------------------------------------------------------
/social/__pycache__/gmail_monica_client.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/gmail_monica_client.cpython-36.pyc


--------------------------------------------------------------------------------
/social/__pycache__/gmail_preprocessing.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/gmail_preprocessing.cpython-36.pyc


--------------------------------------------------------------------------------
/social/__pycache__/linkedin_monica_client.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/linkedin_monica_client.cpython-36.pyc


--------------------------------------------------------------------------------
/social/__pycache__/linkedin_preprocessing.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/linkedin_preprocessing.cpython-36.pyc


--------------------------------------------------------------------------------
/social/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/s3afroze/monica-python/5806718f6c90e1776b680d0bf8187040b0a528d4/social/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/social/gmail_monica_client.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @author: Shahzeb Afroze
 3 | 
 4 | We will put functions to help us clean the data concerning gmail
 5 | 
 6 | """
 7 | import re
 8 | import time
 9 | import pandas as pd
10 | from datetime import datetime
11 | from social.gmail_preprocessing import Preprocessing
12 | from social.utils import Utils
13 | from pprint import pprint 
14 | 
15 | # import sys
16 | # sys.path.append('..')
17 | 
18 | from monica.conversations import Conversations
19 | from monica.contact_field_types import Contact_Field_Types
20 | 
class Gmail_Monica_Client:
	"""
	Uploads preprocessed gmail threads to monica as conversations.
	"""

	def __init__(self, access_token, wait_time=0.2):
		"""
		Builds the monica API wrappers used for the upload

		Parameters: 
		-----------
		access_token:
			credential forwarded unchanged to Conversations and Contact_Field_Types

		wait_time: float
			forwarded unchanged to Conversations

		"""

		self.conversations = Conversations(access_token=access_token, wait_time=wait_time)
		self.contact_field_types = Contact_Field_Types(access_token=access_token)

	def upload_emails_to_monica(self, df_preprocessed):
		"""
		Creates one monica conversation per distinct 'key' and adds its messages

		Parameters: 
		-----------
		df_preprocessed: pandas.DataFrame
			needs the columns 'contact_id', 'key', 'date_time' and 'date'.
			Rows whose 'contact_id' is "" or whose 'key' is missing are skipped.
			The frame itself is not modified.

		Returns: 
		-------
		None

		"""
		conversations = self.conversations

		has_contact = df_preprocessed['contact_id'] != ""  # contact exists on monica
		has_key = df_preprocessed['key'].notna()  # a missing subject yields a missing key
		uploadable = df_preprocessed[has_contact & has_key]

		contact_field_type_id = self.contact_field_types.get_contact_field_type_id("Email")

		# sort=False keeps the conversations in order of first appearance,
		# exactly like iterating over uploadable['key'].unique()
		for key, thread in uploadable.groupby('key', sort=False):
			print(key)
			print()
			# sort_values returns a new frame, so no slice of the input is written to;
			# mergesort is stable, so messages with equal timestamps keep their order
			thread = thread.sort_values(by='date_time', kind='mergesort')
			contact_id = thread['contact_id'].values[0]
			happened_at = thread['date'].values[0]  # date of the earliest message
			conversation_id = conversations.create_conversation_object(
				happened_at=happened_at,
				contact_field_type_id=contact_field_type_id,
				contact_id=contact_id,
			)
			conversations.add_multiple_messages(contact_id=contact_id, conversation_id=conversation_id, df=thread)
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/social/gmail_preprocessing.py:
--------------------------------------------------------------------------------
  1 | """
  2 | @author: Shahzeb Afroze
  3 | 
  4 | We will put functions to help us clean the data concerning gmail
  5 | 
  6 | # we can replace email by most popular email in dataframe!!
  7 | 
  8 | """
  9 | import re
 10 | import pandas as pd
 11 | from datetime import datetime
 12 | from social.utils import Utils
 13 | 
 14 | class Preprocessing:
 15 | 	def __init__(self, gmail_json_file_path, monica_contacts_file_path):
 16 | 		"""
 17 | 		Preparing gmail data for monica conversations
 18 | 
 19 | 		Parameters: 
 20 | 		-----------		
 21 | 
 22 | 		"""
 23 | 		self.gmail_json_file_path = gmail_json_file_path
 24 | 		self.monica_contacts_file_path = monica_contacts_file_path
 25 | 		self.utils = Utils()
 26 | 
 27 | 	def combine_monica_contacts_with_gmail_df(self, monica_contacts_df, gmail_df_cleaned, my_email):
 28 | 		utils = self.utils
 29 | 		contact_id_dict = utils.create_contact_id_dict(monica_contacts_df)
 30 | 		gmail_df_cleaned = gmail_df_cleaned.copy()	
 31 | 
 32 | 		gmail_df_cleaned['contact_id_from'] = gmail_df_cleaned['from_dict_name'].apply(lambda x:utils.retreive_contact_id(full_name=x, contact_id_dict=contact_id_dict))
 33 | 		gmail_df_cleaned['contact_id_to'] = gmail_df_cleaned['to_dict_name'].apply(lambda x:utils.retreive_contact_id(full_name=x, contact_id_dict=contact_id_dict))
 34 | 		
 35 | 		gmail_df_cleaned.loc[gmail_df_cleaned['from_dict_email']==my_email, 'contact_id_from'] = ""
 36 | 		gmail_df_cleaned.loc[gmail_df_cleaned['to_dict_email']==my_email, 'contact_id_to'] = ""
 37 | 
 38 | 		return gmail_df_cleaned
 39 | 
 40 | 	def prepare_dataframe(self):
 41 | 		utils = self.utils
 42 | 		
 43 | 		gmail_json_file_path = self.gmail_json_file_path
 44 | 		monica_contacts_file_path = self.monica_contacts_file_path
 45 | 
 46 | 		monica_contacts_df = pd.read_csv(monica_contacts_file_path)
 47 | 		gmail_df_raw = pd.read_json(gmail_json_file_path)
 48 | 
 49 | 		# gmail_df_cleaned = self.apply(gmail_df_raw).copy() # fixing copy error pandas
 50 | 		gmail_df_cleaned = self.apply(gmail_df_raw)			
 51 | 		my_email = utils.find_my_info(col1='to_dict_email', col2='from_dict_email', df=gmail_df_cleaned)
 52 | 		# print(my_email)
 53 | 
 54 | 		merged_df = self.combine_monica_contacts_with_gmail_df(monica_contacts_df, gmail_df_cleaned, my_email)
 55 | 
 56 | 		merged_df['written_by_me'] = False
 57 | 		merged_df.loc[merged_df['from_dict_email']==my_email, 'written_by_me'] = True
 58 | 
 59 | 		merged_df['contact_id'] = merged_df.apply(lambda x: max(str(x['contact_id_from']), str(x['contact_id_to'])), axis=1)
 60 | 
 61 | 		# key identifier
 62 | 		merged_df['key'] = merged_df['subject'] + merged_df['contact_id']
 63 | 		
 64 | 
 65 | 		return merged_df
 66 | 
 67 | 
 68 | 	def apply(self, df):
 69 | 		
 70 | 
 71 | 		df['from_dict'] = df['from'].apply(lambda x: self.extract_first_element(x))
 72 | 		df['from_dict_email'] = df['from_dict'].apply(lambda x: self.extract_email_address(x))   
 73 | 		df['from_dict_name'] = df['from_dict'].apply(lambda x: self.extract_name(x)) 
 74 | 
 75 | 		df['to_dict'] = df['to'].apply(lambda x: self.extract_first_element(x))
 76 | 		df['to_dict_email'] = df['to_dict'].apply(lambda x: self.extract_email_address(x)) 
 77 | 
 78 | 		df['to_dict_name'] = df['to_dict'].apply(lambda x: self.extract_name(x)) 
 79 | 
 80 | 		df['subject'] = df['subject'].apply(lambda x: self.clean_subject_line(x))
 81 | 		df['date_time'] = df['date']
 82 | 		df['date'] = df['date'].apply(lambda x: x.strftime('%Y-%m-%d'))
 83 | 		
 84 | 		# remove addresses with 'no reply'
 85 | 		df = df[~df['from_dict_email'].str.contains('reply', na=False)]
 86 | 
 87 | 		# remove columns not needed for upload
 88 | 		df.drop(columns=['from', 'to', 'from_dict', 'to_dict', 'receivedDate', 
 89 | 						'html', 'cc', 'headers', 
 90 | 						'priority', 'attachments', 'bcc', 'alternatives', 
 91 | 						'references', 'inReplyTo', 'replyTo'], inplace=True)
 92 | 
 93 | 		return df
 94 | 
 95 | 	# credits to @jfs from stackoverflow - https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python
 96 | 	def deEmojify(self, text):
 97 | 		"""
 98 | 		Removes emoji from text - sometimes used in subject lines
 99 | 
100 | 		Parameters: 
101 | 		-----------
102 | 		text: str
103 | 			text input could be either subject or content of email
104 | 		
105 | 
106 | 		Returns: 
107 | 		-------
108 | 		clean_text: str
109 | 			text without emoji
110 | 
111 | 		"""    
112 | 		regrex_pattern = re.compile(pattern = "["
113 | 									u"\U0001F600-\U0001F64F"  # emoticons
114 | 									u"\U0001F300-\U0001F5FF"  # symbols & pictographs
115 | 									u"\U0001F680-\U0001F6FF"  # transport & map symbols
116 | 									u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
117 | 									"]+", flags = re.UNICODE)
118 | 		try:
119 | 			clean_text = regrex_pattern.sub(r'',text)			
120 | 		except:
121 | 			clean_text = text
122 | 
123 | 		return clean_text
124 | 
125 | 	def clean_subject_line(self, subject):
126 | 		try:
127 | 			subject_lower = subject.lower()
128 | 
129 | 			subject_clean = subject_lower.replace('fwd:','')
130 | 			subject_clean = subject_clean.replace('re:','')
131 | 			subject_clean = subject_clean.replace('fw:','')
132 | 			subject_clean = subject_clean.strip()
133 | 			
134 | 			subject_clean_no_emoji = self.deEmojify(subject_clean)
135 | 
136 | 			return subject_clean_no_emoji
137 | 
138 | 		except:
139 | 			return subject
140 | 
141 | 
142 | 	def extract_first_element(self, list_of_emails):
143 | 		"""
144 | 		Extracts the first email from the list
145 | 
146 | 		Parameters: 
147 | 		-----------
148 | 		list_of_emails: list
149 | 			list of emails
150 | 		
151 | 
152 | 		Returns: 
153 | 		-------
154 | 		email: str
155 | 			first email in the list for example if the email has been sent to multiple to several people,
156 | 			only first email address will be taken into account.
157 | 
158 | 		"""    
159 | 		try:
160 | 			return list_of_emails[0]
161 | 		except:
162 | 			return ""
163 | 
164 | 	def extract_name(self, gmail_dict):
165 | 		"""
166 | 		Extracts the first name from the dict
167 | 
168 | 		Parameters: 
169 | 		-----------
170 | 		gmail_dict: dict
171 | 			dict holding the recipient or sender's information
172 | 		
173 | 
174 | 		Returns: 
175 | 		-------
176 | 		name: str
177 | 			name of the recipient or sender of email 
178 | 
179 | 		"""
180 | 		try:
181 | 			return gmail_dict['name'].lower()
182 | 		except:
183 | 			return ""
184 | 
185 | 
186 | 	def extract_email_address(self, gmail_dict):
187 | 		"""
188 | 		Extracts the email address from the dict
189 | 
190 | 		Parameters: 
191 | 		-----------
192 | 		gmail_dict: dict
193 | 			dict holding the recipient or sender's information
194 | 		
195 | 
196 | 		Returns: 
197 | 		-------
198 | 		email: str
199 | 			email of the recipient or sender depending on gmail dict 
200 | 
201 | 		"""
202 | 		try:
203 | 			return gmail_dict['address'].lower()
204 | 		except:
205 | 			return ""
206 | 
207 | 	def extract_date(self, date_time):
208 | 		"""
209 | 		Extracts the date from receivedDate key in dict and parse in a format for monica api
210 | 
211 | 
212 | 		Parameters: 
213 | 		-----------
214 | 		date_time: datetime object
215 | 			datetime object from the gmail data
216 | 		
217 | 
218 | 		Returns: 
219 | 		-------
220 | 		date: str
221 | 			Returns date in Format: YYYY-MM-DD.
222 | 
223 | 		"""
224 | 
225 | 		try:
226 | 			return date_time.strftime('%Y-%m-%d')
227 | 		except:
228 | 			return ""
229 | 
230 | 
231 | 
232 | 
233 | 
234 | 


--------------------------------------------------------------------------------
/social/linkedin_monica_client.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @author: Shahzeb Afroze
 3 | 
 4 | We will put functions to help us clean the data concerning linkedin
 5 | 
 6 | """
 7 | import re
 8 | import time
 9 | import pandas as pd
10 | from datetime import datetime
11 | from social.linkedin_preprocessing import Preprocessing
12 | from social.utils import Utils
13 | from pprint import pprint 
14 | 
15 | # import sys
16 | # sys.path.append('..')
17 | 
18 | from monica.conversations import Conversations
19 | from monica.contact_field_types import Contact_Field_Types
20 | 
class Linkedin_Monica_Client:
	"""
	Uploads preprocessed linkedin conversations to monica.
	"""

	def __init__(self, access_token, wait_time=0.2):
		"""
		Builds the monica API wrappers used for the upload

		Parameters: 
		-----------
		access_token:
			credential forwarded unchanged to Conversations and Contact_Field_Types

		wait_time: float
			forwarded unchanged to Conversations

		"""

		self.conversations = Conversations(access_token=access_token, wait_time=wait_time)
		self.contact_field_types = Contact_Field_Types(access_token=access_token)

	def upload_conversations_to_monica(self, df_preprocessed):
		"""
		Creates one monica conversation per distinct 'key' and adds its messages

		Parameters: 
		-----------
		df_preprocessed: pandas.DataFrame
			needs the columns 'contact_id', 'key', 'date_time' and 'date'.
			Rows whose 'contact_id' is "" or whose 'key' is missing are skipped.
			The frame itself is not modified.

		Returns: 
		-------
		None

		"""
		conversations = self.conversations

		has_contact = df_preprocessed['contact_id'] != ""  # contact exists on monica
		has_key = df_preprocessed['key'].notna()
		uploadable = df_preprocessed[has_contact & has_key]

		contact_field_type_id = self.contact_field_types.get_contact_field_type_id("LinkedIn")

		# sort=False keeps the conversations in order of first appearance,
		# exactly like iterating over uploadable['key'].unique()
		for _, thread in uploadable.groupby('key', sort=False):
			# sort_values returns a new frame, so no slice of the input is written to;
			# mergesort is stable, so messages with equal timestamps keep their order
			thread = thread.sort_values(by='date_time', kind='mergesort')
			contact_id = thread['contact_id'].values[0]
			happened_at = thread['date'].values[0]  # date of the earliest message
			conversation_id = conversations.create_conversation_object(
				happened_at=happened_at,
				contact_field_type_id=contact_field_type_id,
				contact_id=contact_id,
			)
			conversations.add_multiple_messages(contact_id=contact_id, conversation_id=conversation_id, df=thread)
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/social/linkedin_preprocessing.py:
--------------------------------------------------------------------------------
  1 | """
  2 | @author: Shahzeb Afroze
  3 | 
  4 | We will put functions to help us clean the data concerning linkedin
  5 | 
  6 | """
  7 | import re
  8 | import pandas as pd
  9 | from datetime import datetime
 10 | from social.utils import Utils
 11 | import jellyfish
 12 | 
 13 | class Preprocessing:
 14 | 	def __init__(self, linkedin_messages_file_path, monica_contacts_file_path):
 15 | 		"""
 16 | 		Preparing gmail data for monica conversations
 17 | 
 18 | 		Parameters: 
 19 | 		-----------		
 20 | 
 21 | 		"""
 22 | 		self.linkedin_messages_file_path = linkedin_messages_file_path
 23 | 		self.monica_contacts_file_path = monica_contacts_file_path
 24 | 		self.utils = Utils()
 25 | 
 26 | 
 27 | 	def prepare_dataframe(self):
 28 | 		utils = self.utils
 29 | 		linkedin_messages_file_path = self.linkedin_messages_file_path
 30 | 		monica_contacts_file_path = self.monica_contacts_file_path
 31 | 				
 32 | 		monica_contacts_df = pd.read_csv(monica_contacts_file_path)
 33 | 		monica_contacts_df['complete_name'] = monica_contacts_df['complete_name'].str.lower()
 34 | 
 35 | 		linkedin_df_raw = pd.read_csv(linkedin_messages_file_path)
 36 | 
 37 | 		# linkedin_df_cleaned = self.apply(linkedin_df_raw).copy() # fixing copy error pandas
 38 | 		linkedin_df_cleaned = self.apply(linkedin_df_raw)			
 39 | 		my_name = utils.find_my_info(col1='FROM', col2='TO', df=linkedin_df_cleaned)
 40 | 
 41 | 		merged_df = self.combine_monica_contacts_with_linkedin_df(monica_contacts_df, linkedin_df_cleaned, my_name)
 42 | 
 43 | 		merged_df['written_by_me'] = False
 44 | 		merged_df.loc[merged_df['FROM']==my_name, 'written_by_me'] = True
 45 | 
 46 | 		merged_df['contact_id'] = merged_df.apply(lambda x: max(str(x['contact_id_from']), str(x['contact_id_to'])), axis=1)
 47 | 
 48 | 		# key identifier
 49 | 		merged_df['key'] = merged_df['CONVERSATION ID']
 50 | 		
 51 | 
 52 | 		return merged_df
 53 | 
 54 | 
 55 | 	def apply(self, df):
 56 | 		
 57 | 		df = df[~df['FROM'].isna()]
 58 | 		df = df[~df['TO'].isna()]
 59 | 
 60 | 		df['FROM'] = df['FROM'].str.lower()
 61 | 		df['TO'] = df['TO'].str.lower()
 62 | 
 63 | 		df['FROM'] = df['FROM'].str.strip()
 64 | 		df['TO'] = df['TO'].str.strip()
 65 | 
 66 | 		df["date_time"] = pd.to_datetime(df['DATE'])
 67 | 
 68 | 		df['date'] = df['date_time'].apply(lambda x: x.strftime('%Y-%m-%d'))
 69 | 		df["text"] = df["CONTENT"].str.replace("&nbsp", " ")
 70 | 		# df["CONTENT"] = df["CONTENT"].apply(lambda x: self.deEmojify(x))
 71 | 
 72 | 
 73 | 		# remove columns not needed for upload
 74 | 		df.drop(columns=['CONVERSATION TITLE', 'FOLDER', 'SUBJECT', 'CONTENT', 'DATE'], inplace=True)
 75 | 
 76 | 		return df
 77 | 
 78 | 	def prepare_fuzzy_dataframe(self, monica_contacts_df, linkedin_df_cleaned, my_name):
 79 | 		utils = self.utils
 80 | 		
 81 | 		temp_df = pd.DataFrame()
 82 | 		temp_df['FROM']= linkedin_df_cleaned.drop_duplicates(subset=['FROM'])['FROM']
 83 | 		temp_df['TO']= linkedin_df_cleaned.drop_duplicates(subset=['TO'])['TO']
 84 | 
 85 | 		to_list = list(linkedin_df_cleaned.drop_duplicates(subset=['TO'])['TO'].reset_index(drop=True))
 86 | 		from_list = list(linkedin_df_cleaned.drop_duplicates(subset=['FROM'])['FROM'].reset_index(drop=True))
 87 | 
 88 | 		all_correspondence = from_list + to_list
 89 | 		df = pd.DataFrame(all_correspondence, columns=['linkedin_name'])
 90 | 		df.drop_duplicates(subset='linkedin_name')
 91 | 
 92 | 		df['linkedin_name'] = df['linkedin_name'].str.lower()
 93 | 		df = df[~df['linkedin_name'].isna()]
 94 | 
 95 | 		monica_contact_list = list(monica_contacts_df["complete_name"].str.lower())
 96 | 
 97 | 		# connect names of monica
 98 | 		df['monica_name'] = df['linkedin_name'].apply(lambda x:utils.fuzzy_contact_name_match(search_name=x, monica_contact_list=monica_contact_list, my_name=my_name, benchmark=0.8))
 99 | 
100 | 		return df
101 | 
102 | 
103 | 	def combine_monica_contacts_with_linkedin_df(self, monica_contacts_df, linkedin_df_cleaned, my_name):
104 | 		utils = self.utils
105 | 		fuzzy_df = self.prepare_fuzzy_dataframe(monica_contacts_df, linkedin_df_cleaned, my_name)
106 | 		fuzzy_monica_linkedin_dict = utils.create_fuzzy_monica_linkedin_dict(fuzzy_df)
107 | 			
108 | 		# connect names of monica
109 | 		linkedin_df_cleaned['monica_from_full_name'] = linkedin_df_cleaned['FROM'].apply(lambda x:utils.retreive_fuzzy_monica_name(linkedin_name=x, fuzzy_monica_linkedin_dict=fuzzy_monica_linkedin_dict))
110 | 		linkedin_df_cleaned['monica_to_full_name'] = linkedin_df_cleaned['TO'].apply(lambda x:utils.retreive_fuzzy_monica_name(linkedin_name=x, fuzzy_monica_linkedin_dict=fuzzy_monica_linkedin_dict))
111 | 
112 | 		contact_id_dict = utils.create_contact_id_dict(monica_contacts_df)
113 | 		linkedin_df_cleaned = linkedin_df_cleaned.copy()	
114 | 
115 | 		linkedin_df_cleaned['contact_id_from'] = linkedin_df_cleaned['monica_from_full_name'].apply(lambda x:utils.retreive_contact_id(full_name=x, contact_id_dict=contact_id_dict))
116 | 		linkedin_df_cleaned['contact_id_to'] = linkedin_df_cleaned['monica_to_full_name'].apply(lambda x:utils.retreive_contact_id(full_name=x, contact_id_dict=contact_id_dict))
117 | 
118 | 		linkedin_df_cleaned.loc[linkedin_df_cleaned['monica_from_full_name']==my_name, 'contact_id_from'] = ""
119 | 		linkedin_df_cleaned.loc[linkedin_df_cleaned['monica_to_full_name']==my_name, 'contact_id_to'] = ""
120 | 
121 | 		return linkedin_df_cleaned
122 | 
123 | 
124 | 	# credits to @jfs from stackoverflow - https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python
125 | 	def deEmojify(self, text):
126 | 		"""
127 | 		Removes emoji from text - sometimes used in subject lines
128 | 
129 | 		Parameters: 
130 | 		-----------
131 | 		text: str
132 | 			text input could be either subject or content of email
133 | 		
134 | 
135 | 		Returns: 
136 | 		-------
137 | 		clean_text: str
138 | 			text without emoji
139 | 
140 | 		"""    
141 | 		regrex_pattern = re.compile(pattern = "["
142 | 									u"\U0001F600-\U0001F64F"  # emoticons
143 | 									u"\U0001F300-\U0001F5FF"  # symbols & pictographs
144 | 									u"\U0001F680-\U0001F6FF"  # transport & map symbols
145 | 									u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
146 | 									"]+", flags = re.UNICODE)
147 | 		try:
148 | 			clean_text = regrex_pattern.sub(r'',text)			
149 | 		except:
150 | 			clean_text = text
151 | 
152 | 		return clean_text
153 | 
154 | 	def clean_subject_line(self, subject):
155 | 		try:
156 | 			subject_lower = subject.lower()
157 | 
158 | 			subject_clean = subject_lower.replace('fwd:','')
159 | 			subject_clean = subject_clean.replace('re:','')
160 | 			subject_clean = subject_clean.replace('fw:','')
161 | 			subject_clean = subject_clean.strip()
162 | 			
163 | 			subject_clean_no_emoji = self.deEmojify(subject_clean)
164 | 
165 | 			return subject_clean_no_emoji
166 | 
167 | 		except:
168 | 			return subject
169 | 
170 | 


--------------------------------------------------------------------------------
/social/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | @author: Shahzeb Afroze
  3 | 
  4 | Official Documentation: https://www.monicahq.com/api/
  5 | 
  6 | Description: We will put all general functions to help the other classes
  7 | 
  8 | """
  9 | import pandas as pd
 10 | from monica.conversations import Conversations
 11 | import jellyfish
 12 | 
 13 | basic_api = 'https://app.monicahq.com/api'
 14 | 
 15 | class Utils:	
 16 | 	def __init__(self, wait_time=1):
 17 | 		"""
 18 | 		Helper class for several classes
 19 | 
 20 | 		Parameters: 
 21 | 		-----------
 22 | 			wait_time: int
 23 | 				seconds to wait after every request sent
 24 | 
 25 | 		"""		
 26 | 		
 27 | 		self.wait_time = wait_time
 28 | 
 29 | 
 30 | 	def retreive_contact_id(self, contact_id_dict, full_name):
 31 | 		"""
 32 | 		Merges json resonses to create 1 complete json file
 33 | 
 34 | 		Parameters: 
 35 | 		-----------
 36 | 		json_orig: json/dict
 37 | 			The previous json response from monica
 38 | 
 39 | 		json_new: json/dict
 40 | 			The most recent json response from monica
 41 | 		    		
 42 | 		Returns: 
 43 | 		-------
 44 | 		json_merged: dict/json
 45 | 			Appending lists within json where ever possible and overwritting the information 
 46 | 			with the latest json_response where not possible. This will help in the json response
 47 | 			staying consistent with how the orignal json file looks
 48 | 
 49 | 
 50 | 		"""
 51 | 		# we can later apply fuzzy search if later needed
 52 | 		
 53 | 
 54 | 		try:
 55 | 			contact_id = contact_id_dict[full_name]
 56 | 		except:
 57 | 			contact_id = ''
 58 | 
 59 | 
 60 | 		return contact_id
 61 | 	
 62 | 	def create_contact_id_dict(self, monica_contacts_df):
 63 | 		monica_contacts_df.sort_values(by="complete_name", inplace=True)
 64 | 		monica_contacts_df["complete_name"] = monica_contacts_df["complete_name"].str.lower()
 65 | 		monica_contacts_df.drop_duplicates(subset=["complete_name"], inplace=True)
 66 | 
 67 | 		contact_id_dict = pd.Series(monica_contacts_df.id.values,index=monica_contacts_df.complete_name).to_dict()
 68 | 
 69 | 		return contact_id_dict
 70 | 
 71 | 	def create_fuzzy_monica_linkedin_dict(self, fuzzy_df):
 72 | 
 73 | 		fuzzy_monica_linkedin_dict = pd.Series(fuzzy_df.linkedin_name.values,index=fuzzy_df.monica_name).to_dict()
 74 | 
 75 | 		return fuzzy_monica_linkedin_dict
 76 | 	
 77 | 	def retreive_fuzzy_monica_name(self, fuzzy_monica_linkedin_dict, linkedin_name):
 78 | 		try:
 79 | 			monica_name = fuzzy_monica_linkedin_dict[linkedin_name]
 80 | 		except:
 81 | 			monica_name = ''
 82 | 
 83 | 
 84 | 		return monica_name
 85 | 	
 86 | 
 87 | 	def most_frequent(self, List): 
 88 | 		return max(set(List), key = List.count) 
 89 | 
 90 | 	# to be used for converting dataframe into columns and applying it to most frequent
 91 | 	def find_my_info(self, col1, col2, df):
 92 | 		list_1=list(df[col1].values)
 93 | 		list_2=list(df[col2].values)
 94 | 
 95 | 		all_info=list_1 + list_2
 96 | 		my_info = self.most_frequent(all_info)
 97 | 		
 98 | 		return my_info
 99 | 
100 | 
101 | 	def fuzzy_contact_name_match(self, search_name, monica_contact_list, my_name, benchmark=0.85):
102 | 		all_score = []
103 | 		if search_name!=my_name:
104 | 			for monica_contact in monica_contact_list:
105 | 				score = jellyfish.jaro_winkler_similarity(search_name, monica_contact)			
106 | 				all_score.append(score)
107 | 		
108 | 			name_matched = self.find_max_score_name(monica_contact_list, all_score, benchmark)
109 | 			return name_matched
110 | 
111 | 
112 | 	def find_max_score_name(self, monica_contact_list, all_score, benchmark):
113 | 		max_score = max(all_score)
114 | 		if max_score>=benchmark:
115 | 			index_of_max_score = all_score.index(max_score)
116 | 			name_with_highest_similarity = monica_contact_list[index_of_max_score]
117 | 			return name_with_highest_similarity
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------