├── LICENSE
├── PythonScript.py
└── README.md


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Fabian Maume
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/PythonScript.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Wed Nov 24 10:34:30 2021
  4 | 
  5 | @author: Fabian Maume. Tetriz.io
  6 | """
  7 | 
  8 | #%%
  9 | import requests, json
 10 | import time
 11 | import pandas as pd
 12 | 
 13 | #Parrameters
 14 | Key = "Add you own key" 
 15 | Target_jobs = ["job title 1", "job title 2"]
 16 | Lead_list = "C:/Users/fabia/Downloads/QApop outreach 2 (1).xlsx"
 17 | Website_collum = "Website"
 18 | out_path = "C:\\Users\\fabia\\Dropbox\\QApopOutreach2.xlsx"
 19 | out_path_not_found = "C:\\Users\\fabia\\Dropbox\\LeadForPhantombuster.xlsx"
 20 | #Parrameters
 21 | 
 22 | 
 23 | 
 24 | #
 25 | data = pd.read_excel(Lead_list)
 26 | target = data[Website_collum]
 27 | 
 28 | #remove any duplicate
 29 | target = list(set(target))
 30 | 
 31 | 
 32 | 
 33 | def get_comapny_data(Key, domain):
 34 |     response = requests.get("https://api.apollo.io/v1/organizations/enrich?api_key="+ Key + "&domain=" + domain)
 35 |     result = response.json()
 36 |     try:
 37 |         result = result["organization"]
 38 |     except:
 39 |         print("no data for")
 40 |         print(domain)
 41 |     try:
 42 |         alexa_ranking = result["alexa_ranking"]
 43 |     except:
 44 |         alexa_ranking = "unknown"
 45 |     try:
 46 |         annual_revenue = result["annual_revenue"]
 47 |     except:
 48 |         annual_revenue = "unknown"
 49 |     try:
 50 |         country  = result["country"]
 51 |     except:
 52 |         country  = "unknown"
 53 |     try:
 54 |         estimated_num_employees  = result["estimated_num_employees"]
 55 |     except:
 56 |         estimated_num_employees  = "unknown"
 57 |     try:
 58 |         industry = result["industry"]
 59 |     except:
 60 |         industry = "unknown"
 61 |     try:
 62 |         keywords  = result["keywords"]
 63 |     except:
 64 |         keywords  = "unknown"
 65 |     try:
 66 |         Linkedin_uid = result["linkedin_uid"]
 67 |     except:
 68 |         Linkedin_uid ="unknown"
 69 |     
 70 |     return alexa_ranking, annual_revenue, country, estimated_num_employees, industry, keywords  , Linkedin_uid 
 71 | 
 72 | def getEmailEstension(email):
 73 |     start = email.index("@")
 74 |     result = email[start+1:len(email)]
 75 |     return result
 76 | 
 77 | def getDomaine(url):
 78 |     #short url to the part after http:// or https://
 79 |     start = url.index("//")
 80 |     result = url[start+2:len(url)]
 81 |     
 82 |     #remove / at the end of url
 83 |     if result[len(result)-1] == "/":
 84 |         result = result[0:len(result)-2]
 85 |     
 86 |     
 87 |     return result
 88 | 
 89 | def getEmployee(url, job):
 90 |         body =  {"api_key": Key,  "q_organization_domains": url, "page":1, "person_titles" : job}
 91 |         response = requests.post("https://api.apollo.io/v1/mixed_people/search", json = body)
 92 |         
 93 |         
 94 |         #response = requests.post("https://api.apollo.io/v1/mixed_people/search", data = {"api_key": Key,  "q_organization_domains": url, "page": 1, "person_titles" : ["organic", "SEO"]}, headers= { "Content-Type": "application/json", "Cache-Control": "no-cache"})
 95 |         result = response.json()
 96 |         return result
 97 |         
 98 |         s = requests.Session()
 99 |         s.headers.update({ "Content-Type: application/json"})
100 |         
101 | 
102 | 
103 | 
#########
# Get the list of employees for every target website.

def _search_people(page):
    """Return the decoded Apollo people-search response for one page."""
    body = {
        "api_key": Key,
        "q_organization_domains": List_website,
        "page": page,
        "person_titles": Target_jobs,
    }
    response = requests.post(
        "https://api.apollo.io/v1/mixed_people/search", json=body
    )
    return response.json()


# Original entries, kept to check later which websites produced no lead.
Domain_list = list(target)

# Newline-separated domains, as sent in q_organization_domains.
List_website = "\n".join(getDomaine(url) for url in target)

People = list()

# Query the first page; it also tells how many pages exist.
result = _search_people(1)
People.extend(result["people"])
total_page = result["pagination"]["total_pages"]
page = 2

# Get the data from all the remaining pages, using the same job titles
# as the first page.
while page <= total_page:
    try:
        result = _search_people(page)
        People.extend(result["people"])
    except (KeyError, TypeError, ValueError, requests.RequestException):
        print("WARNING")
        print("run out of API calls")
        print("page:" + str(page))
        print("WARNING")
        # Stop here: retrying the same page would loop forever, so keep
        # whatever has been collected so far.
        break
    page = page + 1
144 |     
# Post-process people: flatten the API records into one list per column.

def _person_field(person, *path):
    """Return ``person[path[0]][path[1]]...`` or ``"unknwon"`` if unavailable."""
    value = person
    try:
        for key in path:
            value = value[key]
    except (KeyError, TypeError, IndexError):
        return "unknwon"
    return value


country = [_person_field(person, "country") for person in People]
first_name = [_person_field(person, "first_name") for person in People]
headline = [_person_field(person, "headline") for person in People]
last_name = [_person_field(person, "last_name") for person in People]
linkedin_url = [_person_field(person, "linkedin_url") for person in People]
oragnization = [
    _person_field(person, "organization", "website_url") for person in People
]
title = [_person_field(person, "title") for person in People]


export = pd.DataFrame({
    'country': country,
    'first_name': first_name,
    'headline': headline,
    'last_name': last_name,
    'linkedin_url': linkedin_url,
    'oragnization': oragnization,
    'title': title,
    "detail": People,
})


# Engine options go through ``engine_kwargs``; newer pandas releases no
# longer accept extra keyword arguments such as ``options=``. The context
# manager closes the workbook even if writing fails.
with pd.ExcelWriter(
    out_path,
    engine='xlsxwriter',
    engine_kwargs={'options': {'strings_to_urls': False}},
) as writer:
    export.to_excel(writer, header=True, index=False)
201 | 
#%%
# List the input websites for which no lead was found.

def _domain_key(url):
    """Return ``getDomaine(url)``, or None when *url* cannot be parsed."""
    try:
        return getDomaine(url)
    except (AttributeError, IndexError, TypeError, ValueError):
        return None


# Compare both sides in the same scheme-less form. Matching the raw input
# URLs against scheme-stripped result domains never matched anything, so
# every input was reported as "not found".
Full_list = pd.DataFrame({"Domain": Domain_list})
Full_list["Domain_key"] = [_domain_key(url) for url in Domain_list]

# Domains for which at least one lead came back. Placeholder or missing
# website values that cannot be parsed are skipped.
found_url = list(dict.fromkeys(
    key for key in map(_domain_key, oragnization) if key is not None
))
found = [True] * len(found_url)
# NOTE(review): hosts that differ only by a "www." prefix or by a path
# are still treated as different domains - confirm against real API data.

Result_list = pd.DataFrame({"Domain_key": found_url, "Found": found})

result = pd.merge(Full_list, Result_list, how="left", on="Domain_key")
result = result[result["Found"].isna()].drop(columns="Domain_key")

# Engine options go through ``engine_kwargs``; newer pandas releases no
# longer accept extra keyword arguments such as ``options=``.
with pd.ExcelWriter(
    out_path_not_found,
    engine='xlsxwriter',
    engine_kwargs={'options': {'strings_to_urls': False}},
) as writer:
    result.to_excel(writer, header=True, index=False)
223 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # apollo.io-API-for-linkedin-lead-generation
2 | Python script to call [apollo.io](https://apollo.grsm.io/9dm66e70xo97) API in order to transform a list of websites into a list of leads based on your ideal customer profile (ICP).
3 | 
4 | A tutorial on how to use the script is available on [my Medium](https://fabian-maume.medium.com/how-to-use-appolo-io-api-to-quickly-turn-a-list-of-websites-into-a-list-of-leads-2d6b73c082dd).
5 | 


--------------------------------------------------------------------------------