├── README.md ├── config.json ├── cookies.png ├── demo.gif ├── nqntnqnqmb.py ├── nqntnqnqmb ├── __init__.py └── core.py └── setup.py /README.md: -------------------------------------------------------------------------------- 1 | # Nqntnqnqmb 2 | 👋 Hi there! For any professional inquiries or collaborations, please reach out to me at: 3 | megadose@protonmail.com 4 | 5 | 📧 Preferably, use your professional email for correspondence. Let's keep it short and sweet, and all in English! 6 | 7 | ![PyPI](https://img.shields.io/pypi/v/nqntnqnqmb) ![PyPI - Week](https://img.shields.io/pypi/dw/nqntnqnqmb) ![PyPI - Downloads](https://static.pepy.tech/badge/nqntnqnqmb) ![PyPI - License](https://img.shields.io/pypi/l/nqntnqnqmb) 8 | #### For BTC Donations : 1FHDM49QfZX6pJmhjLE5tB2K6CaTLMZpXZ 9 | ## Educational purposes only 10 | Allows you to retrieve information on linkedin profiles, companies on linkedin and search on linkedin companies/persons 11 | 12 | ## Project example : [Nqntnqnqmb maltego](https://github.com/megadose/nqntnqnqmb-maltego) 13 | 14 | # Demo 15 | ![](demo.gif) 16 | 17 | ## ![hammer_and_wrench](https://github.githubassets.com/images/icons/emoji/unicode/1f6e0.png) Installation 18 | 19 | ### With PyPI 20 | 21 | ```bash 22 | pip3 install nqntnqnqmb 23 | ``` 24 | 25 | ### With Github 26 | 27 | ```bash 28 | git clone https://github.com/megadose/nqntnqnqmb.git 29 | cd nqntnqnqmb/ 30 | python3 setup.py install 31 | ``` 32 | 33 | # Usage of nqntnqnqmb.py 34 | You just have to put the li_at and JSESSIONID cookies in the config.json file. 35 | ![](cookies.png) 36 | If you want to do automatic account rotations, simply add the cookies like this 37 | ```json 38 | [{ 39 | "li_at": "1st account", 40 | "JSESSIONID": "1st account" 41 | }, 42 | { 43 | "li_at": "2nd account", 44 | "JSESSIONID": "2nd account" 45 | }] 46 | ``` 47 | 48 | 49 | ``` 50 | usage: nqntnqnqmb.py [-h] --mode MODE [--company COMPANY] [--profile PROFILE] 51 | [--searchCompany SEARCHCOMPANY] 52 | [--searchProfile SEARCHPROFILE] --output OUTPUT 53 | 54 | optional arguments: 55 | -h, --help show this help message and exit 56 | --mode MODE There are different modes: getEmployees to get all 57 | employees of a company, getProfileInformations to get 58 | all informations on a profile, searchCompany to search 59 | a company from the name of the company , searchProfile 60 | to search a profile from a name 61 | --company COMPANY Url of the company for get all employes (getEmployees) 62 | --profile PROFILE Url of the profile for get all informations 63 | (getProfileInformations) 64 | --searchCompany SEARCHCOMPANY 65 | The name of the target company (searchCompany) 66 | --searchProfile SEARCHPROFILE 67 | The name of the target (searchProfile) 68 | --output OUTPUT Name of the csv output file 69 | ``` 70 | # Usage of nqntnqnqmb with python: 71 | 72 | ```python 73 | from nqntnqnqmb import * 74 | getCompanyFromName(company,JSESSIONID,li_at)#Search company on linkedin from name 75 | getProfileFromName(search_string,JSESSIONID,li_at)#Search linkedin profile from name (Simple Usage) 76 | getProfileFromName(search_string,JSESSIONID,li_at,pages_to_scrape=5,results_per_page=20)#Search linkedin profile from name (Advanced Usage) 77 | getCompanyFromProfile(profile_url,JSESSIONID,li_at)#Extract linkedin company from a profile 78 | getAllEmployees(company,JSESSIONID,li_at)#Get all employees of a company from the linkedin company url 79 | getContactInformations(profile_url,JSESSIONID,li_at)#Extract contact informations from a profiles like the email, phone number and more 80 | ``` 81 | 82 | # Thank To : 83 | - [linkedin-scraper](https://github.com/hakimkhalafi/linkedin-scraper) 84 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "li_at": "", 3 | "JSESSIONID": "" 4 | }] 5 | -------------------------------------------------------------------------------- /cookies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megadose/nqntnqnqmb/3e83919cab2f4e198c92325ce00cafcee7cdf2ec/cookies.png -------------------------------------------------------------------------------- /demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megadose/nqntnqnqmb/3e83919cab2f4e198c92325ce00cafcee7cdf2ec/demo.gif -------------------------------------------------------------------------------- /nqntnqnqmb.py: -------------------------------------------------------------------------------- 1 | from nqntnqnqmb import * 2 | import random,argparse,csv 3 | parser = argparse.ArgumentParser() 4 | parser.add_argument("--mode", help="There are different modes: getEmployees to get all employees of a company, getProfileInformations to get all informations on a profile, searchCompany to search a company from the name of the company , searchProfile to search a profile from a name",required=True) 5 | parser.add_argument("--company", help="Url of the company for get all employes (getEmployees)",required=False) 6 | parser.add_argument("--profile", help="Url of the profile for get all informations (getProfileInformations)",required=False) 7 | parser.add_argument("--searchCompany", help="The name of the target company (searchCompany)",required=False) 8 | parser.add_argument("--searchProfile", help="The name of the target (searchProfile)",required=False) 9 | parser.add_argument("--output", help="Name of the csv output file",required=True) 10 | args = parser.parse_args() 11 | 12 | with open('./config.json') as config_file: 13 | config = random.choice(json.load(config_file)) 14 | 15 | if args.mode == "getEmployees": 16 | if args.company!=None: 17 | company=args.company.split("company/")[1].replace("/","") 18 | result= getAllEmployees(company,config["JSESSIONID"],config["li_at"]) 19 | keys = result[0].keys() 20 | with open(str(args.output), 'w') as csvfile: 21 | writer = csv.DictWriter(csvfile, fieldnames=keys) 22 | writer.writeheader() 23 | for data in result: 24 | writer.writerow(data) 25 | print("You can see all employes in : "+str(args.output)) 26 | elif args.mode == "getProfileInformations": 27 | if args.profile!=None: 28 | profileTarget=args.profile 29 | try: 30 | result= getCompanyFromProfile(profileTarget,config["JSESSIONID"],config["li_at"]) 31 | keys = result[0].keys() 32 | with open("companys_"+str(args.output), 'w') as csvfile: 33 | writer = csv.DictWriter(csvfile, fieldnames=keys) 34 | writer.writeheader() 35 | for data in result: 36 | writer.writerow(data) 37 | print("You can see all companys found for the target profile in : "+"companys_"+str(args.output)) 38 | except: 39 | print("Problems with the company of the target profiles") 40 | try: 41 | profileTarget=args.profile 42 | if profileTarget[len(profileTarget)-1]=="/": 43 | profileTarget=profileTarget[:len(profileTarget)-1] 44 | result= getContactInformations(profileTarget,config["JSESSIONID"],config["li_at"]) 45 | keys = result.keys() 46 | with open("informations_"+str(args.output), 'w') as csvfile: 47 | writer = csv.DictWriter(csvfile, fieldnames=keys) 48 | writer.writeheader() 49 | writer.writerow(result) 50 | print("You can see the contact informations the target profile in : "+"informations_"+str(args.output)) 51 | except: 52 | print("Problems with the contact informations of the target profiles") 53 | elif args.mode == "searchCompany": 54 | if args.searchCompany!=None: 55 | company=args.searchCompany 56 | result= getCompanyFromName(company,config["JSESSIONID"],config["li_at"]) 57 | keys = result[0].keys() 58 | with open(str(args.output), 'w') as csvfile: 59 | writer = csv.DictWriter(csvfile, fieldnames=keys) 60 | writer.writeheader() 61 | for data in result: 62 | writer.writerow(data) 63 | print("You can see all found companys in : "+str(args.output)) 64 | elif args.mode == "searchProfile": 65 | if args.searchProfile!=None: 66 | profile=args.searchProfile 67 | result= getProfileFromName(profile,config["JSESSIONID"],config["li_at"]) 68 | keys = result[0].keys() 69 | with open(str(args.output), 'w') as csvfile: 70 | writer = csv.DictWriter(csvfile, fieldnames=keys) 71 | writer.writeheader() 72 | for data in result: 73 | writer.writerow(data) 74 | print("You can see all found profiles in : "+str(args.output)) 75 | -------------------------------------------------------------------------------- /nqntnqnqmb/__init__.py: -------------------------------------------------------------------------------- 1 | from nqntnqnqmb.core import * 2 | -------------------------------------------------------------------------------- /nqntnqnqmb/core.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from fake_useragent import UserAgent 3 | import json,random,string,requests,sys,os,time,traceback,urllib 4 | 5 | ua = UserAgent(verify_ssl=False) 6 | 7 | def getCompanyFromName(company,JSESSIONID,li_at): 8 | 9 | cookies = {'JSESSIONID':JSESSIONID} 10 | cookies['li_at'] = li_at 11 | headers = {'Csrf-Token': JSESSIONID, 12 | 'User-Agent': ua.firefox} 13 | 14 | params = ( 15 | ('keywords', company), 16 | ('origin', 'GLOBAL_SEARCH_HEADER'), 17 | ('q', 'blended'), 18 | ) 19 | 20 | response = requests.get('https://www.linkedin.com/voyager/api/typeahead/hitsV2', headers=headers, params=params, cookies=cookies) 21 | result=[] 22 | for i in response.json()["elements"]: 23 | if i["type"]=="COMPANY": 24 | if "miniCompany" in i["image"]["attributes"][0].keys(): 25 | if i["image"]["attributes"][0]["miniCompany"]["logo"]==None: 26 | logo="" 27 | else: 28 | logo=i["image"]["attributes"][0]["miniCompany"]["logo"]["com.linkedin.common.VectorImage"]["rootUrl"]+i["image"]["attributes"][0]["miniCompany"]["logo"]["com.linkedin.common.VectorImage"]["artifacts"][0]["fileIdentifyingUrlPathSegment"] 29 | result.append({"name":i["image"]["attributes"][0]["miniCompany"]["name"],"urlCompany":"https://www.linkedin.com/company/"+i["image"]["attributes"][0]["miniCompany"]["universalName"],"logo":logo}) 30 | return(result) 31 | def getProfileFromName(search_string,JSESSIONID,li_at,pages_to_scrape=5,results_per_page=20): 32 | search_results = [] 33 | 34 | 35 | cookies = {'JSESSIONID':JSESSIONID} 36 | cookies['li_at'] = li_at 37 | headers = {'Csrf-Token': JSESSIONID, 38 | 'User-Agent': ua.firefox} 39 | 40 | search_url = "https://www.linkedin.com/voyager/api/search/cluster?" \ 41 | "count=%i&guides=List(v-%%3EPEOPLE,facetGeoRegion-%%3Ear" \ 42 | "%%3A0)&keywords=%s&origin=FACETED_SEARCH&q=guided&start=0" 43 | 44 | page_url = "https://www.linkedin.com/voyager/api/search/cluster?" \ 45 | "count=%i&guides=List(v-%%3EPEOPLE,facetGeoRegion-%%3Ear" \ 46 | "%%3A0)&keywords=%s&origin=FACETED_SEARCH&q=guided&start=%i" 47 | 48 | 49 | url = search_url % (results_per_page, 50 | search_string) 51 | try: 52 | r = requests.get(url, cookies=cookies, headers=headers) 53 | except Exception: 54 | exit() 55 | try: 56 | content = json.loads(r.text) 57 | except: 58 | return(r.text) 59 | 60 | data_total = content['paging']['total'] 61 | 62 | pages = data_total / results_per_page 63 | if data_total % results_per_page == 0: 64 | pages = pages - 1 65 | if pages == 0: 66 | pages = 1 67 | 68 | if data_total > 1000: 69 | pages = pages_to_scrape 70 | 71 | pages=int(pages)+1 72 | for p in range(pages): 73 | # Request results for each page using the start offset 74 | 75 | url = page_url % (results_per_page, 76 | search_string, 77 | p*results_per_page) 78 | 79 | r = requests.get(url, cookies=cookies, headers=headers) 80 | 81 | content = r.text.encode('UTF-8') 82 | content = json.loads(content.decode("utf-8")) 83 | 84 | #print"Fetching page %i (contains %i results)" % 85 | #(p+1, len(content['elements'][0]['elements']))) 86 | 87 | profiles_skipped = False 88 | for c in content['elements'][0]['elements']: 89 | try: 90 | # Using these lookup strings to shorten query lines below 91 | lookup = 'com.linkedin.voyager.search.SearchProfile' 92 | h = 'hitInfo' 93 | m = 'miniProfile' 94 | 95 | # Doesn't work anymore 96 | pic_url = "https://media.licdn.com/mpr/mpr/shrinknp_400_400"+"%s" 97 | pic_query = "com.linkedin.voyager.common.MediaProcessorImage" 98 | 99 | if not c[h][lookup]['headless']: 100 | try: 101 | data_industry = c[h][lookup]['industry'] 102 | except Exception: 103 | data_industry = "" 104 | 105 | data_firstname = c[h][lookup][m]['firstName'] 106 | data_lastname = c[h][lookup][m]['lastName'] 107 | data_url = "https://www.linkedin.com/in/%s" % \ 108 | c[h][lookup][m]['publicIdentifier'] 109 | data_occupation = c[h][lookup][m]['occupation'] 110 | data_location = c[h][lookup]['location'] 111 | # This section doesn't work 112 | try: 113 | data_picture = c[h][lookup][m]["picture"]["com.linkedin.common.VectorImage"]["rootUrl"]+c[h][lookup][m]["picture"]["com.linkedin.common.VectorImage"]["artifacts"][0]["fileIdentifyingUrlPathSegment"] 114 | except Exception: 115 | # No pic found for (data_firstn, data_lastn, d_occ) 116 | data_picture = "" 117 | search_results.append({"firstname":data_firstname,"lastname":data_lastname,"occupation":data_occupation,"profile-url":data_url,"location":data_location,"industry":data_industry,"picture-url":data_picture}) 118 | else: 119 | pass 120 | except Exception: 121 | profiles_skipped = True 122 | continue 123 | if profiles_skipped: 124 | pass 125 | return(search_results) 126 | def getCompanyFromProfile(profile_url,JSESSIONID,li_at): 127 | def scraping_dict(): 128 | items = { 129 | "fs_course": ["name"], 130 | 131 | "fs_education": 132 | ['schoolName', 'description', 'degreeName', 'activities', 'grade', 133 | 'fieldOfStudy', 'projects', 'entityLocale', 'recommendations'], 134 | 135 | "fs_honor": ['title', 'description', 'issuer'], 136 | 137 | "fs_language": ['name'], 138 | 139 | "fs_position": 140 | ['companyName', 'description', 'title', {"company": "industries"}, 141 | 'courses', 'locationName', 'projects', 'entityLocale', 142 | 'organizations', 'region', 'recommendations', 'honors', 143 | 'promotion'], 144 | 145 | "fs_profile": ["headline", "summary", "industryName", "locationName"], 146 | 147 | "fs_project": ['title', 'occupation', 'description'], 148 | 149 | "fs_publication": ['name', 'publisher', 'description'], 150 | 151 | "fs_skill": ["name"] 152 | } 153 | 154 | return items 155 | 156 | 157 | cookies = {'JSESSIONID':JSESSIONID} 158 | cookies['li_at'] = li_at 159 | headers = {'Csrf-Token': JSESSIONID, 160 | 'User-Agent': ua.firefox} 161 | 162 | fields_to_scrape = scraping_dict().keys() 163 | empty_dict = dict.fromkeys(fields_to_scrape, None) 164 | 165 | data_dict = {} 166 | try: 167 | r = requests.get(profile_url, cookies=cookies, headers=headers) 168 | except Exception: 169 | print(traceback.format_exc()) 170 | exit() 171 | 172 | 173 | soup = BeautifulSoup(r.text, "html.parser") 174 | found = soup.find( 175 | lambda tag: tag.name == "code" and "*profile" in tag.text) 176 | extract = found.contents[0].strip() 177 | 178 | data = json.loads(extract) 179 | results=[] 180 | for i in data["included"]: 181 | if "url" in i.keys() and i["url"]!=None: 182 | if "https://www.linkedin.com/company/" in i["url"]: 183 | 184 | if i["logo"]!=None: 185 | logo=str(i["logo"]["vectorImage"]["rootUrl"]+i["logo"]["vectorImage"]["artifacts"][0]["fileIdentifyingUrlPathSegment"]) 186 | else: 187 | logo="" 188 | results.append({"name":i["name"].replace('é','é'),"linkedin_url":i["url"],"logo":logo}) 189 | 190 | return(results) 191 | def getAllEmployees(company,JSESSIONID,li_at): 192 | 193 | headers={'Accept-Language': 'en,en-US;q=0.5', 194 | 'Accept-Encoding': 'gzip, deflate, br', 195 | 'DNT': '1', 196 | 'Connection': 'keep-alive', 'Accept': 'application/vnd.linkedin.normalized+json+2.1', 197 | 'User-Agent': ua.firefox, 198 | 'csrf-token': JSESSIONID, 'Host': 'www.linkedin.com', 199 | 'TE': 'Trailers', 200 | 'x-restli-protocol-version': '2.0.0'} 201 | 202 | cookies = {'JSESSIONID':JSESSIONID} 203 | cookies['li_at'] = li_at 204 | result = [] 205 | params = ( 206 | ('companyIdOrUniversalName', company), 207 | ('count', '3'), 208 | ('moduleKey', 'ORGANIZATION_MEMBER_FEED_DESKTOP'), 209 | ('numComments', '0'), 210 | ('numLikes', '0'), 211 | ('q', 'companyRelevanceFeed'), 212 | ) 213 | response = requests.get('https://www.linkedin.com/voyager/api/organization/updatesV2', headers=headers, params=params, cookies=cookies) 214 | idcompany=response.text.split("urn:li:company:")[1].split('"')[0] 215 | response = requests.get('https://www.linkedin.com/voyager/api/search/hits?count=49&educationEndYear=List()&educationStartYear=List()&facetCurrentCompany=List('+idcompany+')&facetCurrentFunction=List()&facetFieldOfStudy=List()&facetGeoRegion=List()&facetNetwork=List()&facetSchool=List()&facetSkillExplicit=List()&keywords=List()&maxFacetValues=49&origin=organization&q=people&start=0&supportedFacets=List(GEO_REGION,SCHOOL,CURRENT_COMPANY,CURRENT_FUNCTION,FIELD_OF_STUDY,SKILL_EXPLICIT,NETWORK)',headers=headers,cookies=cookies) 216 | max=int(response.json()["data"]["metadata"]["totalResultCount"])-49 217 | result.append(response.json()["included"]) 218 | c=0 219 | while max >0: 220 | c+=49 221 | response = requests.get('https://www.linkedin.com/voyager/api/search/hits?count=49&educationEndYear=List()&educationStartYear=List()&facetCurrentCompany=List('+idcompany+')&facetCurrentFunction=List()&facetFieldOfStudy=List()&facetGeoRegion=List()&facetNetwork=List()&facetSchool=List()&facetSkillExplicit=List()&keywords=List()&maxFacetValues=49&origin=organization&q=people&start='+str(c)+'&supportedFacets=List(GEO_REGION,SCHOOL,CURRENT_COMPANY,CURRENT_FUNCTION,FIELD_OF_STUDY,SKILL_EXPLICIT,NETWORK)',headers=headers,cookies=cookies) 222 | max=max-49 223 | result.append(response.json()["included"]) 224 | results=[] 225 | for profile in result: 226 | for pro in profile: 227 | if "occupation" in pro.keys(): 228 | name = pro["firstName"] + " " + pro["lastName"] 229 | if name == " ": 230 | pro["firstName"]="Linkedin" 231 | pro["lastName"]="User" 232 | if pro["picture"]==None: 233 | propicture="" 234 | else: 235 | propicture=pro["picture"]["rootUrl"]+pro["picture"]["artifacts"][2]["fileIdentifyingUrlPathSegment"] 236 | results.append({"firstname":pro["firstName"],"lastname":pro["lastName"],'occupation':pro["occupation"],"profile-url":"https://www.linkedin.com/in/"+str(pro["publicIdentifier"]),"picture-url":propicture}) 237 | return(results) 238 | def GetContactInformations(profile_url,JSESSIONID,li_at): 239 | profile_url=profile_url+"/detail/contact-info/" 240 | 241 | headers = { 242 | 'authority': 'www.linkedin.com', 243 | 'cache-control': 'max-age=0', 244 | 'upgrade-insecure-requests': '1', 245 | 'user-agent': ua.firefox, 246 | 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 247 | 'sec-fetch-site': 'none', 248 | 'sec-fetch-mode': 'navigate', 249 | 'sec-fetch-user': '?1', 250 | 'sec-fetch-dest': 'document', 251 | 'accept-language': 'en-US,en;q=0.9,fr;q=0.8,fr-FR;q=0.7', 252 | 'cookie': 'JSESSIONID="'+JSESSIONID+'";li_at='+li_at+';', 253 | } 254 | 255 | 256 | 257 | try: 258 | r = requests.get(profile_url, headers=headers) 259 | except Exception: 260 | print(traceback.format_exc()) 261 | exit() 262 | 263 | 264 | soup = BeautifulSoup(r.text, "html.parser") 265 | data =json.loads('{"data":{"birthDateOn"'+str(soup).split('"data":{"birthDateOn"')[1].split('},"included":[')[0]+'}}') 266 | months = {"1":"January", 267 | "2":"Febuary", 268 | "3":"March", 269 | "4":"April", 270 | "5":"May", 271 | "6":"June", 272 | "7":"July", 273 | "8":"August", 274 | "9":"September", 275 | "10":"October", 276 | "11":"November", 277 | "12":"December"} 278 | data=data["data"] 279 | if data['birthDateOn']!=None: 280 | birthDate=str(data['birthDateOn']["day"])+" "+months[str(data['birthDateOn']["month"])] 281 | else: 282 | birthDate=None 283 | twittersAccount=[] 284 | if data["twitterHandles"]!=None: 285 | for twitter in data["twitterHandles"]: 286 | twittersAccount.append(twitter["name"]) 287 | 288 | emailAddress=data["emailAddress"] 289 | address=data["address"] 290 | websites=[] 291 | if data["websites"]!=None: 292 | for website in data["websites"]: 293 | websites.append(website["url"]) 294 | phoneNumbers=[] 295 | if data["phoneNumbers"]!=None: 296 | for phoneNumber in data["phoneNumbers"]: 297 | phoneNumbers.append(phoneNumber["number"]) 298 | return({"birthDate":birthDate,"twittersAccount":twittersAccount,"emailAddress":emailAddress,"address":address,"websites":websites,"phoneNumbers":phoneNumbers}) 299 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from setuptools import setup, find_packages 3 | 4 | 5 | setup( 6 | name='nqntnqnqmb', 7 | version="1.01", 8 | packages=find_packages(), 9 | author="megadose", 10 | install_requires=["requests","fake_useragent","bs4","argparse"], 11 | description="nqntnqnqmb.", 12 | include_package_data=True, 13 | url='http://github.com/megadose/nqntnqnqmb', 14 | classifiers=[ 15 | "Programming Language :: Python", 16 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 17 | ], 18 | ) 19 | --------------------------------------------------------------------------------