├── requirements.txt
├── README.md
└── peepedIn.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | requests>=2.20.0
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # peepedIn
 2 | 
 3 | Peep the LinkedIn profiles of a company's employees.
 4 | 
 5 | There are other ways to do this kind of thing, but this is simple, targeted to what I need it to do, and easy to integrate into other projects.
 6 | 
 7 | Results are limited by the connections of the account used to run the tool. Keep pumping those sock puppets! 
 8 | 
 9 | ## Install
10 | 
11 |     git clone https://github.com/chm0dx/peepedIn.git
12 |     cd peepedIn
13 |     pip install -r requirements.txt
14 |     
15 | ## Use
16 | 
17 |     usage: peepedIn.py [-h] [-j] url email password
18 | 
19 |     Return a list of employee profiles and info from a company's LinkedIn profile URL.
20 | 
21 |     positional arguments:
22 |       url         The LinkedIn profile URL of the company you want to peep
23 |       email       The account email to use for logging into LinkedIn
24 |       password    The account password to use for logging into LinkedIn
25 | 
26 |     optional arguments:
27 |       -h, --help  show this help message and exit
28 |       -j, --json  Output in json format
29 | 
30 |     Example: python3 peepedIn.py company_linkedin_url email password
31 |     
32 | ## NOTE
33 | 
34 | LinkedIn doesn't love automated logins. You may be rewarded with a challenge_url before LinkedIn will complete the login. Logging in ahead of time from a browser at the same IP you will run the tool from is a good way to avoid getting hit with the challenge. I'll look to add automated handling of the challenge at some point.
35 | 
36 | ## Credit
37 | 
38 | Thanks to https://github.com/nickls/linkedin-unofficial-api for API documentation
39 | 


--------------------------------------------------------------------------------
/peepedIn.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from argparse import RawTextHelpFormatter
 3 | import requests
 4 | import string
 5 | 
 6 | def scrape(company_url,user,pw):
 7 | 		li_base_url = "https://www.linkedin.com/"
 8 | 		li_login_url = li_base_url + "uas/authenticate"
 9 | 		if company_url.endswith("/"):
10 | 			company_url = company_url[:-1]
11 | 		company_public_id = company_url.split("/")[-1]
12 | 
13 | 		session = requests.Session()
14 | 		session.get(li_login_url)
15 | 		session.headers["User-Agent"] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
16 | 		session.headers["Accept-Language"] = 'en-US,en;q=0.9'
17 | 		session.headers["X-Li-User-Agent"] = "LIAuthLibrary:3.2.4 com.linkedin.LinkedIn:8.8.1 iPhone:8.3"
18 | 		session.headers["X-User-Language"] = "en"
19 | 		session.headers["X-User-Locale"] = "en_US"
20 | 		session.headers["Accept-Language"] = "en-us"
21 | 		session.headers["csrf-token"] = session.cookies["JSESSIONID"].strip('"')
22 | 
23 | 		data = {
24 | 					"session_key": user,
25 | 					"session_password": pw,
26 | 					"JSESSIONID": session.cookies["JSESSIONID"],
27 | 				}
28 | 
29 | 		r = session.post(li_login_url, data=data)
30 | 
31 | 		if r.json()['login_result'] != "PASS":
32 | 				return r.json()
33 | 
34 | 		r = session.get(f"https://www.linkedin.com/voyager/api/organization/companies?decorationId=com.linkedin.voyager.deco.organization.web.WebFullCompanyMain-12&q=universalName&universalName={company_public_id}")
35 | 		company_id = r.json().get("elements")[0].get("entityUrn").split(":")[-1]
36 | 		r = session.get(f"https://www.linkedin.com/voyager/api/search/blended?count=49&filters=List(resultType-%3EPEOPLE,currentCompany-%3E{company_id})&origin=GLOBAL_SEARCH_HEADER&q=all&start=0&queryContext=List(spellCorrectionEnabled-%3Etrue,relatedSearchesEnabled-%3Etrue,kcardTypes-%3EPROFILE%7CCOMPANY)")
37 | 		
38 | 		results = r.json()["elements"][0]["elements"]
39 | 		peeps = []
40 | 
41 | 		for result in results:
42 | 			linkedin = result["image"]["attributes"][0]["miniProfile"]["publicIdentifier"]
43 | 			if "UNKNOWN" in linkedin:
44 | 				continue
45 | 			linkedin = f"https://linkedin.com/in/{linkedin}"
46 | 			first_name = result["image"]["attributes"][0]["miniProfile"]["firstName"]
47 | 			first_name = ''.join([char for char in first_name if char in string.printable])
48 | 			last_name = result["image"]["attributes"][0]["miniProfile"]["lastName"]
49 | 			last_name = ''.join([char for char in last_name if char in string.printable])
50 | 			title = result["image"]["attributes"][0]["miniProfile"]["occupation"]
51 | 			
52 | 			peeps.append({"first":first_name, "last":last_name, "title":title, "linkedin":linkedin})
53 | 			
54 | 		return {"results":peeps}
55 | 
if __name__ == "__main__":
    # Command-line entry point: parse arguments, run the scrape and print
    # the outcome either as JSON (-j) or as one comma-separated line per person.
    # json is imported here because only this entry point needs it.
    import json

    parser = argparse.ArgumentParser(
        description = "Return a list of employee profiles and info from a company's LinkedIn profile URL.",
        epilog = '''Example: python3 peepedIn.py company_linkedin_url email password
        ''',
        formatter_class=RawTextHelpFormatter)
    parser.add_argument('url', help="The LinkedIn profile URL of the company you want to peep")
    parser.add_argument('email', help="The account email to use for logging into LinkedIn")
    parser.add_argument('password', help="The account password to use for logging into LinkedIn")
    parser.add_argument('-j','--json', help="Output in json format", action='store_true')
    args = parser.parse_args()

    results = scrape(args.url, args.email, args.password)
    if args.json:
        # print() of a dict emits its Python repr (single quotes, None/True),
        # which is not valid JSON; serialize it properly instead.
        print(json.dumps(results))
    elif results.get("results"):
        for result in results["results"]:
            print(f'{result["first"]} {result["last"]},{result["title"]},{result["linkedin"]}')
    else:
        # No employee list: show whatever scrape() returned (for example the
        # raw login response) so the user can see why.
        print(results)
        if results.get("challenge_url"):
            print("NOTE: You got a challenge. Use a browser to login into LinkedIn from the same IP you are running the script from and try again.")


--------------------------------------------------------------------------------