├── api
│   ├── __init__.py
│   └── routes.py
├── services
│   ├── __init__.py
│   ├── company_scraper.py
│   ├── candidate_scraper.py
│   └── scraping_utils.py
├── run.py
├── settings.py
├── requirements.txt
├── LICENSE
├── .gitignore
└── README.md
/api/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/services/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
import uvicorn
from api.routes import app

if __name__ == '__main__':
    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="info")
--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
from dotenv import load_dotenv
from os import getenv

load_dotenv(override=True)

LINKEDIN_ACCESS_TOKEN = getenv('LINKEDIN_ACCESS_TOKEN')
LINKEDIN_ACCESS_TOKEN_EXP = getenv('LINKEDIN_ACCESS_TOKEN_EXP')
HEADLESS = getenv('HEADLESS')
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
annotated-types==0.6.0
anyio==4.3.0
attrs==23.2.0
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6
exceptiongroup==1.2.0
fastapi==0.110.0
h11==0.14.0
idna==3.6
outcome==1.3.0.post0
packaging==24.0
pycparser==2.21
pydantic==2.6.4
pydantic_core==2.16.3
PySocks==1.7.1
python-dotenv==1.0.1
requests==2.31.0
selenium==4.18.1
sniffio==1.3.1
sortedcontainers==2.4.0
starlette==0.36.3
trio==0.24.0
trio-websocket==0.11.1
typing_extensions==4.10.0
urllib3==2.2.1
uvicorn==0.28.0
webdriver-manager==4.0.1
wsproto==1.2.0
--------------------------------------------------------------------------------
/api/routes.py:
--------------------------------------------------------------------------------
from fastapi import FastAPI, HTTPException

from services.candidate_scraper import scrape_linkedin_profile
from services.company_scraper import scrape_linkedin_company

app = FastAPI()

@app.get("/profile-data/{linkedin_id}")
async def profile_data(linkedin_id: str):
    try:
        profile_infos = scrape_linkedin_profile(linkedin_id)
        return profile_infos
    except Exception as e:
        raise HTTPException(status_code=500, detail="Error fetching profile details")

@app.get("/company-data/{linkedin_id}")
async def company_data(linkedin_id: str):
    try:
        company_infos = scrape_linkedin_company(linkedin_id)
        return company_infos
    except Exception as e:
        raise HTTPException(status_code=500, detail="Error fetching company details")
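
# Example requests once the server from run.py is running locally
# (host and port as configured in run.py; <linkedin_id> is a placeholder):
#   GET http://127.0.0.1:8000/profile-data/<linkedin_id>
#   GET http://127.0.0.1:8000/company-data/<linkedin_id>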
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Driss Briksine

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

tests
--------------------------------------------------------------------------------
/services/company_scraper.py:
--------------------------------------------------------------------------------
from selenium import webdriver
from time import sleep
from services.scraping_utils import options, service, search_for_company_name, search_for_company_industry, search_for_company_about, add_session_cookie


def scrape_linkedin_company(linkedin_id):
    """Scrape LinkedIn company data."""
    try:
        # Set up the Selenium WebDriver
        driver = webdriver.Chrome(service=service, options=options)

        # Add the LinkedIn session cookie to the driver
        add_session_cookie(driver)

        print(f'Scraping data for company id: {linkedin_id}')

        # LinkedIn URL for the company
        company_url = f"https://www.linkedin.com/company/{linkedin_id}/"

        # Navigate to the LinkedIn company page
        driver.get(company_url)

        if "/unavailable" in driver.current_url or "Page not found" in driver.page_source:
            driver.quit()
            print(f"Company profile for {linkedin_id} not found (404)")
            return {"error": f"Company profile for {linkedin_id} not found."}

        sleep(1)

        # Scrape the name, industry, and about section from the LinkedIn company page
        try:
            name = search_for_company_name(driver)
            if not name:
                driver.quit()
                print("Scraping failed: the session token is not set up or has expired")
                return {"error": "Your LinkedIn session token is not set up correctly or has expired"}
            industry = search_for_company_industry(driver)
            about = search_for_company_about(driver)
        except Exception as e:
            print(f"Error scraping details for company {linkedin_id}: {e}")
            return {"error": f"Error searching for details for company {linkedin_id}"}

        driver.quit()

        print(f"Finished fetching details for company {linkedin_id} successfully")
        return {
            "linkedin_id": linkedin_id,
            "name": name,
            "industry": industry,
            "about": about,
        }
    except Exception as e:
        print(f"Error fetching details for company {linkedin_id}: {e}")
        return {"error": f"Error fetching company details for {linkedin_id}"}
--------------------------------------------------------------------------------
/services/candidate_scraper.py:
--------------------------------------------------------------------------------
from selenium import webdriver
from time import sleep
from services.scraping_utils import options, service, search_for_candidate_name, search_for_candidate_headline, search_for_section, add_session_cookie


def scrape_linkedin_profile(linkedin_id):
    """Scrape LinkedIn profile data."""
    try:
        # Set up the Selenium WebDriver
        driver = webdriver.Chrome(service=service, options=options)

        # Add the LinkedIn session cookie to the driver
        add_session_cookie(driver)

        print(f'Scraping data for id: {linkedin_id}')

        # LinkedIn URL for the profile
        profile_url = f"https://www.linkedin.com/in/{linkedin_id}/"

        # Navigate to the LinkedIn profile
        driver.get(profile_url)

        if "/404" in driver.current_url or "Page not found" in driver.page_source:
            driver.quit()
            print(f"Profile for {linkedin_id} not found (404)")
            return {"error": f"Profile for {linkedin_id} not found."}

        sleep(1)

        # Scrape the name, headline, education, and experience from the LinkedIn profile
        try:
            name = search_for_candidate_name(driver)
            if not name:
                driver.quit()
                print("Scraping failed: the session token is not set up or has expired")
                return {"error": "Your LinkedIn session token is not set up correctly or has expired"}
            headline = search_for_candidate_headline(driver)
            education = search_for_section(driver, "Education")
            experience = search_for_section(driver, "Experience")
        except Exception as e:
            print(f"Error scraping details for {linkedin_id}: {e}")
            return {"error": f"Error searching for details for {linkedin_id}"}

        driver.quit()

        print(f"Finished fetching details for profile {linkedin_id} successfully")
        return {
            "linkedin_id": linkedin_id,
            "name": name,
            "headline": headline,
            "education": education,
            "experience": experience,
        }

    except Exception as e:
        print(f"Error fetching details for {linkedin_id}: {e}")
        return {"error": f"Error fetching profile details for {linkedin_id}"}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# LinkedIn Scraper RESTful API

## Overview

This project is a LinkedIn scraper built with Selenium for web scraping and FastAPI for serving the scraped data via a RESTful API. It allows users to fetch detailed profile and company information from LinkedIn based on the provided LinkedIn ID.

## Installation

### Prerequisites

- Python 3.8+
- pip
- git

### Setup

1. **Clone the Repository**
   - Clone this repository to your local machine using `git clone https://github.com/drissbri/linkedin-scraper`.

2. **Create and Activate Virtual Environment**
   - Navigate to the project directory.
   - Create a virtual environment by running `python -m venv venv`.
   - Activate the virtual environment:
     - On Windows, run `venv\Scripts\activate`.
     - On macOS/Linux, run `source venv/bin/activate`.

3. **Install Dependencies**
   - Install the required packages by running `pip install -r requirements.txt`.

## Configuration

**Environment Variables**

- Create a `.env` file in the root of the project.
- Add the following environment variables:
  ```
  LINKEDIN_ACCESS_TOKEN="YourLinkedInAccessToken"
  LINKEDIN_ACCESS_TOKEN_EXP=AccessTokenExpiration
  HEADLESS=True
  ```
- Replace `YourLinkedInAccessToken` and `AccessTokenExpiration` with your actual LinkedIn access token and its expiration time.
- Set `HEADLESS` to `False` if you don't want the browser to run in headless mode.

### Obtaining LinkedIn Access Token with Cookie-Editor

To fetch data using the LinkedIn Scraper, you'll need a LinkedIn access token. The following steps will guide you through obtaining this token using the Cookie-Editor browser addon:

#### Step 1: Install Cookie-Editor

- **Install Cookie-Editor**: Add the [Cookie-Editor](https://cookie-editor.com/) addon to your browser. It's available for [Chrome](https://chromewebstore.google.com/detail/cookie-editor/hlkenndednhfkekhgcdicdfddnkalmdm), [Firefox](https://addons.mozilla.org/en-US/firefox/addon/cookie-editor), and other popular browsers. Visit the addon store for your browser and search for "Cookie-Editor" to install.

#### Step 2: Access LinkedIn

- **Log into LinkedIn**: Open your browser and log into your LinkedIn account as you normally would.

#### Step 3: Open Cookie-Editor

- **Launch Cookie-Editor**: Once logged in, click on the Cookie-Editor icon in your browser's toolbar to open the addon.

#### Step 4: Find the Access Token

- **Search for Access Token**: In the Cookie-Editor interface, look for a cookie named `li_at` or similar. This cookie contains your LinkedIn access token.
  ![LinkedIn Cookie-Editor](https://i.imgur.com/JzdNF3n.png "LinkedIn access token")
- **Copy the Token**: Click on the `li_at` cookie to view its details, and copy the value. This is your LinkedIn access token.
- **Copy the Expiration Time**: Convert the expiration time into a numeric format (a Unix timestamp such as `1743212774.0`) and copy the value. This is your LinkedIn access token's expiration time; a conversion snippet is shown below.
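
If you need to turn the human-readable expiration date shown by Cookie-Editor into a numeric timestamp, a small Python snippet like the following will do it (a sketch — the date value and format below are placeholders; adjust them to match what the addon displays for you):

```python
from datetime import datetime, timezone

# Placeholder expiration date; use the value shown by Cookie-Editor.
expiration = "2025-03-29 02:26:14"

# Parse the date (treated as UTC here) and convert it to a Unix timestamp.
timestamp = datetime.strptime(expiration, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc).timestamp()
print(timestamp)  # a float suitable for LINKEDIN_ACCESS_TOKEN_EXP
```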

### Notes

- **Token Validity**: LinkedIn access tokens are temporary. Check the token's validity periodically and update it as needed.
- **Privacy and Security**: Handle your access token securely, as it grants access to your LinkedIn data. Do not share your token publicly.

> [!IMPORTANT]
> **If you continue to encounter issues with the access token being undefined or expired even after following these steps, please replace the values directly in the `settings.py` file.**

## Usage

To start the server, run the following command in the root directory of the project:

```shell
python run.py
```

This command starts the Uvicorn server and makes the API accessible on `http://localhost:8000` by default.

## API Endpoints

The LinkedIn Scraper offers endpoints for retrieving detailed profile and company information from LinkedIn. Here's what you can expect from each:

### Profile Data

- **GET** `/profile-data/{linkedin_id}`
  - Fetches profile information for the specified LinkedIn ID.
  - **Path Parameters:**
    - `linkedin_id`: The unique LinkedIn ID of the profile.
  - **Response:** JSON object containing the profile information in the format below.
  - **Response Format:**
    ```json
    {
      "linkedin_id": "string",
      "name": "string",
      "headline": "string",
      "education": {
        "positions": ["string"],
        "institutions": ["string"],
        "dates": ["string"]
      },
      "experience": {
        "positions": ["string"],
        "institutions": ["string"],
        "dates": ["string"]
      }
    }
    ```
  - The `education` and `experience` fields are objects containing arrays of strings for positions, institutions, and dates, providing a concise summary of the individual's educational background and work history.

### Company Data

- **GET** `/company-data/{linkedin_id}`
  - Fetches company information based on the given LinkedIn ID.
  - **Path Parameters:**
    - `linkedin_id`: The unique LinkedIn ID of the company.
  - **Response:** JSON object containing the company information as structured below.
  - **Response Format:**
    ```json
    {
      "linkedin_id": "string",
      "name": "string",
      "industry": "string",
      "about": "string"
    }
    ```
  - This format covers the company's LinkedIn ID, name, industry sector, and a brief description of the company in the `about` field.

## Error Handling

Error handling is consistent across endpoints, aiming to provide meaningful feedback in case of failures:

- **Example Error Response:**

  ```json
  {
    "detail": "Error fetching profile details"
  }
  ```
  This response is returned with an HTTP status code of 500, indicating a server-side error during data fetching, along with a detail message explaining the error.

## Notes

- Ensure that the LinkedIn access token is valid and not expired to avoid authentication errors.

## Project Structure

- `api/routes.py` - Contains the API routes.
- `services/candidate_scraper.py` - Contains the scraping function for individual profiles (see the sketch below).
- `services/company_scraper.py` - Contains the scraping function for company profiles.
- `services/scraping_utils.py` - Contains the helper functions and options used in the scraping process for both profiles and companies.
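
The route handlers in `api/routes.py` are thin wrappers around these service functions, so the scrapers can also be used directly from Python without starting the API server. A minimal sketch (assuming the environment variables from the Configuration section are set; `some-profile-id` is a placeholder):

```python
from services.candidate_scraper import scrape_linkedin_profile

# Returns a dict with linkedin_id, name, headline, education, and experience,
# or a dict containing an "error" key if scraping fails.
data = scrape_linkedin_profile("some-profile-id")
print(data)
```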

## Contributing

I welcome contributions from the community and I'm excited to see how you can improve and extend this LinkedIn Scraper project! If you're looking to contribute, here are a few ways you can do so:

### Reporting Bugs

- **Submit an Issue**: If you find a bug or encounter an issue, please create an issue on the GitHub repository. Provide as much detail as possible, including steps to reproduce the issue, the expected outcome, and the actual outcome.

### Feature Requests

- **Request a Feature**: Have an idea for a new feature or an enhancement to existing functionality? Submit a feature request through the issue tracker. Describe the feature, its potential benefits, and how it might work.

### Submitting Changes

- **Fork the Repository**: Start by forking the repository to your GitHub account.
- **Create a Branch**: Create a new branch for your changes. Use a clear and descriptive name for your branch, such as `fix-issue-1` or `add-new-feature`.
- **Make Your Changes**: Implement your changes, adhering to the existing coding style as much as possible.
- **Write Tests**: If you're adding new functionality or fixing a bug, please add tests to cover your changes.
- **Document Your Changes**: Update the README or documentation with any changes required by your contribution.
- **Submit a Pull Request (PR)**: Once your changes are complete, submit a pull request to the main branch of the original repository. Include a clear description of your changes and any other relevant information.

### Code Review Process

- **Review & Feedback**: After submitting a PR, the project maintainers will review your changes. Be open to feedback and ready to make adjustments as needed.
- **Approval & Merge**: If your contribution is approved, the project maintainers will merge your changes into the main branch.

### General Guidelines

- Ensure your code follows the project's coding conventions and best practices.
- Keep your commits small and focused; it makes the review process easier.
- Update any documentation that your changes might affect.

I appreciate your interest in contributing to the LinkedIn Scraper project! By participating, you agree to abide by the code of conduct and collaboration guidelines. Let's build something great together!

--------------------------------------------------------------------------------
/services/scraping_utils.py:
--------------------------------------------------------------------------------
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from settings import LINKEDIN_ACCESS_TOKEN, LINKEDIN_ACCESS_TOKEN_EXP, HEADLESS

# Setting up the browser options
options = Options()
# Run headless unless HEADLESS is explicitly set to "False"
if HEADLESS != "False":
    options.add_argument("--headless=new")
options.add_argument('--ignore-ssl-errors=yes')
options.add_argument('--ignore-certificate-errors=yes')
options.add_argument("--log-level=3")

# Setting up the ChromeDriver service ('nul' is the Windows null device)
service = Service(ChromeDriverManager().install(), log_output='nul')

def find_by_xpath_or_None(driver, *xpaths):
    """Return the text of the first element matching one of the given XPaths, or None."""
    for xpath in xpaths:
        try:
            return driver.find_element(By.XPATH, xpath).text
        except NoSuchElementException:
            #print(f"Element not found : {xpath}")
            continue
    return None


def search_for_candidate_name(driver):
    """Search for the profile's name in the page."""
    try:
        name = find_by_xpath_or_None(driver, '/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[1]/div[2]/div[2]/div[1]/div[1]/span/a/h1', '/html/body/div[4]/div[3]/div/div/div[2]/div/div/main/section[1]/div[2]/div[2]/div[1]/div[1]/span/a/h1')
        return name
    except Exception as e:
        print(f"Error finding name: {e}")
        return None


def search_for_candidate_headline(driver):
    """Search for the profile's headline in the page."""
    try:
        headline = find_by_xpath_or_None(driver, '/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[1]/div[2]/div[2]/div[1]/div[2]', '/html/body/div[4]/div[3]/div/div/div[2]/div/div/main/section[1]/div[2]/div[2]/div[1]/div[2]')
        return headline
    except Exception as e:
        print(f"Error finding headline: {e}")
        return None


def search_for_section(driver, section_name, min_index=2, max_index=8):
    """Search for a section's content by section name in the page."""
    try:
        # Initialize variables
        sectionIndex = min_index
        found_elements = {
            'positions': [],
            'institutions': [],
            'dates': []
        }

        # Function to add found elements to the dictionary
        def add_elements(position, institution, date):
            if position: found_elements['positions'].append(position)
            if institution: found_elements['institutions'].append(institution)
            if date: found_elements['dates'].append(date)

        # Loop through sections until the "section_name" section is found
        while sectionIndex <= max_index:
            # Check if the section title matches "section_name"
            section_title = find_by_xpath_or_None(driver, f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[2]/div/div/div/h2/span[1]')
            if section_title == section_name:
                elementIndex = 1
                # Experience
                if section_name == "Experience":
                    while True:
                        target_element_position = find_by_xpath_or_None(driver, f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div[1]/div/div/div/div/div/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div[2]/ul/li[1]/div/div[2]/div/a/div/div/div/div/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div[2]/ul/li[1]/div/div[2]/div/a/div/div/div/div/div/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div/div/span[1]/span[1]')
                        target_element_institution = find_by_xpath_or_None(driver, f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div[1]/div/span[1]/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div[1]/a/div/div/div/div/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div[1]/a/div/div/div/div/span[1]')
                        target_element_date = find_by_xpath_or_None(driver, f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div[1]/div/span[2]/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div[1]/a/span[1]/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div/div/span[2]/span[1]')
                        if not target_element_position:
                            break

                        add_elements(target_element_position, target_element_institution, target_element_date)
                        elementIndex += 1
                # Education
                if section_name == "Education":
                    while True:
                        target_element_position = find_by_xpath_or_None(driver, f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div/a/span[1]/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div/a/span[1]/span[1]')
                        target_element_institution = find_by_xpath_or_None(driver, f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div/a/div/div/div/div/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div/a/div/div/div/div/span[1]')
                        target_element_date = find_by_xpath_or_None(driver, f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div/a/span[2]/span[1]', f'/html/body/div[5]/div[3]/div/div/div[2]/div/div/main/section[{sectionIndex}]/div[3]/ul/li[{elementIndex}]/div/div[2]/div/a/span[2]/span[1]')

                        if not target_element_position:
                            break

                        add_elements(target_element_position, target_element_institution, target_element_date)
                        elementIndex += 1
                break
            sectionIndex += 1  # Move to the next section

        return found_elements
    except Exception as e:
        print(f"Error finding section: {e}")
        return None


def search_for_company_name(driver):
    """Search for the company's name in the page."""
    try:
        company_name = find_by_xpath_or_None(driver, '/html/body/div[5]/div[3]/div/div[2]/div/div[2]/main/div[1]/section/div/div[2]/div[2]/div[1]/div[2]/div/h1')
        return company_name
    except Exception as e:
        print(f"Error finding company name: {e}")
        return None


def search_for_company_industry(driver):
    """Search for the company's industry in the page."""
    try:
        company_industry = find_by_xpath_or_None(driver, '/html/body/div[4]/div[3]/div/div[2]/div/div[2]/main/div[1]/section/div/div[2]/div[2]/div[1]/div[2]/div/div/div[1]', '/html/body/div[5]/div[3]/div/div[2]/div/div[2]/main/div[1]/section/div/div[2]/div[2]/div[1]/div[2]/div/div/div[1]')
        return company_industry
    except Exception as e:
        print(f"Error finding company industry: {e}")
        return None


def search_for_company_about(driver):
    """Search for the company's about section in the page."""
    try:
        # Expand the full about text before reading it
        more_button = driver.find_element(By.XPATH, '/html/body/div[5]/div[3]/div/div[2]/div/div[2]/main/div[2]/div/div[1]/section/div/div/div[1]/div/span[3]/span/a')
        more_button.click()
        company_about = find_by_xpath_or_None(driver, '/html/body/div[5]/div[3]/div/div[2]/div/div[2]/main/div[2]/div/div[1]/section/div/div/div[1]/div/span[1]')
        return company_about
    except Exception as e:
        print(f"Error finding company about: {e}")
        return None


def add_session_cookie(driver):
    """Build the LinkedIn session cookie from settings and add it to the driver."""
    cookie = {
        "domain": ".www.linkedin.com",
        "name": "li_at",
        "value": LINKEDIN_ACCESS_TOKEN,
        "path": "/",
        "secure": True,
        "httpOnly": True,
        # The expiration is a numeric timestamp (see the README); environment variables are read as strings, so cast it
        "expirationDate": float(LINKEDIN_ACCESS_TOKEN_EXP) if LINKEDIN_ACCESS_TOKEN_EXP else None,
    }
    # Add the cookie to the driver
    try:
        driver.get("https://www.linkedin.com")
        driver.add_cookie(cookie)
    except Exception as e:
        print(f"Error adding cookie to driver: {e}")
--------------------------------------------------------------------------------