├── .devcontainer
│   └── devcontainer.json
├── Internship_extractor
│   ├── agents.py
│   ├── final.json
│   ├── main.py
│   ├── requirements.txt
│   └── tasks.py
├── LICENSE.txt
├── README.md
├── internships_dataset.json
├── main.py
├── requirements.txt
├── resume_temp.json
└── tools.py

--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
{
    "name": "Python 3",
    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
    "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
    "customizations": {
        "codespaces": {
            "openFiles": [
                "README.md",
                "main.py"
            ]
        },
        "vscode": {
            "settings": {},
            "extensions": [
                "ms-python.python",
                "ms-python.vscode-pylance"
            ]
        }
    },
    "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt"
}

--------------------------------------------------------------------------------
/Internship_extractor/agents.py:
--------------------------------------------------------------------------------
from crewai import Agent
from crewai_tools import WebsiteSearchTool, FileReadTool

# Commented-out sketch of a custom CSV reader tool (see csv_reader_tool below):
# def _run(self, csv_file_path: str) -> str:
#     try:
#         # Implementation to read CSV file row by row with explicit encoding
#         with open(csv_file_path, 'r', encoding='utf-8') as file:
#             for line in file:
#                 # Process each row as needed
#                 print(line)
#         return "CSV file read successfully"
#     except UnicodeDecodeError as e:
#         return f"Error reading CSV file: {e}"

# Define tools
web_search_tool = WebsiteSearchTool()
file_read_tool = FileReadTool(
    file_path='input.json',
    description='A tool to read the internship job description file.'
)
# csv_reader_tool = CSVReaderTool(file_path='internships.csv')

class Agents:
    def research_agent(self):
        return Agent(
            role='Research Analyst',
            goal='Analyze the internship details and provided descriptions to extract a complete summary of the company job/internship posting.',
            tools=[web_search_tool, file_read_tool],
            backstory='Expert in analyzing internship descriptions and identifying key values and needs from various sources.',
            verbose=True
        )

    def writer_agent(self):
        return Agent(
            role='Job Description Writer',
            goal='Use insights from the Research Analyst to create a detailed, engaging, and enticing internship posting.',
            tools=[web_search_tool, file_read_tool],
            backstory="Skilled in crafting compelling internship descriptions that attract the right candidates.",
            verbose=True
        )

--------------------------------------------------------------------------------
/Internship_extractor/final.json:
--------------------------------------------------------------------------------
{
    "name": "",
    "company": "",
    "apply_link": "",
    "date_published": "",
    "country": "",
    "city": "",
    "skills": [],
    "degree": "",
    "field": [],
    "experience": [],
    "summary": ""
}

--------------------------------------------------------------------------------
/Internship_extractor/main.py:
--------------------------------------------------------------------------------
from dotenv import load_dotenv
from crewai import Crew
from tasks import Tasks
from agents import Agents
import json
import pandas as pd
import time
import os

load_dotenv()
tasks = Tasks()
agents = Agents()

csv_file = 'internships.csv'
df = pd.read_csv(csv_file)

final_output_file = 'internships_dataset.json'  # Final output JSON file

# Create or open the final output file in append mode
with open(final_output_file, 'a') as final_output:
    for index, row in df.iterrows():
        company_data = row.to_dict()
        company_json = json.dumps(company_data)

        output_json_file = f'output_{index}.json'  # Per-row output JSON file

        with open(output_json_file, 'w') as json_file:
            json.dump(company_data, json_file)

        research_agent = agents.research_agent()
        review = tasks.extract_info(research_agent, company_json, output_json_file)

        crew = Crew(
            agents=[research_agent],
            tasks=[review]
        )

        # Kick off the process
        result = crew.kickoff()

        # Append the content of the output file to the final output file
        with open(output_json_file, 'r') as output_file:
            final_output.write(output_file.read() + ',\n')

        # Delete the per-row output file
        os.remove(output_json_file)

        time.sleep(5)

--------------------------------------------------------------------------------
/Internship_extractor/requirements.txt:
--------------------------------------------------------------------------------
python-dotenv==1.0.1
crewai==0.14.3

--------------------------------------------------------------------------------
/Internship_extractor/tasks.py:
--------------------------------------------------------------------------------
from textwrap import dedent
from crewai import Task

class Tasks():
    def extract_info(self, agent, json_file, output_file):
        return Task(
            description=dedent(f"""\
                Extract the key information for each company, in order (company, apply_link, date_published, location, country, city, state, roles, skills, eligibility, degree, field, experience, summary, etc.), from this internships.csv record: {json_file}, and write an overall summary of the job posting.
                Also include the most important skills (not basic skills) needed for the job, and make the summary a little longer. Don't write [Here] in apply_link.
                In the experience field, give only the important experience needed for the job; don't make it longer."""),
            expected_output=dedent("""\
                Give output strictly as a JSON object like this:
                {
                    "name": "",
                    "company": "",
                    "apply_link": "",
                    "date_published": "",
                    "country": "",
                    "city": "",
                    "skills": [],
                    "degree": "",
                    "field": [],
                    "experience": [],
                    "summary": ""
                }
                Please give a summary a little longer, of about 60 words."""),
            agent=agent,
            output_file=output_file  # Pass the output file path to the Task
        )

--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Muratcan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 🔍 Internship Finder

## 📝 Project Overview

JeezAI Internship Finder is an AI-powered web application that connects students with the most compatible internship opportunities. It leverages advanced natural language processing techniques and the DSPy framework from Stanford NLP (Omar Khattab, Herumb Shandilya, Arnav Singhvi) to analyze resumes, generate search queries, and provide detailed match analyses between students' credentials and internship requirements.

![image](https://github.com/JeezAI/DSPy_matchmaking/assets/114735073/50934dc0-0b03-4fc2-946d-21794d1a489a)

## 🔑 Key Features

- **Resume Parsing:** Automated extraction of pertinent information from resumes.
- **Keyword-Based Search Queries:** Generates dynamic queries to find the best internships.
- **Match Analysis:** Compares student profiles with internship requirements for precise matches.
- **Scalable Data Storage:** Uses Weaviate, a vector database, for efficient data storage and retrieval.
- **Streamlit Web Interface:** Provides an easy-to-navigate interface for users to upload resumes and explore internship opportunities.

## 🖥️ Technical Architecture

The Internship Finder is built with a stack that includes DSPy for structured AI programming, Cohere for text processing, and Streamlit for the front end, encapsulated within a modular architectural framework.

### Core Technologies

- **DSPy:** A framework for declarative structured programming in AI. It enables the modularization of NLP tasks into reusable components, enhancing both development efficiency and system scalability. DSPy allows defining declarative language model calls that get compiled into self-improving pipelines, shifting the focus from manual prompt engineering to treating LMs as devices. It introduces concepts like natural-language type signatures, modules, and optimizers to specify transformations, encapsulate prompting techniques, and update LM weights to achieve target metrics. DSPy can be used to build complex pipelines, like multi-hop question answering systems that break down questions, retrieve relevant passages, and synthesize answers. The DSPy compiler enables systematic optimization of prompts by running inputs through the pipeline, analyzing traces, and treating prompt engineering as a discrete AI optimization problem.
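To make that concrete, here is a minimal sketch of the DSPy programming style used in this project (a signature plus a module), simplified from the signatures defined later in `main.py`; it assumes an LM has already been configured via `dspy.settings.configure`:

```python
import dspy

# A natural-language "type signature": declare what goes in and what comes out,
# and let DSPy compile the actual prompt text.
class GenerateQuery(dspy.Signature):
    """Produce a keyword search query for an internship database from a resume."""
    context = dspy.InputField(desc="resume")
    query = dspy.OutputField(desc="query in simple string format")

# A module wraps a prompting technique (here, chain-of-thought reasoning)
# around the signature and can later be optimized by DSPy rather than hand-tuned.
generate_query = dspy.ChainOfThought(GenerateQuery)

# prediction = generate_query(context=resume_text)
# prediction.query now holds the generated search string
```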
![image](https://github.com/JeezAI/DSPy_matchmaking/assets/114735073/59ebbbdb-d382-4422-a530-361b112b8eb5)

Some exciting applications mentioned include:
- Generating structured outputs with type predictors, JSON templates, and retry logic
- Building self-correcting pipelines with DSPy assertions and custom guard rails
- Optimizing Chain-of-Thought reasoning for complex question answering
- Integrating with retrieval systems and APIs for grounded language modeling

You can learn more about the DSPy framework by visiting the Stanford NLP GitHub repo here [https://github.com/stanfordnlp/dspy].

Overall, DSPy provides an impressive framework for building reliable, optimized, and complex language model applications in a more automated and scalable way than manual prompt engineering. We're excited to try it out for our matchmaking projects.

We also use:
- **Cohere:** Utilized for its newest and most powerful model, Command R+, in generating search queries and performing deep linguistic analyses. [https://docs.cohere.com/docs/command-r-plus]
- **Weaviate:** Chosen for its vector search capabilities, allowing quick retrieval of internship opportunities from large datasets. We use Weaviate hybrid search, which combines multiple search algorithms to improve the accuracy and relevance of results: each hybrid query runs both keyword matching and semantic vector search to return the most relevant internship opportunities from the database. [https://weaviate.io/]
- **Streamlit:** Facilitates rapid development of interactive web apps, used here to craft the user interface.

### 📚 Directory Structure

- `main.py` - Orchestrates the user interaction and integrates the various modules.
- `tools.py` - Contains utility functions and custom methods for data processing.
- `resume_temp.json` - Template for standardizing the resume data format.
- `Internship_extractor/` - Crew AI agents and tasks that build the internship dataset.

## 🤖 DSPy Integration

DSPy is instrumental in building the core functionality of the Internship Finder: as a framework, it facilitates the creation of modular, reusable components for natural language processing tasks. The implementation in our application is outlined as follows:

### Initialization and Configuration
- DSPy modules `dspy` and `dsp`, along with `WeaviateRM` (Weaviate Retrieval Model), are imported for the foundational setup.
- The connection to the Weaviate database is established using configurations like cluster URL, API key, and timeouts.
- DSPy settings are configured to integrate the language model (Cohere) and the retrieval model (WeaviateRM) for smooth operation.

### Defining the Internship Finder Module
- A custom DSPy module, `Internship_finder`, derived from `dspy.Module`, encapsulates the logic for internship matching.
- Within this module:
  - `generate_query`: Employs `dspy.ChainOfThought` to instantiate the signature for dynamic query generation.
  - `generate_analysis`: Utilizes `dspy.Predict` for conducting a thorough match analysis.

### Defining Signatures
- Two DSPy signatures are crafted (see the sketch after this list):
  - `generate_analysis`: Accepts internship context and resume to output structured matches in JSON format.
  - `generate_query`: Analyzes resumes to produce a targeted search query for the Weaviate database.
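The condensed sketch below, adapted from the implementation in `main.py`, previews how the module wires these two signatures together in the matching process described next; `generate_query`, `generate_analysis`, `search_database`, and `deduplicate` are the signatures and helpers defined in `main.py`:

```python
import dspy

class Internship_finder(dspy.Module):
    def __init__(self):
        super().__init__()
        # Three independent chain-of-thought generators, one per search hop
        self.generate_query = [dspy.ChainOfThought(generate_query) for _ in range(3)]
        self.generate_analysis = dspy.Predict(generate_analysis, max_tokens=4000)

    def forward(self, resume):
        passages = []
        for hop in range(3):
            # Each hop derives a fresh keyword query from the resume...
            query = self.generate_query[hop](context=str(resume)).query
            # ...and runs it against the Weaviate hybrid index.
            passages.append(search_database(query))
        # Drop repeated listings, then analyze the matches that remain.
        context = deduplicate(passages)
        return self.generate_analysis(resume=str(resume), context=context).output
```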
### Internship Matching Process
- The module's `forward` method orchestrates the matching process:
  - Executes query generation through the `generate_query` signature, iterating as needed.
  - Performs a database search for each query, collating results into a list of passages.
  - Deduplicates the list to remove any repeated internship listings.
  - Carries out a detailed match analysis using the `generate_analysis` signature.
- The output is a comprehensive analysis detailing the matched internships.

By adopting DSPy, the Internship Finder application benefits from a structured, maintainable, and extensible framework. It demonstrates the efficient utilization of modules and signatures, streamlining the integration with various models and databases.

## 📊 Data Extraction with Crew AI LLM Agents

Crew AI enhances our data extraction capabilities by automatically pulling structured insights from unstructured internship descriptions. This enriched data supports improved matching accuracy and produces a JSON file for each database record (see `Internship_extractor/`).

## 🚀 Getting Started

### Installation

**Clone the repository:**

```bash
git clone https://github.com/JeezAI/DSPy_matchmaking.git
```

### Environment Setup

Set environment variables for API keys:

```bash
export CO_API_KEY="Your_Cohere_API_Key"
export WCS_API_KEY="Your_Weaviate_API_Key"
export OPENAI_API_KEY="Your_OpenAI_API_Key"
```

### Running the Application

Launch the application with Streamlit:

```bash
streamlit run main.py
```

Visit http://localhost:8501 in your web browser to interact with the application.

### Future Enhancements

- Data Source Expansion: Link to more databases for a broader internship selection.
- Personalized Recommendations: Adapt search results based on individual career aspirations and feedback.
- Interactive User Feedback: Use collaborative filtering to refine matching algorithms based on user interactions.
- Real-time Notifications: Implement a system to notify users of new opportunities.
- Integrate with LinkedIn Analyzer and Career Roadmap Planner [https://github.com/JeezAI/careerbuilder_Linkedin2CareerRoadmap]

### 📝 Conclusion

The Internship Finder exemplifies the powerful combination of DSPy for structured AI development and Cohere for sophisticated text analysis, providing a robust solution for internship matching. This platform not only streamlines the search process but also offers a scalable framework for future enhancements.

### 🤝 Contribution Guidelines

We welcome contributions from the community. Please read our contribution guidelines to learn how you can help improve the Internship Finder.
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import dspy
from dspy import dsp
import os
from dspy.retrieve.weaviate_rm import WeaviateRM
from weaviate.classes.init import AdditionalConfig, Timeout
import weaviate
import json
import streamlit as st
from typing import Optional
from datetime import datetime
from pydantic import BaseModel, Field, HttpUrl
from tools import company_url, resume_into_json
import nltk
from PyPDF2 import PdfReader

co_api_key = os.getenv("CO_API_KEY")
nltk.download('punkt')

# Weaviate client configuration
url = "https://internship-finder-52en6hka.weaviate.network"
apikey = os.getenv("WCS_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Connect to Weaviate
weaviate_client = weaviate.connect_to_wcs(
    cluster_url=url,
    auth_credentials=weaviate.auth.AuthApiKey(apikey),
    headers={
        "X-OpenAI-Api-Key": openai_api_key
    },
    additional_config=AdditionalConfig(
        timeout=Timeout(init=2, query=45, insert=120)  # Values in seconds
    )
)

cohere = dsp.Cohere(model='command-r-plus', api_key=co_api_key)

retriever_model = WeaviateRM("Internship", weaviate_client=weaviate_client)

dspy.settings.configure(lm=cohere, rm=retriever_model)

# Streamlit UI
st.title("Internship Finder")
my_bar = st.progress(0)

class JobListing(BaseModel):
    city: str
    date_published: datetime  # Assuming the date can be parsed into a datetime object
    apply_link: HttpUrl  # Pydantic will validate this is a valid URL
    company: str
    location: Optional[str]  # 'location' could be a string or None
    country: str
    name: str

class Out_Internship(BaseModel):
    output: list[JobListing] = Field(description="list of internships")

def search_database(query):
    url = "https://internship-finder-52en6hka.weaviate.network"
    apikey = os.getenv("WCS_API_KEY")
    openai_api_key = os.getenv("OPENAI_API_KEY")

    # Connect to Weaviate (a fresh connection per query, closed below)
    weaviate_client = weaviate.connect_to_wcs(
        cluster_url=url,
        auth_credentials=weaviate.auth.AuthApiKey(apikey),
        headers={
            "X-OpenAI-Api-Key": openai_api_key
        }
    )
    questions = weaviate_client.collections.get("Internship")

    # Hybrid search combines keyword matching with semantic vector search
    response = questions.query.hybrid(
        query=query,
        limit=10
    )

    # Collect the matched internships
    interns = []
    for item in response.objects:
        interns.append(item.properties)

    context = json.dumps(interns)
    weaviate_client.close()
    return json.loads(context)

def check_resume(resume):
    if resume is None:
        return False

    pdf_reader = PdfReader(resume)
    text = ""
    for page in pdf_reader.pages:
        # Extract text from each page
        text += page.extract_text()

    tokens = nltk.word_tokenize(text)

    # Reject resumes whose total character count exceeds the limit
    total_length = sum(len(token) for token in tokens)
    if total_length >= 16000:
        return False

    tokens_to_check = ["summary", "skills", "experience", "projects", "education"]
    # Convert tokens to lower case for case-insensitive comparison
    text_tokens_lower = [token.lower() for token in tokens]

    # Check whether any of the expected resume sections appear in the tokenized text
    tokens_found = [token for token in tokens_to_check if token.lower() in text_tokens_lower]

    # Return False if none of the expected sections were found, True otherwise
    return bool(tokens_found)

class Internship_finder(dspy.Module):
    cohere = dsp.Cohere(model='command-r-plus', api_key=co_api_key)
    dspy.settings.configure(lm=cohere)

    def __init__(self):
        super().__init__()
        self.generate_query = [dspy.ChainOfThought(generate_query) for _ in range(3)]
        self.generate_analysis = dspy.Predict(generate_analysis, max_tokens=4000)

    def forward(self, resume):
        # The resume is passed as context for query generation
        passages = []

        for hop in range(3):
            query = self.generate_query[hop](context=str(resume)).query
            info = search_database(query)
            passages.append(info)

        context = deduplicate(passages)
        my_bar.progress(60, text="Doing Analysis")

        analysis = self.generate_analysis(resume=str(resume), context=context).output

        return analysis

def deduplicate(context):
    """
    Removes duplicate internships from the collected query results
    while preserving order.

    Parameters:
    context (list): List of per-query result lists.

    Returns:
    list: Flat list of internships with duplicates removed.
    """
    # Each element of `context` is one query's result list, so flatten first
    flattened = [item for results in context for item in results]

    # Serialize each internship so identical listings compare equal, and use
    # dict.fromkeys to drop duplicates while preserving insertion order
    json_strings = [json.dumps(d, sort_keys=True) for d in flattened]
    unique_json_strings = dict.fromkeys(json_strings)

    # Convert the JSON strings back to dictionaries
    unique_dicts = [json.loads(s) for s in unique_json_strings]
    return unique_dicts

def check_answer(assessment_answer):
    if assessment_answer == "no":
        return False
    return True

def get_resume():
    with open('resume.json', 'r') as file:
        resume = json.load(file)
    return resume

class generate_analysis(dspy.Signature):
    """
    Your Role:
    You are a Matchmaking Manager, an expert at connecting students with their ideal internship opportunities.

    Input:
    You will be provided with a student's resume and a list of potential internship opportunities. Your task is to carefully analyze and match the student's credentials with the requirements of each internship, following the specific criteria outlined below.

    Matching Criteria:

    Educational Background:
    Degree Level and Major: Seek exact matches or close alignments between the student's degree level (bachelor's, master's, etc.) and major and the educational requirements specified in the internships.
    Related Fields of Study: Consider closely related fields of study as a potential match. For example, a student majoring in Computer Science could be a good fit for internships seeking IT or Software Engineering majors.
    Relevant Coursework: Give bonus points to internships that specifically mention or prefer certain courses that the student has completed. For example, if an internship seeks candidates with a background in Data Structures and the student has taken an advanced course in that area, it strengthens the match.
    Skill and Experience Match:
    Required Skills: Look for strong overlaps between the technical skills listed on the student's resume and the required skills outlined in the internship descriptions.
    Tools and Frameworks: Prioritize internships that specifically mention tools, programming languages, or frameworks that the student has hands-on experience with. For example, if an internship seeks proficiency in Python and the student has worked on Python projects, it is a strong match.
    Applied Skills: Value projects or previous work experiences that demonstrate the practical application of the required skills. For instance, if an internship seeks candidates with web development skills and the student has built and deployed websites, it is a clear indication of a good fit.

    Project Relevance:
    Project Experience: Analyze the student's project portfolio to identify technical skills and areas of expertise that align with the internships' requirements.
    AI/ML and Data Focus: Match internships that specifically seek experience or interest in AI/ML model development, data analysis, or similar areas. Look for keywords like "machine learning," "data engineering," or "data-driven solutions" in the internship descriptions.
    Ensure that the internships do not include "research" in their titles, skills, or descriptions.
    Practical Implementation: Prioritize internships that emphasize hands-on experience in development, engineering, application development, or implementation roles over theoretical or research-focused roles.

    For Match Analysis: Do a detailed match analysis for each internship, explaining how the resume matches the internship. Provide a brief summary of the match analysis for each internship.

    For Output:
    Use the following JSON array format to provide the top-matched internships in a single array:

    [
        {
            "name": "Job Title",
            "company": "Company Name",
            "apply_link": "Application Link",
            "match_analysis": "Detailed match analysis here. Explain how the student's background matches the internship requirements, using specific examples."
        },
        {
            "name": "Another Job Title",
            "company": "Another Company",
            "apply_link": "Application Link",
            "match_analysis": "Provide a detailed match analysis for this internship opportunity as well, highlighting relevant matches."
        }
    ]

    If there are no suitable matches, return None. Ensure that no additional words or JSON annotations are included outside the code block.
    """

    context = dspy.InputField(desc="Internships")
    resume = dspy.InputField(desc="resume")
    output = dspy.OutputField(desc="list of internships", type=list[JobListing])

class generate_query(dspy.Signature):
    """
    Generate a query for hybrid search in the Weaviate database by following these rules:
    1. Analyze the resume and extract keywords from skills, education, experience, and projects.
    2. Use those keywords to generate a query to search the Weaviate database.
    3. The query should be keyword-based, to find the best internships for the resume.
    """

    context = dspy.InputField(desc="Resume")
    query = dspy.OutputField(desc="query in simple string format")

def main():
    file = st.file_uploader("Upload Resume to get started", type=["pdf"])
    my_bar.progress(0, text="Starting...")

    if file is not None:
        msg = st.toast("Resume Uploaded")
        if check_resume(file):
            with st.status("Extracting Details from Resume"):
                resume = resume_into_json(file)
                st.write(resume)

            analysis = Internship_finder()

            my_bar.progress(30, text="Finding Internships")

            generate = analysis(resume)
            print(generate)

            # The LM returns the literal string "None" when nothing matches
            if generate != "None":
                st.subheader("List of Internships:")
                col_company, col_url = st.columns([2, 6])
                interns = json.loads(generate)
                my_bar.progress(100, "Internships Found !!")

                with col_company:
                    for intern in interns:
                        st.link_button(intern["company"], company_url(intern["company"]))

                with col_url:
                    for intern in interns:
                        st.link_button(intern["name"], intern["apply_link"])
                        with st.status("Match Analysis"):
                            st.write(intern["match_analysis"])

            else:
                my_bar.progress(100, "Sorry, No Internships Found for you !!")
                st.write("We are adding more internships every day, please check back later.")

        else:
            st.warning("Invalid File Uploaded !!")
            my_bar.progress(0, text="Invalid File Uploaded")

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
git+https://github.com/stanfordnlp/dspy.git
weaviate-client==4.5.4
streamlit==1.32.2
pydantic
PyPDF2
nltk
cohere

--------------------------------------------------------------------------------
/resume_temp.json:
--------------------------------------------------------------------------------
{
    "links": [{}],
    "Summary": "",
    "Education": [
        {
            "Institution": "",
            "Degree": "",
            "FieldOfStudy": "",
            "StartDate": "",
            "EndDate": "",
            "Achievements": []
        }
    ],
    "Skills": [{}],
    "Projects": [
        {
            "ProjectName": "",
            "StartDate": "",
            "EndDate": "",
            "Responsibilities": []
        }
    ],
    "Experience": [
        {
            "CompanyName": "",
            "Role": "",
            "Location": "",
            "StartDate": "",
            "EndDate": "",
            "Responsibilities": []
        }
    ],
    "Activities": [],
    "Interests": [],
    "AdditionalInformation": ""
}

--------------------------------------------------------------------------------
/tools.py:
--------------------------------------------------------------------------------
import json
from PyPDF2 import PdfReader
import os
import cohere
import requests

# Load the resume template that defines the expected JSON shape
with open("resume_temp.json") as f:
    file = json.load(f)

temp = json.dumps(file)

def resume_into_json(resume):
    cohere_api_key = os.getenv("CO_API_KEY")
    co = cohere.Client(cohere_api_key)

    # Extract the raw text from the uploaded PDF
    pdf_reader = PdfReader(resume)
    text = ""
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        text += page.extract_text()

    prompt = f"Act as a master at extracting data from resumes. Don't give any explanation. Please analyze and convert the resume data from this {text} into JSON, remove data like name, email, or other personal information, and return only the JSON file."
    response = co.generate(
        model='command-r-plus',
        prompt=prompt,
        max_tokens=10000,
        num_generations=1,
        temperature=0.2
    )

    return json.loads(response.generations[0].text)

def company_url(company):
    # Special case: this company's slug does not follow the generic pattern
    if company == "Astranis":
        return "https://www.jeezai.com/companies/astranis-space-technologies"

    company = company.lower().replace(" ", "-")

    return f"https://www.jeezai.com/companies/{company}/"

def get_company_info(company):
    data = requests.post(
        "https://advanced-research-agents.onrender.com",
        json={
            "query": company,
        }
    )
    return data.json()
--------------------------------------------------------------------------------