├── sandbox
│   ├── cover-letter-generator
│   │   ├── src
│   │   │   ├── __init__.py
│   │   │   └── core.py
│   │   ├── requirements.txt
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── README.md
│   │   └── app.py
│   └── hiring-agent
│       ├── .env.local
│       ├── requirements.txt
│       ├── images
│       │   ├── arch-diagram.png
│       │   └── hiring-process-flowchart.png
│       ├── README.md
│       ├── app.py
│       └── utils
│           └── utils.py
└── README.md
/sandbox/cover-letter-generator/src/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/sandbox/hiring-agent/.env.local:
--------------------------------------------------------------------------------
1 | FIRECRAWL_API_KEY=
2 | OPENAI_API_KEY=
--------------------------------------------------------------------------------
/sandbox/hiring-agent/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | openai
3 | firecrawl
4 | PyPDF2
5 | python-dotenv
6 | pydantic
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/requirements.txt:
--------------------------------------------------------------------------------
1 | # Core dependencies
2 | streamlit
3 | openai
4 | firecrawl
5 | PyPDF2
6 | python-dotenv
7 | pydantic
8 |
--------------------------------------------------------------------------------
/sandbox/hiring-agent/images/arch-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/ai-engineer-toolkit/main/sandbox/hiring-agent/images/arch-diagram.png
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/.env.example:
--------------------------------------------------------------------------------
1 | # Template for required environment variables
2 | OPENAI_API_KEY=your-openai-key-here
3 | FIRECRAWL_API_KEY=your-firecrawl-key-here
4 |
--------------------------------------------------------------------------------
/sandbox/hiring-agent/images/hiring-process-flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/ai-engineer-toolkit/main/sandbox/hiring-agent/images/hiring-process-flowchart.png
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/.gitignore:
--------------------------------------------------------------------------------
1 | # Virtual Environment
2 | venv/
3 | env/
4 | .env
5 |
6 | # Python
7 | __pycache__/
8 | *.py[cod]
9 | *.so
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 |
25 | # IDE
26 | .vscode/
27 | .idea/
28 |
29 | # OS
30 | .DS_Store
31 |
32 | .env.local
33 | .env.*.local
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/README.md:
--------------------------------------------------------------------------------
1 | # 📝 AI Cover Letter Generator
2 |
3 | [Python](https://www.python.org/downloads/) ·
4 | [Streamlit](https://streamlit.io) ·
5 | [OpenAI](https://openai.com/) ·
6 | [Code style: black](https://github.com/psf/black) ·
7 | [License: MIT](https://opensource.org/licenses/MIT)
8 |
9 | An AI-powered tool that generates customized cover letters by analyzing your resume and job postings.
10 |
11 | ## 🚀 Features
12 |
13 | - PDF resume parsing
14 | - Automatic job posting analysis
15 | - Parallel processing for faster results
16 | - Customized cover letter generation
17 | - Easy-to-use web interface
18 |
19 | ## 🛠️ Quick Start
20 |
21 | 1. Clone the repository
22 | 2. Install dependencies:
23 |
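24 | ```bash
25 | pip install -r requirements.txt
26 | ```
27 | 
28 | 3. Copy `.env.example` to `.env` and fill in your OpenAI and Firecrawl API keys
29 | 4. Run the app:
30 | 
31 | ```bash
32 | streamlit run app.py
33 | ```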
--------------------------------------------------------------------------------
/sandbox/hiring-agent/README.md:
--------------------------------------------------------------------------------
1 | # An AI agent that helps with resume screening
2 |
3 | [Python](https://www.python.org/downloads/) ·
4 | [Streamlit](https://streamlit.io) ·
5 | [OpenAI](https://openai.com/) ·
6 | [Code style: black](https://github.com/psf/black) ·
7 | [License: MIT](https://opensource.org/licenses/MIT)
8 |
9 | This is a Streamlit app that uses an AI agent to process a job description and candidate resumes, score the candidates, and generate custom email templates.
10 |
11 | 
12 |
13 | ## Setup
14 |
15 | ### 1. Create a Virtual Environment
16 |
17 | Open a terminal and navigate to the project directory. Then run:
18 |
19 | ```bash
20 | # Create a virtual environment in the "venv" folder
21 | python -m venv venv
22 |
23 | # Activate the virtual environment (Linux/macOS)
24 | source venv/bin/activate
25 |
26 | # Activate the virtual environment (Windows)
27 | venv\Scripts\activate
28 | ```
29 |
30 | ### 2. Install Requirements
31 |
32 | Install the dependencies listed in the `requirements.txt` file:
33 |
34 | ```bash
35 | pip install -r requirements.txt
36 | ```
37 |
38 | ### 3. Configure Environment Variables
39 |
40 | Create a `.env` file in the project root and add your API keys (the bundled `.env.local` is a template; note that `load_dotenv()` loads `.env` by default):
41 |
42 | ```dotenv
43 | # .env file
44 | FIRECRAWL_API_KEY=fc-YOUR_FIRECRAWL_API_KEY
45 | OPENAI_API_KEY=sk-YOUR_OPENAI_API_KEY
46 | ```
47 |
48 | ### 4. Run the Streamlit App
49 |
50 | Run the app with the following command:
51 |
52 | ```bash
53 | streamlit run app.py
54 | ```
55 |
56 | Your app should now be running locally (Streamlit serves it at http://localhost:8501 by default).
57 |
58 | ### How It Works
59 |
60 | 
61 |
62 |
63 | **User Input**
64 |
65 | - Job Description: Enter text or provide a URL.
66 | - Resumes: Upload PDF/Word files.
67 |
68 | **Ingestion**
69 |
70 | - Inputs are read and processed.
71 | - Job description URLs are scraped for content.
72 |
73 | **Parsing**
74 |
75 | - The job description is analyzed by an LLM (GPT-4o) to extract essential details.
76 | - Resumes are processed to extract candidate profiles.
77 |
78 | **Scoring and Ranking**
79 |
80 | - Candidates are scored on relevance, experience, and skills.
81 | - An average score is computed, and candidates are sorted in descending order.
82 |
83 |
84 | **Email Generation**
85 |
86 | - Custom email templates are created for top candidates and for rejections.
87 |
88 |
89 | This pipeline uses Streamlit for the interface and LLM-powered functions for text extraction, evaluation, and communication, automating the resume screening process.
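90 | 
91 | For reference, each scored candidate follows the `CandidateScore` model in `utils/utils.py`, and ranking adds an `avg_score` field. An illustrative (made-up) entry:
92 | 
93 | ```json
94 | {
95 |   "name": "Jane Doe",
96 |   "relevance": 82,
97 |   "experience": 74,
98 |   "skills": 88,
99 |   "overall": 80,
100 |   "comment": "Strong skills match; slightly less domain experience than required.",
101 |   "avg_score": 81.0
102 | }
103 | ```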
--------------------------------------------------------------------------------
/sandbox/hiring-agent/app.py:
--------------------------------------------------------------------------------
1 | # app.py
2 | import streamlit as st
3 | import pandas as pd
4 | from utils.utils import (
5 | ingest_inputs,
6 | parse_job_description,
7 | parse_resumes,
8 | score_candidates,
9 | rank_candidates,
10 | generate_email_templates,
11 | )
12 | import asyncio
13 |
14 |
15 | # Main App Title
16 | st.title("Resume Screening Agent")
17 |
18 | # Input section for job description
19 | st.header("Job Description Input")
20 | job_description = st.text_area("Paste the job description or URL", height=150)
21 |
22 | # Input section for candidate resumes
23 | st.header("Candidate Resumes")
24 | resume_files = st.file_uploader(
25 | "Upload resume files (PDF/Word)",
26 | type=["pdf", "docx", "doc"],
27 | accept_multiple_files=True,
28 | )
29 |
30 | st.header("Candidates to Select")
31 | num_candidates = st.slider(
32 | "Select the number of candidates to invite for an interview", 1, 4, 2
33 | )
34 |
35 |
36 | # Button to trigger the agent
37 | if st.button("Run Agent"):
38 | if not job_description:
39 | st.error("Please provide a job description or URL.")
40 | elif not resume_files:
41 | st.error("Please upload at least one resume file.")
42 | else:
43 | st.markdown("### Your AI Agent is now processing your inputs...")
44 | status_text = st.empty() # placeholder for status updates
45 |
46 | # Step 1: processing resumes
47 | with st.spinner("Step 1: Processing Inputs..."):
48 |             # ingest_inputs is async, so run it to completion on the event loop
49 |             raw_data = asyncio.run(ingest_inputs(job_description, resume_files))
50 | status_text.text("Step 1 complete: Inputs processed.")
51 | with st.expander("View Processed Inputs", expanded=False):
52 | st.json(raw_data)
53 |
54 | # Step 2: processing Job description
55 | with st.spinner("Step 2: Processing Job Description & Resume..."):
56 | parsed_requirements = asyncio.run(parse_job_description(raw_data))
57 | parsed_resumes = asyncio.run(parse_resumes(resume_files))
58 | status_text.text("Step 2 complete: Job description & Resume processed.")
59 | with st.expander("View Parsed Job Description", expanded=False):
60 | st.json(parsed_requirements)
61 | with st.expander("View processed Resume", expanded=False):
62 | st.json(parsed_resumes)
63 |
64 | # Step 3: Score candidates based on the parsed data
65 | with st.spinner("Step 3: Scoring candidates..."):
66 | status_text.text("Step 3: Scoring candidates...")
67 | candidate_scores = asyncio.run(
68 | score_candidates(parsed_requirements, parsed_resumes)
69 | )
70 | status_text.text("Step 3 complete: Candidates scored.")
71 | with st.expander("View Resume Summaries", expanded=False):
72 | st.json(candidate_scores)
73 |
74 | # Step 4: Rank the candidates
75 | with st.spinner("Step 4: Ranking candidates..."):
76 | status_text.text("Step 4: Ranking candidates...")
77 | ranked_candidates = rank_candidates(candidate_scores)
78 | status_text.text("Step 4 complete: Candidates ranked.")
79 | with st.expander("View Ranked Candidates", expanded=False):
80 | st.json(ranked_candidates)
81 |
82 | # Step 5: Generate email templates for top candidates and others
83 | with st.spinner("Step 5: Generating email templates..."):
84 | status_text.text("Step 5: Generating email templates...")
85 |             # 'num_candidates' comes from the slider above (top X candidates to invite)
86 | email_templates = asyncio.run(
87 | generate_email_templates(
88 | ranked_candidates, parsed_requirements, num_candidates
89 | )
90 | )
91 | status_text.text("Step 5 complete: Email templates generated.")
92 | with st.expander("View Email Templates", expanded=False):
93 | st.json(email_templates)
94 |
95 | # Final update
96 | status_text.text("Agent processing complete! Your results are ready.")
97 |
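98 | 
99 | # Launch locally with: streamlit run app.py
100 | # (requires FIRECRAWL_API_KEY and OPENAI_API_KEY in a .env file; see the README)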
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from openai import AsyncOpenAI
3 | from firecrawl import FirecrawlApp
4 | import os
5 | from dotenv import load_dotenv
6 | import asyncio
7 | from src.core import process_cover_letter_request
8 |
9 |
10 | # Load environment variables
11 | load_dotenv()
12 |
13 | # Initialize API clients
14 | openai_client = AsyncOpenAI(api_key=os.getenv('OPENAI_API_KEY'))
15 | firecrawl_client = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API_KEY'))
16 |
17 | def main():
18 | st.set_page_config(
19 | page_title="AI Cover Letter Generator",
20 | page_icon="📝",
21 | layout="wide"
22 | )
23 |
24 | st.title("📝 AI Cover Letter Generator")
25 | st.write("Upload your resume and provide a job posting URL to generate a customized cover letter.")
26 |
27 | # Input section
28 | col1, col2 = st.columns(2)
29 | with col1:
30 | uploaded_file = st.file_uploader("Upload your resume (PDF)", type=['pdf'])
31 | with col2:
32 | job_url = st.text_input("Enter job posting URL")
33 |
34 | if st.button("Generate Cover Letter", type="primary"):
35 | if uploaded_file is not None and job_url:
36 | try:
37 | # Create a placeholder for the progress messages
38 | progress_placeholder = st.empty()
39 |
40 | async def process_with_status():
41 | # Step 1: Processing PDF
42 | progress_placeholder.info("📄 Processing your resume...")
43 |
44 | # Step 2: Parallel Processing
45 | progress_placeholder.info("🔍 Analyzing resume and job posting...")
46 |
47 | cover_letter = await process_cover_letter_request(
48 | uploaded_file,
49 | job_url,
50 | openai_client,
51 | firecrawl_client
52 | )
53 |
54 | # Step 3: Final Generation
55 | progress_placeholder.info("✍️ Generating your cover letter...")
56 |
57 | return cover_letter
58 |
59 | # Run the async function
60 | cover_letter = asyncio.run(process_with_status())
61 |
62 | if cover_letter:
63 | # Clear the progress message
64 | progress_placeholder.empty()
65 |
66 | # Display success and results
67 | st.success("✨ Your cover letter has been generated!")
68 |
69 | # Create tabs for different views
70 | tab1, tab2 = st.tabs(["📄 View", "📋 Copy & Download"])
71 |
72 | with tab1:
73 | st.markdown("### Your Cover Letter")
74 | st.markdown(cover_letter)
75 |
76 | with tab2:
77 | st.text_area(
78 | "Copy your cover letter",
79 | value=cover_letter,
80 | height=400
81 | )
82 |
83 | # Single download button for TXT
84 | st.download_button(
85 | label="📥 Download as TXT",
86 | data=cover_letter,
87 | file_name="cover_letter.txt",
88 | mime="text/plain",
89 | help="Click to download your cover letter as a text file"
90 | )
91 | else:
92 | progress_placeholder.empty()
93 | st.error("Failed to generate cover letter. Please try again.")
94 |
95 | except Exception as e:
96 | st.error(f"An error occurred: {str(e)}")
97 | else:
98 | st.warning("Please upload a PDF resume and provide a job posting URL.")
99 |
100 | # Add helpful instructions
101 | with st.expander("ℹ️ How to use"):
102 | st.write("""
103 | 1. Upload your resume in PDF format
104 | 2. Paste the URL of the job posting you're interested in
105 | 3. Click 'Generate Cover Letter'
106 | 4. View, copy, or download your customized cover letter
107 | """)
108 |
109 | if __name__ == "__main__":
110 | main()
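111 | 
112 | # Launch locally with: streamlit run app.py
113 | # (expects OPENAI_API_KEY and FIRECRAWL_API_KEY in a .env file; see .env.example)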
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/src/core.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | from pydantic import BaseModel, Field
3 | import tempfile
4 | import os
5 | import asyncio
6 | import logging
7 | from openai import AsyncOpenAI
8 | from firecrawl import FirecrawlApp
9 | import PyPDF2
10 |
11 | # Set up logging
12 | logging.basicConfig(level=logging.INFO)
13 | logger = logging.getLogger(__name__)
14 |
15 | # --------------------------------------------------------------
16 | # Data Models with Pydantic
17 | # --------------------------------------------------------------
18 |
19 | class ResumeExtraction(BaseModel):
20 | experience: List[str] = Field(description="List of work experiences")
21 | skills: List[str] = Field(description="List of skills")
22 | education: List[str] = Field(description="List of education details")
23 | contact_info: str = Field(description="Contact information")
24 |
25 | class JobExtraction(BaseModel):
26 | title: str = Field(description="Job title")
27 | company: str = Field(description="Company name")
28 | requirements: List[str] = Field(description="Job requirements")
29 | description: str = Field(description="Job description")
30 |
31 | class CoverLetter(BaseModel):
32 | content: str = Field(description="Generated cover letter text")
33 |
34 | # --------------------------------------------------------------
35 | # Step 2. Create 3 functions
36 | # --------------------------------------------------------------
37 |
38 | async def extract_resume_info(client: AsyncOpenAI, pdf_text: str) -> Optional[ResumeExtraction]:
39 | """First LLM call: Extract structured information from resume"""
40 | try:
41 | completion = await client.chat.completions.create(
42 | model="gpt-4",
43 | messages=[
44 | {
45 | "role": "system",
46 | "content": """You are a resume parser. Return ONLY a JSON object with this structure:
47 | {
48 | "experience": ["list of work experiences"],
49 | "skills": ["list of technical and soft skills"],
50 | "education": ["list of education details"],
51 | "contact_info": "full contact information"
52 | }
53 | IMPORTANT: Return ONLY valid JSON, no other text."""
54 | },
55 | {
56 | "role": "user",
57 | "content": pdf_text
58 | }
59 | ]
60 | )
61 | response_text = completion.choices[0].message.content.strip()
62 | logger.info(f"Resume LLM Response: {response_text}")
63 | return ResumeExtraction.model_validate_json(response_text)
64 | except Exception as e:
65 | logger.error(f"Resume extraction failed: {str(e)}")
66 | logger.error(f"Response was: {response_text if 'response_text' in locals() else 'No response'}")
67 | return None
68 |
69 | async def extract_job_info(client: AsyncOpenAI, job_content: str) -> Optional[JobExtraction]:
70 | """Second LLM call: Extract structured information from job posting"""
71 | try:
72 | # Convert job_content to string and handle potential None
73 | job_text = str(job_content) if job_content is not None else ""
74 | logger.info(f"Job content type: {type(job_text)}")
75 |
76 | completion = await client.chat.completions.create(
77 | model="gpt-4",
78 | messages=[
79 | {
80 | "role": "system",
81 | "content": """You are a job posting parser. Return ONLY a JSON object with this structure:
82 | {
83 | "title": "exact job title",
84 | "company": "company name",
85 | "requirements": ["list of key requirements"],
86 | "description": "brief job description"
87 | }
88 | IMPORTANT: Return ONLY valid JSON, no other text."""
89 | },
90 | {
91 | "role": "user",
92 | "content": job_text
93 | }
94 | ]
95 | )
96 | response_text = completion.choices[0].message.content.strip()
97 | logger.info(f"Job LLM Response: {response_text}")
98 | return JobExtraction.model_validate_json(response_text)
99 | except Exception as e:
100 | logger.error(f"Job info extraction failed: {str(e)}")
101 | logger.error(f"Job content was: {job_text if 'job_text' in locals() else 'No content'}")
102 | return None
103 |
104 | async def generate_cover_letter(
105 | client: AsyncOpenAI,
106 | resume_info: ResumeExtraction,
107 | job_info: JobExtraction
108 | ) -> Optional[str]:
109 | """Third LLM call: Generate the cover letter using the results from previous async calls"""
110 | try:
111 | completion = await client.chat.completions.create(
112 | model="gpt-4",
113 | messages=[
114 | {
115 | "role": "system",
116 | "content": """Write a compelling cover letter following these guidelines:
117 | 1. Start with a strong hook about the company/role
118 | 2. Focus on relevant achievements matching job requirements
119 | 3. Use specific metrics from past experience
120 | 4. Keep it concise (300-400 words)
121 | 5. End with a confident call to action"""
122 | },
123 | {
124 | "role": "user",
125 | "content": f"Resume: {resume_info.model_dump()}\nJob: {job_info.model_dump()}"
126 | }
127 | ]
128 | )
129 | return completion.choices[0].message.content
130 | except Exception as e:
131 | logger.error(f"Cover letter generation failed: {str(e)}")
132 | return None
133 |
134 | # --------------------------------------------------------------
135 | # Main Processing Function
136 | # --------------------------------------------------------------
137 |
138 | async def process_cover_letter_request(
139 | pdf_file,
140 | job_url: str,
141 | openai_client: AsyncOpenAI,
142 | firecrawl_client: FirecrawlApp
143 | ) -> Optional[str]:
144 | """Main async function that chains all the processing steps together"""
145 | try:
146 | # Save uploaded file temporarily
147 | with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
148 | temp_file.write(pdf_file.read())
149 | temp_path = temp_file.name
150 |
151 | # Extract text from PDF
152 | with open(temp_path, 'rb') as file:
153 | pdf_reader = PyPDF2.PdfReader(file)
154 |             pdf_text = ' '.join((page.extract_text() or '') for page in pdf_reader.pages)  # extract_text() may return None
155 | logger.info(f"Extracted PDF text length: {len(pdf_text)}")
156 |
157 | # Get job content
158 | try:
159 | job_content = firecrawl_client.scrape_url(job_url)
160 | if not isinstance(job_content, str):
161 | job_content = str(job_content)
162 | logger.info(f"Job content type after conversion: {type(job_content)}")
163 | except Exception as e:
164 | logger.error(f"Error scraping job URL: {str(e)}")
165 | return None
166 |
167 | # Process resume and job info in parallel
168 | resume_info, job_info = await asyncio.gather(
169 | extract_resume_info(openai_client, pdf_text),
170 | extract_job_info(openai_client, job_content)
171 | )
172 |
173 | if not resume_info or not job_info:
174 | logger.error("Failed to extract either resume or job information")
175 | return None
176 |
177 | # Generate cover letter
178 | cover_letter = await generate_cover_letter(openai_client, resume_info, job_info)
179 |
180 | if not cover_letter:
181 | return None
182 |
183 | return cover_letter
184 |
185 | except Exception as e:
186 | logger.error(f"Error processing cover letter request: {str(e)}")
187 | return None
188 | finally:
189 | if 'temp_path' in locals():
190 | os.unlink(temp_path)
191 |
192 |
193 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AI Engineer Toolkit 🛠️
2 |
3 | Welcome to the **AI Engineer Toolkit**!
4 |
5 | This toolkit is designed to help software engineers and non-engineers build production-grade AI applications from scratch.
6 | 
7 | You'll find popular frameworks, tools, and resources here to help you ship one AI project at a time!
8 |
9 |
10 |
11 |
12 | ## Getting Started
13 |
14 | - If you are **new to software engineering**, check out the fundamentals folder.
15 |
16 | - If you need **inspiration**, check out example projects inside the [sandbox](./sandbox).
17 |
18 | - If you are **ready to build**, check out the table below.
19 |
20 | - If you want **more guidance**, check out the upcoming [AI Engineering Bootcamp](https://dub.sh/Z0JHySz) that starts on March 10th, 2025.
21 |
22 |
23 |
24 |
25 | ## Tools & Resources for AI Engineers 🏗️
26 |
27 | Explore the table below to find the right tools for your AI projects.
28 | Each category includes cost details, use cases, and links to official documentation.
29 |
30 |
31 |
32 |
33 | | Category | Tool Name | Cost Model | Use Case | Link |
34 | |----------------------|--------------------------|---------------|-------------------------------------------|--------------------------------------------|
35 | |😎 **Model Providers** | | | | |
36 | | | OpenAI API | Paid API | GPT models | [OpenAI](https://platform.openai.com/docs/) |
37 | |                      | Grok (xAI)               | Paid API      | Human-like reasoning for complex queries  | [Grok](https://docs.x.ai/) |
38 | | | Google Gemini | Paid API | Multimodal AI for text and images | [Gemini](https://cloud.google.com/gemini) |
39 | | | Hugging Face | Freemium | Open-source models and hosted inference | [Hugging Face](https://huggingface.co/docs) |
40 | | | Anthropic Claude | Paid API | Safe, interpretable language models | [Claude](https://docs.anthropic.com/) |
41 | |📝 **Prompt Engineering** | | | | |
42 | | | DSPy | Open-Source | Automate prompt optimization with code | [DSPy](https://dspy-docs.vercel.app/) |
43 | | | LangChain | Open-Source | Build prompt-driven AI workflows | [LangChain](https://python.langchain.com/docs/) |
44 | | | Haystack | Open-Source | Optimize prompts for RAG pipelines | [Haystack](https://haystack.deepset.ai/) |
45 | |                      | Agenta                   | Open-Source   | Experiment and deploy prompt variants     | [Agenta](https://www.agenta.ai/) |
46 | |⚙️ **Backend** | | | | |
47 | | | FastAPI | Open-Source | Fast, modern Python APIs | [FastAPI](https://fastapi.tiangolo.com/) |
48 | | | Flask | Open-Source | Lightweight web apps and APIs | [Flask](https://flask.palletsprojects.com/) |
49 | | | Django | Open-Source | Robust web framework with ORM | [Django](https://docs.djangoproject.com/) |
50 | | | Node.js (Express) | Open-Source | Scalable backend with JavaScript | [Express](https://expressjs.com/) |
51 | |💻 **Frontend** | | | | |
52 | | | AI SDK (Vercel) | Open-Source | AI-ready UI components for React | [AI SDK](https://sdk.vercel.ai/docs) |
53 | | | Streamlit | Open-Source | Rapid prototyping for data apps | [Streamlit](https://docs.streamlit.io/) |
54 | | | Gradio | Open-Source | Interactive ML model interfaces | [Gradio](https://gradio.app/docs/) |
55 | | | Dash | Open-Source | Data-driven dashboards with Python | [Dash](https://dash.plotly.com/) |
56 | | 🎈 **Fullstack** | | | | |
57 | | | Next.js | Open-Source | React with SSR and built-in API routes | [Next.js](https://nextjs.org/docs) |
58 | | | SvelteKit | Open-Source | Lightweight full-stack with Svelte | [SvelteKit](https://kit.svelte.dev/docs) |
59 | | | Nuxt.js | Open-Source | Vue.js with server-side rendering | [Nuxt.js](https://nuxt.com/docs) |
60 | | | Meteor | Open-Source | Full-stack JavaScript framework | [Meteor](https://docs.meteor.com/) |
61 | | 📀 **Databases** | | | | |
62 | | | Supabase | Freemium | Open-source Firebase alternative | [Supabase](https://supabase.com/docs) |
63 | | | PostgreSQL | Open-Source | Reliable relational database | [PostgreSQL](https://www.postgresql.org/docs/) |
64 | | | MongoDB | Open-Source | Flexible NoSQL for unstructured data | [MongoDB](https://docs.mongodb.com/) |
65 | | | SQLite | Open-Source | Lightweight, serverless database | [SQLite](https://www.sqlite.org/docs.html) |
66 | | 🔢 **Vector Databases** | | | | |
67 | | | ChromaDB | Open-Source | Simple vector store for embeddings | [ChromaDB](https://docs.trychroma.com/) |
68 | | | Pinecone | Paid API | Scalable, hosted vector search | [Pinecone](https://docs.pinecone.io/) |
69 | | | Weaviate | Open-Source | GraphQL-based vector database | [Weaviate](https://weaviate.io/developers/weaviate) |
70 | | | Qdrant | Open-Source | High-performance vector search | [Qdrant](https://qdrant.tech/documentation/) |
71 | | 📻 **Fine-tuning** | | | | |
72 | | | OpenAI Fine-Tuning | Paid API | Customize GPT models for your needs | [OpenAI Fine-Tuning](https://platform.openai.com/docs/guides/fine-tuning) |
73 | | | Hugging Face Trainer | Open-Source | Fine-tune transformers easily | [Hugging Face Trainer](https://huggingface.co/docs/transformers/training) |
74 | | | Ludwig | Open-Source | Declarative fine-tuning for ML models | [Ludwig](https://ludwig.ai/latest/) |
75 | | | PyTorch Lightning | Open-Source | Structured fine-tuning with PyTorch | [PyTorch Lightning](https://lightning.ai/docs/pytorch) |
76 | | 🤖 **Agent Development**| | | | |
77 | | | LangGraph | Open-Source | Stateful agents with LangChain | [LangGraph](https://langchain-ai.github.io/langgraph/) |
78 | |                         | AG2 (formerly AutoGen)   | Open-Source   | Multi-agent orchestration               | [AG2](https://github.com/ag2ai/ag2) |
79 | | | CrewAI | Open-Source | Multi-agent workflows for automation | [CrewAI](https://docs.crewai.com/) |
80 | | | AutoGen | Open-Source | Conversational agents with flexibility | [AutoGen](https://microsoft.github.io/autogen/) |
81 | | 🕸️ **Web Data Access** | | | | |
82 | | | Firecrawl | Freemium | Web scraping with caching | [Firecrawl](https://firecrawl.dev/) |
83 | | | Jina AI Reader API | Freemium | Extract structured data from web pages | [Jina AI](https://jina.ai/reader) |
84 | | | Puppeteer | Open-Source | Dynamic content scraping via browser | [Puppeteer](https://pptr.dev/) |
85 | | | Scrapy | Open-Source | Powerful web crawling framework | [Scrapy](https://docs.scrapy.org/) |
86 | | 🤔 **Evaluation Frameworks** | | | | |
87 | | | RAGAS | Open-Source | Evaluate RAG pipelines | [RAGAS](https://docs.ragas.io/) |
88 | | | DeepEval | Open-Source | Custom metrics for LLM evaluation | [DeepEval](https://docs.deepeval.com/) |
89 | | | LangSmith | Freemium | Debug and evaluate LLM apps | [LangSmith](https://docs.smith.langchain.com/) |
90 | | | TruLens | Open-Source | Ground truth evaluation for LLMs | [TruLens](https://trulens.org/) |
91 | | 📊 **Monitoring & Logging** | | | | |
92 | | | LangSmith | Freemium | Monitor and trace LLM applications | [LangSmith](https://docs.smith.langchain.com/) |
93 | | | Sentry | Freemium | Real-time error tracking | [Sentry](https://docs.sentry.io/) |
94 | | | Prometheus | Open-Source | Metrics and alerting for systems | [Prometheus](https://prometheus.io/docs/) |
95 | | | Grafana | Open-Source | Visualize logs and metrics | [Grafana](https://grafana.com/docs/) |
96 | | 🔊 **Audio** | | | | |
97 | | | Whisper (OpenAI) | Open-Source | High-accuracy speech-to-text | [Whisper](https://github.com/openai/whisper) |
98 | | | DeepSpeech | Open-Source | End-to-end speech recognition | [DeepSpeech](https://github.com/mozilla/DeepSpeech) |
99 | | | Coqui TTS | Open-Source | Text-to-speech with diverse voices | [Coqui TTS](https://docs.coqui.ai/) |
100 | | | SpeechBrain | Open-Source | All-in-one audio processing toolkit | [SpeechBrain](https://speechbrain.github.io/) |
101 | | 👀 **Vision** | | | | |
102 | | | YOLO | Open-Source | Real-time object detection | [YOLO](https://docs.ultralytics.com/) |
103 | | | OpenCV | Open-Source | Versatile computer vision library | [OpenCV](https://docs.opencv.org/) |
104 | | | Detectron2 | Open-Source | Advanced detection and segmentation | [Detectron2](https://detectron2.readthedocs.io/) |
105 | | | CLIP (OpenAI) | Open-Source | Image-text understanding and search | [CLIP](https://github.com/openai/CLIP) |
106 | | 🚀 **Deployment Tools** | | | | |
107 | | | Vercel | Freemium | Deploy full-stack apps with ease | [Vercel](https://vercel.com/docs) |
108 | | | Docker | Open-Source | Containerize apps for consistent deployment | [Docker](https://docs.docker.com/) |
109 | | | Kubernetes | Open-Source | Orchestrate and scale containerized apps | [Kubernetes](https://kubernetes.io/docs/) |
110 | | | Heroku | Freemium | Quick deployment for prototypes and MVPs | [Heroku](https://devcenter.heroku.com/) |
111 |
--------------------------------------------------------------------------------
/sandbox/hiring-agent/utils/utils.py:
--------------------------------------------------------------------------------
1 | # utils/utils.py
2 | import os
3 | import json
4 | import tempfile
5 | from typing import Any, Dict, List, Optional
6 | 
7 | import PyPDF2
8 | from dotenv import load_dotenv
9 | from firecrawl import FirecrawlApp
10 | from openai import OpenAI
11 | from pydantic import BaseModel, Field
12 | 
13 | 
14 | load_dotenv()
15 |
16 |
17 | app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
18 | openai_api_key = os.environ.get("OPENAI_API_KEY")
19 | openai_client = OpenAI(api_key=openai_api_key)
20 |
21 |
22 | class CandidateScore(BaseModel):
23 | name: str = Field(..., description="Candidate's name")
24 | relevance: int = Field(
25 | ...,
26 | description="How relevant the candidate's resume is to the job description (0-100)",
27 | )
28 | experience: int = Field(
29 | ..., description="Candidate's match in terms of work experience (0-100)"
30 | )
31 | skills: int = Field(..., description="Candidate's match based on skills (0-100)")
32 | overall: int = Field(..., description="Overall score (0-100)")
33 | comment: str = Field(
34 |         ..., description="A brief comment explaining the rationale behind the scores"
35 | )
36 |
37 |
38 | class Resume(BaseModel):
39 | name: str = Field(..., description="Candidate's full name")
40 | work_experiences: List[str] = Field(..., description="List of work experiences")
41 | location: str = Field(..., description="Candidate's location")
42 | skills: List[str] = Field(..., description="List of candidate's skills")
43 | education: List[str] = Field(..., description="Educational background")
44 | summary: Optional[str] = Field(
45 | None, description="A short summary or objective statement"
46 | )
47 | certifications: Optional[List[str]] = Field(
48 | None, description="List of certifications"
49 | )
50 | languages: Optional[List[str]] = Field(
51 | None, description="Languages spoken by the candidate"
52 | )
53 |
54 |
55 | class JobDescription(BaseModel):
56 | title: str
57 | company: str
58 | location: str
59 | requirements: list[str]
60 | responsibilities: list[str]
61 |
62 |
63 | async def ingest_inputs(
64 | job_description: str, resume_files: List[Any]
65 | ) -> Dict[str, Any]:
66 | """
67 | Ingests the job description and resume files.
68 |
69 | Parameters:
70 | job_description (str): The job description text or URL.
71 | resume_files (List[Any]): List of uploaded resume files.
72 |
73 | Returns:
74 | dict: A dictionary with two keys:
75 | - "job_description": The processed job description (in markdown).
76 | - "resumes": A list of resume file names.
77 | """
78 | # Determine if job_description is a URL.
79 | if job_description.startswith("http"):
80 | try:
81 | result = app.scrape_url(job_description, params={"formats": ["markdown"]})
82 | # Check if markdown data is present in the result.
83 | if not result or "markdown" not in result:
84 | raise ValueError("Scraping did not return markdown data.")
85 | job_desc_text = result.get("markdown", "")
86 | except Exception as e:
87 | raise Exception(f"Failed to scrape the job description URL: {e}")
88 | else:
89 | job_desc_text = job_description
90 | resumes = [file.name for file in resume_files]
91 | return {"job_description": job_desc_text, "resumes": resumes}
92 |
93 |
94 | def call_llm(messages: list, response_format: Optional[type[BaseModel]] = None) -> str:
95 |     """
96 |     Calls the OpenAI model with the provided chat messages and returns the response text.
97 | 
98 |     Parameters:
99 |         messages (list): The chat messages to send to the LLM.
100 |         response_format: Optional Pydantic model describing the expected structured output.
101 | 
102 |     Returns:
103 |         str: The LLM's response content.
104 |     """
105 |     params = {"model": "gpt-4o-2024-08-06", "messages": messages}
106 | 
107 |     if response_format:
108 |         params["response_format"] = response_format
109 | 
110 |     response = openai_client.beta.chat.completions.parse(**params)
111 | 
112 |     return response.choices[0].message.content
113 |
114 |
115 | async def parse_job_description(data: Dict[str, Any]) -> Dict[str, Any]:
116 | """
117 | Parses the job description to extract key requirements in a structured format.
118 |
119 | This function takes the ingested job description (which might be scraped from a URL)
120 |     and uses an LLM (GPT-4o) to extract and return only the essential job details.
121 | Extraneous content from the scraped page is removed.
122 |
123 | Parameters:
124 | data (dict): Dictionary containing the job description details, with a key "job_description".
125 |
126 | Returns:
127 |         dict: A dictionary with the structured job description containing the keys:
128 |               "title", "company", "location", "requirements", and "responsibilities".
129 |
130 | Raises:
131 | Exception: If the LLM call fails or the returned JSON cannot be parsed.
132 | """
133 | job_text = data.get("job_description", "")
134 | if not job_text:
135 | raise ValueError("No job description text provided.")
136 |
137 | # Build the prompt for the LLM
138 | prompt = (
139 | "Extract the key job information from the text below. Return only valid JSON "
140 | "with the following keys: title, company, location, requirements, responsibilities, benefits, experience. "
141 | "Do not include any extraneous information.\n\n"
142 | "Job description:\n" + job_text
143 | )
144 | messages = [
145 | {
146 | "role": "system",
147 | "content": (
148 | "You are an assistant that extracts key job description information from text. "
149 | "Return only the job details in valid JSON format using the keys: "
150 | "title, company, location, requirements (as a list), responsibilities (as a list), "
151 | "benefits (as a list), and experience."
152 | ),
153 | },
154 | {"role": "user", "content": prompt},
155 | ]
156 |
157 | try:
158 |         llm_output = call_llm(messages, response_format=JobDescription)
159 | # Parse the JSON returned by the LLM
160 | structured_jd = json.loads(llm_output)
161 | except Exception as e:
162 | raise Exception(f"Error parsing job description: {e}")
163 |
164 | return structured_jd
165 |
166 |
167 | async def parse_resumes(resume_files: List[Any]) -> Dict[str, Any]:
168 | """
169 | Parses resume files to extract candidate information.
170 |
171 | This function reads each uploaded resume file and uses an LLM (via the call_llm helper)
172 | to extract candidate details. The LLM is asked to return only valid JSON following the
173 | schema defined by the Resume Pydantic model. The expected JSON should include keys such as:
174 |
175 | {
176 | "name": string,
177 | "work_experiences": list[string],
178 | "location": string,
179 | "skills": list[string],
180 | "education": list[string],
181 | "summary": string (optional),
182 | "certifications": list[string] (optional),
183 | "languages": list[string] (optional)
184 | }
185 |
186 | Parameters:
187 | resume_files (List[Any]): List of uploaded resume file objects (e.g., from Streamlit's file uploader).
188 |
189 | Returns:
190 | dict: A dictionary with a key "parsed_resumes" that is a list of parsed resume details.
191 |
192 | Raises:
193 | Exception: If any LLM call or JSON parsing fails.
194 | """
195 | parsed_resumes = []
196 | for resume in resume_files:
197 | # Save uploaded file temporarily
198 | with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
199 | temp_file.write(resume.read())
200 | temp_path = temp_file.name
201 |
202 |         # Extract text from the PDF (note: PyPDF2 cannot parse Word files)
203 |         with open(temp_path, "rb") as file:
204 |             pdf_reader = PyPDF2.PdfReader(file)
205 |             pdf_text = " ".join((page.extract_text() or "") for page in pdf_reader.pages)
206 |         os.unlink(temp_path)  # clean up the temp file, then build messages for the LLM
207 | messages = [
208 | {
209 | "role": "system",
210 | "content": (
211 |                     "You are an assistant that extracts candidate resume details. "
212 |                     "Return only valid JSON matching the provided schema."
213 | ),
214 | },
215 | {
216 | "role": "user",
217 | "content": f"Extract resume details from the following resume text:\n\n{pdf_text}",
218 | },
219 | ]
220 |
221 | try:
222 | # Call the LLM to process the resume text.
223 | # Pass the JSON schema (as a string) to instruct the LLM on the expected format.
224 |             llm_response = call_llm(messages, response_format=Resume)
225 | # Parse the JSON response from the LLM.
226 | parsed_resume = json.loads(llm_response)
227 | except Exception as e:
228 | parsed_resume = {"error": f"Failed to parse resume using LLM: {e}"}
229 |
230 | parsed_resumes.append(parsed_resume)
231 | return {"parsed_resumes": parsed_resumes}
232 |
233 |
234 | async def score_candidates(
235 | parsed_requirements: Dict[str, Any], parsed_resumes: Dict[str, Any]
236 | ) -> List[Dict[str, Any]]:
237 | """
238 | Scores candidates based on the parsed job description and resume data.
239 | Parameters:
240 | parsed_requirements (dict): Parsed job description data.
241 | Expected to have a key "parsed_requirements" with the job description details.
242 | parsed_resumes (dict): Parsed resume data.
243 | Expected to have a key "parsed_resumes" which is a list of candidate details.
244 |
245 | Returns:
246 | list: A list of dictionaries with candidate scores as per the CandidateScore model.
247 |
248 | Raises:
249 | Exception: If any LLM call or JSON parsing fails.
250 | """
251 | candidate_scores = []
252 |
253 | job_description_text = json.dumps(parsed_requirements)
254 | resume_list = parsed_resumes.get("parsed_resumes", [])
255 | for candidate in resume_list:
256 | # Build messages for the LLM.
257 | messages = [
258 | {
259 | "role": "system",
260 | "content": (
261 | "You are an unbiased hiring manager. Compare the following job description with the candidate's resume and provide "
262 | "scores (0-100) for relevance, experience, and skills. Also compute an overall score that reflects the candidate's fit "
263 |                     "and provide a comment explaining your evaluation. Return only valid JSON matching the provided schema."
264 | ),
265 | },
266 | {
267 | "role": "user",
268 | "content": (
269 | f"Job Description:\n{job_description_text}\n\n"
270 | f"Candidate Resume:\n{json.dumps(candidate)}"
271 | ),
272 | },
273 | ]
274 |
275 | try:
276 |             llm_response = call_llm(messages, response_format=CandidateScore)
277 | score_data = json.loads(llm_response)
278 | score_data["resume"] = candidate
279 | except Exception as e:
280 | # In case of an error, record a default score with error comment.
281 | score_data = {
282 | "name": candidate.get("name", "Unknown"),
283 | "relevance": 0,
284 | "experience": 0,
285 | "skills": 0,
286 | "overall": 0,
287 | "comment": f"Error during evaluation: {e}",
288 | }
289 |
290 | candidate_scores.append(score_data)
291 |
292 | return candidate_scores
293 |
294 |
295 | def rank_candidates(candidate_scores: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
296 | """
297 | Ranks candidates based on the average of their overall scores.
298 |
299 | For each candidate, this function calculates the average score from the keys:
300 | "relevance", "experience", "skills", and "overall". It adds a new key "avg_score"
301 | to each candidate's dictionary and then returns the sorted list in descending order.
302 |
303 | Parameters:
304 | candidate_scores (list): List of candidate score dictionaries.
305 |
306 | Returns:
307 | list: Sorted list of candidate scores in descending order based on avg_score.
308 | """
309 | for candidate in candidate_scores:
310 | # Compute the average of the relevant scores.
311 | relevance = candidate.get("relevance", 0)
312 | experience = candidate.get("experience", 0)
313 | skills = candidate.get("skills", 0)
314 | overall = candidate.get("overall", 0)
315 | candidate["avg_score"] = (relevance + experience + skills + overall) / 4.0
316 |
317 | # Return the sorted list of candidates based on avg_score.
318 | return sorted(
319 | candidate_scores, key=lambda candidate: candidate["avg_score"], reverse=True
320 | )
321 |
322 |
323 | async def generate_email_templates(
324 | ranked_candidates: List[Dict[str, Any]], job_description: Dict[str, Any], top_x: int
325 | ) -> Dict[str, List[Dict[str, Any]]]:
326 | """
327 | Generates custom email templates using an LLM for each candidate.
328 | Parameters:
329 | ranked_candidates (list): List of candidate score dictionaries.
330 | job_description (dict): The structured job description.
331 | top_x (int): Number of top candidates to invite for a call.
332 |
333 | Returns:
334 | dict: A dictionary with two keys:
335 | - "invitations": A list of dictionaries with candidate "name" and "email_body" for invitations.
336 | - "rejections": A list of dictionaries with candidate "name" and "email_body" for rejections.
337 |
338 | Raises:
339 | Exception: If the LLM call fails for any candidate.
340 | """
341 | invitations = []
342 | rejections = []
343 |
344 | for idx, candidate in enumerate(ranked_candidates):
345 | candidate_name = candidate.get("name", "Candidate")
346 |
347 | # Build the base messages for the LLM.
348 | messages = [
349 | {
350 | "role": "system",
351 | "content": (
352 | "You are an unbiased HR professional. Your task is to craft clear, concise, "
353 | "and professional email responses to candidates based on the job description, "
354 | "the candidate's resume details, and evaluation scores. "
355 | "Return only the email body as plain text."
356 | ),
357 | },
358 | {
359 | "role": "user",
360 | "content": (
361 | f"Job Description (structured):\n{json.dumps(job_description, indent=2)}\n\n"
362 | f"Candidate Evaluation (structured):\n{json.dumps(candidate, indent=2)}\n\n"
363 | ),
364 | },
365 | ]
366 |
367 | # Append specific instructions based on candidate ranking.
368 | if idx < top_x:
369 | messages.append(
370 | {
371 |                     "role": "user",
372 | "content": (
373 | "Please create an invitation email inviting the candidate for a quick call. "
374 | "The email should be friendly, professional, and include a scheduling request."
375 | ),
376 | }
377 | )
378 | else:
379 | messages.append(
380 | {
381 |                     "role": "user",
382 | "content": (
383 | "Please create a polite rejection email. Include constructive feedback and key "
384 | "suggestions for improvement based on the candidate's evaluation."
385 | ),
386 | }
387 | )
388 |
389 | try:
390 |             email_body = call_llm(messages, response_format=None)
391 | except Exception as e:
392 | email_body = f"Error generating email: {e}"
393 |
394 | email_template = {"name": candidate_name, "email_body": email_body}
395 | if idx < top_x:
396 | invitations.append(email_template)
397 | else:
398 | rejections.append(email_template)
399 |
400 | return {"invitations": invitations, "rejections": rejections}
401 |
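402 | 
403 | # Minimal usage sketch (illustrative values; assumes OPENAI_API_KEY and
404 | # FIRECRAWL_API_KEY are set in .env and the job description is plain text):
405 | #
406 | #   import asyncio
407 | #   data = asyncio.run(ingest_inputs("We are hiring a Python developer...", []))
408 | #   jd = asyncio.run(parse_job_description(data))
409 | #
410 | #   scores = [{"name": "A", "relevance": 80, "experience": 70, "skills": 90, "overall": 80}]
411 | #   ranked = rank_candidates(scores)  # adds avg_score = (80 + 70 + 90 + 80) / 4 = 80.0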
--------------------------------------------------------------------------------