├── sandbox
│   ├── cover-letter-generator
│   │   ├── src
│   │   │   ├── __init__.py
│   │   │   └── core.py
│   │   ├── requirements.txt
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── README.md
│   │   └── app.py
│   └── hiring-agent
│       ├── .env.local
│       ├── requirements.txt
│       ├── images
│       │   ├── arch-diagram.png
│       │   └── hiring-process-flowchart.png
│       ├── README.md
│       ├── app.py
│       └── utils
│           └── utils.py
└── README.md
/sandbox/cover-letter-generator/src/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sandbox/hiring-agent/.env.local:
--------------------------------------------------------------------------------
1 | FIRECRAWL_API_KEY=
2 | OPENAI_API_KEY=
--------------------------------------------------------------------------------
/sandbox/hiring-agent/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | openai
3 | firecrawl
4 | PyPDF2
5 | python-dotenv
6 | pydantic
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/requirements.txt:
--------------------------------------------------------------------------------
1 | # Core dependencies
2 | streamlit
3 | openai
4 | firecrawl
5 | PyPDF2
6 | python-dotenv
7 | pydantic
--------------------------------------------------------------------------------
/sandbox/hiring-agent/images/arch-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/ai-engineer-toolkit/main/sandbox/hiring-agent/images/arch-diagram.png
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/.env.example:
--------------------------------------------------------------------------------
1 | # Template for required environment variables
2 | OPENAI_API_KEY=your-openai-key-here
3 | FIRECRAWL_API_KEY=your-firecrawl-key-here
--------------------------------------------------------------------------------
/sandbox/hiring-agent/images/hiring-process-flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/ai-engineer-toolkit/main/sandbox/hiring-agent/images/hiring-process-flowchart.png
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/.gitignore:
--------------------------------------------------------------------------------
1 | # Virtual Environment
2 | venv/
3 | env/
4 | .env
5 | 
6 | # Python
7 | __pycache__/
8 | *.py[cod]
9 | *.so
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | 
25 | # IDE
26 | .vscode/
27 | .idea/
28 | 
29 | # OS
30 | .DS_Store
31 | 
32 | .env.local
33 | .env.*.local
--------------------------------------------------------------------------------
/sandbox/cover-letter-generator/README.md:
--------------------------------------------------------------------------------
1 | # 📝 AI Cover Letter Generator
2 | 
3 | [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
4 | [![Streamlit](https://img.shields.io/badge/streamlit-1.29.0-FF4B4B.svg)](https://streamlit.io)
5 | [![OpenAI](https://img.shields.io/badge/OpenAI-GPT4-00A67E.svg)](https://openai.com/)
6 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
7 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8 | 
9 | An AI-powered tool that generates customized cover letters by analyzing your resume and job postings.
10 | 
11 | ## 🚀 Features
12 | 
13 | - PDF resume parsing
14 | - Automatic job posting analysis
15 | - Parallel processing for faster results
16 | - Customized cover letter generation
17 | - Easy-to-use web interface
18 | 
19 | ## 🛠️ Quick Start
20 | 
21 | 1. Clone the repository
22 | 2. Install dependencies: `pip install -r requirements.txt`
23 | 3. Run the app: `streamlit run app.py`
--------------------------------------------------------------------------------
/sandbox/hiring-agent/README.md:
--------------------------------------------------------------------------------
1 | # An AI agent that helps with resume screening
2 | 
3 | [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
4 | [![Streamlit](https://img.shields.io/badge/streamlit-1.29.0-FF4B4B.svg)](https://streamlit.io)
5 | [![OpenAI](https://img.shields.io/badge/OpenAI-GPT4-00A67E.svg)](https://openai.com/)
6 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
7 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8 | 
9 | This is a Streamlit app that uses an AI agent to process a job description and candidate resumes, score the candidates, and generate custom email templates.
10 | 
11 | ![AI agent architecture](images/arch-diagram.png "Architecture diagram")
12 | 
13 | ## Setup
14 | 
15 | ### 1. Create a Virtual Environment
16 | 
17 | Open a terminal and navigate to the project directory. Then run:
18 | 
19 | ```bash
20 | # Create a virtual environment in the "venv" folder
21 | python -m venv venv
22 | 
23 | # Activate the virtual environment (Linux/macOS)
24 | source venv/bin/activate
25 | 
26 | # Activate the virtual environment (Windows)
27 | venv\Scripts\activate
28 | ```
29 | 
30 | ### 2. Install Requirements
31 | 
32 | Install the dependencies listed in the `requirements.txt` file:
33 | 
34 | ```bash
35 | pip install -r requirements.txt
36 | ```
37 | 
38 | ### 3. Configure Environment Variables
39 | 
40 | Create a `.env` file in the project root directory and add your API keys:
41 | 
42 | ```dotenv
43 | # .env file
44 | FIRECRAWL_API_KEY=fc-YOUR_FIRECRAWL_API_KEY
45 | OPENAI_API_KEY=sk-YOUR_OPENAI_API_KEY
46 | ```
47 | 
48 | ### 4. Run the Streamlit App
49 | 
50 | Run the app with the following command:
51 | 
52 | ```bash
53 | streamlit run app.py
54 | ```
55 | 
56 | Your app should now be running locally.
57 | 
58 | ## How It Works
59 | 
60 | ![Hiring process flowchart](images/hiring-process-flowchart.png "Flowchart")
61 | 
62 | 
63 | **User Input**
64 | 
65 | - Job Description: Enter text or provide a URL.
66 | - Resumes: Upload PDF/Word files.
67 | 
68 | **Ingestion**
69 | 
70 | - Inputs are read and processed.
71 | - Job description URLs are scraped for content.
72 | 
73 | **Parsing**
74 | 
75 | - The job description is analyzed by an LLM (GPT-4) to extract essential details.
76 | - Resumes are processed to extract candidate profiles.
77 | 
78 | **Scoring and Ranking**
79 | 
80 | - Candidates are scored on relevance, experience, and skills.
81 | - An average score is computed, and candidates are sorted in descending order.
82 | 
83 | 
84 | **Email Generation**
85 | 
86 | - Custom email templates are created for top candidates and for rejections (see the sketch below).
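These stages map one-to-one onto the helper functions in `utils/utils.py`. Here is a minimal sketch of how `app.py` chains them, stripped of the Streamlit UI (the `screen` wrapper name is illustrative; `resume_files` are file-like upload objects, and `top_x` is the number of candidates to invite):

```python
import asyncio

from utils.utils import (
    ingest_inputs,
    parse_job_description,
    parse_resumes,
    score_candidates,
    rank_candidates,
    generate_email_templates,
)


async def screen(job_description, resume_files, top_x=2):
    # Ingestion: read inputs, scraping the job description if it is a URL
    raw_data = await ingest_inputs(job_description, resume_files)
    # Parsing: extract structured requirements and candidate profiles via the LLM
    requirements = await parse_job_description(raw_data)
    resumes = await parse_resumes(resume_files)
    # Scoring and ranking: score each candidate, then sort by average score
    scores = await score_candidates(requirements, resumes)
    ranked = rank_candidates(scores)
    # Email generation: invitations for the top_x candidates, rejections for the rest
    return await generate_email_templates(ranked, requirements, top_x)


# emails = asyncio.run(screen(job_text_or_url, uploaded_files))
```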
87 | 
88 | 
89 | This pipeline uses Streamlit for the interface and LLM-powered functions for text extraction, evaluation, and communication, automating the resume screening process.
--------------------------------------------------------------------------------
/sandbox/hiring-agent/app.py:
--------------------------------------------------------------------------------
1 | # app.py
2 | import asyncio
3 | 
4 | import streamlit as st
5 | from utils.utils import (
6 |     ingest_inputs,
7 |     parse_job_description,
8 |     parse_resumes,
9 |     score_candidates,
10 |     rank_candidates,
11 |     generate_email_templates,
12 | )
13 | 
14 | 
15 | # Main App Title
16 | st.title("Resume Screening Agent")
17 | 
18 | # Input section for job description
19 | st.header("Job Description Input")
20 | job_description = st.text_area("Paste the job description or URL", height=150)
21 | 
22 | # Input section for candidate resumes
23 | st.header("Candidate Resumes")
24 | resume_files = st.file_uploader(
25 |     "Upload resume files (PDF/Word)",
26 |     type=["pdf", "docx", "doc"],
27 |     accept_multiple_files=True,
28 | )
29 | 
30 | st.header("Candidates to Select")
31 | num_candidates = st.slider(
32 |     "Select the number of candidates to invite for an interview", 1, 4, 2
33 | )
34 | 
35 | 
36 | # Button to trigger the agent
37 | if st.button("Run Agent"):
38 |     if not job_description:
39 |         st.error("Please provide a job description or URL.")
40 |     elif not resume_files:
41 |         st.error("Please upload at least one resume file.")
42 |     else:
43 |         st.markdown("### Your AI Agent is now processing your inputs...")
44 |         status_text = st.empty()  # placeholder for status updates
45 | 
46 |         # Step 1: Ingest the job description and resumes
47 |         with st.spinner("Step 1: Processing Inputs..."):
48 |             # Scrapes the job description if a URL was provided
49 |             raw_data = asyncio.run(ingest_inputs(job_description, resume_files))
50 |             status_text.text("Step 1 complete: Inputs processed.")
51 |             with st.expander("View Processed Inputs", expanded=False):
52 |                 st.json(raw_data)
53 | 
54 |         # Step 2: Parse the job description and resumes
55 |         with st.spinner("Step 2: Processing Job Description & Resumes..."):
56 |             parsed_requirements = asyncio.run(parse_job_description(raw_data))
57 |             parsed_resumes = asyncio.run(parse_resumes(resume_files))
58 |             status_text.text("Step 2 complete: Job description & resumes processed.")
59 |             with st.expander("View Parsed Job Description", expanded=False):
60 |                 st.json(parsed_requirements)
61 |             with st.expander("View Parsed Resumes", expanded=False):
62 |                 st.json(parsed_resumes)
63 | 
64 |         # Step 3: Score candidates based on the parsed data
65 |         with st.spinner("Step 3: Scoring candidates..."):
66 |             status_text.text("Step 3: Scoring candidates...")
67 |             candidate_scores = asyncio.run(
68 |                 score_candidates(parsed_requirements, parsed_resumes)
69 |             )
70 |             status_text.text("Step 3 complete: Candidates scored.")
71 |             with st.expander("View Candidate Scores", expanded=False):
72 |                 st.json(candidate_scores)
73 | 
74 |         # Step 4: Rank the candidates
75 |         with st.spinner("Step 4: Ranking candidates..."):
76 |             status_text.text("Step 4: Ranking candidates...")
77 |             ranked_candidates = rank_candidates(candidate_scores)
78 |             status_text.text("Step 4 complete: Candidates ranked.")
79 |             with st.expander("View Ranked Candidates", expanded=False):
80 |                 st.json(ranked_candidates)
81 | 
82 |         # Step 5: Generate email templates for top candidates and others
83 |         with st.spinner("Step 5: Generating email templates..."):
84 |             status_text.text("Step 5: Generating email 
templates...") 85 | # 'num_candidates' is assumed to come from the frontend (e.g., top X candidates) 86 | email_templates = asyncio.run( 87 | generate_email_templates( 88 | ranked_candidates, parsed_requirements, num_candidates 89 | ) 90 | ) 91 | status_text.text("Step 5 complete: Email templates generated.") 92 | with st.expander("View Email Templates", expanded=False): 93 | st.json(email_templates) 94 | 95 | # Final update 96 | status_text.text("Agent processing complete! Your results are ready.") 97 | -------------------------------------------------------------------------------- /sandbox/cover-letter-generator/app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from openai import AsyncOpenAI 3 | from firecrawl import FirecrawlApp 4 | import os 5 | from dotenv import load_dotenv 6 | import asyncio 7 | from src.core import process_cover_letter_request 8 | import tempfile 9 | 10 | # Load environment variables 11 | load_dotenv() 12 | 13 | # Initialize API clients 14 | openai_client = AsyncOpenAI(api_key=os.getenv('OPENAI_API_KEY')) 15 | firecrawl_client = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API_KEY')) 16 | 17 | def main(): 18 | st.set_page_config( 19 | page_title="AI Cover Letter Generator", 20 | page_icon="📝", 21 | layout="wide" 22 | ) 23 | 24 | st.title("📝 AI Cover Letter Generator") 25 | st.write("Upload your resume and provide a job posting URL to generate a customized cover letter.") 26 | 27 | # Input section 28 | col1, col2 = st.columns(2) 29 | with col1: 30 | uploaded_file = st.file_uploader("Upload your resume (PDF)", type=['pdf']) 31 | with col2: 32 | job_url = st.text_input("Enter job posting URL") 33 | 34 | if st.button("Generate Cover Letter", type="primary"): 35 | if uploaded_file is not None and job_url: 36 | try: 37 | # Create a placeholder for the progress messages 38 | progress_placeholder = st.empty() 39 | 40 | async def process_with_status(): 41 | # Step 1: Processing PDF 42 | progress_placeholder.info("📄 Processing your resume...") 43 | 44 | # Step 2: Parallel Processing 45 | progress_placeholder.info("🔍 Analyzing resume and job posting...") 46 | 47 | cover_letter = await process_cover_letter_request( 48 | uploaded_file, 49 | job_url, 50 | openai_client, 51 | firecrawl_client 52 | ) 53 | 54 | # Step 3: Final Generation 55 | progress_placeholder.info("✍️ Generating your cover letter...") 56 | 57 | return cover_letter 58 | 59 | # Run the async function 60 | cover_letter = asyncio.run(process_with_status()) 61 | 62 | if cover_letter: 63 | # Clear the progress message 64 | progress_placeholder.empty() 65 | 66 | # Display success and results 67 | st.success("✨ Your cover letter has been generated!") 68 | 69 | # Create tabs for different views 70 | tab1, tab2 = st.tabs(["📄 View", "📋 Copy & Download"]) 71 | 72 | with tab1: 73 | st.markdown("### Your Cover Letter") 74 | st.markdown(cover_letter) 75 | 76 | with tab2: 77 | st.text_area( 78 | "Copy your cover letter", 79 | value=cover_letter, 80 | height=400 81 | ) 82 | 83 | # Single download button for TXT 84 | st.download_button( 85 | label="📥 Download as TXT", 86 | data=cover_letter, 87 | file_name="cover_letter.txt", 88 | mime="text/plain", 89 | help="Click to download your cover letter as a text file" 90 | ) 91 | else: 92 | progress_placeholder.empty() 93 | st.error("Failed to generate cover letter. 
Please try again.") 94 | 95 | except Exception as e: 96 | st.error(f"An error occurred: {str(e)}") 97 | else: 98 | st.warning("Please upload a PDF resume and provide a job posting URL.") 99 | 100 | # Add helpful instructions 101 | with st.expander("ℹ️ How to use"): 102 | st.write(""" 103 | 1. Upload your resume in PDF format 104 | 2. Paste the URL of the job posting you're interested in 105 | 3. Click 'Generate Cover Letter' 106 | 4. View, copy, or download your customized cover letter 107 | """) 108 | 109 | if __name__ == "__main__": 110 | main() -------------------------------------------------------------------------------- /sandbox/cover-letter-generator/src/core.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | from pydantic import BaseModel, Field 3 | import tempfile 4 | import os 5 | import asyncio 6 | import logging 7 | from openai import AsyncOpenAI 8 | from firecrawl import FirecrawlApp 9 | import PyPDF2 10 | 11 | # Set up logging 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | # -------------------------------------------------------------- 16 | # Data Models with Pydantic 17 | # -------------------------------------------------------------- 18 | 19 | class ResumeExtraction(BaseModel): 20 | experience: List[str] = Field(description="List of work experiences") 21 | skills: List[str] = Field(description="List of skills") 22 | education: List[str] = Field(description="List of education details") 23 | contact_info: str = Field(description="Contact information") 24 | 25 | class JobExtraction(BaseModel): 26 | title: str = Field(description="Job title") 27 | company: str = Field(description="Company name") 28 | requirements: List[str] = Field(description="Job requirements") 29 | description: str = Field(description="Job description") 30 | 31 | class CoverLetter(BaseModel): 32 | content: str = Field(description="Generated cover letter text") 33 | 34 | # -------------------------------------------------------------- 35 | # Step 2. Create 3 functions 36 | # -------------------------------------------------------------- 37 | 38 | async def extract_resume_info(client: AsyncOpenAI, pdf_text: str) -> Optional[ResumeExtraction]: 39 | """First LLM call: Extract structured information from resume""" 40 | try: 41 | completion = await client.chat.completions.create( 42 | model="gpt-4", 43 | messages=[ 44 | { 45 | "role": "system", 46 | "content": """You are a resume parser. 
Return ONLY a JSON object with this structure: 47 | { 48 | "experience": ["list of work experiences"], 49 | "skills": ["list of technical and soft skills"], 50 | "education": ["list of education details"], 51 | "contact_info": "full contact information" 52 | } 53 | IMPORTANT: Return ONLY valid JSON, no other text.""" 54 | }, 55 | { 56 | "role": "user", 57 | "content": pdf_text 58 | } 59 | ] 60 | ) 61 | response_text = completion.choices[0].message.content.strip() 62 | logger.info(f"Resume LLM Response: {response_text}") 63 | return ResumeExtraction.model_validate_json(response_text) 64 | except Exception as e: 65 | logger.error(f"Resume extraction failed: {str(e)}") 66 | logger.error(f"Response was: {response_text if 'response_text' in locals() else 'No response'}") 67 | return None 68 | 69 | async def extract_job_info(client: AsyncOpenAI, job_content: str) -> Optional[JobExtraction]: 70 | """Second LLM call: Extract structured information from job posting""" 71 | try: 72 | # Convert job_content to string and handle potential None 73 | job_text = str(job_content) if job_content is not None else "" 74 | logger.info(f"Job content type: {type(job_text)}") 75 | 76 | completion = await client.chat.completions.create( 77 | model="gpt-4", 78 | messages=[ 79 | { 80 | "role": "system", 81 | "content": """You are a job posting parser. Return ONLY a JSON object with this structure: 82 | { 83 | "title": "exact job title", 84 | "company": "company name", 85 | "requirements": ["list of key requirements"], 86 | "description": "brief job description" 87 | } 88 | IMPORTANT: Return ONLY valid JSON, no other text.""" 89 | }, 90 | { 91 | "role": "user", 92 | "content": job_text 93 | } 94 | ] 95 | ) 96 | response_text = completion.choices[0].message.content.strip() 97 | logger.info(f"Job LLM Response: {response_text}") 98 | return JobExtraction.model_validate_json(response_text) 99 | except Exception as e: 100 | logger.error(f"Job info extraction failed: {str(e)}") 101 | logger.error(f"Job content was: {job_text if 'job_text' in locals() else 'No content'}") 102 | return None 103 | 104 | async def generate_cover_letter( 105 | client: AsyncOpenAI, 106 | resume_info: ResumeExtraction, 107 | job_info: JobExtraction 108 | ) -> Optional[str]: 109 | """Third LLM call: Generate the cover letter using the results from previous async calls""" 110 | try: 111 | completion = await client.chat.completions.create( 112 | model="gpt-4", 113 | messages=[ 114 | { 115 | "role": "system", 116 | "content": """Write a compelling cover letter following these guidelines: 117 | 1. Start with a strong hook about the company/role 118 | 2. Focus on relevant achievements matching job requirements 119 | 3. Use specific metrics from past experience 120 | 4. Keep it concise (300-400 words) 121 | 5. 
End with a confident call to action""" 122 | }, 123 | { 124 | "role": "user", 125 | "content": f"Resume: {resume_info.model_dump()}\nJob: {job_info.model_dump()}" 126 | } 127 | ] 128 | ) 129 | return completion.choices[0].message.content 130 | except Exception as e: 131 | logger.error(f"Cover letter generation failed: {str(e)}") 132 | return None 133 | 134 | # -------------------------------------------------------------- 135 | # Main Processing Function 136 | # -------------------------------------------------------------- 137 | 138 | async def process_cover_letter_request( 139 | pdf_file, 140 | job_url: str, 141 | openai_client: AsyncOpenAI, 142 | firecrawl_client: FirecrawlApp 143 | ) -> Optional[str]: 144 | """Main async function that chains all the processing steps together""" 145 | try: 146 | # Save uploaded file temporarily 147 | with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file: 148 | temp_file.write(pdf_file.read()) 149 | temp_path = temp_file.name 150 | 151 | # Extract text from PDF 152 | with open(temp_path, 'rb') as file: 153 | pdf_reader = PyPDF2.PdfReader(file) 154 | pdf_text = ' '.join(page.extract_text() for page in pdf_reader.pages) 155 | logger.info(f"Extracted PDF text length: {len(pdf_text)}") 156 | 157 | # Get job content 158 | try: 159 | job_content = firecrawl_client.scrape_url(job_url) 160 | if not isinstance(job_content, str): 161 | job_content = str(job_content) 162 | logger.info(f"Job content type after conversion: {type(job_content)}") 163 | except Exception as e: 164 | logger.error(f"Error scraping job URL: {str(e)}") 165 | return None 166 | 167 | # Process resume and job info in parallel 168 | resume_info, job_info = await asyncio.gather( 169 | extract_resume_info(openai_client, pdf_text), 170 | extract_job_info(openai_client, job_content) 171 | ) 172 | 173 | if not resume_info or not job_info: 174 | logger.error("Failed to extract either resume or job information") 175 | return None 176 | 177 | # Generate cover letter 178 | cover_letter = await generate_cover_letter(openai_client, resume_info, job_info) 179 | 180 | if not cover_letter: 181 | return None 182 | 183 | return cover_letter 184 | 185 | except Exception as e: 186 | logger.error(f"Error processing cover letter request: {str(e)}") 187 | return None 188 | finally: 189 | if 'temp_path' in locals(): 190 | os.unlink(temp_path) 191 | 192 | 193 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI Engineer Toolkit 🛠️ 2 | 3 | Welcome to the **AI Engineer Toolkit**! 4 | 5 | Designed to help software engineers and non-engineers build production-grade AI applications from scratch. 6 | 7 | You will find every popular framework, tool, and resource to ship one AI project at a time! 8 | 9 | 10 |
11 | 
12 | ## Getting Started
13 | 
14 | - If you are **new to software engineering**, check out the fundamentals folder.
15 | 
16 | - If you need **inspiration**, check out example projects inside the [sandbox](./sandbox).
17 | 
18 | - If you are **ready to build**, check out the table below.
19 | 
20 | - If you want **more guidance**, check out the upcoming [AI Engineering Bootcamp](https://dub.sh/Z0JHySz) that starts on March 10th, 2025.
21 | 
22 | 
23 | <br>
24 | 25 | ## Tools & Resources for AI Engineers 🏗️ 26 | 27 | Explore the table below to find the right tools for your AI projects. 28 | Each category includes cost details, use cases, and links to official documentation. 29 | 30 | 31 |
32 | 33 | | Category | Tool Name | Cost Model | Use Case | Link | 34 | |----------------------|--------------------------|---------------|-------------------------------------------|--------------------------------------------| 35 | |😎 **Model Providers** | | | | | 36 | | | OpenAI API | Paid API | GPT models | [OpenAI](https://platform.openai.com/docs/) | 37 | | | Grok (xAI) | Paid API | Human-like reasoning for complex queries | [Grok](https://xai.ai/docs) | 38 | | | Google Gemini | Paid API | Multimodal AI for text and images | [Gemini](https://cloud.google.com/gemini) | 39 | | | Hugging Face | Freemium | Open-source models and hosted inference | [Hugging Face](https://huggingface.co/docs) | 40 | | | Anthropic Claude | Paid API | Safe, interpretable language models | [Claude](https://docs.anthropic.com/) | 41 | |📝 **Prompt Engineering** | | | | | 42 | | | DSPy | Open-Source | Automate prompt optimization with code | [DSPy](https://dspy-docs.vercel.app/) | 43 | | | LangChain | Open-Source | Build prompt-driven AI workflows | [LangChain](https://python.langchain.com/docs/) | 44 | | | Haystack | Open-Source | Optimize prompts for RAG pipelines | [Haystack](https://haystack.deepset.ai/) | 45 | | | Agenta | Open-Source | Experiment and deploy prompt variants | [Agenta](https://www.agenta.ai/) 46 | |⚙️ **Backend** | | | | | 47 | | | FastAPI | Open-Source | Fast, modern Python APIs | [FastAPI](https://fastapi.tiangolo.com/) | 48 | | | Flask | Open-Source | Lightweight web apps and APIs | [Flask](https://flask.palletsprojects.com/) | 49 | | | Django | Open-Source | Robust web framework with ORM | [Django](https://docs.djangoproject.com/) | 50 | | | Node.js (Express) | Open-Source | Scalable backend with JavaScript | [Express](https://expressjs.com/) | 51 | |💻 **Frontend** | | | | | 52 | | | AI SDK (Vercel) | Open-Source | AI-ready UI components for React | [AI SDK](https://sdk.vercel.ai/docs) | 53 | | | Streamlit | Open-Source | Rapid prototyping for data apps | [Streamlit](https://docs.streamlit.io/) | 54 | | | Gradio | Open-Source | Interactive ML model interfaces | [Gradio](https://gradio.app/docs/) | 55 | | | Dash | Open-Source | Data-driven dashboards with Python | [Dash](https://dash.plotly.com/) | 56 | | 🎈 **Fullstack** | | | | | 57 | | | Next.js | Open-Source | React with SSR and built-in API routes | [Next.js](https://nextjs.org/docs) | 58 | | | SvelteKit | Open-Source | Lightweight full-stack with Svelte | [SvelteKit](https://kit.svelte.dev/docs) | 59 | | | Nuxt.js | Open-Source | Vue.js with server-side rendering | [Nuxt.js](https://nuxt.com/docs) | 60 | | | Meteor | Open-Source | Full-stack JavaScript framework | [Meteor](https://docs.meteor.com/) | 61 | | 📀 **Databases** | | | | | 62 | | | Supabase | Freemium | Open-source Firebase alternative | [Supabase](https://supabase.com/docs) | 63 | | | PostgreSQL | Open-Source | Reliable relational database | [PostgreSQL](https://www.postgresql.org/docs/) | 64 | | | MongoDB | Open-Source | Flexible NoSQL for unstructured data | [MongoDB](https://docs.mongodb.com/) | 65 | | | SQLite | Open-Source | Lightweight, serverless database | [SQLite](https://www.sqlite.org/docs.html) | 66 | | 🔢 **Vector Databases** | | | | | 67 | | | ChromaDB | Open-Source | Simple vector store for embeddings | [ChromaDB](https://docs.trychroma.com/) | 68 | | | Pinecone | Paid API | Scalable, hosted vector search | [Pinecone](https://docs.pinecone.io/) | 69 | | | Weaviate | Open-Source | GraphQL-based vector database | 
[Weaviate](https://weaviate.io/developers/weaviate) |
70 | | | Qdrant | Open-Source | High-performance vector search | [Qdrant](https://qdrant.tech/documentation/) |
71 | | 📻 **Fine-tuning** | | | | |
72 | | | OpenAI Fine-Tuning | Paid API | Customize GPT models for your needs | [OpenAI Fine-Tuning](https://platform.openai.com/docs/guides/fine-tuning) |
73 | | | Hugging Face Trainer | Open-Source | Fine-tune transformers easily | [Hugging Face Trainer](https://huggingface.co/docs/transformers/training) |
74 | | | Ludwig | Open-Source | Declarative fine-tuning for ML models | [Ludwig](https://ludwig.ai/latest/) |
75 | | | PyTorch Lightning | Open-Source | Structured fine-tuning with PyTorch | [PyTorch Lightning](https://lightning.ai/docs/pytorch) |
76 | | 🤖 **Agent Development** | | | | |
77 | | | LangGraph | Open-Source | Stateful agents with LangChain | [LangGraph](https://langchain-ai.github.io/langgraph/) |
78 | | | AG2 (formerly AutoGen) | Open-Source | Multi-agent orchestration | [AG2](https://github.com/ag2ai/ag2) |
79 | | | CrewAI | Open-Source | Multi-agent workflows for automation | [CrewAI](https://docs.crewai.com/) |
80 | | | AutoGen | Open-Source | Conversational agents with flexibility | [AutoGen](https://microsoft.github.io/autogen/) |
81 | | 🕸️ **Web Data Access** | | | | |
82 | | | Firecrawl | Freemium | Web scraping with caching | [Firecrawl](https://firecrawl.dev/) |
83 | | | Jina AI Reader API | Freemium | Extract structured data from web pages | [Jina AI](https://jina.ai/reader) |
84 | | | Puppeteer | Open-Source | Dynamic content scraping via browser | [Puppeteer](https://pptr.dev/) |
85 | | | Scrapy | Open-Source | Powerful web crawling framework | [Scrapy](https://docs.scrapy.org/) |
86 | | 🤔 **Evaluation Frameworks** | | | | |
87 | | | RAGAS | Open-Source | Evaluate RAG pipelines | [RAGAS](https://docs.ragas.io/) |
88 | | | DeepEval | Open-Source | Custom metrics for LLM evaluation | [DeepEval](https://docs.deepeval.com/) |
89 | | | LangSmith | Freemium | Debug and evaluate LLM apps | [LangSmith](https://docs.smith.langchain.com/) |
90 | | | TruLens | Open-Source | Ground truth evaluation for LLMs | [TruLens](https://trulens.org/) |
91 | | 📊 **Monitoring & Logging** | | | | |
92 | | | LangSmith | Freemium | Monitor and trace LLM applications | [LangSmith](https://docs.smith.langchain.com/) |
93 | | | Sentry | Freemium | Real-time error tracking | [Sentry](https://docs.sentry.io/) |
94 | | | Prometheus | Open-Source | Metrics and alerting for systems | [Prometheus](https://prometheus.io/docs/) |
95 | | | Grafana | Open-Source | Visualize logs and metrics | [Grafana](https://grafana.com/docs/) |
96 | | 🔊 **Audio** | | | | |
97 | | | Whisper (OpenAI) | Open-Source | High-accuracy speech-to-text | [Whisper](https://github.com/openai/whisper) |
98 | | | DeepSpeech | Open-Source | End-to-end speech recognition | [DeepSpeech](https://github.com/mozilla/DeepSpeech) |
99 | | | Coqui TTS | Open-Source | Text-to-speech with diverse voices | [Coqui TTS](https://docs.coqui.ai/) |
100 | | | SpeechBrain | Open-Source | All-in-one audio processing toolkit | [SpeechBrain](https://speechbrain.github.io/) |
101 | | 👀 **Vision** | | | | |
102 | | | YOLO | Open-Source | Real-time object detection | [YOLO](https://docs.ultralytics.com/) |
103 | | | OpenCV | Open-Source | Versatile computer vision library | [OpenCV](https://docs.opencv.org/) |
104 | | | Detectron2 | Open-Source | Advanced detection and segmentation | [Detectron2](https://detectron2.readthedocs.io/) |
105 | | | CLIP (OpenAI) | Open-Source | Image-text understanding and search | [CLIP](https://github.com/openai/CLIP) |
106 | | 🚀 **Deployment Tools** | | | | |
107 | | | Vercel | Freemium | Deploy full-stack apps with ease | [Vercel](https://vercel.com/docs) |
108 | | | Docker | Open-Source | Containerize apps for consistent deployment | [Docker](https://docs.docker.com/) |
109 | | | Kubernetes | Open-Source | Orchestrate and scale containerized apps | [Kubernetes](https://kubernetes.io/docs/) |
110 | | | Heroku | Freemium | Quick deployment for prototypes and MVPs | [Heroku](https://devcenter.heroku.com/) |
--------------------------------------------------------------------------------
/sandbox/hiring-agent/utils/utils.py:
--------------------------------------------------------------------------------
1 | # utils/utils.py
2 | import json
3 | import os
4 | import tempfile
5 | from typing import Any, Dict, List, Optional
6 | 
7 | import PyPDF2
8 | from dotenv import load_dotenv
9 | from firecrawl import FirecrawlApp
10 | from openai import OpenAI
11 | from pydantic import BaseModel, Field
12 | 
13 | load_dotenv()
14 | 
15 | 
16 | # Shared API clients
17 | app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
18 | openai_api_key = os.environ.get("OPENAI_API_KEY")
19 | openai_client = OpenAI(api_key=openai_api_key)
20 | 
21 | 
22 | class CandidateScore(BaseModel):
23 |     name: str = Field(..., description="Candidate's name")
24 |     relevance: int = Field(
25 |         ...,
26 |         description="How relevant the candidate's resume is to the job description (0-100)",
27 |     )
28 |     experience: int = Field(
29 |         ..., description="Candidate's match in terms of work experience (0-100)"
30 |     )
31 |     skills: int = Field(..., description="Candidate's match based on skills (0-100)")
32 |     overall: int = Field(..., description="Overall score (0-100)")
33 |     comment: str = Field(
34 |         ..., description="A brief comment explaining the rationale behind the scores"
35 |     )
36 | 
37 | 
38 | class Resume(BaseModel):
39 |     name: str = Field(..., description="Candidate's full name")
40 |     work_experiences: List[str] = Field(..., description="List of work experiences")
41 |     location: str = Field(..., description="Candidate's location")
42 |     skills: List[str] = Field(..., description="List of candidate's skills")
43 |     education: List[str] = Field(..., description="Educational background")
44 |     summary: Optional[str] = Field(
45 |         None, description="A short summary or objective statement"
46 |     )
47 |     certifications: Optional[List[str]] = Field(
48 |         None, description="List of certifications"
49 |     )
50 |     languages: Optional[List[str]] = Field(
51 |         None, description="Languages spoken by the candidate"
52 |     )
53 | 
54 | 
55 | class JobDescription(BaseModel):
56 |     title: str
57 |     company: str
58 |     location: str
59 |     requirements: List[str]
60 |     responsibilities: List[str]
61 | 
62 | 
63 | async def ingest_inputs(
64 |     job_description: str, resume_files: List[Any]
65 | ) -> Dict[str, Any]:
66 |     """
67 |     Ingests the job description and resume files.
68 | 
69 |     Parameters:
70 |         job_description (str): The job description text or URL.
71 |         resume_files (List[Any]): List of uploaded resume files.
72 | 
73 |     Returns:
74 |         dict: A dictionary with two keys:
75 |             - "job_description": The processed job description (in markdown).
76 |             - "resumes": A list of resume file names.
77 |     """
78 |     # Determine if job_description is a URL.
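    # (Simple heuristic: anything starting with "http" is scraped as a URL; everything else is treated as raw text.)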
79 |     if job_description.startswith("http"):
80 |         try:
81 |             result = app.scrape_url(job_description, params={"formats": ["markdown"]})
82 |             # Check if markdown data is present in the result.
83 |             if not result or "markdown" not in result:
84 |                 raise ValueError("Scraping did not return markdown data.")
85 |             job_desc_text = result.get("markdown", "")
86 |         except Exception as e:
87 |             raise Exception(f"Failed to scrape the job description URL: {e}")
88 |     else:
89 |         job_desc_text = job_description
90 |     resumes = [file.name for file in resume_files]
91 |     return {"job_description": job_desc_text, "resumes": resumes}
92 | 
93 | 
94 | def call_llm(messages: list, response_format=None) -> str:
95 |     """
96 |     Calls the OpenAI model with the provided chat messages and returns the response text.
97 | 
98 |     Parameters:
99 |         messages (list): The chat messages to send to the LLM.
100 |         response_format: Optional Pydantic model for structured output (None for plain text).
101 |     Returns:
102 |         str: The LLM's response.
103 |     """
104 | 
105 |     params = {"model": "gpt-4o-2024-08-06", "messages": messages}
106 | 
107 |     if response_format:
108 |         params["response_format"] = response_format
109 | 
110 |     response = openai_client.beta.chat.completions.parse(**params)
111 | 
112 |     return response.choices[0].message.content
113 | 
114 | 
115 | async def parse_job_description(data: Dict[str, Any]) -> Dict[str, Any]:
116 |     """
117 |     Parses the job description to extract key requirements in a structured format.
118 | 
119 |     This function takes the ingested job description (which might be scraped from a URL)
120 |     and uses an LLM to extract and return only the essential job details.
121 |     Extraneous content from the scraped page is removed.
122 | 
123 |     Parameters:
124 |         data (dict): Dictionary containing the job description details, with a key "job_description".
125 | 
126 |     Returns:
127 |         dict: A dictionary with the structured job description containing keys:
128 |             "title", "company", "location", "requirements", and "responsibilities".
129 | 
130 |     Raises:
131 |         Exception: If the LLM call fails or the returned JSON cannot be parsed.
132 |     """
133 |     job_text = data.get("job_description", "")
134 |     if not job_text:
135 |         raise ValueError("No job description text provided.")
136 | 
137 |     # Build the prompt for the LLM
138 |     prompt = (
139 |         "Extract the key job information from the text below. Return only valid JSON "
140 |         "with the following keys: title, company, location, requirements, responsibilities. "
141 |         "Do not include any extraneous information.\n\n"
142 |         "Job description:\n" + job_text
143 |     )
144 |     messages = [
145 |         {
146 |             "role": "system",
147 |             "content": (
148 |                 "You are an assistant that extracts key job description information from text. "
149 |                 "Return only the job details in valid JSON format using the keys: "
150 |                 "title, company, location, requirements (as a list), "
151 |                 "and responsibilities (as a list)."
152 |             ),
153 |         },
154 |         {"role": "user", "content": prompt},
155 |     ]
156 | 
157 |     try:
158 |         llm_output = call_llm(messages, response_format=JobDescription)
159 |         # Parse the JSON returned by the LLM
160 |         structured_jd = json.loads(llm_output)
161 |     except Exception as e:
162 |         raise Exception(f"Error parsing job description: {e}")
163 | 
164 |     return structured_jd
165 | 
166 | 
167 | async def parse_resumes(resume_files: List[Any]) -> Dict[str, Any]:
168 |     """
169 |     Parses resume files to extract candidate information.
170 | 
171 |     This function reads each uploaded resume file and uses an LLM (via the call_llm helper)
172 |     to extract candidate details. The LLM is asked to return only valid JSON following the
173 |     schema defined by the Resume Pydantic model. The expected JSON should include keys such as:
174 | 
175 |     {
176 |         "name": string,
177 |         "work_experiences": list[string],
178 |         "location": string,
179 |         "skills": list[string],
180 |         "education": list[string],
181 |         "summary": string (optional),
182 |         "certifications": list[string] (optional),
183 |         "languages": list[string] (optional)
184 |     }
185 | 
186 |     Parameters:
187 |         resume_files (List[Any]): List of uploaded resume file objects (e.g., from Streamlit's file uploader).
188 | 
189 |     Returns:
190 |         dict: A dictionary with a key "parsed_resumes" that is a list of parsed resume details.
191 | 
192 |     Notes:
193 |         Per-resume LLM or parsing failures are recorded as error entries rather than raised.
194 |     """
195 |     parsed_resumes = []
196 |     for resume in resume_files:
197 |         # Save uploaded file temporarily
198 |         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
199 |             temp_file.write(resume.read())
200 |             temp_path = temp_file.name
201 | 
202 |         # Extract text from PDF
203 |         with open(temp_path, "rb") as file:
204 |             pdf_reader = PyPDF2.PdfReader(file)
205 |             pdf_text = " ".join(page.extract_text() for page in pdf_reader.pages)
206 |         # Build messages for the LLM.
207 |         messages = [
208 |             {
209 |                 "role": "system",
210 |                 "content": (
211 |                     "You are an assistant that extracts candidate resume details. "
212 |                     "Extract only the information defined by the provided JSON schema."
213 |                 ),
214 |             },
215 |             {
216 |                 "role": "user",
217 |                 "content": f"Extract resume details from the following resume text:\n\n{pdf_text}",
218 |             },
219 |         ]
220 | 
221 |         try:
222 |             # Call the LLM to process the resume text.
223 |             # Pass the Resume model so the response is constrained to the expected schema.
224 |             llm_response = call_llm(messages, response_format=Resume)
225 |             # Parse the JSON response from the LLM.
226 |             parsed_resume = json.loads(llm_response)
227 |         except Exception as e:
228 |             parsed_resume = {"error": f"Failed to parse resume using LLM: {e}"}
229 | 
230 |         parsed_resumes.append(parsed_resume)
231 |     return {"parsed_resumes": parsed_resumes}
232 | 
233 | 
234 | async def score_candidates(
235 |     parsed_requirements: Dict[str, Any], parsed_resumes: Dict[str, Any]
236 | ) -> List[Dict[str, Any]]:
237 |     """
238 |     Scores candidates based on the parsed job description and resume data.
239 |     Parameters:
240 |         parsed_requirements (dict): The structured job description returned by
241 |             parse_job_description.
242 |         parsed_resumes (dict): Parsed resume data.
243 |             Expected to have a key "parsed_resumes" which is a list of candidate details.
244 | 
245 |     Returns:
246 |         list: A list of dictionaries with candidate scores as per the CandidateScore model.
247 | 
248 |     Notes:
249 |         Per-candidate LLM or parsing failures are recorded with zero scores and an error comment.
250 |     """
251 |     candidate_scores = []
252 | 
253 |     job_description_text = json.dumps(parsed_requirements)
254 |     resume_list = parsed_resumes.get("parsed_resumes", [])
255 |     for candidate in resume_list:
256 |         # Build messages for the LLM.
257 |         messages = [
258 |             {
259 |                 "role": "system",
260 |                 "content": (
261 |                     "You are an unbiased hiring manager. Compare the following job description with the candidate's resume and provide "
262 |                     "scores (0-100) for relevance, experience, and skills. Also compute an overall score that reflects the candidate's fit "
263 |                     "and provide a comment explaining your evaluation. Return only valid JSON matching the provided schema."
264 |                 ),
265 |             },
266 |             {
267 |                 "role": "user",
268 |                 "content": (
269 |                     f"Job Description:\n{job_description_text}\n\n"
270 |                     f"Candidate Resume:\n{json.dumps(candidate)}"
271 |                 ),
272 |             },
273 |         ]
274 | 
275 |         try:
276 |             llm_response = call_llm(messages, response_format=CandidateScore)
277 |             score_data = json.loads(llm_response)
278 |             score_data["resume"] = candidate
279 |         except Exception as e:
280 |             # In case of an error, record a default score with error comment.
281 |             score_data = {
282 |                 "name": candidate.get("name", "Unknown"),
283 |                 "relevance": 0,
284 |                 "experience": 0,
285 |                 "skills": 0,
286 |                 "overall": 0,
287 |                 "comment": f"Error during evaluation: {e}",
288 |             }
289 | 
290 |         candidate_scores.append(score_data)
291 | 
292 |     return candidate_scores
293 | 
294 | 
295 | def rank_candidates(candidate_scores: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
296 |     """
297 |     Ranks candidates based on the average of their individual scores.
298 | 
299 |     For each candidate, this function calculates the average score from the keys:
300 |     "relevance", "experience", "skills", and "overall". It adds a new key "avg_score"
301 |     to each candidate's dictionary and then returns the sorted list in descending order.
302 | 
303 |     Parameters:
304 |         candidate_scores (list): List of candidate score dictionaries.
305 | 
306 |     Returns:
307 |         list: Sorted list of candidate scores in descending order based on avg_score.
308 |     """
309 |     for candidate in candidate_scores:
310 |         # Compute the average of the relevant scores.
311 |         relevance = candidate.get("relevance", 0)
312 |         experience = candidate.get("experience", 0)
313 |         skills = candidate.get("skills", 0)
314 |         overall = candidate.get("overall", 0)
315 |         candidate["avg_score"] = (relevance + experience + skills + overall) / 4.0
316 | 
317 |     # Return the sorted list of candidates based on avg_score.
318 |     return sorted(
319 |         candidate_scores, key=lambda candidate: candidate["avg_score"], reverse=True
320 |     )
321 | 
322 | 
323 | async def generate_email_templates(
324 |     ranked_candidates: List[Dict[str, Any]], job_description: Dict[str, Any], top_x: int
325 | ) -> Dict[str, List[Dict[str, Any]]]:
326 |     """
327 |     Generates custom email templates using an LLM for each candidate.
328 |     Parameters:
329 |         ranked_candidates (list): List of candidate score dictionaries.
330 |         job_description (dict): The structured job description.
331 |         top_x (int): Number of top candidates to invite for a call.
332 | 
333 |     Returns:
334 |         dict: A dictionary with two keys:
335 |             - "invitations": A list of dictionaries with candidate "name" and "email_body" for invitations.
336 |             - "rejections": A list of dictionaries with candidate "name" and "email_body" for rejections.
337 | 
338 |     Notes:
339 |         LLM failures are captured in the affected candidate's email body rather than raised.
340 |     """
341 |     invitations = []
342 |     rejections = []
343 | 
344 |     for idx, candidate in enumerate(ranked_candidates):
345 |         candidate_name = candidate.get("name", "Candidate")
346 | 
347 |         # Build the base messages for the LLM.
348 |         messages = [
349 |             {
350 |                 "role": "system",
351 |                 "content": (
352 |                     "You are an unbiased HR professional. Your task is to craft clear, concise, "
353 |                     "and professional email responses to candidates based on the job description, "
354 |                     "the candidate's resume details, and evaluation scores. "
355 |                     "Return only the email body as plain text."
356 |                 ),
357 |             },
358 |             {
359 |                 "role": "user",
360 |                 "content": (
361 |                     f"Job Description (structured):\n{json.dumps(job_description, indent=2)}\n\n"
362 |                     f"Candidate Evaluation (structured):\n{json.dumps(candidate, indent=2)}\n\n"
363 |                 ),
364 |             },
365 |         ]
366 | 
367 |         # Append specific instructions based on candidate ranking.
368 |         if idx < top_x:
369 |             messages.append(
370 |                 {
371 |                     "role": "user",
372 |                     "content": (
373 |                         "Please create an invitation email inviting the candidate for a quick call. "
374 |                         "The email should be friendly, professional, and include a scheduling request."
375 |                     ),
376 |                 }
377 |             )
378 |         else:
379 |             messages.append(
380 |                 {
381 |                     "role": "user",
382 |                     "content": (
383 |                         "Please create a polite rejection email. Include constructive feedback and key "
384 |                         "suggestions for improvement based on the candidate's evaluation."
385 |                     ),
386 |                 }
387 |             )
388 | 
389 |         try:
390 |             email_body = call_llm(messages)
391 |         except Exception as e:
392 |             email_body = f"Error generating email: {e}"
393 | 
394 |         email_template = {"name": candidate_name, "email_body": email_body}
395 |         if idx < top_x:
396 |             invitations.append(email_template)
397 |         else:
398 |             rejections.append(email_template)
399 | 
400 |     return {"invitations": invitations, "rejections": rejections}
401 | 
--------------------------------------------------------------------------------