diff --git a/.gitignore b/.gitignore index a2e75f9..d99706e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ run/*.pdf test_*.py cache/ resume_evaluations.csv +job_evaluations.csv +package-lock.json greenhouse_resumes/* # Byte-compiled / optimized / DLL files @@ -223,4 +225,7 @@ marimo/_lsp/ __marimo__/ # Streamlit -.streamlit/secrets.toml \ No newline at end of file +.streamlit/secrets.toml + + +CLAUDE.md \ No newline at end of file diff --git a/evaluator.py b/evaluator.py index 1f9e91f..1d3fcdf 100644 --- a/evaluator.py +++ b/evaluator.py @@ -1,6 +1,13 @@ from typing import Dict, List, Optional, Tuple, Any from pydantic import BaseModel, Field, field_validator -from models import JSONResume, EvaluationData +from models import ( + JSONResume, + EvaluationData, + JobDescriptionData, + JobScores, + LLMJobEvaluationResponse, + JobEvaluationData, +) from llm_utils import initialize_llm_provider, extract_json_from_response import logging import json @@ -89,3 +96,137 @@ def evaluate_resume(self, resume_text: str) -> EvaluationData: except Exception as e: logger.error(f"Error evaluating resume: {str(e)}") raise + + +class JobDescriptionEvaluator: + WEIGHTS = { + "skills_match": 0.30, + "experience_match": 0.20, + "semantic_match": 0.15, + "job_title_alignment": 0.10, + "education": 0.10, + "resume_quality": 0.10, + "missing_critical_requirements": 0.05, + } + + def __init__(self, job_description: str, model_name: str = DEFAULT_MODEL, model_params: dict = None): + if not job_description or not job_description.strip(): + raise ValueError("Job description cannot be empty") + if not model_name: + raise ValueError("Model name cannot be empty") + + self.job_description = job_description + self.model_name = model_name + self.model_params = model_params or MODEL_PARAMETERS.get( + model_name, {"temperature": 0.1, "top_p": 0.9} + ) + self.template_manager = TemplateManager() + self.provider = initialize_llm_provider(model_name) + self._load_embedding_model() + + def _load_embedding_model(self): + from sentence_transformers import SentenceTransformer + logger.info("Loading Sentence Transformers model (all-MiniLM-L6-v2)...") + self.embedding_model = SentenceTransformer("all-MiniLM-L6-v2") + + def extract_job_requirements(self) -> JobDescriptionData: + prompt = self.template_manager.render_template( + "job_description_extraction", job_description=self.job_description + ) + if prompt is None: + raise ValueError("Failed to render job_description_extraction template") + + chat_params = { + "model": self.model_name, + "messages": [ + { + "role": "system", + "content": "You are an expert at extracting structured requirements from job descriptions. Return only valid JSON.", + }, + {"role": "user", "content": prompt}, + ], + "options": self.model_params, + } + + response = self.provider.chat(**chat_params, format=JobDescriptionData.model_json_schema()) + response_text = extract_json_from_response(response["message"]["content"]) + return JobDescriptionData(**json.loads(response_text)) + + def compute_semantic_score(self, resume_text: str) -> float: + from sentence_transformers import util + job_embedding = self.embedding_model.encode(self.job_description, convert_to_tensor=True) + resume_embedding = self.embedding_model.encode(resume_text, convert_to_tensor=True) + similarity = util.cos_sim(job_embedding, resume_embedding).item() + return round(max(0.0, similarity) * 100, 1) + + def _score_resume(self, resume_text: str, job_data: JobDescriptionData) -> LLMJobEvaluationResponse: + system_message = self.template_manager.render_template("job_evaluation_system_message") + if system_message is None: + raise ValueError("Failed to render job_evaluation_system_message template") + + criteria_prompt = self.template_manager.render_template( + "job_evaluation_criteria", + job_description=self.job_description, + job_title=job_data.job_title, + required_skills=job_data.required_skills, + preferred_skills=job_data.preferred_skills, + years_of_experience=job_data.years_of_experience, + education_requirements=job_data.education_requirements, + must_have_qualifications=job_data.must_have_qualifications, + resume_text=resume_text, + ) + if criteria_prompt is None: + raise ValueError("Failed to render job_evaluation_criteria template") + + chat_params = { + "model": self.model_name, + "messages": [ + {"role": "system", "content": system_message}, + {"role": "user", "content": criteria_prompt}, + ], + "options": { + "stream": False, + "temperature": self.model_params.get("temperature", 0.1), + "top_p": self.model_params.get("top_p", 0.9), + }, + } + + response = self.provider.chat(**chat_params, format=LLMJobEvaluationResponse.model_json_schema()) + response_text = extract_json_from_response(response["message"]["content"]) + logger.info(f"Job evaluation LLM response: {response_text}") + return LLMJobEvaluationResponse(**json.loads(response_text)) + + def _compute_weighted_total(self, scores: JobScores, semantic_score: float) -> float: + total = ( + scores.skills_match.score * self.WEIGHTS["skills_match"] + + scores.experience_match.score * self.WEIGHTS["experience_match"] + + semantic_score * self.WEIGHTS["semantic_match"] + + scores.job_title_alignment.score * self.WEIGHTS["job_title_alignment"] + + scores.education.score * self.WEIGHTS["education"] + + scores.resume_quality.score * self.WEIGHTS["resume_quality"] + + scores.missing_critical_requirements.score * self.WEIGHTS["missing_critical_requirements"] + ) + return round(min(total, 100.0), 1) + + def evaluate(self, resume_text: str) -> JobEvaluationData: + logger.info("Extracting requirements from job description...") + job_data = self.extract_job_requirements() + logger.info(f"Job title: {job_data.job_title} | Required skills: {job_data.required_skills}") + + logger.info("Computing semantic similarity score...") + semantic_score = self.compute_semantic_score(resume_text) + logger.info(f"Semantic match score: {semantic_score}") + + logger.info("Scoring resume against job requirements...") + llm_result = self._score_resume(resume_text, job_data) + + weighted_total = self._compute_weighted_total(llm_result.scores, semantic_score) + + return JobEvaluationData( + scores=llm_result.scores, + semantic_match_score=semantic_score, + weighted_total=weighted_total, + key_strengths=llm_result.key_strengths, + areas_for_improvement=llm_result.areas_for_improvement, + job_title=job_data.job_title, + ) diff --git a/job_description.txt b/job_description.txt new file mode 100644 index 0000000..e69de29 diff --git a/models.py b/models.py index e714600..6251915 100644 --- a/models.py +++ b/models.py @@ -249,6 +249,45 @@ class EvaluationData(BaseModel): areas_for_improvement: List[str] = Field(min_items=1, max_items=5) +class JobDescriptionData(BaseModel): + job_title: str + required_skills: List[str] + preferred_skills: List[str] = [] + years_of_experience: Optional[float] = None + education_requirements: Optional[str] = None + must_have_qualifications: List[str] = [] + industry: Optional[str] = None + + +class JobCategoryScore(BaseModel): + score: float = Field(ge=0, le=100, description="Score for this category out of 100") + evidence: str = Field(min_length=1, description="Evidence from the resume supporting this score") + + +class JobScores(BaseModel): + skills_match: JobCategoryScore + experience_match: JobCategoryScore + job_title_alignment: JobCategoryScore + education: JobCategoryScore + resume_quality: JobCategoryScore + missing_critical_requirements: JobCategoryScore + + +class LLMJobEvaluationResponse(BaseModel): + scores: JobScores + key_strengths: List[str] = Field(min_items=1, max_items=5) + areas_for_improvement: List[str] = Field(min_items=1, max_items=5) + + +class JobEvaluationData(BaseModel): + scores: JobScores + semantic_match_score: float = Field(ge=0, le=100) + weighted_total: float = Field(ge=0, le=100) + key_strengths: List[str] + areas_for_improvement: List[str] + job_title: str + + class GitHubProfile(BaseModel): """Pydantic model for GitHub profile data.""" diff --git a/prompts/template_manager.py b/prompts/template_manager.py index b68f680..35c0e7c 100644 --- a/prompts/template_manager.py +++ b/prompts/template_manager.py @@ -45,6 +45,9 @@ def _load_templates(self): "github_project_selection": "github_project_selection.jinja", "resume_evaluation_criteria": "resume_evaluation_criteria.jinja", "resume_evaluation_system_message": "resume_evaluation_system_message.jinja", + "job_description_extraction": "job_description_extraction.jinja", + "job_evaluation_criteria": "job_evaluation_criteria.jinja", + "job_evaluation_system_message": "job_evaluation_system_message.jinja", } for section_name, filename in template_files.items(): diff --git a/prompts/templates/job_description_extraction.jinja b/prompts/templates/job_description_extraction.jinja new file mode 100644 index 0000000..5432aa0 --- /dev/null +++ b/prompts/templates/job_description_extraction.jinja @@ -0,0 +1,24 @@ +Extract structured requirements from the following job description and return them as JSON. + +Job Description: +{{ job_description }} + +Rules: +- required_skills: Only skills explicitly stated as required, essential, or must-have +- preferred_skills: Only skills explicitly stated as preferred, nice-to-have, or a bonus +- years_of_experience: Minimum years required as a number (e.g. 2 for "2+ years"). Null if not specified. +- education_requirements: The degree, field of study, or certification explicitly required. Null if not specified. +- must_have_qualifications: Non-negotiable requirements such as work authorization, security clearance, licenses, or certifications explicitly stated as mandatory +- industry: The primary industry or domain of the role. Null if not clear. + +Return ONLY this JSON structure, no other text: + +{ + "job_title": "the target job title from the description", + "required_skills": ["skill1", "skill2"], + "preferred_skills": ["skill1", "skill2"], + "years_of_experience": null, + "education_requirements": null, + "must_have_qualifications": [], + "industry": null +} diff --git a/prompts/templates/job_evaluation_criteria.jinja b/prompts/templates/job_evaluation_criteria.jinja new file mode 100644 index 0000000..6aa8792 --- /dev/null +++ b/prompts/templates/job_evaluation_criteria.jinja @@ -0,0 +1,130 @@ +You are evaluating how well a candidate's resume matches a job description. Score each category from 0 to 100. + +## FULL JOB DESCRIPTION + +{{ job_description }} + +--- + +## EXTRACTED JOB REQUIREMENTS + +Job Title: {{ job_title }} + +Required Skills: {{ required_skills | join(", ") if required_skills else "None specified" }} + +Preferred Skills: {{ preferred_skills | join(", ") if preferred_skills else "None specified" }} + +Years of Experience Required: {{ years_of_experience if years_of_experience is not none else "Not specified" }} + +Education Requirements: {{ education_requirements if education_requirements else "Not specified" }} + +Must-Have Qualifications: {{ must_have_qualifications | join(", ") if must_have_qualifications else "None specified" }} + +--- + +## SCORING CRITERIA + +### Skills Match (score 0-100) +Compare the candidate's skills, work experience, and projects against the required and preferred skills. + +Required skills carry 80% of the weight, preferred skills carry 20%. + +Score bands: +- 90-100: All required skills present, most preferred skills also present +- 70-89: Most required skills present, minor gaps +- 50-69: Around half of required skills present +- 0-49: Few or no required skills present + +### Experience Match (score 0-100) +Evaluate the relevance of work history and projects to the target role. + +Consider: +- How closely past roles and responsibilities match the target role +- Whether technologies used in work experience match the job requirements +- Years of relevant experience vs years required (if years_of_experience is specified: score 100% if met, scale down proportionally if not) +- Industry or domain similarity + +Score bands: +- 90-100: Highly relevant experience that meets or exceeds all requirements +- 70-89: Mostly relevant experience with minor gaps +- 50-69: Somewhat relevant experience with notable gaps +- 0-49: Little relevant experience or significant shortfall in years + +### Job Title Alignment (score 0-100) +Compare the candidate's previous job titles to the target job title. + +Score bands: +- 90-100: Previous titles directly match or are very similar (e.g. "Software Engineer" → "Software Engineer") +- 70-89: Previous titles are closely related (e.g. "Software Engineer Intern" → "Software Engineer") +- 50-69: Previous titles are somewhat related (e.g. "Backend Developer" → "Full Stack Engineer") +- 20-49: Tangentially related titles +- 0-19: No previous titles or completely unrelated titles + +### Education (score 0-100) +Compare the candidate's education against the education requirements. + +Consider degree level, field of study, and any required certifications or licenses. + +Score bands: +- 90-100: Education fully meets or exceeds requirements +- 70-89: Education mostly meets requirements with minor gaps +- 50-69: Education partially meets requirements (e.g. relevant field but wrong level) +- 0-49: Education does not meet requirements or is absent + +If no education requirements are specified, score based on relevance of the candidate's education to the role. + +### Resume Quality (score 0-100) +Evaluate the quality of the resume's writing and presentation. + +Assess: +- Bullet points use strong action verbs: Built, Designed, Implemented, Optimized, Reduced, Automated, Architected, Led, Deployed, Improved +- Achievements are quantified with numbers (e.g. "Reduced latency by 35%", "Processed 1M requests/day", "Served 20k users") +- No vague filler statements (e.g. "Worked on APIs", "Helped with development", "Was responsible for") +- All major sections present: work experience, skills, projects or education +- Descriptions are clear and concise + +Score bands: +- 90-100: Strong action verbs throughout, most achievements quantified, no vague statements +- 70-89: Good action verbs, some quantified achievements, minor vague statements +- 50-69: Mixed quality, noticeable vague statements, few quantified achievements +- 0-49: Mostly vague, no quantified achievements, weak structure + +### Missing Critical Requirements (score 0-100) +Start at 100 and penalize for missing must-have qualifications and required skills. + +Deductions: +- -40 points for each missing must-have qualification (work authorization, clearance, license, etc.) +- -15 points for each required skill completely absent from the resume + +If no must-have qualifications are specified, base this score only on required skill presence. +Minimum score is 0. + +--- + +## CANDIDATE RESUME + +{{ resume_text }} + +--- + +## INSTRUCTIONS + +Score each category from 0 to 100 using only evidence from the resume above. +Provide specific evidence for each score — reference actual content from the resume, not generic statements. +Identify 1-5 key strengths relevant to this specific role. +For areas_for_improvement, identify 1-5 SPECIFIC GAPS between the candidate's resume and the job requirements listed above. Each item must reference something the job requires that the candidate is missing or weak on. Do NOT give generic resume advice (e.g. "add more detail", "include links"). Every improvement must name a specific requirement from the job description that is absent or underdeveloped in the resume. + +Return ONLY this JSON structure, no other text: + +{ + "scores": { + "skills_match": {"score": 0, "evidence": "string"}, + "experience_match": {"score": 0, "evidence": "string"}, + "job_title_alignment": {"score": 0, "evidence": "string"}, + "education": {"score": 0, "evidence": "string"}, + "resume_quality": {"score": 0, "evidence": "string"}, + "missing_critical_requirements": {"score": 0, "evidence": "string"} + }, + "key_strengths": ["strength1", "strength2"], + "areas_for_improvement": ["area1", "area2"] +} diff --git a/prompts/templates/job_evaluation_system_message.jinja b/prompts/templates/job_evaluation_system_message.jinja new file mode 100644 index 0000000..17738a1 --- /dev/null +++ b/prompts/templates/job_evaluation_system_message.jinja @@ -0,0 +1,21 @@ +You are an expert technical recruiter evaluating how well a candidate's resume matches a specific job description. + +Score each category objectively from 0 to 100 based solely on evidence in the resume. + +CRITICAL FAIRNESS REQUIREMENTS: +SCORES MUST NEVER DEPEND ON: +- Candidate's name, gender, or any personal demographic information +- College or university name +- GPA or academic grades unless the job description explicitly requires a minimum GPA +- City, location, or geographical information unless the job explicitly requires it +- Any personal characteristics unrelated to the job requirements + +EVALUATION MUST BE BASED ONLY ON: +- How well the candidate's technical skills match the job requirements +- Relevance and depth of work experience and projects +- Alignment of previous job titles to the target role +- Education and certifications as specified in the job description +- Quality of resume writing — action verbs, quantified achievements, clarity + +You MUST respond with ONLY the JSON structure specified in the prompt. +Do not add explanatory text, summaries, or any fields beyond what is specified. diff --git a/requirements.txt b/requirements.txt index df14bd5..9cdbf36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ pymupdf4llm==0.0.27 Jinja2==3.1.6 google-generativeai==0.4.0 python-dotenv==1.0.1 -black==25.9.0 \ No newline at end of file +black==25.9.0 +sentence-transformers \ No newline at end of file diff --git a/resume.pdf b/resume.pdf new file mode 100644 index 0000000..e69de29 diff --git a/score.py b/score.py index 21fd06c..35c2859 100644 --- a/score.py +++ b/score.py @@ -5,13 +5,14 @@ import csv from pdf import PDFHandler from github import fetch_and_display_github_info -from models import JSONResume, EvaluationData -from typing import List, Optional, Dict -from evaluator import ResumeEvaluator +from models import JSONResume, EvaluationData, JobEvaluationData +from typing import Optional +from evaluator import ResumeEvaluator, JobDescriptionEvaluator from pathlib import Path from prompt import DEFAULT_MODEL, MODEL_PARAMETERS from transform import ( transform_evaluation_response, + transform_job_evaluation_response, convert_json_resume_to_text, convert_github_data_to_text, convert_blog_data_to_text, @@ -25,11 +26,38 @@ format="%(asctime)s - %(name)5s - %(lineno)5d - %(funcName)33s - %(levelname)5s - %(message)s", ) +RESUME_PATH = "resume.pdf" +JOB_DESCRIPTION_PATH = "job_description.txt" + + +def select_mode() -> int: + print("\nChoose scoring mode:") + print(" 1. HackerRank Intern (original)") + print(" 2. Custom Job Description") + while True: + choice = input("Enter choice (1 or 2): ").strip() + if choice in ("1", "2"): + return int(choice) + print("Invalid choice. Please enter 1 or 2.") + + +def load_job_description() -> str: + if not os.path.exists(JOB_DESCRIPTION_PATH): + print(f"Error: '{JOB_DESCRIPTION_PATH}' not found in the project root.") + sys.exit(1) + content = Path(JOB_DESCRIPTION_PATH).read_text(encoding="utf-8").strip() + if not content: + print( + f"Error: '{JOB_DESCRIPTION_PATH}' is empty. " + "Paste a job description into it before running in Custom Job Description mode." + ) + sys.exit(1) + return content + def print_evaluation_results( evaluation: EvaluationData, candidate_name: str = "Candidate" ): - """Print evaluation results in a readable format.""" print("\n" + "=" * 80) print(f"📊 RESUME EVALUATION RESULTS FOR: {candidate_name}") print("=" * 80) @@ -38,7 +66,6 @@ def print_evaluation_results( print("❌ No evaluation data available") return - # Calculate overall score total_score = 0 max_score = 0 @@ -48,35 +75,28 @@ def print_evaluation_results( total_score += category_score max_score += category_data["max"] - # Log warning if score was capped if category_score < category_data["score"]: print( f"⚠️ Warning: {category_name} score capped from {category_data['score']} to {category_score} (max: {category_data['max']})" ) - # Add bonus points if hasattr(evaluation, "bonus_points") and evaluation.bonus_points: total_score += evaluation.bonus_points.total - # Subtract deductions if hasattr(evaluation, "deductions") and evaluation.deductions: total_score -= evaluation.deductions.total - # Ensure total score doesn't exceed maximum possible score - max_possible_score = max_score + 20 # 120 (100 categories + 20 bonus) + max_possible_score = max_score + 20 if total_score > max_possible_score: total_score = max_possible_score print(f"⚠️ Warning: Total score capped at maximum possible value") - # Overall Score print(f"\n🎯 OVERALL SCORE: {total_score:.1f}/{max_score}") - # Detailed Scores print("\n📈 DETAILED SCORES:") print("-" * 60) if hasattr(evaluation, "scores") and evaluation.scores: - # Define category maximums category_maxes = { "open_source": 35, "self_projects": 30, @@ -84,7 +104,6 @@ def print_evaluation_results( "technical_skills": 10, } - # Open Source if hasattr(evaluation.scores, "open_source") and evaluation.scores.open_source: os_score = evaluation.scores.open_source capped_score = min(os_score.score, category_maxes["open_source"]) @@ -92,7 +111,6 @@ def print_evaluation_results( print(f" Evidence: {os_score.evidence}") print() - # Self Projects if ( hasattr(evaluation.scores, "self_projects") and evaluation.scores.self_projects @@ -103,7 +121,6 @@ def print_evaluation_results( print(f" Evidence: {sp_score.evidence}") print() - # Production Experience if hasattr(evaluation.scores, "production") and evaluation.scores.production: prod_score = evaluation.scores.production capped_score = min(prod_score.score, category_maxes["production"]) @@ -111,7 +128,6 @@ def print_evaluation_results( print(f" Evidence: {prod_score.evidence}") print() - # Technical Skills if ( hasattr(evaluation.scores, "technical_skills") and evaluation.scores.technical_skills @@ -122,13 +138,11 @@ def print_evaluation_results( print(f" Evidence: {tech_score.evidence}") print() - # Bonus Points if hasattr(evaluation, "bonus_points") and evaluation.bonus_points: print(f"\n⭐ BONUS POINTS: {evaluation.bonus_points.total}") print("-" * 30) print(f" {evaluation.bonus_points.breakdown}") - # Deductions if ( hasattr(evaluation, "deductions") and evaluation.deductions @@ -139,14 +153,12 @@ def print_evaluation_results( if evaluation.deductions.reasons: print(f" {evaluation.deductions.reasons}") - # Key Strengths if hasattr(evaluation, "key_strengths") and evaluation.key_strengths: print(f"\n✅ KEY STRENGTHS:") print("-" * 30) for i, strength in enumerate(evaluation.key_strengths, 1): print(f" {i}. {strength}") - # Areas for Improvement if ( hasattr(evaluation, "areas_for_improvement") and evaluation.areas_for_improvement @@ -159,37 +171,84 @@ def print_evaluation_results( print("\n" + "=" * 80) +def print_job_evaluation_results( + evaluation: JobEvaluationData, candidate_name: str = "Candidate" +): + print("\n" + "=" * 80) + print(f"📊 JOB MATCH EVALUATION FOR: {candidate_name}") + print(f" Target Role: {evaluation.job_title}") + print("=" * 80) + + print(f"\n🎯 OVERALL MATCH: {evaluation.weighted_total}/100") + + print("\n📈 CATEGORY BREAKDOWN:") + print("-" * 60) + + categories = [ + ("💻 Skills Match (30%)", evaluation.scores.skills_match), + ("🏢 Experience Match (20%)", evaluation.scores.experience_match), + ("📋 Title Alignment (10%)", evaluation.scores.job_title_alignment), + ("🎓 Education (10%)", evaluation.scores.education), + ("📝 Resume Quality (10%)", evaluation.scores.resume_quality), + ("⚠️ Missing Critical (5%)", evaluation.scores.missing_critical_requirements), + ] + + for label, category in categories: + print(f"{label}: {category.score:.0f}/100") + print(f" Evidence: {category.evidence}") + print() + + print(f"🔍 Semantic Match (15%): {evaluation.semantic_match_score:.1f}/100") + print(" Computed via Sentence Transformers (all-MiniLM-L6-v2).") + print() + + if evaluation.key_strengths: + print("✅ KEY STRENGTHS:") + print("-" * 30) + for i, strength in enumerate(evaluation.key_strengths, 1): + print(f" {i}. {strength}") + + if evaluation.areas_for_improvement: + print(f"\n🔧 AREAS FOR IMPROVEMENT:") + print("-" * 30) + for i, area in enumerate(evaluation.areas_for_improvement, 1): + print(f" {i}. {area}") + + print("\n" + "=" * 80) + + def _evaluate_resume( resume_data: JSONResume, github_data: dict = None, blog_data: dict = None ) -> Optional[EvaluationData]: - """Evaluate the resume using AI and display results.""" - model_params = MODEL_PARAMETERS.get(DEFAULT_MODEL) evaluator = ResumeEvaluator(model_name=DEFAULT_MODEL, model_params=model_params) - # Convert JSON resume data to text resume_text = convert_json_resume_to_text(resume_data) - # Add GitHub data if available if github_data: github_text = convert_github_data_to_text(github_data) resume_text += github_text - # Add blog data if available if blog_data: blog_text = convert_blog_data_to_text(blog_data) resume_text += blog_text - # Evaluate the enhanced resume - evaluation_result = evaluator.evaluate_resume(resume_text) + return evaluator.evaluate_resume(resume_text) - # print(evaluation_result) - return evaluation_result +def _evaluate_with_job_description( + resume_text: str, job_description: str +) -> Optional[JobEvaluationData]: + model_params = MODEL_PARAMETERS.get(DEFAULT_MODEL) + evaluator = JobDescriptionEvaluator( + job_description=job_description, + model_name=DEFAULT_MODEL, + model_params=model_params, + ) + return evaluator.evaluate(resume_text) def is_valid_resume_data(resume_data: JSONResume) -> bool: - """Check if the resume data has at least some extracted core content.""" if not resume_data: return False core_sections = [ @@ -211,8 +270,19 @@ def find_profile(profiles, network): ) -def main(pdf_path): - # Create cache filename based on PDF path +def main(): + pdf_path = RESUME_PATH + + if not os.path.exists(pdf_path): + print(f"Error: '{RESUME_PATH}' not found. Place your resume PDF in the project root.") + sys.exit(1) + + mode = select_mode() + + job_description = None + if mode == 2: + job_description = load_job_description() + cache_filename = ( f"cache/resumecache_{os.path.basename(pdf_path).replace('.pdf', '')}.json" ) @@ -223,8 +293,7 @@ def main(pdf_path): resume_data = None cache_loaded = False - # Check if cache exists and we're in development mode - if DEVELOPMENT_MODE and os.path.exists(cache_filename): + if DEVELOPMENT_MODE and os.path.exists(cache_filename) and os.path.getmtime(cache_filename) >= os.path.getmtime(pdf_path): print(f"Loading cached data from {cache_filename}") try: cached_data = json.loads(Path(cache_filename).read_text(encoding="utf-8")) @@ -239,9 +308,7 @@ def main(pdf_path): try: os.remove(cache_filename) except Exception as delete_err: - print( - f"Failed to delete invalid cache file {cache_filename}: {delete_err}" - ) + print(f"Failed to delete invalid cache file {cache_filename}: {delete_err}") if not cache_loaded: logger.debug( @@ -251,7 +318,7 @@ def main(pdf_path): pdf_handler = PDFHandler() resume_data = pdf_handler.extract_json_from_pdf(pdf_path) - if resume_data == None: + if resume_data is None: return None if DEVELOPMENT_MODE: @@ -266,7 +333,6 @@ def main(pdf_path): "Newly extracted resume data is empty/invalid. Skipping cache write." ) - # Check if cache exists and we're in development mode github_data = {} github_cache_loaded = False if DEVELOPMENT_MODE and os.path.exists(github_cache_filename): @@ -289,12 +355,9 @@ def main(pdf_path): try: os.remove(github_cache_filename) except Exception as delete_err: - print( - f"Failed to delete invalid GitHub cache file {github_cache_filename}: {delete_err}" - ) + print(f"Failed to delete invalid GitHub cache file {github_cache_filename}: {delete_err}") if not github_cache_loaded: - # Add validation to handle None values profiles = [] if resume_data and hasattr(resume_data, "basics") and resume_data.basics: profiles = resume_data.basics.profiles or [] @@ -323,9 +386,6 @@ def main(pdf_path): encoding="utf-8", ) - score = _evaluate_resume(resume_data, github_data) - - # Get candidate name for display candidate_name = os.path.basename(pdf_path).replace(".pdf", "") if ( resume_data @@ -335,43 +395,53 @@ def main(pdf_path): ): candidate_name = resume_data.basics.name - # Print evaluation results in readable format - print_evaluation_results(score, candidate_name) + if mode == 1: + score = _evaluate_resume(resume_data, github_data) + print_evaluation_results(score, candidate_name) - if DEVELOPMENT_MODE: - csv_row = transform_evaluation_response( - file_name=os.path.basename(pdf_path), - evaluation=score, - resume_data=resume_data, - github_data=github_data, - ) + if DEVELOPMENT_MODE: + csv_row = transform_evaluation_response( + file_name=os.path.basename(pdf_path), + evaluation=score, + resume_data=resume_data, + github_data=github_data, + ) + csv_path = "resume_evaluations.csv" + file_exists = os.path.exists(csv_path) + with open(csv_path, "a", newline="", encoding="utf-8") as csvfile: + fieldnames = list(csv_row.keys()) + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + if not file_exists: + writer.writeheader() + writer.writerow(csv_row) - # Write CSV row to file - csv_path = "resume_evaluations.csv" - file_exists = os.path.exists(csv_path) + return score - with open(csv_path, "a", newline="", encoding="utf-8") as csvfile: - fieldnames = list(csv_row.keys()) - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + else: + resume_text = convert_json_resume_to_text(resume_data) + if github_data: + resume_text += convert_github_data_to_text(github_data) - # Write headers if file doesn't exist - if not file_exists: - writer.writeheader() + job_evaluation = _evaluate_with_job_description(resume_text, job_description) + print_job_evaluation_results(job_evaluation, candidate_name) - # Write the row - writer.writerow(csv_row) + if DEVELOPMENT_MODE: + csv_row = transform_job_evaluation_response( + file_name=os.path.basename(pdf_path), + evaluation=job_evaluation, + resume_data=resume_data, + ) + csv_path = "job_evaluations.csv" + file_exists = os.path.exists(csv_path) + with open(csv_path, "a", newline="", encoding="utf-8") as csvfile: + fieldnames = list(csv_row.keys()) + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + if not file_exists: + writer.writeheader() + writer.writerow(csv_row) - return score + return job_evaluation if __name__ == "__main__": - if len(sys.argv) < 2: - print("Usage: python score.py ") - exit(1) - pdf_path = sys.argv[1] - - if not os.path.exists(pdf_path): - print(f"Error: File '{pdf_path}' does not exist.") - exit(1) - - main(pdf_path) + main() diff --git a/transform.py b/transform.py index 25eab1d..d434073 100644 --- a/transform.py +++ b/transform.py @@ -1,5 +1,4 @@ from typing import Dict, List, Optional -import pdb from models import JSONResume @@ -741,6 +740,69 @@ def transform_evaluation_response( return csv_row +def transform_job_evaluation_response(file_name=None, resume_data=None, evaluation=None): + csv_row = {} + csv_row["file_name"] = file_name + + if resume_data and hasattr(resume_data, "basics") and resume_data.basics: + basics = resume_data.basics + csv_row["name"] = basics.name or "" + csv_row["email"] = basics.email or "" + csv_row["phone"] = basics.phone or "" + csv_row["location"] = ( + f"{basics.location.city}, {basics.location.region}" + if basics.location and basics.location.city and basics.location.region + else "" + ) + + if basics.profiles: + github_profile = fetch_profile(basics.profiles, ["github"], "github") + linkedin_profile = fetch_profile(basics.profiles, ["linkedin"], "linkedin") + csv_row["github_url"] = github_profile.url if github_profile else "" + csv_row["linkedin_url"] = linkedin_profile.url if linkedin_profile else "" + else: + csv_row["github_url"] = "" + csv_row["linkedin_url"] = "" + else: + csv_row["name"] = "" + csv_row["email"] = "" + csv_row["phone"] = "" + csv_row["location"] = "" + csv_row["github_url"] = "" + csv_row["linkedin_url"] = "" + + if evaluation: + csv_row["job_title"] = evaluation.job_title + csv_row["weighted_total"] = evaluation.weighted_total + csv_row["semantic_match_score"] = evaluation.semantic_match_score + + if evaluation.scores: + csv_row["skills_match_score"] = evaluation.scores.skills_match.score + csv_row["experience_match_score"] = evaluation.scores.experience_match.score + csv_row["job_title_alignment_score"] = evaluation.scores.job_title_alignment.score + csv_row["education_score"] = evaluation.scores.education.score + csv_row["resume_quality_score"] = evaluation.scores.resume_quality.score + csv_row["missing_critical_score"] = evaluation.scores.missing_critical_requirements.score + else: + for field in ["skills_match_score", "experience_match_score", "job_title_alignment_score", + "education_score", "resume_quality_score", "missing_critical_score"]: + csv_row[field] = "N/A" + + csv_row["key_strengths"] = "; ".join(evaluation.key_strengths) if evaluation.key_strengths else "" + csv_row["areas_for_improvement"] = "; ".join(evaluation.areas_for_improvement) if evaluation.areas_for_improvement else "" + else: + csv_row["job_title"] = "" + csv_row["weighted_total"] = "N/A" + csv_row["semantic_match_score"] = "N/A" + for field in ["skills_match_score", "experience_match_score", "job_title_alignment_score", + "education_score", "resume_quality_score", "missing_critical_score"]: + csv_row[field] = "N/A" + csv_row["key_strengths"] = "" + csv_row["areas_for_improvement"] = "" + + return csv_row + + def convert_json_resume_to_text(resume_data: JSONResume) -> str: text_parts = []