Resume Analyser Project for Machine Learning Jobs
INTRODUCTION:
Natural language processing (NLP) lets computers read, interpret, and act on human language at scale. For example, NLP makes it possible for computers to read text, transcribe speech, measure sentiment, and determine which parts of a document are important. This project uses a few basic NLP steps (tokenization, stopword removal, and keyword matching) to score a resume against a list of machine learning skills.
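As a quick preview of the two NLP operations this project leans on, tokenization and stopword removal, here is a minimal sketch using NLTK (the sample sentence is made up for illustration):

import nltk
nltk.download("stopwords")
nltk.download("punkt")  # newer NLTK releases may also need "punkt_tab"

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

sentence = "I have three years of experience in machine learning and data analysis."
tokens = word_tokenize(sentence.lower())  # split into lowercase word tokens
stop_words = set(stopwords.words("english"))
content = [t for t in tokens if t.isalpha() and t not in stop_words]
print(content)
# ['three', 'years', 'experience', 'machine', 'learning', 'data', 'analysis']

The full project code below applies exactly these steps to the text extracted from a resume file.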
CODE:
!pip install PyPDF2
import nltk
nltk.download('stopwords')
nltk.download('punkt')  # newer NLTK releases may also need 'punkt_tab' for word_tokenize
import re
import docx
import PyPDF2
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
# Load predefined keywords and skills (example list; you can expand as needed)
skills_keywords = [
    "python", "data analysis", "machine learning", "deep learning", "artificial intelligence",
    "nlp", "tensorflow", "keras", "data visualization", "data science", "sql", "r",
    "statistics", "cloud computing", "aws", "azure", "git", "docker", "kubernetes"
]
# Stopwords for filtering non-informative words
stop_words = set(stopwords.words("english"))
def extract_text_from_pdf(file_path):
    """Extracts text from a PDF file."""
    text = ""
    with open(file_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
    # print("Extracted Text from PDF:", text)  # Debug statement
    return text
def extract_text_from_docx(file_path):
    """Extracts text from a DOCX file."""
    text = ""
    doc = docx.Document(file_path)
    for paragraph in doc.paragraphs:
        text += paragraph.text + "\n"  # newline keeps adjacent paragraphs from running together
    print("Extracted Text from DOCX:", text)  # Debug statement
    return text
def clean_and_tokenize_text(text):
    """Tokenizes and cleans text."""
    # Lowercase text
    text = text.lower()
    # Remove punctuation
    text = text.translate(str.maketrans("", "", string.punctuation))
    # Tokenize text
    tokens = word_tokenize(text)
    # Remove stopwords and filter tokens
    filtered_tokens = [token for token in tokens if token not in stop_words]
    print("Tokens after cleaning:", filtered_tokens)  # Debug statement
    return filtered_tokens
def match_keywords(tokens, keywords):
    """Matches single tokens and multi-word phrases against predefined keywords."""
    # Multi-word skills such as "machine learning" never appear as a single token,
    # so phrases are searched in the re-joined token text instead.
    token_set, joined = set(tokens), " ".join(tokens)
    matched_keywords = [kw for kw in keywords
                        if (kw in joined if " " in kw else kw in token_set)]
    return matched_keywords
def analyze_resume(file_path):
    """Analyzes a resume for matching skills and keywords."""
    # Determine file type
    if file_path.endswith(".pdf"):
        text = extract_text_from_pdf(file_path)
    elif file_path.endswith(".docx"):
        text = extract_text_from_docx(file_path)
    else:
        raise ValueError("Unsupported file format. Please use PDF or DOCX files.")
    # Clean and tokenize text
    tokens = clean_and_tokenize_text(text)
    # Match keywords
    matched_keywords = match_keywords(tokens, skills_keywords)
    # Calculate match score (each skill counts once)
    match_score = len(matched_keywords)
    # Display analysis results
    print("Matched Keywords:", matched_keywords)
    print("Match Score:", match_score)
    print(f"Total relevant skills found: {match_score}/{len(skills_keywords)}")
# Example usage
file_path = "/"  # Replace with your resume file path
analyze_resume(file_path)
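To sanity-check the matching logic without a real PDF or DOCX on disk, you can feed a string straight through the cleaning and matching steps; the resume snippet below is made up for illustration:

# Quick test of the pipeline on an in-memory sample (hypothetical resume text)
sample = ("Data scientist with experience in Python, SQL, machine learning, "
          "deep learning with TensorFlow and Keras, and deployment on AWS with Docker.")
tokens = clean_and_tokenize_text(sample)
print(match_keywords(tokens, skills_keywords))
# ['python', 'machine learning', 'deep learning', 'tensorflow', 'keras', 'sql', 'aws', 'docker']

Because match_keywords walks the keyword list rather than the token list, each skill counts once no matter how often it appears, so the score reads as distinct skills found out of the full list.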