├── Images
│   ├── php.png
│   ├── java.png
│   ├── oracle.png
│   ├── php-word.png
│   ├── python0.png
│   ├── python1.png
│   ├── python2.png
│   ├── oracle-Word.png
│   ├── python_word.png
│   └── python_word1.png
├── 5-Images
│   ├── java.png
│   ├── php.png
│   ├── oracle.png
│   ├── php-word.png
│   ├── python0.png
│   ├── oracle-Word.png
│   ├── python_word.png
│   ├── python_word1.png
│   └── Directory_Structure.png
├── 4-Top_recommendations
│   ├── center.css
│   ├── load_css.py
│   ├── style.css
│   └── job_output.py
├── Data
│   ├── Job-Locations
│   │   ├── state.csv
│   │   └── india-city-state.csv
│   └── working_jd_sample.csv
├── README.md
└── 2-Preprocessing_and_Modelling
    ├── Pre-processing_Resume for matchingv2.ipynb
    ├── Pre-processing_Resume for matchingv1.ipynb
    └── Pre-processing Jobs for modellingv2.ipynb
/Images/php.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/php.png
--------------------------------------------------------------------------------
/5-Images/java.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/java.png
--------------------------------------------------------------------------------
/5-Images/php.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/php.png
--------------------------------------------------------------------------------
/Images/java.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/java.png
--------------------------------------------------------------------------------
/Images/oracle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/oracle.png
--------------------------------------------------------------------------------
/5-Images/oracle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/oracle.png
--------------------------------------------------------------------------------
/Images/php-word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/php-word.png
--------------------------------------------------------------------------------
/Images/python0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python0.png
--------------------------------------------------------------------------------
/Images/python1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python1.png
--------------------------------------------------------------------------------
/Images/python2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python2.png
--------------------------------------------------------------------------------
/5-Images/php-word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/php-word.png
--------------------------------------------------------------------------------
/5-Images/python0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python0.png
--------------------------------------------------------------------------------
/Images/oracle-Word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/oracle-Word.png
--------------------------------------------------------------------------------
/Images/python_word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python_word.png
--------------------------------------------------------------------------------
/5-Images/oracle-Word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/oracle-Word.png
--------------------------------------------------------------------------------
/5-Images/python_word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python_word.png
--------------------------------------------------------------------------------
/Images/python_word1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python_word1.png
--------------------------------------------------------------------------------
/5-Images/python_word1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python_word1.png
--------------------------------------------------------------------------------
/5-Images/Directory_Structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/Directory_Structure.png
--------------------------------------------------------------------------------
/4-Top_recommendations/center.css:
--------------------------------------------------------------------------------
1 | body {
2 |     background-color: #eee;
3 | }
4 | 
5 | .fullScreenFrame > div {
6 |     display: flex;
7 |     justify-content: center;
8 | }
--------------------------------------------------------------------------------
/4-Top_recommendations/load_css.py:
--------------------------------------------------------------------------------
1 | 
2 | import streamlit as st
3 | 
4 | def local_css(file_name):
5 |     with open(file_name) as f:
6 |         st.markdown('<style>{}</style>'.format(f.read()), unsafe_allow_html=True)  # injects the CSS file into the page; the <style> wrapper was stripped during extraction and is restored here
--------------------------------------------------------------------------------
/4-Top_recommendations/style.css:
--------------------------------------------------------------------------------
1 | .highlight {
2 |     border-radius: 0.2rem;
3 |     color: white;
4 |     padding: 0.1rem;
5 |     margin-bottom: 1rem;
6 | }
7 | .bold {
8 |     padding-left: 1rem;
9 |     font-weight: 700;
10 | }
11 | .blue {
12 |     background-color: rgba(19, 179, 139, 0.842);
13 | }
14 | .red {
15 |     background-color: rgb(207, 79, 79);
16 | }
17 | .orange {
18 |     background-color: rgb(202, 107, 17);
19 | }
20 | .green {
21 |     background-color: rgb(19, 190, 42);
22 | }
--------------------------------------------------------------------------------
/Data/Job-Locations/state.csv:
--------------------------------------------------------------------------------
1 | State_id,State
2 | 1,India
3 | 2,Andaman & Nicobar Islands
4 | 2,Andhra Pradesh
5 | 3,Arunachal Pradesh
6 | 4,Assam
7 | 5,Bihar
8 | 6,Chhattisgarh
9 | 7,Dadra & Nagar Haveli
10 | 8,Daman & Diu
11 | 9,Delhi
12 | 10,Goa
13 | 11,Gujarat
14 | 12,Haryana
15 | 13,Himachal Pradesh
16 | 14,Jammu & Kashmir
17 | 15,Jharkhand
18 | 16,Karnataka
19 | 17,Kerala
20 | 18,Lakshadweep
21 | 19,Madhya Pradesh
22 | 20,Maharashtra
23 | 21,Manipur
24 | 22,Meghalaya
25 | 23,Mizoram
26 | 24,Nagaland
27 | 25,Orissa
28 | 26,Pondicherry
29 | 27,Punjab
30 | 28,Rajasthan
31 | 29,Sikkim
32 | 30,Tamil Nadu
33 | 31,Uttar Pradesh
34 | 32,Uttarakhand
35 | 33,West Bengal
36 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bidirectional-Job-Resume-Recommender-System
2 | ## Introduction:
3 | A must-have tool for job seekers and recruiters. This project is intended to find and recommend the best fit: job seekers can find the jobs that best match their resume, and recruiters can find the best-fit resumes for any job posting. It is based on the machine-learning NLP concept of matching text content via Doc2Vec and similarity scores.
4 | The primary feature of this recommender system is its robust, bidirectional nature: it enables both job seekers and recruiters to find the best fit.
5 | 1. It reads the resume features and finds the top (n) relevant jobs based on education, work experience, location and text content.
6 | 
7 | 2. The same code can be used to find the best-matching resumes for a job posting (again based on education, work experience, location and text content).
8 | 
9 | The project involves extensive use of NLP features such as:
10 | 
11 | • tokenization
12 | 
13 | • lemmatization (English)
14 | 
15 | -- Tried WordNet, spaCy, TextBlob
16 | 
17 | -- spaCy substitutes -PRON- for any identified pronoun
18 | 
19 | -- Got the same results with NLTK WordNet and TextBlob - chose to stick with WordNet
20 | 
21 | • Count Vectorization
22 | 
23 | • TF-IDF
24 | 
25 | • entity extraction
26 | 
27 | 
28 | ## Model
29 | 
30 | Text data is trained on a **Doc2Vec** model. Rather than working on the frequency of each word, Doc2Vec creates a numeric representation of each document as an n-dimensional vector, as the minimal sketch below illustrates.
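A minimal gensim sketch of the idea (the toy documents here are illustrative only; the repository's actual training code lives in 2-Preprocessing_and_Modelling, and the vector size and epoch count echo the 6-Model description below):

```python
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# Toy stand-ins for cleaned job texts (title + description + skills)
job_texts = [
    "java developer spring hibernate web services",
    "data scientist python sql machine learning",
]
tagged = [TaggedDocument(words=t.split(), tags=[str(i)]) for i, t in enumerate(job_texts)]

# 20-D vectors and 200 epochs mirror the settings quoted for 6-Model
model = Doc2Vec(vector_size=20, min_count=1, epochs=200)
model.build_vocab(tagged)
model.train(tagged, total_examples=model.corpus_count, epochs=model.epochs)

# Embed a resume into the same vector space and rank jobs by cosine similarity
# (gensim >= 4.0 API; on 3.x use model.docvecs instead of model.dv)
resume_vec = model.infer_vector("java developer with spring experience".split())
print(model.dv.most_similar([resume_vec], topn=2))  # (job tag, cosine score) pairs
```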
31 | 
32 | Cosine similarity is then used to find the closest matches and recommend the top (n) of them.
33 | 
34 | ## Directory Structure
35 | ![image](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/blob/master/5-Images/Directory_Structure.png)
36 | 
37 | 
38 | ## Directory Details
39 | 
40 | ### [1-Data_gathering_EDA](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/1-Data_gathering_EDA)
41 | Job_EDA.ipynb - Notebook that gathers the raw data from CSV and runs EDA on jobs
42 | Resume_EDA - Notebook that gathers the raw data from CSV and runs EDA on resumes
43 | fuzzy-wuzzy-logic-Resume_EDA.ipynb – Groups similar titles based on a match score. The same title is often written in different forms, e.g. Java Developer, Dev (java), Jave Deve. etc., all of which should resolve to just Java Developer. FuzzyWuzzy helps resolve this issue, as sketched below.
44 | 
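A hypothetical illustration of that title-normalization step (the canonical list, raw titles and the 70 cut-off below are assumptions for the example, not the notebook's actual values):

```python
from fuzzywuzzy import fuzz, process

canonical_titles = ["java developer", "python developer", "oracle dba"]
raw_titles = ["Dev (java)", "Jave Deve.", "Sr. Python Developer"]

for title in raw_titles:
    # token_set_ratio is robust to word order and extra tokens
    match, score = process.extractOne(title, canonical_titles, scorer=fuzz.token_set_ratio)
    if score >= 70:  # assumed threshold
        print(f"{title!r} -> {match!r} (score={score})")
```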
45 | ### [2-Preprocessing_and_Modelling](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/2-Preprocessing_and_Modelling)
46 | Pre-processing Jobs for modellingv1.ipynb – First iteration of the Doc2Vec model on the jobs text data
47 | Pre-processing Jobs for modellingv2.ipynb – Second and final iteration of the Doc2Vec model on jobs
48 | Pre-processing_Resume for matchingv1.ipynb - First iteration of the Doc2Vec model on the resume text data
49 | Pre-processing_Resume for matchingv2.ipynb - Second and final iteration of the Doc2Vec model on resumes
50 | ** Looking at v2 alone is enough to understand the flow.
51 | 
52 | ### [3-Matching_Sprints](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/3-Matching_Sprints)
53 | Sprint1_matching_resume_to_jobs.ipynb
54 | Sprint2_matching_resume_to_jobs-with-location-add-on.ipynb
55 | Sprint3_matching_resume_to_jobs-with-text-add-on.ipynb
56 | Sprint4_matching_resume_to_jobs-final.ipynb
57 | 
58 | ### [4-Top_recommendations](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/4-Top_recommendations)
59 | job_output.py – Python file that runs a Streamlit app with an interactive user interface: input a resume and get the top 10 jobs
60 | center.css – Support file that centre-aligns text / images
61 | load_css.py – Support file for a better UI (injects local CSS into Streamlit)
62 | style.css – Support file for colour coding in Streamlit
63 | ** Focusing on job_output.py alone is enough to understand the code.
64 | 
65 | ### [5-Images](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/5-Images)
66 | Contains images used / created during coding
67 | 
68 | ### 6-Model
69 | Contains the final model: just load it and run (a Doc2Vec model trained on 40,000 jobs with 20-D vectors and 200 epochs)
70 | 
71 | ### [Data](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/Data)
72 | Due to file size limitations, only sample datasets are included.
73 | 
74 | • Resumes: Contains 15 sample resumes in .csv format (for the look and feel of the dataset)
75 | 
76 | • Jobs: Contains 15 sample jobs in .csv format (for the look and feel of the dataset)
77 | 
78 | • The actual datasets can be found on Kaggle:
79 | 
80 | https://www.kaggle.com/PromptCloudHQ/jobs-on-naukricom
81 | 
82 | https://www.kaggle.com/avanisiddhapura27/resume-dataset
83 | 
84 | 
85 | 
86 | ## Resources
87 | • Datasets
88 | 
89 | o https://www.kaggle.com/
90 | 
91 | o https://www.britannica.com/
92 | 
93 | • Lemmatization Approaches with Examples in Python
94 | 
95 | o https://www.machinelearningplus.com/
96 | 
97 | • Doc2Vec Tutorial and Implementation
98 | 
99 | o https://radimrehurek.com/gensim/
100 | 
101 | o https://towardsdatascience.com/
102 | 
103 | • FuzzyWuzzy Matching
104 | 
105 | o https://towardsdatascience.com/
106 | 
107 | • And a shout-out to:
108 | 
109 | o scikit-learn documentation
110 | 
111 | o GeeksforGeeks
112 | 
113 | o Stack Overflow
114 | 
--------------------------------------------------------------------------------
/4-Top_recommendations/job_output.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from PIL import Image
3 | # image = Image.open('title_page.png')
4 | # st.image(image,width = 600)
5 | st.markdown("<h1 style='text-align: center;'>Bidirectional Job-Resume Recommender</h1>", unsafe_allow_html=True)  # heading tag assumed: the original HTML markup in this string was stripped during extraction
6 | #st.markdown("

Selected Resume Title : {R_title} "
89 | with open("center.css") as f:
90 |     st.markdown(t.format(f.read()), unsafe_allow_html=True)
91 | """
92 | 
93 | 
94 | """
95 | #st.markdown(f'{R_title}')
96 | loc_ex = f"<div class='highlight blue bold'>Location: {R_location}</div><div class='highlight red bold'>Total Experience: {R_total_exp}</div>"  # div markup assumed (classes from style.css); the original tags were stripped during extraction
97 | #st.markdown(f'**Current location:** {R_location} \t **Total Experience:** {R_total_exp}')
98 | st.markdown(loc_ex, unsafe_allow_html=True)
99 | #st.subheader(f'Experience description: {R_work_ex}')
100 | 
101 | 
102 | import ast
103 | # for index, rows in r2.iterrows():
104 | #     resume_desc= []
105 | #     # pick the work experience col and read it as JSON
106 | result_work = r2['work_experiences']
107 | #st.subheader(result_work)
108 | #st.subheader(type(result_work))
109 | result_work = ast.literal_eval(result_work)
110 | #st.subheader(type(result_work))
111 | # try: result_work = eval(work)
112 | # except: continue
113 | # # read description
114 | #for i in result_work.keys():
115 | #    st.subheader(i)
116 | w_title = (result_work[0][0]['wtitle:'])
117 | #st.markdown(f'')
118 | w_company= (result_work[0][1]['wcompany:'])
119 | t_com = f"<div class='highlight orange bold'>Current Work Title : {w_title}</div><div class='highlight green bold'>Company : {w_company}</div>"  # div markup assumed; original tags stripped
120 | w_city= (result_work[0][2]['wcity:'])
121 | w_state= (result_work[0][3]['wstate:'])
122 | w_duration= (result_work[0][4]['wduration:'])
123 | w_descr= (result_work[0][5]['wdescr:'])
124 | #des = f"<div class='bold'>Description : {w_descr}</div>"
125 | des = f"<div class='bold'>Description : {R_desc}</div>"  # div markup assumed; original tags stripped
126 | #st.markdown(f'**Current Work Title :** {w_title} **Company :** {w_company}')
127 | st.markdown(t_com,unsafe_allow_html=True)
128 | st.markdown(des,unsafe_allow_html=True)
129 | #st.markdown(f'**Description :** {w_descr}')
130 | 
131 | 
132 | 
133 | # from the pool of 34,000 jobs, select the jobs that are related to sql dba (the resume in question)
134 | related_jobs = job.loc[job['jobtitle'].str.contains(match_key)]
135 | related_jobs=related_jobs.loc[related_jobs['location']==r2['location']]
136 | # job features that need to be matched with the resume
137 | job_m = related_jobs[['j_id','experience_range','is_grad','is_postgrad','is_doc','location',
138 | 'vec_1','vec_2','vec_3','vec_4','vec_5','vec_6','vec_7','vec_8','vec_9','vec_10','vec_11','vec_12',
139 | 'vec_13','vec_14','vec_15','vec_16','vec_17','vec_18','vec_19','vec_20']]
140 | # """
141 | # *************************************************
142 | # """
143 | st.markdown('# System Recommended Top 10 Jobs : ')
144 | image = Image.open('jobs.png')
145 | st.image(image, width = 200) #, use_column_width=True)
146 | st.write('Recommendation is based on cosine similarity over multiple factors like skills, location, experience, education, description, title, etc. ')
147 | # call the recommender, passing the selected resume
148 | matched_jobs = jobs_recommender(r1)
149 | matched_jobs = matched_jobs.head(10)
150 | 
151 | st.write(matched_jobs)
152 | st.write('**Note:** Similarity scores may round off to the nearest integer value, so it could be hard to see the difference, but they are displayed in ranked order.')
153 | """
154 | *************************************************
155 | 
156 | """
157 | # st.markdown('# Phrases suggestions in word-cloud ')
158 | # st.write('WordCloud pulls words and pairs from all related jobs to form a cloud')
159 | # from PIL import Image
160 | # if match_key == 'java':
161 | #     image = Image.open('java.png')
162 | #     st.image(image, caption=(f'Suggestions for {match_key}'),
163 | #         use_column_width=True)
164 | # elif match_key == 'oracle':
165 | #     image = Image.open('oracle.png')
166 | #     st.image(image, caption=(f'Suggestions for {match_key}'),
167 | #         use_column_width=True)
168 | # elif match_key == 'php':
169 | #     image = Image.open('php.png')
170 | #     st.image(image, caption=(f'Suggestions for {match_key}'),
171 | #         use_column_width=True)
172 | # elif match_key == 'python':
173 | #     image = Image.open('python_word1.png')
174 | #     st.image(image, caption=(f'Suggestions for {match_key}'),
175 | #         use_column_width=True)
176 | 
177 | 
178 | 
--------------------------------------------------------------------------------
/Data/working_jd_sample.csv:
--------------------------------------------------------------------------------
1 | company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id
2 | Covalense Technologies Private Limited,,5 - 9 yrs,IT-Software / Software Services,"Job Description   Send me Jobs like this Experience: 5yrs. - 9yrs. Job Description: * Engineering Graduate/Post-Graduate with 6-9 years of experience in Java Programming with strong in Object Oriented concepts. * Good Exposure to spring, hibernate, web services, Threading, Socket Programming, Collections, Data Structure and IO with strong knowledge on either Spring Batch or JEE frameworks.. * Experienced in XML configuration; setting up Eclipse or any other IDEs with basic knowledge of SQL. * Energetic with strong analytical, communication and interpersonal skills.
* Ability to learn and apply the new concepts quickly. Preferred Skills * Working knowledge of Oracle/DB2, ClearCase/CVS. * Experienced in test tools like JUNIT. * Ability to build tools like Ant and Maven. Salary:INR 7,50,000 - 15,00,000 P.A Industry: IT-Software / Software Services Functional Area: IT Software - Client/Server Programming Role Category:Programming & Design Role:Team Lead/Technical Lead Keyskills Hibernate Spring Java Maven JUnit Ant JEE Eclipse Oracle Web Services AngularJS Desired Candidate Profile Please refer to the Job description above Company Profile: Covalense Technologies Private Limited  Covalense is an IT services and solutions company, established in 2006 and now with the 300 + professionals are working across the location.  Our office presence is in New Zealand, Australia , US and India.  Bangalore, New Zealand is majorly focusing on End-to-End Oracle Telecom stack and Hyderabad, New Zealand focusing on Microsoft, Open Source, Mobility Apps (MOM Services) development  There are multiple project engagements with Tier 1 SI partners in BLR and Gurgaon.  Our portfolio majorly consists of telecom implementations along with a wide selection of industry verticals. Download PPT Photo 1   View Contact Details",70916001822,Bengaluru,Java - SSE / Technical Lead,,"7,50,000 - 15,00,000 P.A",2016-10-12 16:21:02 +0000,,IT Software - Client/Server Programming,60b28f3eb5c9c5c004e0b86678d99b5e 3 | Cambio Consulting,"UG: B.Tech/B.E. - Any Specialization, Other Graduate PG:M.Tech - Any Specialization, MCA - Computers, M.S/M.D - Any Specialization, Other Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",4 - 9 yrs,IT-Software / Software Services,"Job Description   Send me Jobs like this Hi, we have urgent requirement for embedded developers Job Description: Very good proficiency in programming in Java Very good proficiency in designing software applications, e.g. using design patterns, employing test driven development Good understanding of the principles of GUI programming in the context of Web, e.g. GWT, CSS, JavaScript Knowledge of telecommunication management protocols like SNMP, REST Preferable to have exposure to protocols like DHCP, DNS, SIP etc. Experience in maintaining Continuous Integration Environments e.g. using Maven, Gradle, Subversion Strong competences in working in a team, e.g. using Agile Frameworks like Scrum If interested, Please forward your updated CV to rafi@cambio.co.in Salary:INR 7,00,000 - 17,00,000 P.A Industry: IT-Software / Software Services Functional Area: IT Software - System Programming Role Category:Programming & Design Role:Software Developer Desired Candidate Profile Education- UG: B.Tech/B.E. - Any Specialization, Other Graduate PG:M.Tech - Any Specialization, MCA - Computers, M.S/M.D - Any Specialization, Other Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Cambio Consulting We are established consulting firm providing a wide spectrum of services in the HR Domain consulting services. Our core belief is that people are the most valuable asset for any company. Leading from that is our aim to offer not just recruitment support but also be a strategic adviser to all our customers. We plan to achieve this by understanding the client's business process, industry domain and develop market intelligence in order to provide a right fit for all the positions. 
Download PPT Photo 1   View Contact Details",1.20317E+11,"Bengaluru/Bangalore , Hyderabad / Secunderabad",Java & NMS Development Openings @ Bangalore and Hyderabad,,"7,00,000 - 17,00,000 P.A",2016-03-11 02:30:18 +0000,,IT Software - System Programming,da267e3b96a4ed51faf0e610ea662c20 4 | SATYAM VENTURE ENGINEERING SERVICES,"UG: Any Graduate PG:Any Postgraduate Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",10 - 15 yrs,Automobile / Auto Anciliary / Auto Components,"Job Description   Send me Jobs like this Experience Profile - Experience in Should Costing of Product is must. Knowledge about various Manufacturing processes such as Stamping, Pressure Die Casting, Plastic Injection Molding etc., Knowledge about costing of various commodities such as Plastic parts, Al. Die Casting, Electric Motors, Automotive Seating System etc., Preferred knowledge in Costing softwares like aPriori, PCS, DFM/ DFA etc., Personal Attributes - Ability to develop a team Keenness to innovation, problem solving abilities, commitment to personal and professional growth, and eye for details. Presentation/ communication skills are prerequisites. Good team players Salary: Not Disclosed by Recruiter Industry: Automobile / Auto Anciliary / Auto Components Functional Area: Production , Manufacturing , Maintenance Role Category:Production/Manufacturing/Maintenance Role:Project Manager-Production/Manufacturing/Maintenance Desired Candidate Profile Education- UG: Any Graduate PG:Any Postgraduate Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: SATYAM VENTURE ENGINEERING SERVICES SATYAM VENTURE ENGINEERING SERVICES Download PPT Photo 1   View Contact Details",3.10317E+11,Hyderabad / Secunderabad,Project Manager,,Not Disclosed by Recruiter,2016-03-31 02:30:24 +0000,,Production,d6ff245ae99d79e4be094fbe47c50284 5 | Envision Enterprise Solutions Pvt Ltd,"UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",7 - 12 yrs,IT-Software / Software Services,"Job Description   Send me Jobs like this Project Managers with more than 7 years of experience of large project management and delivery. Working experience in multiple countries with multi culture onsite project engagements, with in Ability to manage stake holders with excellent written, verbal communication skills and team management abilities. Expertise in delivery models like waterfall and Agile is essential. PMP Qualification will be an advantage. Willing to travel to client locations globally Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Testing Engineer Desired Candidate Profile Education- UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Envision Enterprise Solutions Pvt Ltd Envision is a leading IT Solutions Provider and System Integrator, providing solutions for enterprises across the globe, to optimize resource utilization, streamline operations, reduce the costs, maximise return on investment. We provide cost effective solutions within budget and timelines. We are known for providing Enterprise Asset Management, Port and Terminal Automation Solutions, Transportation, Logistics, Enterprise Mobility Solutions, IOT, Smarter manufacturing, facilities solutions. 
Download PPT Photo 1   View Contact Details",70316503401,Hyderabad / Secunderabad,Project Managers,,Not Disclosed by Recruiter,2016-03-07 02:31:09 +0000,,IT Software - Application Programming,9f819c69b3578157baf8b83a5820b27e 6 | NEW HOPE MEDICAL CENTRE,"UG: B.B.A PG:MBA/PGDM Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",1 - 2 yrs,Medical / Healthcare / Hospitals,"Job Description   Send me Jobs like this Persons with great initiative, negotiation skills, sound financial insight, managerial skills to lead and support our new projects. Salary: Not Disclosed by Recruiter Industry: Medical / Healthcare / Hospitals Functional Area: Medical , Healthcare , R&D , Pharmaceuticals , Biotechnology Role Category:Drug Regulatory Affairs/Documentation Role:Regulatory Affairs Manager Desired Candidate Profile Education- UG: B.B.A PG:MBA/PGDM Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: NEW HOPE MEDICAL CENTRE NEW HOPE MEDICAL CENTRE Download PPT Photo 1   View Contact Details",1.60317E+11,Hyderabad / Secunderabad,Project Manager,,Not Disclosed by Recruiter,2016-03-16 02:31:16 +0000,,Medical,2a1d64deb55ed947cec34818eb7abf9a 7 | Maven Workforce,"UG: B.Tech/B.E. - Any Specialization PG:M.Tech - Any Specialization, MCA - Computers, M.Sc - Any Specialization Doctorate:Doctorate Not Required",5 - 10 yrs,IT-Software / Software Services,"Job Description   Send me Jobs like this 1. The candidate must have done atleast N3 of JLPT (Japanese Language Proficiency Test). Currently JLPT has 5 levels (previously 4 levels). If the candidate has N3 (current), N2 or N1 certification, we can consider. 2. Candidate must know atleast 1000 kanjis 3. Candidate must be good in speaking Japanese. There are no certifications to check that. If a person says he/she lived in Japan or speak business Japanese on a day-to- day basis. 4. M.A in Japanese (especially from Jawaharlal Nehru University (JNU), Delhi) and even if he/she has not cleared any JLPT certifications. 5. If a person has not done any certifications but lived in Japan and can read, write, speak Japanese can be considered. Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Other Role Category:Other Role:Outside Consultant Keyskills Japanese JLPT Japanese Interpreter Japanese translator Japanese Language proficiency test Desired Candidate Profile Education- UG: B.Tech/B.E. 
- Any Specialization PG:M.Tech - Any Specialization, MCA - Computers, M.Sc - Any Specialization Doctorate:Doctorate Not Required   Company Profile: Maven Workforce Leading client of Maven Workforce Download PPT Photo 1   View Contact Details",30516900761,"Delhi/NCR(National Capital Region) , Gurgaon",Japanese Interpreter,,Not Disclosed by Recruiter,2016-05-03 11:35:55 +0000,,IT Software - Other,30649a930cae66477fb4e0eb93f2ccf9 8 | Confidential,,2 - 5 yrs,IT-Software / Software Services,"Job Description   Send me Jobs like this hi all, please find the mentioned JD below, Skills: Dot.Net, C#, SQL, OOPS ,Web services Experience: 2 -5 Years Work Location: Bangalore M G Road Interview Timming- 10:00 A M to 2:00 P M Mode:C2H Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Keyskills .Net Desired Candidate Profile   Company Profile: Confidential Confidential Download PPT Photo 1   View Contact Details",1.30416E+11,Bengaluru,.Net Developer,,Not Disclosed by Recruiter,2016-10-06 16:21:31 +0000,,IT Software - Application Programming,b529711ee8c1b4c1bea4849d18594132 9 | Melstar Information Technologies Ltd,UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required,2 - 5 yrs,IT-Software / Software Services,"Job Description   Send me Jobs like this We have an urgent openings for .net developer Location : Bangalore Skills:.net, sql Exp : 2 to 4 Years, If you are interested please send your updated resume with following details: Full Name: Email ID: Phone: Primary skills: Total Exp : Relevant Exp: Minimum Notice Period: Current Company: Current CTC: Current Employment(Perm/Cont): Current Location: Preferred Location: Availability on weekend/WeekDay for a F2F discussion : PAN : Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Keyskills .net developer .net developer Desired Candidate Profile   Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required Company Profile: Melstar Information Technologies Ltd Melstar is a CMM Level global IT company with its headquarters in Mumbai; India. We have eight software development facilities, with eleven offices worldwide including the US,Europe and India. We offer a unique blend of domain expertise in the field of Banking, Finance, Insurance and Manufacturing. We are engaged in cutting-edge technologies like e-commerce,web development and dot-com projects with strong N-tier approach. We have global partnerships with IBM,Microsoft, Oracle,Informix and other IT Stalwarts. Our strong customer focus can be seen from the prestigious list of clients like Citibank N.A., IBM, Genpact, Standard Chartered, HP etc. Download PPT Photo 1   View Contact Details",71016900650,"Bengaluru, Delhi, Noida",.net Developer,,Not Disclosed by Recruiter,2016-10-07 16:21:40 +0000,,IT Software - Application Programming,e233e57a6b2eeefc24d43cbb58a86096 10 | Unitforce technologies Pvt. 
Ltd.,"UG: Any Graduate - Any Specialization PG:Any Postgraduate - Any Specialization Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",5 - 6 yrs,IT-Software / Software Services,"Job Description   Send me Jobs like this Net Framework 4.5 - ASP & C# - HTML5, Java Script, AJAX, JQuery Secondary Skills Required: Entity Framework MVC Angular JS RDBMS Oracle 11g Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Desired Candidate Profile Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate - Any Specialization Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Unitforce technologies Pvt. Ltd. www.uftech.com Download PPT Photo 1   View Contact Details",60316600219,Hyderabad / Secunderabad,.NET Developer,,Not Disclosed by Recruiter,2016-03-05 02:30:23 +0000,,IT Software - Application Programming,4ed3d0cca70fcac5acf1557de46ef176 11 | Karvy Analytics Limited,UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required,4 - 8 yrs,KPO / Research / Analytics,"Job Description   Send me Jobs like this Responsibilities : - Selecting features, building and optimizing classifiers using machine learning techniques - Data mining using state-of-the-art methods - Enhancing data collection procedures to include information that is relevant for building analytic systems - Processing, cleansing, and verifying the integrity of data used for analysis - Doing ad-hoc analysis and presenting results in a clear manner Skills and Qualifications : - Excellent understanding of machine learning techniques and algorithms, such as k-NN, Naive Bayes, SVM, GBM, Decision Forests, Time Series Forecasting etc. - Experience with common data science toolkits in R or Python Excellence in at least one of these is highly desirable - Good communication skills - Experience with data visualization tools, such as D3.js, Tableau etc. would be added advantage - Proficiency in using query languages such as SQL, Hive, Pig would be added advantage - Good applied statistics skills, such as distributions, statistical testing, regression, etc. - Good scripting and programming skills - Data-oriented personality - More than 4 years of experience in Data analysis. Salary:INR 6,00,000 - 12,00,000 P.A Industry: KPO / Research / Analytics Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Analytics Manager Keyskills Data Science Hive Machine Learning Data Mining R Data Visualization SQL Python Data Analysis Time Series Desired Candidate Profile Please refer to the Job description above Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required Company Profile: Karvy Analytics Limited Karvy Analytics Limited is a new age company and a modern arm of the leading Karvy Conglomerate. Led by visionary management, the young and forward thinking team is building world class solutions for the global analytics universe. We are focused on multi-industry use cases for companies that need technology and professional services for their functional and operational analytics projects. 
We offer a range of solutions that bring immediate business benefits to our global customers who are interested in leveraging big data, statistical and mathematical modeling techniques, social analytics, and mobile descriptive analytics for new business insights. Download PPT Photo 1   View Contact Details",2.01216E+11,Hyderabad,Sr Data Scietist,2,"6,00,000 - 12,00,000 P.A",2016-12-20 18:19:23 +0000,www.naukri.com,Analytics & Business Intelligence,0abdcbe9423d9e4730c1b16db7954f77 12 | Rinalytics Advisors Pvt. Ltd,"UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",2 - 7 yrs,Recruitment / Staffing,Job Description   Send me Jobs like this Job Description,2.50516E+11,Bengaluru/Bangalore,Data Science Role,,Not Disclosed by Recruiter,2016-01-29 14:07:13 +0000,,Analytics & Business Intelligence,8e9ba1f084f9fe31c7878258fba47188 13 | Prism Manpower Services,UG: Any Graduate PG:Post Graduation Not Required,0 - 4 yrs,Recruitment / Staffing,"Job Description   Send me Jobs like this Computer Operators. Should have knowledge of Excel. Decent typing speed. should know English typing. Freshers are also fine. Interested Candidates can forward their resume at prismmanpower@yahoo.in call us on 9702897822 Salary: Not Disclosed by Recruiter Industry: Recruitment / Staffing Functional Area: Executive Assistant , Front Office , Data Entry Role Category:Other Role:Stenographer/Data Entry Operator Desired Candidate Profile Please refer to the Job description above Education- UG: Any Graduate PG:Post Graduation Not Required Company Profile: Prism Manpower Services We , Prism Manpower Services , provide a wide range of Recruitment Solutions for various requirements. Located in Mumbai city of Maharashtra , the company was incepted in the year 2007. With an experience of serving industries like Insurance , Event Management etc , we are today recognized as a trustworthy Service Provider in Maharashtra. Download PPT Photo 1   View Contact Details",2.51017E+11,Mumbai,DATA ENTRY OPERATOR,,Not Disclosed by Recruiter,2016-10-25 19:49:07 +0000,www.naukri.com,Executive Assistant,7fb17e8480a9978d68e30de0f39fea04 14 | "Risk Management Solutions, Inc.","UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",4 - 5 yrs,Banking / Financial Services / Broking,"Job Description   Send me Jobs like this RMS - Senior Analyst, Data Analytics Senior Analyst, Data Analytics Objective of the Role: The applicant will be extensively involved in exposure modeling and data analytics. Executes delivers original analysis and insights and own all or part of an analytics module Provide professional skills necessary for all phases of data analysis, including the application of standard statistical methods for conducting analysis, documentation and presentation. Communicates analytical insights through sophisticated synthesis and packaging of results (Including PPT slides and charts) Establishes credibility by thought partnering on analytics topics; takes positions and draws conclusions on a range of external and internal issues Serve as an active participant on cross- functional projects, interpreting data, and translating into actionable insights, provide support on ad- hoc analysis and reports. B. Tech./ Post graduate (geostatistics) from a premier institute with good academic record 4 to 5 years of total experience with minimum 3 years experience on analytical projects requiring comprehensive data analysis, interpretation and presentation skills. 
Knowledge of analysis techniques like statistical methodology, data manipulation. Critical thinking skills and hands on experience in data interpretation, formulating hypotheses and being able to make educated guesses when data may be sparse or unavailable. Strong MS SQL knowledge and experience, ability to write custom queries of medium to high complexity. Strong documentation skills with experience of working on MS Word, Excel (advanced knowledge such as using pivots, filters, using external data etc.) , PowerPoint and Project. Excellent communication skills and ability to independently lead and drive projects. Technical skills: Experience with multiple analytics methods (one or more required) Data management skills (e.g. data modeling, data integrity QA/ QC) Geospatial data visualization and analytics (specialties such as cluster detection or geo- statistical methods) Spatialtemporal analysis (cartographic animation of timeseries data) Experience in core analytics methods (one or more of the following) : Geo coding geo referencing. Knowledge of open source proprietary geo analytics data sources. Geographic cluster recognition. Network analysis (locationallocation, OD Matrix travelling sales person, vehicle routing problem) Spatialtemporal analysis Familiarity with analytics tools (one or more required) GIS toolkits (ESRI, Quantum GIS, MapInfo or similar) Working knowledge of Property Causality insurance or reinsurance or and Risk Assessment Analysis would be advantageous Knowledge of catastrophe modeling domain would also be advantageous Working knowledge of and experience in statistical tools like R, SPSS etc. RMS models and software help insurers, financial markets, corporations, and public agencies evaluate and manage catastrophe risks throughout the world. We lead an industry that we helped to pioneercatastrophe risk modelingand are the innovators of the RMS (one) platform, which is transforming the world's understanding and quantification of risk through open, real- time exposure and risk management. More than 400 insurers, reinsurers, trading companies, and other financial institutions trust RMS models and SaaS solutions to better understand and manage the risks of natural and human- made catastrophes, including hurricanes, earthquakes, floods, terrorism, and pandemics. We think about the unthinkable, enabling the management of even the most extreme events. Our scientific and objective measurement of risk facilitates the efficient flow of capital needed to insure, manage, and ultimately mitigate these risks to reduce the consequences of disasters, promoting resilient societies and a sustainable global economy. RMS is proud to be an equal opportunity employer. Salary: Not Disclosed by Recruiter Industry: Banking / Financial Services / Broking Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Data Analyst Desired Candidate Profile Education- UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Risk Management Solutions, Inc. Risk Management Solutions, Inc. 
Download PPT Photo 1   View Contact Details",1.10517E+11,Noida,Data Analytics,,Not Disclosed by Recruiter,2016-05-11 06:05:20 +0000,,Analytics & Business Intelligence,187a7d0b53f5211639157026daaf6dca 15 | ZSoft Internet Media Pvt Ltd.,"UG: Diploma PG:Post Graduation Not Required Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",2 - 4 yrs,IT-Software / Software Services,"Job Description   Send me Jobs like this Job description:The post is responsible for maintaining, updating and enhancing Client database. Work on Data Entry and generate reports as per needs Must have good hands on experience with MS OfficeRoles and Responsibilities: Good Typing Skills, Communication SkillsQualification: Any Graduate ( BE/B Tech/B. Sc Computers/BCA and Diploma Holders )Speed and accuracy essential. Must have be organized, self disciplined and self starter Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: Executive Assistant , Front Office , Data Entry Role:Stenographer/Data Entry Operator Keyskills bca diploma maintaining good typing skills data entry operator b sc b tech responsible be database Desired Candidate Profile Education- UG: Diploma PG:Post Graduation Not Required Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: ZSoft Internet Media Pvt Ltd. ZSoft Internet Media Pvt. Ltd. - Website Design | Software Development | SEO | SMO | PPC | ORM | IT Services Business Technology Consulting Download PPT Photo 1   View Contact Details",1.00816E+11,"Delhi , Delhi",Data Entry Operator,,Not Disclosed by Recruiter,2015-11-23 22:17:37 +0000,,Executive Assistant,18ee0fbcfa297155ef90876b0fda0608 16 | Startup - Entransys,"UG: Any Graduate - Any Specialization, B.Tech/B.E. - Any Specialization Doctorate:Doctorate Not Required",3 - 5 yrs,Internet / Ecommerce,"Job Description   Send me Jobs like this We are looking for an Analytics Designer with strong interests and capabilities in the design and development of engaging user experiences. Salary: Not Disclosed by Recruiter Industry: Internet / Ecommerce Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Data Analyst Keyskills Design Development Data Science Analytics Desired Candidate Profile   Education- UG: Any Graduate - Any Specialization, B.Tech/B.E. - Any Specialization Doctorate:Doctorate Not Required Company Profile: Startup - Entransys Entransys approach and methodology is aimed towards converting the Business Chain into Value Chain and ensures the reconfiguration of Business processes to maximize the Business value. Download PPT Photo 1   View Contact Details",2.21217E+11,Hyderabad,Analytics & Data Science,,Not Disclosed by Recruiter,2016-12-22 18:19:00 +0000,www.naukri.com,Analytics & Business Intelligence,e5cc1a2789b45f1161636fc3681670ee 17 | AR Enterprises hiring for US Based MNC,,0 - 2 yrs,Recruitment / Staffing,"Job Description   Send me Jobs like this SECRETARY / FRONT OFFICE / DATA ENTRY Job Profile Salary: Not Disclosed by Recruiter Industry: Recruitment / Staffing Functional Area: Executive Assistant , Front Office , Data Entry Role Category:Other Role:Stenographer/Data Entry Operator Keyskills Data Entry Operation Front Office Secretarial Activities Desired Candidate Profile Please refer to the Job description above Company Profile: AR Enterprises US Based MNC our aim is to make future and to give best placement. 
Download PPT Photo 1   View Contact Details",51116002047,"Hyderabad, Chennai, Bengaluru, Gwalior",Data Entry Operator,3,Not Disclosed by Recruiter,2017-01-11 21:00:00 +0000,www.naukri.com,Executive Assistant,0e1a1f05ed979b8139dfb814058f68ac 18 | -------------------------------------------------------------------------------- /2-Preprocessing_and_Modelling/Pre-processing_Resume for matchingv2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pre-Processing Resume Text Column to Prepare for Matching - final " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import json\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "%matplotlib inline\n", 21 | "\n", 22 | "import re\n", 23 | "import datetime\n", 24 | "from datetime import date\n", 25 | "from time import strptime\n", 26 | "\n", 27 | "import RAKE as rake\n", 28 | "import operator\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "###############################################################################################\n", 36 | "## Working on Resume data\n", 37 | "###############################################################################################" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# First reading my resume csv\n", 47 | "resume = pd.read_csv('wip/resume_sorted6.csv')" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\n", 60 | "RangeIndex: 14428 entries, 0 to 14427\n", 61 | "Data columns (total 26 columns):\n", 62 | " # Column Non-Null Count Dtype \n", 63 | "--- ------ -------------- ----- \n", 64 | " 0 index 14428 non-null int64 \n", 65 | " 1 Resume_title 14428 non-null object\n", 66 | " 2 City 14428 non-null object\n", 67 | " 3 location 14428 non-null int64 \n", 68 | " 4 Description 14428 non-null object\n", 69 | " 5 work_experiences 14428 non-null object\n", 70 | " 6 Educations 14428 non-null object\n", 71 | " 7 Skills 14428 non-null object\n", 72 | " 8 Links 14428 non-null object\n", 73 | " 9 Certificates 14428 non-null object\n", 74 | " 10 Additional Information 14428 non-null object\n", 75 | " 11 is_grad 14428 non-null int64 \n", 76 | " 12 is_postgrad 14428 non-null int64 \n", 77 | " 13 is_doc 14428 non-null int64 \n", 78 | " 14 edu_unknown 14428 non-null int64 \n", 79 | " 15 Computer_Eng 14428 non-null int64 \n", 80 | " 16 Finance 14428 non-null int64 \n", 81 | " 17 HR 14428 non-null int64 \n", 82 | " 18 AI_stats 14428 non-null int64 \n", 83 | " 19 MBA 14428 non-null int64 \n", 84 | " 20 Other_specialization 14428 non-null int64 \n", 85 | " 21 resume_id 14428 non-null int64 \n", 86 | " 22 total_experience 14428 non-null int64 \n", 87 | " 23 experience_range 14428 non-null int64 \n", 88 | " 24 loc_name 14428 non-null object\n", 89 | " 25 experience_desc 14428 non-null object\n", 90 | "dtypes: int64(15), object(11)\n", 91 | "memory usage: 2.9+ MB\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "#initial info\n", 97 | "resume.info()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | 
"#########################################################################################################\n", 105 | "## To match resume with jobs, I need to have similar 20 vectors, that I created to train my Doc2Vec model for jobs. \n", 106 | "\n", 107 | "### For training my jobs model, I picked text data from :\n", 108 | "* job title\n", 109 | "* job description\n", 110 | "* skills\n", 111 | "* industry\n", 112 | "\n", 113 | "### So for training my resume model, I need similar text, thus picking:\n", 114 | "* Resume_title\n", 115 | "* Resume description \n", 116 | "* skills\n", 117 | "* Additional Information\n", 118 | "\n", 119 | "\n", 120 | "#########################################################################################################" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 40, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "resume['Resume_title'] = resume['Resume_title'].str.lower()\n", 130 | "resume['Skills']=resume['Skills'].str.lower()\n", 131 | "resume['Description'] = resume['Description'].str.lower()\n", 132 | "resume['Additional Information'] = resume['Additional Information'].str.lower()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 41, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "resume['Description'].replace('none', ' ',inplace=True)\n", 142 | "resume['Additional Information'].replace('none', ' ',inplace=True)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 5, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stderr", 152 | "output_type": "stream", 153 | "text": [ 154 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 155 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 156 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 157 | "\n", 158 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 159 | " \n" 160 | ] 161 | }, 162 | { 163 | "data": { 164 | "text/html": [ 165 | "
\n", 166 | "\n", 179 | "\n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | "
resume_idResume_titleresume_combo
00java developerjava developer to prove myself dedicated, wort...
11software developersoftware developer working as software develop...
22java developerjava developer looking for a challenging caree...
33seeking innovative and challenging career assi...seeking innovative and challenging career assi...
44java developerjava developer ['project: hr payroll systems...
\n", 221 | "
" 222 | ], 223 | "text/plain": [ 224 | " resume_id Resume_title \\\n", 225 | "0 0 java developer \n", 226 | "1 1 software developer \n", 227 | "2 2 java developer \n", 228 | "3 3 seeking innovative and challenging career assi... \n", 229 | "4 4 java developer \n", 230 | "\n", 231 | " resume_combo \n", 232 | "0 java developer to prove myself dedicated, wort... \n", 233 | "1 software developer working as software develop... \n", 234 | "2 java developer looking for a challenging caree... \n", 235 | "3 seeking innovative and challenging career assi... \n", 236 | "4 java developer ['project: hr payroll systems... " 237 | ] 238 | }, 239 | "execution_count": 5, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "df_resume = resume[['resume_id','Resume_title' ]]\n", 246 | "df_resume['resume_combo'] = resume['Resume_title'] +\" \" + resume['Description'] +\" \" + resume['Skills'] + \" \"+resume['Additional Information'] + \" \"+resume['experience_desc']\n", 247 | "df_resume.head()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 6, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "0 java developer to prove myself dedicated, wort...\n", 259 | "1 software developer working as software develop...\n", 260 | "2 java developer looking for a challenging caree...\n", 261 | "3 seeking innovative and challenging career assi...\n", 262 | "4 java developer ['project: hr payroll systems...\n", 263 | "5 java developer ['java'] ['have the potenti...\n", 264 | "6 java developer to secure a challenging positio...\n", 265 | "7 searching job for java developer ['c++', ' h...\n", 266 | "8 mca / with 3 years of development experience •...\n", 267 | "9 java developer attain the position of 'java de...\n", 268 | "Name: resume_combo, dtype: object" 269 | ] 270 | }, 271 | "execution_count": 6, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "docs = df_resume['resume_combo']\n", 278 | "docs_sample = docs.head(10)\n", 279 | "docs_sample" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 7, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "name": "stderr", 289 | "output_type": "stream", 290 | "text": [ 291 | "[nltk_data] Downloading package wordnet to\n", 292 | "[nltk_data] C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n", 293 | "[nltk_data] Package wordnet is already up-to-date!\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "#Import all the dependencies\n", 299 | "import nltk\n", 300 | "nltk.download('wordnet')\n", 301 | "from nltk.stem import WordNetLemmatizer\n", 302 | "wordnet_lemmatizer = WordNetLemmatizer()\n", 303 | "from nltk.corpus import stopwords\n", 304 | "from nltk.tokenize import word_tokenize \n", 305 | "set(stopwords.words('english'))\n", 306 | "\n", 307 | "import string\n", 308 | "\n", 309 | "import gensim\n", 310 | "from gensim.test.utils import common_texts\n", 311 | "from gensim.models.doc2vec import Doc2Vec, TaggedDocument" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 8, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "name": "stderr", 321 | "output_type": "stream", 322 | "text": [ 323 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n", 324 | " 'stop_words.' 
% sorted(inconsistent))\n" 325 | ] 326 | }, 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "(14428, 70688)\n", 332 | "(14428, 3)\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 338 | "stopwords = nltk.corpus.stopwords.words('english')\n", 339 | "stopwords.append('ã¯æ’ëœ')\n", 340 | "stopwords.append('\\n')\n", 341 | "stopwords.append('•')\n", 342 | "#Transforms words to TFIDF\n", 343 | "vectorizer = TfidfVectorizer(stop_words = stopwords)\n", 344 | "\n", 345 | "index = 0\n", 346 | "keys = {}\n", 347 | "\n", 348 | "for rem in df_resume.itertuples() :\n", 349 | " key = rem[1]\n", 350 | " keys[key] = index\n", 351 | " index += 1\n", 352 | "\n", 353 | "#Fit the vectorizer to the data\n", 354 | "vectorizer.fit(df_resume['resume_combo'].fillna(''))\n", 355 | "\n", 356 | "#Transform the data\n", 357 | "tfidf_scores = vectorizer.transform(df_resume['resume_combo'].fillna(''))\n", 358 | "\n", 359 | "print(tfidf_scores.shape)\n", 360 | "print(df_resume.shape)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 10, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 11, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/html": [ 380 | "
\n", 381 | "\n", 394 | "\n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | "
0000000000008976500089805000webhostapp00100200300353...õleøcreatedǁǁǁǁǁǁηadoopτrainτοοlsчєαrfiledfinancialfixing
00.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
20.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
30.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
40.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", 544 | "

5 rows × 70688 columns

\n", 545 | "
" 546 | ], 547 | "text/plain": [ 548 | " 00 000 0000 00089765 00089805 000webhostapp 001 002 003 00353 \\\n", 549 | "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 550 | "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 551 | "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 552 | "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 553 | "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 554 | "\n", 555 | " ... õle øcreated ǁǁǁǁǁǁ ηadoop τrain τοοls чєαr filed financial \\\n", 556 | "0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 557 | "1 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 558 | "2 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 559 | "3 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 560 | "4 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 561 | "\n", 562 | " fixing \n", 563 | "0 0.0 \n", 564 | "1 0.0 \n", 565 | "2 0.0 \n", 566 | "3 0.0 \n", 567 | "4 0.0 \n", 568 | "\n", 569 | "[5 rows x 70688 columns]" 570 | ] 571 | }, 572 | "execution_count": 11, 573 | "metadata": {}, 574 | "output_type": "execute_result" 575 | } 576 | ], 577 | "source": [ 578 | "test.head()" 579 | ] 580 | }, 581 | { 582 | "cell_type": "markdown", 583 | "metadata": {}, 584 | "source": [ 585 | "### Creating my Stopword list\n", 586 | "#### As seen there are so many unwanted tokens like numbers, etc , I need to add them in \"stop words\" list to train model" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 12, 592 | "metadata": {}, 593 | "outputs": [], 594 | "source": [ 595 | "#getting list of all tokens\n", 596 | "word_list = test.columns.tolist()" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 13, 602 | "metadata": {}, 603 | "outputs": [], 604 | "source": [ 605 | "##Getting a list of unwanted words as s_words and adding to stopwords\n", 606 | "s_words =[]\n", 607 | "for word in word_list:\n", 608 | " #print(word)\n", 609 | " if re.search(\"^\\W|^\\d\",word):\n", 610 | " s_words.append(word)" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 14, 616 | "metadata": {}, 617 | "outputs": [], 618 | "source": [ 619 | "s_words.append('') \n", 620 | "from nltk.corpus import stopwords\n", 621 | "stopword_set = set(stopwords.words('english'))\n", 622 | "stopword_set = list(stopword_set)\n", 623 | "stopword_set.extend(s_words)" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 15, 629 | "metadata": {}, 630 | "outputs": [], 631 | "source": [ 632 | "def preprocess(text):\n", 633 | " stop_words = stopword_set\n", 634 | " #0. split words by whitespace\n", 635 | " text = text.split()\n", 636 | " \n", 637 | " \n", 638 | " # 1. lower case\n", 639 | " text = [word.lower() for word in text]\n", 640 | " \n", 641 | " # 2. remove punctuations\n", 642 | " punc_table = str.maketrans('','',string.punctuation)\n", 643 | " text = [word.translate(punc_table) for word in text]\n", 644 | " \n", 645 | " # 3. 
remove stop words\n", 646 | " text = [word for word in text if word not in stop_words]\n", 647 | " \n", 648 | " return text" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 16, 654 | "metadata": {}, 655 | "outputs": [], 656 | "source": [ 657 | "tokenized_doc = []\n", 658 | "doc = df_resume['resume_combo']\n", 659 | "#doc = docs_sample\n", 660 | "for d in doc:\n", 661 | " tokenized_doc.append(preprocess(d))\n", 662 | "#tokenized_doc" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 17, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "# Convert tokenized document into gensim formated tagged data\n", 672 | "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 18, 678 | "metadata": {}, 679 | "outputs": [ 680 | { 681 | "data": { 682 | "text/plain": [ 683 | "14428" 684 | ] 685 | }, 686 | "execution_count": 18, 687 | "metadata": {}, 688 | "output_type": "execute_result" 689 | } 690 | ], 691 | "source": [ 692 | "num_doc = len(tagged_data)\n", 693 | "num_doc\n", 694 | "#confirm length (should be 14428)\n", 695 | "len(tokenized_doc)" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 21, 701 | "metadata": {}, 702 | "outputs": [], 703 | "source": [ 704 | "## Load saved doc2vec model\n", 705 | "model= Doc2Vec.load(\"Model/my_doc2vec_v2.model\")" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 24, 711 | "metadata": {}, 712 | "outputs": [], 713 | "source": [ 714 | "## Get vector value\n", 715 | "vec = np.empty([14428,20])\n", 716 | "\n", 717 | "for k,i in enumerate(tokenized_doc):\n", 718 | " \n", 719 | " #print(i)\n", 720 | " vector = model.infer_vector(i)\n", 721 | " vec[k] = vector\n", 722 | "\n", 723 | "# reshape into 2D\n", 724 | "new_arr = np.reshape(vec,(-1,20))" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 25, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "rng = range(1, 21)\n", 734 | "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])" 735 | ] 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": 26, 740 | "metadata": {}, 741 | "outputs": [ 742 | { 743 | "data": { 744 | "text/html": [ 745 | "
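
Two hedged notes on the cells above. First, `preprocess()` tests membership against `stopword_set`, a plain Python list with tens of thousands of entries, so every token pays a linear scan; converting it to a `set` once makes each lookup O(1). Second, `infer_vector` is stochastic, so repeated runs give slightly different vectors; fixing the number of inference epochs gives steadier output. A sketch reusing the notebook's own names (`stopword_set`, `model`, `tokenized_doc`); the epochs value is an assumption, not from the notebook:

    # Hedged sketch, not the notebook's original code.
    stop_lookup = set(stopword_set)              # O(1) membership test
    # inside preprocess(): text = [word for word in text if word not in stop_lookup]

    # More repeatable inference: extra epochs average out the random initialisation.
    # (older gensim spells this keyword `steps` instead of `epochs`)
    vector = model.infer_vector(tokenized_doc[0], epochs=50)
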
\n", 746 | "\n", 759 | "\n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | "
vec_1vec_2vec_3vec_4vec_5vec_6vec_7vec_8vec_9vec_10vec_11vec_12vec_13vec_14vec_15vec_16vec_17vec_18vec_19vec_20
03.0033971.462391-0.7322062.032145-3.2914251.6266221.269785-1.303818-1.781690-3.8936060.582851-2.3904300.6126944.274847-1.6413251.098874-0.5349980.338975-2.081308-3.480031
13.969832-1.478794-1.9974241.502539-3.5075082.108994-0.3866401.4943960.454764-2.268685-1.505257-2.332494-0.4310221.431269-0.896382-0.2672691.4333520.438305-0.992093-0.096142
21.4427010.011723-2.1265060.655804-3.9845130.7920351.317094-0.696710-1.563318-3.040591-0.367393-3.774975-1.1835952.456486-1.2709812.475039-1.9901100.130853-0.589791-2.782936
31.803033-0.120398-1.1599590.066225-3.5225081.321965-0.756211-0.249010-0.074644-2.3143890.557041-3.887409-1.0700273.894971-0.957399-0.952996-0.8242660.0387121.194561-1.206788
4-0.4340190.551527-1.531551-0.767032-0.5144730.286549-0.5638880.3107480.457921-1.3346320.183150-0.5478341.2189950.5361820.995981-0.874730-0.1389160.882186-0.129402-1.793177
\n", 903 | "
" 904 | ], 905 | "text/plain": [ 906 | " vec_1 vec_2 vec_3 vec_4 vec_5 vec_6 vec_7 \\\n", 907 | "0 3.003397 1.462391 -0.732206 2.032145 -3.291425 1.626622 1.269785 \n", 908 | "1 3.969832 -1.478794 -1.997424 1.502539 -3.507508 2.108994 -0.386640 \n", 909 | "2 1.442701 0.011723 -2.126506 0.655804 -3.984513 0.792035 1.317094 \n", 910 | "3 1.803033 -0.120398 -1.159959 0.066225 -3.522508 1.321965 -0.756211 \n", 911 | "4 -0.434019 0.551527 -1.531551 -0.767032 -0.514473 0.286549 -0.563888 \n", 912 | "\n", 913 | " vec_8 vec_9 vec_10 vec_11 vec_12 vec_13 vec_14 \\\n", 914 | "0 -1.303818 -1.781690 -3.893606 0.582851 -2.390430 0.612694 4.274847 \n", 915 | "1 1.494396 0.454764 -2.268685 -1.505257 -2.332494 -0.431022 1.431269 \n", 916 | "2 -0.696710 -1.563318 -3.040591 -0.367393 -3.774975 -1.183595 2.456486 \n", 917 | "3 -0.249010 -0.074644 -2.314389 0.557041 -3.887409 -1.070027 3.894971 \n", 918 | "4 0.310748 0.457921 -1.334632 0.183150 -0.547834 1.218995 0.536182 \n", 919 | "\n", 920 | " vec_15 vec_16 vec_17 vec_18 vec_19 vec_20 \n", 921 | "0 -1.641325 1.098874 -0.534998 0.338975 -2.081308 -3.480031 \n", 922 | "1 -0.896382 -0.267269 1.433352 0.438305 -0.992093 -0.096142 \n", 923 | "2 -1.270981 2.475039 -1.990110 0.130853 -0.589791 -2.782936 \n", 924 | "3 -0.957399 -0.952996 -0.824266 0.038712 1.194561 -1.206788 \n", 925 | "4 0.995981 -0.874730 -0.138916 0.882186 -0.129402 -1.793177 " 926 | ] 927 | }, 928 | "execution_count": 26, 929 | "metadata": {}, 930 | "output_type": "execute_result" 931 | } 932 | ], 933 | "source": [ 934 | "vec_df.head(5)" 935 | ] 936 | }, 937 | { 938 | "cell_type": "code", 939 | "execution_count": 27, 940 | "metadata": {}, 941 | "outputs": [], 942 | "source": [ 943 | "# concatenate and safe the resume csv file\n", 944 | "con_resume_1 = pd.concat([resume, vec_df], axis=1)\n", 945 | "con_resume_1.to_csv('wip/con_resume_1.csv', index=False)" 946 | ] 947 | } 948 | ], 949 | "metadata": { 950 | "kernelspec": { 951 | "display_name": "Python 3", 952 | "language": "python", 953 | "name": "python3" 954 | }, 955 | "language_info": { 956 | "codemirror_mode": { 957 | "name": "ipython", 958 | "version": 3 959 | }, 960 | "file_extension": ".py", 961 | "mimetype": "text/x-python", 962 | "name": "python", 963 | "nbconvert_exporter": "python", 964 | "pygments_lexer": "ipython3", 965 | "version": "3.7.6" 966 | } 967 | }, 968 | "nbformat": 4, 969 | "nbformat_minor": 4 970 | } 971 | -------------------------------------------------------------------------------- /2-Preprocessing_and_Modelling/Pre-processing_Resume for matchingv1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pre-Processing Resume Text Column to Prepare for matching - first iteration" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import json\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "%matplotlib inline\n", 21 | "\n", 22 | "import re\n", 23 | "import datetime\n", 24 | "from datetime import date\n", 25 | "from time import strptime\n", 26 | "\n", 27 | "import RAKE as rake\n", 28 | "import operator\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "###############################################################################################\n", 36 | "## Working on Resume data\n", 37 | 
"###############################################################################################" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 38, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# First reading my resume csv\n", 47 | "resume = pd.read_csv('wip/resume_sorted5.csv')" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 39, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\n", 60 | "RangeIndex: 14428 entries, 0 to 14427\n", 61 | "Data columns (total 26 columns):\n", 62 | " # Column Non-Null Count Dtype \n", 63 | "--- ------ -------------- ----- \n", 64 | " 0 index 14428 non-null int64 \n", 65 | " 1 Resume_title 14428 non-null object\n", 66 | " 2 City 14428 non-null object\n", 67 | " 3 location 14428 non-null int64 \n", 68 | " 4 Description 14428 non-null object\n", 69 | " 5 work_experiences 14428 non-null object\n", 70 | " 6 Educations 14428 non-null object\n", 71 | " 7 Skills 14428 non-null object\n", 72 | " 8 Links 14428 non-null object\n", 73 | " 9 Certificates 14428 non-null object\n", 74 | " 10 Additional Information 14428 non-null object\n", 75 | " 11 is_grad 14428 non-null int64 \n", 76 | " 12 is_postgrad 14428 non-null int64 \n", 77 | " 13 is_doc 14428 non-null int64 \n", 78 | " 14 edu_unknown 14428 non-null int64 \n", 79 | " 15 Computer_Eng 14428 non-null int64 \n", 80 | " 16 Finance 14428 non-null int64 \n", 81 | " 17 HR 14428 non-null int64 \n", 82 | " 18 AI_stats 14428 non-null int64 \n", 83 | " 19 MBA 14428 non-null int64 \n", 84 | " 20 Other_specialization 14428 non-null int64 \n", 85 | " 21 resume_id 14428 non-null int64 \n", 86 | " 22 total_experience 14428 non-null int64 \n", 87 | " 23 experience_range 14428 non-null int64 \n", 88 | " 24 loc_name 14428 non-null object\n", 89 | " 25 experience_desc 14428 non-null object\n", 90 | "dtypes: int64(15), object(11)\n", 91 | "memory usage: 2.9+ MB\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "#initial info\n", 97 | "resume.info()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "#########################################################################################################\n", 105 | "## To match resume with jobs, I need to have similar 20 vectors, that I created to train my Doc2Vec model for jobs. 
\n", 106 | "\n", 107 | "### For training my jobs model, I picked text data from :\n", 108 | "* job title\n", 109 | "* job description\n", 110 | "* skills\n", 111 | "* industry\n", 112 | "\n", 113 | "### So for training my resume model, I need similar text, thus picking:\n", 114 | "* Resume_title\n", 115 | "* Resume description \n", 116 | "* skills\n", 117 | "* Additional Information\n", 118 | "\n", 119 | "\n", 120 | "#########################################################################################################" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 40, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "resume['Resume_title'] = resume['Resume_title'].str.lower()\n", 130 | "resume['Skills']=resume['Skills'].str.lower()\n", 131 | "resume['Description'] = resume['Description'].str.lower()\n", 132 | "resume['Additional Information'] = resume['Additional Information'].str.lower()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 41, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "resume['Description'].replace('none', ' ',inplace=True)\n", 142 | "resume['Additional Information'].replace('none', ' ',inplace=True)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 43, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stderr", 152 | "output_type": "stream", 153 | "text": [ 154 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 155 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 156 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 157 | "\n", 158 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 159 | " \n" 160 | ] 161 | }, 162 | { 163 | "data": { 164 | "text/html": [ 165 | "
\n", 166 | "\n", 179 | "\n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | "
resume_idResume_titleresume_combo
00java developerjava developer to prove myself dedicated, wort...
11software developersoftware developer working as software develop...
22java developerjava developer looking for a challenging caree...
33seeking innovative and challenging career assi...seeking innovative and challenging career assi...
44java developerjava developer ['project: hr payroll systems...
\n", 221 | "
" 222 | ], 223 | "text/plain": [ 224 | " resume_id Resume_title \\\n", 225 | "0 0 java developer \n", 226 | "1 1 software developer \n", 227 | "2 2 java developer \n", 228 | "3 3 seeking innovative and challenging career assi... \n", 229 | "4 4 java developer \n", 230 | "\n", 231 | " resume_combo \n", 232 | "0 java developer to prove myself dedicated, wort... \n", 233 | "1 software developer working as software develop... \n", 234 | "2 java developer looking for a challenging caree... \n", 235 | "3 seeking innovative and challenging career assi... \n", 236 | "4 java developer ['project: hr payroll systems... " 237 | ] 238 | }, 239 | "execution_count": 43, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "df_resume = resume[['resume_id','Resume_title' ]]\n", 246 | "df_resume['resume_combo'] = resume['Resume_title'] +\" \" + resume['Description'] +\" \" + resume['Skills'] + \" \"+resume['Additional Information'] + \" \"+resume['experience_desc']\n", 247 | "df_resume.head()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 44, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "0 java developer to prove myself dedicated, wort...\n", 259 | "1 software developer working as software develop...\n", 260 | "2 java developer looking for a challenging caree...\n", 261 | "3 seeking innovative and challenging career assi...\n", 262 | "4 java developer ['project: hr payroll systems...\n", 263 | "5 java developer ['java'] ['have the potenti...\n", 264 | "6 java developer to secure a challenging positio...\n", 265 | "7 searching job for java developer ['c++', ' h...\n", 266 | "8 mca / with 3 years of development experience •...\n", 267 | "9 java developer attain the position of 'java de...\n", 268 | "Name: resume_combo, dtype: object" 269 | ] 270 | }, 271 | "execution_count": 44, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "docs = df_resume['resume_combo']\n", 278 | "docs_sample = docs.head(10)\n", 279 | "docs_sample" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 45, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "name": "stderr", 289 | "output_type": "stream", 290 | "text": [ 291 | "[nltk_data] Downloading package wordnet to\n", 292 | "[nltk_data] C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n", 293 | "[nltk_data] Package wordnet is already up-to-date!\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "#Import all the dependencies\n", 299 | "import nltk\n", 300 | "nltk.download('wordnet')\n", 301 | "from nltk.stem import WordNetLemmatizer\n", 302 | "wordnet_lemmatizer = WordNetLemmatizer()\n", 303 | "from nltk.corpus import stopwords\n", 304 | "from nltk.tokenize import word_tokenize \n", 305 | "set(stopwords.words('english'))\n", 306 | "\n", 307 | "import string\n", 308 | "\n", 309 | "import gensim\n", 310 | "from gensim.test.utils import common_texts\n", 311 | "from gensim.models.doc2vec import Doc2Vec, TaggedDocument" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 47, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "name": "stderr", 321 | "output_type": "stream", 322 | "text": [ 323 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n", 324 | " 'stop_words.' 
% sorted(inconsistent))\n" 325 | ] 326 | }, 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "(14428, 70688)\n", 332 | "(14428, 3)\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 338 | "stopwords = nltk.corpus.stopwords.words('english')\n", 339 | "stopwords.append('ã¯æ’ëœ')\n", 340 | "stopwords.append('\\n')\n", 341 | "stopwords.append('•')\n", 342 | "#Transforms words to TFIDF\n", 343 | "vectorizer = TfidfVectorizer(stop_words = stopwords)\n", 344 | "\n", 345 | "index = 0\n", 346 | "keys = {}\n", 347 | "\n", 348 | "for rem in df_resume.itertuples() :\n", 349 | " key = rem[1]\n", 350 | " keys[key] = index\n", 351 | " index += 1\n", 352 | "\n", 353 | "#Fit the vectorizer to the data\n", 354 | "vectorizer.fit(df_resume['resume_combo'].fillna(''))\n", 355 | "\n", 356 | "#Transform the data\n", 357 | "tfidf_scores = vectorizer.transform(df_resume['resume_combo'].fillna(''))\n", 358 | "\n", 359 | "print(tfidf_scores.shape)\n", 360 | "print(df_resume.shape)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 48, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 49, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/html": [ 380 | "
\n", 381 | "\n", 394 | "\n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | "
0000000000008976500089805000webhostapp00100200300353...õleøcreatedǁǁǁǁǁǁηadoopτrainτοοlsчєαrfiledfinancialfixing
00.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
20.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
30.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
40.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", 544 | "

5 rows × 70688 columns

\n", 545 | "
" 546 | ], 547 | "text/plain": [ 548 | " 00 000 0000 00089765 00089805 000webhostapp 001 002 003 00353 \\\n", 549 | "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 550 | "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 551 | "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 552 | "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 553 | "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 554 | "\n", 555 | " ... õle øcreated ǁǁǁǁǁǁ ηadoop τrain τοοls чєαr filed financial \\\n", 556 | "0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 557 | "1 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 558 | "2 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 559 | "3 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 560 | "4 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 561 | "\n", 562 | " fixing \n", 563 | "0 0.0 \n", 564 | "1 0.0 \n", 565 | "2 0.0 \n", 566 | "3 0.0 \n", 567 | "4 0.0 \n", 568 | "\n", 569 | "[5 rows x 70688 columns]" 570 | ] 571 | }, 572 | "execution_count": 49, 573 | "metadata": {}, 574 | "output_type": "execute_result" 575 | } 576 | ], 577 | "source": [ 578 | "test.head()" 579 | ] 580 | }, 581 | { 582 | "cell_type": "markdown", 583 | "metadata": {}, 584 | "source": [ 585 | "### Creating my Stopword list\n", 586 | "#### As seen there are so many unwanted tokens like numbers, etc , I need to add them in \"stop words\" list to train model" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 50, 592 | "metadata": {}, 593 | "outputs": [], 594 | "source": [ 595 | "#getting list of all tokens\n", 596 | "word_list = test.columns.tolist()" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 51, 602 | "metadata": {}, 603 | "outputs": [], 604 | "source": [ 605 | "##Getting a list of unwanted words as s_words and adding to stopwords\n", 606 | "s_words =[]\n", 607 | "for word in word_list:\n", 608 | " #print(word)\n", 609 | " if re.search(\"^\\W|^\\d\",word):\n", 610 | " s_words.append(word)" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 52, 616 | "metadata": {}, 617 | "outputs": [], 618 | "source": [ 619 | "s_words.append('') \n", 620 | "from nltk.corpus import stopwords\n", 621 | "stopword_set = set(stopwords.words('english'))\n", 622 | "stopword_set = list(stopword_set)\n", 623 | "stopword_set.extend(s_words)" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 53, 629 | "metadata": {}, 630 | "outputs": [], 631 | "source": [ 632 | "def preprocess(text):\n", 633 | " stop_words = stopword_set\n", 634 | " #0. split words by whitespace\n", 635 | " text = text.split()\n", 636 | " \n", 637 | " \n", 638 | " # 1. lower case\n", 639 | " text = [word.lower() for word in text]\n", 640 | " \n", 641 | " # 2. remove punctuations\n", 642 | " punc_table = str.maketrans('','',string.punctuation)\n", 643 | " text = [word.translate(punc_table) for word in text]\n", 644 | " \n", 645 | " # 3. 
remove stop words\n", 646 | " text = [word for word in text if word not in stop_words]\n", 647 | " \n", 648 | " return text" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 54, 654 | "metadata": {}, 655 | "outputs": [], 656 | "source": [ 657 | "tokenized_doc = []\n", 658 | "doc = df_resume['resume_combo']\n", 659 | "#doc = docs_sample\n", 660 | "for d in doc:\n", 661 | " tokenized_doc.append(preprocess(d))\n", 662 | "#tokenized_doc" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 55, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "# Convert tokenized document into gensim formated tagged data\n", 672 | "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 56, 678 | "metadata": {}, 679 | "outputs": [ 680 | { 681 | "data": { 682 | "text/plain": [ 683 | "14428" 684 | ] 685 | }, 686 | "execution_count": 56, 687 | "metadata": {}, 688 | "output_type": "execute_result" 689 | } 690 | ], 691 | "source": [ 692 | "num_doc = len(tagged_data)\n", 693 | "num_doc\n", 694 | "#confirm length (should be 14428)\n", 695 | "len(tokenized_doc)" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 58, 701 | "metadata": {}, 702 | "outputs": [], 703 | "source": [ 704 | "from gensim.test.utils import get_tmpfile\n", 705 | "from gensim.models.callbacks import CallbackAny2Vec\n", 706 | "\n", 707 | "class EpochSaver(CallbackAny2Vec):\n", 708 | "\n", 709 | " def __init__(self, path_prefix):\n", 710 | " self.path_prefix = path_prefix\n", 711 | " self.epoch = 0\n", 712 | "\n", 713 | " def on_epoch_end(self, model):\n", 714 | " output_path = get_tmpfile('{}_epoch{}.model'.format(self.path_prefix, self.epoch))\n", 715 | " model.save(output_path)\n", 716 | " self.epoch += 1" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 59, 722 | "metadata": {}, 723 | "outputs": [], 724 | "source": [ 725 | "class EpochLogger(CallbackAny2Vec):\n", 726 | " \n", 727 | " def __init__(self):\n", 728 | " self.epoch = 0\n", 729 | " \n", 730 | " def on_epoch_begin(self, model):\n", 731 | " print(\"Epoch #{} start\".format(self.epoch))\n", 732 | "\n", 733 | " def on_epoch_end(self, model):\n", 734 | " print(\"Epoch #{} end\".format(self.epoch))\n", 735 | " self.epoch += 1" 736 | ] 737 | }, 738 | { 739 | "cell_type": "code", 740 | "execution_count": 61, 741 | "metadata": {}, 742 | "outputs": [], 743 | "source": [ 744 | "## Load saved doc2vec model\n", 745 | "model= Doc2Vec.load(\"Model/my_doc2vec.model\")" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 62, 751 | "metadata": {}, 752 | "outputs": [], 753 | "source": [ 754 | "## Get vector value\n", 755 | "vec = np.empty([14428,20])\n", 756 | "\n", 757 | "for k,i in enumerate(tokenized_doc):\n", 758 | " \n", 759 | " #print(i)\n", 760 | " vector = model.infer_vector(i)\n", 761 | " vec[k] = vector\n", 762 | " #vec = np.append(vector)\n", 763 | " #vecf = np.append(vec,vector)\n", 764 | "\n", 765 | "# reshape into 2D\n", 766 | "new_arr = np.reshape(vec,(-1,20))" 767 | ] 768 | }, 769 | { 770 | "cell_type": "code", 771 | "execution_count": 64, 772 | "metadata": {}, 773 | "outputs": [], 774 | "source": [ 775 | "rng = range(1, 21)\n", 776 | "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])" 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": 65, 782 | "metadata": {}, 783 | "outputs": [ 784 | { 785 | "data": 
{ 786 | "text/html": [ 787 | "
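
EpochSaver and EpochLogger above are defined but never attached to anything, since this notebook loads a pre-trained model rather than training one. For reference, a hedged sketch of how CallbackAny2Vec callbacks are typically wired into Doc2Vec training; vector_size=20 matches the vec_ columns used later, while the remaining hyperparameters are placeholders, not the values behind my_doc2vec.model:

    # Hedged sketch, not the original training run. Doc2Vec, tagged_data,
    # EpochLogger and EpochSaver are all defined in the cells above.
    model = Doc2Vec(vector_size=20, min_count=2, epochs=30)
    model.build_vocab(tagged_data)
    model.train(tagged_data,
                total_examples=model.corpus_count,
                epochs=model.epochs,
                callbacks=[EpochLogger(), EpochSaver('my_doc2vec')])
    model.save('Model/my_doc2vec.model')
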
\n", 788 | "\n", 801 | "\n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | "
vec_1vec_2vec_3vec_4vec_5vec_6vec_7vec_8vec_9vec_10vec_11vec_12vec_13vec_14vec_15vec_16vec_17vec_18vec_19vec_20
0-3.145642-0.4093800.701160-0.9387450.5852393.585946-0.1207810.1112221.6441052.184981-2.117909-0.085430-2.8773920.239383-1.5828711.435642-1.0514501.9608311.786694-2.375981
1-0.786235-1.306011-1.383107-1.6697080.8321361.8497900.178872-1.7368940.7416851.553933-2.916478-0.712572-0.502129-0.8492930.4354060.3393300.060282-0.4150353.203696-3.607635
2-2.747642-1.721797-0.910322-0.7759501.4723252.455998-0.852150-0.1505170.8442021.380623-1.0188320.777981-1.9775560.853214-1.2813442.1953910.8003051.0780352.166900-2.658121
3-1.771770-1.375850-0.475922-0.784473-0.3772401.5963891.094220-0.2536420.4682652.149588-1.2344150.295536-2.6155320.115959-2.044196-0.769109-0.7166041.1453883.452934-1.008162
4-0.097372-1.405603-0.801234-0.248921-0.376417-0.157050-0.290440-1.440582-0.1696691.190537-0.291407-1.080500-2.9504970.0316930.119182-0.8835550.178819-0.8583241.239632-0.043914
\n", 945 | "
" 946 | ], 947 | "text/plain": [ 948 | " vec_1 vec_2 vec_3 vec_4 vec_5 vec_6 vec_7 \\\n", 949 | "0 -3.145642 -0.409380 0.701160 -0.938745 0.585239 3.585946 -0.120781 \n", 950 | "1 -0.786235 -1.306011 -1.383107 -1.669708 0.832136 1.849790 0.178872 \n", 951 | "2 -2.747642 -1.721797 -0.910322 -0.775950 1.472325 2.455998 -0.852150 \n", 952 | "3 -1.771770 -1.375850 -0.475922 -0.784473 -0.377240 1.596389 1.094220 \n", 953 | "4 -0.097372 -1.405603 -0.801234 -0.248921 -0.376417 -0.157050 -0.290440 \n", 954 | "\n", 955 | " vec_8 vec_9 vec_10 vec_11 vec_12 vec_13 vec_14 \\\n", 956 | "0 0.111222 1.644105 2.184981 -2.117909 -0.085430 -2.877392 0.239383 \n", 957 | "1 -1.736894 0.741685 1.553933 -2.916478 -0.712572 -0.502129 -0.849293 \n", 958 | "2 -0.150517 0.844202 1.380623 -1.018832 0.777981 -1.977556 0.853214 \n", 959 | "3 -0.253642 0.468265 2.149588 -1.234415 0.295536 -2.615532 0.115959 \n", 960 | "4 -1.440582 -0.169669 1.190537 -0.291407 -1.080500 -2.950497 0.031693 \n", 961 | "\n", 962 | " vec_15 vec_16 vec_17 vec_18 vec_19 vec_20 \n", 963 | "0 -1.582871 1.435642 -1.051450 1.960831 1.786694 -2.375981 \n", 964 | "1 0.435406 0.339330 0.060282 -0.415035 3.203696 -3.607635 \n", 965 | "2 -1.281344 2.195391 0.800305 1.078035 2.166900 -2.658121 \n", 966 | "3 -2.044196 -0.769109 -0.716604 1.145388 3.452934 -1.008162 \n", 967 | "4 0.119182 -0.883555 0.178819 -0.858324 1.239632 -0.043914 " 968 | ] 969 | }, 970 | "execution_count": 65, 971 | "metadata": {}, 972 | "output_type": "execute_result" 973 | } 974 | ], 975 | "source": [ 976 | "vec_df.head(5)" 977 | ] 978 | }, 979 | { 980 | "cell_type": "code", 981 | "execution_count": 66, 982 | "metadata": {}, 983 | "outputs": [], 984 | "source": [ 985 | "con_resume = pd.concat([resume, vec_df], axis=1)\n", 986 | "con_resume.to_csv('wip/con_resume.csv', index=False)" 987 | ] 988 | }, 989 | { 990 | "cell_type": "code", 991 | "execution_count": 44, 992 | "metadata": {}, 993 | "outputs": [], 994 | "source": [ 995 | "#con_resume.info()" 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "execution_count": null, 1001 | "metadata": {}, 1002 | "outputs": [], 1003 | "source": [] 1004 | }, 1005 | { 1006 | "cell_type": "code", 1007 | "execution_count": 49, 1008 | "metadata": {}, 1009 | "outputs": [], 1010 | "source": [ 1011 | "tokenized_doc = []\n", 1012 | "#doc = df_resume['resume_combo']\n", 1013 | "doc = docs_sample\n", 1014 | "for d in doc:\n", 1015 | " tokenized_doc.append(preprocess(d))\n", 1016 | "#tokenized_doc" 1017 | ] 1018 | }, 1019 | { 1020 | "cell_type": "code", 1021 | "execution_count": 50, 1022 | "metadata": {}, 1023 | "outputs": [], 1024 | "source": [ 1025 | "# Convert tokenized document into gensim formated tagged data\n", 1026 | "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]" 1027 | ] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": 51, 1032 | "metadata": {}, 1033 | "outputs": [ 1034 | { 1035 | "data": { 1036 | "text/plain": [ 1037 | "10" 1038 | ] 1039 | }, 1040 | "execution_count": 51, 1041 | "metadata": {}, 1042 | "output_type": "execute_result" 1043 | } 1044 | ], 1045 | "source": [ 1046 | "num_doc = len(tagged_data)\n", 1047 | "num_doc\n", 1048 | "#confirm length (should be 38941)\n", 1049 | "len(tokenized_doc)" 1050 | ] 1051 | }, 1052 | { 1053 | "cell_type": "code", 1054 | "execution_count": 58, 1055 | "metadata": {}, 1056 | "outputs": [], 1057 | "source": [ 1058 | "## Load saved doc2vec model\n", 1059 | "model= Doc2Vec.load(\"my_doc2vec.model\")\n", 1060 | "\n", 1061 | "## Get vector 
value\n", 1062 | "vec = np.empty([10,20])\n", 1063 | "\n", 1064 | "for k,i in enumerate(tokenized_doc):\n", 1065 | " \n", 1066 | " #print(i)\n", 1067 | " vector = model.infer_vector(i)\n", 1068 | " vec[k] = vector\n", 1069 | " #vec = np.append(vector)\n", 1070 | " #vecf = np.append(vec,vector)\n", 1071 | "\n", 1072 | "# reshape into 2D\n", 1073 | "new_arr = np.reshape(vec,(-1,20))" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "code", 1078 | "execution_count": null, 1079 | "metadata": {}, 1080 | "outputs": [], 1081 | "source": [ 1082 | "test = np.array([[1,2,3],[4,5,6]])\n", 1083 | "test[0]" 1084 | ] 1085 | }, 1086 | { 1087 | "cell_type": "code", 1088 | "execution_count": 61, 1089 | "metadata": {}, 1090 | "outputs": [ 1091 | { 1092 | "data": { 1093 | "text/plain": [ 1094 | "array([-3.14492106, -0.41021681, 0.70149601, -0.93887955, 0.58496076,\n", 1095 | " 3.58589458, -0.12033088, 0.11019378, 1.64519656, 2.18371987,\n", 1096 | " -2.11720061, -0.08485675, -2.87654066, 0.24021174, -1.58367932,\n", 1097 | " 1.43522847, -1.05121636, 1.96061814, 1.78778028, -2.37729073])" 1098 | ] 1099 | }, 1100 | "execution_count": 61, 1101 | "metadata": {}, 1102 | "output_type": "execute_result" 1103 | } 1104 | ], 1105 | "source": [ 1106 | "new_arr[0]" 1107 | ] 1108 | }, 1109 | { 1110 | "cell_type": "code", 1111 | "execution_count": 62, 1112 | "metadata": {}, 1113 | "outputs": [], 1114 | "source": [ 1115 | "rng = range(1, 21)\n", 1116 | "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])" 1117 | ] 1118 | }, 1119 | { 1120 | "cell_type": "code", 1121 | "execution_count": 63, 1122 | "metadata": {}, 1123 | "outputs": [ 1124 | { 1125 | "name": "stdout", 1126 | "output_type": "stream", 1127 | "text": [ 1128 | "\n", 1129 | "RangeIndex: 10 entries, 0 to 9\n", 1130 | "Data columns (total 20 columns):\n", 1131 | " # Column Non-Null Count Dtype \n", 1132 | "--- ------ -------------- ----- \n", 1133 | " 0 vec_1 10 non-null float64\n", 1134 | " 1 vec_2 10 non-null float64\n", 1135 | " 2 vec_3 10 non-null float64\n", 1136 | " 3 vec_4 10 non-null float64\n", 1137 | " 4 vec_5 10 non-null float64\n", 1138 | " 5 vec_6 10 non-null float64\n", 1139 | " 6 vec_7 10 non-null float64\n", 1140 | " 7 vec_8 10 non-null float64\n", 1141 | " 8 vec_9 10 non-null float64\n", 1142 | " 9 vec_10 10 non-null float64\n", 1143 | " 10 vec_11 10 non-null float64\n", 1144 | " 11 vec_12 10 non-null float64\n", 1145 | " 12 vec_13 10 non-null float64\n", 1146 | " 13 vec_14 10 non-null float64\n", 1147 | " 14 vec_15 10 non-null float64\n", 1148 | " 15 vec_16 10 non-null float64\n", 1149 | " 16 vec_17 10 non-null float64\n", 1150 | " 17 vec_18 10 non-null float64\n", 1151 | " 18 vec_19 10 non-null float64\n", 1152 | " 19 vec_20 10 non-null float64\n", 1153 | "dtypes: float64(20)\n", 1154 | "memory usage: 1.7 KB\n" 1155 | ] 1156 | } 1157 | ], 1158 | "source": [ 1159 | "vec_df.info()" 1160 | ] 1161 | }, 1162 | { 1163 | "cell_type": "code", 1164 | "execution_count": 35, 1165 | "metadata": {}, 1166 | "outputs": [], 1167 | "source": [ 1168 | "r1.to_csv('test_r.csv',index=False)" 1169 | ] 1170 | }, 1171 | { 1172 | "cell_type": "code", 1173 | "execution_count": 24, 1174 | "metadata": {}, 1175 | "outputs": [], 1176 | "source": [ 1177 | "r1 = resume.head(10)" 1178 | ] 1179 | }, 1180 | { 1181 | "cell_type": "code", 1182 | "execution_count": 36, 1183 | "metadata": { 1184 | "scrolled": false 1185 | }, 1186 | "outputs": [], 1187 | "source": [ 1188 | "# read each work experience\n", 1189 | "resume['work_experiences'] = 
resume['work_experiences'].str.lower()\n", 1190 | "\n", 1191 | "resume_all_desc = []\n", 1192 | "for index, rows in resume.iterrows():\n", 1193 | " #print('#@#@#@#@#@@#@#@#@#@##@@#@#@@##@#@#@#@#@#@##@#@#@##@#@@#@#@#')\n", 1194 | " #print(f'resume no. {index}')\n", 1195 | " resume_desc= []\n", 1196 | " #pick work experience col and read it as JSON \n", 1197 | " \n", 1198 | " work = resume['work_experiences'][index]\n", 1199 | " try: result_work = eval(work)\n", 1200 | " except: continue\n", 1201 | " #print(f'resume : {index}')\n", 1202 | " #read description to match with job\n", 1203 | " \n", 1204 | " for i in result_work: \n", 1205 | " w_title_n = (result_work[0][0]['wtitle:']) \n", 1206 | " w_company= (result_work[i][1]['wcompany:'])\n", 1207 | "# resume_desc.append(w_company) \n", 1208 | " w_city= (result_work[i][2]['wcity:'])\n", 1209 | " w_state= (result_work[i][3]['wstate:'])\n", 1210 | " w_duration= (result_work[i][4]['wduration:'])\n", 1211 | " \n", 1212 | " w_descr= (result_work[i][5]['wdescr:'])\n", 1213 | " if (w_descr == 'none'):\n", 1214 | " continue\n", 1215 | " #print(w_descr)\n", 1216 | " #print('**************')\n", 1217 | " resume_desc.append(w_descr + '') \n", 1218 | " \n", 1219 | " #print(resume_desc)\n", 1220 | " resume_all_desc.append(resume_desc)\n", 1221 | "#print(resume_test)\n", 1222 | "resume['experience_desc'] = resume_all_desc" 1223 | ] 1224 | }, 1225 | { 1226 | "cell_type": "code", 1227 | "execution_count": 37, 1228 | "metadata": {}, 1229 | "outputs": [], 1230 | "source": [ 1231 | "#resume.to_csv('wip/resume_sorted5.csv',index=False)" 1232 | ] 1233 | } 1234 | ], 1235 | "metadata": { 1236 | "kernelspec": { 1237 | "display_name": "Python 3", 1238 | "language": "python", 1239 | "name": "python3" 1240 | }, 1241 | "language_info": { 1242 | "codemirror_mode": { 1243 | "name": "ipython", 1244 | "version": 3 1245 | }, 1246 | "file_extension": ".py", 1247 | "mimetype": "text/x-python", 1248 | "name": "python", 1249 | "nbconvert_exporter": "python", 1250 | "pygments_lexer": "ipython3", 1251 | "version": "3.7.6" 1252 | } 1253 | }, 1254 | "nbformat": 4, 1255 | "nbformat_minor": 4 1256 | } 1257 | -------------------------------------------------------------------------------- /Data/Job-Locations/india-city-state.csv: -------------------------------------------------------------------------------- 1 | city_id,city_name,state 2 | 1,Kolhapur,Maharashtra 3 | 2,Port Blair,Andaman & Nicobar Islands 4 | 3,Adilabad,Andhra Pradesh 5 | 4,Adoni,Andhra Pradesh 6 | 5,Amadalavalasa,Andhra Pradesh 7 | 6,Amalapuram,Andhra Pradesh 8 | 7,Anakapalle,Andhra Pradesh 9 | 8,Anantapur,Andhra Pradesh 10 | 9,Badepalle,Andhra Pradesh 11 | 10,Banganapalle,Andhra Pradesh 12 | 11,Bapatla,Andhra Pradesh 13 | 12,Bellampalle,Andhra Pradesh 14 | 13,Bethamcherla,Andhra Pradesh 15 | 14,Bhadrachalam,Andhra Pradesh 16 | 15,Bhainsa,Andhra Pradesh 17 | 16,Bheemunipatnam,Andhra Pradesh 18 | 17,Bhimavaram,Andhra Pradesh 19 | 18,Bhongir,Andhra Pradesh 20 | 19,Bobbili,Andhra Pradesh 21 | 20,Bodhan,Andhra Pradesh 22 | 21,Chilakaluripet,Andhra Pradesh 23 | 22,Chirala,Andhra Pradesh 24 | 23,Chittoor,Andhra Pradesh 25 | 24,Cuddapah,Andhra Pradesh 26 | 25,Devarakonda,Andhra Pradesh 27 | 26,Dharmavaram,Andhra Pradesh 28 | 27,Eluru,Andhra Pradesh 29 | 28,Farooqnagar,Andhra Pradesh 30 | 29,Gadwal,Andhra Pradesh 31 | 30,Gooty,Andhra Pradesh 32 | 31,Gudivada,Andhra Pradesh 33 | 32,Gudur,Andhra Pradesh 34 | 33,Guntakal,Andhra Pradesh 35 | 34,Guntur,Andhra Pradesh 36 | 35,Hanuman Junction,Andhra Pradesh 37 | 36,Hindupur,Andhra 
Pradesh 38 | 37,Hyderabad,Andhra Pradesh 39 | 38,Ichchapuram,Andhra Pradesh 40 | 39,Jaggaiahpet,Andhra Pradesh 41 | 40,Jagtial,Andhra Pradesh 42 | 41,Jammalamadugu,Andhra Pradesh 43 | 42,Jangaon,Andhra Pradesh 44 | 43,Kadapa,Andhra Pradesh 45 | 44,Kadiri,Andhra Pradesh 46 | 45,Kagaznagar,Andhra Pradesh 47 | 46,Kakinada,Andhra Pradesh 48 | 47,Kalyandurg,Andhra Pradesh 49 | 48,Kamareddy,Andhra Pradesh 50 | 49,Kandukur,Andhra Pradesh 51 | 50,Karimnagar,Andhra Pradesh 52 | 51,Kavali,Andhra Pradesh 53 | 52,Khammam,Andhra Pradesh 54 | 53,Koratla,Andhra Pradesh 55 | 54,Kothagudem,Andhra Pradesh 56 | 55,Kothapeta,Andhra Pradesh 57 | 56,Kovvur,Andhra Pradesh 58 | 57,Kurnool,Andhra Pradesh 59 | 58,Kyathampalle,Andhra Pradesh 60 | 59,Macherla,Andhra Pradesh 61 | 60,Machilipatnam,Andhra Pradesh 62 | 61,Madanapalle,Andhra Pradesh 63 | 62,Mahbubnagar,Andhra Pradesh 64 | 63,Mancherial,Andhra Pradesh 65 | 64,Mandamarri,Andhra Pradesh 66 | 65,Mandapeta,Andhra Pradesh 67 | 66,Manuguru,Andhra Pradesh 68 | 67,Markapur,Andhra Pradesh 69 | 68,Medak,Andhra Pradesh 70 | 69,Miryalaguda,Andhra Pradesh 71 | 70,Mogalthur,Andhra Pradesh 72 | 71,Nagari,Andhra Pradesh 73 | 72,Nagarkurnool,Andhra Pradesh 74 | 73,Nandyal,Andhra Pradesh 75 | 74,Narasapur,Andhra Pradesh 76 | 75,Narasaraopet,Andhra Pradesh 77 | 76,Narayanpet,Andhra Pradesh 78 | 77,Narsipatnam,Andhra Pradesh 79 | 78,Nellore,Andhra Pradesh 80 | 79,Nidadavole,Andhra Pradesh 81 | 80,Nirmal,Andhra Pradesh 82 | 81,Nizamabad,Andhra Pradesh 83 | 82,Nuzvid,Andhra Pradesh 84 | 83,Ongole,Andhra Pradesh 85 | 84,Palacole,Andhra Pradesh 86 | 85,Palasa Kasibugga,Andhra Pradesh 87 | 86,Palwancha,Andhra Pradesh 88 | 87,Parvathipuram,Andhra Pradesh 89 | 88,Pedana,Andhra Pradesh 90 | 89,Peddapuram,Andhra Pradesh 91 | 90,Pithapuram,Andhra Pradesh 92 | 91,Pondur,Andhra pradesh 93 | 92,Ponnur,Andhra Pradesh 94 | 93,Proddatur,Andhra Pradesh 95 | 94,Punganur,Andhra Pradesh 96 | 95,Puttur,Andhra Pradesh 97 | 96,Rajahmundry,Andhra Pradesh 98 | 97,Rajam,Andhra Pradesh 99 | 98,Ramachandrapuram,Andhra Pradesh 100 | 99,Ramagundam,Andhra Pradesh 101 | 100,Rayachoti,Andhra Pradesh 102 | 101,Rayadurg,Andhra Pradesh 103 | 102,Renigunta,Andhra Pradesh 104 | 103,Repalle,Andhra Pradesh 105 | 104,Sadasivpet,Andhra Pradesh 106 | 105,Salur,Andhra Pradesh 107 | 106,Samalkot,Andhra Pradesh 108 | 107,Sangareddy,Andhra Pradesh 109 | 108,Sattenapalle,Andhra Pradesh 110 | 109,Siddipet,Andhra Pradesh 111 | 110,Singapur,Andhra Pradesh 112 | 111,Sircilla,Andhra Pradesh 113 | 112,Srikakulam,Andhra Pradesh 114 | 113,Srikalahasti,Andhra Pradesh 115 | 115,Suryapet,Andhra Pradesh 116 | 116,Tadepalligudem,Andhra Pradesh 117 | 117,Tadpatri,Andhra Pradesh 118 | 118,Tandur,Andhra Pradesh 119 | 119,Tanuku,Andhra Pradesh 120 | 120,Tenali,Andhra Pradesh 121 | 121,Tirupati,Andhra Pradesh 122 | 122,Tuni,Andhra Pradesh 123 | 123,Uravakonda,Andhra Pradesh 124 | 124,Venkatagiri,Andhra Pradesh 125 | 125,Vicarabad,Andhra Pradesh 126 | 126,Vijayawada,Andhra Pradesh 127 | 127,Vinukonda,Andhra Pradesh 128 | 128,Visakhapatnam,Andhra Pradesh 129 | 129,Vizianagaram,Andhra Pradesh 130 | 130,Wanaparthy,Andhra Pradesh 131 | 131,Warangal,Andhra Pradesh 132 | 132,Yellandu,Andhra Pradesh 133 | 133,Yemmiganur,Andhra Pradesh 134 | 134,Yerraguntla,Andhra Pradesh 135 | 135,Zahirabad,Andhra Pradesh 136 | 136,Rajampet,Andhra Pradesh 137 | 137,Along,Arunachal Pradesh 138 | 138,Bomdila,Arunachal Pradesh 139 | 139,Itanagar,Arunachal Pradesh 140 | 140,Naharlagun,Arunachal Pradesh 141 | 141,Pasighat,Arunachal Pradesh 142 | 142,Abhayapuri,Assam 143 
| 143,Amguri,Assam 144 | 144,Anandnagaar,Assam 145 | 145,Barpeta,Assam 146 | 146,Barpeta Road,Assam 147 | 147,Bilasipara,Assam 148 | 148,Bongaigaon,Assam 149 | 149,Dhekiajuli,Assam 150 | 150,Dhubri,Assam 151 | 151,Dibrugarh,Assam 152 | 152,Digboi,Assam 153 | 153,Diphu,Assam 154 | 154,Dispur,Assam 155 | 156,Gauripur,Assam 156 | 157,Goalpara,Assam 157 | 158,Golaghat,Assam 158 | 159,Guwahati,Assam 159 | 160,Haflong,Assam 160 | 161,Hailakandi,Assam 161 | 162,Hojai,Assam 162 | 163,Jorhat,Assam 163 | 164,Karimganj,Assam 164 | 165,Kokrajhar,Assam 165 | 166,Lanka,Assam 166 | 167,Lumding,Assam 167 | 168,Mangaldoi,Assam 168 | 169,Mankachar,Assam 169 | 170,Margherita,Assam 170 | 171,Mariani,Assam 171 | 172,Marigaon,Assam 172 | 173,Nagaon,Assam 173 | 174,Nalbari,Assam 174 | 175,North Lakhimpur,Assam 175 | 176,Rangia,Assam 176 | 177,Sibsagar,Assam 177 | 178,Silapathar,Assam 178 | 179,Silchar,Assam 179 | 180,Tezpur,Assam 180 | 181,Tinsukia,Assam 181 | 182,Amarpur,Bihar 182 | 183,Araria,Bihar 183 | 184,Areraj,Bihar 184 | 185,Arrah,Bihar 185 | 186,Asarganj,Bihar 186 | 187,Aurangabad,Bihar 187 | 188,Bagaha,Bihar 188 | 189,Bahadurganj,Bihar 189 | 190,Bairgania,Bihar 190 | 191,Bakhtiarpur,Bihar 191 | 192,Banka,Bihar 192 | 193,Banmankhi Bazar,Bihar 193 | 194,Barahiya,Bihar 194 | 195,Barauli,Bihar 195 | 196,Barbigha,Bihar 196 | 197,Barh,Bihar 197 | 198,Begusarai,Bihar 198 | 199,Behea,Bihar 199 | 200,Bettiah,Bihar 200 | 201,Bhabua,Bihar 201 | 202,Bhagalpur,Bihar 202 | 203,Bihar Sharif,Bihar 203 | 204,Bikramganj,Bihar 204 | 205,Bodh Gaya,Bihar 205 | 206,Buxar,Bihar 206 | 207,Chandan Bara,Bihar 207 | 208,Chanpatia,Bihar 208 | 209,Chhapra,Bihar 209 | 210,Colgong,Bihar 210 | 211,Dalsinghsarai,Bihar 211 | 212,Darbhanga,Bihar 212 | 213,Daudnagar,Bihar 213 | 214,Dehri-on-Sone,Bihar 214 | 215,Dhaka,Bihar 215 | 216,Dighwara,Bihar 216 | 217,Dumraon,Bihar 217 | 218,Fatwah,Bihar 218 | 219,Forbesganj,Bihar 219 | 220,Gaya,Bihar 220 | 221,Gogri Jamalpur,Bihar 221 | 222,Gopalganj,Bihar 222 | 223,Hajipur,Bihar 223 | 224,Hilsa,Bihar 224 | 225,Hisua,Bihar 225 | 226,Islampur,Bihar 226 | 227,Jagdispur,Bihar 227 | 228,Jamalpur,Bihar 228 | 229,Jamui,Bihar 229 | 230,Jehanabad,Bihar 230 | 231,Jhajha,Bihar 231 | 232,Jhanjharpur,Bihar 232 | 233,Jogabani,Bihar 233 | 234,Kanti,Bihar 234 | 235,Katihar,Bihar 235 | 236,Khagaria,Bihar 236 | 237,Kharagpur,Bihar 237 | 238,Kishanganj,Bihar 238 | 239,Lakhisarai,Bihar 239 | 240,Lalganj,Bihar 240 | 241,Madhepura,Bihar 241 | 242,Madhubani,Bihar 242 | 243,Maharajganj,Bihar 243 | 244,Mahnar Bazar,Bihar 244 | 245,Makhdumpur,Bihar 245 | 246,Maner,Bihar 246 | 247,Manihari,Bihar 247 | 248,Marhaura,Bihar 248 | 249,Masaurhi,Bihar 249 | 250,Mirganj,Bihar 250 | 251,Mokameh,Bihar 251 | 252,Motihari,Bihar 252 | 253,Motipur,Bihar 253 | 254,Munger,Bihar 254 | 255,Murliganj,Bihar 255 | 256,Muzaffarpur,Bihar 256 | 257,Narkatiaganj,Bihar 257 | 258,Naugachhia,Bihar 258 | 259,Nawada,Bihar 259 | 260,Nokha,Bihar 260 | 261,Patna,Bihar 261 | 262,Piro,Bihar 262 | 263,Purnia,Bihar 263 | 264,Rafiganj,Bihar 264 | 265,Rajgir,Bihar 265 | 266,Ramnagar,Bihar 266 | 267,Raxaul Bazar,Bihar 267 | 268,Revelganj,Bihar 268 | 269,Rosera,Bihar 269 | 270,Saharsa,Bihar 270 | 271,Samastipur,Bihar 271 | 272,Sasaram,Bihar 272 | 273,Sheikhpura,Bihar 273 | 274,Sheohar,Bihar 274 | 275,Sherghati,Bihar 275 | 276,Silao,Bihar 276 | 277,Sitamarhi,Bihar 277 | 278,Siwan,Bihar 278 | 279,Sonepur,Bihar 279 | 280,Sugauli,Bihar 280 | 281,Sultanganj,Bihar 281 | 282,Supaul,Bihar 282 | 283,Warisaliganj,Bihar 283 | 284,Ahiwara,Chhattisgarh 284 | 
285,Akaltara,Chhattisgarh 285 | 286,Ambagarh Chowki,Chhattisgarh 286 | 287,Ambikapur,Chhattisgarh 287 | 288,Arang,Chhattisgarh 288 | 289,Bade Bacheli,Chhattisgarh 289 | 290,Balod,Chhattisgarh 290 | 291,Baloda Bazar,Chhattisgarh 291 | 292,Bemetra,Chhattisgarh 292 | 293,Bhatapara,Chhattisgarh 293 | 294,Bilaspur,Chhattisgarh 294 | 295,Birgaon,Chhattisgarh 295 | 296,Champa,Chhattisgarh 296 | 297,Chirmiri,Chhattisgarh 297 | 298,Dalli-Rajhara,Chhattisgarh 298 | 299,Dhamtari,Chhattisgarh 299 | 300,Dipka,Chhattisgarh 300 | 301,Dongargarh,Chhattisgarh 301 | 302,Durg-Bhilai Nagar,Chhattisgarh 302 | 303,Gobranawapara,Chhattisgarh 303 | 304,Jagdalpur,Chhattisgarh 304 | 305,Janjgir,Chhattisgarh 305 | 306,Jashpurnagar,Chhattisgarh 306 | 307,Kanker,Chhattisgarh 307 | 308,Kawardha,Chhattisgarh 308 | 309,Kondagaon,Chhattisgarh 309 | 310,Korba,Chhattisgarh 310 | 311,Mahasamund,Chhattisgarh 311 | 312,Mahendragarh,Chhattisgarh 312 | 313,Mungeli,Chhattisgarh 313 | 314,Naila Janjgir,Chhattisgarh 314 | 315,Raigarh,Chhattisgarh 315 | 316,Raipur,Chhattisgarh 316 | 317,Rajnandgaon,Chhattisgarh 317 | 318,Sakti,Chhattisgarh 318 | 319,Tilda Newra,Chhattisgarh 319 | 320,Amli,Dadra & Nagar Haveli 320 | 321,Silvassa,Dadra & Nagar Haveli 321 | 322,Daman and Diu,Daman & Diu 322 | 323,Daman and Diu,Daman & Diu 323 | 324,Asola,Delhi 324 | 325,Delhi,Delhi 325 | 326,Aldona,Goa 326 | 327,Curchorem Cacora,Goa 327 | 328,Madgaon,Goa 328 | 329,Mapusa,Goa 329 | 330,Margao,Goa 330 | 331,Marmagao,Goa 331 | 332,Panaji,Goa 332 | 333,Ahmedabad,Gujarat 333 | 334,Amreli,Gujarat 334 | 335,Anand,Gujarat 335 | 336,Ankleshwar,Gujarat 336 | 337,Bharuch,Gujarat 337 | 338,Bhavnagar,Gujarat 338 | 339,Bhuj,Gujarat 339 | 340,Cambay,Gujarat 340 | 341,Dahod,Gujarat 341 | 342,Deesa,Gujarat 342 | 343,Dharampur, India 343 | 344,Dholka,Gujarat 344 | 345,Gandhinagar,Gujarat 345 | 346,Godhra,Gujarat 346 | 347,Himatnagar,Gujarat 347 | 348,Idar,Gujarat 348 | 349,Jamnagar,Gujarat 349 | 350,Junagadh,Gujarat 350 | 351,Kadi,Gujarat 351 | 352,Kalavad,Gujarat 352 | 353,Kalol,Gujarat 353 | 354,Kapadvanj,Gujarat 354 | 355,Karjan,Gujarat 355 | 356,Keshod,Gujarat 356 | 357,Khambhalia,Gujarat 357 | 358,Khambhat,Gujarat 358 | 359,Kheda,Gujarat 359 | 360,Khedbrahma,Gujarat 360 | 361,Kheralu,Gujarat 361 | 362,Kodinar,Gujarat 362 | 363,Lathi,Gujarat 363 | 364,Limbdi,Gujarat 364 | 365,Lunawada,Gujarat 365 | 366,Mahesana,Gujarat 366 | 367,Mahuva,Gujarat 367 | 368,Manavadar,Gujarat 368 | 369,Mandvi,Gujarat 369 | 370,Mangrol,Gujarat 370 | 371,Mansa,Gujarat 371 | 372,Mehmedabad,Gujarat 372 | 373,Modasa,Gujarat 373 | 374,Morvi,Gujarat 374 | 375,Nadiad,Gujarat 375 | 376,Navsari,Gujarat 376 | 377,Padra,Gujarat 377 | 378,Palanpur,Gujarat 378 | 379,Palitana,Gujarat 379 | 380,Pardi,Gujarat 380 | 381,Patan,Gujarat 381 | 382,Petlad,Gujarat 382 | 383,Porbandar,Gujarat 383 | 384,Radhanpur,Gujarat 384 | 385,Rajkot,Gujarat 385 | 386,Rajpipla,Gujarat 386 | 387,Rajula,Gujarat 387 | 388,Ranavav,Gujarat 388 | 389,Rapar,Gujarat 389 | 390,Salaya,Gujarat 390 | 391,Sanand,Gujarat 391 | 392,Savarkundla,Gujarat 392 | 393,Sidhpur,Gujarat 393 | 394,Sihor,Gujarat 394 | 395,Songadh,Gujarat 395 | 396,Surat,Gujarat 396 | 397,Talaja,Gujarat 397 | 398,Thangadh,Gujarat 398 | 399,Tharad,Gujarat 399 | 400,Umbergaon,Gujarat 400 | 401,Umreth,Gujarat 401 | 402,Una,Gujarat 402 | 403,Unjha,Gujarat 403 | 404,Upleta,Gujarat 404 | 405,Vadnagar,Gujarat 405 | 406,Vadodara,Gujarat 406 | 407,Valsad,Gujarat 407 | 408,Vapi,Gujarat 408 | 409,Vapi,Gujarat 409 | 410,Veraval,Gujarat 410 | 411,Vijapur,Gujarat 411 | 
412,Viramgam,Gujarat 412 | 413,Visnagar,Gujarat 413 | 414,Vyara,Gujarat 414 | 415,Wadhwan,Gujarat 415 | 416,Wankaner,Gujarat 416 | 417,Adalaj,Gujrat 417 | 418,Adityana,Gujrat 418 | 419,Alang,Gujrat 419 | 420,Ambaji,Gujrat 420 | 421,Ambaliyasan,Gujrat 421 | 422,Andada,Gujrat 422 | 423,Anjar,Gujrat 423 | 424,Anklav,Gujrat 424 | 425,Antaliya,Gujrat 425 | 426,Arambhada,Gujrat 426 | 427,Atul,Gujrat 427 | 428,Ballabhgarh,Hariyana 428 | 429,Ambala,Haryana 429 | 430,Ambala,Haryana 430 | 431,Asankhurd,Haryana 431 | 432,Assandh,Haryana 432 | 433,Ateli,Haryana 433 | 434,Babiyal,Haryana 434 | 435,Bahadurgarh,Haryana 435 | 436,Barwala,Haryana 436 | 437,Bhiwani,Haryana 437 | 438,Charkhi Dadri,Haryana 438 | 439,Cheeka,Haryana 439 | 440,Ellenabad 2,Haryana 440 | 441,Faridabad,Haryana 441 | 442,Fatehabad,Haryana 442 | 443,Ganaur,Haryana 443 | 444,Gharaunda,Haryana 444 | 445,Gohana,Haryana 445 | 446,Gurgaon,Haryana 446 | 447,Haibat(Yamuna Nagar),Haryana 447 | 448,Hansi,Haryana 448 | 449,Hisar,Haryana 449 | 450,Hodal,Haryana 450 | 451,Jhajjar,Haryana 451 | 452,Jind,Haryana 452 | 453,Kaithal,Haryana 453 | 454,Kalan Wali,Haryana 454 | 455,Kalka,Haryana 455 | 456,Karnal,Haryana 456 | 457,Ladwa,Haryana 457 | 458,Mahendragarh,Haryana 458 | 459,Mandi Dabwali,Haryana 459 | 460,Narnaul,Haryana 460 | 461,Narwana,Haryana 461 | 462,Palwal,Haryana 462 | 463,Panchkula,Haryana 463 | 464,Panipat,Haryana 464 | 465,Pehowa,Haryana 465 | 466,Pinjore,Haryana 466 | 467,Rania,Haryana 467 | 468,Ratia,Haryana 468 | 469,Rewari,Haryana 469 | 470,Rohtak,Haryana 470 | 471,Safidon,Haryana 471 | 472,Samalkha,Haryana 472 | 473,Shahbad,Haryana 473 | 474,Sirsa,Haryana 474 | 475,Sohna,Haryana 475 | 476,Sonipat,Haryana 476 | 477,Taraori,Haryana 477 | 478,Thanesar,Haryana 478 | 479,Tohana,Haryana 479 | 480,Yamunanagar,Haryana 480 | 481,Arki,Himachal Pradesh 481 | 482,Baddi,Himachal Pradesh 482 | 483,Bilaspur,Himachal Pradesh 483 | 484,Chamba,Himachal Pradesh 484 | 485,Dalhousie,Himachal Pradesh 485 | 486,Dharamsala,Himachal Pradesh 486 | 487,Hamirpur,Himachal Pradesh 487 | 488,Mandi,Himachal Pradesh 488 | 489,Nahan,Himachal Pradesh 489 | 490,Shimla,Himachal Pradesh 490 | 491,Solan,Himachal Pradesh 491 | 492,Sundarnagar,Himachal Pradesh 492 | 493,Jammu,Jammu & Kashmir 493 | 494,Achabbal,Jammu & Kashmir 494 | 495,Akhnoor,Jammu & Kashmir 495 | 496,Anantnag,Jammu & Kashmir 496 | 497,Arnia,Jammu & Kashmir 497 | 498,Awantipora,Jammu & Kashmir 498 | 499,Bandipore,Jammu & Kashmir 499 | 500,Baramula,Jammu & Kashmir 500 | 501,Kathua,Jammu & Kashmir 501 | 502,Leh,Jammu & Kashmir 502 | 503,Punch,Jammu & Kashmir 503 | 504,Rajauri,Jammu & Kashmir 504 | 505,Sopore,Jammu & Kashmir 505 | 506,Srinagar,Jammu & Kashmir 506 | 507,Udhampur,Jammu & Kashmir 507 | 508,Amlabad,Jharkhand 508 | 509,Ara,Jharkhand 509 | 510,Barughutu,Jharkhand 510 | 511,Bokaro Steel City,Jharkhand 511 | 512,Chaibasa,Jharkhand 512 | 513,Chakradharpur,Jharkhand 513 | 514,Chandrapura,Jharkhand 514 | 515,Chatra,Jharkhand 515 | 516,Chirkunda,Jharkhand 516 | 517,Churi,Jharkhand 517 | 518,Daltonganj,Jharkhand 518 | 519,Deoghar,Jharkhand 519 | 520,Dhanbad,Jharkhand 520 | 521,Dumka,Jharkhand 521 | 522,Garhwa,Jharkhand 522 | 523,Ghatshila,Jharkhand 523 | 524,Giridih,Jharkhand 524 | 525,Godda,Jharkhand 525 | 526,Gomoh,Jharkhand 526 | 527,Gumia,Jharkhand 527 | 528,Gumla,Jharkhand 528 | 529,Hazaribag,Jharkhand 529 | 530,Hussainabad,Jharkhand 530 | 531,Jamshedpur,Jharkhand 531 | 532,Jamtara,Jharkhand 532 | 533,Jhumri Tilaiya,Jharkhand 533 | 534,Khunti,Jharkhand 534 | 535,Lohardaga,Jharkhand 535 | 
536,Madhupur,Jharkhand 536 | 537,Mihijam,Jharkhand 537 | 538,Musabani,Jharkhand 538 | 539,Pakaur,Jharkhand 539 | 540,Patratu,Jharkhand 540 | 541,Phusro,Jharkhand 541 | 542,Ramngarh,Jharkhand 542 | 543,Ranchi,Jharkhand 543 | 544,Sahibganj,Jharkhand 544 | 545,Saunda,Jharkhand 545 | 546,Simdega,Jharkhand 546 | 547,Tenu Dam-cum- Kathhara,Jharkhand 547 | 548,Arasikere,Karnataka 548 | 549,Bangalore,Karnataka 549 | 550,Belgaum,Karnataka 550 | 551,Bellary,Karnataka 551 | 552,Chamrajnagar,Karnataka 552 | 553,Chikkaballapur,Karnataka 553 | 554,Chintamani,Karnataka 554 | 555,Chitradurga,Karnataka 555 | 556,Gulbarga,Karnataka 556 | 557,Gundlupet,Karnataka 557 | 558,Hassan,Karnataka 558 | 559,Hospet,Karnataka 559 | 560,Hubli,Karnataka 560 | 561,Karkala,Karnataka 561 | 562,Karwar,Karnataka 562 | 563,Kolar,Karnataka 563 | 564,Kota,Karnataka 564 | 565,Lakshmeshwar,Karnataka 565 | 566,Lingsugur,Karnataka 566 | 567,Maddur,Karnataka 567 | 568,Madhugiri,Karnataka 568 | 569,Madikeri,Karnataka 569 | 570,Magadi,Karnataka 570 | 571,Mahalingpur,Karnataka 571 | 572,Malavalli,Karnataka 572 | 573,Malur,Karnataka 573 | 574,Mandya,Karnataka 574 | 575,Mangalore,Karnataka 575 | 576,Manvi,Karnataka 576 | 577,Mudalgi,Karnataka 577 | 578,Mudbidri,Karnataka 578 | 579,Muddebihal,Karnataka 579 | 580,Mudhol,Karnataka 580 | 581,Mulbagal,Karnataka 581 | 582,Mundargi,Karnataka 582 | 583,Mysore,Karnataka 583 | 584,Nanjangud,Karnataka 584 | 585,Pavagada,Karnataka 585 | 586,Puttur,Karnataka 586 | 587,Rabkavi Banhatti,Karnataka 587 | 588,Raichur,Karnataka 588 | 589,Ramanagaram,Karnataka 589 | 590,Ramdurg,Karnataka 590 | 591,Ranibennur,Karnataka 591 | 592,Robertson Pet,Karnataka 592 | 593,Ron,Karnataka 593 | 594,Sadalgi,Karnataka 594 | 595,Sagar,Karnataka 595 | 596,Sakleshpur,Karnataka 596 | 597,Sandur,Karnataka 597 | 598,Sankeshwar,Karnataka 598 | 599,Saundatti-Yellamma,Karnataka 599 | 600,Savanur,Karnataka 600 | 601,Sedam,Karnataka 601 | 602,Shahabad,Karnataka 602 | 603,Shahpur,Karnataka 603 | 604,Shiggaon,Karnataka 604 | 605,Shikapur,Karnataka 605 | 606,Shimoga,Karnataka 606 | 607,Shorapur,Karnataka 607 | 608,Shrirangapattana,Karnataka 608 | 609,Sidlaghatta,Karnataka 609 | 610,Sindgi,Karnataka 610 | 611,Sindhnur,Karnataka 611 | 612,Sira,Karnataka 612 | 613,Sirsi,Karnataka 613 | 614,Siruguppa,Karnataka 614 | 615,Srinivaspur,Karnataka 615 | 616,Talikota,Karnataka 616 | 617,Tarikere,Karnataka 617 | 618,Tekkalakota,Karnataka 618 | 619,Terdal,Karnataka 619 | 620,Tiptur,Karnataka 620 | 621,Tumkur,Karnataka 621 | 622,Udupi,Karnataka 622 | 623,Vijayapura,Karnataka 623 | 624,Wadi,Karnataka 624 | 625,Yadgir,Karnataka 625 | 626,Adoor,Kerala 626 | 627,Akathiyoor,Kerala 627 | 628,Alappuzha,Kerala 628 | 629,Ancharakandy,Kerala 629 | 630,Aroor,Kerala 630 | 631,Ashtamichira,Kerala 631 | 632,Attingal,Kerala 632 | 633,Avinissery,Kerala 633 | 634,Chalakudy,Kerala 634 | 635,Changanassery,Kerala 635 | 636,Chendamangalam,Kerala 636 | 637,Chengannur,Kerala 637 | 638,Cherthala,Kerala 638 | 639,Cheruthazham,Kerala 639 | 640,Chittur-Thathamangalam,Kerala 640 | 641,Chockli,Kerala 641 | 642,Erattupetta,Kerala 642 | 643,Guruvayoor,Kerala 643 | 644,Irinjalakuda,Kerala 644 | 645,Kadirur,Kerala 645 | 646,Kalliasseri,Kerala 646 | 647,Kalpetta,Kerala 647 | 648,Kanhangad,Kerala 648 | 649,Kanjikkuzhi,Kerala 649 | 650,Kannur,Kerala 650 | 651,Kasaragod,Kerala 651 | 652,Kayamkulam,Kerala 652 | 653,Kochi,Kerala 653 | 654,Kodungallur,Kerala 654 | 655,Kollam,Kerala 655 | 656,Koothuparamba,Kerala 656 | 657,Kothamangalam,Kerala 657 | 658,Kottayam,Kerala 658 | 
659,Kozhikode,Kerala 659 | 660,Kunnamkulam,Kerala 660 | 661,Malappuram,Kerala 661 | 662,Mattannur,Kerala 662 | 663,Mavelikkara,Kerala 663 | 664,Mavoor,Kerala 664 | 665,Muvattupuzha,Kerala 665 | 666,Nedumangad,Kerala 666 | 667,Neyyattinkara,Kerala 667 | 668,Ottappalam,Kerala 668 | 669,Palai,Kerala 669 | 670,Palakkad,Kerala 670 | 671,Panniyannur,Kerala 671 | 672,Pappinisseri,Kerala 672 | 673,Paravoor,Kerala 673 | 674,Pathanamthitta,Kerala 674 | 675,Payyannur,Kerala 675 | 676,Peringathur,Kerala 676 | 677,Perinthalmanna,Kerala 677 | 678,Perumbavoor,Kerala 678 | 679,Ponnani,Kerala 679 | 680,Punalur,Kerala 680 | 681,Quilandy,Kerala 681 | 682,Shoranur,Kerala 682 | 683,Taliparamba,Kerala 683 | 684,Thiruvalla,Kerala 684 | 685,Thiruvananthapuram,Kerala 685 | 686,Thodupuzha,Kerala 686 | 687,Thrissur,Kerala 687 | 688,Tirur,Kerala 688 | 689,Vadakara,Kerala 689 | 690,Vaikom,Kerala 690 | 691,Varkala,Kerala 691 | 692,Kavaratti,Lakshadweep 692 | 693,Ashok Nagar,Madhya Pradesh 693 | 694,Balaghat,Madhya Pradesh 694 | 695,Betul,Madhya Pradesh 695 | 696,Bhopal,Madhya Pradesh 696 | 697,Burhanpur,Madhya Pradesh 697 | 698,Chhatarpur,Madhya Pradesh 698 | 699,Dabra,Madhya Pradesh 699 | 700,Datia,Madhya Pradesh 700 | 701,Dewas,Madhya Pradesh 701 | 702,Dhar,Madhya Pradesh 702 | 703,Fatehabad,Madhya Pradesh 703 | 704,Gwalior,Madhya Pradesh 704 | 705,Indore,Madhya Pradesh 705 | 706,Itarsi,Madhya Pradesh 706 | 707,Jabalpur,Madhya Pradesh 707 | 708,Katni,Madhya Pradesh 708 | 709,Kotma,Madhya Pradesh 709 | 710,Lahar,Madhya Pradesh 710 | 711,Lundi,Madhya Pradesh 711 | 712,Maharajpur,Madhya Pradesh 712 | 713,Mahidpur,Madhya Pradesh 713 | 714,Maihar,Madhya Pradesh 714 | 715,Malajkhand,Madhya Pradesh 715 | 716,Manasa,Madhya Pradesh 716 | 717,Manawar,Madhya Pradesh 717 | 718,Mandideep,Madhya Pradesh 718 | 719,Mandla,Madhya Pradesh 719 | 720,Mandsaur,Madhya Pradesh 720 | 721,Mauganj,Madhya Pradesh 721 | 722,Mhow Cantonment,Madhya Pradesh 722 | 723,Mhowgaon,Madhya Pradesh 723 | 724,Morena,Madhya Pradesh 724 | 725,Multai,Madhya Pradesh 725 | 726,Murwara,Madhya Pradesh 726 | 727,Nagda,Madhya Pradesh 727 | 728,Nainpur,Madhya Pradesh 728 | 729,Narsinghgarh,Madhya Pradesh 729 | 730,Narsinghgarh,Madhya Pradesh 730 | 731,Neemuch,Madhya Pradesh 731 | 732,Nepanagar,Madhya Pradesh 732 | 733,Niwari,Madhya Pradesh 733 | 734,Nowgong,Madhya Pradesh 734 | 735,Nowrozabad,Madhya Pradesh 735 | 736,Pachore,Madhya Pradesh 736 | 737,Pali,Madhya Pradesh 737 | 738,Panagar,Madhya Pradesh 738 | 739,Pandhurna,Madhya Pradesh 739 | 740,Panna,Madhya Pradesh 740 | 741,Pasan,Madhya Pradesh 741 | 742,Pipariya,Madhya Pradesh 742 | 743,Pithampur,Madhya Pradesh 743 | 744,Porsa,Madhya Pradesh 744 | 745,Prithvipur,Madhya Pradesh 745 | 746,Raghogarh-Vijaypur,Madhya Pradesh 746 | 747,Rahatgarh,Madhya Pradesh 747 | 748,Raisen,Madhya Pradesh 748 | 749,Rajgarh,Madhya Pradesh 749 | 750,Ratlam,Madhya Pradesh 750 | 751,Rau,Madhya Pradesh 751 | 752,Rehli,Madhya Pradesh 752 | 753,Rewa,Madhya Pradesh 753 | 754,Sabalgarh,Madhya Pradesh 754 | 755,Sagar,Madhya Pradesh 755 | 756,Sanawad,Madhya Pradesh 756 | 757,Sarangpur,Madhya Pradesh 757 | 758,Sarni,Madhya Pradesh 758 | 759,Satna,Madhya Pradesh 759 | 760,Sausar,Madhya Pradesh 760 | 761,Sehore,Madhya Pradesh 761 | 762,Sendhwa,Madhya Pradesh 762 | 763,Seoni,Madhya Pradesh 763 | 764,Seoni-Malwa,Madhya Pradesh 764 | 765,Shahdol,Madhya Pradesh 765 | 766,Shajapur,Madhya Pradesh 766 | 767,Shamgarh,Madhya Pradesh 767 | 768,Sheopur,Madhya Pradesh 768 | 769,Shivpuri,Madhya Pradesh 769 | 770,Shujalpur,Madhya Pradesh 770 | 771,Sidhi,Madhya 
Pradesh 771 | 772,Sihora,Madhya Pradesh 772 | 773,Singrauli,Madhya Pradesh 773 | 774,Sironj,Madhya Pradesh 774 | 775,Sohagpur,Madhya Pradesh 775 | 776,Tarana,Madhya Pradesh 776 | 777,Tikamgarh,Madhya Pradesh 777 | 778,Ujhani,Madhya Pradesh 778 | 779,Ujjain,Madhya Pradesh 779 | 780,Umaria,Madhya Pradesh 780 | 781,Vidisha,Madhya Pradesh 781 | 782,Wara Seoni,Madhya Pradesh 782 | 783,Ahmednagar,Maharashtra 783 | 784,Akola,Maharashtra 784 | 785,Amravati,Maharashtra 785 | 786,Aurangabad,Maharashtra 786 | 787,Baramati,Maharashtra 787 | 788,Chalisgaon,Maharashtra 788 | 789,Chinchani,Maharashtra 789 | 790,Devgarh,Maharashtra 790 | 791,Dhule,Maharashtra 791 | 792,Dombivli,Maharashtra 792 | 793,Durgapur,Maharashtra 793 | 794,Ichalkaranji,Maharashtra 794 | 795,Jalna,Maharashtra 795 | 796,Kalyan,Maharashtra 796 | 797,Latur,Maharashtra 797 | 798,Loha,Maharashtra 798 | 799,Lonar,Maharashtra 799 | 800,Lonavla,Maharashtra 800 | 801,Mahad,Maharashtra 801 | 802,Mahuli,Maharashtra 802 | 803,Malegaon,Maharashtra 803 | 804,Malkapur,Maharashtra 804 | 805,Manchar,Maharashtra 805 | 806,Mangalvedhe,Maharashtra 806 | 807,Mangrulpir,Maharashtra 807 | 808,Manjlegaon,Maharashtra 808 | 809,Manmad,Maharashtra 809 | 810,Manwath,Maharashtra 810 | 811,Mehkar,Maharashtra 811 | 812,Mhaswad,Maharashtra 812 | 813,Miraj,Maharashtra 813 | 814,Morshi,Maharashtra 814 | 815,Mukhed,Maharashtra 815 | 816,Mul,Maharashtra 816 | 817,Mumbai,Maharashtra 817 | 818,Murtijapur,Maharashtra 818 | 819,Nagpur,Maharashtra 819 | 820,Nalasopara,Maharashtra 820 | 821,Nanded-Waghala,Maharashtra 821 | 822,Nandgaon,Maharashtra 822 | 823,Nandura,Maharashtra 823 | 824,Nandurbar,Maharashtra 824 | 825,Narkhed,Maharashtra 825 | 826,Nashik,Maharashtra 826 | 827,Navi Mumbai,Maharashtra 827 | 828,Nawapur,Maharashtra 828 | 829,Nilanga,Maharashtra 829 | 830,Osmanabad,Maharashtra 830 | 831,Ozar,Maharashtra 831 | 832,Pachora,Maharashtra 832 | 833,Paithan,Maharashtra 833 | 834,Palghar,Maharashtra 834 | 835,Pandharkaoda,Maharashtra 835 | 836,Pandharpur,Maharashtra 836 | 837,Panvel,Maharashtra 837 | 838,Parbhani,Maharashtra 838 | 839,Parli,Maharashtra 839 | 840,Parola,Maharashtra 840 | 841,Partur,Maharashtra 841 | 842,Pathardi,Maharashtra 842 | 843,Pathri,Maharashtra 843 | 844,Patur,Maharashtra 844 | 845,Pauni,Maharashtra 845 | 846,Pen,Maharashtra 846 | 847,Phaltan,Maharashtra 847 | 848,Pulgaon,Maharashtra 848 | 849,Pune,Maharashtra 849 | 850,Purna,Maharashtra 850 | 851,Pusad,Maharashtra 851 | 852,Rahuri,Maharashtra 852 | 853,Rajura,Maharashtra 853 | 854,Ramtek,Maharashtra 854 | 855,Ratnagiri,Maharashtra 855 | 856,Raver,Maharashtra 856 | 857,Risod,Maharashtra 857 | 858,Sailu,Maharashtra 858 | 859,Sangamner,Maharashtra 859 | 860,Sangli,Maharashtra 860 | 861,Sangole,Maharashtra 861 | 862,Sasvad,Maharashtra 862 | 863,Satana,Maharashtra 863 | 864,Satara,Maharashtra 864 | 865,Savner,Maharashtra 865 | 866,Sawantwadi,Maharashtra 866 | 867,Shahade,Maharashtra 867 | 868,Shegaon,Maharashtra 868 | 869,Shendurjana,Maharashtra 869 | 870,Shirdi,Maharashtra 870 | 871,Shirpur-Warwade,Maharashtra 871 | 872,Shirur,Maharashtra 872 | 873,Shrigonda,Maharashtra 873 | 874,Shrirampur,Maharashtra 874 | 875,Sillod,Maharashtra 875 | 876,Sinnar,Maharashtra 876 | 877,Solapur,Maharashtra 877 | 878,Soyagaon,Maharashtra 878 | 879,Talegaon Dabhade,Maharashtra 879 | 880,Talode,Maharashtra 880 | 881,Tasgaon,Maharashtra 881 | 882,Tirora,Maharashtra 882 | 883,Tuljapur,Maharashtra 883 | 884,Tumsar,Maharashtra 884 | 885,Uran,Maharashtra 885 | 886,Uran Islampur,Maharashtra 886 | 887,Wadgaon Road,Maharashtra 
887 | 888,Wai,Maharashtra 888 | 889,Wani,Maharashtra 889 | 890,Wardha,Maharashtra 890 | 891,Warora,Maharashtra 891 | 892,Warud,Maharashtra 892 | 893,Washim,Maharashtra 893 | 894,Yevla,Maharashtra 894 | 895,Uchgaon,Maharashtra 895 | 896,Udgir,Maharashtra 896 | 897,Umarga,Maharastra 897 | 898,Umarkhed,Maharastra 898 | 899,Umred,Maharastra 899 | 900,Vadgaon Kasba,Maharastra 900 | 901,Vaijapur,Maharastra 901 | 902,Vasai,Maharastra 902 | 903,Virar,Maharastra 903 | 904,Vita,Maharastra 904 | 905,Yavatmal,Maharastra 905 | 906,Yawal,Maharastra 906 | 907,Imphal,Manipur 907 | 908,Kakching,Manipur 908 | 909,Lilong,Manipur 909 | 910,Mayang Imphal,Manipur 910 | 911,Thoubal,Manipur 911 | 912,Jowai,Meghalaya 912 | 913,Nongstoin,Meghalaya 913 | 914,Shillong,Meghalaya 914 | 915,Tura,Meghalaya 915 | 916,Aizawl,Mizoram 916 | 917,Champhai,Mizoram 917 | 918,Lunglei,Mizoram 918 | 919,Saiha,Mizoram 919 | 920,Dimapur,Nagaland 920 | 921,Kohima,Nagaland 921 | 922,Mokokchung,Nagaland 922 | 923,Tuensang,Nagaland 923 | 924,Wokha,Nagaland 924 | 925,Zunheboto,Nagaland 925 | 950,Anandapur,Orissa 926 | 951,Anugul,Orissa 927 | 952,Asika,Orissa 928 | 953,Balangir,Orissa 929 | 954,Balasore,Orissa 930 | 955,Baleshwar,Orissa 931 | 956,Bamra,Orissa 932 | 957,Barbil,Orissa 933 | 958,Bargarh,Orissa 934 | 959,Bargarh,Orissa 935 | 960,Baripada,Orissa 936 | 961,Basudebpur,Orissa 937 | 962,Belpahar,Orissa 938 | 963,Bhadrak,Orissa 939 | 964,Bhawanipatna,Orissa 940 | 965,Bhuban,Orissa 941 | 966,Bhubaneswar,Orissa 942 | 967,Biramitrapur,Orissa 943 | 968,Brahmapur,Orissa 944 | 969,Brajrajnagar,Orissa 945 | 970,Byasanagar,Orissa 946 | 971,Cuttack,Orissa 947 | 972,Debagarh,Orissa 948 | 973,Dhenkanal,Orissa 949 | 974,Gunupur,Orissa 950 | 975,Hinjilicut,Orissa 951 | 976,Jagatsinghapur,Orissa 952 | 977,Jajapur,Orissa 953 | 978,Jaleswar,Orissa 954 | 979,Jatani,Orissa 955 | 980,Jeypur,Orissa 956 | 981,Jharsuguda,Orissa 957 | 982,Joda,Orissa 958 | 983,Kantabanji,Orissa 959 | 984,Karanjia,Orissa 960 | 985,Kendrapara,Orissa 961 | 986,Kendujhar,Orissa 962 | 987,Khordha,Orissa 963 | 988,Koraput,Orissa 964 | 989,Malkangiri,Orissa 965 | 990,Nabarangapur,Orissa 966 | 991,Paradip,Orissa 967 | 992,Parlakhemundi,Orissa 968 | 993,Pattamundai,Orissa 969 | 994,Phulabani,Orissa 970 | 995,Puri,Orissa 971 | 996,Rairangpur,Orissa 972 | 997,Rajagangapur,Orissa 973 | 998,Raurkela,Orissa 974 | 999,Rayagada,Orissa 975 | 1000,Sambalpur,Orissa 976 | 1001,Soro,Orissa 977 | 1002,Sunabeda,Orissa 978 | 1003,Sundargarh,Orissa 979 | 1004,Talcher,Orissa 980 | 1005,Titlagarh,Orissa 981 | 1006,Umarkote,Orissa 982 | 1007,Karaikal,Pondicherry 983 | 1008,Mahe,Pondicherry 984 | 1009,Pondicherry,Pondicherry 985 | 1010,Yanam,Pondicherry 986 | 1011,Ahmedgarh,Punjab 987 | 1012,Amritsar,Punjab 988 | 1013,Barnala,Punjab 989 | 1014,Batala,Punjab 990 | 1015,Bathinda,Punjab 991 | 1016,Bhagha Purana,Punjab 992 | 1017,Budhlada,Punjab 993 | 1018,Chandigarh,Punjab 994 | 1019,Dasua,Punjab 995 | 1020,Dhuri,Punjab 996 | 1021,Dinanagar,Punjab 997 | 1022,Faridkot,Punjab 998 | 1023,Fazilka,Punjab 999 | 1024,Firozpur,Punjab 1000 | 1025,Firozpur Cantt.,Punjab 1001 | 1026,Giddarbaha,Punjab 1002 | 1027,Gobindgarh,Punjab 1003 | 1028,Gurdaspur,Punjab 1004 | 1029,Hoshiarpur,Punjab 1005 | 1030,Jagraon,Punjab 1006 | 1031,Jaitu,Punjab 1007 | 1032,Jalalabad,Punjab 1008 | 1033,Jalandhar,Punjab 1009 | 1034,Jalandhar Cantt.,Punjab 1010 | 1035,Jandiala,Punjab 1011 | 1036,Kapurthala,Punjab 1012 | 1037,Karoran,Punjab 1013 | 1038,Kartarpur,Punjab 1014 | 1039,Khanna,Punjab 1015 | 1040,Kharar,Punjab 1016 | 1041,Kot 
Kapura,Punjab 1017 | 1042,Kurali,Punjab 1018 | 1043,Longowal,Punjab 1019 | 1044,Ludhiana,Punjab 1020 | 1045,Malerkotla,Punjab 1021 | 1046,Malout,Punjab 1022 | 1047,Mansa,Punjab 1023 | 1048,Maur,Punjab 1024 | 1049,Moga,Punjab 1025 | 1050,Mohali,Punjab 1026 | 1051,Morinda,Punjab 1027 | 1052,Mukerian,Punjab 1028 | 1053,Muktsar,Punjab 1029 | 1054,Nabha,Punjab 1030 | 1055,Nakodar,Punjab 1031 | 1056,Nangal,Punjab 1032 | 1057,Nawanshahr,Punjab 1033 | 1058,Pathankot,Punjab 1034 | 1059,Patiala,Punjab 1035 | 1060,Patran,Punjab 1036 | 1061,Patti,Punjab 1037 | 1062,Phagwara,Punjab 1038 | 1063,Phillaur,Punjab 1039 | 1064,Qadian,Punjab 1040 | 1065,Raikot,Punjab 1041 | 1066,Rajpura,Punjab 1042 | 1067,Rampura Phul,Punjab 1043 | 1068,Rupnagar,Punjab 1044 | 1069,Samana,Punjab 1045 | 1070,Sangrur,Punjab 1046 | 1071,Sirhind Fatehgarh Sahib,Punjab 1047 | 1072,Sujanpur,Punjab 1048 | 1073,Sunam,Punjab 1049 | 1074,Talwara,Punjab 1050 | 1075,Tarn Taran,Punjab 1051 | 1076,Urmar Tanda,Punjab 1052 | 1077,Zira,Punjab 1053 | 1078,Zirakpur,Punjab 1054 | 1079,Bali,Rajasthan 1055 | 1080,Banswara,Rajastan 1056 | 1081,Ajmer,Rajasthan 1057 | 1082,Alwar,Rajasthan 1058 | 1083,Bandikui,Rajasthan 1059 | 1084,Baran,Rajasthan 1060 | 1085,Barmer,Rajasthan 1061 | 1086,Bikaner,Rajasthan 1062 | 1087,Fatehpur,Rajasthan 1063 | 1088,Jaipur,Rajasthan 1064 | 1089,Jaisalmer,Rajasthan 1065 | 1090,Jodhpur,Rajasthan 1066 | 1091,Kota,Rajasthan 1067 | 1092,Lachhmangarh,Rajasthan 1068 | 1093,Ladnu,Rajasthan 1069 | 1094,Lakheri,Rajasthan 1070 | 1095,Lalsot,Rajasthan 1071 | 1096,Losal,Rajasthan 1072 | 1097,Makrana,Rajasthan 1073 | 1098,Malpura,Rajasthan 1074 | 1099,Mandalgarh,Rajasthan 1075 | 1100,Mandawa,Rajasthan 1076 | 1101,Mangrol,Rajasthan 1077 | 1102,Merta City,Rajasthan 1078 | 1103,Mount Abu,Rajasthan 1079 | 1104,Nadbai,Rajasthan 1080 | 1105,Nagar,Rajasthan 1081 | 1106,Nagaur,Rajasthan 1082 | 1107,Nargund,Rajasthan 1083 | 1108,Nasirabad,Rajasthan 1084 | 1109,Nathdwara,Rajasthan 1085 | 1110,Navalgund,Rajasthan 1086 | 1111,Nawalgarh,Rajasthan 1087 | 1112,Neem-Ka-Thana,Rajasthan 1088 | 1113,Nelamangala,Rajasthan 1089 | 1114,Nimbahera,Rajasthan 1090 | 1115,Nipani,Rajasthan 1091 | 1116,Niwai,Rajasthan 1092 | 1117,Nohar,Rajasthan 1093 | 1118,Nokha,Rajasthan 1094 | 1119,Pali,Rajasthan 1095 | 1120,Phalodi,Rajasthan 1096 | 1121,Phulera,Rajasthan 1097 | 1122,Pilani,Rajasthan 1098 | 1123,Pilibanga,Rajasthan 1099 | 1124,Pindwara,Rajasthan 1100 | 1125,Pipar City,Rajasthan 1101 | 1126,Prantij,Rajasthan 1102 | 1127,Pratapgarh,Rajasthan 1103 | 1128,Raisinghnagar,Rajasthan 1104 | 1129,Rajakhera,Rajasthan 1105 | 1130,Rajaldesar,Rajasthan 1106 | 1131,Rajgarh (Alwar),Rajasthan 1107 | 1132,Rajgarh (Churu,Rajasthan 1108 | 1133,Rajsamand,Rajasthan 1109 | 1134,Ramganj Mandi,Rajasthan 1110 | 1135,Ramngarh,Rajasthan 1111 | 1136,Ratangarh,Rajasthan 1112 | 1137,Rawatbhata,Rajasthan 1113 | 1138,Rawatsar,Rajasthan 1114 | 1139,Reengus,Rajasthan 1115 | 1140,Sadri,Rajasthan 1116 | 1141,Sadulshahar,Rajasthan 1117 | 1142,Sagwara,Rajasthan 1118 | 1143,Sambhar,Rajasthan 1119 | 1144,Sanchore,Rajasthan 1120 | 1145,Sangaria,Rajasthan 1121 | 1146,Sardarshahar,Rajasthan 1122 | 1147,Sawai Madhopur,Rajasthan 1123 | 1148,Shahpura,Rajasthan 1124 | 1149,Shahpura,Rajasthan 1125 | 1150,Sheoganj,Rajasthan 1126 | 1151,Sikar,Rajasthan 1127 | 1152,Sirohi,Rajasthan 1128 | 1153,Sojat,Rajasthan 1129 | 1154,Sri Madhopur,Rajasthan 1130 | 1155,Sujangarh,Rajasthan 1131 | 1156,Sumerpur,Rajasthan 1132 | 1157,Suratgarh,Rajasthan 1133 | 1158,Taranagar,Rajasthan 1134 | 1159,Todabhim,Rajasthan 1135 | 
1160,Todaraisingh,Rajasthan 1136 | 1161,Tonk,Rajasthan 1137 | 1162,Udaipur,Rajasthan 1138 | 1163,Udaipurwati,Rajasthan 1139 | 1164,Vijainagar,Rajasthan 1140 | 1165,Gangtok,Sikkim 1141 | 1166,Calcutta,West Bengal 1142 | 1167,Arakkonam,Tamil Nadu 1143 | 1168,Arcot,Tamil Nadu 1144 | 1169,Aruppukkottai,Tamil Nadu 1145 | 1170,Bhavani,Tamil Nadu 1146 | 1171,Chengalpattu,Tamil Nadu 1147 | 1172,Chennai,Tamil Nadu 1148 | 1173,Chinna salem,Tamil nadu 1149 | 1174,Coimbatore,Tamil Nadu 1150 | 1175,Coonoor,Tamil Nadu 1151 | 1176,Cuddalore,Tamil Nadu 1152 | 1177,Dharmapuri,Tamil Nadu 1153 | 1178,Dindigul,Tamil Nadu 1154 | 1179,Erode,Tamil Nadu 1155 | 1180,Gudalur,Tamil Nadu 1156 | 1181,Gudalur,Tamil Nadu 1157 | 1182,Gudalur,Tamil Nadu 1158 | 1183,Kanchipuram,Tamil Nadu 1159 | 1184,Karaikudi,Tamil Nadu 1160 | 1185,Karungal,Tamil Nadu 1161 | 1186,Karur,Tamil Nadu 1162 | 1187,Kollankodu,Tamil Nadu 1163 | 1188,Lalgudi,Tamil Nadu 1164 | 1189,Madurai,Tamil Nadu 1165 | 1190,Nagapattinam,Tamil Nadu 1166 | 1191,Nagercoil,Tamil Nadu 1167 | 1192,Namagiripettai,Tamil Nadu 1168 | 1193,Namakkal,Tamil Nadu 1169 | 1194,Nandivaram-Guduvancheri,Tamil Nadu 1170 | 1195,Nanjikottai,Tamil Nadu 1171 | 1196,Natham,Tamil Nadu 1172 | 1197,Nellikuppam,Tamil Nadu 1173 | 1198,Neyveli,Tamil Nadu 1174 | 1199,O, 1175 | 1200,Oddanchatram,Tamil Nadu 1176 | 1201,P.N.Patti,Tamil Nadu 1177 | 1202,Pacode,Tamil Nadu 1178 | 1203,Padmanabhapuram,Tamil Nadu 1179 | 1204,Palani,Tamil Nadu 1180 | 1205,Palladam,Tamil Nadu 1181 | 1206,Pallapatti,Tamil Nadu 1182 | 1207,Pallikonda,Tamil Nadu 1183 | 1208,Panagudi,Tamil Nadu 1184 | 1209,Panruti,Tamil Nadu 1185 | 1210,Paramakudi,Tamil Nadu 1186 | 1211,Parangipettai,Tamil Nadu 1187 | 1212,Pattukkottai,Tamil Nadu 1188 | 1213,Perambalur,Tamil Nadu 1189 | 1214,Peravurani,Tamil Nadu 1190 | 1215,Periyakulam,Tamil Nadu 1191 | 1216,Periyasemur,Tamil Nadu 1192 | 1217,Pernampattu,Tamil Nadu 1193 | 1218,Pollachi,Tamil Nadu 1194 | 1219,Polur,Tamil Nadu 1195 | 1220,Ponneri,Tamil Nadu 1196 | 1221,Pudukkottai,Tamil Nadu 1197 | 1222,Pudupattinam,Tamil Nadu 1198 | 1223,Puliyankudi,Tamil Nadu 1199 | 1224,Punjaipugalur,Tamil Nadu 1200 | 1225,Rajapalayam,Tamil Nadu 1201 | 1226,Ramanathapuram,Tamil Nadu 1202 | 1227,Rameshwaram,Tamil Nadu 1203 | 1228,Rasipuram,Tamil Nadu 1204 | 1229,Salem,Tamil Nadu 1205 | 1230,Sankarankoil,Tamil Nadu 1206 | 1231,Sankari,Tamil Nadu 1207 | 1232,Sathyamangalam,Tamil Nadu 1208 | 1233,Sattur,Tamil Nadu 1209 | 1234,Shenkottai,Tamil Nadu 1210 | 1235,Sholavandan,Tamil Nadu 1211 | 1236,Sholingur,Tamil Nadu 1212 | 1237,Sirkali,Tamil Nadu 1213 | 1238,Sivaganga,Tamil Nadu 1214 | 1239,Sivagiri,Tamil Nadu 1215 | 1240,Sivakasi,Tamil Nadu 1216 | 1241,Srivilliputhur,Tamil Nadu 1217 | 1242,Surandai,Tamil Nadu 1218 | 1243,Suriyampalayam,Tamil Nadu 1219 | 1244,Tenkasi,Tamil Nadu 1220 | 1245,Thammampatti,Tamil Nadu 1221 | 1246,Thanjavur,Tamil Nadu 1222 | 1247,Tharamangalam,Tamil Nadu 1223 | 1248,Tharangambadi,Tamil Nadu 1224 | 1249,Theni Allinagaram,Tamil Nadu 1225 | 1250,Thirumangalam,Tamil Nadu 1226 | 1251,Thirunindravur,Tamil Nadu 1227 | 1252,Thiruparappu,Tamil Nadu 1228 | 1253,Thirupuvanam,Tamil Nadu 1229 | 1254,Thiruthuraipoondi,Tamil Nadu 1230 | 1255,Thiruvallur,Tamil Nadu 1231 | 1256,Thiruvarur,Tamil Nadu 1232 | 1257,Thoothukudi,Tamil Nadu 1233 | 1258,Thuraiyur,Tamil Nadu 1234 | 1259,Tindivanam,Tamil Nadu 1235 | 1260,Tiruchendur,Tamil Nadu 1236 | 1261,Tiruchengode,Tamil Nadu 1237 | 1262,Tiruchirappalli,Tamil Nadu 1238 | 1263,Tirukalukundram,Tamil Nadu 1239 | 1264,Tirukkoyilur,Tamil Nadu 1240 | 
1265,Tirunelveli,Tamil Nadu 1241 | 1266,Tirupathur,Tamil Nadu 1242 | 1267,Tirupathur,Tamil Nadu 1243 | 1268,Tiruppur,Tamil Nadu 1244 | 1269,Tiruttani,Tamil Nadu 1245 | 1270,Tiruvannamalai,Tamil Nadu 1246 | 1271,Tiruvethipuram,Tamil Nadu 1247 | 1272,Tittakudi,Tamil Nadu 1248 | 1273,Udhagamandalam,Tamil Nadu 1249 | 1274,Udumalaipettai,Tamil Nadu 1250 | 1275,Unnamalaikadai,Tamil Nadu 1251 | 1276,Usilampatti,Tamil Nadu 1252 | 1277,Uthamapalayam,Tamil Nadu 1253 | 1278,Uthiramerur,Tamil Nadu 1254 | 1279,Vadakkuvalliyur,Tamil Nadu 1255 | 1280,Vadalur,Tamil Nadu 1256 | 1281,Vadipatti,Tamil Nadu 1257 | 1282,Valparai,Tamil Nadu 1258 | 1283,Vandavasi,Tamil Nadu 1259 | 1284,Vaniyambadi,Tamil Nadu 1260 | 1285,Vedaranyam,Tamil Nadu 1261 | 1286,Vellakoil,Tamil Nadu 1262 | 1287,Vellore,Tamil Nadu 1263 | 1288,Vikramasingapuram,Tamil Nadu 1264 | 1289,Viluppuram,Tamil Nadu 1265 | 1290,Virudhachalam,Tamil Nadu 1266 | 1291,Virudhunagar,Tamil Nadu 1267 | 1292,Viswanatham,Tamil Nadu 1268 | 1293,Agartala,Tripura 1269 | 1294,Badharghat,Tripura 1270 | 1295,Dharmanagar,Tripura 1271 | 1296,Indranagar,Tripura 1272 | 1297,Jogendranagar,Tripura 1273 | 1298,Kailasahar,Tripura 1274 | 1299,Khowai,Tripura 1275 | 1300,Pratapgarh,Tripura 1276 | 1301,Udaipur,Tripura 1277 | 1302,Achhnera,Uttar Pradesh 1278 | 1303,Adari,Uttar Pradesh 1279 | 1304,Agra,Uttar Pradesh 1280 | 1305,Aligarh,Uttar Pradesh 1281 | 1306,Allahabad,Uttar Pradesh 1282 | 1307,Amroha,Uttar Pradesh 1283 | 1308,Azamgarh,Uttar Pradesh 1284 | 1309,Bahraich,Uttar Pradesh 1285 | 1310,Ballia,Uttar Pradesh 1286 | 1311,Balrampur,Uttar Pradesh 1287 | 1312,Banda,Uttar Pradesh 1288 | 1313,Bareilly,Uttar Pradesh 1289 | 1314,Chandausi,Uttar Pradesh 1290 | 1315,Dadri,Uttar Pradesh 1291 | 1316,Deoria,Uttar Pradesh 1292 | 1317,Etawah,Uttar Pradesh 1293 | 1318,Fatehabad,Uttar Pradesh 1294 | 1319,Fatehpur,Uttar Pradesh 1295 | 1320,Fatehpur,Uttar Pradesh 1296 | 1321,Greater Noida,Uttar Pradesh 1297 | 1322,Hamirpur,Uttar Pradesh 1298 | 1323,Hardoi,Uttar Pradesh 1299 | 1324,Jajmau,Uttar Pradesh 1300 | 1325,Jaunpur,Uttar Pradesh 1301 | 1326,Jhansi,Uttar Pradesh 1302 | 1327,Kalpi,Uttar Pradesh 1303 | 1328,Kanpur,Uttar Pradesh 1304 | 1329,Kota,Uttar Pradesh 1305 | 1330,Laharpur,Uttar Pradesh 1306 | 1331,Lakhimpur,Uttar Pradesh 1307 | 1332,Lal Gopalganj Nindaura,Uttar Pradesh 1308 | 1333,Lalganj,Uttar Pradesh 1309 | 1334,Lalitpur,Uttar Pradesh 1310 | 1335,Lar,Uttar Pradesh 1311 | 1336,Loni,Uttar Pradesh 1312 | 1337,Lucknow,Uttar Pradesh 1313 | 1338,Mathura,Uttar Pradesh 1314 | 1339,Meerut,Uttar Pradesh 1315 | 1340,Modinagar,Uttar Pradesh 1316 | 1341,Muradnagar,Uttar Pradesh 1317 | 1342,Nagina,Uttar Pradesh 1318 | 1343,Najibabad,Uttar Pradesh 1319 | 1344,Nakur,Uttar Pradesh 1320 | 1345,Nanpara,Uttar Pradesh 1321 | 1346,Naraura,Uttar Pradesh 1322 | 1347,Naugawan Sadat,Uttar Pradesh 1323 | 1348,Nautanwa,Uttar Pradesh 1324 | 1349,Nawabganj,Uttar Pradesh 1325 | 1350,Nehtaur,Uttar Pradesh 1326 | 1351,NOIDA,Uttar Pradesh 1327 | 1352,Noorpur,Uttar Pradesh 1328 | 1353,Obra,Uttar Pradesh 1329 | 1354,Orai,Uttar Pradesh 1330 | 1355,Padrauna,Uttar Pradesh 1331 | 1356,Palia Kalan,Uttar Pradesh 1332 | 1357,Parasi,Uttar Pradesh 1333 | 1358,Phulpur,Uttar Pradesh 1334 | 1359,Pihani,Uttar Pradesh 1335 | 1360,Pilibhit,Uttar Pradesh 1336 | 1361,Pilkhuwa,Uttar Pradesh 1337 | 1362,Powayan,Uttar Pradesh 1338 | 1363,Pukhrayan,Uttar Pradesh 1339 | 1364,Puranpur,Uttar Pradesh 1340 | 1365,Purquazi,Uttar Pradesh 1341 | 1366,Purwa,Uttar Pradesh 1342 | 1367,Rae Bareli,Uttar Pradesh 1343 | 1368,Rampur,Uttar Pradesh 
1344 | 1369,Rampur Maniharan,Uttar Pradesh 1345 | 1370,Rasra,Uttar Pradesh 1346 | 1371,Rath,Uttar Pradesh 1347 | 1372,Renukoot,Uttar Pradesh 1348 | 1373,Reoti,Uttar Pradesh 1349 | 1374,Robertsganj,Uttar Pradesh 1350 | 1375,Rudauli,Uttar Pradesh 1351 | 1376,Rudrapur,Uttar Pradesh 1352 | 1377,Sadabad,Uttar Pradesh 1353 | 1378,Safipur,Uttar Pradesh 1354 | 1379,Saharanpur,Uttar Pradesh 1355 | 1380,Sahaspur,Uttar Pradesh 1356 | 1381,Sahaswan,Uttar Pradesh 1357 | 1382,Sahawar,Uttar Pradesh 1358 | 1383,Sahjanwa,Uttar Pradesh 1359 | 1384,Saidpur, Ghazipur 1360 | 1385,Sambhal,Uttar Pradesh 1361 | 1386,Samdhan,Uttar Pradesh 1362 | 1387,Samthar,Uttar Pradesh 1363 | 1388,Sandi,Uttar Pradesh 1364 | 1389,Sandila,Uttar Pradesh 1365 | 1390,Sardhana,Uttar Pradesh 1366 | 1391,Seohara,Uttar Pradesh 1367 | 1392,Shahabad, Hardoi 1368 | 1393,Shahabad, Rampur 1369 | 1394,Shahganj,Uttar Pradesh 1370 | 1395,Shahjahanpur,Uttar Pradesh 1371 | 1396,Shamli,Uttar Pradesh 1372 | 1397,Shamsabad, Agra 1373 | 1398,Shamsabad, Farrukhabad 1374 | 1399,Sherkot,Uttar Pradesh 1375 | 1400,Shikarpur, Bulandshahr 1376 | 1401,Shikohabad,Uttar Pradesh 1377 | 1402,Shishgarh,Uttar Pradesh 1378 | 1403,Siana,Uttar Pradesh 1379 | 1404,Sikanderpur,Uttar Pradesh 1380 | 1405,Sikandra Rao,Uttar Pradesh 1381 | 1406,Sikandrabad,Uttar Pradesh 1382 | 1407,Sirsaganj,Uttar Pradesh 1383 | 1408,Sirsi,Uttar Pradesh 1384 | 1409,Sitapur,Uttar Pradesh 1385 | 1410,Soron,Uttar Pradesh 1386 | 1411,Suar,Uttar Pradesh 1387 | 1412,Sultanpur,Uttar Pradesh 1388 | 1413,Sumerpur,Uttar Pradesh 1389 | 1414,Tanda,Uttar Pradesh 1390 | 1415,Tanda,Uttar Pradesh 1391 | 1416,Tetri Bazar,Uttar Pradesh 1392 | 1417,Thakurdwara,Uttar Pradesh 1393 | 1418,Thana Bhawan,Uttar Pradesh 1394 | 1419,Tilhar,Uttar Pradesh 1395 | 1420,Tirwaganj,Uttar Pradesh 1396 | 1421,Tulsipur,Uttar Pradesh 1397 | 1422,Tundla,Uttar Pradesh 1398 | 1423,Unnao,Uttar Pradesh 1399 | 1424,Utraula,Uttar Pradesh 1400 | 1425,Varanasi,Uttar Pradesh 1401 | 1426,Vrindavan,Uttar Pradesh 1402 | 1427,Warhapur,Uttar Pradesh 1403 | 1428,Zaidpur,Uttar Pradesh 1404 | 1429,Zamania,Uttar Pradesh 1405 | 1430,Almora,Uttarakhand 1406 | 1431,Bazpur,Uttarakhand 1407 | 1432,Chamba,Uttarakhand 1408 | 1433,Dehradun,Uttarakhand 1409 | 1434,Haldwani,Uttarakhand 1410 | 1435,Haridwar,Uttarakhand 1411 | 1436,Jaspur,Uttarakhand 1412 | 1437,Kashipur,Uttarakhand 1413 | 1438,kichha,Uttarakhand 1414 | 1439,Kotdwara,Uttarakhand 1415 | 1440,Manglaur,Uttarakhand 1416 | 1441,Mussoorie,Uttarakhand 1417 | 1442,Nagla,Uttarakhand 1418 | 1443,Nainital,Uttarakhand 1419 | 1444,Pauri,Uttarakhand 1420 | 1445,Pithoragarh,Uttarakhand 1421 | 1446,Ramnagar,Uttarakhand 1422 | 1447,Rishikesh,Uttarakhand 1423 | 1448,Roorkee,Uttarakhand 1424 | 1449,Rudrapur,Uttarakhand 1425 | 1450,Sitarganj,Uttarakhand 1426 | 1451,Tehri,Uttarakhand 1427 | 1452,Muzaffarnagar,Uttar Pradesh 1428 | 1453,Adra, Purulia 1429 | 1454,Alipurduar,West Bengal 1430 | 1455,Arambagh,West Bengal 1431 | 1456,Asansol,West Bengal 1432 | 1457,Baharampur,West Bengal 1433 | 1458,Bally,West Bengal 1434 | 1459,Balurghat,West Bengal 1435 | 1460,Bankura,West Bengal 1436 | 1461,Barakar,West Bengal 1437 | 1462,Barasat,West Bengal 1438 | 1463,Bardhaman,West Bengal 1439 | 1464,Bidhan Nagar,West Bengal 1440 | 1465,Chinsura,West Bengal 1441 | 1466,Contai,West Bengal 1442 | 1467,Cooch Behar,West Bengal 1443 | 1468,Darjeeling,West Bengal 1444 | 1469,Durgapur,West Bengal 1445 | 1470,Haldia,West Bengal 1446 | 1471,Howrah,West Bengal 1447 | 1472,Islampur,West Bengal 1448 | 1473,Jhargram,West Bengal 1449 | 
1474,Kharagpur,West Bengal 1450 | 1475,Kolkata,West Bengal 1451 | 1476,Mainaguri,West Bengal 1452 | 1477,Mal,West Bengal 1453 | 1478,Mathabhanga,West Bengal 1454 | 1479,Medinipur,West Bengal 1455 | 1480,Memari,West Bengal 1456 | 1481,Monoharpur,West Bengal 1457 | 1482,Murshidabad,West Bengal 1458 | 1483,Nabadwip,West Bengal 1459 | 1484,Naihati,West Bengal 1460 | 1485,Panchla,West Bengal 1461 | 1486,Pandua,West Bengal 1462 | 1487,Paschim Punropara,West Bengal 1463 | 1488,Purulia,West Bengal 1464 | 1489,Raghunathpur,West Bengal 1465 | 1490,Raiganj,West Bengal 1466 | 1491,Rampurhat,West Bengal 1467 | 1492,Ranaghat,West Bengal 1468 | 1493,Sainthia,West Bengal 1469 | 1494,Santipur,West Bengal 1470 | 1495,Siliguri,West Bengal 1471 | 1496,Sonamukhi,West Bengal 1472 | 1497,Srirampore,West Bengal 1473 | 1498,Suri,West Bengal 1474 | 1499,Taki,West Bengal 1475 | 1500,Tamluk,West Bengal 1476 | 1501,Tarakeswar,West Bengal 1477 | 1502,Chikmagalur,Karnataka 1478 | 1503,Davanagere,Karnataka 1479 | 1504,Dharwad,Karnataka 1480 | 1505,Gadag,Karnataka 1481 | 1506,Chennai,Tamil Nadu 1482 | 1507,Coimbatore,Tamil Nadu 1483 | 1508,Bengaluru,Karnataka 1484 | -------------------------------------------------------------------------------- /2-Preprocessing_and_Modelling/Pre-processing Jobs for modellingv2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Job - Pre-processing and Modelling Iteration final" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# libraries import\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import json\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "%matplotlib inline\n", 23 | "\n", 24 | "import re\n", 25 | "import datetime\n", 26 | "from datetime import date\n", 27 | "from time import strptime\n", 28 | "\n", 29 | "import RAKE as rake\n", 30 | "import operator\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "######################################################################################\n", 38 | "\n", 39 | "# Working on Job description Data\n", 40 | "###################################################################################### " 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# reading my sorted job csv\n", 50 | "job = pd.read_csv('WIP/sorted_jobs_master_new.csv')" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "\n", 63 | "RangeIndex: 38941 entries, 0 to 38940\n", 64 | "Data columns (total 17 columns):\n", 65 | " # Column Non-Null Count Dtype \n", 66 | "--- ------ -------------- ----- \n", 67 | " 0 company 38941 non-null object \n", 68 | " 1 education 38941 non-null object \n", 69 | " 2 experience 38941 non-null int64 \n", 70 | " 3 industry 38941 non-null object \n", 71 | " 4 jobdescription 38941 non-null object \n", 72 | " 5 jobtitle 38941 non-null object \n", 73 | " 6 payrate 38941 non-null object \n", 74 | " 7 skills 38941 non-null object \n", 75 | " 8 experience_range 38941 non-null int64 \n", 76 | " 9 industry_enum 38941 non-null int64 \n", 77 | " 10 Salary_range 38941 non-null float64\n", 78 | " 11 j_id 38941 non-null int64 
\n", 79 | " 12 is_grad 38941 non-null int64 \n", 80 | " 13 is_postgrad 38941 non-null int64 \n", 81 | " 14 is_doc 38941 non-null int64 \n", 82 | " 15 location 38941 non-null int64 \n", 83 | " 16 loc_name 38941 non-null object \n", 84 | "dtypes: float64(1), int64(8), object(8)\n", 85 | "memory usage: 5.1+ MB\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "job.info()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "###########################################################################################################################\n", 98 | "# Understanding Job_description column (using NLP)\n", 99 | "###########################################################################################################################\n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# 1. NLP - NLTK application to understand most used words" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stderr", 116 | "output_type": "stream", 117 | "text": [ 118 | "[nltk_data] Downloading package wordnet to\n", 119 | "[nltk_data] C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n", 120 | "[nltk_data] Package wordnet is already up-to-date!\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "#Import all the dependencies\n", 126 | "import nltk\n", 127 | "nltk.download('wordnet')\n", 128 | "from nltk.stem import WordNetLemmatizer\n", 129 | "wordnet_lemmatizer = WordNetLemmatizer()\n", 130 | "from nltk.corpus import stopwords\n", 131 | "from nltk.tokenize import word_tokenize \n", 132 | "set(stopwords.words('english'))\n", 133 | "# nltk.download('abc')\n", 134 | "# from nltk.corpus import abc\n", 135 | "# from nltk import RegexpTokenizer\n", 136 | "\n", 137 | "import string\n", 138 | "stopwords = set(stopwords.words(\"english\"))\n", 139 | "import gensim\n", 140 | "from gensim.test.utils import common_texts\n", 141 | "from gensim.models.doc2vec import Doc2Vec, TaggedDocument" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 5, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# defining tokenizer \n", 151 | "def my_tokenizer(text):\n", 152 | " # 1. split at whitespace\n", 153 | " text = text.split(' ')\n", 154 | " \n", 155 | " #2. lowercase\n", 156 | " text = [word.lower() for word in text]\n", 157 | " \n", 158 | " #3. Remove puncutation\n", 159 | " #table to replace puncuation\n", 160 | " punc_table = str.maketrans('','',string.punctuation)\n", 161 | " \n", 162 | " #call translate()\n", 163 | " text = [word.translate(punc_table) for word in text]\n", 164 | " \n", 165 | " #4. remove stopwords\n", 166 | " text = [word for word in text if word not in stopwords]\n", 167 | " \n", 168 | " #5. lemmmatize\n", 169 | " lemmatizer = WordNetLemmatizer()\n", 170 | " \n", 171 | " text = [lemmatizer.lemmatize(word, pos='v') for word in text]\n", 172 | " text = [lemmatizer.lemmatize(word, pos='n') for word in text]\n", 173 | " text = [lemmatizer.lemmatize(word, pos='a') for word in text]\n", 174 | " \n", 175 | " #6. remove empty strings\n", 176 | " text = [word for word in text if word !='']\n", 177 | " \n", 178 | " return text " 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "# 2. 
181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "# 2. NLP - TF-IDF application to get a list of all tokens \n", 186 | "-- This helped to gather what words needed to be in the stop-words list" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 16, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "#z = job['jobdescription'].str.rstrip('job description send me jobs like this')" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 7, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "data": { 205 | "text/plain": [ 206 | "0 Qualifications: - == > 10th To Graduation & A...\n", 207 | "1 Qualifications: - == > 10th To Graduation & A...\n", 208 | "2 - as a developer in providing application des...\n", 209 | "3 - Involved with all stages of indirect taxati...\n", 210 | "4 - Involved with all stages of indirect taxati...\n", 211 | " ... \n", 212 | "38936 Looking for candidates with strong programmin...\n", 213 | "38937 Work with tech lead to architect and develop ...\n", 214 | "38938 We are looking for a Senior UI Developers and...\n", 215 | "38939 We are looking for a Senior UI Developers and...\n", 216 | "38940 Job description : Experience of 5-10 years wi...\n", 217 | "Name: jobdescription, Length: 38941, dtype: object" 218 | ] 219 | }, 220 | "execution_count": 7, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "# job['jobdescription'] = job.jobdescription.str[40:]\n", 227 | "job['jobdescription']" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 23, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "# t= job.copy()\n", 237 | "# t.to_csv('WIP/sorted_jobs_master_new.csv', index=False)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 8, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stderr", 247 | "output_type": "stream", 248 | "text": [ 249 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 250 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 251 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 252 | "\n", 253 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 254 | " \n" 255 | ] 256 | }, 257 | { 258 | "data": { 259 | "text/html": [ 260 | "
\n", 261 | "\n", 274 | "\n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | "
j_idjobtitlecompanyjd_combo
00walkin data entry operator (night shift)MM Media Pvt Ltdwalkin data entry operator (night shift) Qual...
11work based onhome based part time.find live infotechwork based onhome based part time. Qualificat...
22pl/sql developer - sqlSofttech Career Infosystem Pvt. Ltdpl/sql developer - sql - as a developer in pr...
33manager/ad/partner - indirect tax - caOnboard HRServices LLPmanager/ad/partner - indirect tax - ca - Invo...
44manager/ad/partner - indirect tax - caOnboard HRServices LLPmanager/ad/partner - indirect tax - ca - Invo...
\n", 322 | "
" 323 | ], 324 | "text/plain": [ 325 | " j_id jobtitle \\\n", 326 | "0 0 walkin data entry operator (night shift) \n", 327 | "1 1 work based onhome based part time. \n", 328 | "2 2 pl/sql developer - sql \n", 329 | "3 3 manager/ad/partner - indirect tax - ca \n", 330 | "4 4 manager/ad/partner - indirect tax - ca \n", 331 | "\n", 332 | " company \\\n", 333 | "0 MM Media Pvt Ltd \n", 334 | "1 find live infotech \n", 335 | "2 Softtech Career Infosystem Pvt. Ltd \n", 336 | "3 Onboard HRServices LLP \n", 337 | "4 Onboard HRServices LLP \n", 338 | "\n", 339 | " jd_combo \n", 340 | "0 walkin data entry operator (night shift) Qual... \n", 341 | "1 work based onhome based part time. Qualificat... \n", 342 | "2 pl/sql developer - sql - as a developer in pr... \n", 343 | "3 manager/ad/partner - indirect tax - ca - Invo... \n", 344 | "4 manager/ad/partner - indirect tax - ca - Invo... " 345 | ] 346 | }, 347 | "execution_count": 8, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "df_job_descriptions = job[['j_id','jobtitle','company' ]]\n", 354 | "df_job_descriptions['jd_combo'] = job['jobtitle'] +\" \" + job['jobdescription'] \n", 355 | "df_job_descriptions.head()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 9, 361 | "metadata": { 362 | "scrolled": true 363 | }, 364 | "outputs": [ 365 | { 366 | "name": "stderr", 367 | "output_type": "stream", 368 | "text": [ 369 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n", 370 | " 'stop_words.' % sorted(inconsistent))\n" 371 | ] 372 | }, 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "(38941, 58510)\n", 378 | "(38941, 4)\n" 379 | ] 380 | } 381 | ], 382 | "source": [ 383 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 384 | "stopwords = nltk.corpus.stopwords.words('english')\n", 385 | "stopwords.append('ã¯æ’ëœ')\n", 386 | "#Transforms words to TFIDF\n", 387 | "vectorizer = TfidfVectorizer(stop_words = stopwords)\n", 388 | "\n", 389 | "index = 0\n", 390 | "keys = {}\n", 391 | "\n", 392 | "for jd in df_job_descriptions.itertuples() :\n", 393 | " key = jd[1]\n", 394 | " keys[key] = index\n", 395 | " index += 1\n", 396 | "\n", 397 | "#Fit the vectorizer to the data\n", 398 | "vectorizer.fit(df_job_descriptions['jd_combo'].fillna(''))\n", 399 | "\n", 400 | "#Transform the data\n", 401 | "tfidf_scores = vectorizer.transform(df_job_descriptions['jd_combo'].fillna(''))\n", 402 | "\n", 403 | "print(tfidf_scores.shape)\n", 404 | "print(df_job_descriptions.shape)" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 10, 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "data": { 414 | "text/plain": [ 415 | "scipy.sparse.csr.csr_matrix" 416 | ] 417 | }, 418 | "execution_count": 10, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "type(tfidf_scores)" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 11, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 12, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/html": [ 444 | "
\n", 445 | "\n", 458 | "\n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | "
000000000000000000gmt0001pt000290003400040200053...ïƒïƒ¼ïƒžœ100œmostœrecognitionœtošâšãžâ
00.00.0564990.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
10.00.0682730.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
20.00.0000000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
30.00.0000000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
40.00.0000000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", 608 | "

5 rows × 58510 columns

\n", 609 | "
" 610 | ], 611 | "text/plain": [ 612 | " 00 000 0000 00000 0000gmt 0001pt 00029 00034 000402 00053 \\\n", 613 | "0 0.0 0.056499 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 614 | "1 0.0 0.068273 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 615 | "2 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 616 | "3 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 617 | "4 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 618 | "\n", 619 | " ... ïƒ ïƒ¼  œ100 œmost œrecognition œto šâ šã žâ \n", 620 | "0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 621 | "1 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 622 | "2 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 623 | "3 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 624 | "4 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 625 | "\n", 626 | "[5 rows x 58510 columns]" 627 | ] 628 | }, 629 | "execution_count": 12, 630 | "metadata": {}, 631 | "output_type": "execute_result" 632 | } 633 | ], 634 | "source": [ 635 | "test.head()" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "As count vectorizer and Tf-Idf are only exploding my column numbers. It might not be wise to proceed with any of these. Moveover, I need to compare job description with Resume, that may not with fair comparison. So I will use these results so far for customizing stop word list. And will later use Doc2Vec to train my model." 643 | ] 644 | }, 645 | { 646 | "cell_type": "markdown", 647 | "metadata": {}, 648 | "source": [ 649 | "# Creating my Stopword list \n", 650 | "\n", 651 | "### As seen there are so many unwanted tokens like numbers, etc , I need to add them in \"stop words\" list to train model " 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": 13, 657 | "metadata": {}, 658 | "outputs": [], 659 | "source": [ 660 | "#getting list of all tokens\n", 661 | "word_list = test.columns.tolist()" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": 14, 667 | "metadata": {}, 668 | "outputs": [], 669 | "source": [ 670 | "##Getting a list of unwanted words as s_words and adding to stopwords\n", 671 | "s_words =[]\n", 672 | "for word in word_list:\n", 673 | " #print(word)\n", 674 | " if re.search(\"^\\W|^\\d\",word):\n", 675 | " s_words.append(word)\n", 676 | " " 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": 15, 682 | "metadata": {}, 683 | "outputs": [], 684 | "source": [ 685 | "s_words.append('') \n", 686 | "from nltk.corpus import stopwords\n", 687 | "stopword_set = set(stopwords.words('english'))\n", 688 | "stopword_set = list(stopword_set)\n", 689 | "stopword_set.extend(s_words)" 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "# Collecting all text data for DOC2VEC modelling\n", 697 | "In final iteration, I only used job title and job description for creating text combo document and got my 20-D vectors. This time I trained my model on 200 epochs. \n", 698 | "\n", 699 | "As count vectorizer and Tf-Idf are only exploding my column numbers. It might not be wise to proceed with any of these. Moveover, I need to compare job description with Resume, that may not with fair comparison. \n", 700 | "\n", 701 | "Definately Doc2Vec is the smart choice to make to proceed with matching. Because Doc2Vec has ability to read document as a whole rather than working on each single word. It has a feature to provide n-Dimentional vectors. \n", 702 | "\n", 703 | "So I am going to use same concept to get my vectors. 
707 | { 708 | "cell_type": "code", 709 | "execution_count": 16, 710 | "metadata": {}, 711 | "outputs": [ 712 | { 713 | "data": { 714 | "text/html": [ 715 | "
\n", 716 | "\n", 729 | "\n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | "
j_idjobtitlecompanyjd_combo
00walkin data entry operator (night shift)MM Media Pvt Ltdwalkin data entry operator (night shift) Qual...
11work based onhome based part time.find live infotechwork based onhome based part time. Qualificat...
22pl/sql developer - sqlSofttech Career Infosystem Pvt. Ltdpl/sql developer - sql - as a developer in pr...
33manager/ad/partner - indirect tax - caOnboard HRServices LLPmanager/ad/partner - indirect tax - ca - Invo...
44manager/ad/partner - indirect tax - caOnboard HRServices LLPmanager/ad/partner - indirect tax - ca - Invo...
\n", 777 | "
" 778 | ], 779 | "text/plain": [ 780 | " j_id jobtitle \\\n", 781 | "0 0 walkin data entry operator (night shift) \n", 782 | "1 1 work based onhome based part time. \n", 783 | "2 2 pl/sql developer - sql \n", 784 | "3 3 manager/ad/partner - indirect tax - ca \n", 785 | "4 4 manager/ad/partner - indirect tax - ca \n", 786 | "\n", 787 | " company \\\n", 788 | "0 MM Media Pvt Ltd \n", 789 | "1 find live infotech \n", 790 | "2 Softtech Career Infosystem Pvt. Ltd \n", 791 | "3 Onboard HRServices LLP \n", 792 | "4 Onboard HRServices LLP \n", 793 | "\n", 794 | " jd_combo \n", 795 | "0 walkin data entry operator (night shift) Qual... \n", 796 | "1 work based onhome based part time. Qualificat... \n", 797 | "2 pl/sql developer - sql - as a developer in pr... \n", 798 | "3 manager/ad/partner - indirect tax - ca - Invo... \n", 799 | "4 manager/ad/partner - indirect tax - ca - Invo... " 800 | ] 801 | }, 802 | "execution_count": 16, 803 | "metadata": {}, 804 | "output_type": "execute_result" 805 | } 806 | ], 807 | "source": [ 808 | "# df_job_descriptions = job[['j_id','jobtitle','company' ]]\n", 809 | "# df_job_descriptions['jd_combo'] = job['jobtitle'] +\" \" + job['jobdescription'] \n", 810 | "df_job_descriptions.head()" 811 | ] 812 | }, 813 | { 814 | "cell_type": "code", 815 | "execution_count": 17, 816 | "metadata": {}, 817 | "outputs": [ 818 | { 819 | "data": { 820 | "text/plain": [ 821 | "0 walkin data entry operator (night shift) Qual...\n", 822 | "1 work based onhome based part time. Qualificat...\n", 823 | "2 pl/sql developer - sql - as a developer in pr...\n", 824 | "3 manager/ad/partner - indirect tax - ca - Invo...\n", 825 | "4 manager/ad/partner - indirect tax - ca - Invo...\n", 826 | "5 manager/ad/partner - indirect tax - ca - Invo...\n", 827 | "6 manager/ad/partner - indirect tax - ca - Invo...\n", 828 | "7 manager/ad/partner - indirect tax - ca - Invo...\n", 829 | "8 manager/ad/partner - indirect tax - ca - Invo...\n", 830 | "9 java technical lead (6-8 yrs) - Please share ...\n", 831 | "Name: jd_combo, dtype: object" 832 | ] 833 | }, 834 | "execution_count": 17, 835 | "metadata": {}, 836 | "output_type": "execute_result" 837 | } 838 | ], 839 | "source": [ 840 | "docs = df_job_descriptions['jd_combo']\n", 841 | "docs_sample = docs.head(10)\n", 842 | "docs_sample" 843 | ] 844 | }, 845 | { 846 | "cell_type": "code", 847 | "execution_count": 18, 848 | "metadata": {}, 849 | "outputs": [], 850 | "source": [ 851 | "#pre-processing with custom stop word list\n", 852 | "def preprocess(text):\n", 853 | " stop_words = stopword_set\n", 854 | " #0. split words by whitespace\n", 855 | " text = text.split()\n", 856 | " \n", 857 | " \n", 858 | " # 1. lower case\n", 859 | " text = [word.lower() for word in text]\n", 860 | " \n", 861 | " # 2. remove punctuations\n", 862 | " punc_table = str.maketrans('','',string.punctuation)\n", 863 | " text = [word.translate(punc_table) for word in text]\n", 864 | " \n", 865 | " # 3. 
remove stop words\n", 866 | " text = [word for word in text if word not in stop_words]\n", 867 | " \n", 868 | " return text" 869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "execution_count": 19, 874 | "metadata": {}, 875 | "outputs": [], 876 | "source": [ 877 | "# calling my pre-process to tokenize \n", 878 | "tokenized_doc = []\n", 879 | "doc = df_job_descriptions['jd_combo']\n", 880 | "#doc = docs_sample\n", 881 | "for d in doc:\n", 882 | " tokenized_doc.append(preprocess(d))\n", 883 | "#tokenized_doc" 884 | ] 885 | }, 886 | { 887 | "cell_type": "code", 888 | "execution_count": 20, 889 | "metadata": {}, 890 | "outputs": [], 891 | "source": [ 892 | "# Convert tokenized documents into gensim-formatted tagged data\n", 893 | "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]" 894 | ] 895 | }, 896 | { 897 | "cell_type": "code", 898 | "execution_count": 21, 899 | "metadata": {}, 900 | "outputs": [ 901 | { 902 | "data": { 903 | "text/plain": [ 904 | "38941" 905 | ] 906 | }, 907 | "execution_count": 21, 908 | "metadata": {}, 909 | "output_type": "execute_result" 910 | } 911 | ], 912 | "source": [ 913 | "num_doc = len(tagged_data)\n", 914 | "num_doc" 915 | ] 916 | }, 917 | { 918 | "cell_type": "code", 919 | "execution_count": 24, 920 | "metadata": {}, 921 | "outputs": [], 922 | "source": [ 923 | "\n", 924 | "# callback to save a model checkpoint at the end of every epoch\n", 925 | "from gensim.test.utils import get_tmpfile\n", 926 | "from gensim.models.callbacks import CallbackAny2Vec\n", 927 | "\n", 928 | "class EpochSaver(CallbackAny2Vec):\n", 929 | "\n", 930 | " def __init__(self, path_prefix):\n", 931 | " self.path_prefix = path_prefix\n", 932 | " self.epoch = 0\n", 933 | "\n", 934 | " def on_epoch_end(self, model):\n", 935 | " output_path = get_tmpfile('{}_epoch{}.model'.format(self.path_prefix, self.epoch))\n", 936 | " model.save(output_path)\n", 937 | " self.epoch += 1" 938 | ] 939 | }, 940 | { 941 | "cell_type": "code", 942 | "execution_count": 25, 943 | "metadata": {}, 944 | "outputs": [], 945 | "source": [ 946 | "# callback to show epoch progress during training\n", 947 | "class EpochLogger(CallbackAny2Vec):\n", 948 | " \n", 949 | " def __init__(self):\n", 950 | " self.epoch = 0\n", 951 | " \n", 952 | " def on_epoch_begin(self, model):\n", 953 | " print(\"Epoch #{} start\".format(self.epoch))\n", 954 | "\n", 955 | " def on_epoch_end(self, model):\n", 956 | " print(\"Epoch #{} end\".format(self.epoch))\n", 957 | " self.epoch += 1" 958 | ] 959 | }, 960 | { 961 | "cell_type": "code", 962 | "execution_count": 27, 963 | "metadata": { 964 | "scrolled": true 965 | }, 966 | "outputs": [ 967 | { 968 | "name": "stdout", 969 | "output_type": "stream", 970 | "text": [ 971 | "Epoch #0 start\n", 972 | "Epoch #0 end\n", 973 | "Epoch #1 start\n", 974 | "Epoch #1 end\n", 975 | "Epoch #2 start\n", 976 | "Epoch #2 end\n", 977 | "Epoch #3 start\n", 978 | "Epoch #3 end\n", 979 | "Epoch #4 start\n", 980 | "Epoch #4 end\n", 981 | "Epoch #5 start\n", 982 | "Epoch #5 end\n", 983 | "Epoch #6 start\n", 984 | "Epoch #6 end\n", 985 | "Epoch #7 start\n", 986 | "Epoch #7 end\n", 987 | "Epoch #8 start\n", 988 | "Epoch #8 end\n", 989 | "Epoch #9 start\n", 990 | "Epoch #9 end\n", 991 | "Epoch #10 start\n", 992 | "Epoch #10 end\n", 993 | "Epoch #11 start\n", 994 | "Epoch #11 end\n", 995 | "Epoch #12 start\n", 996 | "Epoch #12 end\n", 997 | "Epoch #13 start\n", 998 | "Epoch #13 end\n", 999 | "Epoch #14 start\n", 1000 | "Epoch #14 end\n", 1001 | "Epoch #15 start\n", 1002 | "Epoch #15 end\n", 1003 | "Epoch #16 start\n", 1004 | "Epoch 
960 | { 961 | "cell_type": "code", 962 | "execution_count": 27, 963 | "metadata": { 964 | "scrolled": true 965 | }, 966 | "outputs": [ 967 | { 968 | "name": "stdout", 969 | "output_type": "stream", 970 | "text": [ 971 | "Epoch #0 start\n", 972 | "Epoch #0 end\n", 973 | "Epoch #1 start\n", 974 | "Epoch #1 end\n", "... (log for epochs #2 through #198 trimmed: the start/end pattern repeats unchanged) ...\n", 1369 | "Epoch #199 start\n", 1370 | "Epoch #199 end\n" 1371 | ] 1372 | } 1373 | ], 1374 | "source": [ 1375 | "# train the final doc2vec model: 20-dimensional vectors, 200 epochs\n", 1376 | "epoch_logger = EpochLogger()\n", 1378 | "model1 = Doc2Vec(tagged_data, vector_size=20, window=2, min_count=1, workers=4, epochs=200, callbacks=[epoch_logger])\n" 1379 | ] 1380 | },
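Before persisting the model, a common sanity check is to re-infer a vector for one of the training documents and confirm that the model ranks that document's own tag near the top. A sketch, assuming gensim 4.x (model1.dv; on the 3.x series the accessor was model1.docvecs):

v = model1.infer_vector(tokenized_doc[0])
print(model1.dv.most_similar([v], topn=3))  # tag 0 should appear with high similarity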
| "Epoch #82 end\n", 1137 | "Epoch #83 start\n", 1138 | "Epoch #83 end\n", 1139 | "Epoch #84 start\n", 1140 | "Epoch #84 end\n", 1141 | "Epoch #85 start\n", 1142 | "Epoch #85 end\n", 1143 | "Epoch #86 start\n", 1144 | "Epoch #86 end\n", 1145 | "Epoch #87 start\n", 1146 | "Epoch #87 end\n", 1147 | "Epoch #88 start\n", 1148 | "Epoch #88 end\n", 1149 | "Epoch #89 start\n", 1150 | "Epoch #89 end\n", 1151 | "Epoch #90 start\n", 1152 | "Epoch #90 end\n", 1153 | "Epoch #91 start\n", 1154 | "Epoch #91 end\n", 1155 | "Epoch #92 start\n", 1156 | "Epoch #92 end\n", 1157 | "Epoch #93 start\n", 1158 | "Epoch #93 end\n", 1159 | "Epoch #94 start\n", 1160 | "Epoch #94 end\n", 1161 | "Epoch #95 start\n", 1162 | "Epoch #95 end\n", 1163 | "Epoch #96 start\n", 1164 | "Epoch #96 end\n", 1165 | "Epoch #97 start\n", 1166 | "Epoch #97 end\n", 1167 | "Epoch #98 start\n", 1168 | "Epoch #98 end\n", 1169 | "Epoch #99 start\n", 1170 | "Epoch #99 end\n", 1171 | "Epoch #100 start\n", 1172 | "Epoch #100 end\n", 1173 | "Epoch #101 start\n", 1174 | "Epoch #101 end\n", 1175 | "Epoch #102 start\n", 1176 | "Epoch #102 end\n", 1177 | "Epoch #103 start\n", 1178 | "Epoch #103 end\n", 1179 | "Epoch #104 start\n", 1180 | "Epoch #104 end\n", 1181 | "Epoch #105 start\n", 1182 | "Epoch #105 end\n", 1183 | "Epoch #106 start\n", 1184 | "Epoch #106 end\n", 1185 | "Epoch #107 start\n", 1186 | "Epoch #107 end\n", 1187 | "Epoch #108 start\n", 1188 | "Epoch #108 end\n", 1189 | "Epoch #109 start\n", 1190 | "Epoch #109 end\n", 1191 | "Epoch #110 start\n", 1192 | "Epoch #110 end\n", 1193 | "Epoch #111 start\n", 1194 | "Epoch #111 end\n", 1195 | "Epoch #112 start\n", 1196 | "Epoch #112 end\n", 1197 | "Epoch #113 start\n", 1198 | "Epoch #113 end\n", 1199 | "Epoch #114 start\n", 1200 | "Epoch #114 end\n", 1201 | "Epoch #115 start\n", 1202 | "Epoch #115 end\n", 1203 | "Epoch #116 start\n", 1204 | "Epoch #116 end\n", 1205 | "Epoch #117 start\n", 1206 | "Epoch #117 end\n", 1207 | "Epoch #118 start\n", 1208 | "Epoch #118 end\n", 1209 | "Epoch #119 start\n", 1210 | "Epoch #119 end\n", 1211 | "Epoch #120 start\n", 1212 | "Epoch #120 end\n", 1213 | "Epoch #121 start\n", 1214 | "Epoch #121 end\n", 1215 | "Epoch #122 start\n", 1216 | "Epoch #122 end\n", 1217 | "Epoch #123 start\n", 1218 | "Epoch #123 end\n", 1219 | "Epoch #124 start\n", 1220 | "Epoch #124 end\n", 1221 | "Epoch #125 start\n", 1222 | "Epoch #125 end\n", 1223 | "Epoch #126 start\n", 1224 | "Epoch #126 end\n", 1225 | "Epoch #127 start\n", 1226 | "Epoch #127 end\n", 1227 | "Epoch #128 start\n", 1228 | "Epoch #128 end\n", 1229 | "Epoch #129 start\n", 1230 | "Epoch #129 end\n", 1231 | "Epoch #130 start\n", 1232 | "Epoch #130 end\n", 1233 | "Epoch #131 start\n", 1234 | "Epoch #131 end\n", 1235 | "Epoch #132 start\n", 1236 | "Epoch #132 end\n", 1237 | "Epoch #133 start\n", 1238 | "Epoch #133 end\n", 1239 | "Epoch #134 start\n", 1240 | "Epoch #134 end\n", 1241 | "Epoch #135 start\n", 1242 | "Epoch #135 end\n", 1243 | "Epoch #136 start\n", 1244 | "Epoch #136 end\n", 1245 | "Epoch #137 start\n", 1246 | "Epoch #137 end\n", 1247 | "Epoch #138 start\n", 1248 | "Epoch #138 end\n", 1249 | "Epoch #139 start\n", 1250 | "Epoch #139 end\n", 1251 | "Epoch #140 start\n", 1252 | "Epoch #140 end\n", 1253 | "Epoch #141 start\n", 1254 | "Epoch #141 end\n", 1255 | "Epoch #142 start\n", 1256 | "Epoch #142 end\n", 1257 | "Epoch #143 start\n", 1258 | "Epoch #143 end\n", 1259 | "Epoch #144 start\n", 1260 | "Epoch #144 end\n", 1261 | "Epoch #145 start\n", 1262 | "Epoch #145 end\n", 1263 | "Epoch #146 start\n", 1264 | 
"Epoch #146 end\n", 1265 | "Epoch #147 start\n", 1266 | "Epoch #147 end\n", 1267 | "Epoch #148 start\n", 1268 | "Epoch #148 end\n", 1269 | "Epoch #149 start\n", 1270 | "Epoch #149 end\n", 1271 | "Epoch #150 start\n", 1272 | "Epoch #150 end\n", 1273 | "Epoch #151 start\n", 1274 | "Epoch #151 end\n", 1275 | "Epoch #152 start\n", 1276 | "Epoch #152 end\n", 1277 | "Epoch #153 start\n", 1278 | "Epoch #153 end\n", 1279 | "Epoch #154 start\n", 1280 | "Epoch #154 end\n", 1281 | "Epoch #155 start\n", 1282 | "Epoch #155 end\n", 1283 | "Epoch #156 start\n", 1284 | "Epoch #156 end\n", 1285 | "Epoch #157 start\n", 1286 | "Epoch #157 end\n", 1287 | "Epoch #158 start\n", 1288 | "Epoch #158 end\n", 1289 | "Epoch #159 start\n", 1290 | "Epoch #159 end\n", 1291 | "Epoch #160 start\n", 1292 | "Epoch #160 end\n", 1293 | "Epoch #161 start\n", 1294 | "Epoch #161 end\n", 1295 | "Epoch #162 start\n", 1296 | "Epoch #162 end\n", 1297 | "Epoch #163 start\n", 1298 | "Epoch #163 end\n", 1299 | "Epoch #164 start\n", 1300 | "Epoch #164 end\n", 1301 | "Epoch #165 start\n", 1302 | "Epoch #165 end\n", 1303 | "Epoch #166 start\n", 1304 | "Epoch #166 end\n", 1305 | "Epoch #167 start\n", 1306 | "Epoch #167 end\n", 1307 | "Epoch #168 start\n", 1308 | "Epoch #168 end\n", 1309 | "Epoch #169 start\n", 1310 | "Epoch #169 end\n", 1311 | "Epoch #170 start\n", 1312 | "Epoch #170 end\n", 1313 | "Epoch #171 start\n", 1314 | "Epoch #171 end\n", 1315 | "Epoch #172 start\n", 1316 | "Epoch #172 end\n", 1317 | "Epoch #173 start\n", 1318 | "Epoch #173 end\n", 1319 | "Epoch #174 start\n", 1320 | "Epoch #174 end\n", 1321 | "Epoch #175 start\n", 1322 | "Epoch #175 end\n", 1323 | "Epoch #176 start\n", 1324 | "Epoch #176 end\n", 1325 | "Epoch #177 start\n", 1326 | "Epoch #177 end\n", 1327 | "Epoch #178 start\n", 1328 | "Epoch #178 end\n", 1329 | "Epoch #179 start\n", 1330 | "Epoch #179 end\n", 1331 | "Epoch #180 start\n", 1332 | "Epoch #180 end\n", 1333 | "Epoch #181 start\n", 1334 | "Epoch #181 end\n", 1335 | "Epoch #182 start\n", 1336 | "Epoch #182 end\n", 1337 | "Epoch #183 start\n", 1338 | "Epoch #183 end\n", 1339 | "Epoch #184 start\n", 1340 | "Epoch #184 end\n", 1341 | "Epoch #185 start\n", 1342 | "Epoch #185 end\n", 1343 | "Epoch #186 start\n", 1344 | "Epoch #186 end\n", 1345 | "Epoch #187 start\n", 1346 | "Epoch #187 end\n", 1347 | "Epoch #188 start\n", 1348 | "Epoch #188 end\n", 1349 | "Epoch #189 start\n", 1350 | "Epoch #189 end\n", 1351 | "Epoch #190 start\n", 1352 | "Epoch #190 end\n", 1353 | "Epoch #191 start\n", 1354 | "Epoch #191 end\n", 1355 | "Epoch #192 start\n", 1356 | "Epoch #192 end\n", 1357 | "Epoch #193 start\n", 1358 | "Epoch #193 end\n", 1359 | "Epoch #194 start\n", 1360 | "Epoch #194 end\n", 1361 | "Epoch #195 start\n", 1362 | "Epoch #195 end\n", 1363 | "Epoch #196 start\n", 1364 | "Epoch #196 end\n", 1365 | "Epoch #197 start\n", 1366 | "Epoch #197 end\n", 1367 | "Epoch #198 start\n", 1368 | "Epoch #198 end\n", 1369 | "Epoch #199 start\n", 1370 | "Epoch #199 end\n" 1371 | ] 1372 | } 1373 | ], 1374 | "source": [ 1375 | "#train model - final******** with 200 epochs\n", 1376 | "epoch_logger = EpochLogger()\n", 1377 | "## Train doc2vec model\n", 1378 | "model1 = Doc2Vec(tagged_data, vector_size=20, window=2, min_count=1, workers=4, epochs = 200, callbacks=[epoch_logger])\n" 1379 | ] 1380 | }, 1381 | { 1382 | "cell_type": "code", 1383 | "execution_count": 28, 1384 | "metadata": {}, 1385 | "outputs": [], 1386 | "source": [ 1387 | "# Save trained doc2vec model\n", 1388 | "model1.save(\"Model/my_doc2vec_v2.model\")" 1389 | ] 1390 
1391 | { 1392 | "cell_type": "code", 1393 | "execution_count": 30, 1394 | "metadata": {}, 1395 | "outputs": [], 1396 | "source": [ 1397 | "# Load the saved doc2vec model\n", 1398 | "model1 = Doc2Vec.load(\"Model/my_doc2vec_v2.model\")" 1399 | ] 1400 | }, 1401 | { 1402 | "cell_type": "code", 1403 | "execution_count": 31, 1404 | "metadata": {}, 1405 | "outputs": [ 1406 | { 1407 | "data": { 1408 | "text/plain": [ 1409 | "38941" 1410 | ] 1411 | }, 1412 | "execution_count": 31, 1413 | "metadata": {}, 1414 | "output_type": "execute_result" 1415 | } 1416 | ], 1417 | "source": [ 1418 | "# confirm length (should be 38941, one entry per job)\n", 1419 | "len(tokenized_doc)" 1420 | ] 1421 | }, 1422 | { 1423 | "cell_type": "code", 1424 | "execution_count": 35, 1425 | "metadata": {}, 1426 | "outputs": [], 1427 | "source": [ 1428 | "# Infer a doc2vec vector for each tokenized job description\n", 1429 | "vec = np.empty([num_doc, 20])\n", 1430 | "\n", 1431 | "for k, tokens in enumerate(tokenized_doc):\n", 1434 | "    vector = model1.infer_vector(tokens)\n", 1435 | "    vec[k] = vector\n", 1438 | "\n", 1439 | "# vec is already (num_doc, 20); the reshape is a defensive no-op\n", 1440 | "new_arr = np.reshape(vec, (-1, 20))" 1441 | ] 1442 | }, 1443 | { 1444 | "cell_type": "code", 1445 | "execution_count": 36, 1446 | "metadata": {}, 1447 | "outputs": [], 1448 | "source": [ 1449 | "rng = range(1, 21)\n", 1450 | "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])" 1451 | ] 1452 | }, 1453 | { 1454 | "cell_type": "code", 1455 | "execution_count": 37, 1456 | "metadata": {}, 1457 | "outputs": [ 1458 | { 1459 | "name": "stdout", 1460 | "output_type": "stream", 1461 | "text": [ 1462 | "<class 'pandas.core.frame.DataFrame'>\n", 1463 | "RangeIndex: 38941 entries, 0 to 38940\n", 1464 | "Data columns (total 20 columns):\n", 1465 | " # Column Non-Null Count Dtype \n", 1466 | "--- ------ -------------- ----- \n", 1467 | " 0 vec_1 38941 non-null float64\n", 1468 | " 1 vec_2 38941 non-null float64\n", 1469 | " 2 vec_3 38941 non-null float64\n", 1470 | " 3 vec_4 38941 non-null float64\n", 1471 | " 4 vec_5 38941 non-null float64\n", 1472 | " 5 vec_6 38941 non-null float64\n", 1473 | " 6 vec_7 38941 non-null float64\n", 1474 | " 7 vec_8 38941 non-null float64\n", 1475 | " 8 vec_9 38941 non-null float64\n", 1476 | " 9 vec_10 38941 non-null float64\n", 1477 | " 10 vec_11 38941 non-null float64\n", 1478 | " 11 vec_12 38941 non-null float64\n", 1479 | " 12 vec_13 38941 non-null float64\n", 1480 | " 13 vec_14 38941 non-null float64\n", 1481 | " 14 vec_15 38941 non-null float64\n", 1482 | " 15 vec_16 38941 non-null float64\n", 1483 | " 16 vec_17 38941 non-null float64\n", 1484 | " 17 vec_18 38941 non-null float64\n", 1485 | " 18 vec_19 38941 non-null float64\n", 1486 | " 19 vec_20 38941 non-null float64\n", 1487 | "dtypes: float64(20)\n", 1488 | "memory usage: 5.9 MB\n" 1489 | ] 1490 | } 1491 | ], 1492 | "source": [ 1493 | "vec_df.info()" 1494 | ] 1495 | }, 1496 | { 1497 | "cell_type": "code", 1498 | "execution_count": 38, 1499 | "metadata": {}, 1500 | "outputs": [], 1501 | "source": [ 1502 | "con_job_1 = pd.concat([job, vec_df], axis=1)" 1503 | ] 1504 | },
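These per-job vectors are the half of the matching that this notebook produces; the resume side is inferred with the same model and compared by cosine similarity. A minimal sketch of that matching step, with a hypothetical resume string (assumes preprocess, model1, new_arr and df_job_descriptions from the cells above):

from sklearn.metrics.pairwise import cosine_similarity

resume_tokens = preprocess('experienced pl/sql developer with oracle and etl background')  # hypothetical resume text
resume_vec = model1.infer_vector(resume_tokens).reshape(1, -1)
sims = cosine_similarity(resume_vec, new_arr)[0]  # similarity against all 38941 job vectors
top10 = sims.argsort()[::-1][:10]                 # row indices of the 10 closest jobs
print(df_job_descriptions.iloc[top10][['jobtitle', 'company']])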
\n", 1512 | "con_job_1.to_csv('wip/con_job_1.csv', index=False)" 1513 | ] 1514 | }, 1515 | { 1516 | "cell_type": "code", 1517 | "execution_count": null, 1518 | "metadata": {}, 1519 | "outputs": [], 1520 | "source": [] 1521 | } 1522 | ], 1523 | "metadata": { 1524 | "kernelspec": { 1525 | "display_name": "Python 3", 1526 | "language": "python", 1527 | "name": "python3" 1528 | }, 1529 | "language_info": { 1530 | "codemirror_mode": { 1531 | "name": "ipython", 1532 | "version": 3 1533 | }, 1534 | "file_extension": ".py", 1535 | "mimetype": "text/x-python", 1536 | "name": "python", 1537 | "nbconvert_exporter": "python", 1538 | "pygments_lexer": "ipython3", 1539 | "version": "3.7.6" 1540 | } 1541 | }, 1542 | "nbformat": 4, 1543 | "nbformat_minor": 4 1544 | } 1545 | --------------------------------------------------------------------------------