├── Images
│   ├── php.png
│   ├── java.png
│   ├── oracle.png
│   ├── php-word.png
│   ├── python0.png
│   ├── python1.png
│   ├── python2.png
│   ├── oracle-Word.png
│   ├── python_word.png
│   └── python_word1.png
├── 5-Images
│   ├── java.png
│   ├── php.png
│   ├── oracle.png
│   ├── php-word.png
│   ├── python0.png
│   ├── oracle-Word.png
│   ├── python_word.png
│   ├── python_word1.png
│   └── Directory_Structure.png
├── 4-Top_recommendations
│   ├── center.css
│   ├── load_css.py
│   ├── style.css
│   └── job_output.py
├── Data
│   ├── Job-Locations
│   │   ├── state.csv
│   │   └── india-city-state.csv
│   └── working_jd_sample.csv
├── README.md
└── 2-Preprocessing_and_Modelling
    ├── Pre-processing_Resume for matchingv2.ipynb
    ├── Pre-processing_Resume for matchingv1.ipynb
    └── Pre-processing Jobs for modellingv2.ipynb
/Images/php.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/php.png
--------------------------------------------------------------------------------
/5-Images/java.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/java.png
--------------------------------------------------------------------------------
/5-Images/php.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/php.png
--------------------------------------------------------------------------------
/Images/java.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/java.png
--------------------------------------------------------------------------------
/Images/oracle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/oracle.png
--------------------------------------------------------------------------------
/5-Images/oracle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/oracle.png
--------------------------------------------------------------------------------
/Images/php-word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/php-word.png
--------------------------------------------------------------------------------
/Images/python0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python0.png
--------------------------------------------------------------------------------
/Images/python1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python1.png
--------------------------------------------------------------------------------
/Images/python2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python2.png
--------------------------------------------------------------------------------
/5-Images/php-word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/php-word.png
--------------------------------------------------------------------------------
/5-Images/python0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python0.png
--------------------------------------------------------------------------------
/Images/oracle-Word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/oracle-Word.png
--------------------------------------------------------------------------------
/Images/python_word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python_word.png
--------------------------------------------------------------------------------
/5-Images/oracle-Word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/oracle-Word.png
--------------------------------------------------------------------------------
/5-Images/python_word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python_word.png
--------------------------------------------------------------------------------
/Images/python_word1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python_word1.png
--------------------------------------------------------------------------------
/5-Images/python_word1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python_word1.png
--------------------------------------------------------------------------------
/5-Images/Directory_Structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/Directory_Structure.png
--------------------------------------------------------------------------------
/4-Top_recommendations/center.css:
--------------------------------------------------------------------------------
1 | body {
2 | background-color: #eee;
3 | }
4 |
5 | .fullScreenFrame > div {
6 | display: flex;
7 | justify-content: center;
8 | }
--------------------------------------------------------------------------------
/4-Top_recommendations/load_css.py:
--------------------------------------------------------------------------------
1 |
2 | import streamlit as st
3 |
4 | def local_css(file_name):
5 |     with open(file_name) as f:
6 |         st.markdown('<style>{}</style>'.format(f.read()), unsafe_allow_html=True)
--------------------------------------------------------------------------------
/4-Top_recommendations/style.css:
--------------------------------------------------------------------------------
1 | .highlight {
2 | border-radius: 0.2rem;
3 | color: white;
4 | padding: 0.1rem;
5 | margin-bottom: 1rem;
6 | }
7 | .bold {
8 | padding-left: 1rem;
9 | font-weight: 700;
10 | }
11 | .blue {
12 | background-color: rgba(19, 179, 139, 0.842);
13 | }
14 | .red {
15 | background-color: rgb(207, 79, 79);
16 | }
17 | .orange {
18 | background-color: rgb(202, 107, 17);
19 | }
20 | .green {
21 | background-color: rgb(19, 190, 42);
22 | }
--------------------------------------------------------------------------------
/Data/Job-Locations/state.csv:
--------------------------------------------------------------------------------
1 | State_id,State
2 | 1,India
3 | 2,Andaman & Nicobar Islands
4 | 2,Andhra Pradesh
5 | 3,Arunachal Pradesh
6 | 4,Assam
7 | 5,Bihar
8 | 6,Chhattisgarh
9 | 7,Dadra & Nagar Haveli
10 | 8,Daman & Diu
11 | 9,Delhi
12 | 10,Goa
13 | 11,Gujarat
14 | 12,Haryana
15 | 13,Himachal Pradesh
16 | 14,Jammu & Kashmir
17 | 15,Jharkhand
18 | 16,Karnataka
19 | 17,Kerala
20 | 18,Lakshadweep
21 | 19,Madhya Pradesh
22 | 20,Maharashtra
23 | 21,Manipur
24 | 22,Meghalaya
25 | 23,Mizoram
26 | 24,Nagaland
27 | 25,Orissa
28 | 26,Pondicherry
29 | 27,Punjab
30 | 28,Rajasthan
31 | 29,Sikkim
32 | 30,Tamil Nadu
33 | 31,Uttar Pradesh
34 | 32,Uttarakhand
35 | 33,West Bengal
36 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bidirectional-Job-Resume-Recommender-System
2 | ## Introduction:
3 | A must-have tool for job seekers and recruiters. This project finds and recommends the best fit in both directions: job seekers can find the jobs that best match their resume, and recruiters can find the best-fitting resumes for any job posting. It is based on machine-learning NLP concepts, matching text content via Doc2Vec and similarity scores.
4 | The primary feature of this recommender system is its robust, bidirectional nature: it enables both job seekers and recruiters to find the best fit.
5 | 1. It reads the resume features and finds the top-n relevant jobs based on education, work experience, location, and text content.
6 |
7 | 2. The same code can be used to find the best-matching resumes for a job posting, based on the same features.
8 |
9 | The project makes extensive use of NLP techniques:
10 |
11 | • Tokenization
12 |
13 | • Lemmatization (English)
14 |
15 | -- Tried WordNet, spaCy, and TextBlob
16 |
17 | -- spaCy replaced identified pronouns with the -PRON- token
18 |
19 | -- NLTK WordNet and TextBlob gave the same results, so WordNet was chosen (see the sketch after this list)
20 |
21 | • Count vectorization
22 |
23 | • TF-IDF
24 |
25 | • Entity extraction
26 |
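A minimal sketch of the WordNet-based cleanup described above, assuming NLTK (the sample sentence and token filter are illustrative; the full pipeline lives in the notebooks under 2-Preprocessing_and_Modelling):

```python
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(text):
    # Tokenize, drop stopwords and punctuation, then lemmatize each token
    tokens = word_tokenize(text.lower())
    return [lemmatizer.lemmatize(t) for t in tokens
            if t.isalpha() and t not in stop_words]

print(clean_text("Experienced Java developers building scalable services"))
# -> ['experienced', 'java', 'developer', 'building', 'scalable', 'service']
```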
27 |
28 | ## Model
29 |
30 | Text data is used to train a **Doc2Vec** model. Rather than working on the frequency of each word, Doc2Vec creates a numeric representation of each document as an n-dimensional vector.
31 |
32 | Cosine similarity is then used to find the closest matches and recommend the top-n results, as sketched below.
33 |
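A minimal sketch of this train-and-match flow with gensim, assuming the 20-D vectors and 200 epochs used for the final model elsewhere in this repo (the toy corpus and tags are illustrative):

```python
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.utils import simple_preprocess

# Toy stand-ins for the combined title/description/skills text per job
jobs_text = ["java developer spring hibernate web services",
             "python data scientist machine learning sql"]

corpus = [TaggedDocument(simple_preprocess(doc), [i])
          for i, doc in enumerate(jobs_text)]

# 20-dimensional document vectors, trained for 200 epochs
model = Doc2Vec(corpus, vector_size=20, epochs=200, min_count=1)

# Infer a vector for a resume and rank jobs by cosine similarity
resume_vec = model.infer_vector(simple_preprocess("python sql machine learning"))
print(model.dv.most_similar([resume_vec], topn=2))  # [(job tag, cosine score), ...]
```
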
34 | ## Directory Structure
35 | 
36 |
37 |
38 | ## Directory Details
39 |
40 | ### [1-Data_gathering_EDA](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/1-Data_gathering_EDA)
41 | Job_EDA.ipynb – Gathers raw data from CSV and performs EDA on jobs
42 | Resume_EDA – Gathers raw data from CSV and performs EDA on resumes
43 | fuzzy-wuzzy-logic-Resume_EDA.ipynb – Groups similar titles based on a similarity score. The same title is often written in different forms, e.g. Java Developer, Dev (java), Jave Deve., all of which should map to Java Developer. FuzzyWuzzy resolves this, as in the sketch below.
44 |
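A minimal sketch of that fuzzy title normalization, assuming the fuzzywuzzy package (the canonical list and the cutoff of 70 are illustrative):

```python
from fuzzywuzzy import fuzz, process

canonical_titles = ["java developer", "python developer", "oracle dba"]
raw_titles = ["Dev (java)", "Jave Deve.", "sr. python developer"]

for title in raw_titles:
    # Best canonical match and its 0-100 similarity score
    match, score = process.extractOne(title, canonical_titles,
                                      scorer=fuzz.token_set_ratio)
    if score >= 70:  # illustrative cutoff
        print(f"{title!r} -> {match!r} (score {score})")
```
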
45 | ### [2-Preprocessing_and_Modelling](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/2-Preprocessing_and_Modelling)
46 | Pre-processing Jobs for modellingv1.ipynb – First iteration of the Doc2Vec model on jobs text data
47 | Pre-processing Jobs for modellingv2.ipynb – Second and final iteration of the Doc2Vec model on jobs
48 | Pre-processing_Resume for matchingv1.ipynb – First iteration of the Doc2Vec model on resume text data
49 | Pre-processing_Resume for matchingv2.ipynb – Second and final iteration of the Doc2Vec model on resumes
50 | ** Looking at v2 alone is enough to understand the flow.
51 |
52 | ### [3-Matching_Sprints](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/3-Matching_Sprints)
53 | Sprint1_matching_resume_to_jobs.ipynb
54 | Sprint2_matching_resume_to_jobs-with-location-add-on.ipynb
55 | Sprint3_matching_resume_to_jobs-with-text-add-on.ipynb
56 | Sprint4_matching_resume_to_jobs-final.ipynb
57 |
58 | ### [4-Top_recommendations](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/4-Top_recommendations)
59 | job_output.py – Python file run via Streamlit; provides an interactive user interface to input a resume and get the top 10 jobs
60 | center.css – Support file to center-align text and images
61 | load_css.py – Support file that injects local CSS for a better UI
62 | style.css – Support file for color coding in Streamlit
63 | ** Focusing on job_output.py alone is enough to understand the code; the CSS files are wired in as sketched below.
64 |
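A minimal sketch of how these support files plug into a Streamlit script (the highlight/blue class names come from style.css; the snippet text is illustrative):

```python
import streamlit as st
from load_css import local_css

local_css("style.css")   # inject the color-coding classes
local_css("center.css")  # center-align content

# Color-code a phrase using the classes defined in style.css
st.markdown('<span class="highlight blue">Top match</span>',
            unsafe_allow_html=True)
```
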
65 | ### [5-Images](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/5-Images)
66 | Contains images used or created during development
67 |
68 | ### 6-Model
69 | Contains the final model, so it can simply be loaded and run (a Doc2Vec model trained on 40,000 jobs with 20-D vectors for 200 epochs), as sketched below.
70 |
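A minimal sketch of loading and querying the saved model (the file name doc2vec.model is an assumption; substitute the actual file shipped in 6-Model):

```python
from gensim.models.doc2vec import Doc2Vec
from gensim.utils import simple_preprocess

# Hypothetical path; point this at the model file inside 6-Model
model = Doc2Vec.load("6-Model/doc2vec.model")

tokens = simple_preprocess("senior java developer spring hibernate")
vec = model.infer_vector(tokens)              # 20-D document vector
print(model.dv.most_similar([vec], topn=10))  # ten most similar tagged jobs
```
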
71 | ### [Data](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/Data)
72 | Due to file size limitations, only sample datasets are included.
73 |
74 | • Resumes: Contains a sample of 15 resumes in .csv format (for the look and feel of the dataset)
75 |
76 | • Jobs: Contains a sample of 15 jobs in .csv format (for the look and feel of the dataset)
77 |
78 | • Actual datasets can be found on Kaggle:
79 |
80 | https://www.kaggle.com/PromptCloudHQ/jobs-on-naukricom
81 |
82 | https://www.kaggle.com/avanisiddhapura27/resume-dataset
83 |
84 |
85 |
86 | ## Resources
87 | • Datasets
88 |
89 | o https://www.kaggle.com/
90 |
91 | o https://www.britannica.com/
92 |
93 | • Lemmatization Approaches with Examples in Python
94 |
95 | o https://www.machinelearningplus.com/
96 |
97 | • Doc2Vec Tutorial and Implementation
98 |
99 | o https://radimrehurek.com/gensim/
100 |
101 | o https://towardsdatascience.com/
102 |
103 | • Fuzzy-Wuzzy Matching
104 |
105 | o https://towardsdatascience.com/
106 |
107 | • And a shoutout to –
108 |
109 | o Scikit-learn documentation
110 |
111 | o GeeksforGeeks
112 |
113 | o Stack Overflow
114 |
--------------------------------------------------------------------------------
/4-Top_recommendations/job_output.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from PIL import Image
3 | # image = Image.open('title_page.png')
4 | # st.image(image,width = 600)
5 | st.markdown("
Bidirectional Job-Resume Recommender
", unsafe_allow_html=True)
6 | #st.markdown("Selected Resume Title : {R_title} "
89 | with open("center.css") as f:
90 |     st.markdown('<style>{}</style>'.format(f.read()), unsafe_allow_html=True)
91 | """
92 |
93 |
94 | """
95 | #st.markdown(f'{R_title}')
96 | loc_ex = f"
Location: {R_location}Total Experience: {R_total_exp}
"
97 | #st.markdown(f'**Current location:** {R_location} \t **Total Experience:** {R_total_exp}')
98 | st.markdown(loc_ex, unsafe_allow_html=True)
99 | #st.subheader(f'Experience description: {R_work_ex}')
100 |
101 |
102 | import ast
103 | # for index, rows in r2.iterrows():
104 | # resume_desc= []
105 | # #pick work experience col and read it as JSON
106 | result_work = r2['work_experiences']
107 | #st.subheader(result_work)
108 | #st.subheader(type(result_work))
109 | result_work = ast.literal_eval(result_work)
110 | #st.subheader(type(result_work))
111 | # try: result_work = eval(work)
112 | # except: continue
113 | # #read description
114 | #for i in result_work.keys():
115 | # st.subheader(i)
116 | w_title = (result_work[0][0]['wtitle:'])
117 | #st.markdown(f'')
118 | w_company= (result_work[0][1]['wcompany:'])
119 | t_com = f"Current Work Title : {w_title}Company : {w_company}
"
120 | w_city= (result_work[0][2]['wcity:'])
121 | w_state= (result_work[0][3]['wstate:'])
122 | w_duration= (result_work[0][4]['wduration:'])
123 | w_descr= (result_work[0][5]['wdescr:'])
124 | #des = f"Description : {w_descr}
"
125 | des = f"Description : {R_desc}
"
126 | #st.markdown(f'**Current Work Title :** {w_title} **Company :** {w_company}')
127 | st.markdown(t_com,unsafe_allow_html=True)
128 | st.markdown(des,unsafe_allow_html=True)
129 | #st.markdown(f'**Description :** {w_descr}')
130 |
131 |
132 |
133 | # from the pool of 34,000 jobs, select jobs whose titles relate to the resume in question (sql dba here)
134 | related_jobs = job.loc[job['jobtitle'].str.contains(match_key)]
135 | related_jobs = related_jobs.loc[related_jobs['location'] == r2['location']]
136 | #job features need to be matched with resume
137 | job_m = related_jobs[['j_id','experience_range','is_grad','is_postgrad','is_doc','location',
138 | 'vec_1','vec_2','vec_3','vec_4','vec_5','vec_6','vec_7','vec_8','vec_9','vec_10','vec_11','vec_12',
139 | 'vec_13','vec_14','vec_15','vec_16','vec_17','vec_18','vec_19','vec_20']]
140 | # """
141 | # *************************************************
142 | # """
143 | st.markdown('# System Recommended Top 10 Jobs : ')
144 | image = Image.open('jobs.png')
145 | st.image(image, width = 200) #, use_column_width=True)
146 | st.write('Recommendation is based on cosine similarity of multiple factors like skills, location, experience, education, description, title etc ')
147 | # call recommender by passing selected resume
148 | matched_jobs = jobs_recommender(r1)
149 | matched_jobs = matched_jobs.head(10)
150 |
151 | st.write(matched_jobs)
152 | st.write('**Note:** Similarity scores may round off to the nearest integer value, so it could be hard to visualize the difference, but results are displayed in ranked order.')
153 | """
154 | *************************************************
155 |
156 | """
157 | # st.markdown('# Phrases suggestions in word-cloud ')
158 | # st.write('WordCloud pulls words, pairs from all related jobs to form a cloud')
159 | # from PIL import Image
160 | # if match_key == 'java':
161 | # image = Image.open('java.png')
162 | # st.image(image, caption=(f'Suggestions for {match_key}'),
163 | # use_column_width=True)
164 | # elif match_key == 'oracle':
165 | # image = Image.open('oracle.png')
166 | # st.image(image, caption=(f'Suggestions for {match_key}'),
167 | # use_column_width=True)
168 | # elif match_key == 'php':
169 | # image = Image.open('php.png')
170 | # st.image(image, caption=(f'Suggestions for {match_key}'),
171 | # use_column_width=True)
172 | # elif match_key == 'python':
173 | # image = Image.open('python_word1.png')
174 | # st.image(image, caption=(f'Suggestions for {match_key}'),
175 | # use_column_width=True)
176 |
177 |
178 |
--------------------------------------------------------------------------------
/Data/working_jd_sample.csv:
--------------------------------------------------------------------------------
1 | company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id
2 | Covalense Technologies Private Limited,,5 - 9 yrs,IT-Software / Software Services,"Job Description  Send me Jobs like this Experience: 5yrs. - 9yrs. Job Description: * Engineering Graduate/Post-Graduate with 6-9 years of experience in Java Programming with strong in Object Oriented concepts. * Good Exposure to spring, hibernate, web services, Threading, Socket Programming, Collections, Data Structure and IO with strong knowledge on either Spring Batch or JEE frameworks.. * Experienced in XML configuration; setting up Eclipse or any other IDEs with basic knowledge of SQL. * Energetic with strong analytical, communication and interpersonal skills. * Ability to learn and apply the new concepts quickly. Preferred Skills * Working knowledge of Oracle/DB2, ClearCase/CVS. * Experienced in test tools like JUNIT. * Ability to build tools like Ant and Maven. Salary:INR 7,50,000 - 15,00,000 P.A Industry: IT-Software / Software Services Functional Area: IT Software - Client/Server Programming Role Category:Programming & Design Role:Team Lead/Technical Lead Keyskills Hibernate Spring Java Maven JUnit Ant JEE Eclipse Oracle Web Services AngularJS Desired Candidate Profile Please refer to the Job description above Company Profile: Covalense Technologies Private Limited  Covalense is an IT services and solutions company, established in 2006 and now with the 300 + professionals are working across the location.  Our office presence is in New Zealand, Australia , US and India.  Bangalore, New Zealand is majorly focusing on End-to-End Oracle Telecom stack and Hyderabad, New Zealand focusing on Microsoft, Open Source, Mobility Apps (MOM Services) development  There are multiple project engagements with Tier 1 SI partners in BLR and Gurgaon.  Our portfolio majorly consists of telecom implementations along with a wide selection of industry verticals. Download PPT Photo 1  View Contact Details",70916001822,Bengaluru,Java - SSE / Technical Lead,,"7,50,000 - 15,00,000 P.A",2016-10-12 16:21:02 +0000,,IT Software - Client/Server Programming,60b28f3eb5c9c5c004e0b86678d99b5e
3 | Cambio Consulting,"UG: B.Tech/B.E. - Any Specialization, Other Graduate PG:M.Tech - Any Specialization, MCA - Computers, M.S/M.D - Any Specialization, Other Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",4 - 9 yrs,IT-Software / Software Services,"Job Description  Send me Jobs like this Hi, we have urgent requirement for embedded developers Job Description: Very good proficiency in programming in Java Very good proficiency in designing software applications, e.g. using design patterns, employing test driven development Good understanding of the principles of GUI programming in the context of Web, e.g. GWT, CSS, JavaScript Knowledge of telecommunication management protocols like SNMP, REST Preferable to have exposure to protocols like DHCP, DNS, SIP etc. Experience in maintaining Continuous Integration Environments e.g. using Maven, Gradle, Subversion Strong competences in working in a team, e.g. using Agile Frameworks like Scrum If interested, Please forward your updated CV to rafi@cambio.co.in Salary:INR 7,00,000 - 17,00,000 P.A Industry: IT-Software / Software Services Functional Area: IT Software - System Programming Role Category:Programming & Design Role:Software Developer Desired Candidate Profile Education- UG: B.Tech/B.E. - Any Specialization, Other Graduate PG:M.Tech - Any Specialization, MCA - Computers, M.S/M.D - Any Specialization, Other Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Cambio Consulting We are established consulting firm providing a wide spectrum of services in the HR Domain consulting services. Our core belief is that people are the most valuable asset for any company. Leading from that is our aim to offer not just recruitment support but also be a strategic adviser to all our customers. We plan to achieve this by understanding the client's business process, industry domain and develop market intelligence in order to provide a right fit for all the positions. Download PPT Photo 1  View Contact Details",1.20317E+11,"Bengaluru/Bangalore , Hyderabad / Secunderabad",Java & NMS Development Openings @ Bangalore and Hyderabad,,"7,00,000 - 17,00,000 P.A",2016-03-11 02:30:18 +0000,,IT Software - System Programming,da267e3b96a4ed51faf0e610ea662c20
4 | SATYAM VENTURE ENGINEERING SERVICES,"UG: Any Graduate PG:Any Postgraduate Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",10 - 15 yrs,Automobile / Auto Anciliary / Auto Components,"Job Description  Send me Jobs like this Experience Profile - Experience in Should Costing of Product is must. Knowledge about various Manufacturing processes such as Stamping, Pressure Die Casting, Plastic Injection Molding etc., Knowledge about costing of various commodities such as Plastic parts, Al. Die Casting, Electric Motors, Automotive Seating System etc., Preferred knowledge in Costing softwares like aPriori, PCS, DFM/ DFA etc., Personal Attributes - Ability to develop a team Keenness to innovation, problem solving abilities, commitment to personal and professional growth, and eye for details. Presentation/ communication skills are prerequisites. Good team players Salary: Not Disclosed by Recruiter Industry: Automobile / Auto Anciliary / Auto Components Functional Area: Production , Manufacturing , Maintenance Role Category:Production/Manufacturing/Maintenance Role:Project Manager-Production/Manufacturing/Maintenance Desired Candidate Profile Education- UG: Any Graduate PG:Any Postgraduate Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: SATYAM VENTURE ENGINEERING SERVICES SATYAM VENTURE ENGINEERING SERVICES Download PPT Photo 1  View Contact Details",3.10317E+11,Hyderabad / Secunderabad,Project Manager,,Not Disclosed by Recruiter,2016-03-31 02:30:24 +0000,,Production,d6ff245ae99d79e4be094fbe47c50284
5 | Envision Enterprise Solutions Pvt Ltd,"UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",7 - 12 yrs,IT-Software / Software Services,"Job Description  Send me Jobs like this Project Managers with more than 7 years of experience of large project management and delivery. Working experience in multiple countries with multi culture onsite project engagements, with in Ability to manage stake holders with excellent written, verbal communication skills and team management abilities. Expertise in delivery models like waterfall and Agile is essential. PMP Qualification will be an advantage. Willing to travel to client locations globally Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Testing Engineer Desired Candidate Profile Education- UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Envision Enterprise Solutions Pvt Ltd Envision is a leading IT Solutions Provider and System Integrator, providing solutions for enterprises across the globe, to optimize resource utilization, streamline operations, reduce the costs, maximise return on investment. We provide cost effective solutions within budget and timelines. We are known for providing Enterprise Asset Management, Port and Terminal Automation Solutions, Transportation, Logistics, Enterprise Mobility Solutions, IOT, Smarter manufacturing, facilities solutions. Download PPT Photo 1  View Contact Details",70316503401,Hyderabad / Secunderabad,Project Managers,,Not Disclosed by Recruiter,2016-03-07 02:31:09 +0000,,IT Software - Application Programming,9f819c69b3578157baf8b83a5820b27e
6 | NEW HOPE MEDICAL CENTRE,"UG: B.B.A PG:MBA/PGDM Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",1 - 2 yrs,Medical / Healthcare / Hospitals,"Job Description  Send me Jobs like this Persons with great initiative, negotiation skills, sound financial insight, managerial skills to lead and support our new projects. Salary: Not Disclosed by Recruiter Industry: Medical / Healthcare / Hospitals Functional Area: Medical , Healthcare , R&D , Pharmaceuticals , Biotechnology Role Category:Drug Regulatory Affairs/Documentation Role:Regulatory Affairs Manager Desired Candidate Profile Education- UG: B.B.A PG:MBA/PGDM Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: NEW HOPE MEDICAL CENTRE NEW HOPE MEDICAL CENTRE Download PPT Photo 1  View Contact Details",1.60317E+11,Hyderabad / Secunderabad,Project Manager,,Not Disclosed by Recruiter,2016-03-16 02:31:16 +0000,,Medical,2a1d64deb55ed947cec34818eb7abf9a
7 | Maven Workforce,"UG: B.Tech/B.E. - Any Specialization PG:M.Tech - Any Specialization, MCA - Computers, M.Sc - Any Specialization Doctorate:Doctorate Not Required",5 - 10 yrs,IT-Software / Software Services,"Job Description  Send me Jobs like this 1. The candidate must have done atleast N3 of JLPT (Japanese Language Proficiency Test). Currently JLPT has 5 levels (previously 4 levels). If the candidate has N3 (current), N2 or N1 certification, we can consider. 2. Candidate must know atleast 1000 kanjis 3. Candidate must be good in speaking Japanese. There are no certifications to check that. If a person says he/she lived in Japan or speak business Japanese on a day-to- day basis. 4. M.A in Japanese (especially from Jawaharlal Nehru University (JNU), Delhi) and even if he/she has not cleared any JLPT certifications. 5. If a person has not done any certifications but lived in Japan and can read, write, speak Japanese can be considered. Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Other Role Category:Other Role:Outside Consultant Keyskills Japanese JLPT Japanese Interpreter Japanese translator Japanese Language proficiency test Desired Candidate Profile Education- UG: B.Tech/B.E. - Any Specialization PG:M.Tech - Any Specialization, MCA - Computers, M.Sc - Any Specialization Doctorate:Doctorate Not Required  Company Profile: Maven Workforce Leading client of Maven Workforce Download PPT Photo 1  View Contact Details",30516900761,"Delhi/NCR(National Capital Region) , Gurgaon",Japanese Interpreter,,Not Disclosed by Recruiter,2016-05-03 11:35:55 +0000,,IT Software - Other,30649a930cae66477fb4e0eb93f2ccf9
8 | Confidential,,2 - 5 yrs,IT-Software / Software Services,"Job Description  Send me Jobs like this hi all, please find the mentioned JD below, Skills: Dot.Net, C#, SQL, OOPS ,Web services Experience: 2 -5 Years Work Location: Bangalore M G Road Interview Timming- 10:00 A M to 2:00 P M Mode:C2H Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Keyskills .Net Desired Candidate Profile  Company Profile: Confidential Confidential Download PPT Photo 1  View Contact Details",1.30416E+11,Bengaluru,.Net Developer,,Not Disclosed by Recruiter,2016-10-06 16:21:31 +0000,,IT Software - Application Programming,b529711ee8c1b4c1bea4849d18594132
9 | Melstar Information Technologies Ltd,UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required,2 - 5 yrs,IT-Software / Software Services,"Job Description  Send me Jobs like this We have an urgent openings for .net developer Location : Bangalore Skills:.net, sql Exp : 2 to 4 Years, If you are interested please send your updated resume with following details: Full Name: Email ID: Phone: Primary skills: Total Exp : Relevant Exp: Minimum Notice Period: Current Company: Current CTC: Current Employment(Perm/Cont): Current Location: Preferred Location: Availability on weekend/WeekDay for a F2F discussion : PAN : Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Keyskills .net developer .net developer Desired Candidate Profile  Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required Company Profile: Melstar Information Technologies Ltd Melstar is a CMM Level global IT company with its headquarters in Mumbai; India. We have eight software development facilities, with eleven offices worldwide including the US,Europe and India. We offer a unique blend of domain expertise in the field of Banking, Finance, Insurance and Manufacturing. We are engaged in cutting-edge technologies like e-commerce,web development and dot-com projects with strong N-tier approach. We have global partnerships with IBM,Microsoft, Oracle,Informix and other IT Stalwarts. Our strong customer focus can be seen from the prestigious list of clients like Citibank N.A., IBM, Genpact, Standard Chartered, HP etc. Download PPT Photo 1  View Contact Details",71016900650,"Bengaluru, Delhi, Noida",.net Developer,,Not Disclosed by Recruiter,2016-10-07 16:21:40 +0000,,IT Software - Application Programming,e233e57a6b2eeefc24d43cbb58a86096
10 | Unitforce technologies Pvt. Ltd.,"UG: Any Graduate - Any Specialization PG:Any Postgraduate - Any Specialization Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",5 - 6 yrs,IT-Software / Software Services,"Job Description  Send me Jobs like this Net Framework 4.5 - ASP & C# - HTML5, Java Script, AJAX, JQuery Secondary Skills Required: Entity Framework MVC Angular JS RDBMS Oracle 11g Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Desired Candidate Profile Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate - Any Specialization Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Unitforce technologies Pvt. Ltd. www.uftech.com Download PPT Photo 1  View Contact Details",60316600219,Hyderabad / Secunderabad,.NET Developer,,Not Disclosed by Recruiter,2016-03-05 02:30:23 +0000,,IT Software - Application Programming,4ed3d0cca70fcac5acf1557de46ef176
11 | Karvy Analytics Limited,UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required,4 - 8 yrs,KPO / Research / Analytics,"Job Description  Send me Jobs like this Responsibilities : - Selecting features, building and optimizing classifiers using machine learning techniques - Data mining using state-of-the-art methods - Enhancing data collection procedures to include information that is relevant for building analytic systems - Processing, cleansing, and verifying the integrity of data used for analysis - Doing ad-hoc analysis and presenting results in a clear manner Skills and Qualifications : - Excellent understanding of machine learning techniques and algorithms, such as k-NN, Naive Bayes, SVM, GBM, Decision Forests, Time Series Forecasting etc. - Experience with common data science toolkits in R or Python Excellence in at least one of these is highly desirable - Good communication skills - Experience with data visualization tools, such as D3.js, Tableau etc. would be added advantage - Proficiency in using query languages such as SQL, Hive, Pig would be added advantage - Good applied statistics skills, such as distributions, statistical testing, regression, etc. - Good scripting and programming skills - Data-oriented personality - More than 4 years of experience in Data analysis. Salary:INR 6,00,000 - 12,00,000 P.A Industry: KPO / Research / Analytics Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Analytics Manager Keyskills Data Science Hive Machine Learning Data Mining R Data Visualization SQL Python Data Analysis Time Series Desired Candidate Profile Please refer to the Job description above Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required Company Profile: Karvy Analytics Limited Karvy Analytics Limited is a new age company and a modern arm of the leading Karvy Conglomerate. Led by visionary management, the young and forward thinking team is building world class solutions for the global analytics universe. We are focused on multi-industry use cases for companies that need technology and professional services for their functional and operational analytics projects. We offer a range of solutions that bring immediate business benefits to our global customers who are interested in leveraging big data, statistical and mathematical modeling techniques, social analytics, and mobile descriptive analytics for new business insights. Download PPT Photo 1  View Contact Details",2.01216E+11,Hyderabad,Sr Data Scietist,2,"6,00,000 - 12,00,000 P.A",2016-12-20 18:19:23 +0000,www.naukri.com,Analytics & Business Intelligence,0abdcbe9423d9e4730c1b16db7954f77
12 | Rinalytics Advisors Pvt. Ltd,"UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",2 - 7 yrs,Recruitment / Staffing,Job Description  Send me Jobs like this Job Description,2.50516E+11,Bengaluru/Bangalore,Data Science Role,,Not Disclosed by Recruiter,2016-01-29 14:07:13 +0000,,Analytics & Business Intelligence,8e9ba1f084f9fe31c7878258fba47188
13 | Prism Manpower Services,UG: Any Graduate PG:Post Graduation Not Required,0 - 4 yrs,Recruitment / Staffing,"Job Description  Send me Jobs like this Computer Operators. Should have knowledge of Excel. Decent typing speed. should know English typing. Freshers are also fine. Interested Candidates can forward their resume at prismmanpower@yahoo.in call us on 9702897822 Salary: Not Disclosed by Recruiter Industry: Recruitment / Staffing Functional Area: Executive Assistant , Front Office , Data Entry Role Category:Other Role:Stenographer/Data Entry Operator Desired Candidate Profile Please refer to the Job description above Education- UG: Any Graduate PG:Post Graduation Not Required Company Profile: Prism Manpower Services We , Prism Manpower Services , provide a wide range of Recruitment Solutions for various requirements. Located in Mumbai city of Maharashtra , the company was incepted in the year 2007. With an experience of serving industries like Insurance , Event Management etc , we are today recognized as a trustworthy Service Provider in Maharashtra. Download PPT Photo 1  View Contact Details",2.51017E+11,Mumbai,DATA ENTRY OPERATOR,,Not Disclosed by Recruiter,2016-10-25 19:49:07 +0000,www.naukri.com,Executive Assistant,7fb17e8480a9978d68e30de0f39fea04
14 | "Risk Management Solutions, Inc.","UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",4 - 5 yrs,Banking / Financial Services / Broking,"Job Description  Send me Jobs like this RMS - Senior Analyst, Data Analytics Senior Analyst, Data Analytics Objective of the Role: The applicant will be extensively involved in exposure modeling and data analytics. Executes delivers original analysis and insights and own all or part of an analytics module Provide professional skills necessary for all phases of data analysis, including the application of standard statistical methods for conducting analysis, documentation and presentation. Communicates analytical insights through sophisticated synthesis and packaging of results (Including PPT slides and charts) Establishes credibility by thought partnering on analytics topics; takes positions and draws conclusions on a range of external and internal issues Serve as an active participant on cross- functional projects, interpreting data, and translating into actionable insights, provide support on ad- hoc analysis and reports. B. Tech./ Post graduate (geostatistics) from a premier institute with good academic record 4 to 5 years of total experience with minimum 3 years experience on analytical projects requiring comprehensive data analysis, interpretation and presentation skills. Knowledge of analysis techniques like statistical methodology, data manipulation. Critical thinking skills and hands on experience in data interpretation, formulating hypotheses and being able to make educated guesses when data may be sparse or unavailable. Strong MS SQL knowledge and experience, ability to write custom queries of medium to high complexity. Strong documentation skills with experience of working on MS Word, Excel (advanced knowledge such as using pivots, filters, using external data etc.) , PowerPoint and Project. Excellent communication skills and ability to independently lead and drive projects. Technical skills: Experience with multiple analytics methods (one or more required) Data management skills (e.g. data modeling, data integrity QA/ QC) Geospatial data visualization and analytics (specialties such as cluster detection or geo- statistical methods) Spatialtemporal analysis (cartographic animation of timeseries data) Experience in core analytics methods (one or more of the following) : Geo coding geo referencing. Knowledge of open source proprietary geo analytics data sources. Geographic cluster recognition. Network analysis (locationallocation, OD Matrix travelling sales person, vehicle routing problem) Spatialtemporal analysis Familiarity with analytics tools (one or more required) GIS toolkits (ESRI, Quantum GIS, MapInfo or similar) Working knowledge of Property Causality insurance or reinsurance or and Risk Assessment Analysis would be advantageous Knowledge of catastrophe modeling domain would also be advantageous Working knowledge of and experience in statistical tools like R, SPSS etc. RMS models and software help insurers, financial markets, corporations, and public agencies evaluate and manage catastrophe risks throughout the world. We lead an industry that we helped to pioneercatastrophe risk modelingand are the innovators of the RMS (one) platform, which is transforming the world's understanding and quantification of risk through open, real- time exposure and risk management. 
More than 400 insurers, reinsurers, trading companies, and other financial institutions trust RMS models and SaaS solutions to better understand and manage the risks of natural and human- made catastrophes, including hurricanes, earthquakes, floods, terrorism, and pandemics. We think about the unthinkable, enabling the management of even the most extreme events. Our scientific and objective measurement of risk facilitates the efficient flow of capital needed to insure, manage, and ultimately mitigate these risks to reduce the consequences of disasters, promoting resilient societies and a sustainable global economy. RMS is proud to be an equal opportunity employer. Salary: Not Disclosed by Recruiter Industry: Banking / Financial Services / Broking Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Data Analyst Desired Candidate Profile Education- UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Risk Management Solutions, Inc. Risk Management Solutions, Inc. Download PPT Photo 1  View Contact Details",1.10517E+11,Noida,Data Analytics,,Not Disclosed by Recruiter,2016-05-11 06:05:20 +0000,,Analytics & Business Intelligence,187a7d0b53f5211639157026daaf6dca
15 | ZSoft Internet Media Pvt Ltd.,"UG: Diploma PG:Post Graduation Not Required Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",2 - 4 yrs,IT-Software / Software Services,"Job Description  Send me Jobs like this Job description:The post is responsible for maintaining, updating and enhancing Client database. Work on Data Entry and generate reports as per needs Must have good hands on experience with MS OfficeRoles and Responsibilities: Good Typing Skills, Communication SkillsQualification: Any Graduate ( BE/B Tech/B. Sc Computers/BCA and Diploma Holders )Speed and accuracy essential. Must have be organized, self disciplined and self starter Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: Executive Assistant , Front Office , Data Entry Role:Stenographer/Data Entry Operator Keyskills bca diploma maintaining good typing skills data entry operator b sc b tech responsible be database Desired Candidate Profile Education- UG: Diploma PG:Post Graduation Not Required Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: ZSoft Internet Media Pvt Ltd. ZSoft Internet Media Pvt. Ltd. - Website Design | Software Development | SEO | SMO | PPC | ORM | IT Services Business Technology Consulting Download PPT Photo 1  View Contact Details",1.00816E+11,"Delhi , Delhi",Data Entry Operator,,Not Disclosed by Recruiter,2015-11-23 22:17:37 +0000,,Executive Assistant,18ee0fbcfa297155ef90876b0fda0608
16 | Startup - Entransys,"UG: Any Graduate - Any Specialization, B.Tech/B.E. - Any Specialization Doctorate:Doctorate Not Required",3 - 5 yrs,Internet / Ecommerce,"Job Description  Send me Jobs like this We are looking for an Analytics Designer with strong interests and capabilities in the design and development of engaging user experiences. Salary: Not Disclosed by Recruiter Industry: Internet / Ecommerce Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Data Analyst Keyskills Design Development Data Science Analytics Desired Candidate Profile  Education- UG: Any Graduate - Any Specialization, B.Tech/B.E. - Any Specialization Doctorate:Doctorate Not Required Company Profile: Startup - Entransys Entransys approach and methodology is aimed towards converting the Business Chain into Value Chain and ensures the reconfiguration of Business processes to maximize the Business value. Download PPT Photo 1  View Contact Details",2.21217E+11,Hyderabad,Analytics & Data Science,,Not Disclosed by Recruiter,2016-12-22 18:19:00 +0000,www.naukri.com,Analytics & Business Intelligence,e5cc1a2789b45f1161636fc3681670ee
17 | AR Enterprises hiring for US Based MNC,,0 - 2 yrs,Recruitment / Staffing,"Job Description  Send me Jobs like this SECRETARY / FRONT OFFICE / DATA ENTRY Job Profile Salary: Not Disclosed by Recruiter Industry: Recruitment / Staffing Functional Area: Executive Assistant , Front Office , Data Entry Role Category:Other Role:Stenographer/Data Entry Operator Keyskills Data Entry Operation Front Office Secretarial Activities Desired Candidate Profile Please refer to the Job description above Company Profile: AR Enterprises US Based MNC our aim is to make future and to give best placement. Download PPT Photo 1  View Contact Details",51116002047,"Hyderabad, Chennai, Bengaluru, Gwalior",Data Entry Operator,3,Not Disclosed by Recruiter,2017-01-11 21:00:00 +0000,www.naukri.com,Executive Assistant,0e1a1f05ed979b8139dfb814058f68ac
18 |
--------------------------------------------------------------------------------
/2-Preprocessing_and_Modelling/Pre-processing_Resume for matchingv2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pre-Processing Resume Text Column to Prepare for Matching - final "
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "import pandas as pd\n",
18 | "import json\n",
19 | "import matplotlib.pyplot as plt\n",
20 | "%matplotlib inline\n",
21 | "\n",
22 | "import re\n",
23 | "import datetime\n",
24 | "from datetime import date\n",
25 | "from time import strptime\n",
26 | "\n",
27 | "import RAKE as rake\n",
28 | "import operator\n"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "###############################################################################################\n",
36 | "## Working on Resume data\n",
37 | "###############################################################################################"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 2,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "# First reading my resume csv\n",
47 | "resume = pd.read_csv('wip/resume_sorted6.csv')"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "\n",
60 | "RangeIndex: 14428 entries, 0 to 14427\n",
61 | "Data columns (total 26 columns):\n",
62 | " # Column Non-Null Count Dtype \n",
63 | "--- ------ -------------- ----- \n",
64 | " 0 index 14428 non-null int64 \n",
65 | " 1 Resume_title 14428 non-null object\n",
66 | " 2 City 14428 non-null object\n",
67 | " 3 location 14428 non-null int64 \n",
68 | " 4 Description 14428 non-null object\n",
69 | " 5 work_experiences 14428 non-null object\n",
70 | " 6 Educations 14428 non-null object\n",
71 | " 7 Skills 14428 non-null object\n",
72 | " 8 Links 14428 non-null object\n",
73 | " 9 Certificates 14428 non-null object\n",
74 | " 10 Additional Information 14428 non-null object\n",
75 | " 11 is_grad 14428 non-null int64 \n",
76 | " 12 is_postgrad 14428 non-null int64 \n",
77 | " 13 is_doc 14428 non-null int64 \n",
78 | " 14 edu_unknown 14428 non-null int64 \n",
79 | " 15 Computer_Eng 14428 non-null int64 \n",
80 | " 16 Finance 14428 non-null int64 \n",
81 | " 17 HR 14428 non-null int64 \n",
82 | " 18 AI_stats 14428 non-null int64 \n",
83 | " 19 MBA 14428 non-null int64 \n",
84 | " 20 Other_specialization 14428 non-null int64 \n",
85 | " 21 resume_id 14428 non-null int64 \n",
86 | " 22 total_experience 14428 non-null int64 \n",
87 | " 23 experience_range 14428 non-null int64 \n",
88 | " 24 loc_name 14428 non-null object\n",
89 | " 25 experience_desc 14428 non-null object\n",
90 | "dtypes: int64(15), object(11)\n",
91 | "memory usage: 2.9+ MB\n"
92 | ]
93 | }
94 | ],
95 | "source": [
96 | "#initial info\n",
97 | "resume.info()"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "#########################################################################################################\n",
105 | "## To match resume with jobs, I need to have similar 20 vectors, that I created to train my Doc2Vec model for jobs. \n",
106 | "\n",
107 | "### For training my jobs model, I picked text data from :\n",
108 | "* job title\n",
109 | "* job description\n",
110 | "* skills\n",
111 | "* industry\n",
112 | "\n",
113 | "### So for training my resume model, I need similar text, thus picking:\n",
114 | "* Resume_title\n",
115 | "* Resume description \n",
116 | "* skills\n",
117 | "* Additional Information\n",
118 | "\n",
119 | "\n",
120 | "#########################################################################################################"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 40,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "resume['Resume_title'] = resume['Resume_title'].str.lower()\n",
130 | "resume['Skills']=resume['Skills'].str.lower()\n",
131 | "resume['Description'] = resume['Description'].str.lower()\n",
132 | "resume['Additional Information'] = resume['Additional Information'].str.lower()"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 41,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "resume['Description'].replace('none', ' ',inplace=True)\n",
142 | "resume['Additional Information'].replace('none', ' ',inplace=True)"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 5,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "name": "stderr",
152 | "output_type": "stream",
153 | "text": [
154 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
155 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
156 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
157 | "\n",
158 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
159 | " \n"
160 | ]
161 | },
162 | {
163 | "data": {
164 | "text/html": [
165 | "\n",
166 | "\n",
179 | "
\n",
180 | " \n",
181 | " \n",
182 | " | \n",
183 | " resume_id | \n",
184 | " Resume_title | \n",
185 | " resume_combo | \n",
186 | "
\n",
187 | " \n",
188 | " \n",
189 | " \n",
190 | " | 0 | \n",
191 | " 0 | \n",
192 | " java developer | \n",
193 | " java developer to prove myself dedicated, wort... | \n",
194 | "
\n",
195 | " \n",
196 | " | 1 | \n",
197 | " 1 | \n",
198 | " software developer | \n",
199 | " software developer working as software develop... | \n",
200 | "
\n",
201 | " \n",
202 | " | 2 | \n",
203 | " 2 | \n",
204 | " java developer | \n",
205 | " java developer looking for a challenging caree... | \n",
206 | "
\n",
207 | " \n",
208 | " | 3 | \n",
209 | " 3 | \n",
210 | " seeking innovative and challenging career assi... | \n",
211 | " seeking innovative and challenging career assi... | \n",
212 | "
\n",
213 | " \n",
214 | " | 4 | \n",
215 | " 4 | \n",
216 | " java developer | \n",
217 | " java developer ['project: hr payroll systems... | \n",
218 | "
\n",
219 | " \n",
220 | "
\n",
221 | "
"
222 | ],
223 | "text/plain": [
224 | " resume_id Resume_title \\\n",
225 | "0 0 java developer \n",
226 | "1 1 software developer \n",
227 | "2 2 java developer \n",
228 | "3 3 seeking innovative and challenging career assi... \n",
229 | "4 4 java developer \n",
230 | "\n",
231 | " resume_combo \n",
232 | "0 java developer to prove myself dedicated, wort... \n",
233 | "1 software developer working as software develop... \n",
234 | "2 java developer looking for a challenging caree... \n",
235 | "3 seeking innovative and challenging career assi... \n",
236 | "4 java developer ['project: hr payroll systems... "
237 | ]
238 | },
239 | "execution_count": 5,
240 | "metadata": {},
241 | "output_type": "execute_result"
242 | }
243 | ],
244 | "source": [
245 | "df_resume = resume[['resume_id','Resume_title' ]]\n",
246 | "df_resume['resume_combo'] = resume['Resume_title'] +\" \" + resume['Description'] +\" \" + resume['Skills'] + \" \"+resume['Additional Information'] + \" \"+resume['experience_desc']\n",
247 | "df_resume.head()"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 6,
253 | "metadata": {},
254 | "outputs": [
255 | {
256 | "data": {
257 | "text/plain": [
258 | "0 java developer to prove myself dedicated, wort...\n",
259 | "1 software developer working as software develop...\n",
260 | "2 java developer looking for a challenging caree...\n",
261 | "3 seeking innovative and challenging career assi...\n",
262 | "4 java developer ['project: hr payroll systems...\n",
263 | "5 java developer ['java'] ['have the potenti...\n",
264 | "6 java developer to secure a challenging positio...\n",
265 | "7 searching job for java developer ['c++', ' h...\n",
266 | "8 mca / with 3 years of development experience •...\n",
267 | "9 java developer attain the position of 'java de...\n",
268 | "Name: resume_combo, dtype: object"
269 | ]
270 | },
271 | "execution_count": 6,
272 | "metadata": {},
273 | "output_type": "execute_result"
274 | }
275 | ],
276 | "source": [
277 | "docs = df_resume['resume_combo']\n",
278 | "docs_sample = docs.head(10)\n",
279 | "docs_sample"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 7,
285 | "metadata": {},
286 | "outputs": [
287 | {
288 | "name": "stderr",
289 | "output_type": "stream",
290 | "text": [
291 | "[nltk_data] Downloading package wordnet to\n",
292 | "[nltk_data] C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n",
293 | "[nltk_data] Package wordnet is already up-to-date!\n"
294 | ]
295 | }
296 | ],
297 | "source": [
298 | "#Import all the dependencies\n",
299 | "import nltk\n",
300 | "nltk.download('wordnet')\n",
301 | "from nltk.stem import WordNetLemmatizer\n",
302 | "wordnet_lemmatizer = WordNetLemmatizer()\n",
303 | "from nltk.corpus import stopwords\n",
304 | "from nltk.tokenize import word_tokenize \n",
305 | "set(stopwords.words('english'))\n",
306 | "\n",
307 | "import string\n",
308 | "\n",
309 | "import gensim\n",
310 | "from gensim.test.utils import common_texts\n",
311 | "from gensim.models.doc2vec import Doc2Vec, TaggedDocument"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": 8,
317 | "metadata": {},
318 | "outputs": [
319 | {
320 | "name": "stderr",
321 | "output_type": "stream",
322 | "text": [
323 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n",
324 | " 'stop_words.' % sorted(inconsistent))\n"
325 | ]
326 | },
327 | {
328 | "name": "stdout",
329 | "output_type": "stream",
330 | "text": [
331 | "(14428, 70688)\n",
332 | "(14428, 3)\n"
333 | ]
334 | }
335 | ],
336 | "source": [
337 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
338 | "stopwords = nltk.corpus.stopwords.words('english')\n",
339 | "stopwords.append('ã¯æ’ëœ')\n",
340 | "stopwords.append('\\n')\n",
341 | "stopwords.append('•')\n",
342 | "#Transforms words to TFIDF\n",
343 | "vectorizer = TfidfVectorizer(stop_words = stopwords)\n",
344 | "\n",
345 | "index = 0\n",
346 | "keys = {}\n",
347 | "\n",
348 | "for rem in df_resume.itertuples() :\n",
349 | " key = rem[1]\n",
350 | " keys[key] = index\n",
351 | " index += 1\n",
352 | "\n",
353 | "#Fit the vectorizer to the data\n",
354 | "vectorizer.fit(df_resume['resume_combo'].fillna(''))\n",
355 | "\n",
356 | "#Transform the data\n",
357 | "tfidf_scores = vectorizer.transform(df_resume['resume_combo'].fillna(''))\n",
358 | "\n",
359 | "print(tfidf_scores.shape)\n",
360 | "print(df_resume.shape)"
361 | ]
362 | },
363 | {
364 | "cell_type": "code",
365 | "execution_count": 10,
366 | "metadata": {},
367 | "outputs": [],
368 | "source": [
369 | "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": 11,
375 | "metadata": {},
376 | "outputs": [
377 | {
378 | "data": {
379 | "text/html": [
380 | "\n",
381 | "\n",
394 | "
\n",
395 | " \n",
396 | " \n",
397 | " | \n",
398 | " 00 | \n",
399 | " 000 | \n",
400 | " 0000 | \n",
401 | " 00089765 | \n",
402 | " 00089805 | \n",
403 | " 000webhostapp | \n",
404 | " 001 | \n",
405 | " 002 | \n",
406 | " 003 | \n",
407 | " 00353 | \n",
408 | " ... | \n",
409 | " õle | \n",
410 | " øcreated | \n",
411 | " ǁǁǁǁǁǁ | \n",
412 | " ηadoop | \n",
413 | " τrain | \n",
414 | " τοοls | \n",
415 | " чєαr | \n",
416 | " filed | \n",
417 | " financial | \n",
418 | " fixing | \n",
419 | "
\n",
420 | " \n",
421 | " \n",
422 | " \n",
423 | " | 0 | \n",
424 | " 0.0 | \n",
425 | " 0.0 | \n",
426 | " 0.0 | \n",
427 | " 0.0 | \n",
428 | " 0.0 | \n",
429 | " 0.0 | \n",
430 | " 0.0 | \n",
431 | " 0.0 | \n",
432 | " 0.0 | \n",
433 | " 0.0 | \n",
434 | " ... | \n",
435 | " 0.0 | \n",
436 | " 0.0 | \n",
437 | " 0.0 | \n",
438 | " 0.0 | \n",
439 | " 0.0 | \n",
440 | " 0.0 | \n",
441 | " 0.0 | \n",
442 | " 0.0 | \n",
443 | " 0.0 | \n",
444 | " 0.0 | \n",
445 | "
\n",
446 | " \n",
447 | " | 1 | \n",
448 | " 0.0 | \n",
449 | " 0.0 | \n",
450 | " 0.0 | \n",
451 | " 0.0 | \n",
452 | " 0.0 | \n",
453 | " 0.0 | \n",
454 | " 0.0 | \n",
455 | " 0.0 | \n",
456 | " 0.0 | \n",
457 | " 0.0 | \n",
458 | " ... | \n",
459 | " 0.0 | \n",
460 | " 0.0 | \n",
461 | " 0.0 | \n",
462 | " 0.0 | \n",
463 | " 0.0 | \n",
464 | " 0.0 | \n",
465 | " 0.0 | \n",
466 | " 0.0 | \n",
467 | " 0.0 | \n",
468 | " 0.0 | \n",
469 | "
\n",
470 | " \n",
471 | " | 2 | \n",
472 | " 0.0 | \n",
473 | " 0.0 | \n",
474 | " 0.0 | \n",
475 | " 0.0 | \n",
476 | " 0.0 | \n",
477 | " 0.0 | \n",
478 | " 0.0 | \n",
479 | " 0.0 | \n",
480 | " 0.0 | \n",
481 | " 0.0 | \n",
482 | " ... | \n",
483 | " 0.0 | \n",
484 | " 0.0 | \n",
485 | " 0.0 | \n",
486 | " 0.0 | \n",
487 | " 0.0 | \n",
488 | " 0.0 | \n",
489 | " 0.0 | \n",
490 | " 0.0 | \n",
491 | " 0.0 | \n",
492 | " 0.0 | \n",
493 | "
\n",
494 | " \n",
495 | " | 3 | \n",
496 | " 0.0 | \n",
497 | " 0.0 | \n",
498 | " 0.0 | \n",
499 | " 0.0 | \n",
500 | " 0.0 | \n",
501 | " 0.0 | \n",
502 | " 0.0 | \n",
503 | " 0.0 | \n",
504 | " 0.0 | \n",
505 | " 0.0 | \n",
506 | " ... | \n",
507 | " 0.0 | \n",
508 | " 0.0 | \n",
509 | " 0.0 | \n",
510 | " 0.0 | \n",
511 | " 0.0 | \n",
512 | " 0.0 | \n",
513 | " 0.0 | \n",
514 | " 0.0 | \n",
515 | " 0.0 | \n",
516 | " 0.0 | \n",
517 | "
\n",
518 | " \n",
519 | " | 4 | \n",
520 | " 0.0 | \n",
521 | " 0.0 | \n",
522 | " 0.0 | \n",
523 | " 0.0 | \n",
524 | " 0.0 | \n",
525 | " 0.0 | \n",
526 | " 0.0 | \n",
527 | " 0.0 | \n",
528 | " 0.0 | \n",
529 | " 0.0 | \n",
530 | " ... | \n",
531 | " 0.0 | \n",
532 | " 0.0 | \n",
533 | " 0.0 | \n",
534 | " 0.0 | \n",
535 | " 0.0 | \n",
536 | " 0.0 | \n",
537 | " 0.0 | \n",
538 | " 0.0 | \n",
539 | " 0.0 | \n",
540 | " 0.0 | \n",
541 | "
\n",
542 | " \n",
543 | "
\n",
544 | "
5 rows × 70688 columns
\n",
545 | "
"
546 | ],
547 | "text/plain": [
548 | " 00 000 0000 00089765 00089805 000webhostapp 001 002 003 00353 \\\n",
549 | "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
550 | "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
551 | "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
552 | "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
553 | "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
554 | "\n",
555 | " ... õle øcreated ǁǁǁǁǁǁ ηadoop τrain τοοls чєαr filed financial \\\n",
556 | "0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
557 | "1 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
558 | "2 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
559 | "3 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
560 | "4 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
561 | "\n",
562 | " fixing \n",
563 | "0 0.0 \n",
564 | "1 0.0 \n",
565 | "2 0.0 \n",
566 | "3 0.0 \n",
567 | "4 0.0 \n",
568 | "\n",
569 | "[5 rows x 70688 columns]"
570 | ]
571 | },
572 | "execution_count": 11,
573 | "metadata": {},
574 | "output_type": "execute_result"
575 | }
576 | ],
577 | "source": [
578 | "test.head()"
579 | ]
580 | },
581 | {
582 | "cell_type": "markdown",
583 | "metadata": {},
584 | "source": [
585 | "### Creating my Stopword list\n",
586 | "#### As seen there are so many unwanted tokens like numbers, etc , I need to add them in \"stop words\" list to train model"
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": 12,
592 | "metadata": {},
593 | "outputs": [],
594 | "source": [
595 | "#getting list of all tokens\n",
596 | "word_list = test.columns.tolist()"
597 | ]
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": 13,
602 | "metadata": {},
603 | "outputs": [],
604 | "source": [
605 | "##Getting a list of unwanted words as s_words and adding to stopwords\n",
606 | "s_words =[]\n",
607 | "for word in word_list:\n",
608 | " #print(word)\n",
609 | " if re.search(\"^\\W|^\\d\",word):\n",
610 | " s_words.append(word)"
611 | ]
612 | },
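{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### A quick illustration (with made-up sample tokens) of what the `^\\W|^\\d` filter catches:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tokens that start with a non-word character or a digit are flagged.\n",
"sample_tokens = ['00353', 'java', '•lead', 'financial', '000webhostapp']\n",
"[t for t in sample_tokens if re.search(r\"^\\W|^\\d\", t)]\n",
"# -> ['00353', '•lead', '000webhostapp']"
]
},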
613 | {
614 | "cell_type": "code",
615 | "execution_count": 14,
616 | "metadata": {},
617 | "outputs": [],
618 | "source": [
619 | "s_words.append('') \n",
620 | "from nltk.corpus import stopwords\n",
621 | "stopword_set = set(stopwords.words('english'))\n",
622 | "stopword_set = list(stopword_set)\n",
623 | "stopword_set.extend(s_words)"
624 | ]
625 | },
626 | {
627 | "cell_type": "code",
628 | "execution_count": 15,
629 | "metadata": {},
630 | "outputs": [],
631 | "source": [
632 | "def preprocess(text):\n",
633 | " stop_words = stopword_set\n",
634 | " #0. split words by whitespace\n",
635 | " text = text.split()\n",
636 | " \n",
637 | " \n",
638 | " # 1. lower case\n",
639 | " text = [word.lower() for word in text]\n",
640 | " \n",
641 | " # 2. remove punctuations\n",
642 | " punc_table = str.maketrans('','',string.punctuation)\n",
643 | " text = [word.translate(punc_table) for word in text]\n",
644 | " \n",
645 | " # 3. remove stop words\n",
646 | " text = [word for word in text if word not in stop_words]\n",
647 | " \n",
648 | " return text"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": 16,
654 | "metadata": {},
655 | "outputs": [],
656 | "source": [
657 | "tokenized_doc = []\n",
658 | "doc = df_resume['resume_combo']\n",
659 | "#doc = docs_sample\n",
660 | "for d in doc:\n",
661 | " tokenized_doc.append(preprocess(d))\n",
662 | "#tokenized_doc"
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "execution_count": 17,
668 | "metadata": {},
669 | "outputs": [],
670 | "source": [
671 | "# Convert tokenized document into gensim formated tagged data\n",
672 | "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]"
673 | ]
674 | },
675 | {
676 | "cell_type": "code",
677 | "execution_count": 18,
678 | "metadata": {},
679 | "outputs": [
680 | {
681 | "data": {
682 | "text/plain": [
683 | "14428"
684 | ]
685 | },
686 | "execution_count": 18,
687 | "metadata": {},
688 | "output_type": "execute_result"
689 | }
690 | ],
691 | "source": [
692 | "num_doc = len(tagged_data)\n",
693 | "num_doc\n",
694 | "#confirm length (should be 14428)\n",
695 | "len(tokenized_doc)"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": 21,
701 | "metadata": {},
702 | "outputs": [],
703 | "source": [
704 | "## Load saved doc2vec model\n",
705 | "model= Doc2Vec.load(\"Model/my_doc2vec_v2.model\")"
706 | ]
707 | },
708 | {
709 | "cell_type": "code",
710 | "execution_count": 24,
711 | "metadata": {},
712 | "outputs": [],
713 | "source": [
714 | "## Get vector value\n",
715 | "vec = np.empty([14428,20])\n",
716 | "\n",
717 | "for k,i in enumerate(tokenized_doc):\n",
718 | " \n",
719 | " #print(i)\n",
720 | " vector = model.infer_vector(i)\n",
721 | " vec[k] = vector\n",
722 | "\n",
723 | "# reshape into 2D\n",
724 | "new_arr = np.reshape(vec,(-1,20))"
725 | ]
726 | },
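{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Note: `infer_vector` is stochastic, so re-running the loop above yields slightly different vectors. A minimal sketch for repeatable inference, assuming the model exposes gensim's usual `random` RandomState (`infer_all` is a helper name introduced here, not from the original code):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def infer_all(model, docs, seed=0):\n",
"    # Re-seed the model's RNG before each inference so runs are identical.\n",
"    vecs = []\n",
"    for tokens in docs:\n",
"        model.random.seed(seed)\n",
"        vecs.append(model.infer_vector(tokens))\n",
"    return np.vstack(vecs)  # already 2D: (n_docs, vector_size)\n",
"\n",
"#new_arr = infer_all(model, tokenized_doc)"
]
},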
727 | {
728 | "cell_type": "code",
729 | "execution_count": 25,
730 | "metadata": {},
731 | "outputs": [],
732 | "source": [
733 | "rng = range(1, 21)\n",
734 | "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])"
735 | ]
736 | },
737 | {
738 | "cell_type": "code",
739 | "execution_count": 26,
740 | "metadata": {},
741 | "outputs": [
742 | {
743 | "data": {
744 | "text/html": [
745 | "\n",
746 | "\n",
759 | "
\n",
760 | " \n",
761 | " \n",
762 | " | \n",
763 | " vec_1 | \n",
764 | " vec_2 | \n",
765 | " vec_3 | \n",
766 | " vec_4 | \n",
767 | " vec_5 | \n",
768 | " vec_6 | \n",
769 | " vec_7 | \n",
770 | " vec_8 | \n",
771 | " vec_9 | \n",
772 | " vec_10 | \n",
773 | " vec_11 | \n",
774 | " vec_12 | \n",
775 | " vec_13 | \n",
776 | " vec_14 | \n",
777 | " vec_15 | \n",
778 | " vec_16 | \n",
779 | " vec_17 | \n",
780 | " vec_18 | \n",
781 | " vec_19 | \n",
782 | " vec_20 | \n",
783 | "
\n",
784 | " \n",
785 | " \n",
786 | " \n",
787 | " | 0 | \n",
788 | " 3.003397 | \n",
789 | " 1.462391 | \n",
790 | " -0.732206 | \n",
791 | " 2.032145 | \n",
792 | " -3.291425 | \n",
793 | " 1.626622 | \n",
794 | " 1.269785 | \n",
795 | " -1.303818 | \n",
796 | " -1.781690 | \n",
797 | " -3.893606 | \n",
798 | " 0.582851 | \n",
799 | " -2.390430 | \n",
800 | " 0.612694 | \n",
801 | " 4.274847 | \n",
802 | " -1.641325 | \n",
803 | " 1.098874 | \n",
804 | " -0.534998 | \n",
805 | " 0.338975 | \n",
806 | " -2.081308 | \n",
807 | " -3.480031 | \n",
808 | "
\n",
809 | " \n",
810 | " | 1 | \n",
811 | " 3.969832 | \n",
812 | " -1.478794 | \n",
813 | " -1.997424 | \n",
814 | " 1.502539 | \n",
815 | " -3.507508 | \n",
816 | " 2.108994 | \n",
817 | " -0.386640 | \n",
818 | " 1.494396 | \n",
819 | " 0.454764 | \n",
820 | " -2.268685 | \n",
821 | " -1.505257 | \n",
822 | " -2.332494 | \n",
823 | " -0.431022 | \n",
824 | " 1.431269 | \n",
825 | " -0.896382 | \n",
826 | " -0.267269 | \n",
827 | " 1.433352 | \n",
828 | " 0.438305 | \n",
829 | " -0.992093 | \n",
830 | " -0.096142 | \n",
831 | "
\n",
832 | " \n",
833 | " | 2 | \n",
834 | " 1.442701 | \n",
835 | " 0.011723 | \n",
836 | " -2.126506 | \n",
837 | " 0.655804 | \n",
838 | " -3.984513 | \n",
839 | " 0.792035 | \n",
840 | " 1.317094 | \n",
841 | " -0.696710 | \n",
842 | " -1.563318 | \n",
843 | " -3.040591 | \n",
844 | " -0.367393 | \n",
845 | " -3.774975 | \n",
846 | " -1.183595 | \n",
847 | " 2.456486 | \n",
848 | " -1.270981 | \n",
849 | " 2.475039 | \n",
850 | " -1.990110 | \n",
851 | " 0.130853 | \n",
852 | " -0.589791 | \n",
853 | " -2.782936 | \n",
854 | "
\n",
855 | " \n",
856 | " | 3 | \n",
857 | " 1.803033 | \n",
858 | " -0.120398 | \n",
859 | " -1.159959 | \n",
860 | " 0.066225 | \n",
861 | " -3.522508 | \n",
862 | " 1.321965 | \n",
863 | " -0.756211 | \n",
864 | " -0.249010 | \n",
865 | " -0.074644 | \n",
866 | " -2.314389 | \n",
867 | " 0.557041 | \n",
868 | " -3.887409 | \n",
869 | " -1.070027 | \n",
870 | " 3.894971 | \n",
871 | " -0.957399 | \n",
872 | " -0.952996 | \n",
873 | " -0.824266 | \n",
874 | " 0.038712 | \n",
875 | " 1.194561 | \n",
876 | " -1.206788 | \n",
877 | "
\n",
878 | " \n",
879 | " | 4 | \n",
880 | " -0.434019 | \n",
881 | " 0.551527 | \n",
882 | " -1.531551 | \n",
883 | " -0.767032 | \n",
884 | " -0.514473 | \n",
885 | " 0.286549 | \n",
886 | " -0.563888 | \n",
887 | " 0.310748 | \n",
888 | " 0.457921 | \n",
889 | " -1.334632 | \n",
890 | " 0.183150 | \n",
891 | " -0.547834 | \n",
892 | " 1.218995 | \n",
893 | " 0.536182 | \n",
894 | " 0.995981 | \n",
895 | " -0.874730 | \n",
896 | " -0.138916 | \n",
897 | " 0.882186 | \n",
898 | " -0.129402 | \n",
899 | " -1.793177 | \n",
900 | "
\n",
901 | " \n",
902 | "
\n",
903 | "
"
904 | ],
905 | "text/plain": [
906 | " vec_1 vec_2 vec_3 vec_4 vec_5 vec_6 vec_7 \\\n",
907 | "0 3.003397 1.462391 -0.732206 2.032145 -3.291425 1.626622 1.269785 \n",
908 | "1 3.969832 -1.478794 -1.997424 1.502539 -3.507508 2.108994 -0.386640 \n",
909 | "2 1.442701 0.011723 -2.126506 0.655804 -3.984513 0.792035 1.317094 \n",
910 | "3 1.803033 -0.120398 -1.159959 0.066225 -3.522508 1.321965 -0.756211 \n",
911 | "4 -0.434019 0.551527 -1.531551 -0.767032 -0.514473 0.286549 -0.563888 \n",
912 | "\n",
913 | " vec_8 vec_9 vec_10 vec_11 vec_12 vec_13 vec_14 \\\n",
914 | "0 -1.303818 -1.781690 -3.893606 0.582851 -2.390430 0.612694 4.274847 \n",
915 | "1 1.494396 0.454764 -2.268685 -1.505257 -2.332494 -0.431022 1.431269 \n",
916 | "2 -0.696710 -1.563318 -3.040591 -0.367393 -3.774975 -1.183595 2.456486 \n",
917 | "3 -0.249010 -0.074644 -2.314389 0.557041 -3.887409 -1.070027 3.894971 \n",
918 | "4 0.310748 0.457921 -1.334632 0.183150 -0.547834 1.218995 0.536182 \n",
919 | "\n",
920 | " vec_15 vec_16 vec_17 vec_18 vec_19 vec_20 \n",
921 | "0 -1.641325 1.098874 -0.534998 0.338975 -2.081308 -3.480031 \n",
922 | "1 -0.896382 -0.267269 1.433352 0.438305 -0.992093 -0.096142 \n",
923 | "2 -1.270981 2.475039 -1.990110 0.130853 -0.589791 -2.782936 \n",
924 | "3 -0.957399 -0.952996 -0.824266 0.038712 1.194561 -1.206788 \n",
925 | "4 0.995981 -0.874730 -0.138916 0.882186 -0.129402 -1.793177 "
926 | ]
927 | },
928 | "execution_count": 26,
929 | "metadata": {},
930 | "output_type": "execute_result"
931 | }
932 | ],
933 | "source": [
934 | "vec_df.head(5)"
935 | ]
936 | },
937 | {
938 | "cell_type": "code",
939 | "execution_count": 27,
940 | "metadata": {},
941 | "outputs": [],
942 | "source": [
943 | "# concatenate and safe the resume csv file\n",
944 | "con_resume_1 = pd.concat([resume, vec_df], axis=1)\n",
945 | "con_resume_1.to_csv('wip/con_resume_1.csv', index=False)"
946 | ]
947 | }
948 | ],
949 | "metadata": {
950 | "kernelspec": {
951 | "display_name": "Python 3",
952 | "language": "python",
953 | "name": "python3"
954 | },
955 | "language_info": {
956 | "codemirror_mode": {
957 | "name": "ipython",
958 | "version": 3
959 | },
960 | "file_extension": ".py",
961 | "mimetype": "text/x-python",
962 | "name": "python",
963 | "nbconvert_exporter": "python",
964 | "pygments_lexer": "ipython3",
965 | "version": "3.7.6"
966 | }
967 | },
968 | "nbformat": 4,
969 | "nbformat_minor": 4
970 | }
971 |
--------------------------------------------------------------------------------
/2-Preprocessing_and_Modelling/Pre-processing_Resume for matchingv1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pre-Processing Resume Text Column to Prepare for matching - first iteration"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 3,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "import pandas as pd\n",
18 | "import json\n",
19 | "import matplotlib.pyplot as plt\n",
20 | "%matplotlib inline\n",
21 | "\n",
22 | "import re\n",
23 | "import datetime\n",
24 | "from datetime import date\n",
25 | "from time import strptime\n",
26 | "\n",
27 | "import RAKE as rake\n",
28 | "import operator\n"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "###############################################################################################\n",
36 | "## Working on Resume data\n",
37 | "###############################################################################################"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 38,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "# First reading my resume csv\n",
47 | "resume = pd.read_csv('wip/resume_sorted5.csv')"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 39,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "\n",
60 | "RangeIndex: 14428 entries, 0 to 14427\n",
61 | "Data columns (total 26 columns):\n",
62 | " # Column Non-Null Count Dtype \n",
63 | "--- ------ -------------- ----- \n",
64 | " 0 index 14428 non-null int64 \n",
65 | " 1 Resume_title 14428 non-null object\n",
66 | " 2 City 14428 non-null object\n",
67 | " 3 location 14428 non-null int64 \n",
68 | " 4 Description 14428 non-null object\n",
69 | " 5 work_experiences 14428 non-null object\n",
70 | " 6 Educations 14428 non-null object\n",
71 | " 7 Skills 14428 non-null object\n",
72 | " 8 Links 14428 non-null object\n",
73 | " 9 Certificates 14428 non-null object\n",
74 | " 10 Additional Information 14428 non-null object\n",
75 | " 11 is_grad 14428 non-null int64 \n",
76 | " 12 is_postgrad 14428 non-null int64 \n",
77 | " 13 is_doc 14428 non-null int64 \n",
78 | " 14 edu_unknown 14428 non-null int64 \n",
79 | " 15 Computer_Eng 14428 non-null int64 \n",
80 | " 16 Finance 14428 non-null int64 \n",
81 | " 17 HR 14428 non-null int64 \n",
82 | " 18 AI_stats 14428 non-null int64 \n",
83 | " 19 MBA 14428 non-null int64 \n",
84 | " 20 Other_specialization 14428 non-null int64 \n",
85 | " 21 resume_id 14428 non-null int64 \n",
86 | " 22 total_experience 14428 non-null int64 \n",
87 | " 23 experience_range 14428 non-null int64 \n",
88 | " 24 loc_name 14428 non-null object\n",
89 | " 25 experience_desc 14428 non-null object\n",
90 | "dtypes: int64(15), object(11)\n",
91 | "memory usage: 2.9+ MB\n"
92 | ]
93 | }
94 | ],
95 | "source": [
96 | "#initial info\n",
97 | "resume.info()"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "#########################################################################################################\n",
105 | "## To match resume with jobs, I need to have similar 20 vectors, that I created to train my Doc2Vec model for jobs. \n",
106 | "\n",
107 | "### For training my jobs model, I picked text data from :\n",
108 | "* job title\n",
109 | "* job description\n",
110 | "* skills\n",
111 | "* industry\n",
112 | "\n",
113 | "### So for training my resume model, I need similar text, thus picking:\n",
114 | "* Resume_title\n",
115 | "* Resume description \n",
116 | "* skills\n",
117 | "* Additional Information\n",
118 | "\n",
119 | "\n",
120 | "#########################################################################################################"
121 | ]
122 | },
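{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### A compact sketch of the field concatenation done below; `fillna('')` guards against rows where a field is NaN, since string + NaN would turn the whole combo into NaN (`resume_combo_alt` is an illustrative name, not used later):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: join the selected resume fields into one text blob per row.\n",
"combo_cols = ['Resume_title', 'Description', 'Skills',\n",
"              'Additional Information', 'experience_desc']\n",
"resume_combo_alt = resume[combo_cols].fillna('').astype(str).agg(' '.join, axis=1)"
]
},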
123 | {
124 | "cell_type": "code",
125 | "execution_count": 40,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "resume['Resume_title'] = resume['Resume_title'].str.lower()\n",
130 | "resume['Skills']=resume['Skills'].str.lower()\n",
131 | "resume['Description'] = resume['Description'].str.lower()\n",
132 | "resume['Additional Information'] = resume['Additional Information'].str.lower()"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 41,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "resume['Description'].replace('none', ' ',inplace=True)\n",
142 | "resume['Additional Information'].replace('none', ' ',inplace=True)"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 43,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "name": "stderr",
152 | "output_type": "stream",
153 | "text": [
154 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
155 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
156 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
157 | "\n",
158 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
159 | " \n"
160 | ]
161 | },
162 | {
163 | "data": {
164 | "text/html": [
165 | "\n",
166 | "\n",
179 | "
\n",
180 | " \n",
181 | " \n",
182 | " | \n",
183 | " resume_id | \n",
184 | " Resume_title | \n",
185 | " resume_combo | \n",
186 | "
\n",
187 | " \n",
188 | " \n",
189 | " \n",
190 | " | 0 | \n",
191 | " 0 | \n",
192 | " java developer | \n",
193 | " java developer to prove myself dedicated, wort... | \n",
194 | "
\n",
195 | " \n",
196 | " | 1 | \n",
197 | " 1 | \n",
198 | " software developer | \n",
199 | " software developer working as software develop... | \n",
200 | "
\n",
201 | " \n",
202 | " | 2 | \n",
203 | " 2 | \n",
204 | " java developer | \n",
205 | " java developer looking for a challenging caree... | \n",
206 | "
\n",
207 | " \n",
208 | " | 3 | \n",
209 | " 3 | \n",
210 | " seeking innovative and challenging career assi... | \n",
211 | " seeking innovative and challenging career assi... | \n",
212 | "
\n",
213 | " \n",
214 | " | 4 | \n",
215 | " 4 | \n",
216 | " java developer | \n",
217 | " java developer ['project: hr payroll systems... | \n",
218 | "
\n",
219 | " \n",
220 | "
\n",
221 | "
"
222 | ],
223 | "text/plain": [
224 | " resume_id Resume_title \\\n",
225 | "0 0 java developer \n",
226 | "1 1 software developer \n",
227 | "2 2 java developer \n",
228 | "3 3 seeking innovative and challenging career assi... \n",
229 | "4 4 java developer \n",
230 | "\n",
231 | " resume_combo \n",
232 | "0 java developer to prove myself dedicated, wort... \n",
233 | "1 software developer working as software develop... \n",
234 | "2 java developer looking for a challenging caree... \n",
235 | "3 seeking innovative and challenging career assi... \n",
236 | "4 java developer ['project: hr payroll systems... "
237 | ]
238 | },
239 | "execution_count": 43,
240 | "metadata": {},
241 | "output_type": "execute_result"
242 | }
243 | ],
244 | "source": [
245 | "df_resume = resume[['resume_id','Resume_title' ]]\n",
246 | "df_resume['resume_combo'] = resume['Resume_title'] +\" \" + resume['Description'] +\" \" + resume['Skills'] + \" \"+resume['Additional Information'] + \" \"+resume['experience_desc']\n",
247 | "df_resume.head()"
248 | ]
249 | },
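{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### The SettingWithCopyWarning above comes from adding a column to a slice of `resume`. A sketch of the conventional fix (same result, no warning) is to take an explicit `.copy()` of the slice first:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: copy the slice so the new column is set on an independent frame.\n",
"df_resume = resume[['resume_id', 'Resume_title']].copy()\n",
"df_resume['resume_combo'] = (resume['Resume_title'] + \" \" + resume['Description'] + \" \"\n",
"                             + resume['Skills'] + \" \" + resume['Additional Information']\n",
"                             + \" \" + resume['experience_desc'])"
]
},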
250 | {
251 | "cell_type": "code",
252 | "execution_count": 44,
253 | "metadata": {},
254 | "outputs": [
255 | {
256 | "data": {
257 | "text/plain": [
258 | "0 java developer to prove myself dedicated, wort...\n",
259 | "1 software developer working as software develop...\n",
260 | "2 java developer looking for a challenging caree...\n",
261 | "3 seeking innovative and challenging career assi...\n",
262 | "4 java developer ['project: hr payroll systems...\n",
263 | "5 java developer ['java'] ['have the potenti...\n",
264 | "6 java developer to secure a challenging positio...\n",
265 | "7 searching job for java developer ['c++', ' h...\n",
266 | "8 mca / with 3 years of development experience •...\n",
267 | "9 java developer attain the position of 'java de...\n",
268 | "Name: resume_combo, dtype: object"
269 | ]
270 | },
271 | "execution_count": 44,
272 | "metadata": {},
273 | "output_type": "execute_result"
274 | }
275 | ],
276 | "source": [
277 | "docs = df_resume['resume_combo']\n",
278 | "docs_sample = docs.head(10)\n",
279 | "docs_sample"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 45,
285 | "metadata": {},
286 | "outputs": [
287 | {
288 | "name": "stderr",
289 | "output_type": "stream",
290 | "text": [
291 | "[nltk_data] Downloading package wordnet to\n",
292 | "[nltk_data] C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n",
293 | "[nltk_data] Package wordnet is already up-to-date!\n"
294 | ]
295 | }
296 | ],
297 | "source": [
298 | "#Import all the dependencies\n",
299 | "import nltk\n",
300 | "nltk.download('wordnet')\n",
301 | "from nltk.stem import WordNetLemmatizer\n",
302 | "wordnet_lemmatizer = WordNetLemmatizer()\n",
303 | "from nltk.corpus import stopwords\n",
304 | "from nltk.tokenize import word_tokenize \n",
305 | "set(stopwords.words('english'))\n",
306 | "\n",
307 | "import string\n",
308 | "\n",
309 | "import gensim\n",
310 | "from gensim.test.utils import common_texts\n",
311 | "from gensim.models.doc2vec import Doc2Vec, TaggedDocument"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": 47,
317 | "metadata": {},
318 | "outputs": [
319 | {
320 | "name": "stderr",
321 | "output_type": "stream",
322 | "text": [
323 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n",
324 | " 'stop_words.' % sorted(inconsistent))\n"
325 | ]
326 | },
327 | {
328 | "name": "stdout",
329 | "output_type": "stream",
330 | "text": [
331 | "(14428, 70688)\n",
332 | "(14428, 3)\n"
333 | ]
334 | }
335 | ],
336 | "source": [
337 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
338 | "stopwords = nltk.corpus.stopwords.words('english')\n",
339 | "stopwords.append('ã¯æ’ëœ')\n",
340 | "stopwords.append('\\n')\n",
341 | "stopwords.append('•')\n",
342 | "#Transforms words to TFIDF\n",
343 | "vectorizer = TfidfVectorizer(stop_words = stopwords)\n",
344 | "\n",
345 | "index = 0\n",
346 | "keys = {}\n",
347 | "\n",
348 | "for rem in df_resume.itertuples() :\n",
349 | " key = rem[1]\n",
350 | " keys[key] = index\n",
351 | " index += 1\n",
352 | "\n",
353 | "#Fit the vectorizer to the data\n",
354 | "vectorizer.fit(df_resume['resume_combo'].fillna(''))\n",
355 | "\n",
356 | "#Transform the data\n",
357 | "tfidf_scores = vectorizer.transform(df_resume['resume_combo'].fillna(''))\n",
358 | "\n",
359 | "print(tfidf_scores.shape)\n",
360 | "print(df_resume.shape)"
361 | ]
362 | },
363 | {
364 | "cell_type": "code",
365 | "execution_count": 48,
366 | "metadata": {},
367 | "outputs": [],
368 | "source": [
369 | "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": 49,
375 | "metadata": {},
376 | "outputs": [
377 | {
378 | "data": {
379 | "text/html": [
380 | "\n",
381 | "\n",
394 | "
\n",
395 | " \n",
396 | " \n",
397 | " | \n",
398 | " 00 | \n",
399 | " 000 | \n",
400 | " 0000 | \n",
401 | " 00089765 | \n",
402 | " 00089805 | \n",
403 | " 000webhostapp | \n",
404 | " 001 | \n",
405 | " 002 | \n",
406 | " 003 | \n",
407 | " 00353 | \n",
408 | " ... | \n",
409 | " õle | \n",
410 | " øcreated | \n",
411 | " ǁǁǁǁǁǁ | \n",
412 | " ηadoop | \n",
413 | " τrain | \n",
414 | " τοοls | \n",
415 | " чєαr | \n",
416 | " filed | \n",
417 | " financial | \n",
418 | " fixing | \n",
419 | "
\n",
420 | " \n",
421 | " \n",
422 | " \n",
423 | " | 0 | \n",
424 | " 0.0 | \n",
425 | " 0.0 | \n",
426 | " 0.0 | \n",
427 | " 0.0 | \n",
428 | " 0.0 | \n",
429 | " 0.0 | \n",
430 | " 0.0 | \n",
431 | " 0.0 | \n",
432 | " 0.0 | \n",
433 | " 0.0 | \n",
434 | " ... | \n",
435 | " 0.0 | \n",
436 | " 0.0 | \n",
437 | " 0.0 | \n",
438 | " 0.0 | \n",
439 | " 0.0 | \n",
440 | " 0.0 | \n",
441 | " 0.0 | \n",
442 | " 0.0 | \n",
443 | " 0.0 | \n",
444 | " 0.0 | \n",
445 | "
\n",
446 | " \n",
447 | " | 1 | \n",
448 | " 0.0 | \n",
449 | " 0.0 | \n",
450 | " 0.0 | \n",
451 | " 0.0 | \n",
452 | " 0.0 | \n",
453 | " 0.0 | \n",
454 | " 0.0 | \n",
455 | " 0.0 | \n",
456 | " 0.0 | \n",
457 | " 0.0 | \n",
458 | " ... | \n",
459 | " 0.0 | \n",
460 | " 0.0 | \n",
461 | " 0.0 | \n",
462 | " 0.0 | \n",
463 | " 0.0 | \n",
464 | " 0.0 | \n",
465 | " 0.0 | \n",
466 | " 0.0 | \n",
467 | " 0.0 | \n",
468 | " 0.0 | \n",
469 | "
\n",
470 | " \n",
471 | " | 2 | \n",
472 | " 0.0 | \n",
473 | " 0.0 | \n",
474 | " 0.0 | \n",
475 | " 0.0 | \n",
476 | " 0.0 | \n",
477 | " 0.0 | \n",
478 | " 0.0 | \n",
479 | " 0.0 | \n",
480 | " 0.0 | \n",
481 | " 0.0 | \n",
482 | " ... | \n",
483 | " 0.0 | \n",
484 | " 0.0 | \n",
485 | " 0.0 | \n",
486 | " 0.0 | \n",
487 | " 0.0 | \n",
488 | " 0.0 | \n",
489 | " 0.0 | \n",
490 | " 0.0 | \n",
491 | " 0.0 | \n",
492 | " 0.0 | \n",
493 | "
\n",
494 | " \n",
495 | " | 3 | \n",
496 | " 0.0 | \n",
497 | " 0.0 | \n",
498 | " 0.0 | \n",
499 | " 0.0 | \n",
500 | " 0.0 | \n",
501 | " 0.0 | \n",
502 | " 0.0 | \n",
503 | " 0.0 | \n",
504 | " 0.0 | \n",
505 | " 0.0 | \n",
506 | " ... | \n",
507 | " 0.0 | \n",
508 | " 0.0 | \n",
509 | " 0.0 | \n",
510 | " 0.0 | \n",
511 | " 0.0 | \n",
512 | " 0.0 | \n",
513 | " 0.0 | \n",
514 | " 0.0 | \n",
515 | " 0.0 | \n",
516 | " 0.0 | \n",
517 | "
\n",
518 | " \n",
519 | " | 4 | \n",
520 | " 0.0 | \n",
521 | " 0.0 | \n",
522 | " 0.0 | \n",
523 | " 0.0 | \n",
524 | " 0.0 | \n",
525 | " 0.0 | \n",
526 | " 0.0 | \n",
527 | " 0.0 | \n",
528 | " 0.0 | \n",
529 | " 0.0 | \n",
530 | " ... | \n",
531 | " 0.0 | \n",
532 | " 0.0 | \n",
533 | " 0.0 | \n",
534 | " 0.0 | \n",
535 | " 0.0 | \n",
536 | " 0.0 | \n",
537 | " 0.0 | \n",
538 | " 0.0 | \n",
539 | " 0.0 | \n",
540 | " 0.0 | \n",
541 | "
\n",
542 | " \n",
543 | "
\n",
544 | "
5 rows × 70688 columns
\n",
545 | "
"
546 | ],
547 | "text/plain": [
548 | " 00 000 0000 00089765 00089805 000webhostapp 001 002 003 00353 \\\n",
549 | "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
550 | "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
551 | "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
552 | "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
553 | "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
554 | "\n",
555 | " ... õle øcreated ǁǁǁǁǁǁ ηadoop τrain τοοls чєαr filed financial \\\n",
556 | "0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
557 | "1 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
558 | "2 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
559 | "3 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
560 | "4 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
561 | "\n",
562 | " fixing \n",
563 | "0 0.0 \n",
564 | "1 0.0 \n",
565 | "2 0.0 \n",
566 | "3 0.0 \n",
567 | "4 0.0 \n",
568 | "\n",
569 | "[5 rows x 70688 columns]"
570 | ]
571 | },
572 | "execution_count": 49,
573 | "metadata": {},
574 | "output_type": "execute_result"
575 | }
576 | ],
577 | "source": [
578 | "test.head()"
579 | ]
580 | },
581 | {
582 | "cell_type": "markdown",
583 | "metadata": {},
584 | "source": [
585 | "### Creating my Stopword list\n",
586 | "#### As seen there are so many unwanted tokens like numbers, etc , I need to add them in \"stop words\" list to train model"
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": 50,
592 | "metadata": {},
593 | "outputs": [],
594 | "source": [
595 | "#getting list of all tokens\n",
596 | "word_list = test.columns.tolist()"
597 | ]
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": 51,
602 | "metadata": {},
603 | "outputs": [],
604 | "source": [
605 | "##Getting a list of unwanted words as s_words and adding to stopwords\n",
606 | "s_words =[]\n",
607 | "for word in word_list:\n",
608 | " #print(word)\n",
609 | " if re.search(\"^\\W|^\\d\",word):\n",
610 | " s_words.append(word)"
611 | ]
612 | },
613 | {
614 | "cell_type": "code",
615 | "execution_count": 52,
616 | "metadata": {},
617 | "outputs": [],
618 | "source": [
619 | "s_words.append('') \n",
620 | "from nltk.corpus import stopwords\n",
621 | "stopword_set = set(stopwords.words('english'))\n",
622 | "stopword_set = list(stopword_set)\n",
623 | "stopword_set.extend(s_words)"
624 | ]
625 | },
626 | {
627 | "cell_type": "code",
628 | "execution_count": 53,
629 | "metadata": {},
630 | "outputs": [],
631 | "source": [
632 | "def preprocess(text):\n",
633 | " stop_words = stopword_set\n",
634 | " #0. split words by whitespace\n",
635 | " text = text.split()\n",
636 | " \n",
637 | " \n",
638 | " # 1. lower case\n",
639 | " text = [word.lower() for word in text]\n",
640 | " \n",
641 | " # 2. remove punctuations\n",
642 | " punc_table = str.maketrans('','',string.punctuation)\n",
643 | " text = [word.translate(punc_table) for word in text]\n",
644 | " \n",
645 | " # 3. remove stop words\n",
646 | " text = [word for word in text if word not in stop_words]\n",
647 | " \n",
648 | " return text"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": 54,
654 | "metadata": {},
655 | "outputs": [],
656 | "source": [
657 | "tokenized_doc = []\n",
658 | "doc = df_resume['resume_combo']\n",
659 | "#doc = docs_sample\n",
660 | "for d in doc:\n",
661 | " tokenized_doc.append(preprocess(d))\n",
662 | "#tokenized_doc"
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "execution_count": 55,
668 | "metadata": {},
669 | "outputs": [],
670 | "source": [
671 | "# Convert tokenized document into gensim formated tagged data\n",
672 | "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]"
673 | ]
674 | },
675 | {
676 | "cell_type": "code",
677 | "execution_count": 56,
678 | "metadata": {},
679 | "outputs": [
680 | {
681 | "data": {
682 | "text/plain": [
683 | "14428"
684 | ]
685 | },
686 | "execution_count": 56,
687 | "metadata": {},
688 | "output_type": "execute_result"
689 | }
690 | ],
691 | "source": [
692 | "num_doc = len(tagged_data)\n",
693 | "num_doc\n",
694 | "#confirm length (should be 14428)\n",
695 | "len(tokenized_doc)"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": 58,
701 | "metadata": {},
702 | "outputs": [],
703 | "source": [
704 | "from gensim.test.utils import get_tmpfile\n",
705 | "from gensim.models.callbacks import CallbackAny2Vec\n",
706 | "\n",
707 | "class EpochSaver(CallbackAny2Vec):\n",
708 | "\n",
709 | " def __init__(self, path_prefix):\n",
710 | " self.path_prefix = path_prefix\n",
711 | " self.epoch = 0\n",
712 | "\n",
713 | " def on_epoch_end(self, model):\n",
714 | " output_path = get_tmpfile('{}_epoch{}.model'.format(self.path_prefix, self.epoch))\n",
715 | " model.save(output_path)\n",
716 | " self.epoch += 1"
717 | ]
718 | },
719 | {
720 | "cell_type": "code",
721 | "execution_count": 59,
722 | "metadata": {},
723 | "outputs": [],
724 | "source": [
725 | "class EpochLogger(CallbackAny2Vec):\n",
726 | " \n",
727 | " def __init__(self):\n",
728 | " self.epoch = 0\n",
729 | " \n",
730 | " def on_epoch_begin(self, model):\n",
731 | " print(\"Epoch #{} start\".format(self.epoch))\n",
732 | "\n",
733 | " def on_epoch_end(self, model):\n",
734 | " print(\"Epoch #{} end\".format(self.epoch))\n",
735 | " self.epoch += 1"
736 | ]
737 | },
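{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### `EpochSaver` and `EpochLogger` are only defined here; the model used below was trained and saved separately. For reference, a hedged sketch of how such callbacks plug into gensim training (hyperparameters are illustrative placeholders, not the settings behind the saved model):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: callbacks are passed to train(); model_demo is a throwaway name.\n",
"model_demo = Doc2Vec(vector_size=20, min_count=2)\n",
"model_demo.build_vocab(tagged_data)\n",
"model_demo.train(tagged_data, total_examples=model_demo.corpus_count,\n",
"                 epochs=5, callbacks=[EpochLogger(), EpochSaver('demo_doc2vec')])"
]
},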
738 | {
739 | "cell_type": "code",
740 | "execution_count": 61,
741 | "metadata": {},
742 | "outputs": [],
743 | "source": [
744 | "## Load saved doc2vec model\n",
745 | "model= Doc2Vec.load(\"Model/my_doc2vec.model\")"
746 | ]
747 | },
748 | {
749 | "cell_type": "code",
750 | "execution_count": 62,
751 | "metadata": {},
752 | "outputs": [],
753 | "source": [
754 | "## Get vector value\n",
755 | "vec = np.empty([14428,20])\n",
756 | "\n",
757 | "for k,i in enumerate(tokenized_doc):\n",
758 | " \n",
759 | " #print(i)\n",
760 | " vector = model.infer_vector(i)\n",
761 | " vec[k] = vector\n",
762 | " #vec = np.append(vector)\n",
763 | " #vecf = np.append(vec,vector)\n",
764 | "\n",
765 | "# reshape into 2D\n",
766 | "new_arr = np.reshape(vec,(-1,20))"
767 | ]
768 | },
769 | {
770 | "cell_type": "code",
771 | "execution_count": 64,
772 | "metadata": {},
773 | "outputs": [],
774 | "source": [
775 | "rng = range(1, 21)\n",
776 | "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])"
777 | ]
778 | },
779 | {
780 | "cell_type": "code",
781 | "execution_count": 65,
782 | "metadata": {},
783 | "outputs": [
784 | {
785 | "data": {
786 | "text/html": [
787 | "\n",
788 | "\n",
801 | "
\n",
802 | " \n",
803 | " \n",
804 | " | \n",
805 | " vec_1 | \n",
806 | " vec_2 | \n",
807 | " vec_3 | \n",
808 | " vec_4 | \n",
809 | " vec_5 | \n",
810 | " vec_6 | \n",
811 | " vec_7 | \n",
812 | " vec_8 | \n",
813 | " vec_9 | \n",
814 | " vec_10 | \n",
815 | " vec_11 | \n",
816 | " vec_12 | \n",
817 | " vec_13 | \n",
818 | " vec_14 | \n",
819 | " vec_15 | \n",
820 | " vec_16 | \n",
821 | " vec_17 | \n",
822 | " vec_18 | \n",
823 | " vec_19 | \n",
824 | " vec_20 | \n",
825 | "
\n",
826 | " \n",
827 | " \n",
828 | " \n",
829 | " | 0 | \n",
830 | " -3.145642 | \n",
831 | " -0.409380 | \n",
832 | " 0.701160 | \n",
833 | " -0.938745 | \n",
834 | " 0.585239 | \n",
835 | " 3.585946 | \n",
836 | " -0.120781 | \n",
837 | " 0.111222 | \n",
838 | " 1.644105 | \n",
839 | " 2.184981 | \n",
840 | " -2.117909 | \n",
841 | " -0.085430 | \n",
842 | " -2.877392 | \n",
843 | " 0.239383 | \n",
844 | " -1.582871 | \n",
845 | " 1.435642 | \n",
846 | " -1.051450 | \n",
847 | " 1.960831 | \n",
848 | " 1.786694 | \n",
849 | " -2.375981 | \n",
850 | "
\n",
851 | " \n",
852 | " | 1 | \n",
853 | " -0.786235 | \n",
854 | " -1.306011 | \n",
855 | " -1.383107 | \n",
856 | " -1.669708 | \n",
857 | " 0.832136 | \n",
858 | " 1.849790 | \n",
859 | " 0.178872 | \n",
860 | " -1.736894 | \n",
861 | " 0.741685 | \n",
862 | " 1.553933 | \n",
863 | " -2.916478 | \n",
864 | " -0.712572 | \n",
865 | " -0.502129 | \n",
866 | " -0.849293 | \n",
867 | " 0.435406 | \n",
868 | " 0.339330 | \n",
869 | " 0.060282 | \n",
870 | " -0.415035 | \n",
871 | " 3.203696 | \n",
872 | " -3.607635 | \n",
873 | "
\n",
874 | " \n",
875 | " | 2 | \n",
876 | " -2.747642 | \n",
877 | " -1.721797 | \n",
878 | " -0.910322 | \n",
879 | " -0.775950 | \n",
880 | " 1.472325 | \n",
881 | " 2.455998 | \n",
882 | " -0.852150 | \n",
883 | " -0.150517 | \n",
884 | " 0.844202 | \n",
885 | " 1.380623 | \n",
886 | " -1.018832 | \n",
887 | " 0.777981 | \n",
888 | " -1.977556 | \n",
889 | " 0.853214 | \n",
890 | " -1.281344 | \n",
891 | " 2.195391 | \n",
892 | " 0.800305 | \n",
893 | " 1.078035 | \n",
894 | " 2.166900 | \n",
895 | " -2.658121 | \n",
896 | "
\n",
897 | " \n",
898 | " | 3 | \n",
899 | " -1.771770 | \n",
900 | " -1.375850 | \n",
901 | " -0.475922 | \n",
902 | " -0.784473 | \n",
903 | " -0.377240 | \n",
904 | " 1.596389 | \n",
905 | " 1.094220 | \n",
906 | " -0.253642 | \n",
907 | " 0.468265 | \n",
908 | " 2.149588 | \n",
909 | " -1.234415 | \n",
910 | " 0.295536 | \n",
911 | " -2.615532 | \n",
912 | " 0.115959 | \n",
913 | " -2.044196 | \n",
914 | " -0.769109 | \n",
915 | " -0.716604 | \n",
916 | " 1.145388 | \n",
917 | " 3.452934 | \n",
918 | " -1.008162 | \n",
919 | "
\n",
920 | " \n",
921 | " | 4 | \n",
922 | " -0.097372 | \n",
923 | " -1.405603 | \n",
924 | " -0.801234 | \n",
925 | " -0.248921 | \n",
926 | " -0.376417 | \n",
927 | " -0.157050 | \n",
928 | " -0.290440 | \n",
929 | " -1.440582 | \n",
930 | " -0.169669 | \n",
931 | " 1.190537 | \n",
932 | " -0.291407 | \n",
933 | " -1.080500 | \n",
934 | " -2.950497 | \n",
935 | " 0.031693 | \n",
936 | " 0.119182 | \n",
937 | " -0.883555 | \n",
938 | " 0.178819 | \n",
939 | " -0.858324 | \n",
940 | " 1.239632 | \n",
941 | " -0.043914 | \n",
942 | "
\n",
943 | " \n",
944 | "
\n",
945 | "
"
946 | ],
947 | "text/plain": [
948 | " vec_1 vec_2 vec_3 vec_4 vec_5 vec_6 vec_7 \\\n",
949 | "0 -3.145642 -0.409380 0.701160 -0.938745 0.585239 3.585946 -0.120781 \n",
950 | "1 -0.786235 -1.306011 -1.383107 -1.669708 0.832136 1.849790 0.178872 \n",
951 | "2 -2.747642 -1.721797 -0.910322 -0.775950 1.472325 2.455998 -0.852150 \n",
952 | "3 -1.771770 -1.375850 -0.475922 -0.784473 -0.377240 1.596389 1.094220 \n",
953 | "4 -0.097372 -1.405603 -0.801234 -0.248921 -0.376417 -0.157050 -0.290440 \n",
954 | "\n",
955 | " vec_8 vec_9 vec_10 vec_11 vec_12 vec_13 vec_14 \\\n",
956 | "0 0.111222 1.644105 2.184981 -2.117909 -0.085430 -2.877392 0.239383 \n",
957 | "1 -1.736894 0.741685 1.553933 -2.916478 -0.712572 -0.502129 -0.849293 \n",
958 | "2 -0.150517 0.844202 1.380623 -1.018832 0.777981 -1.977556 0.853214 \n",
959 | "3 -0.253642 0.468265 2.149588 -1.234415 0.295536 -2.615532 0.115959 \n",
960 | "4 -1.440582 -0.169669 1.190537 -0.291407 -1.080500 -2.950497 0.031693 \n",
961 | "\n",
962 | " vec_15 vec_16 vec_17 vec_18 vec_19 vec_20 \n",
963 | "0 -1.582871 1.435642 -1.051450 1.960831 1.786694 -2.375981 \n",
964 | "1 0.435406 0.339330 0.060282 -0.415035 3.203696 -3.607635 \n",
965 | "2 -1.281344 2.195391 0.800305 1.078035 2.166900 -2.658121 \n",
966 | "3 -2.044196 -0.769109 -0.716604 1.145388 3.452934 -1.008162 \n",
967 | "4 0.119182 -0.883555 0.178819 -0.858324 1.239632 -0.043914 "
968 | ]
969 | },
970 | "execution_count": 65,
971 | "metadata": {},
972 | "output_type": "execute_result"
973 | }
974 | ],
975 | "source": [
976 | "vec_df.head(5)"
977 | ]
978 | },
979 | {
980 | "cell_type": "code",
981 | "execution_count": 66,
982 | "metadata": {},
983 | "outputs": [],
984 | "source": [
985 | "con_resume = pd.concat([resume, vec_df], axis=1)\n",
986 | "con_resume.to_csv('wip/con_resume.csv', index=False)"
987 | ]
988 | },
989 | {
990 | "cell_type": "code",
991 | "execution_count": 44,
992 | "metadata": {},
993 | "outputs": [],
994 | "source": [
995 | "#con_resume.info()"
996 | ]
997 | },
998 | {
999 | "cell_type": "code",
1000 | "execution_count": null,
1001 | "metadata": {},
1002 | "outputs": [],
1003 | "source": []
1004 | },
1005 | {
1006 | "cell_type": "code",
1007 | "execution_count": 49,
1008 | "metadata": {},
1009 | "outputs": [],
1010 | "source": [
1011 | "tokenized_doc = []\n",
1012 | "#doc = df_resume['resume_combo']\n",
1013 | "doc = docs_sample\n",
1014 | "for d in doc:\n",
1015 | " tokenized_doc.append(preprocess(d))\n",
1016 | "#tokenized_doc"
1017 | ]
1018 | },
1019 | {
1020 | "cell_type": "code",
1021 | "execution_count": 50,
1022 | "metadata": {},
1023 | "outputs": [],
1024 | "source": [
1025 | "# Convert tokenized document into gensim formated tagged data\n",
1026 | "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]"
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "execution_count": 51,
1032 | "metadata": {},
1033 | "outputs": [
1034 | {
1035 | "data": {
1036 | "text/plain": [
1037 | "10"
1038 | ]
1039 | },
1040 | "execution_count": 51,
1041 | "metadata": {},
1042 | "output_type": "execute_result"
1043 | }
1044 | ],
1045 | "source": [
1046 | "num_doc = len(tagged_data)\n",
1047 | "num_doc\n",
1048 | "#confirm length (should be 38941)\n",
1049 | "len(tokenized_doc)"
1050 | ]
1051 | },
1052 | {
1053 | "cell_type": "code",
1054 | "execution_count": 58,
1055 | "metadata": {},
1056 | "outputs": [],
1057 | "source": [
1058 | "## Load saved doc2vec model\n",
1059 | "model= Doc2Vec.load(\"my_doc2vec.model\")\n",
1060 | "\n",
1061 | "## Get vector value\n",
1062 | "vec = np.empty([10,20])\n",
1063 | "\n",
1064 | "for k,i in enumerate(tokenized_doc):\n",
1065 | " \n",
1066 | " #print(i)\n",
1067 | " vector = model.infer_vector(i)\n",
1068 | " vec[k] = vector\n",
1069 | " #vec = np.append(vector)\n",
1070 | " #vecf = np.append(vec,vector)\n",
1071 | "\n",
1072 | "# reshape into 2D\n",
1073 | "new_arr = np.reshape(vec,(-1,20))"
1074 | ]
1075 | },
1076 | {
1077 | "cell_type": "code",
1078 | "execution_count": null,
1079 | "metadata": {},
1080 | "outputs": [],
1081 | "source": [
1082 | "test = np.array([[1,2,3],[4,5,6]])\n",
1083 | "test[0]"
1084 | ]
1085 | },
1086 | {
1087 | "cell_type": "code",
1088 | "execution_count": 61,
1089 | "metadata": {},
1090 | "outputs": [
1091 | {
1092 | "data": {
1093 | "text/plain": [
1094 | "array([-3.14492106, -0.41021681, 0.70149601, -0.93887955, 0.58496076,\n",
1095 | " 3.58589458, -0.12033088, 0.11019378, 1.64519656, 2.18371987,\n",
1096 | " -2.11720061, -0.08485675, -2.87654066, 0.24021174, -1.58367932,\n",
1097 | " 1.43522847, -1.05121636, 1.96061814, 1.78778028, -2.37729073])"
1098 | ]
1099 | },
1100 | "execution_count": 61,
1101 | "metadata": {},
1102 | "output_type": "execute_result"
1103 | }
1104 | ],
1105 | "source": [
1106 | "new_arr[0]"
1107 | ]
1108 | },
1109 | {
1110 | "cell_type": "code",
1111 | "execution_count": 62,
1112 | "metadata": {},
1113 | "outputs": [],
1114 | "source": [
1115 | "rng = range(1, 21)\n",
1116 | "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])"
1117 | ]
1118 | },
1119 | {
1120 | "cell_type": "code",
1121 | "execution_count": 63,
1122 | "metadata": {},
1123 | "outputs": [
1124 | {
1125 | "name": "stdout",
1126 | "output_type": "stream",
1127 | "text": [
1128 | "\n",
1129 | "RangeIndex: 10 entries, 0 to 9\n",
1130 | "Data columns (total 20 columns):\n",
1131 | " # Column Non-Null Count Dtype \n",
1132 | "--- ------ -------------- ----- \n",
1133 | " 0 vec_1 10 non-null float64\n",
1134 | " 1 vec_2 10 non-null float64\n",
1135 | " 2 vec_3 10 non-null float64\n",
1136 | " 3 vec_4 10 non-null float64\n",
1137 | " 4 vec_5 10 non-null float64\n",
1138 | " 5 vec_6 10 non-null float64\n",
1139 | " 6 vec_7 10 non-null float64\n",
1140 | " 7 vec_8 10 non-null float64\n",
1141 | " 8 vec_9 10 non-null float64\n",
1142 | " 9 vec_10 10 non-null float64\n",
1143 | " 10 vec_11 10 non-null float64\n",
1144 | " 11 vec_12 10 non-null float64\n",
1145 | " 12 vec_13 10 non-null float64\n",
1146 | " 13 vec_14 10 non-null float64\n",
1147 | " 14 vec_15 10 non-null float64\n",
1148 | " 15 vec_16 10 non-null float64\n",
1149 | " 16 vec_17 10 non-null float64\n",
1150 | " 17 vec_18 10 non-null float64\n",
1151 | " 18 vec_19 10 non-null float64\n",
1152 | " 19 vec_20 10 non-null float64\n",
1153 | "dtypes: float64(20)\n",
1154 | "memory usage: 1.7 KB\n"
1155 | ]
1156 | }
1157 | ],
1158 | "source": [
1159 | "vec_df.info()"
1160 | ]
1161 | },
1162 | {
1163 | "cell_type": "code",
1164 | "execution_count": 35,
1165 | "metadata": {},
1166 | "outputs": [],
1167 | "source": [
1168 | "r1.to_csv('test_r.csv',index=False)"
1169 | ]
1170 | },
1171 | {
1172 | "cell_type": "code",
1173 | "execution_count": 24,
1174 | "metadata": {},
1175 | "outputs": [],
1176 | "source": [
1177 | "r1 = resume.head(10)"
1178 | ]
1179 | },
1180 | {
1181 | "cell_type": "code",
1182 | "execution_count": 36,
1183 | "metadata": {
1184 | "scrolled": false
1185 | },
1186 | "outputs": [],
1187 | "source": [
1188 | "# read each work experience\n",
1189 | "resume['work_experiences'] = resume['work_experiences'].str.lower()\n",
1190 | "\n",
1191 | "resume_all_desc = []\n",
1192 | "for index, rows in resume.iterrows():\n",
1193 | " #print('#@#@#@#@#@@#@#@#@#@##@@#@#@@##@#@#@#@#@#@##@#@#@##@#@@#@#@#')\n",
1194 | " #print(f'resume no. {index}')\n",
1195 | " resume_desc= []\n",
1196 | " #pick work experience col and read it as JSON \n",
1197 | " \n",
1198 | " work = resume['work_experiences'][index]\n",
1199 | " try: result_work = eval(work)\n",
1200 | " except: continue\n",
1201 | " #print(f'resume : {index}')\n",
1202 | " #read description to match with job\n",
1203 | " \n",
1204 | " for i in result_work: \n",
1205 | " w_title_n = (result_work[0][0]['wtitle:']) \n",
1206 | " w_company= (result_work[i][1]['wcompany:'])\n",
1207 | "# resume_desc.append(w_company) \n",
1208 | " w_city= (result_work[i][2]['wcity:'])\n",
1209 | " w_state= (result_work[i][3]['wstate:'])\n",
1210 | " w_duration= (result_work[i][4]['wduration:'])\n",
1211 | " \n",
1212 | " w_descr= (result_work[i][5]['wdescr:'])\n",
1213 | " if (w_descr == 'none'):\n",
1214 | " continue\n",
1215 | " #print(w_descr)\n",
1216 | " #print('**************')\n",
1217 | " resume_desc.append(w_descr + '') \n",
1218 | " \n",
1219 | " #print(resume_desc)\n",
1220 | " resume_all_desc.append(resume_desc)\n",
1221 | "#print(resume_test)\n",
1222 | "resume['experience_desc'] = resume_all_desc"
1223 | ]
1224 | },
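{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Note: `eval` on raw CSV strings will execute arbitrary Python. A safer sketch using `ast.literal_eval`, which only parses plain Python literals and raises on anything else (`parse_work` is an illustrative helper, not part of the original flow):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: parse the stringified work-experience structure without eval.\n",
"import ast\n",
"\n",
"def parse_work(work):\n",
"    try:\n",
"        return ast.literal_eval(work)\n",
"    except (ValueError, SyntaxError):\n",
"        return None  # treat malformed rows as missing, like the loop above"
]
},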
1225 | {
1226 | "cell_type": "code",
1227 | "execution_count": 37,
1228 | "metadata": {},
1229 | "outputs": [],
1230 | "source": [
1231 | "#resume.to_csv('wip/resume_sorted5.csv',index=False)"
1232 | ]
1233 | }
1234 | ],
1235 | "metadata": {
1236 | "kernelspec": {
1237 | "display_name": "Python 3",
1238 | "language": "python",
1239 | "name": "python3"
1240 | },
1241 | "language_info": {
1242 | "codemirror_mode": {
1243 | "name": "ipython",
1244 | "version": 3
1245 | },
1246 | "file_extension": ".py",
1247 | "mimetype": "text/x-python",
1248 | "name": "python",
1249 | "nbconvert_exporter": "python",
1250 | "pygments_lexer": "ipython3",
1251 | "version": "3.7.6"
1252 | }
1253 | },
1254 | "nbformat": 4,
1255 | "nbformat_minor": 4
1256 | }
1257 |
--------------------------------------------------------------------------------
/Data/Job-Locations/india-city-state.csv:
--------------------------------------------------------------------------------
1 | city_id,city_name,state
2 | 1,Kolhapur,Maharashtra
3 | 2,Port Blair,Andaman & Nicobar Islands
4 | 3,Adilabad,Andhra Pradesh
5 | 4,Adoni,Andhra Pradesh
6 | 5,Amadalavalasa,Andhra Pradesh
7 | 6,Amalapuram,Andhra Pradesh
8 | 7,Anakapalle,Andhra Pradesh
9 | 8,Anantapur,Andhra Pradesh
10 | 9,Badepalle,Andhra Pradesh
11 | 10,Banganapalle,Andhra Pradesh
12 | 11,Bapatla,Andhra Pradesh
13 | 12,Bellampalle,Andhra Pradesh
14 | 13,Bethamcherla,Andhra Pradesh
15 | 14,Bhadrachalam,Andhra Pradesh
16 | 15,Bhainsa,Andhra Pradesh
17 | 16,Bheemunipatnam,Andhra Pradesh
18 | 17,Bhimavaram,Andhra Pradesh
19 | 18,Bhongir,Andhra Pradesh
20 | 19,Bobbili,Andhra Pradesh
21 | 20,Bodhan,Andhra Pradesh
22 | 21,Chilakaluripet,Andhra Pradesh
23 | 22,Chirala,Andhra Pradesh
24 | 23,Chittoor,Andhra Pradesh
25 | 24,Cuddapah,Andhra Pradesh
26 | 25,Devarakonda,Andhra Pradesh
27 | 26,Dharmavaram,Andhra Pradesh
28 | 27,Eluru,Andhra Pradesh
29 | 28,Farooqnagar,Andhra Pradesh
30 | 29,Gadwal,Andhra Pradesh
31 | 30,Gooty,Andhra Pradesh
32 | 31,Gudivada,Andhra Pradesh
33 | 32,Gudur,Andhra Pradesh
34 | 33,Guntakal,Andhra Pradesh
35 | 34,Guntur,Andhra Pradesh
36 | 35,Hanuman Junction,Andhra Pradesh
37 | 36,Hindupur,Andhra Pradesh
38 | 37,Hyderabad,Andhra Pradesh
39 | 38,Ichchapuram,Andhra Pradesh
40 | 39,Jaggaiahpet,Andhra Pradesh
41 | 40,Jagtial,Andhra Pradesh
42 | 41,Jammalamadugu,Andhra Pradesh
43 | 42,Jangaon,Andhra Pradesh
44 | 43,Kadapa,Andhra Pradesh
45 | 44,Kadiri,Andhra Pradesh
46 | 45,Kagaznagar,Andhra Pradesh
47 | 46,Kakinada,Andhra Pradesh
48 | 47,Kalyandurg,Andhra Pradesh
49 | 48,Kamareddy,Andhra Pradesh
50 | 49,Kandukur,Andhra Pradesh
51 | 50,Karimnagar,Andhra Pradesh
52 | 51,Kavali,Andhra Pradesh
53 | 52,Khammam,Andhra Pradesh
54 | 53,Koratla,Andhra Pradesh
55 | 54,Kothagudem,Andhra Pradesh
56 | 55,Kothapeta,Andhra Pradesh
57 | 56,Kovvur,Andhra Pradesh
58 | 57,Kurnool,Andhra Pradesh
59 | 58,Kyathampalle,Andhra Pradesh
60 | 59,Macherla,Andhra Pradesh
61 | 60,Machilipatnam,Andhra Pradesh
62 | 61,Madanapalle,Andhra Pradesh
63 | 62,Mahbubnagar,Andhra Pradesh
64 | 63,Mancherial,Andhra Pradesh
65 | 64,Mandamarri,Andhra Pradesh
66 | 65,Mandapeta,Andhra Pradesh
67 | 66,Manuguru,Andhra Pradesh
68 | 67,Markapur,Andhra Pradesh
69 | 68,Medak,Andhra Pradesh
70 | 69,Miryalaguda,Andhra Pradesh
71 | 70,Mogalthur,Andhra Pradesh
72 | 71,Nagari,Andhra Pradesh
73 | 72,Nagarkurnool,Andhra Pradesh
74 | 73,Nandyal,Andhra Pradesh
75 | 74,Narasapur,Andhra Pradesh
76 | 75,Narasaraopet,Andhra Pradesh
77 | 76,Narayanpet,Andhra Pradesh
78 | 77,Narsipatnam,Andhra Pradesh
79 | 78,Nellore,Andhra Pradesh
80 | 79,Nidadavole,Andhra Pradesh
81 | 80,Nirmal,Andhra Pradesh
82 | 81,Nizamabad,Andhra Pradesh
83 | 82,Nuzvid,Andhra Pradesh
84 | 83,Ongole,Andhra Pradesh
85 | 84,Palacole,Andhra Pradesh
86 | 85,Palasa Kasibugga,Andhra Pradesh
87 | 86,Palwancha,Andhra Pradesh
88 | 87,Parvathipuram,Andhra Pradesh
89 | 88,Pedana,Andhra Pradesh
90 | 89,Peddapuram,Andhra Pradesh
91 | 90,Pithapuram,Andhra Pradesh
92 | 91,Pondur,Andhra pradesh
93 | 92,Ponnur,Andhra Pradesh
94 | 93,Proddatur,Andhra Pradesh
95 | 94,Punganur,Andhra Pradesh
96 | 95,Puttur,Andhra Pradesh
97 | 96,Rajahmundry,Andhra Pradesh
98 | 97,Rajam,Andhra Pradesh
99 | 98,Ramachandrapuram,Andhra Pradesh
100 | 99,Ramagundam,Andhra Pradesh
101 | 100,Rayachoti,Andhra Pradesh
102 | 101,Rayadurg,Andhra Pradesh
103 | 102,Renigunta,Andhra Pradesh
104 | 103,Repalle,Andhra Pradesh
105 | 104,Sadasivpet,Andhra Pradesh
106 | 105,Salur,Andhra Pradesh
107 | 106,Samalkot,Andhra Pradesh
108 | 107,Sangareddy,Andhra Pradesh
109 | 108,Sattenapalle,Andhra Pradesh
110 | 109,Siddipet,Andhra Pradesh
111 | 110,Singapur,Andhra Pradesh
112 | 111,Sircilla,Andhra Pradesh
113 | 112,Srikakulam,Andhra Pradesh
114 | 113,Srikalahasti,Andhra Pradesh
115 | 115,Suryapet,Andhra Pradesh
116 | 116,Tadepalligudem,Andhra Pradesh
117 | 117,Tadpatri,Andhra Pradesh
118 | 118,Tandur,Andhra Pradesh
119 | 119,Tanuku,Andhra Pradesh
120 | 120,Tenali,Andhra Pradesh
121 | 121,Tirupati,Andhra Pradesh
122 | 122,Tuni,Andhra Pradesh
123 | 123,Uravakonda,Andhra Pradesh
124 | 124,Venkatagiri,Andhra Pradesh
125 | 125,Vicarabad,Andhra Pradesh
126 | 126,Vijayawada,Andhra Pradesh
127 | 127,Vinukonda,Andhra Pradesh
128 | 128,Visakhapatnam,Andhra Pradesh
129 | 129,Vizianagaram,Andhra Pradesh
130 | 130,Wanaparthy,Andhra Pradesh
131 | 131,Warangal,Andhra Pradesh
132 | 132,Yellandu,Andhra Pradesh
133 | 133,Yemmiganur,Andhra Pradesh
134 | 134,Yerraguntla,Andhra Pradesh
135 | 135,Zahirabad,Andhra Pradesh
136 | 136,Rajampet,Andhra Pradesh
137 | 137,Along,Arunachal Pradesh
138 | 138,Bomdila,Arunachal Pradesh
139 | 139,Itanagar,Arunachal Pradesh
140 | 140,Naharlagun,Arunachal Pradesh
141 | 141,Pasighat,Arunachal Pradesh
142 | 142,Abhayapuri,Assam
143 | 143,Amguri,Assam
144 | 144,Anandnagaar,Assam
145 | 145,Barpeta,Assam
146 | 146,Barpeta Road,Assam
147 | 147,Bilasipara,Assam
148 | 148,Bongaigaon,Assam
149 | 149,Dhekiajuli,Assam
150 | 150,Dhubri,Assam
151 | 151,Dibrugarh,Assam
152 | 152,Digboi,Assam
153 | 153,Diphu,Assam
154 | 154,Dispur,Assam
155 | 156,Gauripur,Assam
156 | 157,Goalpara,Assam
157 | 158,Golaghat,Assam
158 | 159,Guwahati,Assam
159 | 160,Haflong,Assam
160 | 161,Hailakandi,Assam
161 | 162,Hojai,Assam
162 | 163,Jorhat,Assam
163 | 164,Karimganj,Assam
164 | 165,Kokrajhar,Assam
165 | 166,Lanka,Assam
166 | 167,Lumding,Assam
167 | 168,Mangaldoi,Assam
168 | 169,Mankachar,Assam
169 | 170,Margherita,Assam
170 | 171,Mariani,Assam
171 | 172,Marigaon,Assam
172 | 173,Nagaon,Assam
173 | 174,Nalbari,Assam
174 | 175,North Lakhimpur,Assam
175 | 176,Rangia,Assam
176 | 177,Sibsagar,Assam
177 | 178,Silapathar,Assam
178 | 179,Silchar,Assam
179 | 180,Tezpur,Assam
180 | 181,Tinsukia,Assam
181 | 182,Amarpur,Bihar
182 | 183,Araria,Bihar
183 | 184,Areraj,Bihar
184 | 185,Arrah,Bihar
185 | 186,Asarganj,Bihar
186 | 187,Aurangabad,Bihar
187 | 188,Bagaha,Bihar
188 | 189,Bahadurganj,Bihar
189 | 190,Bairgania,Bihar
190 | 191,Bakhtiarpur,Bihar
191 | 192,Banka,Bihar
192 | 193,Banmankhi Bazar,Bihar
193 | 194,Barahiya,Bihar
194 | 195,Barauli,Bihar
195 | 196,Barbigha,Bihar
196 | 197,Barh,Bihar
197 | 198,Begusarai,Bihar
198 | 199,Behea,Bihar
199 | 200,Bettiah,Bihar
200 | 201,Bhabua,Bihar
201 | 202,Bhagalpur,Bihar
202 | 203,Bihar Sharif,Bihar
203 | 204,Bikramganj,Bihar
204 | 205,Bodh Gaya,Bihar
205 | 206,Buxar,Bihar
206 | 207,Chandan Bara,Bihar
207 | 208,Chanpatia,Bihar
208 | 209,Chhapra,Bihar
209 | 210,Colgong,Bihar
210 | 211,Dalsinghsarai,Bihar
211 | 212,Darbhanga,Bihar
212 | 213,Daudnagar,Bihar
213 | 214,Dehri-on-Sone,Bihar
214 | 215,Dhaka,Bihar
215 | 216,Dighwara,Bihar
216 | 217,Dumraon,Bihar
217 | 218,Fatwah,Bihar
218 | 219,Forbesganj,Bihar
219 | 220,Gaya,Bihar
220 | 221,Gogri Jamalpur,Bihar
221 | 222,Gopalganj,Bihar
222 | 223,Hajipur,Bihar
223 | 224,Hilsa,Bihar
224 | 225,Hisua,Bihar
225 | 226,Islampur,Bihar
226 | 227,Jagdispur,Bihar
227 | 228,Jamalpur,Bihar
228 | 229,Jamui,Bihar
229 | 230,Jehanabad,Bihar
230 | 231,Jhajha,Bihar
231 | 232,Jhanjharpur,Bihar
232 | 233,Jogabani,Bihar
233 | 234,Kanti,Bihar
234 | 235,Katihar,Bihar
235 | 236,Khagaria,Bihar
236 | 237,Kharagpur,Bihar
237 | 238,Kishanganj,Bihar
238 | 239,Lakhisarai,Bihar
239 | 240,Lalganj,Bihar
240 | 241,Madhepura,Bihar
241 | 242,Madhubani,Bihar
242 | 243,Maharajganj,Bihar
243 | 244,Mahnar Bazar,Bihar
244 | 245,Makhdumpur,Bihar
245 | 246,Maner,Bihar
246 | 247,Manihari,Bihar
247 | 248,Marhaura,Bihar
248 | 249,Masaurhi,Bihar
249 | 250,Mirganj,Bihar
250 | 251,Mokameh,Bihar
251 | 252,Motihari,Bihar
252 | 253,Motipur,Bihar
253 | 254,Munger,Bihar
254 | 255,Murliganj,Bihar
255 | 256,Muzaffarpur,Bihar
256 | 257,Narkatiaganj,Bihar
257 | 258,Naugachhia,Bihar
258 | 259,Nawada,Bihar
259 | 260,Nokha,Bihar
260 | 261,Patna,Bihar
261 | 262,Piro,Bihar
262 | 263,Purnia,Bihar
263 | 264,Rafiganj,Bihar
264 | 265,Rajgir,Bihar
265 | 266,Ramnagar,Bihar
266 | 267,Raxaul Bazar,Bihar
267 | 268,Revelganj,Bihar
268 | 269,Rosera,Bihar
269 | 270,Saharsa,Bihar
270 | 271,Samastipur,Bihar
271 | 272,Sasaram,Bihar
272 | 273,Sheikhpura,Bihar
273 | 274,Sheohar,Bihar
274 | 275,Sherghati,Bihar
275 | 276,Silao,Bihar
276 | 277,Sitamarhi,Bihar
277 | 278,Siwan,Bihar
278 | 279,Sonepur,Bihar
279 | 280,Sugauli,Bihar
280 | 281,Sultanganj,Bihar
281 | 282,Supaul,Bihar
282 | 283,Warisaliganj,Bihar
283 | 284,Ahiwara,Chhattisgarh
284 | 285,Akaltara,Chhattisgarh
285 | 286,Ambagarh Chowki,Chhattisgarh
286 | 287,Ambikapur,Chhattisgarh
287 | 288,Arang,Chhattisgarh
288 | 289,Bade Bacheli,Chhattisgarh
289 | 290,Balod,Chhattisgarh
290 | 291,Baloda Bazar,Chhattisgarh
291 | 292,Bemetra,Chhattisgarh
292 | 293,Bhatapara,Chhattisgarh
293 | 294,Bilaspur,Chhattisgarh
294 | 295,Birgaon,Chhattisgarh
295 | 296,Champa,Chhattisgarh
296 | 297,Chirmiri,Chhattisgarh
297 | 298,Dalli-Rajhara,Chhattisgarh
298 | 299,Dhamtari,Chhattisgarh
299 | 300,Dipka,Chhattisgarh
300 | 301,Dongargarh,Chhattisgarh
301 | 302,Durg-Bhilai Nagar,Chhattisgarh
302 | 303,Gobranawapara,Chhattisgarh
303 | 304,Jagdalpur,Chhattisgarh
304 | 305,Janjgir,Chhattisgarh
305 | 306,Jashpurnagar,Chhattisgarh
306 | 307,Kanker,Chhattisgarh
307 | 308,Kawardha,Chhattisgarh
308 | 309,Kondagaon,Chhattisgarh
309 | 310,Korba,Chhattisgarh
310 | 311,Mahasamund,Chhattisgarh
311 | 312,Mahendragarh,Chhattisgarh
312 | 313,Mungeli,Chhattisgarh
313 | 314,Naila Janjgir,Chhattisgarh
314 | 315,Raigarh,Chhattisgarh
315 | 316,Raipur,Chhattisgarh
316 | 317,Rajnandgaon,Chhattisgarh
317 | 318,Sakti,Chhattisgarh
318 | 319,Tilda Newra,Chhattisgarh
319 | 320,Amli,Dadra & Nagar Haveli
320 | 321,Silvassa,Dadra & Nagar Haveli
321 | 322,Daman and Diu,Daman & Diu
322 | 323,Daman and Diu,Daman & Diu
323 | 324,Asola,Delhi
324 | 325,Delhi,Delhi
325 | 326,Aldona,Goa
326 | 327,Curchorem Cacora,Goa
327 | 328,Madgaon,Goa
328 | 329,Mapusa,Goa
329 | 330,Margao,Goa
330 | 331,Marmagao,Goa
331 | 332,Panaji,Goa
332 | 333,Ahmedabad,Gujarat
333 | 334,Amreli,Gujarat
334 | 335,Anand,Gujarat
335 | 336,Ankleshwar,Gujarat
336 | 337,Bharuch,Gujarat
337 | 338,Bhavnagar,Gujarat
338 | 339,Bhuj,Gujarat
339 | 340,Cambay,Gujarat
340 | 341,Dahod,Gujarat
341 | 342,Deesa,Gujarat
342 | 343,Dharampur,Gujarat
343 | 344,Dholka,Gujarat
344 | 345,Gandhinagar,Gujarat
345 | 346,Godhra,Gujarat
346 | 347,Himatnagar,Gujarat
347 | 348,Idar,Gujarat
348 | 349,Jamnagar,Gujarat
349 | 350,Junagadh,Gujarat
350 | 351,Kadi,Gujarat
351 | 352,Kalavad,Gujarat
352 | 353,Kalol,Gujarat
353 | 354,Kapadvanj,Gujarat
354 | 355,Karjan,Gujarat
355 | 356,Keshod,Gujarat
356 | 357,Khambhalia,Gujarat
357 | 358,Khambhat,Gujarat
358 | 359,Kheda,Gujarat
359 | 360,Khedbrahma,Gujarat
360 | 361,Kheralu,Gujarat
361 | 362,Kodinar,Gujarat
362 | 363,Lathi,Gujarat
363 | 364,Limbdi,Gujarat
364 | 365,Lunawada,Gujarat
365 | 366,Mahesana,Gujarat
366 | 367,Mahuva,Gujarat
367 | 368,Manavadar,Gujarat
368 | 369,Mandvi,Gujarat
369 | 370,Mangrol,Gujarat
370 | 371,Mansa,Gujarat
371 | 372,Mehmedabad,Gujarat
372 | 373,Modasa,Gujarat
373 | 374,Morvi,Gujarat
374 | 375,Nadiad,Gujarat
375 | 376,Navsari,Gujarat
376 | 377,Padra,Gujarat
377 | 378,Palanpur,Gujarat
378 | 379,Palitana,Gujarat
379 | 380,Pardi,Gujarat
380 | 381,Patan,Gujarat
381 | 382,Petlad,Gujarat
382 | 383,Porbandar,Gujarat
383 | 384,Radhanpur,Gujarat
384 | 385,Rajkot,Gujarat
385 | 386,Rajpipla,Gujarat
386 | 387,Rajula,Gujarat
387 | 388,Ranavav,Gujarat
388 | 389,Rapar,Gujarat
389 | 390,Salaya,Gujarat
390 | 391,Sanand,Gujarat
391 | 392,Savarkundla,Gujarat
392 | 393,Sidhpur,Gujarat
393 | 394,Sihor,Gujarat
394 | 395,Songadh,Gujarat
395 | 396,Surat,Gujarat
396 | 397,Talaja,Gujarat
397 | 398,Thangadh,Gujarat
398 | 399,Tharad,Gujarat
399 | 400,Umbergaon,Gujarat
400 | 401,Umreth,Gujarat
401 | 402,Una,Gujarat
402 | 403,Unjha,Gujarat
403 | 404,Upleta,Gujarat
404 | 405,Vadnagar,Gujarat
405 | 406,Vadodara,Gujarat
406 | 407,Valsad,Gujarat
407 | 408,Vapi,Gujarat
408 | 409,Vapi,Gujarat
409 | 410,Veraval,Gujarat
410 | 411,Vijapur,Gujarat
411 | 412,Viramgam,Gujarat
412 | 413,Visnagar,Gujarat
413 | 414,Vyara,Gujarat
414 | 415,Wadhwan,Gujarat
415 | 416,Wankaner,Gujarat
416 | 417,Adalaj,Gujarat
417 | 418,Adityana,Gujarat
418 | 419,Alang,Gujarat
419 | 420,Ambaji,Gujarat
420 | 421,Ambaliyasan,Gujarat
421 | 422,Andada,Gujarat
422 | 423,Anjar,Gujarat
423 | 424,Anklav,Gujarat
424 | 425,Antaliya,Gujarat
425 | 426,Arambhada,Gujarat
426 | 427,Atul,Gujarat
427 | 428,Ballabhgarh,Haryana
428 | 429,Ambala,Haryana
429 | 430,Ambala,Haryana
430 | 431,Asankhurd,Haryana
431 | 432,Assandh,Haryana
432 | 433,Ateli,Haryana
433 | 434,Babiyal,Haryana
434 | 435,Bahadurgarh,Haryana
435 | 436,Barwala,Haryana
436 | 437,Bhiwani,Haryana
437 | 438,Charkhi Dadri,Haryana
438 | 439,Cheeka,Haryana
439 | 440,Ellenabad 2,Haryana
440 | 441,Faridabad,Haryana
441 | 442,Fatehabad,Haryana
442 | 443,Ganaur,Haryana
443 | 444,Gharaunda,Haryana
444 | 445,Gohana,Haryana
445 | 446,Gurgaon,Haryana
446 | 447,Haibat(Yamuna Nagar),Haryana
447 | 448,Hansi,Haryana
448 | 449,Hisar,Haryana
449 | 450,Hodal,Haryana
450 | 451,Jhajjar,Haryana
451 | 452,Jind,Haryana
452 | 453,Kaithal,Haryana
453 | 454,Kalan Wali,Haryana
454 | 455,Kalka,Haryana
455 | 456,Karnal,Haryana
456 | 457,Ladwa,Haryana
457 | 458,Mahendragarh,Haryana
458 | 459,Mandi Dabwali,Haryana
459 | 460,Narnaul,Haryana
460 | 461,Narwana,Haryana
461 | 462,Palwal,Haryana
462 | 463,Panchkula,Haryana
463 | 464,Panipat,Haryana
464 | 465,Pehowa,Haryana
465 | 466,Pinjore,Haryana
466 | 467,Rania,Haryana
467 | 468,Ratia,Haryana
468 | 469,Rewari,Haryana
469 | 470,Rohtak,Haryana
470 | 471,Safidon,Haryana
471 | 472,Samalkha,Haryana
472 | 473,Shahbad,Haryana
473 | 474,Sirsa,Haryana
474 | 475,Sohna,Haryana
475 | 476,Sonipat,Haryana
476 | 477,Taraori,Haryana
477 | 478,Thanesar,Haryana
478 | 479,Tohana,Haryana
479 | 480,Yamunanagar,Haryana
480 | 481,Arki,Himachal Pradesh
481 | 482,Baddi,Himachal Pradesh
482 | 483,Bilaspur,Himachal Pradesh
483 | 484,Chamba,Himachal Pradesh
484 | 485,Dalhousie,Himachal Pradesh
485 | 486,Dharamsala,Himachal Pradesh
486 | 487,Hamirpur,Himachal Pradesh
487 | 488,Mandi,Himachal Pradesh
488 | 489,Nahan,Himachal Pradesh
489 | 490,Shimla,Himachal Pradesh
490 | 491,Solan,Himachal Pradesh
491 | 492,Sundarnagar,Himachal Pradesh
492 | 493,Jammu,Jammu & Kashmir
493 | 494,Achabbal,Jammu & Kashmir
494 | 495,Akhnoor,Jammu & Kashmir
495 | 496,Anantnag,Jammu & Kashmir
496 | 497,Arnia,Jammu & Kashmir
497 | 498,Awantipora,Jammu & Kashmir
498 | 499,Bandipore,Jammu & Kashmir
499 | 500,Baramula,Jammu & Kashmir
500 | 501,Kathua,Jammu & Kashmir
501 | 502,Leh,Jammu & Kashmir
502 | 503,Punch,Jammu & Kashmir
503 | 504,Rajauri,Jammu & Kashmir
504 | 505,Sopore,Jammu & Kashmir
505 | 506,Srinagar,Jammu & Kashmir
506 | 507,Udhampur,Jammu & Kashmir
507 | 508,Amlabad,Jharkhand
508 | 509,Ara,Jharkhand
509 | 510,Barughutu,Jharkhand
510 | 511,Bokaro Steel City,Jharkhand
511 | 512,Chaibasa,Jharkhand
512 | 513,Chakradharpur,Jharkhand
513 | 514,Chandrapura,Jharkhand
514 | 515,Chatra,Jharkhand
515 | 516,Chirkunda,Jharkhand
516 | 517,Churi,Jharkhand
517 | 518,Daltonganj,Jharkhand
518 | 519,Deoghar,Jharkhand
519 | 520,Dhanbad,Jharkhand
520 | 521,Dumka,Jharkhand
521 | 522,Garhwa,Jharkhand
522 | 523,Ghatshila,Jharkhand
523 | 524,Giridih,Jharkhand
524 | 525,Godda,Jharkhand
525 | 526,Gomoh,Jharkhand
526 | 527,Gumia,Jharkhand
527 | 528,Gumla,Jharkhand
528 | 529,Hazaribag,Jharkhand
529 | 530,Hussainabad,Jharkhand
530 | 531,Jamshedpur,Jharkhand
531 | 532,Jamtara,Jharkhand
532 | 533,Jhumri Tilaiya,Jharkhand
533 | 534,Khunti,Jharkhand
534 | 535,Lohardaga,Jharkhand
535 | 536,Madhupur,Jharkhand
536 | 537,Mihijam,Jharkhand
537 | 538,Musabani,Jharkhand
538 | 539,Pakaur,Jharkhand
539 | 540,Patratu,Jharkhand
540 | 541,Phusro,Jharkhand
541 | 542,Ramngarh,Jharkhand
542 | 543,Ranchi,Jharkhand
543 | 544,Sahibganj,Jharkhand
544 | 545,Saunda,Jharkhand
545 | 546,Simdega,Jharkhand
546 | 547,Tenu Dam-cum- Kathhara,Jharkhand
547 | 548,Arasikere,Karnataka
548 | 549,Bangalore,Karnataka
549 | 550,Belgaum,Karnataka
550 | 551,Bellary,Karnataka
551 | 552,Chamrajnagar,Karnataka
552 | 553,Chikkaballapur,Karnataka
553 | 554,Chintamani,Karnataka
554 | 555,Chitradurga,Karnataka
555 | 556,Gulbarga,Karnataka
556 | 557,Gundlupet,Karnataka
557 | 558,Hassan,Karnataka
558 | 559,Hospet,Karnataka
559 | 560,Hubli,Karnataka
560 | 561,Karkala,Karnataka
561 | 562,Karwar,Karnataka
562 | 563,Kolar,Karnataka
563 | 564,Kota,Karnataka
564 | 565,Lakshmeshwar,Karnataka
565 | 566,Lingsugur,Karnataka
566 | 567,Maddur,Karnataka
567 | 568,Madhugiri,Karnataka
568 | 569,Madikeri,Karnataka
569 | 570,Magadi,Karnataka
570 | 571,Mahalingpur,Karnataka
571 | 572,Malavalli,Karnataka
572 | 573,Malur,Karnataka
573 | 574,Mandya,Karnataka
574 | 575,Mangalore,Karnataka
575 | 576,Manvi,Karnataka
576 | 577,Mudalgi,Karnataka
577 | 578,Mudbidri,Karnataka
578 | 579,Muddebihal,Karnataka
579 | 580,Mudhol,Karnataka
580 | 581,Mulbagal,Karnataka
581 | 582,Mundargi,Karnataka
582 | 583,Mysore,Karnataka
583 | 584,Nanjangud,Karnataka
584 | 585,Pavagada,Karnataka
585 | 586,Puttur,Karnataka
586 | 587,Rabkavi Banhatti,Karnataka
587 | 588,Raichur,Karnataka
588 | 589,Ramanagaram,Karnataka
589 | 590,Ramdurg,Karnataka
590 | 591,Ranibennur,Karnataka
591 | 592,Robertson Pet,Karnataka
592 | 593,Ron,Karnataka
593 | 594,Sadalgi,Karnataka
594 | 595,Sagar,Karnataka
595 | 596,Sakleshpur,Karnataka
596 | 597,Sandur,Karnataka
597 | 598,Sankeshwar,Karnataka
598 | 599,Saundatti-Yellamma,Karnataka
599 | 600,Savanur,Karnataka
600 | 601,Sedam,Karnataka
601 | 602,Shahabad,Karnataka
602 | 603,Shahpur,Karnataka
603 | 604,Shiggaon,Karnataka
604 | 605,Shikapur,Karnataka
605 | 606,Shimoga,Karnataka
606 | 607,Shorapur,Karnataka
607 | 608,Shrirangapattana,Karnataka
608 | 609,Sidlaghatta,Karnataka
609 | 610,Sindgi,Karnataka
610 | 611,Sindhnur,Karnataka
611 | 612,Sira,Karnataka
612 | 613,Sirsi,Karnataka
613 | 614,Siruguppa,Karnataka
614 | 615,Srinivaspur,Karnataka
615 | 616,Talikota,Karnataka
616 | 617,Tarikere,Karnataka
617 | 618,Tekkalakota,Karnataka
618 | 619,Terdal,Karnataka
619 | 620,Tiptur,Karnataka
620 | 621,Tumkur,Karnataka
621 | 622,Udupi,Karnataka
622 | 623,Vijayapura,Karnataka
623 | 624,Wadi,Karnataka
624 | 625,Yadgir,Karnataka
625 | 626,Adoor,Kerala
626 | 627,Akathiyoor,Kerala
627 | 628,Alappuzha,Kerala
628 | 629,Ancharakandy,Kerala
629 | 630,Aroor,Kerala
630 | 631,Ashtamichira,Kerala
631 | 632,Attingal,Kerala
632 | 633,Avinissery,Kerala
633 | 634,Chalakudy,Kerala
634 | 635,Changanassery,Kerala
635 | 636,Chendamangalam,Kerala
636 | 637,Chengannur,Kerala
637 | 638,Cherthala,Kerala
638 | 639,Cheruthazham,Kerala
639 | 640,Chittur-Thathamangalam,Kerala
640 | 641,Chockli,Kerala
641 | 642,Erattupetta,Kerala
642 | 643,Guruvayoor,Kerala
643 | 644,Irinjalakuda,Kerala
644 | 645,Kadirur,Kerala
645 | 646,Kalliasseri,Kerala
646 | 647,Kalpetta,Kerala
647 | 648,Kanhangad,Kerala
648 | 649,Kanjikkuzhi,Kerala
649 | 650,Kannur,Kerala
650 | 651,Kasaragod,Kerala
651 | 652,Kayamkulam,Kerala
652 | 653,Kochi,Kerala
653 | 654,Kodungallur,Kerala
654 | 655,Kollam,Kerala
655 | 656,Koothuparamba,Kerala
656 | 657,Kothamangalam,Kerala
657 | 658,Kottayam,Kerala
658 | 659,Kozhikode,Kerala
659 | 660,Kunnamkulam,Kerala
660 | 661,Malappuram,Kerala
661 | 662,Mattannur,Kerala
662 | 663,Mavelikkara,Kerala
663 | 664,Mavoor,Kerala
664 | 665,Muvattupuzha,Kerala
665 | 666,Nedumangad,Kerala
666 | 667,Neyyattinkara,Kerala
667 | 668,Ottappalam,Kerala
668 | 669,Palai,Kerala
669 | 670,Palakkad,Kerala
670 | 671,Panniyannur,Kerala
671 | 672,Pappinisseri,Kerala
672 | 673,Paravoor,Kerala
673 | 674,Pathanamthitta,Kerala
674 | 675,Payyannur,Kerala
675 | 676,Peringathur,Kerala
676 | 677,Perinthalmanna,Kerala
677 | 678,Perumbavoor,Kerala
678 | 679,Ponnani,Kerala
679 | 680,Punalur,Kerala
680 | 681,Quilandy,Kerala
681 | 682,Shoranur,Kerala
682 | 683,Taliparamba,Kerala
683 | 684,Thiruvalla,Kerala
684 | 685,Thiruvananthapuram,Kerala
685 | 686,Thodupuzha,Kerala
686 | 687,Thrissur,Kerala
687 | 688,Tirur,Kerala
688 | 689,Vadakara,Kerala
689 | 690,Vaikom,Kerala
690 | 691,Varkala,Kerala
691 | 692,Kavaratti,Lakshadweep
692 | 693,Ashok Nagar,Madhya Pradesh
693 | 694,Balaghat,Madhya Pradesh
694 | 695,Betul,Madhya Pradesh
695 | 696,Bhopal,Madhya Pradesh
696 | 697,Burhanpur,Madhya Pradesh
697 | 698,Chhatarpur,Madhya Pradesh
698 | 699,Dabra,Madhya Pradesh
699 | 700,Datia,Madhya Pradesh
700 | 701,Dewas,Madhya Pradesh
701 | 702,Dhar,Madhya Pradesh
702 | 703,Fatehabad,Madhya Pradesh
703 | 704,Gwalior,Madhya Pradesh
704 | 705,Indore,Madhya Pradesh
705 | 706,Itarsi,Madhya Pradesh
706 | 707,Jabalpur,Madhya Pradesh
707 | 708,Katni,Madhya Pradesh
708 | 709,Kotma,Madhya Pradesh
709 | 710,Lahar,Madhya Pradesh
710 | 711,Lundi,Madhya Pradesh
711 | 712,Maharajpur,Madhya Pradesh
712 | 713,Mahidpur,Madhya Pradesh
713 | 714,Maihar,Madhya Pradesh
714 | 715,Malajkhand,Madhya Pradesh
715 | 716,Manasa,Madhya Pradesh
716 | 717,Manawar,Madhya Pradesh
717 | 718,Mandideep,Madhya Pradesh
718 | 719,Mandla,Madhya Pradesh
719 | 720,Mandsaur,Madhya Pradesh
720 | 721,Mauganj,Madhya Pradesh
721 | 722,Mhow Cantonment,Madhya Pradesh
722 | 723,Mhowgaon,Madhya Pradesh
723 | 724,Morena,Madhya Pradesh
724 | 725,Multai,Madhya Pradesh
725 | 726,Murwara,Madhya Pradesh
726 | 727,Nagda,Madhya Pradesh
727 | 728,Nainpur,Madhya Pradesh
728 | 729,Narsinghgarh,Madhya Pradesh
729 | 730,Narsinghgarh,Madhya Pradesh
730 | 731,Neemuch,Madhya Pradesh
731 | 732,Nepanagar,Madhya Pradesh
732 | 733,Niwari,Madhya Pradesh
733 | 734,Nowgong,Madhya Pradesh
734 | 735,Nowrozabad,Madhya Pradesh
735 | 736,Pachore,Madhya Pradesh
736 | 737,Pali,Madhya Pradesh
737 | 738,Panagar,Madhya Pradesh
738 | 739,Pandhurna,Madhya Pradesh
739 | 740,Panna,Madhya Pradesh
740 | 741,Pasan,Madhya Pradesh
741 | 742,Pipariya,Madhya Pradesh
742 | 743,Pithampur,Madhya Pradesh
743 | 744,Porsa,Madhya Pradesh
744 | 745,Prithvipur,Madhya Pradesh
745 | 746,Raghogarh-Vijaypur,Madhya Pradesh
746 | 747,Rahatgarh,Madhya Pradesh
747 | 748,Raisen,Madhya Pradesh
748 | 749,Rajgarh,Madhya Pradesh
749 | 750,Ratlam,Madhya Pradesh
750 | 751,Rau,Madhya Pradesh
751 | 752,Rehli,Madhya Pradesh
752 | 753,Rewa,Madhya Pradesh
753 | 754,Sabalgarh,Madhya Pradesh
754 | 755,Sagar,Madhya Pradesh
755 | 756,Sanawad,Madhya Pradesh
756 | 757,Sarangpur,Madhya Pradesh
757 | 758,Sarni,Madhya Pradesh
758 | 759,Satna,Madhya Pradesh
759 | 760,Sausar,Madhya Pradesh
760 | 761,Sehore,Madhya Pradesh
761 | 762,Sendhwa,Madhya Pradesh
762 | 763,Seoni,Madhya Pradesh
763 | 764,Seoni-Malwa,Madhya Pradesh
764 | 765,Shahdol,Madhya Pradesh
765 | 766,Shajapur,Madhya Pradesh
766 | 767,Shamgarh,Madhya Pradesh
767 | 768,Sheopur,Madhya Pradesh
768 | 769,Shivpuri,Madhya Pradesh
769 | 770,Shujalpur,Madhya Pradesh
770 | 771,Sidhi,Madhya Pradesh
771 | 772,Sihora,Madhya Pradesh
772 | 773,Singrauli,Madhya Pradesh
773 | 774,Sironj,Madhya Pradesh
774 | 775,Sohagpur,Madhya Pradesh
775 | 776,Tarana,Madhya Pradesh
776 | 777,Tikamgarh,Madhya Pradesh
777 | 778,Ujhani,Madhya Pradesh
778 | 779,Ujjain,Madhya Pradesh
779 | 780,Umaria,Madhya Pradesh
780 | 781,Vidisha,Madhya Pradesh
781 | 782,Wara Seoni,Madhya Pradesh
782 | 783,Ahmednagar,Maharashtra
783 | 784,Akola,Maharashtra
784 | 785,Amravati,Maharashtra
785 | 786,Aurangabad,Maharashtra
786 | 787,Baramati,Maharashtra
787 | 788,Chalisgaon,Maharashtra
788 | 789,Chinchani,Maharashtra
789 | 790,Devgarh,Maharashtra
790 | 791,Dhule,Maharashtra
791 | 792,Dombivli,Maharashtra
792 | 793,Durgapur,Maharashtra
793 | 794,Ichalkaranji,Maharashtra
794 | 795,Jalna,Maharashtra
795 | 796,Kalyan,Maharashtra
796 | 797,Latur,Maharashtra
797 | 798,Loha,Maharashtra
798 | 799,Lonar,Maharashtra
799 | 800,Lonavla,Maharashtra
800 | 801,Mahad,Maharashtra
801 | 802,Mahuli,Maharashtra
802 | 803,Malegaon,Maharashtra
803 | 804,Malkapur,Maharashtra
804 | 805,Manchar,Maharashtra
805 | 806,Mangalvedhe,Maharashtra
806 | 807,Mangrulpir,Maharashtra
807 | 808,Manjlegaon,Maharashtra
808 | 809,Manmad,Maharashtra
809 | 810,Manwath,Maharashtra
810 | 811,Mehkar,Maharashtra
811 | 812,Mhaswad,Maharashtra
812 | 813,Miraj,Maharashtra
813 | 814,Morshi,Maharashtra
814 | 815,Mukhed,Maharashtra
815 | 816,Mul,Maharashtra
816 | 817,Mumbai,Maharashtra
817 | 818,Murtijapur,Maharashtra
818 | 819,Nagpur,Maharashtra
819 | 820,Nalasopara,Maharashtra
820 | 821,Nanded-Waghala,Maharashtra
821 | 822,Nandgaon,Maharashtra
822 | 823,Nandura,Maharashtra
823 | 824,Nandurbar,Maharashtra
824 | 825,Narkhed,Maharashtra
825 | 826,Nashik,Maharashtra
826 | 827,Navi Mumbai,Maharashtra
827 | 828,Nawapur,Maharashtra
828 | 829,Nilanga,Maharashtra
829 | 830,Osmanabad,Maharashtra
830 | 831,Ozar,Maharashtra
831 | 832,Pachora,Maharashtra
832 | 833,Paithan,Maharashtra
833 | 834,Palghar,Maharashtra
834 | 835,Pandharkaoda,Maharashtra
835 | 836,Pandharpur,Maharashtra
836 | 837,Panvel,Maharashtra
837 | 838,Parbhani,Maharashtra
838 | 839,Parli,Maharashtra
839 | 840,Parola,Maharashtra
840 | 841,Partur,Maharashtra
841 | 842,Pathardi,Maharashtra
842 | 843,Pathri,Maharashtra
843 | 844,Patur,Maharashtra
844 | 845,Pauni,Maharashtra
845 | 846,Pen,Maharashtra
846 | 847,Phaltan,Maharashtra
847 | 848,Pulgaon,Maharashtra
848 | 849,Pune,Maharashtra
849 | 850,Purna,Maharashtra
850 | 851,Pusad,Maharashtra
851 | 852,Rahuri,Maharashtra
852 | 853,Rajura,Maharashtra
853 | 854,Ramtek,Maharashtra
854 | 855,Ratnagiri,Maharashtra
855 | 856,Raver,Maharashtra
856 | 857,Risod,Maharashtra
857 | 858,Sailu,Maharashtra
858 | 859,Sangamner,Maharashtra
859 | 860,Sangli,Maharashtra
860 | 861,Sangole,Maharashtra
861 | 862,Sasvad,Maharashtra
862 | 863,Satana,Maharashtra
863 | 864,Satara,Maharashtra
864 | 865,Savner,Maharashtra
865 | 866,Sawantwadi,Maharashtra
866 | 867,Shahade,Maharashtra
867 | 868,Shegaon,Maharashtra
868 | 869,Shendurjana,Maharashtra
869 | 870,Shirdi,Maharashtra
870 | 871,Shirpur-Warwade,Maharashtra
871 | 872,Shirur,Maharashtra
872 | 873,Shrigonda,Maharashtra
873 | 874,Shrirampur,Maharashtra
874 | 875,Sillod,Maharashtra
875 | 876,Sinnar,Maharashtra
876 | 877,Solapur,Maharashtra
877 | 878,Soyagaon,Maharashtra
878 | 879,Talegaon Dabhade,Maharashtra
879 | 880,Talode,Maharashtra
880 | 881,Tasgaon,Maharashtra
881 | 882,Tirora,Maharashtra
882 | 883,Tuljapur,Maharashtra
883 | 884,Tumsar,Maharashtra
884 | 885,Uran,Maharashtra
885 | 886,Uran Islampur,Maharashtra
886 | 887,Wadgaon Road,Maharashtra
887 | 888,Wai,Maharashtra
888 | 889,Wani,Maharashtra
889 | 890,Wardha,Maharashtra
890 | 891,Warora,Maharashtra
891 | 892,Warud,Maharashtra
892 | 893,Washim,Maharashtra
893 | 894,Yevla,Maharashtra
894 | 895,Uchgaon,Maharashtra
895 | 896,Udgir,Maharashtra
896 | 897,Umarga,Maharashtra
897 | 898,Umarkhed,Maharashtra
898 | 899,Umred,Maharashtra
899 | 900,Vadgaon Kasba,Maharashtra
900 | 901,Vaijapur,Maharashtra
901 | 902,Vasai,Maharashtra
902 | 903,Virar,Maharashtra
903 | 904,Vita,Maharashtra
904 | 905,Yavatmal,Maharashtra
905 | 906,Yawal,Maharashtra
906 | 907,Imphal,Manipur
907 | 908,Kakching,Manipur
908 | 909,Lilong,Manipur
909 | 910,Mayang Imphal,Manipur
910 | 911,Thoubal,Manipur
911 | 912,Jowai,Meghalaya
912 | 913,Nongstoin,Meghalaya
913 | 914,Shillong,Meghalaya
914 | 915,Tura,Meghalaya
915 | 916,Aizawl,Mizoram
916 | 917,Champhai,Mizoram
917 | 918,Lunglei,Mizoram
918 | 919,Saiha,Mizoram
919 | 920,Dimapur,Nagaland
920 | 921,Kohima,Nagaland
921 | 922,Mokokchung,Nagaland
922 | 923,Tuensang,Nagaland
923 | 924,Wokha,Nagaland
924 | 925,Zunheboto,Nagaland
925 | 950,Anandapur,Orissa
926 | 951,Anugul,Orissa
927 | 952,Asika,Orissa
928 | 953,Balangir,Orissa
929 | 954,Balasore,Orissa
930 | 955,Baleshwar,Orissa
931 | 956,Bamra,Orissa
932 | 957,Barbil,Orissa
933 | 958,Bargarh,Orissa
934 | 959,Bargarh,Orissa
935 | 960,Baripada,Orissa
936 | 961,Basudebpur,Orissa
937 | 962,Belpahar,Orissa
938 | 963,Bhadrak,Orissa
939 | 964,Bhawanipatna,Orissa
940 | 965,Bhuban,Orissa
941 | 966,Bhubaneswar,Orissa
942 | 967,Biramitrapur,Orissa
943 | 968,Brahmapur,Orissa
944 | 969,Brajrajnagar,Orissa
945 | 970,Byasanagar,Orissa
946 | 971,Cuttack,Orissa
947 | 972,Debagarh,Orissa
948 | 973,Dhenkanal,Orissa
949 | 974,Gunupur,Orissa
950 | 975,Hinjilicut,Orissa
951 | 976,Jagatsinghapur,Orissa
952 | 977,Jajapur,Orissa
953 | 978,Jaleswar,Orissa
954 | 979,Jatani,Orissa
955 | 980,Jeypur,Orissa
956 | 981,Jharsuguda,Orissa
957 | 982,Joda,Orissa
958 | 983,Kantabanji,Orissa
959 | 984,Karanjia,Orissa
960 | 985,Kendrapara,Orissa
961 | 986,Kendujhar,Orissa
962 | 987,Khordha,Orissa
963 | 988,Koraput,Orissa
964 | 989,Malkangiri,Orissa
965 | 990,Nabarangapur,Orissa
966 | 991,Paradip,Orissa
967 | 992,Parlakhemundi,Orissa
968 | 993,Pattamundai,Orissa
969 | 994,Phulabani,Orissa
970 | 995,Puri,Orissa
971 | 996,Rairangpur,Orissa
972 | 997,Rajagangapur,Orissa
973 | 998,Raurkela,Orissa
974 | 999,Rayagada,Orissa
975 | 1000,Sambalpur,Orissa
976 | 1001,Soro,Orissa
977 | 1002,Sunabeda,Orissa
978 | 1003,Sundargarh,Orissa
979 | 1004,Talcher,Orissa
980 | 1005,Titlagarh,Orissa
981 | 1006,Umarkote,Orissa
982 | 1007,Karaikal,Pondicherry
983 | 1008,Mahe,Pondicherry
984 | 1009,Pondicherry,Pondicherry
985 | 1010,Yanam,Pondicherry
986 | 1011,Ahmedgarh,Punjab
987 | 1012,Amritsar,Punjab
988 | 1013,Barnala,Punjab
989 | 1014,Batala,Punjab
990 | 1015,Bathinda,Punjab
991 | 1016,Bhagha Purana,Punjab
992 | 1017,Budhlada,Punjab
993 | 1018,Chandigarh,Punjab
994 | 1019,Dasua,Punjab
995 | 1020,Dhuri,Punjab
996 | 1021,Dinanagar,Punjab
997 | 1022,Faridkot,Punjab
998 | 1023,Fazilka,Punjab
999 | 1024,Firozpur,Punjab
1000 | 1025,Firozpur Cantt.,Punjab
1001 | 1026,Giddarbaha,Punjab
1002 | 1027,Gobindgarh,Punjab
1003 | 1028,Gurdaspur,Punjab
1004 | 1029,Hoshiarpur,Punjab
1005 | 1030,Jagraon,Punjab
1006 | 1031,Jaitu,Punjab
1007 | 1032,Jalalabad,Punjab
1008 | 1033,Jalandhar,Punjab
1009 | 1034,Jalandhar Cantt.,Punjab
1010 | 1035,Jandiala,Punjab
1011 | 1036,Kapurthala,Punjab
1012 | 1037,Karoran,Punjab
1013 | 1038,Kartarpur,Punjab
1014 | 1039,Khanna,Punjab
1015 | 1040,Kharar,Punjab
1016 | 1041,Kot Kapura,Punjab
1017 | 1042,Kurali,Punjab
1018 | 1043,Longowal,Punjab
1019 | 1044,Ludhiana,Punjab
1020 | 1045,Malerkotla,Punjab
1021 | 1046,Malout,Punjab
1022 | 1047,Mansa,Punjab
1023 | 1048,Maur,Punjab
1024 | 1049,Moga,Punjab
1025 | 1050,Mohali,Punjab
1026 | 1051,Morinda,Punjab
1027 | 1052,Mukerian,Punjab
1028 | 1053,Muktsar,Punjab
1029 | 1054,Nabha,Punjab
1030 | 1055,Nakodar,Punjab
1031 | 1056,Nangal,Punjab
1032 | 1057,Nawanshahr,Punjab
1033 | 1058,Pathankot,Punjab
1034 | 1059,Patiala,Punjab
1035 | 1060,Patran,Punjab
1036 | 1061,Patti,Punjab
1037 | 1062,Phagwara,Punjab
1038 | 1063,Phillaur,Punjab
1039 | 1064,Qadian,Punjab
1040 | 1065,Raikot,Punjab
1041 | 1066,Rajpura,Punjab
1042 | 1067,Rampura Phul,Punjab
1043 | 1068,Rupnagar,Punjab
1044 | 1069,Samana,Punjab
1045 | 1070,Sangrur,Punjab
1046 | 1071,Sirhind Fatehgarh Sahib,Punjab
1047 | 1072,Sujanpur,Punjab
1048 | 1073,Sunam,Punjab
1049 | 1074,Talwara,Punjab
1050 | 1075,Tarn Taran,Punjab
1051 | 1076,Urmar Tanda,Punjab
1052 | 1077,Zira,Punjab
1053 | 1078,Zirakpur,Punjab
1054 | 1079,Bali,Rajasthan
1055 | 1080,Banswara,Rajasthan
1056 | 1081,Ajmer,Rajasthan
1057 | 1082,Alwar,Rajasthan
1058 | 1083,Bandikui,Rajasthan
1059 | 1084,Baran,Rajasthan
1060 | 1085,Barmer,Rajasthan
1061 | 1086,Bikaner,Rajasthan
1062 | 1087,Fatehpur,Rajasthan
1063 | 1088,Jaipur,Rajasthan
1064 | 1089,Jaisalmer,Rajasthan
1065 | 1090,Jodhpur,Rajasthan
1066 | 1091,Kota,Rajasthan
1067 | 1092,Lachhmangarh,Rajasthan
1068 | 1093,Ladnu,Rajasthan
1069 | 1094,Lakheri,Rajasthan
1070 | 1095,Lalsot,Rajasthan
1071 | 1096,Losal,Rajasthan
1072 | 1097,Makrana,Rajasthan
1073 | 1098,Malpura,Rajasthan
1074 | 1099,Mandalgarh,Rajasthan
1075 | 1100,Mandawa,Rajasthan
1076 | 1101,Mangrol,Rajasthan
1077 | 1102,Merta City,Rajasthan
1078 | 1103,Mount Abu,Rajasthan
1079 | 1104,Nadbai,Rajasthan
1080 | 1105,Nagar,Rajasthan
1081 | 1106,Nagaur,Rajasthan
1082 | 1107,Nargund,Rajasthan
1083 | 1108,Nasirabad,Rajasthan
1084 | 1109,Nathdwara,Rajasthan
1085 | 1110,Navalgund,Rajasthan
1086 | 1111,Nawalgarh,Rajasthan
1087 | 1112,Neem-Ka-Thana,Rajasthan
1088 | 1113,Nelamangala,Rajasthan
1089 | 1114,Nimbahera,Rajasthan
1090 | 1115,Nipani,Rajasthan
1091 | 1116,Niwai,Rajasthan
1092 | 1117,Nohar,Rajasthan
1093 | 1118,Nokha,Rajasthan
1094 | 1119,Pali,Rajasthan
1095 | 1120,Phalodi,Rajasthan
1096 | 1121,Phulera,Rajasthan
1097 | 1122,Pilani,Rajasthan
1098 | 1123,Pilibanga,Rajasthan
1099 | 1124,Pindwara,Rajasthan
1100 | 1125,Pipar City,Rajasthan
1101 | 1126,Prantij,Rajasthan
1102 | 1127,Pratapgarh,Rajasthan
1103 | 1128,Raisinghnagar,Rajasthan
1104 | 1129,Rajakhera,Rajasthan
1105 | 1130,Rajaldesar,Rajasthan
1106 | 1131,Rajgarh (Alwar),Rajasthan
1107 | 1132,Rajgarh (Churu),Rajasthan
1108 | 1133,Rajsamand,Rajasthan
1109 | 1134,Ramganj Mandi,Rajasthan
1110 | 1135,Ramngarh,Rajasthan
1111 | 1136,Ratangarh,Rajasthan
1112 | 1137,Rawatbhata,Rajasthan
1113 | 1138,Rawatsar,Rajasthan
1114 | 1139,Reengus,Rajasthan
1115 | 1140,Sadri,Rajasthan
1116 | 1141,Sadulshahar,Rajasthan
1117 | 1142,Sagwara,Rajasthan
1118 | 1143,Sambhar,Rajasthan
1119 | 1144,Sanchore,Rajasthan
1120 | 1145,Sangaria,Rajasthan
1121 | 1146,Sardarshahar,Rajasthan
1122 | 1147,Sawai Madhopur,Rajasthan
1123 | 1148,Shahpura,Rajasthan
1124 | 1149,Shahpura,Rajasthan
1125 | 1150,Sheoganj,Rajasthan
1126 | 1151,Sikar,Rajasthan
1127 | 1152,Sirohi,Rajasthan
1128 | 1153,Sojat,Rajasthan
1129 | 1154,Sri Madhopur,Rajasthan
1130 | 1155,Sujangarh,Rajasthan
1131 | 1156,Sumerpur,Rajasthan
1132 | 1157,Suratgarh,Rajasthan
1133 | 1158,Taranagar,Rajasthan
1134 | 1159,Todabhim,Rajasthan
1135 | 1160,Todaraisingh,Rajasthan
1136 | 1161,Tonk,Rajasthan
1137 | 1162,Udaipur,Rajasthan
1138 | 1163,Udaipurwati,Rajasthan
1139 | 1164,Vijainagar,Rajasthan
1140 | 1165,Gangtok,Sikkim
1141 | 1166,Calcutta,West Bengal
1142 | 1167,Arakkonam,Tamil Nadu
1143 | 1168,Arcot,Tamil Nadu
1144 | 1169,Aruppukkottai,Tamil Nadu
1145 | 1170,Bhavani,Tamil Nadu
1146 | 1171,Chengalpattu,Tamil Nadu
1147 | 1172,Chennai,Tamil Nadu
1148 | 1173,Chinna Salem,Tamil Nadu
1149 | 1174,Coimbatore,Tamil Nadu
1150 | 1175,Coonoor,Tamil Nadu
1151 | 1176,Cuddalore,Tamil Nadu
1152 | 1177,Dharmapuri,Tamil Nadu
1153 | 1178,Dindigul,Tamil Nadu
1154 | 1179,Erode,Tamil Nadu
1155 | 1180,Gudalur,Tamil Nadu
1156 | 1181,Gudalur,Tamil Nadu
1157 | 1182,Gudalur,Tamil Nadu
1158 | 1183,Kanchipuram,Tamil Nadu
1159 | 1184,Karaikudi,Tamil Nadu
1160 | 1185,Karungal,Tamil Nadu
1161 | 1186,Karur,Tamil Nadu
1162 | 1187,Kollankodu,Tamil Nadu
1163 | 1188,Lalgudi,Tamil Nadu
1164 | 1189,Madurai,Tamil Nadu
1165 | 1190,Nagapattinam,Tamil Nadu
1166 | 1191,Nagercoil,Tamil Nadu
1167 | 1192,Namagiripettai,Tamil Nadu
1168 | 1193,Namakkal,Tamil Nadu
1169 | 1194,Nandivaram-Guduvancheri,Tamil Nadu
1170 | 1195,Nanjikottai,Tamil Nadu
1171 | 1196,Natham,Tamil Nadu
1172 | 1197,Nellikuppam,Tamil Nadu
1173 | 1198,Neyveli,Tamil Nadu
1174 | 1199,O,
1175 | 1200,Oddanchatram,Tamil Nadu
1176 | 1201,P.N.Patti,Tamil Nadu
1177 | 1202,Pacode,Tamil Nadu
1178 | 1203,Padmanabhapuram,Tamil Nadu
1179 | 1204,Palani,Tamil Nadu
1180 | 1205,Palladam,Tamil Nadu
1181 | 1206,Pallapatti,Tamil Nadu
1182 | 1207,Pallikonda,Tamil Nadu
1183 | 1208,Panagudi,Tamil Nadu
1184 | 1209,Panruti,Tamil Nadu
1185 | 1210,Paramakudi,Tamil Nadu
1186 | 1211,Parangipettai,Tamil Nadu
1187 | 1212,Pattukkottai,Tamil Nadu
1188 | 1213,Perambalur,Tamil Nadu
1189 | 1214,Peravurani,Tamil Nadu
1190 | 1215,Periyakulam,Tamil Nadu
1191 | 1216,Periyasemur,Tamil Nadu
1192 | 1217,Pernampattu,Tamil Nadu
1193 | 1218,Pollachi,Tamil Nadu
1194 | 1219,Polur,Tamil Nadu
1195 | 1220,Ponneri,Tamil Nadu
1196 | 1221,Pudukkottai,Tamil Nadu
1197 | 1222,Pudupattinam,Tamil Nadu
1198 | 1223,Puliyankudi,Tamil Nadu
1199 | 1224,Punjaipugalur,Tamil Nadu
1200 | 1225,Rajapalayam,Tamil Nadu
1201 | 1226,Ramanathapuram,Tamil Nadu
1202 | 1227,Rameshwaram,Tamil Nadu
1203 | 1228,Rasipuram,Tamil Nadu
1204 | 1229,Salem,Tamil Nadu
1205 | 1230,Sankarankoil,Tamil Nadu
1206 | 1231,Sankari,Tamil Nadu
1207 | 1232,Sathyamangalam,Tamil Nadu
1208 | 1233,Sattur,Tamil Nadu
1209 | 1234,Shenkottai,Tamil Nadu
1210 | 1235,Sholavandan,Tamil Nadu
1211 | 1236,Sholingur,Tamil Nadu
1212 | 1237,Sirkali,Tamil Nadu
1213 | 1238,Sivaganga,Tamil Nadu
1214 | 1239,Sivagiri,Tamil Nadu
1215 | 1240,Sivakasi,Tamil Nadu
1216 | 1241,Srivilliputhur,Tamil Nadu
1217 | 1242,Surandai,Tamil Nadu
1218 | 1243,Suriyampalayam,Tamil Nadu
1219 | 1244,Tenkasi,Tamil Nadu
1220 | 1245,Thammampatti,Tamil Nadu
1221 | 1246,Thanjavur,Tamil Nadu
1222 | 1247,Tharamangalam,Tamil Nadu
1223 | 1248,Tharangambadi,Tamil Nadu
1224 | 1249,Theni Allinagaram,Tamil Nadu
1225 | 1250,Thirumangalam,Tamil Nadu
1226 | 1251,Thirunindravur,Tamil Nadu
1227 | 1252,Thiruparappu,Tamil Nadu
1228 | 1253,Thirupuvanam,Tamil Nadu
1229 | 1254,Thiruthuraipoondi,Tamil Nadu
1230 | 1255,Thiruvallur,Tamil Nadu
1231 | 1256,Thiruvarur,Tamil Nadu
1232 | 1257,Thoothukudi,Tamil Nadu
1233 | 1258,Thuraiyur,Tamil Nadu
1234 | 1259,Tindivanam,Tamil Nadu
1235 | 1260,Tiruchendur,Tamil Nadu
1236 | 1261,Tiruchengode,Tamil Nadu
1237 | 1262,Tiruchirappalli,Tamil Nadu
1238 | 1263,Tirukalukundram,Tamil Nadu
1239 | 1264,Tirukkoyilur,Tamil Nadu
1240 | 1265,Tirunelveli,Tamil Nadu
1241 | 1266,Tirupathur,Tamil Nadu
1242 | 1267,Tirupathur,Tamil Nadu
1243 | 1268,Tiruppur,Tamil Nadu
1244 | 1269,Tiruttani,Tamil Nadu
1245 | 1270,Tiruvannamalai,Tamil Nadu
1246 | 1271,Tiruvethipuram,Tamil Nadu
1247 | 1272,Tittakudi,Tamil Nadu
1248 | 1273,Udhagamandalam,Tamil Nadu
1249 | 1274,Udumalaipettai,Tamil Nadu
1250 | 1275,Unnamalaikadai,Tamil Nadu
1251 | 1276,Usilampatti,Tamil Nadu
1252 | 1277,Uthamapalayam,Tamil Nadu
1253 | 1278,Uthiramerur,Tamil Nadu
1254 | 1279,Vadakkuvalliyur,Tamil Nadu
1255 | 1280,Vadalur,Tamil Nadu
1256 | 1281,Vadipatti,Tamil Nadu
1257 | 1282,Valparai,Tamil Nadu
1258 | 1283,Vandavasi,Tamil Nadu
1259 | 1284,Vaniyambadi,Tamil Nadu
1260 | 1285,Vedaranyam,Tamil Nadu
1261 | 1286,Vellakoil,Tamil Nadu
1262 | 1287,Vellore,Tamil Nadu
1263 | 1288,Vikramasingapuram,Tamil Nadu
1264 | 1289,Viluppuram,Tamil Nadu
1265 | 1290,Virudhachalam,Tamil Nadu
1266 | 1291,Virudhunagar,Tamil Nadu
1267 | 1292,Viswanatham,Tamil Nadu
1268 | 1293,Agartala,Tripura
1269 | 1294,Badharghat,Tripura
1270 | 1295,Dharmanagar,Tripura
1271 | 1296,Indranagar,Tripura
1272 | 1297,Jogendranagar,Tripura
1273 | 1298,Kailasahar,Tripura
1274 | 1299,Khowai,Tripura
1275 | 1300,Pratapgarh,Tripura
1276 | 1301,Udaipur,Tripura
1277 | 1302,Achhnera,Uttar Pradesh
1278 | 1303,Adari,Uttar Pradesh
1279 | 1304,Agra,Uttar Pradesh
1280 | 1305,Aligarh,Uttar Pradesh
1281 | 1306,Allahabad,Uttar Pradesh
1282 | 1307,Amroha,Uttar Pradesh
1283 | 1308,Azamgarh,Uttar Pradesh
1284 | 1309,Bahraich,Uttar Pradesh
1285 | 1310,Ballia,Uttar Pradesh
1286 | 1311,Balrampur,Uttar Pradesh
1287 | 1312,Banda,Uttar Pradesh
1288 | 1313,Bareilly,Uttar Pradesh
1289 | 1314,Chandausi,Uttar Pradesh
1290 | 1315,Dadri,Uttar Pradesh
1291 | 1316,Deoria,Uttar Pradesh
1292 | 1317,Etawah,Uttar Pradesh
1293 | 1318,Fatehabad,Uttar Pradesh
1294 | 1319,Fatehpur,Uttar Pradesh
1295 | 1320,Fatehpur,Uttar Pradesh
1296 | 1321,Greater Noida,Uttar Pradesh
1297 | 1322,Hamirpur,Uttar Pradesh
1298 | 1323,Hardoi,Uttar Pradesh
1299 | 1324,Jajmau,Uttar Pradesh
1300 | 1325,Jaunpur,Uttar Pradesh
1301 | 1326,Jhansi,Uttar Pradesh
1302 | 1327,Kalpi,Uttar Pradesh
1303 | 1328,Kanpur,Uttar Pradesh
1304 | 1329,Kota,Uttar Pradesh
1305 | 1330,Laharpur,Uttar Pradesh
1306 | 1331,Lakhimpur,Uttar Pradesh
1307 | 1332,Lal Gopalganj Nindaura,Uttar Pradesh
1308 | 1333,Lalganj,Uttar Pradesh
1309 | 1334,Lalitpur,Uttar Pradesh
1310 | 1335,Lar,Uttar Pradesh
1311 | 1336,Loni,Uttar Pradesh
1312 | 1337,Lucknow,Uttar Pradesh
1313 | 1338,Mathura,Uttar Pradesh
1314 | 1339,Meerut,Uttar Pradesh
1315 | 1340,Modinagar,Uttar Pradesh
1316 | 1341,Muradnagar,Uttar Pradesh
1317 | 1342,Nagina,Uttar Pradesh
1318 | 1343,Najibabad,Uttar Pradesh
1319 | 1344,Nakur,Uttar Pradesh
1320 | 1345,Nanpara,Uttar Pradesh
1321 | 1346,Naraura,Uttar Pradesh
1322 | 1347,Naugawan Sadat,Uttar Pradesh
1323 | 1348,Nautanwa,Uttar Pradesh
1324 | 1349,Nawabganj,Uttar Pradesh
1325 | 1350,Nehtaur,Uttar Pradesh
1326 | 1351,NOIDA,Uttar Pradesh
1327 | 1352,Noorpur,Uttar Pradesh
1328 | 1353,Obra,Uttar Pradesh
1329 | 1354,Orai,Uttar Pradesh
1330 | 1355,Padrauna,Uttar Pradesh
1331 | 1356,Palia Kalan,Uttar Pradesh
1332 | 1357,Parasi,Uttar Pradesh
1333 | 1358,Phulpur,Uttar Pradesh
1334 | 1359,Pihani,Uttar Pradesh
1335 | 1360,Pilibhit,Uttar Pradesh
1336 | 1361,Pilkhuwa,Uttar Pradesh
1337 | 1362,Powayan,Uttar Pradesh
1338 | 1363,Pukhrayan,Uttar Pradesh
1339 | 1364,Puranpur,Uttar Pradesh
1340 | 1365,Purquazi,Uttar Pradesh
1341 | 1366,Purwa,Uttar Pradesh
1342 | 1367,Rae Bareli,Uttar Pradesh
1343 | 1368,Rampur,Uttar Pradesh
1344 | 1369,Rampur Maniharan,Uttar Pradesh
1345 | 1370,Rasra,Uttar Pradesh
1346 | 1371,Rath,Uttar Pradesh
1347 | 1372,Renukoot,Uttar Pradesh
1348 | 1373,Reoti,Uttar Pradesh
1349 | 1374,Robertsganj,Uttar Pradesh
1350 | 1375,Rudauli,Uttar Pradesh
1351 | 1376,Rudrapur,Uttar Pradesh
1352 | 1377,Sadabad,Uttar Pradesh
1353 | 1378,Safipur,Uttar Pradesh
1354 | 1379,Saharanpur,Uttar Pradesh
1355 | 1380,Sahaspur,Uttar Pradesh
1356 | 1381,Sahaswan,Uttar Pradesh
1357 | 1382,Sahawar,Uttar Pradesh
1358 | 1383,Sahjanwa,Uttar Pradesh
1359 | 1384,"Saidpur, Ghazipur",Uttar Pradesh
1360 | 1385,Sambhal,Uttar Pradesh
1361 | 1386,Samdhan,Uttar Pradesh
1362 | 1387,Samthar,Uttar Pradesh
1363 | 1388,Sandi,Uttar Pradesh
1364 | 1389,Sandila,Uttar Pradesh
1365 | 1390,Sardhana,Uttar Pradesh
1366 | 1391,Seohara,Uttar Pradesh
1367 | 1392,"Shahabad, Hardoi",Uttar Pradesh
1368 | 1393,"Shahabad, Rampur",Uttar Pradesh
1369 | 1394,Shahganj,Uttar Pradesh
1370 | 1395,Shahjahanpur,Uttar Pradesh
1371 | 1396,Shamli,Uttar Pradesh
1372 | 1397,"Shamsabad, Agra",Uttar Pradesh
1373 | 1398,"Shamsabad, Farrukhabad",Uttar Pradesh
1374 | 1399,Sherkot,Uttar Pradesh
1375 | 1400,"Shikarpur, Bulandshahr",Uttar Pradesh
1376 | 1401,Shikohabad,Uttar Pradesh
1377 | 1402,Shishgarh,Uttar Pradesh
1378 | 1403,Siana,Uttar Pradesh
1379 | 1404,Sikanderpur,Uttar Pradesh
1380 | 1405,Sikandra Rao,Uttar Pradesh
1381 | 1406,Sikandrabad,Uttar Pradesh
1382 | 1407,Sirsaganj,Uttar Pradesh
1383 | 1408,Sirsi,Uttar Pradesh
1384 | 1409,Sitapur,Uttar Pradesh
1385 | 1410,Soron,Uttar Pradesh
1386 | 1411,Suar,Uttar Pradesh
1387 | 1412,Sultanpur,Uttar Pradesh
1388 | 1413,Sumerpur,Uttar Pradesh
1389 | 1414,Tanda,Uttar Pradesh
1390 | 1415,Tanda,Uttar Pradesh
1391 | 1416,Tetri Bazar,Uttar Pradesh
1392 | 1417,Thakurdwara,Uttar Pradesh
1393 | 1418,Thana Bhawan,Uttar Pradesh
1394 | 1419,Tilhar,Uttar Pradesh
1395 | 1420,Tirwaganj,Uttar Pradesh
1396 | 1421,Tulsipur,Uttar Pradesh
1397 | 1422,Tundla,Uttar Pradesh
1398 | 1423,Unnao,Uttar Pradesh
1399 | 1424,Utraula,Uttar Pradesh
1400 | 1425,Varanasi,Uttar Pradesh
1401 | 1426,Vrindavan,Uttar Pradesh
1402 | 1427,Warhapur,Uttar Pradesh
1403 | 1428,Zaidpur,Uttar Pradesh
1404 | 1429,Zamania,Uttar Pradesh
1405 | 1430,Almora,Uttarakhand
1406 | 1431,Bazpur,Uttarakhand
1407 | 1432,Chamba,Uttarakhand
1408 | 1433,Dehradun,Uttarakhand
1409 | 1434,Haldwani,Uttarakhand
1410 | 1435,Haridwar,Uttarakhand
1411 | 1436,Jaspur,Uttarakhand
1412 | 1437,Kashipur,Uttarakhand
1413 | 1438,Kichha,Uttarakhand
1414 | 1439,Kotdwara,Uttarakhand
1415 | 1440,Manglaur,Uttarakhand
1416 | 1441,Mussoorie,Uttarakhand
1417 | 1442,Nagla,Uttarakhand
1418 | 1443,Nainital,Uttarakhand
1419 | 1444,Pauri,Uttarakhand
1420 | 1445,Pithoragarh,Uttarakhand
1421 | 1446,Ramnagar,Uttarakhand
1422 | 1447,Rishikesh,Uttarakhand
1423 | 1448,Roorkee,Uttarakhand
1424 | 1449,Rudrapur,Uttarakhand
1425 | 1450,Sitarganj,Uttarakhand
1426 | 1451,Tehri,Uttarakhand
1427 | 1452,Muzaffarnagar,Uttar Pradesh
1428 | 1453,"Adra, Purulia",West Bengal
1429 | 1454,Alipurduar,West Bengal
1430 | 1455,Arambagh,West Bengal
1431 | 1456,Asansol,West Bengal
1432 | 1457,Baharampur,West Bengal
1433 | 1458,Bally,West Bengal
1434 | 1459,Balurghat,West Bengal
1435 | 1460,Bankura,West Bengal
1436 | 1461,Barakar,West Bengal
1437 | 1462,Barasat,West Bengal
1438 | 1463,Bardhaman,West Bengal
1439 | 1464,Bidhan Nagar,West Bengal
1440 | 1465,Chinsura,West Bengal
1441 | 1466,Contai,West Bengal
1442 | 1467,Cooch Behar,West Bengal
1443 | 1468,Darjeeling,West Bengal
1444 | 1469,Durgapur,West Bengal
1445 | 1470,Haldia,West Bengal
1446 | 1471,Howrah,West Bengal
1447 | 1472,Islampur,West Bengal
1448 | 1473,Jhargram,West Bengal
1449 | 1474,Kharagpur,West Bengal
1450 | 1475,Kolkata,West Bengal
1451 | 1476,Mainaguri,West Bengal
1452 | 1477,Mal,West Bengal
1453 | 1478,Mathabhanga,West Bengal
1454 | 1479,Medinipur,West Bengal
1455 | 1480,Memari,West Bengal
1456 | 1481,Monoharpur,West Bengal
1457 | 1482,Murshidabad,West Bengal
1458 | 1483,Nabadwip,West Bengal
1459 | 1484,Naihati,West Bengal
1460 | 1485,Panchla,West Bengal
1461 | 1486,Pandua,West Bengal
1462 | 1487,Paschim Punropara,West Bengal
1463 | 1488,Purulia,West Bengal
1464 | 1489,Raghunathpur,West Bengal
1465 | 1490,Raiganj,West Bengal
1466 | 1491,Rampurhat,West Bengal
1467 | 1492,Ranaghat,West Bengal
1468 | 1493,Sainthia,West Bengal
1469 | 1494,Santipur,West Bengal
1470 | 1495,Siliguri,West Bengal
1471 | 1496,Sonamukhi,West Bengal
1472 | 1497,Srirampore,West Bengal
1473 | 1498,Suri,West Bengal
1474 | 1499,Taki,West Bengal
1475 | 1500,Tamluk,West Bengal
1476 | 1501,Tarakeswar,West Bengal
1477 | 1502,Chikmagalur,Karnataka
1478 | 1503,Davanagere,Karnataka
1479 | 1504,Dharwad,Karnataka
1480 | 1505,Gadag,Karnataka
1481 | 1506,Chennai,Tamil Nadu
1482 | 1507,Coimbatore,Tamil Nadu
1483 | 1508,Bengaluru,Karnataka
1484 |
--------------------------------------------------------------------------------
/2-Preprocessing_and_Modelling/Pre-processing Jobs for modellingv2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Job - Pre-processing and Modelling Iteration final"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# libraries import\n",
17 | "\n",
18 | "import numpy as np\n",
19 | "import pandas as pd\n",
20 | "import json\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "%matplotlib inline\n",
23 | "\n",
24 | "import re\n",
25 | "import datetime\n",
26 | "from datetime import date\n",
27 | "from time import strptime\n",
28 | "\n",
29 | "import RAKE as rake\n",
30 | "import operator\n"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "######################################################################################\n",
38 | "\n",
39 | "# Working on Job description Data\n",
40 | "###################################################################################### "
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 2,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "# reading my sorted job csv\n",
50 | "job = pd.read_csv('WIP/sorted_jobs_master_new.csv')"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 3,
56 | "metadata": {},
57 | "outputs": [
58 | {
59 | "name": "stdout",
60 | "output_type": "stream",
61 | "text": [
62 | "<class 'pandas.core.frame.DataFrame'>\n",
63 | "RangeIndex: 38941 entries, 0 to 38940\n",
64 | "Data columns (total 17 columns):\n",
65 | " # Column Non-Null Count Dtype \n",
66 | "--- ------ -------------- ----- \n",
67 | " 0 company 38941 non-null object \n",
68 | " 1 education 38941 non-null object \n",
69 | " 2 experience 38941 non-null int64 \n",
70 | " 3 industry 38941 non-null object \n",
71 | " 4 jobdescription 38941 non-null object \n",
72 | " 5 jobtitle 38941 non-null object \n",
73 | " 6 payrate 38941 non-null object \n",
74 | " 7 skills 38941 non-null object \n",
75 | " 8 experience_range 38941 non-null int64 \n",
76 | " 9 industry_enum 38941 non-null int64 \n",
77 | " 10 Salary_range 38941 non-null float64\n",
78 | " 11 j_id 38941 non-null int64 \n",
79 | " 12 is_grad 38941 non-null int64 \n",
80 | " 13 is_postgrad 38941 non-null int64 \n",
81 | " 14 is_doc 38941 non-null int64 \n",
82 | " 15 location 38941 non-null int64 \n",
83 | " 16 loc_name 38941 non-null object \n",
84 | "dtypes: float64(1), int64(8), object(8)\n",
85 | "memory usage: 5.1+ MB\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "job.info()"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "###########################################################################################################################\n",
98 | "# Understanding Job_description column (using NLP)\n",
99 | "###########################################################################################################################\n"
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {},
105 | "source": [
106 | "# 1. NLP - NLTK application to understand most used words"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 4,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stderr",
116 | "output_type": "stream",
117 | "text": [
118 | "[nltk_data] Downloading package wordnet to\n",
119 | "[nltk_data] C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n",
120 | "[nltk_data] Package wordnet is already up-to-date!\n"
121 | ]
122 | }
123 | ],
124 | "source": [
125 | "#Import all the dependencies\n",
126 | "import nltk\n",
127 | "nltk.download('wordnet')\n",
128 | "from nltk.stem import WordNetLemmatizer\n",
129 | "wordnet_lemmatizer = WordNetLemmatizer()\n",
130 | "from nltk.corpus import stopwords\n",
131 | "from nltk.tokenize import word_tokenize \n",
132 | "set(stopwords.words('english'))\n",
133 | "# nltk.download('abc')\n",
134 | "# from nltk.corpus import abc\n",
135 | "# from nltk import RegexpTokenizer\n",
136 | "\n",
137 | "import string\n",
138 | "stopwords = set(stopwords.words(\"english\"))\n",
139 | "import gensim\n",
140 | "from gensim.test.utils import common_texts\n",
141 | "from gensim.models.doc2vec import Doc2Vec, TaggedDocument"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 5,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "# defining tokenizer \n",
151 | "def my_tokenizer(text):\n",
152 | " # 1. split at whitespace\n",
153 | " text = text.split(' ')\n",
154 | " \n",
155 | " #2. lowercase\n",
156 | " text = [word.lower() for word in text]\n",
157 | " \n",
158 | " #3. Remove punctuation\n",
159 | " #table to replace punctuation\n",
160 | " punc_table = str.maketrans('','',string.punctuation)\n",
161 | " \n",
162 | " #call translate()\n",
163 | " text = [word.translate(punc_table) for word in text]\n",
164 | " \n",
165 | " #4. remove stopwords\n",
166 | " text = [word for word in text if word not in stopwords]\n",
167 | " \n",
168 | " #5. lemmatize\n",
169 | " lemmatizer = WordNetLemmatizer()\n",
170 | " \n",
171 | " text = [lemmatizer.lemmatize(word, pos='v') for word in text]\n",
172 | " text = [lemmatizer.lemmatize(word, pos='n') for word in text]\n",
173 | " text = [lemmatizer.lemmatize(word, pos='a') for word in text]\n",
174 | " \n",
175 | " #6. remove empty strings\n",
176 | " text = [word for word in text if word !='']\n",
177 | " \n",
178 | " return text "
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "# 2. NLP - TF-IDF application to get a list of all tokens \n",
186 | "-- This helped to gather what words needed to be in stop-words list"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 16,
192 | "metadata": {},
193 | "outputs": [],
194 | "source": [
195 | "#z = job['jobdescription'].str.rstrip('job description send me jobs like this')"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 7,
201 | "metadata": {},
202 | "outputs": [
203 | {
204 | "data": {
205 | "text/plain": [
206 | "0 Qualifications: - == > 10th To Graduation & A...\n",
207 | "1 Qualifications: - == > 10th To Graduation & A...\n",
208 | "2 - as a developer in providing application des...\n",
209 | "3 - Involved with all stages of indirect taxati...\n",
210 | "4 - Involved with all stages of indirect taxati...\n",
211 | " ... \n",
212 | "38936 Looking for candidates with strong programmin...\n",
213 | "38937 Work with tech lead to architect and develop ...\n",
214 | "38938 We are looking for a Senior UI Developers and...\n",
215 | "38939 We are looking for a Senior UI Developers and...\n",
216 | "38940 Job description : Experience of 5-10 years wi...\n",
217 | "Name: jobdescription, Length: 38941, dtype: object"
218 | ]
219 | },
220 | "execution_count": 7,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "# job['jobdescription'] = job.jobdescription.str[40:]\n",
227 | "job['jobdescription']"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 23,
233 | "metadata": {},
234 | "outputs": [],
235 | "source": [
236 | "# t= job.copy()\n",
237 | "# t.to_csv('WIP.sorted_jobs_master_new.csv', index=False)"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 8,
243 | "metadata": {},
244 | "outputs": [
245 | {
246 | "name": "stderr",
247 | "output_type": "stream",
248 | "text": [
249 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
250 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
251 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
252 | "\n",
253 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
254 | " \n"
255 | ]
256 | },
257 | {
258 | "data": {
259 | "text/html": [
260 | "(HTML table rendering was stripped when this notebook was exported; the equivalent text/plain output appears below.)"
323 | ],
324 | "text/plain": [
325 | " j_id jobtitle \\\n",
326 | "0 0 walkin data entry operator (night shift) \n",
327 | "1 1 work based onhome based part time. \n",
328 | "2 2 pl/sql developer - sql \n",
329 | "3 3 manager/ad/partner - indirect tax - ca \n",
330 | "4 4 manager/ad/partner - indirect tax - ca \n",
331 | "\n",
332 | " company \\\n",
333 | "0 MM Media Pvt Ltd \n",
334 | "1 find live infotech \n",
335 | "2 Softtech Career Infosystem Pvt. Ltd \n",
336 | "3 Onboard HRServices LLP \n",
337 | "4 Onboard HRServices LLP \n",
338 | "\n",
339 | " jd_combo \n",
340 | "0 walkin data entry operator (night shift) Qual... \n",
341 | "1 work based onhome based part time. Qualificat... \n",
342 | "2 pl/sql developer - sql - as a developer in pr... \n",
343 | "3 manager/ad/partner - indirect tax - ca - Invo... \n",
344 | "4 manager/ad/partner - indirect tax - ca - Invo... "
345 | ]
346 | },
347 | "execution_count": 8,
348 | "metadata": {},
349 | "output_type": "execute_result"
350 | }
351 | ],
352 | "source": [
353 | "df_job_descriptions = job[['j_id','jobtitle','company' ]]\n",
354 | "df_job_descriptions['jd_combo'] = job['jobtitle'] +\" \" + job['jobdescription'] \n",
355 | "df_job_descriptions.head()"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 9,
361 | "metadata": {
362 | "scrolled": true
363 | },
364 | "outputs": [
365 | {
366 | "name": "stderr",
367 | "output_type": "stream",
368 | "text": [
369 | "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n",
370 | " 'stop_words.' % sorted(inconsistent))\n"
371 | ]
372 | },
373 | {
374 | "name": "stdout",
375 | "output_type": "stream",
376 | "text": [
377 | "(38941, 58510)\n",
378 | "(38941, 4)\n"
379 | ]
380 | }
381 | ],
382 | "source": [
383 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
384 | "stopwords = nltk.corpus.stopwords.words('english')\n",
385 | "stopwords.append('ã¯æ’ëœ')\n",
386 | "#Transforms words to TFIDF\n",
387 | "vectorizer = TfidfVectorizer(stop_words = stopwords)\n",
388 | "\n",
389 | "index = 0\n",
390 | "keys = {}\n",
391 | "\n",
392 | "for jd in df_job_descriptions.itertuples() :\n",
393 | " key = jd[1]\n",
394 | " keys[key] = index\n",
395 | " index += 1\n",
396 | "\n",
397 | "#Fit the vectorizer to the data\n",
398 | "vectorizer.fit(df_job_descriptions['jd_combo'].fillna(''))\n",
399 | "\n",
400 | "#Transform the data\n",
401 | "tfidf_scores = vectorizer.transform(df_job_descriptions['jd_combo'].fillna(''))\n",
402 | "\n",
403 | "print(tfidf_scores.shape)\n",
404 | "print(df_job_descriptions.shape)"
405 | ]
406 | },
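To see which tokens dominate a given posting (useful when deciding what to add to the stop-word list), the rows of the score matrix can be sorted. A small sketch, reusing the vectorizer and tfidf_scores objects fitted above:

    # Ten highest-scoring TF-IDF terms for the first job description (row 0).
    feature_names = np.array(vectorizer.get_feature_names())
    row = tfidf_scores[0].toarray().ravel()
    top = row.argsort()[::-1][:10]  # indices of the 10 largest scores
    for term, score in zip(feature_names[top], row[top]):
        print(f"{term:20s} {score:.3f}")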
407 | {
408 | "cell_type": "code",
409 | "execution_count": 10,
410 | "metadata": {},
411 | "outputs": [
412 | {
413 | "data": {
414 | "text/plain": [
415 | "scipy.sparse.csr.csr_matrix"
416 | ]
417 | },
418 | "execution_count": 10,
419 | "metadata": {},
420 | "output_type": "execute_result"
421 | }
422 | ],
423 | "source": [
424 | "type(tfidf_scores)"
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": 11,
430 | "metadata": {},
431 | "outputs": [],
432 | "source": [
433 | "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())"
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": 12,
439 | "metadata": {},
440 | "outputs": [
441 | {
442 | "data": {
443 | "text/html": [
444 | "\n",
445 | "\n",
458 | "
\n",
459 | " \n",
460 | " \n",
461 | " | \n",
462 | " 00 | \n",
463 | " 000 | \n",
464 | " 0000 | \n",
465 | " 00000 | \n",
466 | " 0000gmt | \n",
467 | " 0001pt | \n",
468 | " 00029 | \n",
469 | " 00034 | \n",
470 | " 000402 | \n",
471 | " 00053 | \n",
472 | " ... | \n",
473 | " ïƒ | \n",
474 | "  | \n",
475 | "  | \n",
476 | " œ100 | \n",
477 | " œmost | \n",
478 | " œrecognition | \n",
479 | " œto | \n",
480 | " šâ | \n",
481 | " šã | \n",
482 | " žâ | \n",
483 | "
\n",
484 | " \n",
485 | " \n",
486 | " \n",
487 | " | 0 | \n",
488 | " 0.0 | \n",
489 | " 0.056499 | \n",
490 | " 0.0 | \n",
491 | " 0.0 | \n",
492 | " 0.0 | \n",
493 | " 0.0 | \n",
494 | " 0.0 | \n",
495 | " 0.0 | \n",
496 | " 0.0 | \n",
497 | " 0.0 | \n",
498 | " ... | \n",
499 | " 0.0 | \n",
500 | " 0.0 | \n",
501 | " 0.0 | \n",
502 | " 0.0 | \n",
503 | " 0.0 | \n",
504 | " 0.0 | \n",
505 | " 0.0 | \n",
506 | " 0.0 | \n",
507 | " 0.0 | \n",
508 | " 0.0 | \n",
509 | "
\n",
510 | " \n",
511 | " | 1 | \n",
512 | " 0.0 | \n",
513 | " 0.068273 | \n",
514 | " 0.0 | \n",
515 | " 0.0 | \n",
516 | " 0.0 | \n",
517 | " 0.0 | \n",
518 | " 0.0 | \n",
519 | " 0.0 | \n",
520 | " 0.0 | \n",
521 | " 0.0 | \n",
522 | " ... | \n",
523 | " 0.0 | \n",
524 | " 0.0 | \n",
525 | " 0.0 | \n",
526 | " 0.0 | \n",
527 | " 0.0 | \n",
528 | " 0.0 | \n",
529 | " 0.0 | \n",
530 | " 0.0 | \n",
531 | " 0.0 | \n",
532 | " 0.0 | \n",
533 | "
\n",
534 | " \n",
535 | " | 2 | \n",
536 | " 0.0 | \n",
537 | " 0.000000 | \n",
538 | " 0.0 | \n",
539 | " 0.0 | \n",
540 | " 0.0 | \n",
541 | " 0.0 | \n",
542 | " 0.0 | \n",
543 | " 0.0 | \n",
544 | " 0.0 | \n",
545 | " 0.0 | \n",
546 | " ... | \n",
547 | " 0.0 | \n",
548 | " 0.0 | \n",
549 | " 0.0 | \n",
550 | " 0.0 | \n",
551 | " 0.0 | \n",
552 | " 0.0 | \n",
553 | " 0.0 | \n",
554 | " 0.0 | \n",
555 | " 0.0 | \n",
556 | " 0.0 | \n",
557 | "
\n",
558 | " \n",
559 | " | 3 | \n",
560 | " 0.0 | \n",
561 | " 0.000000 | \n",
562 | " 0.0 | \n",
563 | " 0.0 | \n",
564 | " 0.0 | \n",
565 | " 0.0 | \n",
566 | " 0.0 | \n",
567 | " 0.0 | \n",
568 | " 0.0 | \n",
569 | " 0.0 | \n",
570 | " ... | \n",
571 | " 0.0 | \n",
572 | " 0.0 | \n",
573 | " 0.0 | \n",
574 | " 0.0 | \n",
575 | " 0.0 | \n",
576 | " 0.0 | \n",
577 | " 0.0 | \n",
578 | " 0.0 | \n",
579 | " 0.0 | \n",
580 | " 0.0 | \n",
581 | "
\n",
582 | " \n",
583 | " | 4 | \n",
584 | " 0.0 | \n",
585 | " 0.000000 | \n",
586 | " 0.0 | \n",
587 | " 0.0 | \n",
588 | " 0.0 | \n",
589 | " 0.0 | \n",
590 | " 0.0 | \n",
591 | " 0.0 | \n",
592 | " 0.0 | \n",
593 | " 0.0 | \n",
594 | " ... | \n",
595 | " 0.0 | \n",
596 | " 0.0 | \n",
597 | " 0.0 | \n",
598 | " 0.0 | \n",
599 | " 0.0 | \n",
600 | " 0.0 | \n",
601 | " 0.0 | \n",
602 | " 0.0 | \n",
603 | " 0.0 | \n",
604 | " 0.0 | \n",
605 | "
\n",
606 | " \n",
607 | "
\n",
608 | "
5 rows × 58510 columns
\n",
609 | "
"
610 | ],
611 | "text/plain": [
612 | " 00 000 0000 00000 0000gmt 0001pt 00029 00034 000402 00053 \\\n",
613 | "0 0.0 0.056499 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
614 | "1 0.0 0.068273 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
615 | "2 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
616 | "3 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
617 | "4 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
618 | "\n",
619 | " ... ïƒ ïƒ¼  œ100 œmost œrecognition œto šâ šã žâ \n",
620 | "0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
621 | "1 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
622 | "2 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
623 | "3 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
624 | "4 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
625 | "\n",
626 | "[5 rows x 58510 columns]"
627 | ]
628 | },
629 | "execution_count": 12,
630 | "metadata": {},
631 | "output_type": "execute_result"
632 | }
633 | ],
634 | "source": [
635 | "test.head()"
636 | ]
637 | },
638 | {
639 | "cell_type": "markdown",
640 | "metadata": {},
641 | "source": [
642 | "As count vectorizer and Tf-Idf are only exploding my column numbers. It might not be wise to proceed with any of these. Moveover, I need to compare job description with Resume, that may not with fair comparison. So I will use these results so far for customizing stop word list. And will later use Doc2Vec to train my model."
643 | ]
644 | },
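The matching step alluded to here can be sketched as follows. This is hypothetical code: it assumes a trained gensim Doc2Vec model whose documents were tagged with row indices (as done at the end of this notebook), it reuses the preprocess helper defined further below, and resume_text is only a placeholder:

    resume_text = "..."  # placeholder for the text of an actual resume
    resume_vec = model.infer_vector(preprocess(resume_text))
    # Ten most similar job postings by cosine similarity of document vectors
    # (model.docvecs in gensim 3.x; model.dv in gensim >= 4).
    for tag, sim in model.docvecs.most_similar([resume_vec], topn=10):
        print(df_job_descriptions.loc[tag, 'jobtitle'], round(sim, 3))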
645 | {
646 | "cell_type": "markdown",
647 | "metadata": {},
648 | "source": [
649 | "# Creating my Stopword list \n",
650 | "\n",
651 | "### As seen there are so many unwanted tokens like numbers, etc , I need to add them in \"stop words\" list to train model "
652 | ]
653 | },
654 | {
655 | "cell_type": "code",
656 | "execution_count": 13,
657 | "metadata": {},
658 | "outputs": [],
659 | "source": [
660 | "#getting list of all tokens\n",
661 | "word_list = test.columns.tolist()"
662 | ]
663 | },
664 | {
665 | "cell_type": "code",
666 | "execution_count": 14,
667 | "metadata": {},
668 | "outputs": [],
669 | "source": [
670 | "##Getting a list of unwanted words as s_words and adding to stopwords\n",
671 | "s_words =[]\n",
672 | "for word in word_list:\n",
673 | " #print(word)\n",
674 | " if re.search(\"^\\W|^\\d\",word):\n",
675 | " s_words.append(word)\n",
676 | " "
677 | ]
678 | },
679 | {
680 | "cell_type": "code",
681 | "execution_count": 15,
682 | "metadata": {},
683 | "outputs": [],
684 | "source": [
685 | "s_words.append('') \n",
686 | "from nltk.corpus import stopwords\n",
687 | "stopword_set = set(stopwords.words('english'))\n",
688 | "stopword_set = list(stopword_set)\n",
689 | "stopword_set.extend(s_words)"
690 | ]
691 | },
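For illustration, the "^\W|^\d" pattern above flags any token that starts with a digit or a non-word character (a small sketch with made-up tokens):

    # Tokens beginning with a digit or punctuation are collected as stop words.
    for tok in ['python', '000402', '0000gmt', '##develop', 'sql']:
        print(tok, '->', 'drop' if re.search(r'^\W|^\d', tok) else 'keep')
    # python -> keep, 000402 -> drop, 0000gmt -> drop, ##develop -> drop, sql -> keep

Note that mojibake tokens such as 'ã¯æ’ëœ' begin with a Unicode letter, so this pattern misses them; that is why one such token is appended to the stop-word list by hand in the TF-IDF cell above.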
692 | {
693 | "cell_type": "markdown",
694 | "metadata": {},
695 | "source": [
696 | "# Collecting all text data for DOC2VEC modelling\n",
697 | "In final iteration, I only used job title and job description for creating text combo document and got my 20-D vectors. This time I trained my model on 200 epochs. \n",
698 | "\n",
699 | "As count vectorizer and Tf-Idf are only exploding my column numbers. It might not be wise to proceed with any of these. Moveover, I need to compare job description with Resume, that may not with fair comparison. \n",
700 | "\n",
701 | "Definately Doc2Vec is the smart choice to make to proceed with matching. Because Doc2Vec has ability to read document as a whole rather than working on each single word. It has a feature to provide n-Dimentional vectors. \n",
702 | "\n",
703 | "So I am going to use same concept to get my vectors. Then I ll use those vectors to match it against any given resume. \n",
704 | "\n"
705 | ]
706 | },
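A minimal sketch of the training described above, assuming the tagged_data list built at the end of this notebook; vector_size=20 and epochs=200 follow the text, while min_count and the filename are illustrative choices:

    # Train 20-dimensional document vectors for 200 epochs, as described above.
    model = Doc2Vec(vector_size=20, min_count=2, epochs=200)
    model.build_vocab(tagged_data)
    model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)
    model.save('job_doc2vec.model')  # hypothetical filename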
707 | {
708 | "cell_type": "code",
709 | "execution_count": 16,
710 | "metadata": {},
711 | "outputs": [
712 | {
713 | "data": {
714 | "text/html": [
715 | "(HTML table rendering was stripped when this notebook was exported; the equivalent text/plain output appears below.)"
778 | ],
779 | "text/plain": [
780 | " j_id jobtitle \\\n",
781 | "0 0 walkin data entry operator (night shift) \n",
782 | "1 1 work based onhome based part time. \n",
783 | "2 2 pl/sql developer - sql \n",
784 | "3 3 manager/ad/partner - indirect tax - ca \n",
785 | "4 4 manager/ad/partner - indirect tax - ca \n",
786 | "\n",
787 | " company \\\n",
788 | "0 MM Media Pvt Ltd \n",
789 | "1 find live infotech \n",
790 | "2 Softtech Career Infosystem Pvt. Ltd \n",
791 | "3 Onboard HRServices LLP \n",
792 | "4 Onboard HRServices LLP \n",
793 | "\n",
794 | " jd_combo \n",
795 | "0 walkin data entry operator (night shift) Qual... \n",
796 | "1 work based onhome based part time. Qualificat... \n",
797 | "2 pl/sql developer - sql - as a developer in pr... \n",
798 | "3 manager/ad/partner - indirect tax - ca - Invo... \n",
799 | "4 manager/ad/partner - indirect tax - ca - Invo... "
800 | ]
801 | },
802 | "execution_count": 16,
803 | "metadata": {},
804 | "output_type": "execute_result"
805 | }
806 | ],
807 | "source": [
808 | "# df_job_descriptions = job[['j_id','jobtitle','company' ]]\n",
809 | "# df_job_descriptions['jd_combo'] = job['jobtitle'] +\" \" + job['jobdescription'] \n",
810 | "df_job_descriptions.head()"
811 | ]
812 | },
813 | {
814 | "cell_type": "code",
815 | "execution_count": 17,
816 | "metadata": {},
817 | "outputs": [
818 | {
819 | "data": {
820 | "text/plain": [
821 | "0 walkin data entry operator (night shift) Qual...\n",
822 | "1 work based onhome based part time. Qualificat...\n",
823 | "2 pl/sql developer - sql - as a developer in pr...\n",
824 | "3 manager/ad/partner - indirect tax - ca - Invo...\n",
825 | "4 manager/ad/partner - indirect tax - ca - Invo...\n",
826 | "5 manager/ad/partner - indirect tax - ca - Invo...\n",
827 | "6 manager/ad/partner - indirect tax - ca - Invo...\n",
828 | "7 manager/ad/partner - indirect tax - ca - Invo...\n",
829 | "8 manager/ad/partner - indirect tax - ca - Invo...\n",
830 | "9 java technical lead (6-8 yrs) - Please share ...\n",
831 | "Name: jd_combo, dtype: object"
832 | ]
833 | },
834 | "execution_count": 17,
835 | "metadata": {},
836 | "output_type": "execute_result"
837 | }
838 | ],
839 | "source": [
840 | "docs = df_job_descriptions['jd_combo']\n",
841 | "docs_sample = docs.head(10)\n",
842 | "docs_sample"
843 | ]
844 | },
845 | {
846 | "cell_type": "code",
847 | "execution_count": 18,
848 | "metadata": {},
849 | "outputs": [],
850 | "source": [
851 | "#pre-processing with custom stop word list\n",
852 | "def preprocess(text):\n",
853 | " stop_words = stopword_set\n",
854 | " #0. split words by whitespace\n",
855 | " text = text.split()\n",
856 | " \n",
857 | " \n",
858 | " # 1. lower case\n",
859 | " text = [word.lower() for word in text]\n",
860 | " \n",
861 | " # 2. remove punctuations\n",
862 | " punc_table = str.maketrans('','',string.punctuation)\n",
863 | " text = [word.translate(punc_table) for word in text]\n",
864 | " \n",
865 | " # 3. remove stop words\n",
866 | " text = [word for word in text if word not in stop_words]\n",
867 | " \n",
868 | " return text"
869 | ]
870 | },
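{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sanity check: pre-process one sample document and inspect\n",
"# its first ten tokens (uses the docs_sample defined above)\n",
"preprocess(docs_sample.iloc[0])[:10]"
]
},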
871 | {
872 | "cell_type": "code",
873 | "execution_count": 19,
874 | "metadata": {},
875 | "outputs": [],
876 | "source": [
877 | "# calling my pre-process to tokenize \n",
878 | "tokenized_doc = []\n",
879 | "doc = df_job_descriptions['jd_combo']\n",
880 | "#doc = docs_sample\n",
881 | "for d in doc:\n",
882 | " tokenized_doc.append(preprocess(d))\n",
883 | "#tokenized_doc"
884 | ]
885 | },
886 | {
887 | "cell_type": "code",
888 | "execution_count": 20,
889 | "metadata": {},
890 | "outputs": [],
891 | "source": [
892 | "# Convert tokenized document into gensim formated tagged data\n",
893 | "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]"
894 | ]
895 | },
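{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative peek: each TaggedDocument pairs a token list with a single\n",
"# integer tag (the row index, later used to map vectors back to jobs)\n",
"tagged_data[0]"
]
},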
896 | {
897 | "cell_type": "code",
898 | "execution_count": 21,
899 | "metadata": {},
900 | "outputs": [
901 | {
902 | "data": {
903 | "text/plain": [
904 | "38941"
905 | ]
906 | },
907 | "execution_count": 21,
908 | "metadata": {},
909 | "output_type": "execute_result"
910 | }
911 | ],
912 | "source": [
913 | "num_doc = len(tagged_data)\n",
914 | "num_doc"
915 | ]
916 | },
917 | {
918 | "cell_type": "code",
919 | "execution_count": 24,
920 | "metadata": {},
921 | "outputs": [],
922 | "source": [
923 | "\n",
924 | "#settings to show epoch progress\n",
925 | "from gensim.test.utils import get_tmpfile\n",
926 | "from gensim.models.callbacks import CallbackAny2Vec\n",
927 | "\n",
928 | "class EpochSaver(CallbackAny2Vec):\n",
929 | "\n",
930 | " def __init__(self, path_prefix):\n",
931 | " self.path_prefix = path_prefix\n",
932 | " self.epoch = 0\n",
933 | "\n",
934 | " def on_epoch_end(self, model):\n",
935 | " output_path = get_tmpfile('{}_epoch{}.model'.format(self.path_prefix, self.epoch))\n",
936 | " model.save(output_path)\n",
937 | " self.epoch += 1"
938 | ]
939 | },
940 | {
941 | "cell_type": "code",
942 | "execution_count": 25,
943 | "metadata": {},
944 | "outputs": [],
945 | "source": [
946 | "#settings to show epoch progress\n",
947 | "class EpochLogger(CallbackAny2Vec):\n",
948 | " \n",
949 | " def __init__(self):\n",
950 | " self.epoch = 0\n",
951 | " \n",
952 | " def on_epoch_begin(self, model):\n",
953 | " print(\"Epoch #{} start\".format(self.epoch))\n",
954 | "\n",
955 | " def on_epoch_end(self, model):\n",
956 | " print(\"Epoch #{} end\".format(self.epoch))\n",
957 | " self.epoch += 1"
958 | ]
959 | },
960 | {
961 | "cell_type": "code",
962 | "execution_count": 27,
963 | "metadata": {
964 | "scrolled": true
965 | },
966 | "outputs": [
967 | {
968 | "name": "stdout",
969 | "output_type": "stream",
970 | "text": [
971 | "Epoch #0 start\n",
972 | "Epoch #0 end\n",
973 | "Epoch #1 start\n",
974 | "Epoch #1 end\n",
975 | "Epoch #2 start\n",
976 | "Epoch #2 end\n",
977 | "Epoch #3 start\n",
978 | "Epoch #3 end\n",
979 | "Epoch #4 start\n",
980 | "Epoch #4 end\n",
981 | "Epoch #5 start\n",
982 | "Epoch #5 end\n",
983 | "Epoch #6 start\n",
984 | "Epoch #6 end\n",
985 | "Epoch #7 start\n",
986 | "Epoch #7 end\n",
987 | "Epoch #8 start\n",
988 | "Epoch #8 end\n",
989 | "Epoch #9 start\n",
990 | "Epoch #9 end\n",
991 | "Epoch #10 start\n",
992 | "Epoch #10 end\n",
993 | "Epoch #11 start\n",
994 | "Epoch #11 end\n",
995 | "Epoch #12 start\n",
996 | "Epoch #12 end\n",
997 | "Epoch #13 start\n",
998 | "Epoch #13 end\n",
999 | "Epoch #14 start\n",
1000 | "Epoch #14 end\n",
1001 | "Epoch #15 start\n",
1002 | "Epoch #15 end\n",
1003 | "Epoch #16 start\n",
1004 | "Epoch #16 end\n",
1005 | "Epoch #17 start\n",
1006 | "Epoch #17 end\n",
1007 | "Epoch #18 start\n",
1008 | "Epoch #18 end\n",
1009 | "Epoch #19 start\n",
1010 | "Epoch #19 end\n",
1011 | "Epoch #20 start\n",
1012 | "Epoch #20 end\n",
1013 | "Epoch #21 start\n",
1014 | "Epoch #21 end\n",
1015 | "Epoch #22 start\n",
1016 | "Epoch #22 end\n",
1017 | "Epoch #23 start\n",
1018 | "Epoch #23 end\n",
1019 | "Epoch #24 start\n",
1020 | "Epoch #24 end\n",
1021 | "Epoch #25 start\n",
1022 | "Epoch #25 end\n",
1023 | "Epoch #26 start\n",
1024 | "Epoch #26 end\n",
1025 | "Epoch #27 start\n",
1026 | "Epoch #27 end\n",
1027 | "Epoch #28 start\n",
1028 | "Epoch #28 end\n",
1029 | "Epoch #29 start\n",
1030 | "Epoch #29 end\n",
1031 | "Epoch #30 start\n",
1032 | "Epoch #30 end\n",
1033 | "Epoch #31 start\n",
1034 | "Epoch #31 end\n",
1035 | "Epoch #32 start\n",
1036 | "Epoch #32 end\n",
1037 | "Epoch #33 start\n",
1038 | "Epoch #33 end\n",
1039 | "Epoch #34 start\n",
1040 | "Epoch #34 end\n",
1041 | "Epoch #35 start\n",
1042 | "Epoch #35 end\n",
1043 | "Epoch #36 start\n",
1044 | "Epoch #36 end\n",
1045 | "Epoch #37 start\n",
1046 | "Epoch #37 end\n",
1047 | "Epoch #38 start\n",
1048 | "Epoch #38 end\n",
1049 | "Epoch #39 start\n",
1050 | "Epoch #39 end\n",
1051 | "Epoch #40 start\n",
1052 | "Epoch #40 end\n",
1053 | "Epoch #41 start\n",
1054 | "Epoch #41 end\n",
1055 | "Epoch #42 start\n",
1056 | "Epoch #42 end\n",
1057 | "Epoch #43 start\n",
1058 | "Epoch #43 end\n",
1059 | "Epoch #44 start\n",
1060 | "Epoch #44 end\n",
1061 | "Epoch #45 start\n",
1062 | "Epoch #45 end\n",
1063 | "Epoch #46 start\n",
1064 | "Epoch #46 end\n",
1065 | "Epoch #47 start\n",
1066 | "Epoch #47 end\n",
1067 | "Epoch #48 start\n",
1068 | "Epoch #48 end\n",
1069 | "Epoch #49 start\n",
1070 | "Epoch #49 end\n",
1071 | "Epoch #50 start\n",
1072 | "Epoch #50 end\n",
1073 | "Epoch #51 start\n",
1074 | "Epoch #51 end\n",
1075 | "Epoch #52 start\n",
1076 | "Epoch #52 end\n",
1077 | "Epoch #53 start\n",
1078 | "Epoch #53 end\n",
1079 | "Epoch #54 start\n",
1080 | "Epoch #54 end\n",
1081 | "Epoch #55 start\n",
1082 | "Epoch #55 end\n",
1083 | "Epoch #56 start\n",
1084 | "Epoch #56 end\n",
1085 | "Epoch #57 start\n",
1086 | "Epoch #57 end\n",
1087 | "Epoch #58 start\n",
1088 | "Epoch #58 end\n",
1089 | "Epoch #59 start\n",
1090 | "Epoch #59 end\n",
1091 | "Epoch #60 start\n",
1092 | "Epoch #60 end\n",
1093 | "Epoch #61 start\n",
1094 | "Epoch #61 end\n",
1095 | "Epoch #62 start\n",
1096 | "Epoch #62 end\n",
1097 | "Epoch #63 start\n",
1098 | "Epoch #63 end\n",
1099 | "Epoch #64 start\n",
1100 | "Epoch #64 end\n",
1101 | "Epoch #65 start\n",
1102 | "Epoch #65 end\n",
1103 | "Epoch #66 start\n",
1104 | "Epoch #66 end\n",
1105 | "Epoch #67 start\n",
1106 | "Epoch #67 end\n",
1107 | "Epoch #68 start\n",
1108 | "Epoch #68 end\n",
1109 | "Epoch #69 start\n",
1110 | "Epoch #69 end\n",
1111 | "Epoch #70 start\n",
1112 | "Epoch #70 end\n",
1113 | "Epoch #71 start\n",
1114 | "Epoch #71 end\n",
1115 | "Epoch #72 start\n",
1116 | "Epoch #72 end\n",
1117 | "Epoch #73 start\n",
1118 | "Epoch #73 end\n",
1119 | "Epoch #74 start\n",
1120 | "Epoch #74 end\n",
1121 | "Epoch #75 start\n",
1122 | "Epoch #75 end\n",
1123 | "Epoch #76 start\n",
1124 | "Epoch #76 end\n",
1125 | "Epoch #77 start\n",
1126 | "Epoch #77 end\n",
1127 | "Epoch #78 start\n",
1128 | "Epoch #78 end\n",
1129 | "Epoch #79 start\n",
1130 | "Epoch #79 end\n",
1131 | "Epoch #80 start\n",
1132 | "Epoch #80 end\n",
1133 | "Epoch #81 start\n",
1134 | "Epoch #81 end\n",
1135 | "Epoch #82 start\n",
1136 | "Epoch #82 end\n",
1137 | "Epoch #83 start\n",
1138 | "Epoch #83 end\n",
1139 | "Epoch #84 start\n",
1140 | "Epoch #84 end\n",
1141 | "Epoch #85 start\n",
1142 | "Epoch #85 end\n",
1143 | "Epoch #86 start\n",
1144 | "Epoch #86 end\n",
1145 | "Epoch #87 start\n",
1146 | "Epoch #87 end\n",
1147 | "Epoch #88 start\n",
1148 | "Epoch #88 end\n",
1149 | "Epoch #89 start\n",
1150 | "Epoch #89 end\n",
1151 | "Epoch #90 start\n",
1152 | "Epoch #90 end\n",
1153 | "Epoch #91 start\n",
1154 | "Epoch #91 end\n",
1155 | "Epoch #92 start\n",
1156 | "Epoch #92 end\n",
1157 | "Epoch #93 start\n",
1158 | "Epoch #93 end\n",
1159 | "Epoch #94 start\n",
1160 | "Epoch #94 end\n",
1161 | "Epoch #95 start\n",
1162 | "Epoch #95 end\n",
1163 | "Epoch #96 start\n",
1164 | "Epoch #96 end\n",
1165 | "Epoch #97 start\n",
1166 | "Epoch #97 end\n",
1167 | "Epoch #98 start\n",
1168 | "Epoch #98 end\n",
1169 | "Epoch #99 start\n",
1170 | "Epoch #99 end\n",
1171 | "Epoch #100 start\n",
1172 | "Epoch #100 end\n",
1173 | "Epoch #101 start\n",
1174 | "Epoch #101 end\n",
1175 | "Epoch #102 start\n",
1176 | "Epoch #102 end\n",
1177 | "Epoch #103 start\n",
1178 | "Epoch #103 end\n",
1179 | "Epoch #104 start\n",
1180 | "Epoch #104 end\n",
1181 | "Epoch #105 start\n",
1182 | "Epoch #105 end\n",
1183 | "Epoch #106 start\n",
1184 | "Epoch #106 end\n",
1185 | "Epoch #107 start\n",
1186 | "Epoch #107 end\n",
1187 | "Epoch #108 start\n",
1188 | "Epoch #108 end\n",
1189 | "Epoch #109 start\n",
1190 | "Epoch #109 end\n",
1191 | "Epoch #110 start\n",
1192 | "Epoch #110 end\n",
1193 | "Epoch #111 start\n",
1194 | "Epoch #111 end\n",
1195 | "Epoch #112 start\n",
1196 | "Epoch #112 end\n",
1197 | "Epoch #113 start\n",
1198 | "Epoch #113 end\n",
1199 | "Epoch #114 start\n",
1200 | "Epoch #114 end\n",
1201 | "Epoch #115 start\n",
1202 | "Epoch #115 end\n",
1203 | "Epoch #116 start\n",
1204 | "Epoch #116 end\n",
1205 | "Epoch #117 start\n",
1206 | "Epoch #117 end\n",
1207 | "Epoch #118 start\n",
1208 | "Epoch #118 end\n",
1209 | "Epoch #119 start\n",
1210 | "Epoch #119 end\n",
1211 | "Epoch #120 start\n",
1212 | "Epoch #120 end\n",
1213 | "Epoch #121 start\n",
1214 | "Epoch #121 end\n",
1215 | "Epoch #122 start\n",
1216 | "Epoch #122 end\n",
1217 | "Epoch #123 start\n",
1218 | "Epoch #123 end\n",
1219 | "Epoch #124 start\n",
1220 | "Epoch #124 end\n",
1221 | "Epoch #125 start\n",
1222 | "Epoch #125 end\n",
1223 | "Epoch #126 start\n",
1224 | "Epoch #126 end\n",
1225 | "Epoch #127 start\n",
1226 | "Epoch #127 end\n",
1227 | "Epoch #128 start\n",
1228 | "Epoch #128 end\n",
1229 | "Epoch #129 start\n",
1230 | "Epoch #129 end\n",
1231 | "Epoch #130 start\n",
1232 | "Epoch #130 end\n",
1233 | "Epoch #131 start\n",
1234 | "Epoch #131 end\n",
1235 | "Epoch #132 start\n",
1236 | "Epoch #132 end\n",
1237 | "Epoch #133 start\n",
1238 | "Epoch #133 end\n",
1239 | "Epoch #134 start\n",
1240 | "Epoch #134 end\n",
1241 | "Epoch #135 start\n",
1242 | "Epoch #135 end\n",
1243 | "Epoch #136 start\n",
1244 | "Epoch #136 end\n",
1245 | "Epoch #137 start\n",
1246 | "Epoch #137 end\n",
1247 | "Epoch #138 start\n",
1248 | "Epoch #138 end\n",
1249 | "Epoch #139 start\n",
1250 | "Epoch #139 end\n",
1251 | "Epoch #140 start\n",
1252 | "Epoch #140 end\n",
1253 | "Epoch #141 start\n",
1254 | "Epoch #141 end\n",
1255 | "Epoch #142 start\n",
1256 | "Epoch #142 end\n",
1257 | "Epoch #143 start\n",
1258 | "Epoch #143 end\n",
1259 | "Epoch #144 start\n",
1260 | "Epoch #144 end\n",
1261 | "Epoch #145 start\n",
1262 | "Epoch #145 end\n",
1263 | "Epoch #146 start\n",
1264 | "Epoch #146 end\n",
1265 | "Epoch #147 start\n",
1266 | "Epoch #147 end\n",
1267 | "Epoch #148 start\n",
1268 | "Epoch #148 end\n",
1269 | "Epoch #149 start\n",
1270 | "Epoch #149 end\n",
1271 | "Epoch #150 start\n",
1272 | "Epoch #150 end\n",
1273 | "Epoch #151 start\n",
1274 | "Epoch #151 end\n",
1275 | "Epoch #152 start\n",
1276 | "Epoch #152 end\n",
1277 | "Epoch #153 start\n",
1278 | "Epoch #153 end\n",
1279 | "Epoch #154 start\n",
1280 | "Epoch #154 end\n",
1281 | "Epoch #155 start\n",
1282 | "Epoch #155 end\n",
1283 | "Epoch #156 start\n",
1284 | "Epoch #156 end\n",
1285 | "Epoch #157 start\n",
1286 | "Epoch #157 end\n",
1287 | "Epoch #158 start\n",
1288 | "Epoch #158 end\n",
1289 | "Epoch #159 start\n",
1290 | "Epoch #159 end\n",
1291 | "Epoch #160 start\n",
1292 | "Epoch #160 end\n",
1293 | "Epoch #161 start\n",
1294 | "Epoch #161 end\n",
1295 | "Epoch #162 start\n",
1296 | "Epoch #162 end\n",
1297 | "Epoch #163 start\n",
1298 | "Epoch #163 end\n",
1299 | "Epoch #164 start\n",
1300 | "Epoch #164 end\n",
1301 | "Epoch #165 start\n",
1302 | "Epoch #165 end\n",
1303 | "Epoch #166 start\n",
1304 | "Epoch #166 end\n",
1305 | "Epoch #167 start\n",
1306 | "Epoch #167 end\n",
1307 | "Epoch #168 start\n",
1308 | "Epoch #168 end\n",
1309 | "Epoch #169 start\n",
1310 | "Epoch #169 end\n",
1311 | "Epoch #170 start\n",
1312 | "Epoch #170 end\n",
1313 | "Epoch #171 start\n",
1314 | "Epoch #171 end\n",
1315 | "Epoch #172 start\n",
1316 | "Epoch #172 end\n",
1317 | "Epoch #173 start\n",
1318 | "Epoch #173 end\n",
1319 | "Epoch #174 start\n",
1320 | "Epoch #174 end\n",
1321 | "Epoch #175 start\n",
1322 | "Epoch #175 end\n",
1323 | "Epoch #176 start\n",
1324 | "Epoch #176 end\n",
1325 | "Epoch #177 start\n",
1326 | "Epoch #177 end\n",
1327 | "Epoch #178 start\n",
1328 | "Epoch #178 end\n",
1329 | "Epoch #179 start\n",
1330 | "Epoch #179 end\n",
1331 | "Epoch #180 start\n",
1332 | "Epoch #180 end\n",
1333 | "Epoch #181 start\n",
1334 | "Epoch #181 end\n",
1335 | "Epoch #182 start\n",
1336 | "Epoch #182 end\n",
1337 | "Epoch #183 start\n",
1338 | "Epoch #183 end\n",
1339 | "Epoch #184 start\n",
1340 | "Epoch #184 end\n",
1341 | "Epoch #185 start\n",
1342 | "Epoch #185 end\n",
1343 | "Epoch #186 start\n",
1344 | "Epoch #186 end\n",
1345 | "Epoch #187 start\n",
1346 | "Epoch #187 end\n",
1347 | "Epoch #188 start\n",
1348 | "Epoch #188 end\n",
1349 | "Epoch #189 start\n",
1350 | "Epoch #189 end\n",
1351 | "Epoch #190 start\n",
1352 | "Epoch #190 end\n",
1353 | "Epoch #191 start\n",
1354 | "Epoch #191 end\n",
1355 | "Epoch #192 start\n",
1356 | "Epoch #192 end\n",
1357 | "Epoch #193 start\n",
1358 | "Epoch #193 end\n",
1359 | "Epoch #194 start\n",
1360 | "Epoch #194 end\n",
1361 | "Epoch #195 start\n",
1362 | "Epoch #195 end\n",
1363 | "Epoch #196 start\n",
1364 | "Epoch #196 end\n",
1365 | "Epoch #197 start\n",
1366 | "Epoch #197 end\n",
1367 | "Epoch #198 start\n",
1368 | "Epoch #198 end\n",
1369 | "Epoch #199 start\n",
1370 | "Epoch #199 end\n"
1371 | ]
1372 | }
1373 | ],
1374 | "source": [
1375 | "#train model - final******** with 200 epochs\n",
1376 | "epoch_logger = EpochLogger()\n",
1377 | "## Train doc2vec model\n",
1378 | "model1 = Doc2Vec(tagged_data, vector_size=20, window=2, min_count=1, workers=4, epochs = 200, callbacks=[epoch_logger])\n"
1379 | ]
1380 | },
1381 | {
1382 | "cell_type": "code",
1383 | "execution_count": 28,
1384 | "metadata": {},
1385 | "outputs": [],
1386 | "source": [
1387 | "# Save trained doc2vec model\n",
1388 | "model1.save(\"Model/my_doc2vec_v2.model\")"
1389 | ]
1390 | },
1391 | {
1392 | "cell_type": "code",
1393 | "execution_count": 30,
1394 | "metadata": {},
1395 | "outputs": [],
1396 | "source": [
1397 | "## Load saved doc2vec model\n",
1398 | "model1= Doc2Vec.load(\"Model/my_doc2vec_v2.model\")"
1399 | ]
1400 | },
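{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sanity check of the learned word space: nearest neighbours of\n",
"# a probe token ('java' is assumed to be in the vocabulary; min_count=1 keeps\n",
"# every corpus word, so any token seen in the job texts works here)\n",
"model1.wv.most_similar('java', topn=5)"
]
},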
1401 | {
1402 | "cell_type": "code",
1403 | "execution_count": 31,
1404 | "metadata": {},
1405 | "outputs": [
1406 | {
1407 | "data": {
1408 | "text/plain": [
1409 | "38941"
1410 | ]
1411 | },
1412 | "execution_count": 31,
1413 | "metadata": {},
1414 | "output_type": "execute_result"
1415 | }
1416 | ],
1417 | "source": [
1418 | "#confirm length (should be 38941)\n",
1419 | "len(tokenized_doc)"
1420 | ]
1421 | },
1422 | {
1423 | "cell_type": "code",
1424 | "execution_count": 35,
1425 | "metadata": {},
1426 | "outputs": [],
1427 | "source": [
1428 | "## Get vector value\n",
1429 | "vec = np.empty([38941,20])\n",
1430 | "\n",
1431 | "for k,i in enumerate(tokenized_doc):\n",
1432 | " \n",
1433 | " #print(i)\n",
1434 | " vector = model1.infer_vector(i)\n",
1435 | " vec[k] = vector\n",
1436 | " #vec = np.append(vector)\n",
1437 | " #vecf = np.append(vec,vector)\n",
1438 | "\n",
1439 | "# reshape into 2D\n",
1440 | "new_arr = np.reshape(vec,(-1,20))"
1441 | ]
1442 | },
1443 | {
1444 | "cell_type": "code",
1445 | "execution_count": 36,
1446 | "metadata": {},
1447 | "outputs": [],
1448 | "source": [
1449 | "rng = range(1, 21)\n",
1450 | "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])"
1451 | ]
1452 | },
1453 | {
1454 | "cell_type": "code",
1455 | "execution_count": 37,
1456 | "metadata": {},
1457 | "outputs": [
1458 | {
1459 | "name": "stdout",
1460 | "output_type": "stream",
1461 | "text": [
1462 | "\n",
1463 | "RangeIndex: 38941 entries, 0 to 38940\n",
1464 | "Data columns (total 20 columns):\n",
1465 | " # Column Non-Null Count Dtype \n",
1466 | "--- ------ -------------- ----- \n",
1467 | " 0 vec_1 38941 non-null float64\n",
1468 | " 1 vec_2 38941 non-null float64\n",
1469 | " 2 vec_3 38941 non-null float64\n",
1470 | " 3 vec_4 38941 non-null float64\n",
1471 | " 4 vec_5 38941 non-null float64\n",
1472 | " 5 vec_6 38941 non-null float64\n",
1473 | " 6 vec_7 38941 non-null float64\n",
1474 | " 7 vec_8 38941 non-null float64\n",
1475 | " 8 vec_9 38941 non-null float64\n",
1476 | " 9 vec_10 38941 non-null float64\n",
1477 | " 10 vec_11 38941 non-null float64\n",
1478 | " 11 vec_12 38941 non-null float64\n",
1479 | " 12 vec_13 38941 non-null float64\n",
1480 | " 13 vec_14 38941 non-null float64\n",
1481 | " 14 vec_15 38941 non-null float64\n",
1482 | " 15 vec_16 38941 non-null float64\n",
1483 | " 16 vec_17 38941 non-null float64\n",
1484 | " 17 vec_18 38941 non-null float64\n",
1485 | " 18 vec_19 38941 non-null float64\n",
1486 | " 19 vec_20 38941 non-null float64\n",
1487 | "dtypes: float64(20)\n",
1488 | "memory usage: 5.9 MB\n"
1489 | ]
1490 | }
1491 | ],
1492 | "source": [
1493 | "vec_df.info()"
1494 | ]
1495 | },
1496 | {
1497 | "cell_type": "code",
1498 | "execution_count": 38,
1499 | "metadata": {},
1500 | "outputs": [],
1501 | "source": [
1502 | "con_job_1 = pd.concat([job, vec_df], axis=1)"
1503 | ]
1504 | },
1505 | {
1506 | "cell_type": "code",
1507 | "execution_count": 2,
1508 | "metadata": {},
1509 | "outputs": [],
1510 | "source": [
1511 | "#saving final csv with additional vectors to match with resume. \n",
1512 | "con_job_1.to_csv('wip/con_job_1.csv', index=False)"
1513 | ]
1514 | },
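{
"cell_type": "markdown",
"metadata": {},
"source": [
"The matching step itself happens later in the project; the cell below is only a minimal sketch of how the saved vectors can be used, assuming a hypothetical `resume_text` string and scikit-learn for cosine similarity."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal matching sketch (assumes a hypothetical `resume_text` variable and\n",
"# that scikit-learn is installed)\n",
"from sklearn.metrics.pairwise import cosine_similarity\n",
"\n",
"resume_vec = model1.infer_vector(preprocess(resume_text)).reshape(1, -1)\n",
"sims = cosine_similarity(resume_vec, new_arr)[0]\n",
"top10 = sims.argsort()[::-1][:10]  # indices of the ten most similar jobs\n",
"df_job_descriptions.iloc[top10][['jobtitle', 'company']]"
]
},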
1515 | {
1516 | "cell_type": "code",
1517 | "execution_count": null,
1518 | "metadata": {},
1519 | "outputs": [],
1520 | "source": []
1521 | }
1522 | ],
1523 | "metadata": {
1524 | "kernelspec": {
1525 | "display_name": "Python 3",
1526 | "language": "python",
1527 | "name": "python3"
1528 | },
1529 | "language_info": {
1530 | "codemirror_mode": {
1531 | "name": "ipython",
1532 | "version": 3
1533 | },
1534 | "file_extension": ".py",
1535 | "mimetype": "text/x-python",
1536 | "name": "python",
1537 | "nbconvert_exporter": "python",
1538 | "pygments_lexer": "ipython3",
1539 | "version": "3.7.6"
1540 | }
1541 | },
1542 | "nbformat": 4,
1543 | "nbformat_minor": 4
1544 | }
1545 |
--------------------------------------------------------------------------------