├── Images
    ├── php.png
    ├── java.png
    ├── oracle.png
    ├── php-word.png
    ├── python0.png
    ├── python1.png
    ├── python2.png
    ├── oracle-Word.png
    ├── python_word.png
    └── python_word1.png
├── 5-Images
    ├── java.png
    ├── php.png
    ├── oracle.png
    ├── php-word.png
    ├── python0.png
    ├── oracle-Word.png
    ├── python_word.png
    ├── python_word1.png
    └── Directory_Structure.png
├── 4-Top_recommendations
    ├── center.css
    ├── load_css.py
    ├── style.css
    └── job_output.py
├── Data
    ├── Job-Locations
    │   ├── state.csv
    │   └── india-city-state.csv
    └── working_jd_sample.csv
├── README.md
└── 2-Preprocessing_and_Modelling
    ├── Pre-processing_Resume for matchingv2.ipynb
    ├── Pre-processing_Resume for matchingv1.ipynb
    └── Pre-processing Jobs for modellingv2.ipynb


/Images/php.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/php.png


--------------------------------------------------------------------------------
/5-Images/java.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/java.png


--------------------------------------------------------------------------------
/5-Images/php.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/php.png


--------------------------------------------------------------------------------
/Images/java.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/java.png


--------------------------------------------------------------------------------
/Images/oracle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/oracle.png


--------------------------------------------------------------------------------
/5-Images/oracle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/oracle.png


--------------------------------------------------------------------------------
/Images/php-word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/php-word.png


--------------------------------------------------------------------------------
/Images/python0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python0.png


--------------------------------------------------------------------------------
/Images/python1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python1.png


--------------------------------------------------------------------------------
/Images/python2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python2.png


--------------------------------------------------------------------------------
/5-Images/php-word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/php-word.png


--------------------------------------------------------------------------------
/5-Images/python0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python0.png


--------------------------------------------------------------------------------
/Images/oracle-Word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/oracle-Word.png


--------------------------------------------------------------------------------
/Images/python_word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python_word.png


--------------------------------------------------------------------------------
/5-Images/oracle-Word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/oracle-Word.png


--------------------------------------------------------------------------------
/5-Images/python_word.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python_word.png


--------------------------------------------------------------------------------
/Images/python_word1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/Images/python_word1.png


--------------------------------------------------------------------------------
/5-Images/python_word1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/python_word1.png


--------------------------------------------------------------------------------
/5-Images/Directory_Structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/HEAD/5-Images/Directory_Structure.png


--------------------------------------------------------------------------------
/4-Top_recommendations/center.css:
--------------------------------------------------------------------------------
1 | body {
2 |     background-color: #eee;
3 | }
4 | 
5 | .fullScreenFrame > div {
6 |     display: flex;
7 |     justify-content: center;
8 | }


--------------------------------------------------------------------------------
/4-Top_recommendations/load_css.py:
--------------------------------------------------------------------------------
1 | 
2 | import streamlit as st
3 | 
4 | def local_css(file_name):
5 |     with open(file_name) as f:
6 |         st.markdown('<style>{}</style>'.format(f.read()), unsafe_allow_html=True)


--------------------------------------------------------------------------------
/4-Top_recommendations/style.css:
--------------------------------------------------------------------------------
 1 | .highlight {
 2 |   border-radius: 0.2rem;
 3 |   color: white;
 4 |   padding: 0.1rem;
 5 |   margin-bottom: 1rem;
 6 | }
 7 | .bold {
 8 |   padding-left: 1rem;
 9 |   font-weight: 700;
10 | }
11 | .blue {
12 |   background-color: rgba(19, 179, 139, 0.842);
13 | }
14 | .red {
15 |   background-color: rgb(207, 79, 79);
16 | }
17 | .orange {
18 |   background-color: rgb(202, 107, 17);
19 | }
20 | .green {
21 |   background-color: rgb(19, 190, 42);
22 | }


--------------------------------------------------------------------------------
/Data/Job-Locations/state.csv:
--------------------------------------------------------------------------------
 1 | State_id,State
 2 | 1,India
 3 | 2,Andaman & Nicobar Islands
 4 | 2,Andhra Pradesh
 5 | 3,Arunachal Pradesh
 6 | 4,Assam
 7 | 5,Bihar
 8 | 6,Chhattisgarh
 9 | 7,Dadra & Nagar Haveli
10 | 8,Daman & Diu
11 | 9,Delhi
12 | 10,Goa
13 | 11,Gujarat
14 | 12,Haryana
15 | 13,Himachal Pradesh
16 | 14,Jammu & Kashmir
17 | 15,Jharkhand
18 | 16,Karnataka
19 | 17,Kerala
20 | 18,Lakshadweep
21 | 19,Madhya Pradesh
22 | 20,Maharashtra
23 | 21,Manipur
24 | 22,Meghalaya
25 | 23,Mizoram
26 | 24,Nagaland
27 | 25,Orissa
28 | 26,Pondicherry
29 | 27,Punjab
30 | 28,Rajasthan
31 | 29,Sikkim
32 | 30,Tamil Nadu
33 | 31,Uttar Pradesh
34 | 32,Uttarakhand
35 | 33,West Bengal
36 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Bidirectional-Job-Resume-Recommender-System
  2 | ## Introduction:
  3 | A must have tool for job seekers and recruiters. This project is intended to find and recommend the best fit. Job seekers can find best matching jobs to their resume and Recruiters find the best fit resumes for any job posting. Its based on Machine learning "NLP" concepts of text content match via Doc2Vec and similarity scores.
  4 | Primary feature of this recommender system is its roburst nature. It enables both Job-seekers and Recruiters to find best fit.
  5 | 1.	It reads the resume features and finds the top (n) relavant jobs based on Education, Work experience, location and text content.
  6 | 
  7 | 2.	Same code can be used to find best matching resumes for a job posting (based on Education, Work experience, location and text content).
  8 | 
  9 | Project involves extensive use of NLP features as in:
 10 | 
 11 | •	tokenization
 12 | 
 13 | •	lemmatization (English) 
 14 | 
 15 | -- Tried WordNet, spaCy, Textblob 
 16 | 
 17 | -- spaCy used (-PRON-) if identified pronoun 
 18 | 
 19 | -- Got same results with NLTK WordNet and TextBlob - chose to stick with wordNet
 20 | 
 21 | •	Count Vectorization
 22 | 
 23 | •	TF-IDF
 24 | 
 25 | •	entity extraction
 26 | 
 27 | 
 28 | ## Model
 29 | 
 30 | Text data is trained on **Doc2Vec** Model. Doc2Vec uses NLP but rather than working on frequency of each word, Doc2Vec can create a numeric representation of each  document by providing n-dimensional vectors.
 31 | 
 32 | Uses Cosine similarity to find the closest match and recommend top (n) matches
 33 | 
 34 | ## Directory Structure 
 35 | ![image](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/blob/master/5-Images/Directory_Structure.png)
 36 | 
 37 | 
 38 | ## Directory Details
 39 | 
 40 | ### [1-Data_gathering_EDA](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/1-Data_gathering_EDA)
 41 | Job_EDA.ipynb  - File to gather raw data from csv and EDA on JOBS
 42 | Resume_EDA -  File to gather raw data from csv and EDA on Resumes
 43 | fuzzy-wuzzy-logic-Resume_EDA.ipynb – To obtain similar titles based on score. We see similar titles written in different forms like Java Developer, Dev (java), Jave Deve. Etc which all should be only Java Developer. Fuzzy -wuzzy helps resolving the issue.
 44 | 
 45 | ### [2-Preprocessing_and_Modelling](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/2-Preprocessing_and_Modelling)
 46 | Pre-processing Jobs for modellingv1.ipynb – First iteration of Doc2Vec Model on Jobs text Data
 47 | Pre-processing Jobs for modellingv2.ipynb – Second and final iteration of Doc2Vec Model on Jobs 
 48 | Pre-processing_Resume for matchingv1.ipynb - First iteration of Doc2Vec Model on resume text Data
 49 | Pre-processing_Resume for matchingv2.ipynb - Second and final iteration of Doc2Vec Model on resume
 50 | ** one can just look into v2 to understand the flow.
 51 | 
 52 | ### [3-Matching_Sprints](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/3-Matching_Sprints)
 53 | Sprint1_matching_resume_to_jobs.ipynb
 54 | Sprint2_matching_resume_to_jobs-with-location-add-on.ipynb
 55 | Sprint3_matching_resume_to_jobs-with-text-add-on.ipynb
 56 | Sprint4_matching_resume_to_jobs-final.ipynb
 57 | 
 58 | ### [4-Top_recommendations](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/4-Top_recommendations)
 59 | job_output.py – Python file to run streamlit to see more intercative user interface to input resume and get top 10 jobs
 60 | center.css – Support file to help align text / images to center
 61 | load_css.py – support file for better UI
 62 | style.css – support file for color coding in streamlit
 63 | ** one can focus only on job_output.py for understanding the code
 64 | 
 65 | ### [5-Images](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/5-Images)
 66 | Contains images used / created during coding
 67 | 
 68 | ### 6-Model
 69 | Contains the final model, so just load, and run the model (Doc2Vec model trained on 40,000 jobs with 20-D vectors and 200 epochs)
 70 | 
 71 | ### [Data](https://github.com/Shailja-Jindal/Bidirectional-Job-Resume-Recommender-System/tree/master/Data)  
 72 | Due to file size limitations showing samples datasets.
 73 | 
 74 | •	Resumes: Contains sample 15 resumes in .csv format (look and feel of dataset)
 75 | 
 76 | •	Jobs: Contains sample 15 jobs in .csv format (look and feel of dataset)
 77 | 
 78 | •	Actual datasets can be found on Kaggle:
 79 | 
 80 | https://www.kaggle.com/PromptCloudHQ/jobs-on-naukricom
 81 | 
 82 | https://www.kaggle.com/avanisiddhapura27/resume-dataset
 83 | 
 84 | 
 85 | 
 86 | ## Resources
 87 | •	Datasets
 88 | 
 89 |     o	https://www.kaggle.com/
 90 | 
 91 |     o	https://www.britannica.com/
 92 | 
 93 | •	Lemmatization Approaches with Examples in Python
 94 | 
 95 |     o	https://www.machinelearningplus.com/
 96 | 
 97 | •	Doc2Vec Tutorial and Implementation
 98 | 
 99 |     o	https://radimrehurek.com/gensim/
100 | 
101 |     o	https://towardsdatascience.com/
102 | 
103 | •	Fuzzy-Wuzzy Matching
104 | 
105 |     o	https://towardsdatascience.com/
106 | 
107 | •	And shoutout to –
108 | 
109 |     o	Scikit-learn documentation
110 | 
111 |     o	Geekforgeeks 
112 | 
113 |     o	Stackoverflow 
114 | 


--------------------------------------------------------------------------------
/4-Top_recommendations/job_output.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | from PIL import Image
  3 | # image = Image.open('title_page.png')
  4 | # st.image(image,width = 600)
  5 | st.markdown("<h1 style='text-align: center; color: Blue;'>Bidirectional Job-Resume Recommender</h1>", unsafe_allow_html=True)
  6 | #st.markdown("<h1 style='text-align: center; image</h1>", unsafe_allow_html=True)
  7 | #st.markdown('# Bidirectional Job-Resume Recommender')
  8 | #"""
  9 | ##---- 
 10 | #"""
 11 | import numpy as np 
 12 | import pandas as pd 
 13 | import datetime
 14 | import matplotlib.pyplot as plt
 15 | from sklearn.metrics.pairwise import cosine_similarity
 16 | # %matplotlib inline
 17 | import streamlit as st
 18 | from load_css import local_css
 19 | local_css("style.css")
 20 | from PIL import Image
 21 | 
 22 | # Loading in data
 23 | @st.cache
 24 | def reading():
 25 | # reading my sorted resume csv
 26 |     resume = pd.read_csv('wip/con_resume_1.csv')
 27 | 
 28 |     # reading my sorted job csv
 29 |     job = pd.read_csv('wip/con_job_1.csv')
 30 | 
 31 |     # resume features to be matched with jobs
 32 |     r_df = resume[['resume_id','experience_range','is_grad','is_postgrad','is_doc','location',
 33 |                     'vec_1','vec_2','vec_3','vec_4','vec_5','vec_6','vec_7','vec_8','vec_9','vec_10','vec_11','vec_12',
 34 |                     'vec_13','vec_14','vec_15','vec_16','vec_17','vec_18','vec_19','vec_20']]
 35 | 
 36 |     return resume, job, r_df
 37 | 
 38 | resume, job, r_df = reading()
 39 | 
 40 | #My recommender system to find best jobs for a given resume
 41 | def jobs_recommender(r) :
 42 |     
 43 |     #Store the results in this DF
 44 |     matched_jobs = pd.DataFrame(columns = ["id","company","job_title","jobdescription","experience_range","location","similarity"] )
 45 |     
 46 |     r= r.to_numpy()
 47 |     r= r.reshape(1, -1)
 48 |     #Go through ALL the related jobs
 49 |     for jd in job_m['j_id'] :
 50 |         #print(f'jd is {jd}')        
 51 |         #Find the similarity of the jobs with resume
 52 |         jobs = job_m.loc[jd]
 53 |         jobs = jobs.to_numpy()
 54 |         jobs = jobs.reshape(1, -1)
 55 |         #print(f'job is {jobs}')
 56 |         #print(f'r is {r}')
 57 |         #print(f'job is {job}')
 58 |         similarity = cosine_similarity(r,jobs)
 59 |         #print(f'similarity is {similarity}')
 60 |         matched_jobs.loc[len(matched_jobs)] = [jd,
 61 |                                                related_jobs['company'][jd],
 62 |                                                related_jobs['jobtitle'][jd],
 63 |                                                related_jobs['jobdescription'][jd],
 64 |                                                related_jobs['experience_range'][jd],
 65 |                                                related_jobs['loc_name'][jd],
 66 |                                                similarity[0][0]]
 67 |         
 68 | 
 69 |     return matched_jobs.sort_values(by=['similarity'],ascending=False)[1:]              
 70 | 
 71 | # r1 contrains the only features to be matched of slected resume  13769-java
 72 | 
 73 | actual_name = {'java developer - Maharashtra': 14, 'java developer - Haryana ': 310, 'php developer - Delhi': 983, 'php developer - Karnataka':9934, 'python developer -Karnataka': 10303, 'python developer - Tamil Nadu': 10940, 'oracle - Maharashtra': 12330, 'oracle - Tamil Nadu ': 12884}
 74 | r_test = st.sidebar.selectbox('Pick a resume from Drop-down : ', list(actual_name.keys()))
 75 | #st.text(type(r_test))
 76 | match_key= r_test.split()[0]
 77 | #st.text(match_key)
 78 | r1= r_df.loc[actual_name[r_test]]
 79 | r2= resume.loc[actual_name[r_test]]
 80 | R_title = r2['Resume_title']
 81 | R_location = r2['loc_name']
 82 | R_total_exp = r2['total_experience']
 83 | R_desc = r2['Description']
 84 | R_work_ex = r2['experience_desc']
 85 | image = Image.open('cv_pic.png')
 86 | #image = image.resize((100, 100))
 87 | #st.image(image,width = 200)
 88 | t = f"<div><span class='bold'><span class='highlight blue'>Selected Resume Title  : </span></span>   <span class='highlight blue'>{R_title} </span> </div>"
 89 | with open("center.css") as f: 
 90 |     st.markdown(t.format(f.read()), unsafe_allow_html=True)
 91 | """
 92 | 
 93 | 
 94 | """
 95 | #st.markdown(f'{R_title}')  
 96 | loc_ex =  f"<div><span class='bold'>Location: </span><span class='highlight red'>{R_location}</span><span class='bold'>Total Experience: </span><span class='highlight red'>{R_total_exp}</span></div>" 
 97 | #st.markdown(f'**Current location:** {R_location} \t **Total Experience:** {R_total_exp}')
 98 | st.markdown(loc_ex, unsafe_allow_html=True)
 99 | #st.subheader(f'Experience description: {R_work_ex}')
100 | 
101 | 
102 | import ast 
103 | # for index, rows in r2.iterrows():
104 | #     resume_desc= []
105 | #     #pick work experience col and read it as JSON   
106 | result_work = r2['work_experiences']
107 | #st.subheader(result_work)
108 | #st.subheader(type(result_work))
109 | result_work =  ast.literal_eval(result_work)
110 | #st.subheader(type(result_work))
111 | #     try: result_work = eval(work)
112 | #     except: continue
113 | #     #read description   
114 | #for i in result_work.keys(): 
115 | # st.subheader(i)   
116 | w_title = (result_work[0][0]['wtitle:']) 
117 | #st.markdown(f'')       
118 | w_company= (result_work[0][1]['wcompany:'])
119 | t_com = f"<div><span class='bold'>Current Work Title : </span><span class='highlight red'>{w_title}</span><span class='bold'>Company : </span><span class='highlight red'>{w_company}</span></div>"
120 | w_city= (result_work[0][2]['wcity:'])
121 | w_state= (result_work[0][3]['wstate:'])
122 | w_duration= (result_work[0][4]['wduration:'])      
123 | w_descr= (result_work[0][5]['wdescr:'])
124 | #des = f"<div><span class='bold'>Description :  </span>{w_descr}</div>" 
125 | des = f"<div><span class='bold'>Description :  </span>{R_desc}</div>" 
126 | #st.markdown(f'**Current Work Title :** {w_title}  **Company :** {w_company}')
127 | st.markdown(t_com,unsafe_allow_html=True)
128 | st.markdown(des,unsafe_allow_html=True)
129 | #st.markdown(f'**Description :** {w_descr}') 
130 | 
131 | 
132 | 
133 | #from pool of 34,000 jobs, selecting jobs that are releated to sql dba (resume in question)
134 | related_jobs = job.loc[job['jobtitle'].str.contains(match_key)]
135 | related_jobs=related_jobs.loc[related_jobs['location']==r2['location']]
136 | #job features need to be matched with resume
137 | job_m = related_jobs[['j_id','experience_range','is_grad','is_postgrad','is_doc','location',
138 |                      'vec_1','vec_2','vec_3','vec_4','vec_5','vec_6','vec_7','vec_8','vec_9','vec_10','vec_11','vec_12',
139 |                       'vec_13','vec_14','vec_15','vec_16','vec_17','vec_18','vec_19','vec_20']]
140 | # """
141 | # *************************************************
142 | # """
143 | st.markdown('# System Recommended Top 10 Jobs : ')
144 | image = Image.open('jobs.png')
145 | st.image(image, width = 200) #, use_column_width=True)
146 | st.write('Recommendation is based on cosine similarity of multiple factors like skills, location, experience, education, description, title etc ')
147 | # call recommender by passing selected resume 
148 | matched_jobs = jobs_recommender(r1)
149 | matched_jobs = matched_jobs.head(10)
150 | 
151 | st.write(matched_jobs)
152 | st.write('**Note:** Similarity Scores may round off to nearest integer value, so itcould be hard to visualize the difference. But they are displayed in ranked order.')
153 | """
154 | *************************************************
155 | 
156 | """
157 | # st.markdown('# Phrases suggestions in word-cloud ')
158 | # st.write('WordCloud pulls words, pairs from all related jobs to form a cloud')
159 | # from PIL import Image
160 | # if match_key == 'java':
161 | #     image = Image.open('java.png')
162 | #     st.image(image, caption=(f'Suggestions for {match_key}'),
163 | #             use_column_width=True)
164 | # elif match_key == 'oracle':
165 | #     image = Image.open('oracle.png')
166 | #     st.image(image, caption=(f'Suggestions for {match_key}'),
167 | #             use_column_width=True)
168 | # elif match_key == 'php':
169 | #     image = Image.open('php.png')
170 | #     st.image(image, caption=(f'Suggestions for {match_key}'),
171 | #             use_column_width=True)
172 | # elif match_key == 'python':
173 | #     image = Image.open('python_word1.png')
174 | #     st.image(image, caption=(f'Suggestions for {match_key}'),
175 | #             use_column_width=True)
176 | 
177 | 
178 | 


--------------------------------------------------------------------------------
/Data/working_jd_sample.csv:
--------------------------------------------------------------------------------
 1 | company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id
 2 | Covalense Technologies Private Limited,,5 - 9 yrs,IT-Software / Software Services,"Job Description Â  Send me Jobs like this Experience: 5yrs. - 9yrs. Job Description: * Engineering Graduate/Post-Graduate with 6-9 years of experience in Java Programming with strong in Object Oriented concepts. * Good Exposure to spring, hibernate, web services, Threading, Socket Programming, Collections, Data Structure and IO with strong knowledge on either Spring Batch or JEE frameworks.. * Experienced in XML configuration; setting up Eclipse or any other IDEs with basic knowledge of SQL. * Energetic with strong analytical, communication and interpersonal skills. * Ability to learn and apply the new concepts quickly. Preferred Skills * Working knowledge of Oracle/DB2, ClearCase/CVS. * Experienced in test tools like JUNIT. * Ability to build tools like Ant and Maven. Salary:INR 7,50,000 - 15,00,000 P.A Industry: IT-Software / Software Services Functional Area: IT Software - Client/Server Programming Role Category:Programming & Design Role:Team Lead/Technical Lead Keyskills Hibernate Spring Java Maven JUnit Ant JEE Eclipse Oracle Web Services AngularJS Desired Candidate Profile Please refer to the Job description above Company Profile: Covalense Technologies Private Limited Ã¯Æ’Ëœ Covalense is an IT services and solutions company, established in 2006 and now with the 300 + professionals are working across the location. Ã¯Æ’Ëœ Our office presence is in New Zealand, Australia , US and India. Ã¯Æ’Ëœ Bangalore, New Zealand is majorly focusing on End-to-End Oracle Telecom stack and Hyderabad, New Zealand focusing on Microsoft, Open Source, Mobility Apps (MOM Services) development Ã¯Æ’Ëœ There are multiple project engagements with Tier 1 SI partners in BLR and Gurgaon. Ã¯Æ’Ëœ Our portfolio majorly consists of telecom implementations along with a wide selection of industry verticals. Download PPT Photo 1 Â  View Contact Details",70916001822,Bengaluru,Java - SSE / Technical Lead,,"7,50,000 - 15,00,000 P.A",2016-10-12 16:21:02 +0000,,IT Software - Client/Server Programming,60b28f3eb5c9c5c004e0b86678d99b5e
 3 | Cambio Consulting,"UG: B.Tech/B.E. - Any Specialization, Other Graduate PG:M.Tech - Any Specialization, MCA - Computers, M.S/M.D - Any Specialization, Other Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",4 - 9 yrs,IT-Software / Software Services,"Job Description Â  Send me Jobs like this Hi, we have urgent requirement for embedded developers Job Description: Very good proficiency in programming in Java Very good proficiency in designing software applications, e.g. using design patterns, employing test driven development Good understanding of the principles of GUI programming in the context of Web, e.g. GWT, CSS, JavaScript Knowledge of telecommunication management protocols like SNMP, REST Preferable to have exposure to protocols like DHCP, DNS, SIP etc. Experience in maintaining Continuous Integration Environments e.g. using Maven, Gradle, Subversion Strong competences in working in a team, e.g. using Agile Frameworks like Scrum If interested, Please forward your updated CV to rafi@cambio.co.in Salary:INR 7,00,000 - 17,00,000 P.A Industry: IT-Software / Software Services Functional Area: IT Software - System Programming Role Category:Programming & Design Role:Software Developer Desired Candidate Profile Education- UG: B.Tech/B.E. - Any Specialization, Other Graduate PG:M.Tech - Any Specialization, MCA - Computers, M.S/M.D - Any Specialization, Other Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Cambio Consulting We are established consulting firm providing a wide spectrum of services in the HR Domain consulting services. Our core belief is that people are the most valuable asset for any company. Leading from that is our aim to offer not just recruitment support but also be a strategic adviser to all our customers. We plan to achieve this by understanding the client's business process, industry domain and develop market intelligence in order to provide a right fit for all the positions. Download PPT Photo 1 Â  View Contact Details",1.20317E+11,"Bengaluru/Bangalore , Hyderabad / Secunderabad",Java & NMS Development Openings @ Bangalore and Hyderabad,,"7,00,000 - 17,00,000 P.A",2016-03-11 02:30:18 +0000,,IT Software - System Programming,da267e3b96a4ed51faf0e610ea662c20
 4 | SATYAM VENTURE ENGINEERING SERVICES,"UG: Any Graduate PG:Any Postgraduate Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",10 - 15 yrs,Automobile / Auto Anciliary / Auto Components,"Job Description Â  Send me Jobs like this Experience Profile - Experience in Should Costing of Product is must. Knowledge about various Manufacturing processes such as Stamping, Pressure Die Casting, Plastic Injection Molding etc., Knowledge about costing of various commodities such as Plastic parts, Al. Die Casting, Electric Motors, Automotive Seating System etc., Preferred knowledge in Costing softwares like aPriori, PCS, DFM/ DFA etc., Personal Attributes - Ability to develop a team Keenness to innovation, problem solving abilities, commitment to personal and professional growth, and eye for details. Presentation/ communication skills are prerequisites. Good team players Salary: Not Disclosed by Recruiter Industry: Automobile / Auto Anciliary / Auto Components Functional Area: Production , Manufacturing , Maintenance Role Category:Production/Manufacturing/Maintenance Role:Project Manager-Production/Manufacturing/Maintenance Desired Candidate Profile Education- UG: Any Graduate PG:Any Postgraduate Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: SATYAM VENTURE ENGINEERING SERVICES SATYAM VENTURE ENGINEERING SERVICES Download PPT Photo 1 Â  View Contact Details",3.10317E+11,Hyderabad / Secunderabad,Project Manager,,Not Disclosed by Recruiter,2016-03-31 02:30:24 +0000,,Production,d6ff245ae99d79e4be094fbe47c50284
 5 | Envision Enterprise Solutions Pvt Ltd,"UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",7 - 12 yrs,IT-Software / Software Services,"Job Description Â  Send me Jobs like this Project Managers with more than 7 years of experience of large project management and delivery. Working experience in multiple countries with multi culture onsite project engagements, with in Ability to manage stake holders with excellent written, verbal communication skills and team management abilities. Expertise in delivery models like waterfall and Agile is essential. PMP Qualification will be an advantage. Willing to travel to client locations globally Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Testing Engineer Desired Candidate Profile Education- UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Envision Enterprise Solutions Pvt Ltd Envision is a leading IT Solutions Provider and System Integrator, providing solutions for enterprises across the globe, to optimize resource utilization, streamline operations, reduce the costs, maximise return on investment. We provide cost effective solutions within budget and timelines. We are known for providing Enterprise Asset Management, Port and Terminal Automation Solutions, Transportation, Logistics, Enterprise Mobility Solutions, IOT, Smarter manufacturing, facilities solutions. Download PPT Photo 1 Â  View Contact Details",70316503401,Hyderabad / Secunderabad,Project Managers,,Not Disclosed by Recruiter,2016-03-07 02:31:09 +0000,,IT Software - Application Programming,9f819c69b3578157baf8b83a5820b27e
 6 | NEW HOPE MEDICAL CENTRE,"UG: B.B.A PG:MBA/PGDM Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",1 - 2 yrs,Medical / Healthcare / Hospitals,"Job Description Â  Send me Jobs like this Persons with great initiative, negotiation skills, sound financial insight, managerial skills to lead and support our new projects. Salary: Not Disclosed by Recruiter Industry: Medical / Healthcare / Hospitals Functional Area: Medical , Healthcare , R&D , Pharmaceuticals , Biotechnology Role Category:Drug Regulatory Affairs/Documentation Role:Regulatory Affairs Manager Desired Candidate Profile Education- UG: B.B.A PG:MBA/PGDM Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: NEW HOPE MEDICAL CENTRE NEW HOPE MEDICAL CENTRE Download PPT Photo 1 Â  View Contact Details",1.60317E+11,Hyderabad / Secunderabad,Project Manager,,Not Disclosed by Recruiter,2016-03-16 02:31:16 +0000,,Medical,2a1d64deb55ed947cec34818eb7abf9a
 7 | Maven Workforce,"UG: B.Tech/B.E. - Any Specialization PG:M.Tech - Any Specialization, MCA - Computers, M.Sc - Any Specialization Doctorate:Doctorate Not Required",5 - 10 yrs,IT-Software / Software Services,"Job Description Â  Send me Jobs like this 1. The candidate must have done atleast N3 of JLPT (Japanese Language Proficiency Test). Currently JLPT has 5 levels (previously 4 levels). If the candidate has N3 (current), N2 or N1 certification, we can consider. 2. Candidate must know atleast 1000 kanjis 3. Candidate must be good in speaking Japanese. There are no certifications to check that. If a person says he/she lived in Japan or speak business Japanese on a day-to- day basis. 4. M.A in Japanese (especially from Jawaharlal Nehru University (JNU), Delhi) and even if he/she has not cleared any JLPT certifications. 5. If a person has not done any certifications but lived in Japan and can read, write, speak Japanese can be considered. Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Other Role Category:Other Role:Outside Consultant Keyskills Japanese JLPT Japanese Interpreter Japanese translator Japanese Language proficiency test Desired Candidate Profile Education- UG: B.Tech/B.E. - Any Specialization PG:M.Tech - Any Specialization, MCA - Computers, M.Sc - Any Specialization Doctorate:Doctorate Not Required Â  Company Profile: Maven Workforce Leading client of Maven Workforce Download PPT Photo 1 Â  View Contact Details",30516900761,"Delhi/NCR(National Capital Region) , Gurgaon",Japanese Interpreter,,Not Disclosed by Recruiter,2016-05-03 11:35:55 +0000,,IT Software - Other,30649a930cae66477fb4e0eb93f2ccf9
 8 | Confidential,,2 - 5 yrs,IT-Software / Software Services,"Job Description Â  Send me Jobs like this hi all, please find the mentioned JD below, Skills: Dot.Net, C#, SQL, OOPS ,Web services Experience: 2 -5 Years Work Location: Bangalore M G Road Interview Timming- 10:00 A M to 2:00 P M Mode:C2H Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Keyskills .Net Desired Candidate Profile Â  Company Profile: Confidential Confidential Download PPT Photo 1 Â  View Contact Details",1.30416E+11,Bengaluru,.Net Developer,,Not Disclosed by Recruiter,2016-10-06 16:21:31 +0000,,IT Software - Application Programming,b529711ee8c1b4c1bea4849d18594132
 9 | Melstar Information Technologies Ltd,UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required,2 - 5 yrs,IT-Software / Software Services,"Job Description Â  Send me Jobs like this We have an urgent openings for .net developer Location : Bangalore Skills:.net, sql Exp : 2 to 4 Years, If you are interested please send your updated resume with following details: Full Name: Email ID: Phone: Primary skills: Total Exp : Relevant Exp: Minimum Notice Period: Current Company: Current CTC: Current Employment(Perm/Cont): Current Location: Preferred Location: Availability on weekend/WeekDay for a F2F discussion : PAN : Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Keyskills .net developer .net developer Desired Candidate Profile Â  Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required Company Profile: Melstar Information Technologies Ltd Melstar is a CMM Level global IT company with its headquarters in Mumbai; India. We have eight software development facilities, with eleven offices worldwide including the US,Europe and India. We offer a unique blend of domain expertise in the field of Banking, Finance, Insurance and Manufacturing. We are engaged in cutting-edge technologies like e-commerce,web development and dot-com projects with strong N-tier approach. We have global partnerships with IBM,Microsoft, Oracle,Informix and other IT Stalwarts. Our strong customer focus can be seen from the prestigious list of clients like Citibank N.A., IBM, Genpact, Standard Chartered, HP etc. Download PPT Photo 1 Â  View Contact Details",71016900650,"Bengaluru, Delhi, Noida",.net Developer,,Not Disclosed by Recruiter,2016-10-07 16:21:40 +0000,,IT Software - Application Programming,e233e57a6b2eeefc24d43cbb58a86096
10 | Unitforce technologies Pvt. Ltd.,"UG: Any Graduate - Any Specialization PG:Any Postgraduate - Any Specialization Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",5 - 6 yrs,IT-Software / Software Services,"Job Description Â  Send me Jobs like this Net Framework 4.5 - ASP & C# - HTML5, Java Script, AJAX, JQuery Secondary Skills Required: Entity Framework MVC Angular JS RDBMS Oracle 11g Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: IT Software - Application Programming , Maintenance Role Category:Programming & Design Role:Software Developer Desired Candidate Profile Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate - Any Specialization Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Unitforce technologies Pvt. Ltd. www.uftech.com Download PPT Photo 1 Â  View Contact Details",60316600219,Hyderabad / Secunderabad,.NET Developer,,Not Disclosed by Recruiter,2016-03-05 02:30:23 +0000,,IT Software - Application Programming,4ed3d0cca70fcac5acf1557de46ef176
11 | Karvy Analytics Limited,UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required,4 - 8 yrs,KPO / Research / Analytics,"Job Description Â  Send me Jobs like this Responsibilities : - Selecting features, building and optimizing classifiers using machine learning techniques - Data mining using state-of-the-art methods - Enhancing data collection procedures to include information that is relevant for building analytic systems - Processing, cleansing, and verifying the integrity of data used for analysis - Doing ad-hoc analysis and presenting results in a clear manner Skills and Qualifications : - Excellent understanding of machine learning techniques and algorithms, such as k-NN, Naive Bayes, SVM, GBM, Decision Forests, Time Series Forecasting etc. - Experience with common data science toolkits in R or Python Excellence in at least one of these is highly desirable - Good communication skills - Experience with data visualization tools, such as D3.js, Tableau etc. would be added advantage - Proficiency in using query languages such as SQL, Hive, Pig would be added advantage - Good applied statistics skills, such as distributions, statistical testing, regression, etc. - Good scripting and programming skills - Data-oriented personality - More than 4 years of experience in Data analysis. Salary:INR 6,00,000 - 12,00,000 P.A Industry: KPO / Research / Analytics Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Analytics Manager Keyskills Data Science Hive Machine Learning Data Mining R Data Visualization SQL Python Data Analysis Time Series Desired Candidate Profile Please refer to the Job description above Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required Company Profile: Karvy Analytics Limited Karvy Analytics Limited is a new age company and a modern arm of the leading Karvy Conglomerate. Led by visionary management, the young and forward thinking team is building world class solutions for the global analytics universe. We are focused on multi-industry use cases for companies that need technology and professional services for their functional and operational analytics projects. We offer a range of solutions that bring immediate business benefits to our global customers who are interested in leveraging big data, statistical and mathematical modeling techniques, social analytics, and mobile descriptive analytics for new business insights. Download PPT Photo 1 Â  View Contact Details",2.01216E+11,Hyderabad,Sr Data Scietist,2,"6,00,000 - 12,00,000 P.A",2016-12-20 18:19:23 +0000,www.naukri.com,Analytics & Business Intelligence,0abdcbe9423d9e4730c1b16db7954f77
12 | Rinalytics Advisors Pvt. Ltd,"UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",2 - 7 yrs,Recruitment / Staffing,Job Description Â  Send me Jobs like this Job Description,2.50516E+11,Bengaluru/Bangalore,Data Science Role,,Not Disclosed by Recruiter,2016-01-29 14:07:13 +0000,,Analytics & Business Intelligence,8e9ba1f084f9fe31c7878258fba47188
13 | Prism Manpower Services,UG: Any Graduate PG:Post Graduation Not Required,0 - 4 yrs,Recruitment / Staffing,"Job Description Â  Send me Jobs like this Computer Operators. Should have knowledge of Excel. Decent typing speed. should know English typing. Freshers are also fine. Interested Candidates can forward their resume at prismmanpower@yahoo.in call us on 9702897822 Salary: Not Disclosed by Recruiter Industry: Recruitment / Staffing Functional Area: Executive Assistant , Front Office , Data Entry Role Category:Other Role:Stenographer/Data Entry Operator Desired Candidate Profile Please refer to the Job description above Education- UG: Any Graduate PG:Post Graduation Not Required Company Profile: Prism Manpower Services We , Prism Manpower Services , provide a wide range of Recruitment Solutions for various requirements. Located in Mumbai city of Maharashtra , the company was incepted in the year 2007. With an experience of serving industries like Insurance , Event Management etc , we are today recognized as a trustworthy Service Provider in Maharashtra. Download PPT Photo 1 Â  View Contact Details",2.51017E+11,Mumbai,DATA ENTRY OPERATOR,,Not Disclosed by Recruiter,2016-10-25 19:49:07 +0000,www.naukri.com,Executive Assistant,7fb17e8480a9978d68e30de0f39fea04
14 | "Risk Management Solutions, Inc.","UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",4 - 5 yrs,Banking / Financial Services / Broking,"Job Description Â  Send me Jobs like this RMS - Senior Analyst, Data Analytics Senior Analyst, Data Analytics Objective of the Role: The applicant will be extensively involved in exposure modeling and data analytics. Executes delivers original analysis and insights and own all or part of an analytics module Provide professional skills necessary for all phases of data analysis, including the application of standard statistical methods for conducting analysis, documentation and presentation. Communicates analytical insights through sophisticated synthesis and packaging of results (Including PPT slides and charts) Establishes credibility by thought partnering on analytics topics; takes positions and draws conclusions on a range of external and internal issues Serve as an active participant on cross- functional projects, interpreting data, and translating into actionable insights, provide support on ad- hoc analysis and reports. B. Tech./ Post graduate (geostatistics) from a premier institute with good academic record 4 to 5 years of total experience with minimum 3 years experience on analytical projects requiring comprehensive data analysis, interpretation and presentation skills. Knowledge of analysis techniques like statistical methodology, data manipulation. Critical thinking skills and hands on experience in data interpretation, formulating hypotheses and being able to make educated guesses when data may be sparse or unavailable. Strong MS SQL knowledge and experience, ability to write custom queries of medium to high complexity. Strong documentation skills with experience of working on MS Word, Excel (advanced knowledge such as using pivots, filters, using external data etc.) , PowerPoint and Project. Excellent communication skills and ability to independently lead and drive projects. Technical skills: Experience with multiple analytics methods (one or more required) Data management skills (e.g. data modeling, data integrity QA/ QC) Geospatial data visualization and analytics (specialties such as cluster detection or geo- statistical methods) Spatialtemporal analysis (cartographic animation of timeseries data) Experience in core analytics methods (one or more of the following) : Geo coding geo referencing. Knowledge of open source proprietary geo analytics data sources. Geographic cluster recognition. Network analysis (locationallocation, OD Matrix travelling sales person, vehicle routing problem) Spatialtemporal analysis Familiarity with analytics tools (one or more required) GIS toolkits (ESRI, Quantum GIS, MapInfo or similar) Working knowledge of Property Causality insurance or reinsurance or and Risk Assessment Analysis would be advantageous Knowledge of catastrophe modeling domain would also be advantageous Working knowledge of and experience in statistical tools like R, SPSS etc. RMS models and software help insurers, financial markets, corporations, and public agencies evaluate and manage catastrophe risks throughout the world. We lead an industry that we helped to pioneercatastrophe risk modelingand are the innovators of the RMS (one) platform, which is transforming the world's understanding and quantification of risk through open, real- time exposure and risk management. More than 400 insurers, reinsurers, trading companies, and other financial institutions trust RMS models and SaaS solutions to better understand and manage the risks of natural and human- made catastrophes, including hurricanes, earthquakes, floods, terrorism, and pandemics. We think about the unthinkable, enabling the management of even the most extreme events. Our scientific and objective measurement of risk facilitates the efficient flow of capital needed to insure, manage, and ultimately mitigate these risks to reduce the consequences of disasters, promoting resilient societies and a sustainable global economy. RMS is proud to be an equal opportunity employer. Salary: Not Disclosed by Recruiter Industry: Banking / Financial Services / Broking Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Data Analyst Desired Candidate Profile Education- UG: B.Tech/B.E. PG:M.Tech Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: Risk Management Solutions, Inc. Risk Management Solutions, Inc. Download PPT Photo 1 Â  View Contact Details",1.10517E+11,Noida,Data Analytics,,Not Disclosed by Recruiter,2016-05-11 06:05:20 +0000,,Analytics & Business Intelligence,187a7d0b53f5211639157026daaf6dca
15 | ZSoft Internet Media Pvt Ltd.,"UG: Diploma PG:Post Graduation Not Required Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required",2 - 4 yrs,IT-Software / Software Services,"Job Description Â  Send me Jobs like this Job description:The post is responsible for maintaining, updating and enhancing Client database. Work on Data Entry and generate reports as per needs Must have good hands on experience with MS OfficeRoles and Responsibilities: Good Typing Skills, Communication SkillsQualification: Any Graduate ( BE/B Tech/B. Sc Computers/BCA and Diploma Holders )Speed and accuracy essential. Must have be organized, self disciplined and self starter Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: Executive Assistant , Front Office , Data Entry Role:Stenographer/Data Entry Operator Keyskills bca diploma maintaining good typing skills data entry operator b sc b tech responsible be database Desired Candidate Profile Education- UG: Diploma PG:Post Graduation Not Required Doctorate:Any Doctorate - Any Specialization, Doctorate Not Required Please refer to the Job description above Company Profile: ZSoft Internet Media Pvt Ltd. ZSoft Internet Media Pvt. Ltd. - Website Design | Software Development | SEO | SMO | PPC | ORM | IT Services Business Technology Consulting Download PPT Photo 1 Â  View Contact Details",1.00816E+11,"Delhi , Delhi",Data Entry Operator,,Not Disclosed by Recruiter,2015-11-23 22:17:37 +0000,,Executive Assistant,18ee0fbcfa297155ef90876b0fda0608
16 | Startup - Entransys,"UG: Any Graduate - Any Specialization, B.Tech/B.E. - Any Specialization Doctorate:Doctorate Not Required",3 - 5 yrs,Internet / Ecommerce,"Job Description Â  Send me Jobs like this We are looking for an Analytics Designer with strong interests and capabilities in the design and development of engaging user experiences. Salary: Not Disclosed by Recruiter Industry: Internet / Ecommerce Functional Area: Analytics & Business Intelligence Role Category:Analytics & BI Role:Data Analyst Keyskills Design Development Data Science Analytics Desired Candidate Profile Â  Education- UG: Any Graduate - Any Specialization, B.Tech/B.E. - Any Specialization Doctorate:Doctorate Not Required Company Profile: Startup - Entransys Entransys approach and methodology is aimed towards converting the Business Chain into Value Chain and ensures the reconfiguration of Business processes to maximize the Business value. Download PPT Photo 1 Â  View Contact Details",2.21217E+11,Hyderabad,Analytics & Data Science,,Not Disclosed by Recruiter,2016-12-22 18:19:00 +0000,www.naukri.com,Analytics & Business Intelligence,e5cc1a2789b45f1161636fc3681670ee
17 | AR Enterprises hiring for US Based MNC,,0 - 2 yrs,Recruitment / Staffing,"Job Description Â  Send me Jobs like this SECRETARY / FRONT OFFICE / DATA ENTRY Job Profile Salary: Not Disclosed by Recruiter Industry: Recruitment / Staffing Functional Area: Executive Assistant , Front Office , Data Entry Role Category:Other Role:Stenographer/Data Entry Operator Keyskills Data Entry Operation Front Office Secretarial Activities Desired Candidate Profile Please refer to the Job description above Company Profile: AR Enterprises US Based MNC our aim is to make future and to give best placement. Download PPT Photo 1 Â  View Contact Details",51116002047,"Hyderabad, Chennai, Bengaluru, Gwalior",Data Entry Operator,3,Not Disclosed by Recruiter,2017-01-11 21:00:00 +0000,www.naukri.com,Executive Assistant,0e1a1f05ed979b8139dfb814058f68ac
18 | 


--------------------------------------------------------------------------------
/2-Preprocessing_and_Modelling/Pre-processing_Resume for matchingv2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Pre-Processing Resume Text Column to Prepare for Matching - final "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import pandas as pd\n",
 18 |     "import json\n",
 19 |     "import matplotlib.pyplot as plt\n",
 20 |     "%matplotlib inline\n",
 21 |     "\n",
 22 |     "import re\n",
 23 |     "import datetime\n",
 24 |     "from datetime import date\n",
 25 |     "from time import strptime\n",
 26 |     "\n",
 27 |     "import RAKE as rake\n",
 28 |     "import operator\n"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "###############################################################################################\n",
 36 |     "## Working on Resume data\n",
 37 |     "###############################################################################################"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 2,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# First reading my resume csv\n",
 47 |     "resume = pd.read_csv('wip/resume_sorted6.csv')"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 3,
 53 |    "metadata": {},
 54 |    "outputs": [
 55 |     {
 56 |      "name": "stdout",
 57 |      "output_type": "stream",
 58 |      "text": [
 59 |       "<class 'pandas.core.frame.DataFrame'>\n",
 60 |       "RangeIndex: 14428 entries, 0 to 14427\n",
 61 |       "Data columns (total 26 columns):\n",
 62 |       " #   Column                  Non-Null Count  Dtype \n",
 63 |       "---  ------                  --------------  ----- \n",
 64 |       " 0   index                   14428 non-null  int64 \n",
 65 |       " 1   Resume_title            14428 non-null  object\n",
 66 |       " 2   City                    14428 non-null  object\n",
 67 |       " 3   location                14428 non-null  int64 \n",
 68 |       " 4   Description             14428 non-null  object\n",
 69 |       " 5   work_experiences        14428 non-null  object\n",
 70 |       " 6   Educations              14428 non-null  object\n",
 71 |       " 7   Skills                  14428 non-null  object\n",
 72 |       " 8   Links                   14428 non-null  object\n",
 73 |       " 9   Certificates            14428 non-null  object\n",
 74 |       " 10  Additional Information  14428 non-null  object\n",
 75 |       " 11  is_grad                 14428 non-null  int64 \n",
 76 |       " 12  is_postgrad             14428 non-null  int64 \n",
 77 |       " 13  is_doc                  14428 non-null  int64 \n",
 78 |       " 14  edu_unknown             14428 non-null  int64 \n",
 79 |       " 15  Computer_Eng            14428 non-null  int64 \n",
 80 |       " 16  Finance                 14428 non-null  int64 \n",
 81 |       " 17  HR                      14428 non-null  int64 \n",
 82 |       " 18  AI_stats                14428 non-null  int64 \n",
 83 |       " 19  MBA                     14428 non-null  int64 \n",
 84 |       " 20  Other_specialization    14428 non-null  int64 \n",
 85 |       " 21  resume_id               14428 non-null  int64 \n",
 86 |       " 22  total_experience        14428 non-null  int64 \n",
 87 |       " 23  experience_range        14428 non-null  int64 \n",
 88 |       " 24  loc_name                14428 non-null  object\n",
 89 |       " 25  experience_desc         14428 non-null  object\n",
 90 |       "dtypes: int64(15), object(11)\n",
 91 |       "memory usage: 2.9+ MB\n"
 92 |      ]
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "#initial info\n",
 97 |     "resume.info()"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "#########################################################################################################\n",
105 |     "## To match resume with jobs, I need to have similar 20 vectors, that I created to train my Doc2Vec model for jobs. \n",
106 |     "\n",
107 |     "### For training my jobs model, I picked  text data from :\n",
108 |     "* job title\n",
109 |     "* job description\n",
110 |     "* skills\n",
111 |     "* industry\n",
112 |     "\n",
113 |     "### So for training my resume model, I need similar text, thus picking:\n",
114 |     "* Resume_title\n",
115 |     "* Resume description \n",
116 |     "* skills\n",
117 |     "* Additional Information\n",
118 |     "\n",
119 |     "\n",
120 |     "#########################################################################################################"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 40,
126 |    "metadata": {},
127 |    "outputs": [],
128 |    "source": [
129 |     "resume['Resume_title'] = resume['Resume_title'].str.lower()\n",
130 |     "resume['Skills']=resume['Skills'].str.lower()\n",
131 |     "resume['Description'] = resume['Description'].str.lower()\n",
132 |     "resume['Additional Information'] = resume['Additional Information'].str.lower()"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 41,
138 |    "metadata": {},
139 |    "outputs": [],
140 |    "source": [
141 |     "resume['Description'].replace('none', ' ',inplace=True)\n",
142 |     "resume['Additional Information'].replace('none', ' ',inplace=True)"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 5,
148 |    "metadata": {},
149 |    "outputs": [
150 |     {
151 |      "name": "stderr",
152 |      "output_type": "stream",
153 |      "text": [
154 |       "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
155 |       "A value is trying to be set on a copy of a slice from a DataFrame.\n",
156 |       "Try using .loc[row_indexer,col_indexer] = value instead\n",
157 |       "\n",
158 |       "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
159 |       "  \n"
160 |      ]
161 |     },
162 |     {
163 |      "data": {
164 |       "text/html": [
165 |        "<div>\n",
166 |        "<style scoped>\n",
167 |        "    .dataframe tbody tr th:only-of-type {\n",
168 |        "        vertical-align: middle;\n",
169 |        "    }\n",
170 |        "\n",
171 |        "    .dataframe tbody tr th {\n",
172 |        "        vertical-align: top;\n",
173 |        "    }\n",
174 |        "\n",
175 |        "    .dataframe thead th {\n",
176 |        "        text-align: right;\n",
177 |        "    }\n",
178 |        "</style>\n",
179 |        "<table border=\"1\" class=\"dataframe\">\n",
180 |        "  <thead>\n",
181 |        "    <tr style=\"text-align: right;\">\n",
182 |        "      <th></th>\n",
183 |        "      <th>resume_id</th>\n",
184 |        "      <th>Resume_title</th>\n",
185 |        "      <th>resume_combo</th>\n",
186 |        "    </tr>\n",
187 |        "  </thead>\n",
188 |        "  <tbody>\n",
189 |        "    <tr>\n",
190 |        "      <th>0</th>\n",
191 |        "      <td>0</td>\n",
192 |        "      <td>java developer</td>\n",
193 |        "      <td>java developer to prove myself dedicated, wort...</td>\n",
194 |        "    </tr>\n",
195 |        "    <tr>\n",
196 |        "      <th>1</th>\n",
197 |        "      <td>1</td>\n",
198 |        "      <td>software developer</td>\n",
199 |        "      <td>software developer working as software develop...</td>\n",
200 |        "    </tr>\n",
201 |        "    <tr>\n",
202 |        "      <th>2</th>\n",
203 |        "      <td>2</td>\n",
204 |        "      <td>java developer</td>\n",
205 |        "      <td>java developer looking for a challenging caree...</td>\n",
206 |        "    </tr>\n",
207 |        "    <tr>\n",
208 |        "      <th>3</th>\n",
209 |        "      <td>3</td>\n",
210 |        "      <td>seeking innovative and challenging career assi...</td>\n",
211 |        "      <td>seeking innovative and challenging career assi...</td>\n",
212 |        "    </tr>\n",
213 |        "    <tr>\n",
214 |        "      <th>4</th>\n",
215 |        "      <td>4</td>\n",
216 |        "      <td>java developer</td>\n",
217 |        "      <td>java developer   ['project: hr payroll systems...</td>\n",
218 |        "    </tr>\n",
219 |        "  </tbody>\n",
220 |        "</table>\n",
221 |        "</div>"
222 |       ],
223 |       "text/plain": [
224 |        "   resume_id                                       Resume_title  \\\n",
225 |        "0          0                                     java developer   \n",
226 |        "1          1                                 software developer   \n",
227 |        "2          2                                     java developer   \n",
228 |        "3          3  seeking innovative and challenging career assi...   \n",
229 |        "4          4                                     java developer   \n",
230 |        "\n",
231 |        "                                        resume_combo  \n",
232 |        "0  java developer to prove myself dedicated, wort...  \n",
233 |        "1  software developer working as software develop...  \n",
234 |        "2  java developer looking for a challenging caree...  \n",
235 |        "3  seeking innovative and challenging career assi...  \n",
236 |        "4  java developer   ['project: hr payroll systems...  "
237 |       ]
238 |      },
239 |      "execution_count": 5,
240 |      "metadata": {},
241 |      "output_type": "execute_result"
242 |     }
243 |    ],
244 |    "source": [
245 |     "df_resume = resume[['resume_id','Resume_title' ]]\n",
246 |     "df_resume['resume_combo'] = resume['Resume_title'] +\" \" + resume['Description'] +\" \" + resume['Skills'] + \" \"+resume['Additional Information'] + \" \"+resume['experience_desc']\n",
247 |     "df_resume.head()"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "code",
252 |    "execution_count": 6,
253 |    "metadata": {},
254 |    "outputs": [
255 |     {
256 |      "data": {
257 |       "text/plain": [
258 |        "0    java developer to prove myself dedicated, wort...\n",
259 |        "1    software developer working as software develop...\n",
260 |        "2    java developer looking for a challenging caree...\n",
261 |        "3    seeking innovative and challenging career assi...\n",
262 |        "4    java developer   ['project: hr payroll systems...\n",
263 |        "5    java developer   ['java']   ['have the potenti...\n",
264 |        "6    java developer to secure a challenging positio...\n",
265 |        "7    searching job for java developer   ['c++', ' h...\n",
266 |        "8    mca / with 3 years of development experience •...\n",
267 |        "9    java developer attain the position of 'java de...\n",
268 |        "Name: resume_combo, dtype: object"
269 |       ]
270 |      },
271 |      "execution_count": 6,
272 |      "metadata": {},
273 |      "output_type": "execute_result"
274 |     }
275 |    ],
276 |    "source": [
277 |     "docs = df_resume['resume_combo']\n",
278 |     "docs_sample = docs.head(10)\n",
279 |     "docs_sample"
280 |    ]
281 |   },
282 |   {
283 |    "cell_type": "code",
284 |    "execution_count": 7,
285 |    "metadata": {},
286 |    "outputs": [
287 |     {
288 |      "name": "stderr",
289 |      "output_type": "stream",
290 |      "text": [
291 |       "[nltk_data] Downloading package wordnet to\n",
292 |       "[nltk_data]     C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n",
293 |       "[nltk_data]   Package wordnet is already up-to-date!\n"
294 |      ]
295 |     }
296 |    ],
297 |    "source": [
298 |     "#Import all the dependencies\n",
299 |     "import nltk\n",
300 |     "nltk.download('wordnet')\n",
301 |     "from nltk.stem import WordNetLemmatizer\n",
302 |     "wordnet_lemmatizer = WordNetLemmatizer()\n",
303 |     "from nltk.corpus import stopwords\n",
304 |     "from nltk.tokenize import word_tokenize \n",
305 |     "set(stopwords.words('english'))\n",
306 |     "\n",
307 |     "import string\n",
308 |     "\n",
309 |     "import gensim\n",
310 |     "from gensim.test.utils import common_texts\n",
311 |     "from gensim.models.doc2vec import Doc2Vec, TaggedDocument"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": 8,
317 |    "metadata": {},
318 |    "outputs": [
319 |     {
320 |      "name": "stderr",
321 |      "output_type": "stream",
322 |      "text": [
323 |       "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n",
324 |       "  'stop_words.' % sorted(inconsistent))\n"
325 |      ]
326 |     },
327 |     {
328 |      "name": "stdout",
329 |      "output_type": "stream",
330 |      "text": [
331 |       "(14428, 70688)\n",
332 |       "(14428, 3)\n"
333 |      ]
334 |     }
335 |    ],
336 |    "source": [
337 |     "from sklearn.feature_extraction.text import TfidfVectorizer\n",
338 |     "stopwords = nltk.corpus.stopwords.words('english')\n",
339 |     "stopwords.append('ã¯æ’ëœ')\n",
340 |     "stopwords.append('\\n')\n",
341 |     "stopwords.append('•')\n",
342 |     "#Transforms words to TFIDF\n",
343 |     "vectorizer = TfidfVectorizer(stop_words = stopwords)\n",
344 |     "\n",
345 |     "index = 0\n",
346 |     "keys = {}\n",
347 |     "\n",
348 |     "for rem in df_resume.itertuples() :\n",
349 |     "    key = rem[1]\n",
350 |     "    keys[key] = index\n",
351 |     "    index += 1\n",
352 |     "\n",
353 |     "#Fit the vectorizer to the data\n",
354 |     "vectorizer.fit(df_resume['resume_combo'].fillna(''))\n",
355 |     "\n",
356 |     "#Transform the data\n",
357 |     "tfidf_scores = vectorizer.transform(df_resume['resume_combo'].fillna(''))\n",
358 |     "\n",
359 |     "print(tfidf_scores.shape)\n",
360 |     "print(df_resume.shape)"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "code",
365 |    "execution_count": 10,
366 |    "metadata": {},
367 |    "outputs": [],
368 |    "source": [
369 |     "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())"
370 |    ]
371 |   },
372 |   {
373 |    "cell_type": "code",
374 |    "execution_count": 11,
375 |    "metadata": {},
376 |    "outputs": [
377 |     {
378 |      "data": {
379 |       "text/html": [
380 |        "<div>\n",
381 |        "<style scoped>\n",
382 |        "    .dataframe tbody tr th:only-of-type {\n",
383 |        "        vertical-align: middle;\n",
384 |        "    }\n",
385 |        "\n",
386 |        "    .dataframe tbody tr th {\n",
387 |        "        vertical-align: top;\n",
388 |        "    }\n",
389 |        "\n",
390 |        "    .dataframe thead th {\n",
391 |        "        text-align: right;\n",
392 |        "    }\n",
393 |        "</style>\n",
394 |        "<table border=\"1\" class=\"dataframe\">\n",
395 |        "  <thead>\n",
396 |        "    <tr style=\"text-align: right;\">\n",
397 |        "      <th></th>\n",
398 |        "      <th>00</th>\n",
399 |        "      <th>000</th>\n",
400 |        "      <th>0000</th>\n",
401 |        "      <th>00089765</th>\n",
402 |        "      <th>00089805</th>\n",
403 |        "      <th>000webhostapp</th>\n",
404 |        "      <th>001</th>\n",
405 |        "      <th>002</th>\n",
406 |        "      <th>003</th>\n",
407 |        "      <th>00353</th>\n",
408 |        "      <th>...</th>\n",
409 |        "      <th>õle</th>\n",
410 |        "      <th>øcreated</th>\n",
411 |        "      <th>ǁǁǁǁǁǁ</th>\n",
412 |        "      <th>ηadoop</th>\n",
413 |        "      <th>τrain</th>\n",
414 |        "      <th>τοοls</th>\n",
415 |        "      <th>чєαr</th>\n",
416 |        "      <th>ﬁled</th>\n",
417 |        "      <th>ﬁnancial</th>\n",
418 |        "      <th>ﬁxing</th>\n",
419 |        "    </tr>\n",
420 |        "  </thead>\n",
421 |        "  <tbody>\n",
422 |        "    <tr>\n",
423 |        "      <th>0</th>\n",
424 |        "      <td>0.0</td>\n",
425 |        "      <td>0.0</td>\n",
426 |        "      <td>0.0</td>\n",
427 |        "      <td>0.0</td>\n",
428 |        "      <td>0.0</td>\n",
429 |        "      <td>0.0</td>\n",
430 |        "      <td>0.0</td>\n",
431 |        "      <td>0.0</td>\n",
432 |        "      <td>0.0</td>\n",
433 |        "      <td>0.0</td>\n",
434 |        "      <td>...</td>\n",
435 |        "      <td>0.0</td>\n",
436 |        "      <td>0.0</td>\n",
437 |        "      <td>0.0</td>\n",
438 |        "      <td>0.0</td>\n",
439 |        "      <td>0.0</td>\n",
440 |        "      <td>0.0</td>\n",
441 |        "      <td>0.0</td>\n",
442 |        "      <td>0.0</td>\n",
443 |        "      <td>0.0</td>\n",
444 |        "      <td>0.0</td>\n",
445 |        "    </tr>\n",
446 |        "    <tr>\n",
447 |        "      <th>1</th>\n",
448 |        "      <td>0.0</td>\n",
449 |        "      <td>0.0</td>\n",
450 |        "      <td>0.0</td>\n",
451 |        "      <td>0.0</td>\n",
452 |        "      <td>0.0</td>\n",
453 |        "      <td>0.0</td>\n",
454 |        "      <td>0.0</td>\n",
455 |        "      <td>0.0</td>\n",
456 |        "      <td>0.0</td>\n",
457 |        "      <td>0.0</td>\n",
458 |        "      <td>...</td>\n",
459 |        "      <td>0.0</td>\n",
460 |        "      <td>0.0</td>\n",
461 |        "      <td>0.0</td>\n",
462 |        "      <td>0.0</td>\n",
463 |        "      <td>0.0</td>\n",
464 |        "      <td>0.0</td>\n",
465 |        "      <td>0.0</td>\n",
466 |        "      <td>0.0</td>\n",
467 |        "      <td>0.0</td>\n",
468 |        "      <td>0.0</td>\n",
469 |        "    </tr>\n",
470 |        "    <tr>\n",
471 |        "      <th>2</th>\n",
472 |        "      <td>0.0</td>\n",
473 |        "      <td>0.0</td>\n",
474 |        "      <td>0.0</td>\n",
475 |        "      <td>0.0</td>\n",
476 |        "      <td>0.0</td>\n",
477 |        "      <td>0.0</td>\n",
478 |        "      <td>0.0</td>\n",
479 |        "      <td>0.0</td>\n",
480 |        "      <td>0.0</td>\n",
481 |        "      <td>0.0</td>\n",
482 |        "      <td>...</td>\n",
483 |        "      <td>0.0</td>\n",
484 |        "      <td>0.0</td>\n",
485 |        "      <td>0.0</td>\n",
486 |        "      <td>0.0</td>\n",
487 |        "      <td>0.0</td>\n",
488 |        "      <td>0.0</td>\n",
489 |        "      <td>0.0</td>\n",
490 |        "      <td>0.0</td>\n",
491 |        "      <td>0.0</td>\n",
492 |        "      <td>0.0</td>\n",
493 |        "    </tr>\n",
494 |        "    <tr>\n",
495 |        "      <th>3</th>\n",
496 |        "      <td>0.0</td>\n",
497 |        "      <td>0.0</td>\n",
498 |        "      <td>0.0</td>\n",
499 |        "      <td>0.0</td>\n",
500 |        "      <td>0.0</td>\n",
501 |        "      <td>0.0</td>\n",
502 |        "      <td>0.0</td>\n",
503 |        "      <td>0.0</td>\n",
504 |        "      <td>0.0</td>\n",
505 |        "      <td>0.0</td>\n",
506 |        "      <td>...</td>\n",
507 |        "      <td>0.0</td>\n",
508 |        "      <td>0.0</td>\n",
509 |        "      <td>0.0</td>\n",
510 |        "      <td>0.0</td>\n",
511 |        "      <td>0.0</td>\n",
512 |        "      <td>0.0</td>\n",
513 |        "      <td>0.0</td>\n",
514 |        "      <td>0.0</td>\n",
515 |        "      <td>0.0</td>\n",
516 |        "      <td>0.0</td>\n",
517 |        "    </tr>\n",
518 |        "    <tr>\n",
519 |        "      <th>4</th>\n",
520 |        "      <td>0.0</td>\n",
521 |        "      <td>0.0</td>\n",
522 |        "      <td>0.0</td>\n",
523 |        "      <td>0.0</td>\n",
524 |        "      <td>0.0</td>\n",
525 |        "      <td>0.0</td>\n",
526 |        "      <td>0.0</td>\n",
527 |        "      <td>0.0</td>\n",
528 |        "      <td>0.0</td>\n",
529 |        "      <td>0.0</td>\n",
530 |        "      <td>...</td>\n",
531 |        "      <td>0.0</td>\n",
532 |        "      <td>0.0</td>\n",
533 |        "      <td>0.0</td>\n",
534 |        "      <td>0.0</td>\n",
535 |        "      <td>0.0</td>\n",
536 |        "      <td>0.0</td>\n",
537 |        "      <td>0.0</td>\n",
538 |        "      <td>0.0</td>\n",
539 |        "      <td>0.0</td>\n",
540 |        "      <td>0.0</td>\n",
541 |        "    </tr>\n",
542 |        "  </tbody>\n",
543 |        "</table>\n",
544 |        "<p>5 rows × 70688 columns</p>\n",
545 |        "</div>"
546 |       ],
547 |       "text/plain": [
548 |        "    00  000  0000  00089765  00089805  000webhostapp  001  002  003  00353  \\\n",
549 |        "0  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
550 |        "1  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
551 |        "2  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
552 |        "3  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
553 |        "4  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
554 |        "\n",
555 |        "   ...  õle  øcreated  ǁǁǁǁǁǁ  ηadoop  τrain  τοοls  чєαr  ﬁled  ﬁnancial  \\\n",
556 |        "0  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
557 |        "1  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
558 |        "2  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
559 |        "3  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
560 |        "4  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
561 |        "\n",
562 |        "   ﬁxing  \n",
563 |        "0    0.0  \n",
564 |        "1    0.0  \n",
565 |        "2    0.0  \n",
566 |        "3    0.0  \n",
567 |        "4    0.0  \n",
568 |        "\n",
569 |        "[5 rows x 70688 columns]"
570 |       ]
571 |      },
572 |      "execution_count": 11,
573 |      "metadata": {},
574 |      "output_type": "execute_result"
575 |     }
576 |    ],
577 |    "source": [
578 |     "test.head()"
579 |    ]
580 |   },
581 |   {
582 |    "cell_type": "markdown",
583 |    "metadata": {},
584 |    "source": [
585 |     "### Creating my Stopword list\n",
586 |     "#### As seen there are so many unwanted tokens like numbers,ïƒ¼ etc , I need to add them in \"stop words\" list to train model"
587 |    ]
588 |   },
589 |   {
590 |    "cell_type": "code",
591 |    "execution_count": 12,
592 |    "metadata": {},
593 |    "outputs": [],
594 |    "source": [
595 |     "#getting list of all tokens\n",
596 |     "word_list = test.columns.tolist()"
597 |    ]
598 |   },
599 |   {
600 |    "cell_type": "code",
601 |    "execution_count": 13,
602 |    "metadata": {},
603 |    "outputs": [],
604 |    "source": [
605 |     "##Getting a list of unwanted words as s_words and adding to stopwords\n",
606 |     "s_words =[]\n",
607 |     "for word in word_list:\n",
608 |     "    #print(word)\n",
609 |     "    if re.search(\"^\\W|^\\d\",word):\n",
610 |     "        s_words.append(word)"
611 |    ]
612 |   },
613 |   {
614 |    "cell_type": "code",
615 |    "execution_count": 14,
616 |    "metadata": {},
617 |    "outputs": [],
618 |    "source": [
619 |     "s_words.append('')        \n",
620 |     "from nltk.corpus import stopwords\n",
621 |     "stopword_set = set(stopwords.words('english'))\n",
622 |     "stopword_set = list(stopword_set)\n",
623 |     "stopword_set.extend(s_words)"
624 |    ]
625 |   },
626 |   {
627 |    "cell_type": "code",
628 |    "execution_count": 15,
629 |    "metadata": {},
630 |    "outputs": [],
631 |    "source": [
632 |     "def preprocess(text):\n",
633 |     "    stop_words = stopword_set\n",
634 |     "    #0. split words by whitespace\n",
635 |     "    text = text.split()\n",
636 |     "    \n",
637 |     "    \n",
638 |     "    # 1. lower case\n",
639 |     "    text = [word.lower() for word in text]\n",
640 |     "    \n",
641 |     "    # 2. remove punctuations\n",
642 |     "    punc_table = str.maketrans('','',string.punctuation)\n",
643 |     "    text = [word.translate(punc_table) for word in text]\n",
644 |     "    \n",
645 |     "    # 3. remove stop words\n",
646 |     "    text = [word for word in text if word not in stop_words]\n",
647 |     "    \n",
648 |     "    return text"
649 |    ]
650 |   },
651 |   {
652 |    "cell_type": "code",
653 |    "execution_count": 16,
654 |    "metadata": {},
655 |    "outputs": [],
656 |    "source": [
657 |     "tokenized_doc = []\n",
658 |     "doc = df_resume['resume_combo']\n",
659 |     "#doc = docs_sample\n",
660 |     "for d in doc:\n",
661 |     "    tokenized_doc.append(preprocess(d))\n",
662 |     "#tokenized_doc"
663 |    ]
664 |   },
665 |   {
666 |    "cell_type": "code",
667 |    "execution_count": 17,
668 |    "metadata": {},
669 |    "outputs": [],
670 |    "source": [
671 |     "# Convert tokenized document into gensim formated tagged data\n",
672 |     "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]"
673 |    ]
674 |   },
675 |   {
676 |    "cell_type": "code",
677 |    "execution_count": 18,
678 |    "metadata": {},
679 |    "outputs": [
680 |     {
681 |      "data": {
682 |       "text/plain": [
683 |        "14428"
684 |       ]
685 |      },
686 |      "execution_count": 18,
687 |      "metadata": {},
688 |      "output_type": "execute_result"
689 |     }
690 |    ],
691 |    "source": [
692 |     "num_doc = len(tagged_data)\n",
693 |     "num_doc\n",
694 |     "#confirm length (should be 14428)\n",
695 |     "len(tokenized_doc)"
696 |    ]
697 |   },
698 |   {
699 |    "cell_type": "code",
700 |    "execution_count": 21,
701 |    "metadata": {},
702 |    "outputs": [],
703 |    "source": [
704 |     "## Load saved doc2vec model\n",
705 |     "model= Doc2Vec.load(\"Model/my_doc2vec_v2.model\")"
706 |    ]
707 |   },
708 |   {
709 |    "cell_type": "code",
710 |    "execution_count": 24,
711 |    "metadata": {},
712 |    "outputs": [],
713 |    "source": [
714 |     "## Get vector value\n",
715 |     "vec = np.empty([14428,20])\n",
716 |     "\n",
717 |     "for k,i in enumerate(tokenized_doc):\n",
718 |     "    \n",
719 |     "    #print(i)\n",
720 |     "    vector = model.infer_vector(i)\n",
721 |     "    vec[k] = vector\n",
722 |     "\n",
723 |     "# reshape into 2D\n",
724 |     "new_arr = np.reshape(vec,(-1,20))"
725 |    ]
726 |   },
727 |   {
728 |    "cell_type": "code",
729 |    "execution_count": 25,
730 |    "metadata": {},
731 |    "outputs": [],
732 |    "source": [
733 |     "rng = range(1, 21)\n",
734 |     "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])"
735 |    ]
736 |   },
737 |   {
738 |    "cell_type": "code",
739 |    "execution_count": 26,
740 |    "metadata": {},
741 |    "outputs": [
742 |     {
743 |      "data": {
744 |       "text/html": [
745 |        "<div>\n",
746 |        "<style scoped>\n",
747 |        "    .dataframe tbody tr th:only-of-type {\n",
748 |        "        vertical-align: middle;\n",
749 |        "    }\n",
750 |        "\n",
751 |        "    .dataframe tbody tr th {\n",
752 |        "        vertical-align: top;\n",
753 |        "    }\n",
754 |        "\n",
755 |        "    .dataframe thead th {\n",
756 |        "        text-align: right;\n",
757 |        "    }\n",
758 |        "</style>\n",
759 |        "<table border=\"1\" class=\"dataframe\">\n",
760 |        "  <thead>\n",
761 |        "    <tr style=\"text-align: right;\">\n",
762 |        "      <th></th>\n",
763 |        "      <th>vec_1</th>\n",
764 |        "      <th>vec_2</th>\n",
765 |        "      <th>vec_3</th>\n",
766 |        "      <th>vec_4</th>\n",
767 |        "      <th>vec_5</th>\n",
768 |        "      <th>vec_6</th>\n",
769 |        "      <th>vec_7</th>\n",
770 |        "      <th>vec_8</th>\n",
771 |        "      <th>vec_9</th>\n",
772 |        "      <th>vec_10</th>\n",
773 |        "      <th>vec_11</th>\n",
774 |        "      <th>vec_12</th>\n",
775 |        "      <th>vec_13</th>\n",
776 |        "      <th>vec_14</th>\n",
777 |        "      <th>vec_15</th>\n",
778 |        "      <th>vec_16</th>\n",
779 |        "      <th>vec_17</th>\n",
780 |        "      <th>vec_18</th>\n",
781 |        "      <th>vec_19</th>\n",
782 |        "      <th>vec_20</th>\n",
783 |        "    </tr>\n",
784 |        "  </thead>\n",
785 |        "  <tbody>\n",
786 |        "    <tr>\n",
787 |        "      <th>0</th>\n",
788 |        "      <td>3.003397</td>\n",
789 |        "      <td>1.462391</td>\n",
790 |        "      <td>-0.732206</td>\n",
791 |        "      <td>2.032145</td>\n",
792 |        "      <td>-3.291425</td>\n",
793 |        "      <td>1.626622</td>\n",
794 |        "      <td>1.269785</td>\n",
795 |        "      <td>-1.303818</td>\n",
796 |        "      <td>-1.781690</td>\n",
797 |        "      <td>-3.893606</td>\n",
798 |        "      <td>0.582851</td>\n",
799 |        "      <td>-2.390430</td>\n",
800 |        "      <td>0.612694</td>\n",
801 |        "      <td>4.274847</td>\n",
802 |        "      <td>-1.641325</td>\n",
803 |        "      <td>1.098874</td>\n",
804 |        "      <td>-0.534998</td>\n",
805 |        "      <td>0.338975</td>\n",
806 |        "      <td>-2.081308</td>\n",
807 |        "      <td>-3.480031</td>\n",
808 |        "    </tr>\n",
809 |        "    <tr>\n",
810 |        "      <th>1</th>\n",
811 |        "      <td>3.969832</td>\n",
812 |        "      <td>-1.478794</td>\n",
813 |        "      <td>-1.997424</td>\n",
814 |        "      <td>1.502539</td>\n",
815 |        "      <td>-3.507508</td>\n",
816 |        "      <td>2.108994</td>\n",
817 |        "      <td>-0.386640</td>\n",
818 |        "      <td>1.494396</td>\n",
819 |        "      <td>0.454764</td>\n",
820 |        "      <td>-2.268685</td>\n",
821 |        "      <td>-1.505257</td>\n",
822 |        "      <td>-2.332494</td>\n",
823 |        "      <td>-0.431022</td>\n",
824 |        "      <td>1.431269</td>\n",
825 |        "      <td>-0.896382</td>\n",
826 |        "      <td>-0.267269</td>\n",
827 |        "      <td>1.433352</td>\n",
828 |        "      <td>0.438305</td>\n",
829 |        "      <td>-0.992093</td>\n",
830 |        "      <td>-0.096142</td>\n",
831 |        "    </tr>\n",
832 |        "    <tr>\n",
833 |        "      <th>2</th>\n",
834 |        "      <td>1.442701</td>\n",
835 |        "      <td>0.011723</td>\n",
836 |        "      <td>-2.126506</td>\n",
837 |        "      <td>0.655804</td>\n",
838 |        "      <td>-3.984513</td>\n",
839 |        "      <td>0.792035</td>\n",
840 |        "      <td>1.317094</td>\n",
841 |        "      <td>-0.696710</td>\n",
842 |        "      <td>-1.563318</td>\n",
843 |        "      <td>-3.040591</td>\n",
844 |        "      <td>-0.367393</td>\n",
845 |        "      <td>-3.774975</td>\n",
846 |        "      <td>-1.183595</td>\n",
847 |        "      <td>2.456486</td>\n",
848 |        "      <td>-1.270981</td>\n",
849 |        "      <td>2.475039</td>\n",
850 |        "      <td>-1.990110</td>\n",
851 |        "      <td>0.130853</td>\n",
852 |        "      <td>-0.589791</td>\n",
853 |        "      <td>-2.782936</td>\n",
854 |        "    </tr>\n",
855 |        "    <tr>\n",
856 |        "      <th>3</th>\n",
857 |        "      <td>1.803033</td>\n",
858 |        "      <td>-0.120398</td>\n",
859 |        "      <td>-1.159959</td>\n",
860 |        "      <td>0.066225</td>\n",
861 |        "      <td>-3.522508</td>\n",
862 |        "      <td>1.321965</td>\n",
863 |        "      <td>-0.756211</td>\n",
864 |        "      <td>-0.249010</td>\n",
865 |        "      <td>-0.074644</td>\n",
866 |        "      <td>-2.314389</td>\n",
867 |        "      <td>0.557041</td>\n",
868 |        "      <td>-3.887409</td>\n",
869 |        "      <td>-1.070027</td>\n",
870 |        "      <td>3.894971</td>\n",
871 |        "      <td>-0.957399</td>\n",
872 |        "      <td>-0.952996</td>\n",
873 |        "      <td>-0.824266</td>\n",
874 |        "      <td>0.038712</td>\n",
875 |        "      <td>1.194561</td>\n",
876 |        "      <td>-1.206788</td>\n",
877 |        "    </tr>\n",
878 |        "    <tr>\n",
879 |        "      <th>4</th>\n",
880 |        "      <td>-0.434019</td>\n",
881 |        "      <td>0.551527</td>\n",
882 |        "      <td>-1.531551</td>\n",
883 |        "      <td>-0.767032</td>\n",
884 |        "      <td>-0.514473</td>\n",
885 |        "      <td>0.286549</td>\n",
886 |        "      <td>-0.563888</td>\n",
887 |        "      <td>0.310748</td>\n",
888 |        "      <td>0.457921</td>\n",
889 |        "      <td>-1.334632</td>\n",
890 |        "      <td>0.183150</td>\n",
891 |        "      <td>-0.547834</td>\n",
892 |        "      <td>1.218995</td>\n",
893 |        "      <td>0.536182</td>\n",
894 |        "      <td>0.995981</td>\n",
895 |        "      <td>-0.874730</td>\n",
896 |        "      <td>-0.138916</td>\n",
897 |        "      <td>0.882186</td>\n",
898 |        "      <td>-0.129402</td>\n",
899 |        "      <td>-1.793177</td>\n",
900 |        "    </tr>\n",
901 |        "  </tbody>\n",
902 |        "</table>\n",
903 |        "</div>"
904 |       ],
905 |       "text/plain": [
906 |        "      vec_1     vec_2     vec_3     vec_4     vec_5     vec_6     vec_7  \\\n",
907 |        "0  3.003397  1.462391 -0.732206  2.032145 -3.291425  1.626622  1.269785   \n",
908 |        "1  3.969832 -1.478794 -1.997424  1.502539 -3.507508  2.108994 -0.386640   \n",
909 |        "2  1.442701  0.011723 -2.126506  0.655804 -3.984513  0.792035  1.317094   \n",
910 |        "3  1.803033 -0.120398 -1.159959  0.066225 -3.522508  1.321965 -0.756211   \n",
911 |        "4 -0.434019  0.551527 -1.531551 -0.767032 -0.514473  0.286549 -0.563888   \n",
912 |        "\n",
913 |        "      vec_8     vec_9    vec_10    vec_11    vec_12    vec_13    vec_14  \\\n",
914 |        "0 -1.303818 -1.781690 -3.893606  0.582851 -2.390430  0.612694  4.274847   \n",
915 |        "1  1.494396  0.454764 -2.268685 -1.505257 -2.332494 -0.431022  1.431269   \n",
916 |        "2 -0.696710 -1.563318 -3.040591 -0.367393 -3.774975 -1.183595  2.456486   \n",
917 |        "3 -0.249010 -0.074644 -2.314389  0.557041 -3.887409 -1.070027  3.894971   \n",
918 |        "4  0.310748  0.457921 -1.334632  0.183150 -0.547834  1.218995  0.536182   \n",
919 |        "\n",
920 |        "     vec_15    vec_16    vec_17    vec_18    vec_19    vec_20  \n",
921 |        "0 -1.641325  1.098874 -0.534998  0.338975 -2.081308 -3.480031  \n",
922 |        "1 -0.896382 -0.267269  1.433352  0.438305 -0.992093 -0.096142  \n",
923 |        "2 -1.270981  2.475039 -1.990110  0.130853 -0.589791 -2.782936  \n",
924 |        "3 -0.957399 -0.952996 -0.824266  0.038712  1.194561 -1.206788  \n",
925 |        "4  0.995981 -0.874730 -0.138916  0.882186 -0.129402 -1.793177  "
926 |       ]
927 |      },
928 |      "execution_count": 26,
929 |      "metadata": {},
930 |      "output_type": "execute_result"
931 |     }
932 |    ],
933 |    "source": [
934 |     "vec_df.head(5)"
935 |    ]
936 |   },
937 |   {
938 |    "cell_type": "code",
939 |    "execution_count": 27,
940 |    "metadata": {},
941 |    "outputs": [],
942 |    "source": [
943 |     "# concatenate and safe the resume csv file\n",
944 |     "con_resume_1 = pd.concat([resume, vec_df], axis=1)\n",
945 |     "con_resume_1.to_csv('wip/con_resume_1.csv', index=False)"
946 |    ]
947 |   }
948 |  ],
949 |  "metadata": {
950 |   "kernelspec": {
951 |    "display_name": "Python 3",
952 |    "language": "python",
953 |    "name": "python3"
954 |   },
955 |   "language_info": {
956 |    "codemirror_mode": {
957 |     "name": "ipython",
958 |     "version": 3
959 |    },
960 |    "file_extension": ".py",
961 |    "mimetype": "text/x-python",
962 |    "name": "python",
963 |    "nbconvert_exporter": "python",
964 |    "pygments_lexer": "ipython3",
965 |    "version": "3.7.6"
966 |   }
967 |  },
968 |  "nbformat": 4,
969 |  "nbformat_minor": 4
970 | }
971 | 


--------------------------------------------------------------------------------
/2-Preprocessing_and_Modelling/Pre-processing_Resume for matchingv1.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# Pre-Processing Resume Text Column to Prepare for matching - first iteration"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "code",
  12 |    "execution_count": 3,
  13 |    "metadata": {},
  14 |    "outputs": [],
  15 |    "source": [
  16 |     "import numpy as np\n",
  17 |     "import pandas as pd\n",
  18 |     "import json\n",
  19 |     "import matplotlib.pyplot as plt\n",
  20 |     "%matplotlib inline\n",
  21 |     "\n",
  22 |     "import re\n",
  23 |     "import datetime\n",
  24 |     "from datetime import date\n",
  25 |     "from time import strptime\n",
  26 |     "\n",
  27 |     "import RAKE as rake\n",
  28 |     "import operator\n"
  29 |    ]
  30 |   },
  31 |   {
  32 |    "cell_type": "markdown",
  33 |    "metadata": {},
  34 |    "source": [
  35 |     "###############################################################################################\n",
  36 |     "## Working on Resume data\n",
  37 |     "###############################################################################################"
  38 |    ]
  39 |   },
  40 |   {
  41 |    "cell_type": "code",
  42 |    "execution_count": 38,
  43 |    "metadata": {},
  44 |    "outputs": [],
  45 |    "source": [
  46 |     "# First reading my resume csv\n",
  47 |     "resume = pd.read_csv('wip/resume_sorted5.csv')"
  48 |    ]
  49 |   },
  50 |   {
  51 |    "cell_type": "code",
  52 |    "execution_count": 39,
  53 |    "metadata": {},
  54 |    "outputs": [
  55 |     {
  56 |      "name": "stdout",
  57 |      "output_type": "stream",
  58 |      "text": [
  59 |       "<class 'pandas.core.frame.DataFrame'>\n",
  60 |       "RangeIndex: 14428 entries, 0 to 14427\n",
  61 |       "Data columns (total 26 columns):\n",
  62 |       " #   Column                  Non-Null Count  Dtype \n",
  63 |       "---  ------                  --------------  ----- \n",
  64 |       " 0   index                   14428 non-null  int64 \n",
  65 |       " 1   Resume_title            14428 non-null  object\n",
  66 |       " 2   City                    14428 non-null  object\n",
  67 |       " 3   location                14428 non-null  int64 \n",
  68 |       " 4   Description             14428 non-null  object\n",
  69 |       " 5   work_experiences        14428 non-null  object\n",
  70 |       " 6   Educations              14428 non-null  object\n",
  71 |       " 7   Skills                  14428 non-null  object\n",
  72 |       " 8   Links                   14428 non-null  object\n",
  73 |       " 9   Certificates            14428 non-null  object\n",
  74 |       " 10  Additional Information  14428 non-null  object\n",
  75 |       " 11  is_grad                 14428 non-null  int64 \n",
  76 |       " 12  is_postgrad             14428 non-null  int64 \n",
  77 |       " 13  is_doc                  14428 non-null  int64 \n",
  78 |       " 14  edu_unknown             14428 non-null  int64 \n",
  79 |       " 15  Computer_Eng            14428 non-null  int64 \n",
  80 |       " 16  Finance                 14428 non-null  int64 \n",
  81 |       " 17  HR                      14428 non-null  int64 \n",
  82 |       " 18  AI_stats                14428 non-null  int64 \n",
  83 |       " 19  MBA                     14428 non-null  int64 \n",
  84 |       " 20  Other_specialization    14428 non-null  int64 \n",
  85 |       " 21  resume_id               14428 non-null  int64 \n",
  86 |       " 22  total_experience        14428 non-null  int64 \n",
  87 |       " 23  experience_range        14428 non-null  int64 \n",
  88 |       " 24  loc_name                14428 non-null  object\n",
  89 |       " 25  experience_desc         14428 non-null  object\n",
  90 |       "dtypes: int64(15), object(11)\n",
  91 |       "memory usage: 2.9+ MB\n"
  92 |      ]
  93 |     }
  94 |    ],
  95 |    "source": [
  96 |     "#initial info\n",
  97 |     "resume.info()"
  98 |    ]
  99 |   },
 100 |   {
 101 |    "cell_type": "markdown",
 102 |    "metadata": {},
 103 |    "source": [
 104 |     "#########################################################################################################\n",
 105 |     "## To match resume with jobs, I need to have similar 20 vectors, that I created to train my Doc2Vec model for jobs. \n",
 106 |     "\n",
 107 |     "### For training my jobs model, I picked  text data from :\n",
 108 |     "* job title\n",
 109 |     "* job description\n",
 110 |     "* skills\n",
 111 |     "* industry\n",
 112 |     "\n",
 113 |     "### So for training my resume model, I need similar text, thus picking:\n",
 114 |     "* Resume_title\n",
 115 |     "* Resume description \n",
 116 |     "* skills\n",
 117 |     "* Additional Information\n",
 118 |     "\n",
 119 |     "\n",
 120 |     "#########################################################################################################"
 121 |    ]
 122 |   },
 123 |   {
 124 |    "cell_type": "code",
 125 |    "execution_count": 40,
 126 |    "metadata": {},
 127 |    "outputs": [],
 128 |    "source": [
 129 |     "resume['Resume_title'] = resume['Resume_title'].str.lower()\n",
 130 |     "resume['Skills']=resume['Skills'].str.lower()\n",
 131 |     "resume['Description'] = resume['Description'].str.lower()\n",
 132 |     "resume['Additional Information'] = resume['Additional Information'].str.lower()"
 133 |    ]
 134 |   },
 135 |   {
 136 |    "cell_type": "code",
 137 |    "execution_count": 41,
 138 |    "metadata": {},
 139 |    "outputs": [],
 140 |    "source": [
 141 |     "resume['Description'].replace('none', ' ',inplace=True)\n",
 142 |     "resume['Additional Information'].replace('none', ' ',inplace=True)"
 143 |    ]
 144 |   },
 145 |   {
 146 |    "cell_type": "code",
 147 |    "execution_count": 43,
 148 |    "metadata": {},
 149 |    "outputs": [
 150 |     {
 151 |      "name": "stderr",
 152 |      "output_type": "stream",
 153 |      "text": [
 154 |       "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
 155 |       "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 156 |       "Try using .loc[row_indexer,col_indexer] = value instead\n",
 157 |       "\n",
 158 |       "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 159 |       "  \n"
 160 |      ]
 161 |     },
 162 |     {
 163 |      "data": {
 164 |       "text/html": [
 165 |        "<div>\n",
 166 |        "<style scoped>\n",
 167 |        "    .dataframe tbody tr th:only-of-type {\n",
 168 |        "        vertical-align: middle;\n",
 169 |        "    }\n",
 170 |        "\n",
 171 |        "    .dataframe tbody tr th {\n",
 172 |        "        vertical-align: top;\n",
 173 |        "    }\n",
 174 |        "\n",
 175 |        "    .dataframe thead th {\n",
 176 |        "        text-align: right;\n",
 177 |        "    }\n",
 178 |        "</style>\n",
 179 |        "<table border=\"1\" class=\"dataframe\">\n",
 180 |        "  <thead>\n",
 181 |        "    <tr style=\"text-align: right;\">\n",
 182 |        "      <th></th>\n",
 183 |        "      <th>resume_id</th>\n",
 184 |        "      <th>Resume_title</th>\n",
 185 |        "      <th>resume_combo</th>\n",
 186 |        "    </tr>\n",
 187 |        "  </thead>\n",
 188 |        "  <tbody>\n",
 189 |        "    <tr>\n",
 190 |        "      <th>0</th>\n",
 191 |        "      <td>0</td>\n",
 192 |        "      <td>java developer</td>\n",
 193 |        "      <td>java developer to prove myself dedicated, wort...</td>\n",
 194 |        "    </tr>\n",
 195 |        "    <tr>\n",
 196 |        "      <th>1</th>\n",
 197 |        "      <td>1</td>\n",
 198 |        "      <td>software developer</td>\n",
 199 |        "      <td>software developer working as software develop...</td>\n",
 200 |        "    </tr>\n",
 201 |        "    <tr>\n",
 202 |        "      <th>2</th>\n",
 203 |        "      <td>2</td>\n",
 204 |        "      <td>java developer</td>\n",
 205 |        "      <td>java developer looking for a challenging caree...</td>\n",
 206 |        "    </tr>\n",
 207 |        "    <tr>\n",
 208 |        "      <th>3</th>\n",
 209 |        "      <td>3</td>\n",
 210 |        "      <td>seeking innovative and challenging career assi...</td>\n",
 211 |        "      <td>seeking innovative and challenging career assi...</td>\n",
 212 |        "    </tr>\n",
 213 |        "    <tr>\n",
 214 |        "      <th>4</th>\n",
 215 |        "      <td>4</td>\n",
 216 |        "      <td>java developer</td>\n",
 217 |        "      <td>java developer   ['project: hr payroll systems...</td>\n",
 218 |        "    </tr>\n",
 219 |        "  </tbody>\n",
 220 |        "</table>\n",
 221 |        "</div>"
 222 |       ],
 223 |       "text/plain": [
 224 |        "   resume_id                                       Resume_title  \\\n",
 225 |        "0          0                                     java developer   \n",
 226 |        "1          1                                 software developer   \n",
 227 |        "2          2                                     java developer   \n",
 228 |        "3          3  seeking innovative and challenging career assi...   \n",
 229 |        "4          4                                     java developer   \n",
 230 |        "\n",
 231 |        "                                        resume_combo  \n",
 232 |        "0  java developer to prove myself dedicated, wort...  \n",
 233 |        "1  software developer working as software develop...  \n",
 234 |        "2  java developer looking for a challenging caree...  \n",
 235 |        "3  seeking innovative and challenging career assi...  \n",
 236 |        "4  java developer   ['project: hr payroll systems...  "
 237 |       ]
 238 |      },
 239 |      "execution_count": 43,
 240 |      "metadata": {},
 241 |      "output_type": "execute_result"
 242 |     }
 243 |    ],
 244 |    "source": [
 245 |     "df_resume = resume[['resume_id','Resume_title' ]]\n",
 246 |     "df_resume['resume_combo'] = resume['Resume_title'] +\" \" + resume['Description'] +\" \" + resume['Skills'] + \" \"+resume['Additional Information'] + \" \"+resume['experience_desc']\n",
 247 |     "df_resume.head()"
 248 |    ]
 249 |   },
 250 |   {
 251 |    "cell_type": "code",
 252 |    "execution_count": 44,
 253 |    "metadata": {},
 254 |    "outputs": [
 255 |     {
 256 |      "data": {
 257 |       "text/plain": [
 258 |        "0    java developer to prove myself dedicated, wort...\n",
 259 |        "1    software developer working as software develop...\n",
 260 |        "2    java developer looking for a challenging caree...\n",
 261 |        "3    seeking innovative and challenging career assi...\n",
 262 |        "4    java developer   ['project: hr payroll systems...\n",
 263 |        "5    java developer   ['java']   ['have the potenti...\n",
 264 |        "6    java developer to secure a challenging positio...\n",
 265 |        "7    searching job for java developer   ['c++', ' h...\n",
 266 |        "8    mca / with 3 years of development experience •...\n",
 267 |        "9    java developer attain the position of 'java de...\n",
 268 |        "Name: resume_combo, dtype: object"
 269 |       ]
 270 |      },
 271 |      "execution_count": 44,
 272 |      "metadata": {},
 273 |      "output_type": "execute_result"
 274 |     }
 275 |    ],
 276 |    "source": [
 277 |     "docs = df_resume['resume_combo']\n",
 278 |     "docs_sample = docs.head(10)\n",
 279 |     "docs_sample"
 280 |    ]
 281 |   },
 282 |   {
 283 |    "cell_type": "code",
 284 |    "execution_count": 45,
 285 |    "metadata": {},
 286 |    "outputs": [
 287 |     {
 288 |      "name": "stderr",
 289 |      "output_type": "stream",
 290 |      "text": [
 291 |       "[nltk_data] Downloading package wordnet to\n",
 292 |       "[nltk_data]     C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n",
 293 |       "[nltk_data]   Package wordnet is already up-to-date!\n"
 294 |      ]
 295 |     }
 296 |    ],
 297 |    "source": [
 298 |     "#Import all the dependencies\n",
 299 |     "import nltk\n",
 300 |     "nltk.download('wordnet')\n",
 301 |     "from nltk.stem import WordNetLemmatizer\n",
 302 |     "wordnet_lemmatizer = WordNetLemmatizer()\n",
 303 |     "from nltk.corpus import stopwords\n",
 304 |     "from nltk.tokenize import word_tokenize \n",
 305 |     "set(stopwords.words('english'))\n",
 306 |     "\n",
 307 |     "import string\n",
 308 |     "\n",
 309 |     "import gensim\n",
 310 |     "from gensim.test.utils import common_texts\n",
 311 |     "from gensim.models.doc2vec import Doc2Vec, TaggedDocument"
 312 |    ]
 313 |   },
 314 |   {
 315 |    "cell_type": "code",
 316 |    "execution_count": 47,
 317 |    "metadata": {},
 318 |    "outputs": [
 319 |     {
 320 |      "name": "stderr",
 321 |      "output_type": "stream",
 322 |      "text": [
 323 |       "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n",
 324 |       "  'stop_words.' % sorted(inconsistent))\n"
 325 |      ]
 326 |     },
 327 |     {
 328 |      "name": "stdout",
 329 |      "output_type": "stream",
 330 |      "text": [
 331 |       "(14428, 70688)\n",
 332 |       "(14428, 3)\n"
 333 |      ]
 334 |     }
 335 |    ],
 336 |    "source": [
 337 |     "from sklearn.feature_extraction.text import TfidfVectorizer\n",
 338 |     "stopwords = nltk.corpus.stopwords.words('english')\n",
 339 |     "stopwords.append('ã¯æ’ëœ')\n",
 340 |     "stopwords.append('\\n')\n",
 341 |     "stopwords.append('•')\n",
 342 |     "#Transforms words to TFIDF\n",
 343 |     "vectorizer = TfidfVectorizer(stop_words = stopwords)\n",
 344 |     "\n",
 345 |     "index = 0\n",
 346 |     "keys = {}\n",
 347 |     "\n",
 348 |     "for rem in df_resume.itertuples() :\n",
 349 |     "    key = rem[1]\n",
 350 |     "    keys[key] = index\n",
 351 |     "    index += 1\n",
 352 |     "\n",
 353 |     "#Fit the vectorizer to the data\n",
 354 |     "vectorizer.fit(df_resume['resume_combo'].fillna(''))\n",
 355 |     "\n",
 356 |     "#Transform the data\n",
 357 |     "tfidf_scores = vectorizer.transform(df_resume['resume_combo'].fillna(''))\n",
 358 |     "\n",
 359 |     "print(tfidf_scores.shape)\n",
 360 |     "print(df_resume.shape)"
 361 |    ]
 362 |   },
 363 |   {
 364 |    "cell_type": "code",
 365 |    "execution_count": 48,
 366 |    "metadata": {},
 367 |    "outputs": [],
 368 |    "source": [
 369 |     "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())"
 370 |    ]
 371 |   },
 372 |   {
 373 |    "cell_type": "code",
 374 |    "execution_count": 49,
 375 |    "metadata": {},
 376 |    "outputs": [
 377 |     {
 378 |      "data": {
 379 |       "text/html": [
 380 |        "<div>\n",
 381 |        "<style scoped>\n",
 382 |        "    .dataframe tbody tr th:only-of-type {\n",
 383 |        "        vertical-align: middle;\n",
 384 |        "    }\n",
 385 |        "\n",
 386 |        "    .dataframe tbody tr th {\n",
 387 |        "        vertical-align: top;\n",
 388 |        "    }\n",
 389 |        "\n",
 390 |        "    .dataframe thead th {\n",
 391 |        "        text-align: right;\n",
 392 |        "    }\n",
 393 |        "</style>\n",
 394 |        "<table border=\"1\" class=\"dataframe\">\n",
 395 |        "  <thead>\n",
 396 |        "    <tr style=\"text-align: right;\">\n",
 397 |        "      <th></th>\n",
 398 |        "      <th>00</th>\n",
 399 |        "      <th>000</th>\n",
 400 |        "      <th>0000</th>\n",
 401 |        "      <th>00089765</th>\n",
 402 |        "      <th>00089805</th>\n",
 403 |        "      <th>000webhostapp</th>\n",
 404 |        "      <th>001</th>\n",
 405 |        "      <th>002</th>\n",
 406 |        "      <th>003</th>\n",
 407 |        "      <th>00353</th>\n",
 408 |        "      <th>...</th>\n",
 409 |        "      <th>õle</th>\n",
 410 |        "      <th>øcreated</th>\n",
 411 |        "      <th>ǁǁǁǁǁǁ</th>\n",
 412 |        "      <th>ηadoop</th>\n",
 413 |        "      <th>τrain</th>\n",
 414 |        "      <th>τοοls</th>\n",
 415 |        "      <th>чєαr</th>\n",
 416 |        "      <th>ﬁled</th>\n",
 417 |        "      <th>ﬁnancial</th>\n",
 418 |        "      <th>ﬁxing</th>\n",
 419 |        "    </tr>\n",
 420 |        "  </thead>\n",
 421 |        "  <tbody>\n",
 422 |        "    <tr>\n",
 423 |        "      <th>0</th>\n",
 424 |        "      <td>0.0</td>\n",
 425 |        "      <td>0.0</td>\n",
 426 |        "      <td>0.0</td>\n",
 427 |        "      <td>0.0</td>\n",
 428 |        "      <td>0.0</td>\n",
 429 |        "      <td>0.0</td>\n",
 430 |        "      <td>0.0</td>\n",
 431 |        "      <td>0.0</td>\n",
 432 |        "      <td>0.0</td>\n",
 433 |        "      <td>0.0</td>\n",
 434 |        "      <td>...</td>\n",
 435 |        "      <td>0.0</td>\n",
 436 |        "      <td>0.0</td>\n",
 437 |        "      <td>0.0</td>\n",
 438 |        "      <td>0.0</td>\n",
 439 |        "      <td>0.0</td>\n",
 440 |        "      <td>0.0</td>\n",
 441 |        "      <td>0.0</td>\n",
 442 |        "      <td>0.0</td>\n",
 443 |        "      <td>0.0</td>\n",
 444 |        "      <td>0.0</td>\n",
 445 |        "    </tr>\n",
 446 |        "    <tr>\n",
 447 |        "      <th>1</th>\n",
 448 |        "      <td>0.0</td>\n",
 449 |        "      <td>0.0</td>\n",
 450 |        "      <td>0.0</td>\n",
 451 |        "      <td>0.0</td>\n",
 452 |        "      <td>0.0</td>\n",
 453 |        "      <td>0.0</td>\n",
 454 |        "      <td>0.0</td>\n",
 455 |        "      <td>0.0</td>\n",
 456 |        "      <td>0.0</td>\n",
 457 |        "      <td>0.0</td>\n",
 458 |        "      <td>...</td>\n",
 459 |        "      <td>0.0</td>\n",
 460 |        "      <td>0.0</td>\n",
 461 |        "      <td>0.0</td>\n",
 462 |        "      <td>0.0</td>\n",
 463 |        "      <td>0.0</td>\n",
 464 |        "      <td>0.0</td>\n",
 465 |        "      <td>0.0</td>\n",
 466 |        "      <td>0.0</td>\n",
 467 |        "      <td>0.0</td>\n",
 468 |        "      <td>0.0</td>\n",
 469 |        "    </tr>\n",
 470 |        "    <tr>\n",
 471 |        "      <th>2</th>\n",
 472 |        "      <td>0.0</td>\n",
 473 |        "      <td>0.0</td>\n",
 474 |        "      <td>0.0</td>\n",
 475 |        "      <td>0.0</td>\n",
 476 |        "      <td>0.0</td>\n",
 477 |        "      <td>0.0</td>\n",
 478 |        "      <td>0.0</td>\n",
 479 |        "      <td>0.0</td>\n",
 480 |        "      <td>0.0</td>\n",
 481 |        "      <td>0.0</td>\n",
 482 |        "      <td>...</td>\n",
 483 |        "      <td>0.0</td>\n",
 484 |        "      <td>0.0</td>\n",
 485 |        "      <td>0.0</td>\n",
 486 |        "      <td>0.0</td>\n",
 487 |        "      <td>0.0</td>\n",
 488 |        "      <td>0.0</td>\n",
 489 |        "      <td>0.0</td>\n",
 490 |        "      <td>0.0</td>\n",
 491 |        "      <td>0.0</td>\n",
 492 |        "      <td>0.0</td>\n",
 493 |        "    </tr>\n",
 494 |        "    <tr>\n",
 495 |        "      <th>3</th>\n",
 496 |        "      <td>0.0</td>\n",
 497 |        "      <td>0.0</td>\n",
 498 |        "      <td>0.0</td>\n",
 499 |        "      <td>0.0</td>\n",
 500 |        "      <td>0.0</td>\n",
 501 |        "      <td>0.0</td>\n",
 502 |        "      <td>0.0</td>\n",
 503 |        "      <td>0.0</td>\n",
 504 |        "      <td>0.0</td>\n",
 505 |        "      <td>0.0</td>\n",
 506 |        "      <td>...</td>\n",
 507 |        "      <td>0.0</td>\n",
 508 |        "      <td>0.0</td>\n",
 509 |        "      <td>0.0</td>\n",
 510 |        "      <td>0.0</td>\n",
 511 |        "      <td>0.0</td>\n",
 512 |        "      <td>0.0</td>\n",
 513 |        "      <td>0.0</td>\n",
 514 |        "      <td>0.0</td>\n",
 515 |        "      <td>0.0</td>\n",
 516 |        "      <td>0.0</td>\n",
 517 |        "    </tr>\n",
 518 |        "    <tr>\n",
 519 |        "      <th>4</th>\n",
 520 |        "      <td>0.0</td>\n",
 521 |        "      <td>0.0</td>\n",
 522 |        "      <td>0.0</td>\n",
 523 |        "      <td>0.0</td>\n",
 524 |        "      <td>0.0</td>\n",
 525 |        "      <td>0.0</td>\n",
 526 |        "      <td>0.0</td>\n",
 527 |        "      <td>0.0</td>\n",
 528 |        "      <td>0.0</td>\n",
 529 |        "      <td>0.0</td>\n",
 530 |        "      <td>...</td>\n",
 531 |        "      <td>0.0</td>\n",
 532 |        "      <td>0.0</td>\n",
 533 |        "      <td>0.0</td>\n",
 534 |        "      <td>0.0</td>\n",
 535 |        "      <td>0.0</td>\n",
 536 |        "      <td>0.0</td>\n",
 537 |        "      <td>0.0</td>\n",
 538 |        "      <td>0.0</td>\n",
 539 |        "      <td>0.0</td>\n",
 540 |        "      <td>0.0</td>\n",
 541 |        "    </tr>\n",
 542 |        "  </tbody>\n",
 543 |        "</table>\n",
 544 |        "<p>5 rows × 70688 columns</p>\n",
 545 |        "</div>"
 546 |       ],
 547 |       "text/plain": [
 548 |        "    00  000  0000  00089765  00089805  000webhostapp  001  002  003  00353  \\\n",
 549 |        "0  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
 550 |        "1  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
 551 |        "2  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
 552 |        "3  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
 553 |        "4  0.0  0.0   0.0       0.0       0.0            0.0  0.0  0.0  0.0    0.0   \n",
 554 |        "\n",
 555 |        "   ...  õle  øcreated  ǁǁǁǁǁǁ  ηadoop  τrain  τοοls  чєαr  ﬁled  ﬁnancial  \\\n",
 556 |        "0  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
 557 |        "1  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
 558 |        "2  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
 559 |        "3  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
 560 |        "4  ...  0.0       0.0     0.0     0.0    0.0    0.0   0.0   0.0       0.0   \n",
 561 |        "\n",
 562 |        "   ﬁxing  \n",
 563 |        "0    0.0  \n",
 564 |        "1    0.0  \n",
 565 |        "2    0.0  \n",
 566 |        "3    0.0  \n",
 567 |        "4    0.0  \n",
 568 |        "\n",
 569 |        "[5 rows x 70688 columns]"
 570 |       ]
 571 |      },
 572 |      "execution_count": 49,
 573 |      "metadata": {},
 574 |      "output_type": "execute_result"
 575 |     }
 576 |    ],
 577 |    "source": [
 578 |     "test.head()"
 579 |    ]
 580 |   },
 581 |   {
 582 |    "cell_type": "markdown",
 583 |    "metadata": {},
 584 |    "source": [
 585 |     "### Creating my Stopword list\n",
 586 |     "#### As seen there are so many unwanted tokens like numbers,ïƒ¼ etc , I need to add them in \"stop words\" list to train model"
 587 |    ]
 588 |   },
 589 |   {
 590 |    "cell_type": "code",
 591 |    "execution_count": 50,
 592 |    "metadata": {},
 593 |    "outputs": [],
 594 |    "source": [
 595 |     "#getting list of all tokens\n",
 596 |     "word_list = test.columns.tolist()"
 597 |    ]
 598 |   },
 599 |   {
 600 |    "cell_type": "code",
 601 |    "execution_count": 51,
 602 |    "metadata": {},
 603 |    "outputs": [],
 604 |    "source": [
 605 |     "##Getting a list of unwanted words as s_words and adding to stopwords\n",
 606 |     "s_words =[]\n",
 607 |     "for word in word_list:\n",
 608 |     "    #print(word)\n",
 609 |     "    if re.search(\"^\\W|^\\d\",word):\n",
 610 |     "        s_words.append(word)"
 611 |    ]
 612 |   },
 613 |   {
 614 |    "cell_type": "code",
 615 |    "execution_count": 52,
 616 |    "metadata": {},
 617 |    "outputs": [],
 618 |    "source": [
 619 |     "s_words.append('')        \n",
 620 |     "from nltk.corpus import stopwords\n",
 621 |     "stopword_set = set(stopwords.words('english'))\n",
 622 |     "stopword_set = list(stopword_set)\n",
 623 |     "stopword_set.extend(s_words)"
 624 |    ]
 625 |   },
 626 |   {
 627 |    "cell_type": "code",
 628 |    "execution_count": 53,
 629 |    "metadata": {},
 630 |    "outputs": [],
 631 |    "source": [
 632 |     "def preprocess(text):\n",
 633 |     "    stop_words = stopword_set\n",
 634 |     "    #0. split words by whitespace\n",
 635 |     "    text = text.split()\n",
 636 |     "    \n",
 637 |     "    \n",
 638 |     "    # 1. lower case\n",
 639 |     "    text = [word.lower() for word in text]\n",
 640 |     "    \n",
 641 |     "    # 2. remove punctuations\n",
 642 |     "    punc_table = str.maketrans('','',string.punctuation)\n",
 643 |     "    text = [word.translate(punc_table) for word in text]\n",
 644 |     "    \n",
 645 |     "    # 3. remove stop words\n",
 646 |     "    text = [word for word in text if word not in stop_words]\n",
 647 |     "    \n",
 648 |     "    return text"
 649 |    ]
 650 |   },
 651 |   {
 652 |    "cell_type": "code",
 653 |    "execution_count": 54,
 654 |    "metadata": {},
 655 |    "outputs": [],
 656 |    "source": [
 657 |     "tokenized_doc = []\n",
 658 |     "doc = df_resume['resume_combo']\n",
 659 |     "#doc = docs_sample\n",
 660 |     "for d in doc:\n",
 661 |     "    tokenized_doc.append(preprocess(d))\n",
 662 |     "#tokenized_doc"
 663 |    ]
 664 |   },
 665 |   {
 666 |    "cell_type": "code",
 667 |    "execution_count": 55,
 668 |    "metadata": {},
 669 |    "outputs": [],
 670 |    "source": [
 671 |     "# Convert tokenized document into gensim formated tagged data\n",
 672 |     "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]"
 673 |    ]
 674 |   },
 675 |   {
 676 |    "cell_type": "code",
 677 |    "execution_count": 56,
 678 |    "metadata": {},
 679 |    "outputs": [
 680 |     {
 681 |      "data": {
 682 |       "text/plain": [
 683 |        "14428"
 684 |       ]
 685 |      },
 686 |      "execution_count": 56,
 687 |      "metadata": {},
 688 |      "output_type": "execute_result"
 689 |     }
 690 |    ],
 691 |    "source": [
 692 |     "num_doc = len(tagged_data)\n",
 693 |     "num_doc\n",
 694 |     "#confirm length (should be 14428)\n",
 695 |     "len(tokenized_doc)"
 696 |    ]
 697 |   },
 698 |   {
 699 |    "cell_type": "code",
 700 |    "execution_count": 58,
 701 |    "metadata": {},
 702 |    "outputs": [],
 703 |    "source": [
 704 |     "from gensim.test.utils import get_tmpfile\n",
 705 |     "from gensim.models.callbacks import CallbackAny2Vec\n",
 706 |     "\n",
 707 |     "class EpochSaver(CallbackAny2Vec):\n",
 708 |     "\n",
 709 |     "    def __init__(self, path_prefix):\n",
 710 |     "        self.path_prefix = path_prefix\n",
 711 |     "        self.epoch = 0\n",
 712 |     "\n",
 713 |     "    def on_epoch_end(self, model):\n",
 714 |     "        output_path = get_tmpfile('{}_epoch{}.model'.format(self.path_prefix, self.epoch))\n",
 715 |     "        model.save(output_path)\n",
 716 |     "        self.epoch += 1"
 717 |    ]
 718 |   },
 719 |   {
 720 |    "cell_type": "code",
 721 |    "execution_count": 59,
 722 |    "metadata": {},
 723 |    "outputs": [],
 724 |    "source": [
 725 |     "class EpochLogger(CallbackAny2Vec):\n",
 726 |     "    \n",
 727 |     "    def __init__(self):\n",
 728 |     "        self.epoch = 0\n",
 729 |     "        \n",
 730 |     "    def on_epoch_begin(self, model):\n",
 731 |     "        print(\"Epoch #{} start\".format(self.epoch))\n",
 732 |     "\n",
 733 |     "    def on_epoch_end(self, model):\n",
 734 |     "        print(\"Epoch #{} end\".format(self.epoch))\n",
 735 |     "        self.epoch += 1"
 736 |    ]
 737 |   },
 738 |   {
 739 |    "cell_type": "code",
 740 |    "execution_count": 61,
 741 |    "metadata": {},
 742 |    "outputs": [],
 743 |    "source": [
 744 |     "## Load saved doc2vec model\n",
 745 |     "model= Doc2Vec.load(\"Model/my_doc2vec.model\")"
 746 |    ]
 747 |   },
 748 |   {
 749 |    "cell_type": "code",
 750 |    "execution_count": 62,
 751 |    "metadata": {},
 752 |    "outputs": [],
 753 |    "source": [
 754 |     "## Get vector value\n",
 755 |     "vec = np.empty([14428,20])\n",
 756 |     "\n",
 757 |     "for k,i in enumerate(tokenized_doc):\n",
 758 |     "    \n",
 759 |     "    #print(i)\n",
 760 |     "    vector = model.infer_vector(i)\n",
 761 |     "    vec[k] = vector\n",
 762 |     "    #vec = np.append(vector)\n",
 763 |     "    #vecf = np.append(vec,vector)\n",
 764 |     "\n",
 765 |     "# reshape into 2D\n",
 766 |     "new_arr = np.reshape(vec,(-1,20))"
 767 |    ]
 768 |   },
 769 |   {
 770 |    "cell_type": "code",
 771 |    "execution_count": 64,
 772 |    "metadata": {},
 773 |    "outputs": [],
 774 |    "source": [
 775 |     "rng = range(1, 21)\n",
 776 |     "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])"
 777 |    ]
 778 |   },
 779 |   {
 780 |    "cell_type": "code",
 781 |    "execution_count": 65,
 782 |    "metadata": {},
 783 |    "outputs": [
 784 |     {
 785 |      "data": {
 786 |       "text/html": [
 787 |        "<div>\n",
 788 |        "<style scoped>\n",
 789 |        "    .dataframe tbody tr th:only-of-type {\n",
 790 |        "        vertical-align: middle;\n",
 791 |        "    }\n",
 792 |        "\n",
 793 |        "    .dataframe tbody tr th {\n",
 794 |        "        vertical-align: top;\n",
 795 |        "    }\n",
 796 |        "\n",
 797 |        "    .dataframe thead th {\n",
 798 |        "        text-align: right;\n",
 799 |        "    }\n",
 800 |        "</style>\n",
 801 |        "<table border=\"1\" class=\"dataframe\">\n",
 802 |        "  <thead>\n",
 803 |        "    <tr style=\"text-align: right;\">\n",
 804 |        "      <th></th>\n",
 805 |        "      <th>vec_1</th>\n",
 806 |        "      <th>vec_2</th>\n",
 807 |        "      <th>vec_3</th>\n",
 808 |        "      <th>vec_4</th>\n",
 809 |        "      <th>vec_5</th>\n",
 810 |        "      <th>vec_6</th>\n",
 811 |        "      <th>vec_7</th>\n",
 812 |        "      <th>vec_8</th>\n",
 813 |        "      <th>vec_9</th>\n",
 814 |        "      <th>vec_10</th>\n",
 815 |        "      <th>vec_11</th>\n",
 816 |        "      <th>vec_12</th>\n",
 817 |        "      <th>vec_13</th>\n",
 818 |        "      <th>vec_14</th>\n",
 819 |        "      <th>vec_15</th>\n",
 820 |        "      <th>vec_16</th>\n",
 821 |        "      <th>vec_17</th>\n",
 822 |        "      <th>vec_18</th>\n",
 823 |        "      <th>vec_19</th>\n",
 824 |        "      <th>vec_20</th>\n",
 825 |        "    </tr>\n",
 826 |        "  </thead>\n",
 827 |        "  <tbody>\n",
 828 |        "    <tr>\n",
 829 |        "      <th>0</th>\n",
 830 |        "      <td>-3.145642</td>\n",
 831 |        "      <td>-0.409380</td>\n",
 832 |        "      <td>0.701160</td>\n",
 833 |        "      <td>-0.938745</td>\n",
 834 |        "      <td>0.585239</td>\n",
 835 |        "      <td>3.585946</td>\n",
 836 |        "      <td>-0.120781</td>\n",
 837 |        "      <td>0.111222</td>\n",
 838 |        "      <td>1.644105</td>\n",
 839 |        "      <td>2.184981</td>\n",
 840 |        "      <td>-2.117909</td>\n",
 841 |        "      <td>-0.085430</td>\n",
 842 |        "      <td>-2.877392</td>\n",
 843 |        "      <td>0.239383</td>\n",
 844 |        "      <td>-1.582871</td>\n",
 845 |        "      <td>1.435642</td>\n",
 846 |        "      <td>-1.051450</td>\n",
 847 |        "      <td>1.960831</td>\n",
 848 |        "      <td>1.786694</td>\n",
 849 |        "      <td>-2.375981</td>\n",
 850 |        "    </tr>\n",
 851 |        "    <tr>\n",
 852 |        "      <th>1</th>\n",
 853 |        "      <td>-0.786235</td>\n",
 854 |        "      <td>-1.306011</td>\n",
 855 |        "      <td>-1.383107</td>\n",
 856 |        "      <td>-1.669708</td>\n",
 857 |        "      <td>0.832136</td>\n",
 858 |        "      <td>1.849790</td>\n",
 859 |        "      <td>0.178872</td>\n",
 860 |        "      <td>-1.736894</td>\n",
 861 |        "      <td>0.741685</td>\n",
 862 |        "      <td>1.553933</td>\n",
 863 |        "      <td>-2.916478</td>\n",
 864 |        "      <td>-0.712572</td>\n",
 865 |        "      <td>-0.502129</td>\n",
 866 |        "      <td>-0.849293</td>\n",
 867 |        "      <td>0.435406</td>\n",
 868 |        "      <td>0.339330</td>\n",
 869 |        "      <td>0.060282</td>\n",
 870 |        "      <td>-0.415035</td>\n",
 871 |        "      <td>3.203696</td>\n",
 872 |        "      <td>-3.607635</td>\n",
 873 |        "    </tr>\n",
 874 |        "    <tr>\n",
 875 |        "      <th>2</th>\n",
 876 |        "      <td>-2.747642</td>\n",
 877 |        "      <td>-1.721797</td>\n",
 878 |        "      <td>-0.910322</td>\n",
 879 |        "      <td>-0.775950</td>\n",
 880 |        "      <td>1.472325</td>\n",
 881 |        "      <td>2.455998</td>\n",
 882 |        "      <td>-0.852150</td>\n",
 883 |        "      <td>-0.150517</td>\n",
 884 |        "      <td>0.844202</td>\n",
 885 |        "      <td>1.380623</td>\n",
 886 |        "      <td>-1.018832</td>\n",
 887 |        "      <td>0.777981</td>\n",
 888 |        "      <td>-1.977556</td>\n",
 889 |        "      <td>0.853214</td>\n",
 890 |        "      <td>-1.281344</td>\n",
 891 |        "      <td>2.195391</td>\n",
 892 |        "      <td>0.800305</td>\n",
 893 |        "      <td>1.078035</td>\n",
 894 |        "      <td>2.166900</td>\n",
 895 |        "      <td>-2.658121</td>\n",
 896 |        "    </tr>\n",
 897 |        "    <tr>\n",
 898 |        "      <th>3</th>\n",
 899 |        "      <td>-1.771770</td>\n",
 900 |        "      <td>-1.375850</td>\n",
 901 |        "      <td>-0.475922</td>\n",
 902 |        "      <td>-0.784473</td>\n",
 903 |        "      <td>-0.377240</td>\n",
 904 |        "      <td>1.596389</td>\n",
 905 |        "      <td>1.094220</td>\n",
 906 |        "      <td>-0.253642</td>\n",
 907 |        "      <td>0.468265</td>\n",
 908 |        "      <td>2.149588</td>\n",
 909 |        "      <td>-1.234415</td>\n",
 910 |        "      <td>0.295536</td>\n",
 911 |        "      <td>-2.615532</td>\n",
 912 |        "      <td>0.115959</td>\n",
 913 |        "      <td>-2.044196</td>\n",
 914 |        "      <td>-0.769109</td>\n",
 915 |        "      <td>-0.716604</td>\n",
 916 |        "      <td>1.145388</td>\n",
 917 |        "      <td>3.452934</td>\n",
 918 |        "      <td>-1.008162</td>\n",
 919 |        "    </tr>\n",
 920 |        "    <tr>\n",
 921 |        "      <th>4</th>\n",
 922 |        "      <td>-0.097372</td>\n",
 923 |        "      <td>-1.405603</td>\n",
 924 |        "      <td>-0.801234</td>\n",
 925 |        "      <td>-0.248921</td>\n",
 926 |        "      <td>-0.376417</td>\n",
 927 |        "      <td>-0.157050</td>\n",
 928 |        "      <td>-0.290440</td>\n",
 929 |        "      <td>-1.440582</td>\n",
 930 |        "      <td>-0.169669</td>\n",
 931 |        "      <td>1.190537</td>\n",
 932 |        "      <td>-0.291407</td>\n",
 933 |        "      <td>-1.080500</td>\n",
 934 |        "      <td>-2.950497</td>\n",
 935 |        "      <td>0.031693</td>\n",
 936 |        "      <td>0.119182</td>\n",
 937 |        "      <td>-0.883555</td>\n",
 938 |        "      <td>0.178819</td>\n",
 939 |        "      <td>-0.858324</td>\n",
 940 |        "      <td>1.239632</td>\n",
 941 |        "      <td>-0.043914</td>\n",
 942 |        "    </tr>\n",
 943 |        "  </tbody>\n",
 944 |        "</table>\n",
 945 |        "</div>"
 946 |       ],
 947 |       "text/plain": [
 948 |        "      vec_1     vec_2     vec_3     vec_4     vec_5     vec_6     vec_7  \\\n",
 949 |        "0 -3.145642 -0.409380  0.701160 -0.938745  0.585239  3.585946 -0.120781   \n",
 950 |        "1 -0.786235 -1.306011 -1.383107 -1.669708  0.832136  1.849790  0.178872   \n",
 951 |        "2 -2.747642 -1.721797 -0.910322 -0.775950  1.472325  2.455998 -0.852150   \n",
 952 |        "3 -1.771770 -1.375850 -0.475922 -0.784473 -0.377240  1.596389  1.094220   \n",
 953 |        "4 -0.097372 -1.405603 -0.801234 -0.248921 -0.376417 -0.157050 -0.290440   \n",
 954 |        "\n",
 955 |        "      vec_8     vec_9    vec_10    vec_11    vec_12    vec_13    vec_14  \\\n",
 956 |        "0  0.111222  1.644105  2.184981 -2.117909 -0.085430 -2.877392  0.239383   \n",
 957 |        "1 -1.736894  0.741685  1.553933 -2.916478 -0.712572 -0.502129 -0.849293   \n",
 958 |        "2 -0.150517  0.844202  1.380623 -1.018832  0.777981 -1.977556  0.853214   \n",
 959 |        "3 -0.253642  0.468265  2.149588 -1.234415  0.295536 -2.615532  0.115959   \n",
 960 |        "4 -1.440582 -0.169669  1.190537 -0.291407 -1.080500 -2.950497  0.031693   \n",
 961 |        "\n",
 962 |        "     vec_15    vec_16    vec_17    vec_18    vec_19    vec_20  \n",
 963 |        "0 -1.582871  1.435642 -1.051450  1.960831  1.786694 -2.375981  \n",
 964 |        "1  0.435406  0.339330  0.060282 -0.415035  3.203696 -3.607635  \n",
 965 |        "2 -1.281344  2.195391  0.800305  1.078035  2.166900 -2.658121  \n",
 966 |        "3 -2.044196 -0.769109 -0.716604  1.145388  3.452934 -1.008162  \n",
 967 |        "4  0.119182 -0.883555  0.178819 -0.858324  1.239632 -0.043914  "
 968 |       ]
 969 |      },
 970 |      "execution_count": 65,
 971 |      "metadata": {},
 972 |      "output_type": "execute_result"
 973 |     }
 974 |    ],
 975 |    "source": [
 976 |     "vec_df.head(5)"
 977 |    ]
 978 |   },
 979 |   {
 980 |    "cell_type": "code",
 981 |    "execution_count": 66,
 982 |    "metadata": {},
 983 |    "outputs": [],
 984 |    "source": [
 985 |     "con_resume = pd.concat([resume, vec_df], axis=1)\n",
 986 |     "con_resume.to_csv('wip/con_resume.csv', index=False)"
 987 |    ]
 988 |   },
 989 |   {
 990 |    "cell_type": "code",
 991 |    "execution_count": 44,
 992 |    "metadata": {},
 993 |    "outputs": [],
 994 |    "source": [
 995 |     "#con_resume.info()"
 996 |    ]
 997 |   },
 998 |   {
 999 |    "cell_type": "code",
1000 |    "execution_count": null,
1001 |    "metadata": {},
1002 |    "outputs": [],
1003 |    "source": []
1004 |   },
1005 |   {
1006 |    "cell_type": "code",
1007 |    "execution_count": 49,
1008 |    "metadata": {},
1009 |    "outputs": [],
1010 |    "source": [
1011 |     "tokenized_doc = []\n",
1012 |     "#doc = df_resume['resume_combo']\n",
1013 |     "doc = docs_sample\n",
1014 |     "for d in doc:\n",
1015 |     "    tokenized_doc.append(preprocess(d))\n",
1016 |     "#tokenized_doc"
1017 |    ]
1018 |   },
1019 |   {
1020 |    "cell_type": "code",
1021 |    "execution_count": 50,
1022 |    "metadata": {},
1023 |    "outputs": [],
1024 |    "source": [
1025 |     "# Convert tokenized document into gensim formated tagged data\n",
1026 |     "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]"
1027 |    ]
1028 |   },
1029 |   {
1030 |    "cell_type": "code",
1031 |    "execution_count": 51,
1032 |    "metadata": {},
1033 |    "outputs": [
1034 |     {
1035 |      "data": {
1036 |       "text/plain": [
1037 |        "10"
1038 |       ]
1039 |      },
1040 |      "execution_count": 51,
1041 |      "metadata": {},
1042 |      "output_type": "execute_result"
1043 |     }
1044 |    ],
1045 |    "source": [
1046 |     "num_doc = len(tagged_data)\n",
1047 |     "num_doc\n",
1048 |     "#confirm length (should be 38941)\n",
1049 |     "len(tokenized_doc)"
1050 |    ]
1051 |   },
1052 |   {
1053 |    "cell_type": "code",
1054 |    "execution_count": 58,
1055 |    "metadata": {},
1056 |    "outputs": [],
1057 |    "source": [
1058 |     "## Load saved doc2vec model\n",
1059 |     "model= Doc2Vec.load(\"my_doc2vec.model\")\n",
1060 |     "\n",
1061 |     "## Get vector value\n",
1062 |     "vec = np.empty([10,20])\n",
1063 |     "\n",
1064 |     "for k,i in enumerate(tokenized_doc):\n",
1065 |     "    \n",
1066 |     "    #print(i)\n",
1067 |     "    vector = model.infer_vector(i)\n",
1068 |     "    vec[k] = vector\n",
1069 |     "    #vec = np.append(vector)\n",
1070 |     "    #vecf = np.append(vec,vector)\n",
1071 |     "\n",
1072 |     "# reshape into 2D\n",
1073 |     "new_arr = np.reshape(vec,(-1,20))"
1074 |    ]
1075 |   },
1076 |   {
1077 |    "cell_type": "code",
1078 |    "execution_count": null,
1079 |    "metadata": {},
1080 |    "outputs": [],
1081 |    "source": [
1082 |     "test = np.array([[1,2,3],[4,5,6]])\n",
1083 |     "test[0]"
1084 |    ]
1085 |   },
1086 |   {
1087 |    "cell_type": "code",
1088 |    "execution_count": 61,
1089 |    "metadata": {},
1090 |    "outputs": [
1091 |     {
1092 |      "data": {
1093 |       "text/plain": [
1094 |        "array([-3.14492106, -0.41021681,  0.70149601, -0.93887955,  0.58496076,\n",
1095 |        "        3.58589458, -0.12033088,  0.11019378,  1.64519656,  2.18371987,\n",
1096 |        "       -2.11720061, -0.08485675, -2.87654066,  0.24021174, -1.58367932,\n",
1097 |        "        1.43522847, -1.05121636,  1.96061814,  1.78778028, -2.37729073])"
1098 |       ]
1099 |      },
1100 |      "execution_count": 61,
1101 |      "metadata": {},
1102 |      "output_type": "execute_result"
1103 |     }
1104 |    ],
1105 |    "source": [
1106 |     "new_arr[0]"
1107 |    ]
1108 |   },
1109 |   {
1110 |    "cell_type": "code",
1111 |    "execution_count": 62,
1112 |    "metadata": {},
1113 |    "outputs": [],
1114 |    "source": [
1115 |     "rng = range(1, 21)\n",
1116 |     "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])"
1117 |    ]
1118 |   },
1119 |   {
1120 |    "cell_type": "code",
1121 |    "execution_count": 63,
1122 |    "metadata": {},
1123 |    "outputs": [
1124 |     {
1125 |      "name": "stdout",
1126 |      "output_type": "stream",
1127 |      "text": [
1128 |       "<class 'pandas.core.frame.DataFrame'>\n",
1129 |       "RangeIndex: 10 entries, 0 to 9\n",
1130 |       "Data columns (total 20 columns):\n",
1131 |       " #   Column  Non-Null Count  Dtype  \n",
1132 |       "---  ------  --------------  -----  \n",
1133 |       " 0   vec_1   10 non-null     float64\n",
1134 |       " 1   vec_2   10 non-null     float64\n",
1135 |       " 2   vec_3   10 non-null     float64\n",
1136 |       " 3   vec_4   10 non-null     float64\n",
1137 |       " 4   vec_5   10 non-null     float64\n",
1138 |       " 5   vec_6   10 non-null     float64\n",
1139 |       " 6   vec_7   10 non-null     float64\n",
1140 |       " 7   vec_8   10 non-null     float64\n",
1141 |       " 8   vec_9   10 non-null     float64\n",
1142 |       " 9   vec_10  10 non-null     float64\n",
1143 |       " 10  vec_11  10 non-null     float64\n",
1144 |       " 11  vec_12  10 non-null     float64\n",
1145 |       " 12  vec_13  10 non-null     float64\n",
1146 |       " 13  vec_14  10 non-null     float64\n",
1147 |       " 14  vec_15  10 non-null     float64\n",
1148 |       " 15  vec_16  10 non-null     float64\n",
1149 |       " 16  vec_17  10 non-null     float64\n",
1150 |       " 17  vec_18  10 non-null     float64\n",
1151 |       " 18  vec_19  10 non-null     float64\n",
1152 |       " 19  vec_20  10 non-null     float64\n",
1153 |       "dtypes: float64(20)\n",
1154 |       "memory usage: 1.7 KB\n"
1155 |      ]
1156 |     }
1157 |    ],
1158 |    "source": [
1159 |     "vec_df.info()"
1160 |    ]
1161 |   },
1162 |   {
1163 |    "cell_type": "code",
1164 |    "execution_count": 35,
1165 |    "metadata": {},
1166 |    "outputs": [],
1167 |    "source": [
1168 |     "r1.to_csv('test_r.csv',index=False)"
1169 |    ]
1170 |   },
1171 |   {
1172 |    "cell_type": "code",
1173 |    "execution_count": 24,
1174 |    "metadata": {},
1175 |    "outputs": [],
1176 |    "source": [
1177 |     "r1 = resume.head(10)"
1178 |    ]
1179 |   },
1180 |   {
1181 |    "cell_type": "code",
1182 |    "execution_count": 36,
1183 |    "metadata": {
1184 |     "scrolled": false
1185 |    },
1186 |    "outputs": [],
1187 |    "source": [
1188 |     "# read each work experience\n",
1189 |     "resume['work_experiences'] = resume['work_experiences'].str.lower()\n",
1190 |     "\n",
1191 |     "resume_all_desc = []\n",
1192 |     "for index, rows in resume.iterrows():\n",
1193 |     "    #print('#@#@#@#@#@@#@#@#@#@##@@#@#@@##@#@#@#@#@#@##@#@#@##@#@@#@#@#')\n",
1194 |     "    #print(f'resume no. {index}')\n",
1195 |     "    resume_desc= []\n",
1196 |     "    #pick work experience col and read it as JSON \n",
1197 |     "    \n",
1198 |     "    work = resume['work_experiences'][index]\n",
1199 |     "    try: result_work = eval(work)\n",
1200 |     "    except: continue\n",
1201 |     "    #print(f'resume  :  {index}')\n",
1202 |     "    #read description to match with job\n",
1203 |     "    \n",
1204 |     "    for i in result_work:    \n",
1205 |     "        w_title_n = (result_work[0][0]['wtitle:'])            \n",
1206 |     "        w_company= (result_work[i][1]['wcompany:'])\n",
1207 |     "#         resume_desc.append(w_company) \n",
1208 |     "        w_city= (result_work[i][2]['wcity:'])\n",
1209 |     "        w_state= (result_work[i][3]['wstate:'])\n",
1210 |     "        w_duration= (result_work[i][4]['wduration:'])\n",
1211 |     "           \n",
1212 |     "        w_descr= (result_work[i][5]['wdescr:'])\n",
1213 |     "        if  (w_descr == 'none'):\n",
1214 |     "            continue\n",
1215 |     "        #print(w_descr)\n",
1216 |     "        #print('**************')\n",
1217 |     "        resume_desc.append(w_descr + '')   \n",
1218 |     "        \n",
1219 |     "    #print(resume_desc)\n",
1220 |     "    resume_all_desc.append(resume_desc)\n",
1221 |     "#print(resume_test)\n",
1222 |     "resume['experience_desc'] = resume_all_desc"
1223 |    ]
1224 |   },
1225 |   {
1226 |    "cell_type": "code",
1227 |    "execution_count": 37,
1228 |    "metadata": {},
1229 |    "outputs": [],
1230 |    "source": [
1231 |     "#resume.to_csv('wip/resume_sorted5.csv',index=False)"
1232 |    ]
1233 |   }
1234 |  ],
1235 |  "metadata": {
1236 |   "kernelspec": {
1237 |    "display_name": "Python 3",
1238 |    "language": "python",
1239 |    "name": "python3"
1240 |   },
1241 |   "language_info": {
1242 |    "codemirror_mode": {
1243 |     "name": "ipython",
1244 |     "version": 3
1245 |    },
1246 |    "file_extension": ".py",
1247 |    "mimetype": "text/x-python",
1248 |    "name": "python",
1249 |    "nbconvert_exporter": "python",
1250 |    "pygments_lexer": "ipython3",
1251 |    "version": "3.7.6"
1252 |   }
1253 |  },
1254 |  "nbformat": 4,
1255 |  "nbformat_minor": 4
1256 | }
1257 | 


--------------------------------------------------------------------------------
/Data/Job-Locations/india-city-state.csv:
--------------------------------------------------------------------------------
   1 | city_id,city_name,state
   2 | 1,Kolhapur,Maharashtra
   3 | 2,Port Blair,Andaman & Nicobar Islands
   4 | 3,Adilabad,Andhra Pradesh
   5 | 4,Adoni,Andhra Pradesh
   6 | 5,Amadalavalasa,Andhra Pradesh
   7 | 6,Amalapuram,Andhra Pradesh
   8 | 7,Anakapalle,Andhra Pradesh
   9 | 8,Anantapur,Andhra Pradesh
  10 | 9,Badepalle,Andhra Pradesh
  11 | 10,Banganapalle,Andhra Pradesh
  12 | 11,Bapatla,Andhra Pradesh
  13 | 12,Bellampalle,Andhra Pradesh
  14 | 13,Bethamcherla,Andhra Pradesh
  15 | 14,Bhadrachalam,Andhra Pradesh
  16 | 15,Bhainsa,Andhra Pradesh
  17 | 16,Bheemunipatnam,Andhra Pradesh
  18 | 17,Bhimavaram,Andhra Pradesh
  19 | 18,Bhongir,Andhra Pradesh
  20 | 19,Bobbili,Andhra Pradesh
  21 | 20,Bodhan,Andhra Pradesh
  22 | 21,Chilakaluripet,Andhra Pradesh
  23 | 22,Chirala,Andhra Pradesh
  24 | 23,Chittoor,Andhra Pradesh
  25 | 24,Cuddapah,Andhra Pradesh
  26 | 25,Devarakonda,Andhra Pradesh
  27 | 26,Dharmavaram,Andhra Pradesh
  28 | 27,Eluru,Andhra Pradesh
  29 | 28,Farooqnagar,Andhra Pradesh
  30 | 29,Gadwal,Andhra Pradesh
  31 | 30,Gooty,Andhra Pradesh
  32 | 31,Gudivada,Andhra Pradesh
  33 | 32,Gudur,Andhra Pradesh
  34 | 33,Guntakal,Andhra Pradesh
  35 | 34,Guntur,Andhra Pradesh
  36 | 35,Hanuman Junction,Andhra Pradesh
  37 | 36,Hindupur,Andhra Pradesh
  38 | 37,Hyderabad,Andhra Pradesh
  39 | 38,Ichchapuram,Andhra Pradesh
  40 | 39,Jaggaiahpet,Andhra Pradesh
  41 | 40,Jagtial,Andhra Pradesh
  42 | 41,Jammalamadugu,Andhra Pradesh
  43 | 42,Jangaon,Andhra Pradesh
  44 | 43,Kadapa,Andhra Pradesh
  45 | 44,Kadiri,Andhra Pradesh
  46 | 45,Kagaznagar,Andhra Pradesh
  47 | 46,Kakinada,Andhra Pradesh
  48 | 47,Kalyandurg,Andhra Pradesh
  49 | 48,Kamareddy,Andhra Pradesh
  50 | 49,Kandukur,Andhra Pradesh
  51 | 50,Karimnagar,Andhra Pradesh
  52 | 51,Kavali,Andhra Pradesh
  53 | 52,Khammam,Andhra Pradesh
  54 | 53,Koratla,Andhra Pradesh
  55 | 54,Kothagudem,Andhra Pradesh
  56 | 55,Kothapeta,Andhra Pradesh
  57 | 56,Kovvur,Andhra Pradesh
  58 | 57,Kurnool,Andhra Pradesh
  59 | 58,Kyathampalle,Andhra Pradesh
  60 | 59,Macherla,Andhra Pradesh
  61 | 60,Machilipatnam,Andhra Pradesh
  62 | 61,Madanapalle,Andhra Pradesh
  63 | 62,Mahbubnagar,Andhra Pradesh
  64 | 63,Mancherial,Andhra Pradesh
  65 | 64,Mandamarri,Andhra Pradesh
  66 | 65,Mandapeta,Andhra Pradesh
  67 | 66,Manuguru,Andhra Pradesh
  68 | 67,Markapur,Andhra Pradesh
  69 | 68,Medak,Andhra Pradesh
  70 | 69,Miryalaguda,Andhra Pradesh
  71 | 70,Mogalthur,Andhra Pradesh
  72 | 71,Nagari,Andhra Pradesh
  73 | 72,Nagarkurnool,Andhra Pradesh
  74 | 73,Nandyal,Andhra Pradesh
  75 | 74,Narasapur,Andhra Pradesh
  76 | 75,Narasaraopet,Andhra Pradesh
  77 | 76,Narayanpet,Andhra Pradesh
  78 | 77,Narsipatnam,Andhra Pradesh
  79 | 78,Nellore,Andhra Pradesh
  80 | 79,Nidadavole,Andhra Pradesh
  81 | 80,Nirmal,Andhra Pradesh
  82 | 81,Nizamabad,Andhra Pradesh
  83 | 82,Nuzvid,Andhra Pradesh
  84 | 83,Ongole,Andhra Pradesh
  85 | 84,Palacole,Andhra Pradesh
  86 | 85,Palasa Kasibugga,Andhra Pradesh
  87 | 86,Palwancha,Andhra Pradesh
  88 | 87,Parvathipuram,Andhra Pradesh
  89 | 88,Pedana,Andhra Pradesh
  90 | 89,Peddapuram,Andhra Pradesh
  91 | 90,Pithapuram,Andhra Pradesh
  92 | 91,Pondur,Andhra pradesh
  93 | 92,Ponnur,Andhra Pradesh
  94 | 93,Proddatur,Andhra Pradesh
  95 | 94,Punganur,Andhra Pradesh
  96 | 95,Puttur,Andhra Pradesh
  97 | 96,Rajahmundry,Andhra Pradesh
  98 | 97,Rajam,Andhra Pradesh
  99 | 98,Ramachandrapuram,Andhra Pradesh
 100 | 99,Ramagundam,Andhra Pradesh
 101 | 100,Rayachoti,Andhra Pradesh
 102 | 101,Rayadurg,Andhra Pradesh
 103 | 102,Renigunta,Andhra Pradesh
 104 | 103,Repalle,Andhra Pradesh
 105 | 104,Sadasivpet,Andhra Pradesh
 106 | 105,Salur,Andhra Pradesh
 107 | 106,Samalkot,Andhra Pradesh
 108 | 107,Sangareddy,Andhra Pradesh
 109 | 108,Sattenapalle,Andhra Pradesh
 110 | 109,Siddipet,Andhra Pradesh
 111 | 110,Singapur,Andhra Pradesh
 112 | 111,Sircilla,Andhra Pradesh
 113 | 112,Srikakulam,Andhra Pradesh
 114 | 113,Srikalahasti,Andhra Pradesh
 115 | 115,Suryapet,Andhra Pradesh
 116 | 116,Tadepalligudem,Andhra Pradesh
 117 | 117,Tadpatri,Andhra Pradesh
 118 | 118,Tandur,Andhra Pradesh
 119 | 119,Tanuku,Andhra Pradesh
 120 | 120,Tenali,Andhra Pradesh
 121 | 121,Tirupati,Andhra Pradesh
 122 | 122,Tuni,Andhra Pradesh
 123 | 123,Uravakonda,Andhra Pradesh
 124 | 124,Venkatagiri,Andhra Pradesh
 125 | 125,Vicarabad,Andhra Pradesh
 126 | 126,Vijayawada,Andhra Pradesh
 127 | 127,Vinukonda,Andhra Pradesh
 128 | 128,Visakhapatnam,Andhra Pradesh
 129 | 129,Vizianagaram,Andhra Pradesh
 130 | 130,Wanaparthy,Andhra Pradesh
 131 | 131,Warangal,Andhra Pradesh
 132 | 132,Yellandu,Andhra Pradesh
 133 | 133,Yemmiganur,Andhra Pradesh
 134 | 134,Yerraguntla,Andhra Pradesh
 135 | 135,Zahirabad,Andhra Pradesh
 136 | 136,Rajampet,Andhra Pradesh
 137 | 137,Along,Arunachal Pradesh
 138 | 138,Bomdila,Arunachal Pradesh
 139 | 139,Itanagar,Arunachal Pradesh
 140 | 140,Naharlagun,Arunachal Pradesh
 141 | 141,Pasighat,Arunachal Pradesh
 142 | 142,Abhayapuri,Assam
 143 | 143,Amguri,Assam
 144 | 144,Anandnagaar,Assam
 145 | 145,Barpeta,Assam
 146 | 146,Barpeta Road,Assam
 147 | 147,Bilasipara,Assam
 148 | 148,Bongaigaon,Assam
 149 | 149,Dhekiajuli,Assam
 150 | 150,Dhubri,Assam
 151 | 151,Dibrugarh,Assam
 152 | 152,Digboi,Assam
 153 | 153,Diphu,Assam
 154 | 154,Dispur,Assam
 155 | 156,Gauripur,Assam
 156 | 157,Goalpara,Assam
 157 | 158,Golaghat,Assam
 158 | 159,Guwahati,Assam
 159 | 160,Haflong,Assam
 160 | 161,Hailakandi,Assam
 161 | 162,Hojai,Assam
 162 | 163,Jorhat,Assam
 163 | 164,Karimganj,Assam
 164 | 165,Kokrajhar,Assam
 165 | 166,Lanka,Assam
 166 | 167,Lumding,Assam
 167 | 168,Mangaldoi,Assam
 168 | 169,Mankachar,Assam
 169 | 170,Margherita,Assam
 170 | 171,Mariani,Assam
 171 | 172,Marigaon,Assam
 172 | 173,Nagaon,Assam
 173 | 174,Nalbari,Assam
 174 | 175,North Lakhimpur,Assam
 175 | 176,Rangia,Assam
 176 | 177,Sibsagar,Assam
 177 | 178,Silapathar,Assam
 178 | 179,Silchar,Assam
 179 | 180,Tezpur,Assam
 180 | 181,Tinsukia,Assam
 181 | 182,Amarpur,Bihar
 182 | 183,Araria,Bihar
 183 | 184,Areraj,Bihar
 184 | 185,Arrah,Bihar
 185 | 186,Asarganj,Bihar
 186 | 187,Aurangabad,Bihar
 187 | 188,Bagaha,Bihar
 188 | 189,Bahadurganj,Bihar
 189 | 190,Bairgania,Bihar
 190 | 191,Bakhtiarpur,Bihar
 191 | 192,Banka,Bihar
 192 | 193,Banmankhi Bazar,Bihar
 193 | 194,Barahiya,Bihar
 194 | 195,Barauli,Bihar
 195 | 196,Barbigha,Bihar
 196 | 197,Barh,Bihar
 197 | 198,Begusarai,Bihar
 198 | 199,Behea,Bihar
 199 | 200,Bettiah,Bihar
 200 | 201,Bhabua,Bihar
 201 | 202,Bhagalpur,Bihar
 202 | 203,Bihar Sharif,Bihar
 203 | 204,Bikramganj,Bihar
 204 | 205,Bodh Gaya,Bihar
 205 | 206,Buxar,Bihar
 206 | 207,Chandan Bara,Bihar
 207 | 208,Chanpatia,Bihar
 208 | 209,Chhapra,Bihar
 209 | 210,Colgong,Bihar
 210 | 211,Dalsinghsarai,Bihar
 211 | 212,Darbhanga,Bihar
 212 | 213,Daudnagar,Bihar
 213 | 214,Dehri-on-Sone,Bihar
 214 | 215,Dhaka,Bihar
 215 | 216,Dighwara,Bihar
 216 | 217,Dumraon,Bihar
 217 | 218,Fatwah,Bihar
 218 | 219,Forbesganj,Bihar
 219 | 220,Gaya,Bihar
 220 | 221,Gogri Jamalpur,Bihar
 221 | 222,Gopalganj,Bihar
 222 | 223,Hajipur,Bihar
 223 | 224,Hilsa,Bihar
 224 | 225,Hisua,Bihar
 225 | 226,Islampur,Bihar
 226 | 227,Jagdispur,Bihar
 227 | 228,Jamalpur,Bihar
 228 | 229,Jamui,Bihar
 229 | 230,Jehanabad,Bihar
 230 | 231,Jhajha,Bihar
 231 | 232,Jhanjharpur,Bihar
 232 | 233,Jogabani,Bihar
 233 | 234,Kanti,Bihar
 234 | 235,Katihar,Bihar
 235 | 236,Khagaria,Bihar
 236 | 237,Kharagpur,Bihar
 237 | 238,Kishanganj,Bihar
 238 | 239,Lakhisarai,Bihar
 239 | 240,Lalganj,Bihar
 240 | 241,Madhepura,Bihar
 241 | 242,Madhubani,Bihar
 242 | 243,Maharajganj,Bihar
 243 | 244,Mahnar Bazar,Bihar
 244 | 245,Makhdumpur,Bihar
 245 | 246,Maner,Bihar
 246 | 247,Manihari,Bihar
 247 | 248,Marhaura,Bihar
 248 | 249,Masaurhi,Bihar
 249 | 250,Mirganj,Bihar
 250 | 251,Mokameh,Bihar
 251 | 252,Motihari,Bihar
 252 | 253,Motipur,Bihar
 253 | 254,Munger,Bihar
 254 | 255,Murliganj,Bihar
 255 | 256,Muzaffarpur,Bihar
 256 | 257,Narkatiaganj,Bihar
 257 | 258,Naugachhia,Bihar
 258 | 259,Nawada,Bihar
 259 | 260,Nokha,Bihar
 260 | 261,Patna,Bihar
 261 | 262,Piro,Bihar
 262 | 263,Purnia,Bihar
 263 | 264,Rafiganj,Bihar
 264 | 265,Rajgir,Bihar
 265 | 266,Ramnagar,Bihar
 266 | 267,Raxaul Bazar,Bihar
 267 | 268,Revelganj,Bihar
 268 | 269,Rosera,Bihar
 269 | 270,Saharsa,Bihar
 270 | 271,Samastipur,Bihar
 271 | 272,Sasaram,Bihar
 272 | 273,Sheikhpura,Bihar
 273 | 274,Sheohar,Bihar
 274 | 275,Sherghati,Bihar
 275 | 276,Silao,Bihar
 276 | 277,Sitamarhi,Bihar
 277 | 278,Siwan,Bihar
 278 | 279,Sonepur,Bihar
 279 | 280,Sugauli,Bihar
 280 | 281,Sultanganj,Bihar
 281 | 282,Supaul,Bihar
 282 | 283,Warisaliganj,Bihar
 283 | 284,Ahiwara,Chhattisgarh
 284 | 285,Akaltara,Chhattisgarh
 285 | 286,Ambagarh Chowki,Chhattisgarh
 286 | 287,Ambikapur,Chhattisgarh
 287 | 288,Arang,Chhattisgarh
 288 | 289,Bade Bacheli,Chhattisgarh
 289 | 290,Balod,Chhattisgarh
 290 | 291,Baloda Bazar,Chhattisgarh
 291 | 292,Bemetra,Chhattisgarh
 292 | 293,Bhatapara,Chhattisgarh
 293 | 294,Bilaspur,Chhattisgarh
 294 | 295,Birgaon,Chhattisgarh
 295 | 296,Champa,Chhattisgarh
 296 | 297,Chirmiri,Chhattisgarh
 297 | 298,Dalli-Rajhara,Chhattisgarh
 298 | 299,Dhamtari,Chhattisgarh
 299 | 300,Dipka,Chhattisgarh
 300 | 301,Dongargarh,Chhattisgarh
 301 | 302,Durg-Bhilai Nagar,Chhattisgarh
 302 | 303,Gobranawapara,Chhattisgarh
 303 | 304,Jagdalpur,Chhattisgarh
 304 | 305,Janjgir,Chhattisgarh
 305 | 306,Jashpurnagar,Chhattisgarh
 306 | 307,Kanker,Chhattisgarh
 307 | 308,Kawardha,Chhattisgarh
 308 | 309,Kondagaon,Chhattisgarh
 309 | 310,Korba,Chhattisgarh
 310 | 311,Mahasamund,Chhattisgarh
 311 | 312,Mahendragarh,Chhattisgarh
 312 | 313,Mungeli,Chhattisgarh
 313 | 314,Naila Janjgir,Chhattisgarh
 314 | 315,Raigarh,Chhattisgarh
 315 | 316,Raipur,Chhattisgarh
 316 | 317,Rajnandgaon,Chhattisgarh
 317 | 318,Sakti,Chhattisgarh
 318 | 319,Tilda Newra,Chhattisgarh
 319 | 320,Amli,Dadra & Nagar Haveli
 320 | 321,Silvassa,Dadra & Nagar Haveli
 321 | 322,Daman and Diu,Daman & Diu
 322 | 323,Daman and Diu,Daman & Diu
 323 | 324,Asola,Delhi
 324 | 325,Delhi,Delhi
 325 | 326,Aldona,Goa
 326 | 327,Curchorem Cacora,Goa
 327 | 328,Madgaon,Goa
 328 | 329,Mapusa,Goa
 329 | 330,Margao,Goa
 330 | 331,Marmagao,Goa
 331 | 332,Panaji,Goa
 332 | 333,Ahmedabad,Gujarat
 333 | 334,Amreli,Gujarat
 334 | 335,Anand,Gujarat
 335 | 336,Ankleshwar,Gujarat
 336 | 337,Bharuch,Gujarat
 337 | 338,Bhavnagar,Gujarat
 338 | 339,Bhuj,Gujarat
 339 | 340,Cambay,Gujarat
 340 | 341,Dahod,Gujarat
 341 | 342,Deesa,Gujarat
 342 | 343,Dharampur, India
 343 | 344,Dholka,Gujarat
 344 | 345,Gandhinagar,Gujarat
 345 | 346,Godhra,Gujarat
 346 | 347,Himatnagar,Gujarat
 347 | 348,Idar,Gujarat
 348 | 349,Jamnagar,Gujarat
 349 | 350,Junagadh,Gujarat
 350 | 351,Kadi,Gujarat
 351 | 352,Kalavad,Gujarat
 352 | 353,Kalol,Gujarat
 353 | 354,Kapadvanj,Gujarat
 354 | 355,Karjan,Gujarat
 355 | 356,Keshod,Gujarat
 356 | 357,Khambhalia,Gujarat
 357 | 358,Khambhat,Gujarat
 358 | 359,Kheda,Gujarat
 359 | 360,Khedbrahma,Gujarat
 360 | 361,Kheralu,Gujarat
 361 | 362,Kodinar,Gujarat
 362 | 363,Lathi,Gujarat
 363 | 364,Limbdi,Gujarat
 364 | 365,Lunawada,Gujarat
 365 | 366,Mahesana,Gujarat
 366 | 367,Mahuva,Gujarat
 367 | 368,Manavadar,Gujarat
 368 | 369,Mandvi,Gujarat
 369 | 370,Mangrol,Gujarat
 370 | 371,Mansa,Gujarat
 371 | 372,Mehmedabad,Gujarat
 372 | 373,Modasa,Gujarat
 373 | 374,Morvi,Gujarat
 374 | 375,Nadiad,Gujarat
 375 | 376,Navsari,Gujarat
 376 | 377,Padra,Gujarat
 377 | 378,Palanpur,Gujarat
 378 | 379,Palitana,Gujarat
 379 | 380,Pardi,Gujarat
 380 | 381,Patan,Gujarat
 381 | 382,Petlad,Gujarat
 382 | 383,Porbandar,Gujarat
 383 | 384,Radhanpur,Gujarat
 384 | 385,Rajkot,Gujarat
 385 | 386,Rajpipla,Gujarat
 386 | 387,Rajula,Gujarat
 387 | 388,Ranavav,Gujarat
 388 | 389,Rapar,Gujarat
 389 | 390,Salaya,Gujarat
 390 | 391,Sanand,Gujarat
 391 | 392,Savarkundla,Gujarat
 392 | 393,Sidhpur,Gujarat
 393 | 394,Sihor,Gujarat
 394 | 395,Songadh,Gujarat
 395 | 396,Surat,Gujarat
 396 | 397,Talaja,Gujarat
 397 | 398,Thangadh,Gujarat
 398 | 399,Tharad,Gujarat
 399 | 400,Umbergaon,Gujarat
 400 | 401,Umreth,Gujarat
 401 | 402,Una,Gujarat
 402 | 403,Unjha,Gujarat
 403 | 404,Upleta,Gujarat
 404 | 405,Vadnagar,Gujarat
 405 | 406,Vadodara,Gujarat
 406 | 407,Valsad,Gujarat
 407 | 408,Vapi,Gujarat
 408 | 409,Vapi,Gujarat
 409 | 410,Veraval,Gujarat
 410 | 411,Vijapur,Gujarat
 411 | 412,Viramgam,Gujarat
 412 | 413,Visnagar,Gujarat
 413 | 414,Vyara,Gujarat
 414 | 415,Wadhwan,Gujarat
 415 | 416,Wankaner,Gujarat
 416 | 417,Adalaj,Gujrat
 417 | 418,Adityana,Gujrat
 418 | 419,Alang,Gujrat
 419 | 420,Ambaji,Gujrat
 420 | 421,Ambaliyasan,Gujrat
 421 | 422,Andada,Gujrat
 422 | 423,Anjar,Gujrat
 423 | 424,Anklav,Gujrat
 424 | 425,Antaliya,Gujrat
 425 | 426,Arambhada,Gujrat
 426 | 427,Atul,Gujrat
 427 | 428,Ballabhgarh,Hariyana
 428 | 429,Ambala,Haryana
 429 | 430,Ambala,Haryana
 430 | 431,Asankhurd,Haryana
 431 | 432,Assandh,Haryana
 432 | 433,Ateli,Haryana
 433 | 434,Babiyal,Haryana
 434 | 435,Bahadurgarh,Haryana
 435 | 436,Barwala,Haryana
 436 | 437,Bhiwani,Haryana
 437 | 438,Charkhi Dadri,Haryana
 438 | 439,Cheeka,Haryana
 439 | 440,Ellenabad 2,Haryana
 440 | 441,Faridabad,Haryana
 441 | 442,Fatehabad,Haryana
 442 | 443,Ganaur,Haryana
 443 | 444,Gharaunda,Haryana
 444 | 445,Gohana,Haryana
 445 | 446,Gurgaon,Haryana
 446 | 447,Haibat(Yamuna Nagar),Haryana
 447 | 448,Hansi,Haryana
 448 | 449,Hisar,Haryana
 449 | 450,Hodal,Haryana
 450 | 451,Jhajjar,Haryana
 451 | 452,Jind,Haryana
 452 | 453,Kaithal,Haryana
 453 | 454,Kalan Wali,Haryana
 454 | 455,Kalka,Haryana
 455 | 456,Karnal,Haryana
 456 | 457,Ladwa,Haryana
 457 | 458,Mahendragarh,Haryana
 458 | 459,Mandi Dabwali,Haryana
 459 | 460,Narnaul,Haryana
 460 | 461,Narwana,Haryana
 461 | 462,Palwal,Haryana
 462 | 463,Panchkula,Haryana
 463 | 464,Panipat,Haryana
 464 | 465,Pehowa,Haryana
 465 | 466,Pinjore,Haryana
 466 | 467,Rania,Haryana
 467 | 468,Ratia,Haryana
 468 | 469,Rewari,Haryana
 469 | 470,Rohtak,Haryana
 470 | 471,Safidon,Haryana
 471 | 472,Samalkha,Haryana
 472 | 473,Shahbad,Haryana
 473 | 474,Sirsa,Haryana
 474 | 475,Sohna,Haryana
 475 | 476,Sonipat,Haryana
 476 | 477,Taraori,Haryana
 477 | 478,Thanesar,Haryana
 478 | 479,Tohana,Haryana
 479 | 480,Yamunanagar,Haryana
 480 | 481,Arki,Himachal Pradesh
 481 | 482,Baddi,Himachal Pradesh
 482 | 483,Bilaspur,Himachal Pradesh
 483 | 484,Chamba,Himachal Pradesh
 484 | 485,Dalhousie,Himachal Pradesh
 485 | 486,Dharamsala,Himachal Pradesh
 486 | 487,Hamirpur,Himachal Pradesh
 487 | 488,Mandi,Himachal Pradesh
 488 | 489,Nahan,Himachal Pradesh
 489 | 490,Shimla,Himachal Pradesh
 490 | 491,Solan,Himachal Pradesh
 491 | 492,Sundarnagar,Himachal Pradesh
 492 | 493,Jammu,Jammu & Kashmir
 493 | 494,Achabbal,Jammu & Kashmir
 494 | 495,Akhnoor,Jammu & Kashmir
 495 | 496,Anantnag,Jammu & Kashmir
 496 | 497,Arnia,Jammu & Kashmir
 497 | 498,Awantipora,Jammu & Kashmir
 498 | 499,Bandipore,Jammu & Kashmir
 499 | 500,Baramula,Jammu & Kashmir
 500 | 501,Kathua,Jammu & Kashmir
 501 | 502,Leh,Jammu & Kashmir
 502 | 503,Punch,Jammu & Kashmir
 503 | 504,Rajauri,Jammu & Kashmir
 504 | 505,Sopore,Jammu & Kashmir
 505 | 506,Srinagar,Jammu & Kashmir
 506 | 507,Udhampur,Jammu & Kashmir
 507 | 508,Amlabad,Jharkhand
 508 | 509,Ara,Jharkhand
 509 | 510,Barughutu,Jharkhand
 510 | 511,Bokaro Steel City,Jharkhand
 511 | 512,Chaibasa,Jharkhand
 512 | 513,Chakradharpur,Jharkhand
 513 | 514,Chandrapura,Jharkhand
 514 | 515,Chatra,Jharkhand
 515 | 516,Chirkunda,Jharkhand
 516 | 517,Churi,Jharkhand
 517 | 518,Daltonganj,Jharkhand
 518 | 519,Deoghar,Jharkhand
 519 | 520,Dhanbad,Jharkhand
 520 | 521,Dumka,Jharkhand
 521 | 522,Garhwa,Jharkhand
 522 | 523,Ghatshila,Jharkhand
 523 | 524,Giridih,Jharkhand
 524 | 525,Godda,Jharkhand
 525 | 526,Gomoh,Jharkhand
 526 | 527,Gumia,Jharkhand
 527 | 528,Gumla,Jharkhand
 528 | 529,Hazaribag,Jharkhand
 529 | 530,Hussainabad,Jharkhand
 530 | 531,Jamshedpur,Jharkhand
 531 | 532,Jamtara,Jharkhand
 532 | 533,Jhumri Tilaiya,Jharkhand
 533 | 534,Khunti,Jharkhand
 534 | 535,Lohardaga,Jharkhand
 535 | 536,Madhupur,Jharkhand
 536 | 537,Mihijam,Jharkhand
 537 | 538,Musabani,Jharkhand
 538 | 539,Pakaur,Jharkhand
 539 | 540,Patratu,Jharkhand
 540 | 541,Phusro,Jharkhand
 541 | 542,Ramngarh,Jharkhand
 542 | 543,Ranchi,Jharkhand
 543 | 544,Sahibganj,Jharkhand
 544 | 545,Saunda,Jharkhand
 545 | 546,Simdega,Jharkhand
 546 | 547,Tenu Dam-cum- Kathhara,Jharkhand
 547 | 548,Arasikere,Karnataka
 548 | 549,Bangalore,Karnataka
 549 | 550,Belgaum,Karnataka
 550 | 551,Bellary,Karnataka
 551 | 552,Chamrajnagar,Karnataka
 552 | 553,Chikkaballapur,Karnataka
 553 | 554,Chintamani,Karnataka
 554 | 555,Chitradurga,Karnataka
 555 | 556,Gulbarga,Karnataka
 556 | 557,Gundlupet,Karnataka
 557 | 558,Hassan,Karnataka
 558 | 559,Hospet,Karnataka
 559 | 560,Hubli,Karnataka
 560 | 561,Karkala,Karnataka
 561 | 562,Karwar,Karnataka
 562 | 563,Kolar,Karnataka
 563 | 564,Kota,Karnataka
 564 | 565,Lakshmeshwar,Karnataka
 565 | 566,Lingsugur,Karnataka
 566 | 567,Maddur,Karnataka
 567 | 568,Madhugiri,Karnataka
 568 | 569,Madikeri,Karnataka
 569 | 570,Magadi,Karnataka
 570 | 571,Mahalingpur,Karnataka
 571 | 572,Malavalli,Karnataka
 572 | 573,Malur,Karnataka
 573 | 574,Mandya,Karnataka
 574 | 575,Mangalore,Karnataka
 575 | 576,Manvi,Karnataka
 576 | 577,Mudalgi,Karnataka
 577 | 578,Mudbidri,Karnataka
 578 | 579,Muddebihal,Karnataka
 579 | 580,Mudhol,Karnataka
 580 | 581,Mulbagal,Karnataka
 581 | 582,Mundargi,Karnataka
 582 | 583,Mysore,Karnataka
 583 | 584,Nanjangud,Karnataka
 584 | 585,Pavagada,Karnataka
 585 | 586,Puttur,Karnataka
 586 | 587,Rabkavi Banhatti,Karnataka
 587 | 588,Raichur,Karnataka
 588 | 589,Ramanagaram,Karnataka
 589 | 590,Ramdurg,Karnataka
 590 | 591,Ranibennur,Karnataka
 591 | 592,Robertson Pet,Karnataka
 592 | 593,Ron,Karnataka
 593 | 594,Sadalgi,Karnataka
 594 | 595,Sagar,Karnataka
 595 | 596,Sakleshpur,Karnataka
 596 | 597,Sandur,Karnataka
 597 | 598,Sankeshwar,Karnataka
 598 | 599,Saundatti-Yellamma,Karnataka
 599 | 600,Savanur,Karnataka
 600 | 601,Sedam,Karnataka
 601 | 602,Shahabad,Karnataka
 602 | 603,Shahpur,Karnataka
 603 | 604,Shiggaon,Karnataka
 604 | 605,Shikapur,Karnataka
 605 | 606,Shimoga,Karnataka
 606 | 607,Shorapur,Karnataka
 607 | 608,Shrirangapattana,Karnataka
 608 | 609,Sidlaghatta,Karnataka
 609 | 610,Sindgi,Karnataka
 610 | 611,Sindhnur,Karnataka
 611 | 612,Sira,Karnataka
 612 | 613,Sirsi,Karnataka
 613 | 614,Siruguppa,Karnataka
 614 | 615,Srinivaspur,Karnataka
 615 | 616,Talikota,Karnataka
 616 | 617,Tarikere,Karnataka
 617 | 618,Tekkalakota,Karnataka
 618 | 619,Terdal,Karnataka
 619 | 620,Tiptur,Karnataka
 620 | 621,Tumkur,Karnataka
 621 | 622,Udupi,Karnataka
 622 | 623,Vijayapura,Karnataka
 623 | 624,Wadi,Karnataka
 624 | 625,Yadgir,Karnataka
 625 | 626,Adoor,Kerala
 626 | 627,Akathiyoor,Kerala
 627 | 628,Alappuzha,Kerala
 628 | 629,Ancharakandy,Kerala
 629 | 630,Aroor,Kerala
 630 | 631,Ashtamichira,Kerala
 631 | 632,Attingal,Kerala
 632 | 633,Avinissery,Kerala
 633 | 634,Chalakudy,Kerala
 634 | 635,Changanassery,Kerala
 635 | 636,Chendamangalam,Kerala
 636 | 637,Chengannur,Kerala
 637 | 638,Cherthala,Kerala
 638 | 639,Cheruthazham,Kerala
 639 | 640,Chittur-Thathamangalam,Kerala
 640 | 641,Chockli,Kerala
 641 | 642,Erattupetta,Kerala
 642 | 643,Guruvayoor,Kerala
 643 | 644,Irinjalakuda,Kerala
 644 | 645,Kadirur,Kerala
 645 | 646,Kalliasseri,Kerala
 646 | 647,Kalpetta,Kerala
 647 | 648,Kanhangad,Kerala
 648 | 649,Kanjikkuzhi,Kerala
 649 | 650,Kannur,Kerala
 650 | 651,Kasaragod,Kerala
 651 | 652,Kayamkulam,Kerala
 652 | 653,Kochi,Kerala
 653 | 654,Kodungallur,Kerala
 654 | 655,Kollam,Kerala
 655 | 656,Koothuparamba,Kerala
 656 | 657,Kothamangalam,Kerala
 657 | 658,Kottayam,Kerala
 658 | 659,Kozhikode,Kerala
 659 | 660,Kunnamkulam,Kerala
 660 | 661,Malappuram,Kerala
 661 | 662,Mattannur,Kerala
 662 | 663,Mavelikkara,Kerala
 663 | 664,Mavoor,Kerala
 664 | 665,Muvattupuzha,Kerala
 665 | 666,Nedumangad,Kerala
 666 | 667,Neyyattinkara,Kerala
 667 | 668,Ottappalam,Kerala
 668 | 669,Palai,Kerala
 669 | 670,Palakkad,Kerala
 670 | 671,Panniyannur,Kerala
 671 | 672,Pappinisseri,Kerala
 672 | 673,Paravoor,Kerala
 673 | 674,Pathanamthitta,Kerala
 674 | 675,Payyannur,Kerala
 675 | 676,Peringathur,Kerala
 676 | 677,Perinthalmanna,Kerala
 677 | 678,Perumbavoor,Kerala
 678 | 679,Ponnani,Kerala
 679 | 680,Punalur,Kerala
 680 | 681,Quilandy,Kerala
 681 | 682,Shoranur,Kerala
 682 | 683,Taliparamba,Kerala
 683 | 684,Thiruvalla,Kerala
 684 | 685,Thiruvananthapuram,Kerala
 685 | 686,Thodupuzha,Kerala
 686 | 687,Thrissur,Kerala
 687 | 688,Tirur,Kerala
 688 | 689,Vadakara,Kerala
 689 | 690,Vaikom,Kerala
 690 | 691,Varkala,Kerala
 691 | 692,Kavaratti,Lakshadweep
 692 | 693,Ashok Nagar,Madhya Pradesh
 693 | 694,Balaghat,Madhya Pradesh
 694 | 695,Betul,Madhya Pradesh
 695 | 696,Bhopal,Madhya Pradesh
 696 | 697,Burhanpur,Madhya Pradesh
 697 | 698,Chhatarpur,Madhya Pradesh
 698 | 699,Dabra,Madhya Pradesh
 699 | 700,Datia,Madhya Pradesh
 700 | 701,Dewas,Madhya Pradesh
 701 | 702,Dhar,Madhya Pradesh
 702 | 703,Fatehabad,Madhya Pradesh
 703 | 704,Gwalior,Madhya Pradesh
 704 | 705,Indore,Madhya Pradesh
 705 | 706,Itarsi,Madhya Pradesh
 706 | 707,Jabalpur,Madhya Pradesh
 707 | 708,Katni,Madhya Pradesh
 708 | 709,Kotma,Madhya Pradesh
 709 | 710,Lahar,Madhya Pradesh
 710 | 711,Lundi,Madhya Pradesh
 711 | 712,Maharajpur,Madhya Pradesh
 712 | 713,Mahidpur,Madhya Pradesh
 713 | 714,Maihar,Madhya Pradesh
 714 | 715,Malajkhand,Madhya Pradesh
 715 | 716,Manasa,Madhya Pradesh
 716 | 717,Manawar,Madhya Pradesh
 717 | 718,Mandideep,Madhya Pradesh
 718 | 719,Mandla,Madhya Pradesh
 719 | 720,Mandsaur,Madhya Pradesh
 720 | 721,Mauganj,Madhya Pradesh
 721 | 722,Mhow Cantonment,Madhya Pradesh
 722 | 723,Mhowgaon,Madhya Pradesh
 723 | 724,Morena,Madhya Pradesh
 724 | 725,Multai,Madhya Pradesh
 725 | 726,Murwara,Madhya Pradesh
 726 | 727,Nagda,Madhya Pradesh
 727 | 728,Nainpur,Madhya Pradesh
 728 | 729,Narsinghgarh,Madhya Pradesh
 729 | 730,Narsinghgarh,Madhya Pradesh
 730 | 731,Neemuch,Madhya Pradesh
 731 | 732,Nepanagar,Madhya Pradesh
 732 | 733,Niwari,Madhya Pradesh
 733 | 734,Nowgong,Madhya Pradesh
 734 | 735,Nowrozabad,Madhya Pradesh
 735 | 736,Pachore,Madhya Pradesh
 736 | 737,Pali,Madhya Pradesh
 737 | 738,Panagar,Madhya Pradesh
 738 | 739,Pandhurna,Madhya Pradesh
 739 | 740,Panna,Madhya Pradesh
 740 | 741,Pasan,Madhya Pradesh
 741 | 742,Pipariya,Madhya Pradesh
 742 | 743,Pithampur,Madhya Pradesh
 743 | 744,Porsa,Madhya Pradesh
 744 | 745,Prithvipur,Madhya Pradesh
 745 | 746,Raghogarh-Vijaypur,Madhya Pradesh
 746 | 747,Rahatgarh,Madhya Pradesh
 747 | 748,Raisen,Madhya Pradesh
 748 | 749,Rajgarh,Madhya Pradesh
 749 | 750,Ratlam,Madhya Pradesh
 750 | 751,Rau,Madhya Pradesh
 751 | 752,Rehli,Madhya Pradesh
 752 | 753,Rewa,Madhya Pradesh
 753 | 754,Sabalgarh,Madhya Pradesh
 754 | 755,Sagar,Madhya Pradesh
 755 | 756,Sanawad,Madhya Pradesh
 756 | 757,Sarangpur,Madhya Pradesh
 757 | 758,Sarni,Madhya Pradesh
 758 | 759,Satna,Madhya Pradesh
 759 | 760,Sausar,Madhya Pradesh
 760 | 761,Sehore,Madhya Pradesh
 761 | 762,Sendhwa,Madhya Pradesh
 762 | 763,Seoni,Madhya Pradesh
 763 | 764,Seoni-Malwa,Madhya Pradesh
 764 | 765,Shahdol,Madhya Pradesh
 765 | 766,Shajapur,Madhya Pradesh
 766 | 767,Shamgarh,Madhya Pradesh
 767 | 768,Sheopur,Madhya Pradesh
 768 | 769,Shivpuri,Madhya Pradesh
 769 | 770,Shujalpur,Madhya Pradesh
 770 | 771,Sidhi,Madhya Pradesh
 771 | 772,Sihora,Madhya Pradesh
 772 | 773,Singrauli,Madhya Pradesh
 773 | 774,Sironj,Madhya Pradesh
 774 | 775,Sohagpur,Madhya Pradesh
 775 | 776,Tarana,Madhya Pradesh
 776 | 777,Tikamgarh,Madhya Pradesh
 777 | 778,Ujhani,Madhya Pradesh
 778 | 779,Ujjain,Madhya Pradesh
 779 | 780,Umaria,Madhya Pradesh
 780 | 781,Vidisha,Madhya Pradesh
 781 | 782,Wara Seoni,Madhya Pradesh
 782 | 783,Ahmednagar,Maharashtra
 783 | 784,Akola,Maharashtra
 784 | 785,Amravati,Maharashtra
 785 | 786,Aurangabad,Maharashtra
 786 | 787,Baramati,Maharashtra
 787 | 788,Chalisgaon,Maharashtra
 788 | 789,Chinchani,Maharashtra
 789 | 790,Devgarh,Maharashtra
 790 | 791,Dhule,Maharashtra
 791 | 792,Dombivli,Maharashtra
 792 | 793,Durgapur,Maharashtra
 793 | 794,Ichalkaranji,Maharashtra
 794 | 795,Jalna,Maharashtra
 795 | 796,Kalyan,Maharashtra
 796 | 797,Latur,Maharashtra
 797 | 798,Loha,Maharashtra
 798 | 799,Lonar,Maharashtra
 799 | 800,Lonavla,Maharashtra
 800 | 801,Mahad,Maharashtra
 801 | 802,Mahuli,Maharashtra
 802 | 803,Malegaon,Maharashtra
 803 | 804,Malkapur,Maharashtra
 804 | 805,Manchar,Maharashtra
 805 | 806,Mangalvedhe,Maharashtra
 806 | 807,Mangrulpir,Maharashtra
 807 | 808,Manjlegaon,Maharashtra
 808 | 809,Manmad,Maharashtra
 809 | 810,Manwath,Maharashtra
 810 | 811,Mehkar,Maharashtra
 811 | 812,Mhaswad,Maharashtra
 812 | 813,Miraj,Maharashtra
 813 | 814,Morshi,Maharashtra
 814 | 815,Mukhed,Maharashtra
 815 | 816,Mul,Maharashtra
 816 | 817,Mumbai,Maharashtra
 817 | 818,Murtijapur,Maharashtra
 818 | 819,Nagpur,Maharashtra
 819 | 820,Nalasopara,Maharashtra
 820 | 821,Nanded-Waghala,Maharashtra
 821 | 822,Nandgaon,Maharashtra
 822 | 823,Nandura,Maharashtra
 823 | 824,Nandurbar,Maharashtra
 824 | 825,Narkhed,Maharashtra
 825 | 826,Nashik,Maharashtra
 826 | 827,Navi Mumbai,Maharashtra
 827 | 828,Nawapur,Maharashtra
 828 | 829,Nilanga,Maharashtra
 829 | 830,Osmanabad,Maharashtra
 830 | 831,Ozar,Maharashtra
 831 | 832,Pachora,Maharashtra
 832 | 833,Paithan,Maharashtra
 833 | 834,Palghar,Maharashtra
 834 | 835,Pandharkaoda,Maharashtra
 835 | 836,Pandharpur,Maharashtra
 836 | 837,Panvel,Maharashtra
 837 | 838,Parbhani,Maharashtra
 838 | 839,Parli,Maharashtra
 839 | 840,Parola,Maharashtra
 840 | 841,Partur,Maharashtra
 841 | 842,Pathardi,Maharashtra
 842 | 843,Pathri,Maharashtra
 843 | 844,Patur,Maharashtra
 844 | 845,Pauni,Maharashtra
 845 | 846,Pen,Maharashtra
 846 | 847,Phaltan,Maharashtra
 847 | 848,Pulgaon,Maharashtra
 848 | 849,Pune,Maharashtra
 849 | 850,Purna,Maharashtra
 850 | 851,Pusad,Maharashtra
 851 | 852,Rahuri,Maharashtra
 852 | 853,Rajura,Maharashtra
 853 | 854,Ramtek,Maharashtra
 854 | 855,Ratnagiri,Maharashtra
 855 | 856,Raver,Maharashtra
 856 | 857,Risod,Maharashtra
 857 | 858,Sailu,Maharashtra
 858 | 859,Sangamner,Maharashtra
 859 | 860,Sangli,Maharashtra
 860 | 861,Sangole,Maharashtra
 861 | 862,Sasvad,Maharashtra
 862 | 863,Satana,Maharashtra
 863 | 864,Satara,Maharashtra
 864 | 865,Savner,Maharashtra
 865 | 866,Sawantwadi,Maharashtra
 866 | 867,Shahade,Maharashtra
 867 | 868,Shegaon,Maharashtra
 868 | 869,Shendurjana,Maharashtra
 869 | 870,Shirdi,Maharashtra
 870 | 871,Shirpur-Warwade,Maharashtra
 871 | 872,Shirur,Maharashtra
 872 | 873,Shrigonda,Maharashtra
 873 | 874,Shrirampur,Maharashtra
 874 | 875,Sillod,Maharashtra
 875 | 876,Sinnar,Maharashtra
 876 | 877,Solapur,Maharashtra
 877 | 878,Soyagaon,Maharashtra
 878 | 879,Talegaon Dabhade,Maharashtra
 879 | 880,Talode,Maharashtra
 880 | 881,Tasgaon,Maharashtra
 881 | 882,Tirora,Maharashtra
 882 | 883,Tuljapur,Maharashtra
 883 | 884,Tumsar,Maharashtra
 884 | 885,Uran,Maharashtra
 885 | 886,Uran Islampur,Maharashtra
 886 | 887,Wadgaon Road,Maharashtra
 887 | 888,Wai,Maharashtra
 888 | 889,Wani,Maharashtra
 889 | 890,Wardha,Maharashtra
 890 | 891,Warora,Maharashtra
 891 | 892,Warud,Maharashtra
 892 | 893,Washim,Maharashtra
 893 | 894,Yevla,Maharashtra
 894 | 895,Uchgaon,Maharashtra
 895 | 896,Udgir,Maharashtra
 896 | 897,Umarga,Maharastra
 897 | 898,Umarkhed,Maharastra
 898 | 899,Umred,Maharastra
 899 | 900,Vadgaon Kasba,Maharastra
 900 | 901,Vaijapur,Maharastra
 901 | 902,Vasai,Maharastra
 902 | 903,Virar,Maharastra
 903 | 904,Vita,Maharastra
 904 | 905,Yavatmal,Maharastra
 905 | 906,Yawal,Maharastra
 906 | 907,Imphal,Manipur
 907 | 908,Kakching,Manipur
 908 | 909,Lilong,Manipur
 909 | 910,Mayang Imphal,Manipur
 910 | 911,Thoubal,Manipur
 911 | 912,Jowai,Meghalaya
 912 | 913,Nongstoin,Meghalaya
 913 | 914,Shillong,Meghalaya
 914 | 915,Tura,Meghalaya
 915 | 916,Aizawl,Mizoram
 916 | 917,Champhai,Mizoram
 917 | 918,Lunglei,Mizoram
 918 | 919,Saiha,Mizoram
 919 | 920,Dimapur,Nagaland
 920 | 921,Kohima,Nagaland
 921 | 922,Mokokchung,Nagaland
 922 | 923,Tuensang,Nagaland
 923 | 924,Wokha,Nagaland
 924 | 925,Zunheboto,Nagaland
 925 | 950,Anandapur,Orissa
 926 | 951,Anugul,Orissa
 927 | 952,Asika,Orissa
 928 | 953,Balangir,Orissa
 929 | 954,Balasore,Orissa
 930 | 955,Baleshwar,Orissa
 931 | 956,Bamra,Orissa
 932 | 957,Barbil,Orissa
 933 | 958,Bargarh,Orissa
 934 | 959,Bargarh,Orissa
 935 | 960,Baripada,Orissa
 936 | 961,Basudebpur,Orissa
 937 | 962,Belpahar,Orissa
 938 | 963,Bhadrak,Orissa
 939 | 964,Bhawanipatna,Orissa
 940 | 965,Bhuban,Orissa
 941 | 966,Bhubaneswar,Orissa
 942 | 967,Biramitrapur,Orissa
 943 | 968,Brahmapur,Orissa
 944 | 969,Brajrajnagar,Orissa
 945 | 970,Byasanagar,Orissa
 946 | 971,Cuttack,Orissa
 947 | 972,Debagarh,Orissa
 948 | 973,Dhenkanal,Orissa
 949 | 974,Gunupur,Orissa
 950 | 975,Hinjilicut,Orissa
 951 | 976,Jagatsinghapur,Orissa
 952 | 977,Jajapur,Orissa
 953 | 978,Jaleswar,Orissa
 954 | 979,Jatani,Orissa
 955 | 980,Jeypur,Orissa
 956 | 981,Jharsuguda,Orissa
 957 | 982,Joda,Orissa
 958 | 983,Kantabanji,Orissa
 959 | 984,Karanjia,Orissa
 960 | 985,Kendrapara,Orissa
 961 | 986,Kendujhar,Orissa
 962 | 987,Khordha,Orissa
 963 | 988,Koraput,Orissa
 964 | 989,Malkangiri,Orissa
 965 | 990,Nabarangapur,Orissa
 966 | 991,Paradip,Orissa
 967 | 992,Parlakhemundi,Orissa
 968 | 993,Pattamundai,Orissa
 969 | 994,Phulabani,Orissa
 970 | 995,Puri,Orissa
 971 | 996,Rairangpur,Orissa
 972 | 997,Rajagangapur,Orissa
 973 | 998,Raurkela,Orissa
 974 | 999,Rayagada,Orissa
 975 | 1000,Sambalpur,Orissa
 976 | 1001,Soro,Orissa
 977 | 1002,Sunabeda,Orissa
 978 | 1003,Sundargarh,Orissa
 979 | 1004,Talcher,Orissa
 980 | 1005,Titlagarh,Orissa
 981 | 1006,Umarkote,Orissa
 982 | 1007,Karaikal,Pondicherry
 983 | 1008,Mahe,Pondicherry
 984 | 1009,Pondicherry,Pondicherry
 985 | 1010,Yanam,Pondicherry
 986 | 1011,Ahmedgarh,Punjab
 987 | 1012,Amritsar,Punjab
 988 | 1013,Barnala,Punjab
 989 | 1014,Batala,Punjab
 990 | 1015,Bathinda,Punjab
 991 | 1016,Bhagha Purana,Punjab
 992 | 1017,Budhlada,Punjab
 993 | 1018,Chandigarh,Punjab
 994 | 1019,Dasua,Punjab
 995 | 1020,Dhuri,Punjab
 996 | 1021,Dinanagar,Punjab
 997 | 1022,Faridkot,Punjab
 998 | 1023,Fazilka,Punjab
 999 | 1024,Firozpur,Punjab
1000 | 1025,Firozpur Cantt.,Punjab
1001 | 1026,Giddarbaha,Punjab
1002 | 1027,Gobindgarh,Punjab
1003 | 1028,Gurdaspur,Punjab
1004 | 1029,Hoshiarpur,Punjab
1005 | 1030,Jagraon,Punjab
1006 | 1031,Jaitu,Punjab
1007 | 1032,Jalalabad,Punjab
1008 | 1033,Jalandhar,Punjab
1009 | 1034,Jalandhar Cantt.,Punjab
1010 | 1035,Jandiala,Punjab
1011 | 1036,Kapurthala,Punjab
1012 | 1037,Karoran,Punjab
1013 | 1038,Kartarpur,Punjab
1014 | 1039,Khanna,Punjab
1015 | 1040,Kharar,Punjab
1016 | 1041,Kot Kapura,Punjab
1017 | 1042,Kurali,Punjab
1018 | 1043,Longowal,Punjab
1019 | 1044,Ludhiana,Punjab
1020 | 1045,Malerkotla,Punjab
1021 | 1046,Malout,Punjab
1022 | 1047,Mansa,Punjab
1023 | 1048,Maur,Punjab
1024 | 1049,Moga,Punjab
1025 | 1050,Mohali,Punjab
1026 | 1051,Morinda,Punjab
1027 | 1052,Mukerian,Punjab
1028 | 1053,Muktsar,Punjab
1029 | 1054,Nabha,Punjab
1030 | 1055,Nakodar,Punjab
1031 | 1056,Nangal,Punjab
1032 | 1057,Nawanshahr,Punjab
1033 | 1058,Pathankot,Punjab
1034 | 1059,Patiala,Punjab
1035 | 1060,Patran,Punjab
1036 | 1061,Patti,Punjab
1037 | 1062,Phagwara,Punjab
1038 | 1063,Phillaur,Punjab
1039 | 1064,Qadian,Punjab
1040 | 1065,Raikot,Punjab
1041 | 1066,Rajpura,Punjab
1042 | 1067,Rampura Phul,Punjab
1043 | 1068,Rupnagar,Punjab
1044 | 1069,Samana,Punjab
1045 | 1070,Sangrur,Punjab
1046 | 1071,Sirhind Fatehgarh Sahib,Punjab
1047 | 1072,Sujanpur,Punjab
1048 | 1073,Sunam,Punjab
1049 | 1074,Talwara,Punjab
1050 | 1075,Tarn Taran,Punjab
1051 | 1076,Urmar Tanda,Punjab
1052 | 1077,Zira,Punjab
1053 | 1078,Zirakpur,Punjab
1054 | 1079,Bali,Rajasthan
1055 | 1080,Banswara,Rajastan
1056 | 1081,Ajmer,Rajasthan
1057 | 1082,Alwar,Rajasthan
1058 | 1083,Bandikui,Rajasthan
1059 | 1084,Baran,Rajasthan
1060 | 1085,Barmer,Rajasthan
1061 | 1086,Bikaner,Rajasthan
1062 | 1087,Fatehpur,Rajasthan
1063 | 1088,Jaipur,Rajasthan
1064 | 1089,Jaisalmer,Rajasthan
1065 | 1090,Jodhpur,Rajasthan
1066 | 1091,Kota,Rajasthan
1067 | 1092,Lachhmangarh,Rajasthan
1068 | 1093,Ladnu,Rajasthan
1069 | 1094,Lakheri,Rajasthan
1070 | 1095,Lalsot,Rajasthan
1071 | 1096,Losal,Rajasthan
1072 | 1097,Makrana,Rajasthan
1073 | 1098,Malpura,Rajasthan
1074 | 1099,Mandalgarh,Rajasthan
1075 | 1100,Mandawa,Rajasthan
1076 | 1101,Mangrol,Rajasthan
1077 | 1102,Merta City,Rajasthan
1078 | 1103,Mount Abu,Rajasthan
1079 | 1104,Nadbai,Rajasthan
1080 | 1105,Nagar,Rajasthan
1081 | 1106,Nagaur,Rajasthan
1082 | 1107,Nargund,Rajasthan
1083 | 1108,Nasirabad,Rajasthan
1084 | 1109,Nathdwara,Rajasthan
1085 | 1110,Navalgund,Rajasthan
1086 | 1111,Nawalgarh,Rajasthan
1087 | 1112,Neem-Ka-Thana,Rajasthan
1088 | 1113,Nelamangala,Rajasthan
1089 | 1114,Nimbahera,Rajasthan
1090 | 1115,Nipani,Rajasthan
1091 | 1116,Niwai,Rajasthan
1092 | 1117,Nohar,Rajasthan
1093 | 1118,Nokha,Rajasthan
1094 | 1119,Pali,Rajasthan
1095 | 1120,Phalodi,Rajasthan
1096 | 1121,Phulera,Rajasthan
1097 | 1122,Pilani,Rajasthan
1098 | 1123,Pilibanga,Rajasthan
1099 | 1124,Pindwara,Rajasthan
1100 | 1125,Pipar City,Rajasthan
1101 | 1126,Prantij,Rajasthan
1102 | 1127,Pratapgarh,Rajasthan
1103 | 1128,Raisinghnagar,Rajasthan
1104 | 1129,Rajakhera,Rajasthan
1105 | 1130,Rajaldesar,Rajasthan
1106 | 1131,Rajgarh (Alwar),Rajasthan
1107 | 1132,Rajgarh (Churu,Rajasthan
1108 | 1133,Rajsamand,Rajasthan
1109 | 1134,Ramganj Mandi,Rajasthan
1110 | 1135,Ramngarh,Rajasthan
1111 | 1136,Ratangarh,Rajasthan
1112 | 1137,Rawatbhata,Rajasthan
1113 | 1138,Rawatsar,Rajasthan
1114 | 1139,Reengus,Rajasthan
1115 | 1140,Sadri,Rajasthan
1116 | 1141,Sadulshahar,Rajasthan
1117 | 1142,Sagwara,Rajasthan
1118 | 1143,Sambhar,Rajasthan
1119 | 1144,Sanchore,Rajasthan
1120 | 1145,Sangaria,Rajasthan
1121 | 1146,Sardarshahar,Rajasthan
1122 | 1147,Sawai Madhopur,Rajasthan
1123 | 1148,Shahpura,Rajasthan
1124 | 1149,Shahpura,Rajasthan
1125 | 1150,Sheoganj,Rajasthan
1126 | 1151,Sikar,Rajasthan
1127 | 1152,Sirohi,Rajasthan
1128 | 1153,Sojat,Rajasthan
1129 | 1154,Sri Madhopur,Rajasthan
1130 | 1155,Sujangarh,Rajasthan
1131 | 1156,Sumerpur,Rajasthan
1132 | 1157,Suratgarh,Rajasthan
1133 | 1158,Taranagar,Rajasthan
1134 | 1159,Todabhim,Rajasthan
1135 | 1160,Todaraisingh,Rajasthan
1136 | 1161,Tonk,Rajasthan
1137 | 1162,Udaipur,Rajasthan
1138 | 1163,Udaipurwati,Rajasthan
1139 | 1164,Vijainagar,Rajasthan
1140 | 1165,Gangtok,Sikkim
1141 | 1166,Calcutta,West Bengal
1142 | 1167,Arakkonam,Tamil Nadu
1143 | 1168,Arcot,Tamil Nadu
1144 | 1169,Aruppukkottai,Tamil Nadu
1145 | 1170,Bhavani,Tamil Nadu
1146 | 1171,Chengalpattu,Tamil Nadu
1147 | 1172,Chennai,Tamil Nadu
1148 | 1173,Chinna salem,Tamil nadu
1149 | 1174,Coimbatore,Tamil Nadu
1150 | 1175,Coonoor,Tamil Nadu
1151 | 1176,Cuddalore,Tamil Nadu
1152 | 1177,Dharmapuri,Tamil Nadu
1153 | 1178,Dindigul,Tamil Nadu
1154 | 1179,Erode,Tamil Nadu
1155 | 1180,Gudalur,Tamil Nadu
1156 | 1181,Gudalur,Tamil Nadu
1157 | 1182,Gudalur,Tamil Nadu
1158 | 1183,Kanchipuram,Tamil Nadu
1159 | 1184,Karaikudi,Tamil Nadu
1160 | 1185,Karungal,Tamil Nadu
1161 | 1186,Karur,Tamil Nadu
1162 | 1187,Kollankodu,Tamil Nadu
1163 | 1188,Lalgudi,Tamil Nadu
1164 | 1189,Madurai,Tamil Nadu
1165 | 1190,Nagapattinam,Tamil Nadu
1166 | 1191,Nagercoil,Tamil Nadu
1167 | 1192,Namagiripettai,Tamil Nadu
1168 | 1193,Namakkal,Tamil Nadu
1169 | 1194,Nandivaram-Guduvancheri,Tamil Nadu
1170 | 1195,Nanjikottai,Tamil Nadu
1171 | 1196,Natham,Tamil Nadu
1172 | 1197,Nellikuppam,Tamil Nadu
1173 | 1198,Neyveli,Tamil Nadu
1174 | 1199,O,
1175 | 1200,Oddanchatram,Tamil Nadu
1176 | 1201,P.N.Patti,Tamil Nadu
1177 | 1202,Pacode,Tamil Nadu
1178 | 1203,Padmanabhapuram,Tamil Nadu
1179 | 1204,Palani,Tamil Nadu
1180 | 1205,Palladam,Tamil Nadu
1181 | 1206,Pallapatti,Tamil Nadu
1182 | 1207,Pallikonda,Tamil Nadu
1183 | 1208,Panagudi,Tamil Nadu
1184 | 1209,Panruti,Tamil Nadu
1185 | 1210,Paramakudi,Tamil Nadu
1186 | 1211,Parangipettai,Tamil Nadu
1187 | 1212,Pattukkottai,Tamil Nadu
1188 | 1213,Perambalur,Tamil Nadu
1189 | 1214,Peravurani,Tamil Nadu
1190 | 1215,Periyakulam,Tamil Nadu
1191 | 1216,Periyasemur,Tamil Nadu
1192 | 1217,Pernampattu,Tamil Nadu
1193 | 1218,Pollachi,Tamil Nadu
1194 | 1219,Polur,Tamil Nadu
1195 | 1220,Ponneri,Tamil Nadu
1196 | 1221,Pudukkottai,Tamil Nadu
1197 | 1222,Pudupattinam,Tamil Nadu
1198 | 1223,Puliyankudi,Tamil Nadu
1199 | 1224,Punjaipugalur,Tamil Nadu
1200 | 1225,Rajapalayam,Tamil Nadu
1201 | 1226,Ramanathapuram,Tamil Nadu
1202 | 1227,Rameshwaram,Tamil Nadu
1203 | 1228,Rasipuram,Tamil Nadu
1204 | 1229,Salem,Tamil Nadu
1205 | 1230,Sankarankoil,Tamil Nadu
1206 | 1231,Sankari,Tamil Nadu
1207 | 1232,Sathyamangalam,Tamil Nadu
1208 | 1233,Sattur,Tamil Nadu
1209 | 1234,Shenkottai,Tamil Nadu
1210 | 1235,Sholavandan,Tamil Nadu
1211 | 1236,Sholingur,Tamil Nadu
1212 | 1237,Sirkali,Tamil Nadu
1213 | 1238,Sivaganga,Tamil Nadu
1214 | 1239,Sivagiri,Tamil Nadu
1215 | 1240,Sivakasi,Tamil Nadu
1216 | 1241,Srivilliputhur,Tamil Nadu
1217 | 1242,Surandai,Tamil Nadu
1218 | 1243,Suriyampalayam,Tamil Nadu
1219 | 1244,Tenkasi,Tamil Nadu
1220 | 1245,Thammampatti,Tamil Nadu
1221 | 1246,Thanjavur,Tamil Nadu
1222 | 1247,Tharamangalam,Tamil Nadu
1223 | 1248,Tharangambadi,Tamil Nadu
1224 | 1249,Theni Allinagaram,Tamil Nadu
1225 | 1250,Thirumangalam,Tamil Nadu
1226 | 1251,Thirunindravur,Tamil Nadu
1227 | 1252,Thiruparappu,Tamil Nadu
1228 | 1253,Thirupuvanam,Tamil Nadu
1229 | 1254,Thiruthuraipoondi,Tamil Nadu
1230 | 1255,Thiruvallur,Tamil Nadu
1231 | 1256,Thiruvarur,Tamil Nadu
1232 | 1257,Thoothukudi,Tamil Nadu
1233 | 1258,Thuraiyur,Tamil Nadu
1234 | 1259,Tindivanam,Tamil Nadu
1235 | 1260,Tiruchendur,Tamil Nadu
1236 | 1261,Tiruchengode,Tamil Nadu
1237 | 1262,Tiruchirappalli,Tamil Nadu
1238 | 1263,Tirukalukundram,Tamil Nadu
1239 | 1264,Tirukkoyilur,Tamil Nadu
1240 | 1265,Tirunelveli,Tamil Nadu
1241 | 1266,Tirupathur,Tamil Nadu
1242 | 1267,Tirupathur,Tamil Nadu
1243 | 1268,Tiruppur,Tamil Nadu
1244 | 1269,Tiruttani,Tamil Nadu
1245 | 1270,Tiruvannamalai,Tamil Nadu
1246 | 1271,Tiruvethipuram,Tamil Nadu
1247 | 1272,Tittakudi,Tamil Nadu
1248 | 1273,Udhagamandalam,Tamil Nadu
1249 | 1274,Udumalaipettai,Tamil Nadu
1250 | 1275,Unnamalaikadai,Tamil Nadu
1251 | 1276,Usilampatti,Tamil Nadu
1252 | 1277,Uthamapalayam,Tamil Nadu
1253 | 1278,Uthiramerur,Tamil Nadu
1254 | 1279,Vadakkuvalliyur,Tamil Nadu
1255 | 1280,Vadalur,Tamil Nadu
1256 | 1281,Vadipatti,Tamil Nadu
1257 | 1282,Valparai,Tamil Nadu
1258 | 1283,Vandavasi,Tamil Nadu
1259 | 1284,Vaniyambadi,Tamil Nadu
1260 | 1285,Vedaranyam,Tamil Nadu
1261 | 1286,Vellakoil,Tamil Nadu
1262 | 1287,Vellore,Tamil Nadu
1263 | 1288,Vikramasingapuram,Tamil Nadu
1264 | 1289,Viluppuram,Tamil Nadu
1265 | 1290,Virudhachalam,Tamil Nadu
1266 | 1291,Virudhunagar,Tamil Nadu
1267 | 1292,Viswanatham,Tamil Nadu
1268 | 1293,Agartala,Tripura
1269 | 1294,Badharghat,Tripura
1270 | 1295,Dharmanagar,Tripura
1271 | 1296,Indranagar,Tripura
1272 | 1297,Jogendranagar,Tripura
1273 | 1298,Kailasahar,Tripura
1274 | 1299,Khowai,Tripura
1275 | 1300,Pratapgarh,Tripura
1276 | 1301,Udaipur,Tripura
1277 | 1302,Achhnera,Uttar Pradesh
1278 | 1303,Adari,Uttar Pradesh
1279 | 1304,Agra,Uttar Pradesh
1280 | 1305,Aligarh,Uttar Pradesh
1281 | 1306,Allahabad,Uttar Pradesh
1282 | 1307,Amroha,Uttar Pradesh
1283 | 1308,Azamgarh,Uttar Pradesh
1284 | 1309,Bahraich,Uttar Pradesh
1285 | 1310,Ballia,Uttar Pradesh
1286 | 1311,Balrampur,Uttar Pradesh
1287 | 1312,Banda,Uttar Pradesh
1288 | 1313,Bareilly,Uttar Pradesh
1289 | 1314,Chandausi,Uttar Pradesh
1290 | 1315,Dadri,Uttar Pradesh
1291 | 1316,Deoria,Uttar Pradesh
1292 | 1317,Etawah,Uttar Pradesh
1293 | 1318,Fatehabad,Uttar Pradesh
1294 | 1319,Fatehpur,Uttar Pradesh
1295 | 1320,Fatehpur,Uttar Pradesh
1296 | 1321,Greater Noida,Uttar Pradesh
1297 | 1322,Hamirpur,Uttar Pradesh
1298 | 1323,Hardoi,Uttar Pradesh
1299 | 1324,Jajmau,Uttar Pradesh
1300 | 1325,Jaunpur,Uttar Pradesh
1301 | 1326,Jhansi,Uttar Pradesh
1302 | 1327,Kalpi,Uttar Pradesh
1303 | 1328,Kanpur,Uttar Pradesh
1304 | 1329,Kota,Uttar Pradesh
1305 | 1330,Laharpur,Uttar Pradesh
1306 | 1331,Lakhimpur,Uttar Pradesh
1307 | 1332,Lal Gopalganj Nindaura,Uttar Pradesh
1308 | 1333,Lalganj,Uttar Pradesh
1309 | 1334,Lalitpur,Uttar Pradesh
1310 | 1335,Lar,Uttar Pradesh
1311 | 1336,Loni,Uttar Pradesh
1312 | 1337,Lucknow,Uttar Pradesh
1313 | 1338,Mathura,Uttar Pradesh
1314 | 1339,Meerut,Uttar Pradesh
1315 | 1340,Modinagar,Uttar Pradesh
1316 | 1341,Muradnagar,Uttar Pradesh
1317 | 1342,Nagina,Uttar Pradesh
1318 | 1343,Najibabad,Uttar Pradesh
1319 | 1344,Nakur,Uttar Pradesh
1320 | 1345,Nanpara,Uttar Pradesh
1321 | 1346,Naraura,Uttar Pradesh
1322 | 1347,Naugawan Sadat,Uttar Pradesh
1323 | 1348,Nautanwa,Uttar Pradesh
1324 | 1349,Nawabganj,Uttar Pradesh
1325 | 1350,Nehtaur,Uttar Pradesh
1326 | 1351,NOIDA,Uttar Pradesh
1327 | 1352,Noorpur,Uttar Pradesh
1328 | 1353,Obra,Uttar Pradesh
1329 | 1354,Orai,Uttar Pradesh
1330 | 1355,Padrauna,Uttar Pradesh
1331 | 1356,Palia Kalan,Uttar Pradesh
1332 | 1357,Parasi,Uttar Pradesh
1333 | 1358,Phulpur,Uttar Pradesh
1334 | 1359,Pihani,Uttar Pradesh
1335 | 1360,Pilibhit,Uttar Pradesh
1336 | 1361,Pilkhuwa,Uttar Pradesh
1337 | 1362,Powayan,Uttar Pradesh
1338 | 1363,Pukhrayan,Uttar Pradesh
1339 | 1364,Puranpur,Uttar Pradesh
1340 | 1365,Purquazi,Uttar Pradesh
1341 | 1366,Purwa,Uttar Pradesh
1342 | 1367,Rae Bareli,Uttar Pradesh
1343 | 1368,Rampur,Uttar Pradesh
1344 | 1369,Rampur Maniharan,Uttar Pradesh
1345 | 1370,Rasra,Uttar Pradesh
1346 | 1371,Rath,Uttar Pradesh
1347 | 1372,Renukoot,Uttar Pradesh
1348 | 1373,Reoti,Uttar Pradesh
1349 | 1374,Robertsganj,Uttar Pradesh
1350 | 1375,Rudauli,Uttar Pradesh
1351 | 1376,Rudrapur,Uttar Pradesh
1352 | 1377,Sadabad,Uttar Pradesh
1353 | 1378,Safipur,Uttar Pradesh
1354 | 1379,Saharanpur,Uttar Pradesh
1355 | 1380,Sahaspur,Uttar Pradesh
1356 | 1381,Sahaswan,Uttar Pradesh
1357 | 1382,Sahawar,Uttar Pradesh
1358 | 1383,Sahjanwa,Uttar Pradesh
1359 | 1384,Saidpur, Ghazipur
1360 | 1385,Sambhal,Uttar Pradesh
1361 | 1386,Samdhan,Uttar Pradesh
1362 | 1387,Samthar,Uttar Pradesh
1363 | 1388,Sandi,Uttar Pradesh
1364 | 1389,Sandila,Uttar Pradesh
1365 | 1390,Sardhana,Uttar Pradesh
1366 | 1391,Seohara,Uttar Pradesh
1367 | 1392,Shahabad, Hardoi
1368 | 1393,Shahabad, Rampur
1369 | 1394,Shahganj,Uttar Pradesh
1370 | 1395,Shahjahanpur,Uttar Pradesh
1371 | 1396,Shamli,Uttar Pradesh
1372 | 1397,Shamsabad, Agra
1373 | 1398,Shamsabad, Farrukhabad
1374 | 1399,Sherkot,Uttar Pradesh
1375 | 1400,Shikarpur, Bulandshahr
1376 | 1401,Shikohabad,Uttar Pradesh
1377 | 1402,Shishgarh,Uttar Pradesh
1378 | 1403,Siana,Uttar Pradesh
1379 | 1404,Sikanderpur,Uttar Pradesh
1380 | 1405,Sikandra Rao,Uttar Pradesh
1381 | 1406,Sikandrabad,Uttar Pradesh
1382 | 1407,Sirsaganj,Uttar Pradesh
1383 | 1408,Sirsi,Uttar Pradesh
1384 | 1409,Sitapur,Uttar Pradesh
1385 | 1410,Soron,Uttar Pradesh
1386 | 1411,Suar,Uttar Pradesh
1387 | 1412,Sultanpur,Uttar Pradesh
1388 | 1413,Sumerpur,Uttar Pradesh
1389 | 1414,Tanda,Uttar Pradesh
1390 | 1415,Tanda,Uttar Pradesh
1391 | 1416,Tetri Bazar,Uttar Pradesh
1392 | 1417,Thakurdwara,Uttar Pradesh
1393 | 1418,Thana Bhawan,Uttar Pradesh
1394 | 1419,Tilhar,Uttar Pradesh
1395 | 1420,Tirwaganj,Uttar Pradesh
1396 | 1421,Tulsipur,Uttar Pradesh
1397 | 1422,Tundla,Uttar Pradesh
1398 | 1423,Unnao,Uttar Pradesh
1399 | 1424,Utraula,Uttar Pradesh
1400 | 1425,Varanasi,Uttar Pradesh
1401 | 1426,Vrindavan,Uttar Pradesh
1402 | 1427,Warhapur,Uttar Pradesh
1403 | 1428,Zaidpur,Uttar Pradesh
1404 | 1429,Zamania,Uttar Pradesh
1405 | 1430,Almora,Uttarakhand
1406 | 1431,Bazpur,Uttarakhand
1407 | 1432,Chamba,Uttarakhand
1408 | 1433,Dehradun,Uttarakhand
1409 | 1434,Haldwani,Uttarakhand
1410 | 1435,Haridwar,Uttarakhand
1411 | 1436,Jaspur,Uttarakhand
1412 | 1437,Kashipur,Uttarakhand
1413 | 1438,kichha,Uttarakhand
1414 | 1439,Kotdwara,Uttarakhand
1415 | 1440,Manglaur,Uttarakhand
1416 | 1441,Mussoorie,Uttarakhand
1417 | 1442,Nagla,Uttarakhand
1418 | 1443,Nainital,Uttarakhand
1419 | 1444,Pauri,Uttarakhand
1420 | 1445,Pithoragarh,Uttarakhand
1421 | 1446,Ramnagar,Uttarakhand
1422 | 1447,Rishikesh,Uttarakhand
1423 | 1448,Roorkee,Uttarakhand
1424 | 1449,Rudrapur,Uttarakhand
1425 | 1450,Sitarganj,Uttarakhand
1426 | 1451,Tehri,Uttarakhand
1427 | 1452,Muzaffarnagar,Uttar Pradesh
1428 | 1453,Adra, Purulia
1429 | 1454,Alipurduar,West Bengal
1430 | 1455,Arambagh,West Bengal
1431 | 1456,Asansol,West Bengal
1432 | 1457,Baharampur,West Bengal
1433 | 1458,Bally,West Bengal
1434 | 1459,Balurghat,West Bengal
1435 | 1460,Bankura,West Bengal
1436 | 1461,Barakar,West Bengal
1437 | 1462,Barasat,West Bengal
1438 | 1463,Bardhaman,West Bengal
1439 | 1464,Bidhan Nagar,West Bengal
1440 | 1465,Chinsura,West Bengal
1441 | 1466,Contai,West Bengal
1442 | 1467,Cooch Behar,West Bengal
1443 | 1468,Darjeeling,West Bengal
1444 | 1469,Durgapur,West Bengal
1445 | 1470,Haldia,West Bengal
1446 | 1471,Howrah,West Bengal
1447 | 1472,Islampur,West Bengal
1448 | 1473,Jhargram,West Bengal
1449 | 1474,Kharagpur,West Bengal
1450 | 1475,Kolkata,West Bengal
1451 | 1476,Mainaguri,West Bengal
1452 | 1477,Mal,West Bengal
1453 | 1478,Mathabhanga,West Bengal
1454 | 1479,Medinipur,West Bengal
1455 | 1480,Memari,West Bengal
1456 | 1481,Monoharpur,West Bengal
1457 | 1482,Murshidabad,West Bengal
1458 | 1483,Nabadwip,West Bengal
1459 | 1484,Naihati,West Bengal
1460 | 1485,Panchla,West Bengal
1461 | 1486,Pandua,West Bengal
1462 | 1487,Paschim Punropara,West Bengal
1463 | 1488,Purulia,West Bengal
1464 | 1489,Raghunathpur,West Bengal
1465 | 1490,Raiganj,West Bengal
1466 | 1491,Rampurhat,West Bengal
1467 | 1492,Ranaghat,West Bengal
1468 | 1493,Sainthia,West Bengal
1469 | 1494,Santipur,West Bengal
1470 | 1495,Siliguri,West Bengal
1471 | 1496,Sonamukhi,West Bengal
1472 | 1497,Srirampore,West Bengal
1473 | 1498,Suri,West Bengal
1474 | 1499,Taki,West Bengal
1475 | 1500,Tamluk,West Bengal
1476 | 1501,Tarakeswar,West Bengal
1477 | 1502,Chikmagalur,Karnataka
1478 | 1503,Davanagere,Karnataka
1479 | 1504,Dharwad,Karnataka
1480 | 1505,Gadag,Karnataka
1481 | 1506,Chennai,Tamil Nadu
1482 | 1507,Coimbatore,Tamil Nadu
1483 | 1508,Bengaluru,Karnataka
1484 | 


--------------------------------------------------------------------------------
/2-Preprocessing_and_Modelling/Pre-processing Jobs for modellingv2.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# Job - Pre-processing and Modelling Iteration final"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "code",
  12 |    "execution_count": 1,
  13 |    "metadata": {},
  14 |    "outputs": [],
  15 |    "source": [
  16 |     "# libraries import\n",
  17 |     "\n",
  18 |     "import numpy as np\n",
  19 |     "import pandas as pd\n",
  20 |     "import json\n",
  21 |     "import matplotlib.pyplot as plt\n",
  22 |     "%matplotlib inline\n",
  23 |     "\n",
  24 |     "import re\n",
  25 |     "import datetime\n",
  26 |     "from datetime import date\n",
  27 |     "from time import strptime\n",
  28 |     "\n",
  29 |     "import RAKE as rake\n",
  30 |     "import operator\n"
  31 |    ]
  32 |   },
  33 |   {
  34 |    "cell_type": "markdown",
  35 |    "metadata": {},
  36 |    "source": [
  37 |     "######################################################################################\n",
  38 |     "\n",
  39 |     "# Working on Job description Data\n",
  40 |     "######################################################################################   "
  41 |    ]
  42 |   },
  43 |   {
  44 |    "cell_type": "code",
  45 |    "execution_count": 2,
  46 |    "metadata": {},
  47 |    "outputs": [],
  48 |    "source": [
  49 |     "# reading my sorted job csv\n",
  50 |     "job = pd.read_csv('WIP/sorted_jobs_master_new.csv')"
  51 |    ]
  52 |   },
  53 |   {
  54 |    "cell_type": "code",
  55 |    "execution_count": 3,
  56 |    "metadata": {},
  57 |    "outputs": [
  58 |     {
  59 |      "name": "stdout",
  60 |      "output_type": "stream",
  61 |      "text": [
  62 |       "<class 'pandas.core.frame.DataFrame'>\n",
  63 |       "RangeIndex: 38941 entries, 0 to 38940\n",
  64 |       "Data columns (total 17 columns):\n",
  65 |       " #   Column            Non-Null Count  Dtype  \n",
  66 |       "---  ------            --------------  -----  \n",
  67 |       " 0   company           38941 non-null  object \n",
  68 |       " 1   education         38941 non-null  object \n",
  69 |       " 2   experience        38941 non-null  int64  \n",
  70 |       " 3   industry          38941 non-null  object \n",
  71 |       " 4   jobdescription    38941 non-null  object \n",
  72 |       " 5   jobtitle          38941 non-null  object \n",
  73 |       " 6   payrate           38941 non-null  object \n",
  74 |       " 7   skills            38941 non-null  object \n",
  75 |       " 8   experience_range  38941 non-null  int64  \n",
  76 |       " 9   industry_enum     38941 non-null  int64  \n",
  77 |       " 10  Salary_range      38941 non-null  float64\n",
  78 |       " 11  j_id              38941 non-null  int64  \n",
  79 |       " 12  is_grad           38941 non-null  int64  \n",
  80 |       " 13  is_postgrad       38941 non-null  int64  \n",
  81 |       " 14  is_doc            38941 non-null  int64  \n",
  82 |       " 15  location          38941 non-null  int64  \n",
  83 |       " 16  loc_name          38941 non-null  object \n",
  84 |       "dtypes: float64(1), int64(8), object(8)\n",
  85 |       "memory usage: 5.1+ MB\n"
  86 |      ]
  87 |     }
  88 |    ],
  89 |    "source": [
  90 |     "job.info()"
  91 |    ]
  92 |   },
  93 |   {
  94 |    "cell_type": "markdown",
  95 |    "metadata": {},
  96 |    "source": [
  97 |     "###########################################################################################################################\n",
  98 |     "# Understanding Job_description column (using NLP)\n",
  99 |     "###########################################################################################################################\n"
 100 |    ]
 101 |   },
 102 |   {
 103 |    "cell_type": "markdown",
 104 |    "metadata": {},
 105 |    "source": [
 106 |     "# 1. NLP - NLTK application to understand most used words"
 107 |    ]
 108 |   },
 109 |   {
 110 |    "cell_type": "code",
 111 |    "execution_count": 4,
 112 |    "metadata": {},
 113 |    "outputs": [
 114 |     {
 115 |      "name": "stderr",
 116 |      "output_type": "stream",
 117 |      "text": [
 118 |       "[nltk_data] Downloading package wordnet to\n",
 119 |       "[nltk_data]     C:\\Users\\shail\\AppData\\Roaming\\nltk_data...\n",
 120 |       "[nltk_data]   Package wordnet is already up-to-date!\n"
 121 |      ]
 122 |     }
 123 |    ],
 124 |    "source": [
 125 |     "#Import all the dependencies\n",
 126 |     "import nltk\n",
 127 |     "nltk.download('wordnet')\n",
 128 |     "from nltk.stem import WordNetLemmatizer\n",
 129 |     "wordnet_lemmatizer = WordNetLemmatizer()\n",
 130 |     "from nltk.corpus import stopwords\n",
 131 |     "from nltk.tokenize import word_tokenize \n",
 132 |     "set(stopwords.words('english'))\n",
 133 |     "# nltk.download('abc')\n",
 134 |     "# from nltk.corpus import abc\n",
 135 |     "# from nltk import RegexpTokenizer\n",
 136 |     "\n",
 137 |     "import string\n",
 138 |     "stopwords = set(stopwords.words(\"english\"))\n",
 139 |     "import gensim\n",
 140 |     "from gensim.test.utils import common_texts\n",
 141 |     "from gensim.models.doc2vec import Doc2Vec, TaggedDocument"
 142 |    ]
 143 |   },
 144 |   {
 145 |    "cell_type": "code",
 146 |    "execution_count": 5,
 147 |    "metadata": {},
 148 |    "outputs": [],
 149 |    "source": [
 150 |     "# defining tokenizer \n",
 151 |     "def my_tokenizer(text):\n",
 152 |     "    # 1. split at whitespace\n",
 153 |     "    text = text.split(' ')\n",
 154 |     "    \n",
 155 |     "    #2. lowercase\n",
 156 |     "    text = [word.lower() for word in text]\n",
 157 |     "    \n",
 158 |     "    #3. Remove puncutation\n",
 159 |     "    #table to replace puncuation\n",
 160 |     "    punc_table = str.maketrans('','',string.punctuation)\n",
 161 |     "    \n",
 162 |     "    #call translate()\n",
 163 |     "    text = [word.translate(punc_table) for word in text]\n",
 164 |     "    \n",
 165 |     "    #4. remove stopwords\n",
 166 |     "    text = [word for word in text if word not in stopwords]\n",
 167 |     "    \n",
 168 |     "    #5. lemmmatize\n",
 169 |     "    lemmatizer = WordNetLemmatizer()\n",
 170 |     "    \n",
 171 |     "    text = [lemmatizer.lemmatize(word, pos='v') for word in text]\n",
 172 |     "    text = [lemmatizer.lemmatize(word, pos='n') for word in text]\n",
 173 |     "    text = [lemmatizer.lemmatize(word, pos='a') for word in text]\n",
 174 |     "    \n",
 175 |     "    #6. remove empty strings\n",
 176 |     "    text = [word for word in text if word !='']\n",
 177 |     "    \n",
 178 |     "    return text "
 179 |    ]
 180 |   },
 181 |   {
 182 |    "cell_type": "markdown",
 183 |    "metadata": {},
 184 |    "source": [
 185 |     "# 2. NLP - TF-IDF application to get a list of all tokens \n",
 186 |     "-- This helped to gather what words needed to be in stop-words list"
 187 |    ]
 188 |   },
 189 |   {
 190 |    "cell_type": "code",
 191 |    "execution_count": 16,
 192 |    "metadata": {},
 193 |    "outputs": [],
 194 |    "source": [
 195 |     "#z = job['jobdescription'].str.rstrip('job description   send me jobs like this')"
 196 |    ]
 197 |   },
 198 |   {
 199 |    "cell_type": "code",
 200 |    "execution_count": 7,
 201 |    "metadata": {},
 202 |    "outputs": [
 203 |     {
 204 |      "data": {
 205 |       "text/plain": [
 206 |        "0         Qualifications: - == > 10th To Graduation & A...\n",
 207 |        "1         Qualifications: - == > 10th To Graduation & A...\n",
 208 |        "2         - as a developer in providing application des...\n",
 209 |        "3         - Involved with all stages of indirect taxati...\n",
 210 |        "4         - Involved with all stages of indirect taxati...\n",
 211 |        "                               ...                        \n",
 212 |        "38936     Looking for candidates with strong programmin...\n",
 213 |        "38937     Work with tech lead to architect and develop ...\n",
 214 |        "38938     We are looking for a Senior UI Developers and...\n",
 215 |        "38939     We are looking for a Senior UI Developers and...\n",
 216 |        "38940     Job description : Experience of 5-10 years wi...\n",
 217 |        "Name: jobdescription, Length: 38941, dtype: object"
 218 |       ]
 219 |      },
 220 |      "execution_count": 7,
 221 |      "metadata": {},
 222 |      "output_type": "execute_result"
 223 |     }
 224 |    ],
 225 |    "source": [
 226 |     "# job['jobdescription'] = job.jobdescription.str[40:]\n",
 227 |     "job['jobdescription']"
 228 |    ]
 229 |   },
 230 |   {
 231 |    "cell_type": "code",
 232 |    "execution_count": 23,
 233 |    "metadata": {},
 234 |    "outputs": [],
 235 |    "source": [
 236 |     "# t= job.copy()\n",
 237 |     "# t.to_csv('WIP.sorted_jobs_master_new.csv', index=False)"
 238 |    ]
 239 |   },
 240 |   {
 241 |    "cell_type": "code",
 242 |    "execution_count": 8,
 243 |    "metadata": {},
 244 |    "outputs": [
 245 |     {
 246 |      "name": "stderr",
 247 |      "output_type": "stream",
 248 |      "text": [
 249 |       "C:\\Users\\shail\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
 250 |       "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 251 |       "Try using .loc[row_indexer,col_indexer] = value instead\n",
 252 |       "\n",
 253 |       "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 254 |       "  \n"
 255 |      ]
 256 |     },
 257 |     {
 258 |      "data": {
 259 |       "text/html": [
 260 |        "<div>\n",
 261 |        "<style scoped>\n",
 262 |        "    .dataframe tbody tr th:only-of-type {\n",
 263 |        "        vertical-align: middle;\n",
 264 |        "    }\n",
 265 |        "\n",
 266 |        "    .dataframe tbody tr th {\n",
 267 |        "        vertical-align: top;\n",
 268 |        "    }\n",
 269 |        "\n",
 270 |        "    .dataframe thead th {\n",
 271 |        "        text-align: right;\n",
 272 |        "    }\n",
 273 |        "</style>\n",
 274 |        "<table border=\"1\" class=\"dataframe\">\n",
 275 |        "  <thead>\n",
 276 |        "    <tr style=\"text-align: right;\">\n",
 277 |        "      <th></th>\n",
 278 |        "      <th>j_id</th>\n",
 279 |        "      <th>jobtitle</th>\n",
 280 |        "      <th>company</th>\n",
 281 |        "      <th>jd_combo</th>\n",
 282 |        "    </tr>\n",
 283 |        "  </thead>\n",
 284 |        "  <tbody>\n",
 285 |        "    <tr>\n",
 286 |        "      <th>0</th>\n",
 287 |        "      <td>0</td>\n",
 288 |        "      <td>walkin data entry operator (night shift)</td>\n",
 289 |        "      <td>MM Media Pvt Ltd</td>\n",
 290 |        "      <td>walkin data entry operator (night shift)  Qual...</td>\n",
 291 |        "    </tr>\n",
 292 |        "    <tr>\n",
 293 |        "      <th>1</th>\n",
 294 |        "      <td>1</td>\n",
 295 |        "      <td>work based onhome based part time.</td>\n",
 296 |        "      <td>find live infotech</td>\n",
 297 |        "      <td>work based onhome based part time.  Qualificat...</td>\n",
 298 |        "    </tr>\n",
 299 |        "    <tr>\n",
 300 |        "      <th>2</th>\n",
 301 |        "      <td>2</td>\n",
 302 |        "      <td>pl/sql developer - sql</td>\n",
 303 |        "      <td>Softtech Career Infosystem Pvt. Ltd</td>\n",
 304 |        "      <td>pl/sql developer - sql  - as a developer in pr...</td>\n",
 305 |        "    </tr>\n",
 306 |        "    <tr>\n",
 307 |        "      <th>3</th>\n",
 308 |        "      <td>3</td>\n",
 309 |        "      <td>manager/ad/partner - indirect tax - ca</td>\n",
 310 |        "      <td>Onboard HRServices LLP</td>\n",
 311 |        "      <td>manager/ad/partner - indirect tax - ca  - Invo...</td>\n",
 312 |        "    </tr>\n",
 313 |        "    <tr>\n",
 314 |        "      <th>4</th>\n",
 315 |        "      <td>4</td>\n",
 316 |        "      <td>manager/ad/partner - indirect tax - ca</td>\n",
 317 |        "      <td>Onboard HRServices LLP</td>\n",
 318 |        "      <td>manager/ad/partner - indirect tax - ca  - Invo...</td>\n",
 319 |        "    </tr>\n",
 320 |        "  </tbody>\n",
 321 |        "</table>\n",
 322 |        "</div>"
 323 |       ],
 324 |       "text/plain": [
 325 |        "   j_id                                  jobtitle  \\\n",
 326 |        "0     0  walkin data entry operator (night shift)   \n",
 327 |        "1     1        work based onhome based part time.   \n",
 328 |        "2     2                    pl/sql developer - sql   \n",
 329 |        "3     3    manager/ad/partner - indirect tax - ca   \n",
 330 |        "4     4    manager/ad/partner - indirect tax - ca   \n",
 331 |        "\n",
 332 |        "                               company  \\\n",
 333 |        "0                     MM Media Pvt Ltd   \n",
 334 |        "1                   find live infotech   \n",
 335 |        "2  Softtech Career Infosystem Pvt. Ltd   \n",
 336 |        "3               Onboard HRServices LLP   \n",
 337 |        "4               Onboard HRServices LLP   \n",
 338 |        "\n",
 339 |        "                                            jd_combo  \n",
 340 |        "0  walkin data entry operator (night shift)  Qual...  \n",
 341 |        "1  work based onhome based part time.  Qualificat...  \n",
 342 |        "2  pl/sql developer - sql  - as a developer in pr...  \n",
 343 |        "3  manager/ad/partner - indirect tax - ca  - Invo...  \n",
 344 |        "4  manager/ad/partner - indirect tax - ca  - Invo...  "
 345 |       ]
 346 |      },
 347 |      "execution_count": 8,
 348 |      "metadata": {},
 349 |      "output_type": "execute_result"
 350 |     }
 351 |    ],
 352 |    "source": [
 353 |     "df_job_descriptions = job[['j_id','jobtitle','company' ]]\n",
 354 |     "df_job_descriptions['jd_combo'] = job['jobtitle'] +\" \" +  job['jobdescription'] \n",
 355 |     "df_job_descriptions.head()"
 356 |    ]
 357 |   },
 358 |   {
 359 |    "cell_type": "code",
 360 |    "execution_count": 9,
 361 |    "metadata": {
 362 |     "scrolled": true
 363 |    },
 364 |    "outputs": [
 365 |     {
 366 |      "name": "stderr",
 367 |      "output_type": "stream",
 368 |      "text": [
 369 |       "C:\\Users\\shail\\anaconda\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ëœ'] not in stop_words.\n",
 370 |       "  'stop_words.' % sorted(inconsistent))\n"
 371 |      ]
 372 |     },
 373 |     {
 374 |      "name": "stdout",
 375 |      "output_type": "stream",
 376 |      "text": [
 377 |       "(38941, 58510)\n",
 378 |       "(38941, 4)\n"
 379 |      ]
 380 |     }
 381 |    ],
 382 |    "source": [
 383 |     "from sklearn.feature_extraction.text import TfidfVectorizer\n",
 384 |     "stopwords = nltk.corpus.stopwords.words('english')\n",
 385 |     "stopwords.append('ã¯æ’ëœ')\n",
 386 |     "#Transforms words to TFIDF\n",
 387 |     "vectorizer = TfidfVectorizer(stop_words = stopwords)\n",
 388 |     "\n",
 389 |     "index = 0\n",
 390 |     "keys = {}\n",
 391 |     "\n",
 392 |     "for jd in df_job_descriptions.itertuples() :\n",
 393 |     "    key = jd[1]\n",
 394 |     "    keys[key] = index\n",
 395 |     "    index += 1\n",
 396 |     "\n",
 397 |     "#Fit the vectorizer to the data\n",
 398 |     "vectorizer.fit(df_job_descriptions['jd_combo'].fillna(''))\n",
 399 |     "\n",
 400 |     "#Transform the data\n",
 401 |     "tfidf_scores = vectorizer.transform(df_job_descriptions['jd_combo'].fillna(''))\n",
 402 |     "\n",
 403 |     "print(tfidf_scores.shape)\n",
 404 |     "print(df_job_descriptions.shape)"
 405 |    ]
 406 |   },
 407 |   {
 408 |    "cell_type": "code",
 409 |    "execution_count": 10,
 410 |    "metadata": {},
 411 |    "outputs": [
 412 |     {
 413 |      "data": {
 414 |       "text/plain": [
 415 |        "scipy.sparse.csr.csr_matrix"
 416 |       ]
 417 |      },
 418 |      "execution_count": 10,
 419 |      "metadata": {},
 420 |      "output_type": "execute_result"
 421 |     }
 422 |    ],
 423 |    "source": [
 424 |     "type(tfidf_scores)"
 425 |    ]
 426 |   },
 427 |   {
 428 |    "cell_type": "code",
 429 |    "execution_count": 11,
 430 |    "metadata": {},
 431 |    "outputs": [],
 432 |    "source": [
 433 |     "test = pd.DataFrame(tfidf_scores.toarray(), columns = vectorizer.get_feature_names())"
 434 |    ]
 435 |   },
 436 |   {
 437 |    "cell_type": "code",
 438 |    "execution_count": 12,
 439 |    "metadata": {},
 440 |    "outputs": [
 441 |     {
 442 |      "data": {
 443 |       "text/html": [
 444 |        "<div>\n",
 445 |        "<style scoped>\n",
 446 |        "    .dataframe tbody tr th:only-of-type {\n",
 447 |        "        vertical-align: middle;\n",
 448 |        "    }\n",
 449 |        "\n",
 450 |        "    .dataframe tbody tr th {\n",
 451 |        "        vertical-align: top;\n",
 452 |        "    }\n",
 453 |        "\n",
 454 |        "    .dataframe thead th {\n",
 455 |        "        text-align: right;\n",
 456 |        "    }\n",
 457 |        "</style>\n",
 458 |        "<table border=\"1\" class=\"dataframe\">\n",
 459 |        "  <thead>\n",
 460 |        "    <tr style=\"text-align: right;\">\n",
 461 |        "      <th></th>\n",
 462 |        "      <th>00</th>\n",
 463 |        "      <th>000</th>\n",
 464 |        "      <th>0000</th>\n",
 465 |        "      <th>00000</th>\n",
 466 |        "      <th>0000gmt</th>\n",
 467 |        "      <th>0001pt</th>\n",
 468 |        "      <th>00029</th>\n",
 469 |        "      <th>00034</th>\n",
 470 |        "      <th>000402</th>\n",
 471 |        "      <th>00053</th>\n",
 472 |        "      <th>...</th>\n",
 473 |        "      <th>ïƒ</th>\n",
 474 |        "      <th>ïƒ¼</th>\n",
 475 |        "      <th>ïƒž</th>\n",
 476 |        "      <th>œ100</th>\n",
 477 |        "      <th>œmost</th>\n",
 478 |        "      <th>œrecognition</th>\n",
 479 |        "      <th>œto</th>\n",
 480 |        "      <th>šâ</th>\n",
 481 |        "      <th>šã</th>\n",
 482 |        "      <th>žâ</th>\n",
 483 |        "    </tr>\n",
 484 |        "  </thead>\n",
 485 |        "  <tbody>\n",
 486 |        "    <tr>\n",
 487 |        "      <th>0</th>\n",
 488 |        "      <td>0.0</td>\n",
 489 |        "      <td>0.056499</td>\n",
 490 |        "      <td>0.0</td>\n",
 491 |        "      <td>0.0</td>\n",
 492 |        "      <td>0.0</td>\n",
 493 |        "      <td>0.0</td>\n",
 494 |        "      <td>0.0</td>\n",
 495 |        "      <td>0.0</td>\n",
 496 |        "      <td>0.0</td>\n",
 497 |        "      <td>0.0</td>\n",
 498 |        "      <td>...</td>\n",
 499 |        "      <td>0.0</td>\n",
 500 |        "      <td>0.0</td>\n",
 501 |        "      <td>0.0</td>\n",
 502 |        "      <td>0.0</td>\n",
 503 |        "      <td>0.0</td>\n",
 504 |        "      <td>0.0</td>\n",
 505 |        "      <td>0.0</td>\n",
 506 |        "      <td>0.0</td>\n",
 507 |        "      <td>0.0</td>\n",
 508 |        "      <td>0.0</td>\n",
 509 |        "    </tr>\n",
 510 |        "    <tr>\n",
 511 |        "      <th>1</th>\n",
 512 |        "      <td>0.0</td>\n",
 513 |        "      <td>0.068273</td>\n",
 514 |        "      <td>0.0</td>\n",
 515 |        "      <td>0.0</td>\n",
 516 |        "      <td>0.0</td>\n",
 517 |        "      <td>0.0</td>\n",
 518 |        "      <td>0.0</td>\n",
 519 |        "      <td>0.0</td>\n",
 520 |        "      <td>0.0</td>\n",
 521 |        "      <td>0.0</td>\n",
 522 |        "      <td>...</td>\n",
 523 |        "      <td>0.0</td>\n",
 524 |        "      <td>0.0</td>\n",
 525 |        "      <td>0.0</td>\n",
 526 |        "      <td>0.0</td>\n",
 527 |        "      <td>0.0</td>\n",
 528 |        "      <td>0.0</td>\n",
 529 |        "      <td>0.0</td>\n",
 530 |        "      <td>0.0</td>\n",
 531 |        "      <td>0.0</td>\n",
 532 |        "      <td>0.0</td>\n",
 533 |        "    </tr>\n",
 534 |        "    <tr>\n",
 535 |        "      <th>2</th>\n",
 536 |        "      <td>0.0</td>\n",
 537 |        "      <td>0.000000</td>\n",
 538 |        "      <td>0.0</td>\n",
 539 |        "      <td>0.0</td>\n",
 540 |        "      <td>0.0</td>\n",
 541 |        "      <td>0.0</td>\n",
 542 |        "      <td>0.0</td>\n",
 543 |        "      <td>0.0</td>\n",
 544 |        "      <td>0.0</td>\n",
 545 |        "      <td>0.0</td>\n",
 546 |        "      <td>...</td>\n",
 547 |        "      <td>0.0</td>\n",
 548 |        "      <td>0.0</td>\n",
 549 |        "      <td>0.0</td>\n",
 550 |        "      <td>0.0</td>\n",
 551 |        "      <td>0.0</td>\n",
 552 |        "      <td>0.0</td>\n",
 553 |        "      <td>0.0</td>\n",
 554 |        "      <td>0.0</td>\n",
 555 |        "      <td>0.0</td>\n",
 556 |        "      <td>0.0</td>\n",
 557 |        "    </tr>\n",
 558 |        "    <tr>\n",
 559 |        "      <th>3</th>\n",
 560 |        "      <td>0.0</td>\n",
 561 |        "      <td>0.000000</td>\n",
 562 |        "      <td>0.0</td>\n",
 563 |        "      <td>0.0</td>\n",
 564 |        "      <td>0.0</td>\n",
 565 |        "      <td>0.0</td>\n",
 566 |        "      <td>0.0</td>\n",
 567 |        "      <td>0.0</td>\n",
 568 |        "      <td>0.0</td>\n",
 569 |        "      <td>0.0</td>\n",
 570 |        "      <td>...</td>\n",
 571 |        "      <td>0.0</td>\n",
 572 |        "      <td>0.0</td>\n",
 573 |        "      <td>0.0</td>\n",
 574 |        "      <td>0.0</td>\n",
 575 |        "      <td>0.0</td>\n",
 576 |        "      <td>0.0</td>\n",
 577 |        "      <td>0.0</td>\n",
 578 |        "      <td>0.0</td>\n",
 579 |        "      <td>0.0</td>\n",
 580 |        "      <td>0.0</td>\n",
 581 |        "    </tr>\n",
 582 |        "    <tr>\n",
 583 |        "      <th>4</th>\n",
 584 |        "      <td>0.0</td>\n",
 585 |        "      <td>0.000000</td>\n",
 586 |        "      <td>0.0</td>\n",
 587 |        "      <td>0.0</td>\n",
 588 |        "      <td>0.0</td>\n",
 589 |        "      <td>0.0</td>\n",
 590 |        "      <td>0.0</td>\n",
 591 |        "      <td>0.0</td>\n",
 592 |        "      <td>0.0</td>\n",
 593 |        "      <td>0.0</td>\n",
 594 |        "      <td>...</td>\n",
 595 |        "      <td>0.0</td>\n",
 596 |        "      <td>0.0</td>\n",
 597 |        "      <td>0.0</td>\n",
 598 |        "      <td>0.0</td>\n",
 599 |        "      <td>0.0</td>\n",
 600 |        "      <td>0.0</td>\n",
 601 |        "      <td>0.0</td>\n",
 602 |        "      <td>0.0</td>\n",
 603 |        "      <td>0.0</td>\n",
 604 |        "      <td>0.0</td>\n",
 605 |        "    </tr>\n",
 606 |        "  </tbody>\n",
 607 |        "</table>\n",
 608 |        "<p>5 rows × 58510 columns</p>\n",
 609 |        "</div>"
 610 |       ],
 611 |       "text/plain": [
 612 |        "    00       000  0000  00000  0000gmt  0001pt  00029  00034  000402  00053  \\\n",
 613 |        "0  0.0  0.056499   0.0    0.0      0.0     0.0    0.0    0.0     0.0    0.0   \n",
 614 |        "1  0.0  0.068273   0.0    0.0      0.0     0.0    0.0    0.0     0.0    0.0   \n",
 615 |        "2  0.0  0.000000   0.0    0.0      0.0     0.0    0.0    0.0     0.0    0.0   \n",
 616 |        "3  0.0  0.000000   0.0    0.0      0.0     0.0    0.0    0.0     0.0    0.0   \n",
 617 |        "4  0.0  0.000000   0.0    0.0      0.0     0.0    0.0    0.0     0.0    0.0   \n",
 618 |        "\n",
 619 |        "   ...   ïƒ  ïƒ¼  ïƒž  œ100  œmost  œrecognition  œto   šâ   šã   žâ  \n",
 620 |        "0  ...  0.0  0.0  0.0   0.0    0.0           0.0  0.0  0.0  0.0  0.0  \n",
 621 |        "1  ...  0.0  0.0  0.0   0.0    0.0           0.0  0.0  0.0  0.0  0.0  \n",
 622 |        "2  ...  0.0  0.0  0.0   0.0    0.0           0.0  0.0  0.0  0.0  0.0  \n",
 623 |        "3  ...  0.0  0.0  0.0   0.0    0.0           0.0  0.0  0.0  0.0  0.0  \n",
 624 |        "4  ...  0.0  0.0  0.0   0.0    0.0           0.0  0.0  0.0  0.0  0.0  \n",
 625 |        "\n",
 626 |        "[5 rows x 58510 columns]"
 627 |       ]
 628 |      },
 629 |      "execution_count": 12,
 630 |      "metadata": {},
 631 |      "output_type": "execute_result"
 632 |     }
 633 |    ],
 634 |    "source": [
 635 |     "test.head()"
 636 |    ]
 637 |   },
 638 |   {
 639 |    "cell_type": "markdown",
 640 |    "metadata": {},
 641 |    "source": [
 642 |     "As count vectorizer and Tf-Idf are only exploding my column numbers. It might not be wise to proceed with any of these. Moveover, I need to compare job description with Resume, that may not with fair comparison. So I will use these results so far for customizing stop word list. And will later use Doc2Vec to train my model."
 643 |    ]
 644 |   },
 645 |   {
 646 |    "cell_type": "markdown",
 647 |    "metadata": {},
 648 |    "source": [
 649 |     "# Creating my Stopword list \n",
 650 |     "\n",
 651 |     "### As seen there are so many unwanted tokens like numbers,ïƒ¼ etc , I need to add them in \"stop words\" list to train model "
 652 |    ]
 653 |   },
 654 |   {
 655 |    "cell_type": "code",
 656 |    "execution_count": 13,
 657 |    "metadata": {},
 658 |    "outputs": [],
 659 |    "source": [
 660 |     "#getting list of all tokens\n",
 661 |     "word_list = test.columns.tolist()"
 662 |    ]
 663 |   },
 664 |   {
 665 |    "cell_type": "code",
 666 |    "execution_count": 14,
 667 |    "metadata": {},
 668 |    "outputs": [],
 669 |    "source": [
 670 |     "##Getting a list of unwanted words as s_words and adding to stopwords\n",
 671 |     "s_words =[]\n",
 672 |     "for word in word_list:\n",
 673 |     "    #print(word)\n",
 674 |     "    if re.search(\"^\\W|^\\d\",word):\n",
 675 |     "        s_words.append(word)\n",
 676 |     "        "
 677 |    ]
 678 |   },
 679 |   {
 680 |    "cell_type": "code",
 681 |    "execution_count": 15,
 682 |    "metadata": {},
 683 |    "outputs": [],
 684 |    "source": [
 685 |     "s_words.append('')        \n",
 686 |     "from nltk.corpus import stopwords\n",
 687 |     "stopword_set = set(stopwords.words('english'))\n",
 688 |     "stopword_set = list(stopword_set)\n",
 689 |     "stopword_set.extend(s_words)"
 690 |    ]
 691 |   },
 692 |   {
 693 |    "cell_type": "markdown",
 694 |    "metadata": {},
 695 |    "source": [
 696 |     "# Collecting all text data for DOC2VEC modelling\n",
 697 |     "In final iteration, I only used job title and job description for creating text combo document and got my 20-D vectors. This time I trained my model on 200 epochs. \n",
 698 |     "\n",
 699 |     "As count vectorizer and Tf-Idf are only exploding my column numbers. It might not be wise to proceed with any of these. Moveover, I need to compare job description with Resume, that may not with fair comparison. \n",
 700 |     "\n",
 701 |     "Definately Doc2Vec is the smart choice to make to proceed with matching. Because Doc2Vec has ability to read document as a whole rather than working on each single word. It has a feature to provide n-Dimentional vectors. \n",
 702 |     "\n",
 703 |     "So I am going to use same concept to get my vectors. Then I ll use those vectors to match it against any given resume. \n",
 704 |     "\n"
 705 |    ]
 706 |   },
 707 |   {
 708 |    "cell_type": "code",
 709 |    "execution_count": 16,
 710 |    "metadata": {},
 711 |    "outputs": [
 712 |     {
 713 |      "data": {
 714 |       "text/html": [
 715 |        "<div>\n",
 716 |        "<style scoped>\n",
 717 |        "    .dataframe tbody tr th:only-of-type {\n",
 718 |        "        vertical-align: middle;\n",
 719 |        "    }\n",
 720 |        "\n",
 721 |        "    .dataframe tbody tr th {\n",
 722 |        "        vertical-align: top;\n",
 723 |        "    }\n",
 724 |        "\n",
 725 |        "    .dataframe thead th {\n",
 726 |        "        text-align: right;\n",
 727 |        "    }\n",
 728 |        "</style>\n",
 729 |        "<table border=\"1\" class=\"dataframe\">\n",
 730 |        "  <thead>\n",
 731 |        "    <tr style=\"text-align: right;\">\n",
 732 |        "      <th></th>\n",
 733 |        "      <th>j_id</th>\n",
 734 |        "      <th>jobtitle</th>\n",
 735 |        "      <th>company</th>\n",
 736 |        "      <th>jd_combo</th>\n",
 737 |        "    </tr>\n",
 738 |        "  </thead>\n",
 739 |        "  <tbody>\n",
 740 |        "    <tr>\n",
 741 |        "      <th>0</th>\n",
 742 |        "      <td>0</td>\n",
 743 |        "      <td>walkin data entry operator (night shift)</td>\n",
 744 |        "      <td>MM Media Pvt Ltd</td>\n",
 745 |        "      <td>walkin data entry operator (night shift)  Qual...</td>\n",
 746 |        "    </tr>\n",
 747 |        "    <tr>\n",
 748 |        "      <th>1</th>\n",
 749 |        "      <td>1</td>\n",
 750 |        "      <td>work based onhome based part time.</td>\n",
 751 |        "      <td>find live infotech</td>\n",
 752 |        "      <td>work based onhome based part time.  Qualificat...</td>\n",
 753 |        "    </tr>\n",
 754 |        "    <tr>\n",
 755 |        "      <th>2</th>\n",
 756 |        "      <td>2</td>\n",
 757 |        "      <td>pl/sql developer - sql</td>\n",
 758 |        "      <td>Softtech Career Infosystem Pvt. Ltd</td>\n",
 759 |        "      <td>pl/sql developer - sql  - as a developer in pr...</td>\n",
 760 |        "    </tr>\n",
 761 |        "    <tr>\n",
 762 |        "      <th>3</th>\n",
 763 |        "      <td>3</td>\n",
 764 |        "      <td>manager/ad/partner - indirect tax - ca</td>\n",
 765 |        "      <td>Onboard HRServices LLP</td>\n",
 766 |        "      <td>manager/ad/partner - indirect tax - ca  - Invo...</td>\n",
 767 |        "    </tr>\n",
 768 |        "    <tr>\n",
 769 |        "      <th>4</th>\n",
 770 |        "      <td>4</td>\n",
 771 |        "      <td>manager/ad/partner - indirect tax - ca</td>\n",
 772 |        "      <td>Onboard HRServices LLP</td>\n",
 773 |        "      <td>manager/ad/partner - indirect tax - ca  - Invo...</td>\n",
 774 |        "    </tr>\n",
 775 |        "  </tbody>\n",
 776 |        "</table>\n",
 777 |        "</div>"
 778 |       ],
 779 |       "text/plain": [
 780 |        "   j_id                                  jobtitle  \\\n",
 781 |        "0     0  walkin data entry operator (night shift)   \n",
 782 |        "1     1        work based onhome based part time.   \n",
 783 |        "2     2                    pl/sql developer - sql   \n",
 784 |        "3     3    manager/ad/partner - indirect tax - ca   \n",
 785 |        "4     4    manager/ad/partner - indirect tax - ca   \n",
 786 |        "\n",
 787 |        "                               company  \\\n",
 788 |        "0                     MM Media Pvt Ltd   \n",
 789 |        "1                   find live infotech   \n",
 790 |        "2  Softtech Career Infosystem Pvt. Ltd   \n",
 791 |        "3               Onboard HRServices LLP   \n",
 792 |        "4               Onboard HRServices LLP   \n",
 793 |        "\n",
 794 |        "                                            jd_combo  \n",
 795 |        "0  walkin data entry operator (night shift)  Qual...  \n",
 796 |        "1  work based onhome based part time.  Qualificat...  \n",
 797 |        "2  pl/sql developer - sql  - as a developer in pr...  \n",
 798 |        "3  manager/ad/partner - indirect tax - ca  - Invo...  \n",
 799 |        "4  manager/ad/partner - indirect tax - ca  - Invo...  "
 800 |       ]
 801 |      },
 802 |      "execution_count": 16,
 803 |      "metadata": {},
 804 |      "output_type": "execute_result"
 805 |     }
 806 |    ],
 807 |    "source": [
 808 |     "# df_job_descriptions = job[['j_id','jobtitle','company' ]]\n",
 809 |     "# df_job_descriptions['jd_combo'] = job['jobtitle'] +\" \" +  job['jobdescription'] \n",
 810 |     "df_job_descriptions.head()"
 811 |    ]
 812 |   },
 813 |   {
 814 |    "cell_type": "code",
 815 |    "execution_count": 17,
 816 |    "metadata": {},
 817 |    "outputs": [
 818 |     {
 819 |      "data": {
 820 |       "text/plain": [
 821 |        "0    walkin data entry operator (night shift)  Qual...\n",
 822 |        "1    work based onhome based part time.  Qualificat...\n",
 823 |        "2    pl/sql developer - sql  - as a developer in pr...\n",
 824 |        "3    manager/ad/partner - indirect tax - ca  - Invo...\n",
 825 |        "4    manager/ad/partner - indirect tax - ca  - Invo...\n",
 826 |        "5    manager/ad/partner - indirect tax - ca  - Invo...\n",
 827 |        "6    manager/ad/partner - indirect tax - ca  - Invo...\n",
 828 |        "7    manager/ad/partner - indirect tax - ca  - Invo...\n",
 829 |        "8    manager/ad/partner - indirect tax - ca  - Invo...\n",
 830 |        "9    java technical lead (6-8 yrs) -  Please share ...\n",
 831 |        "Name: jd_combo, dtype: object"
 832 |       ]
 833 |      },
 834 |      "execution_count": 17,
 835 |      "metadata": {},
 836 |      "output_type": "execute_result"
 837 |     }
 838 |    ],
 839 |    "source": [
 840 |     "docs = df_job_descriptions['jd_combo']\n",
 841 |     "docs_sample = docs.head(10)\n",
 842 |     "docs_sample"
 843 |    ]
 844 |   },
 845 |   {
 846 |    "cell_type": "code",
 847 |    "execution_count": 18,
 848 |    "metadata": {},
 849 |    "outputs": [],
 850 |    "source": [
 851 |     "#pre-processing with custom stop word list\n",
 852 |     "def preprocess(text):\n",
 853 |     "    stop_words = stopword_set\n",
 854 |     "    #0. split words by whitespace\n",
 855 |     "    text = text.split()\n",
 856 |     "    \n",
 857 |     "    \n",
 858 |     "    # 1. lower case\n",
 859 |     "    text = [word.lower() for word in text]\n",
 860 |     "    \n",
 861 |     "    # 2. remove punctuations\n",
 862 |     "    punc_table = str.maketrans('','',string.punctuation)\n",
 863 |     "    text = [word.translate(punc_table) for word in text]\n",
 864 |     "    \n",
 865 |     "    # 3. remove stop words\n",
 866 |     "    text = [word for word in text if word not in stop_words]\n",
 867 |     "    \n",
 868 |     "    return text"
 869 |    ]
 870 |   },
 871 |   {
 872 |    "cell_type": "code",
 873 |    "execution_count": 19,
 874 |    "metadata": {},
 875 |    "outputs": [],
 876 |    "source": [
 877 |     "# calling my pre-process to tokenize \n",
 878 |     "tokenized_doc = []\n",
 879 |     "doc = df_job_descriptions['jd_combo']\n",
 880 |     "#doc = docs_sample\n",
 881 |     "for d in doc:\n",
 882 |     "    tokenized_doc.append(preprocess(d))\n",
 883 |     "#tokenized_doc"
 884 |    ]
 885 |   },
 886 |   {
 887 |    "cell_type": "code",
 888 |    "execution_count": 20,
 889 |    "metadata": {},
 890 |    "outputs": [],
 891 |    "source": [
 892 |     "# Convert tokenized document into gensim formated tagged data\n",
 893 |     "tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]"
 894 |    ]
 895 |   },
 896 |   {
 897 |    "cell_type": "code",
 898 |    "execution_count": 21,
 899 |    "metadata": {},
 900 |    "outputs": [
 901 |     {
 902 |      "data": {
 903 |       "text/plain": [
 904 |        "38941"
 905 |       ]
 906 |      },
 907 |      "execution_count": 21,
 908 |      "metadata": {},
 909 |      "output_type": "execute_result"
 910 |     }
 911 |    ],
 912 |    "source": [
 913 |     "num_doc = len(tagged_data)\n",
 914 |     "num_doc"
 915 |    ]
 916 |   },
 917 |   {
 918 |    "cell_type": "code",
 919 |    "execution_count": 24,
 920 |    "metadata": {},
 921 |    "outputs": [],
 922 |    "source": [
 923 |     "\n",
 924 |     "#settings to show epoch progress\n",
 925 |     "from gensim.test.utils import get_tmpfile\n",
 926 |     "from gensim.models.callbacks import CallbackAny2Vec\n",
 927 |     "\n",
 928 |     "class EpochSaver(CallbackAny2Vec):\n",
 929 |     "\n",
 930 |     "    def __init__(self, path_prefix):\n",
 931 |     "        self.path_prefix = path_prefix\n",
 932 |     "        self.epoch = 0\n",
 933 |     "\n",
 934 |     "    def on_epoch_end(self, model):\n",
 935 |     "        output_path = get_tmpfile('{}_epoch{}.model'.format(self.path_prefix, self.epoch))\n",
 936 |     "        model.save(output_path)\n",
 937 |     "        self.epoch += 1"
 938 |    ]
 939 |   },
 940 |   {
 941 |    "cell_type": "code",
 942 |    "execution_count": 25,
 943 |    "metadata": {},
 944 |    "outputs": [],
 945 |    "source": [
 946 |     "#settings to show epoch progress\n",
 947 |     "class EpochLogger(CallbackAny2Vec):\n",
 948 |     "    \n",
 949 |     "    def __init__(self):\n",
 950 |     "        self.epoch = 0\n",
 951 |     "        \n",
 952 |     "    def on_epoch_begin(self, model):\n",
 953 |     "        print(\"Epoch #{} start\".format(self.epoch))\n",
 954 |     "\n",
 955 |     "    def on_epoch_end(self, model):\n",
 956 |     "        print(\"Epoch #{} end\".format(self.epoch))\n",
 957 |     "        self.epoch += 1"
 958 |    ]
 959 |   },
 960 |   {
 961 |    "cell_type": "code",
 962 |    "execution_count": 27,
 963 |    "metadata": {
 964 |     "scrolled": true
 965 |    },
 966 |    "outputs": [
 967 |     {
 968 |      "name": "stdout",
 969 |      "output_type": "stream",
 970 |      "text": [
 971 |       "Epoch #0 start\n",
 972 |       "Epoch #0 end\n",
 973 |       "Epoch #1 start\n",
 974 |       "Epoch #1 end\n",
 975 |       "Epoch #2 start\n",
 976 |       "Epoch #2 end\n",
 977 |       "Epoch #3 start\n",
 978 |       "Epoch #3 end\n",
 979 |       "Epoch #4 start\n",
 980 |       "Epoch #4 end\n",
 981 |       "Epoch #5 start\n",
 982 |       "Epoch #5 end\n",
 983 |       "Epoch #6 start\n",
 984 |       "Epoch #6 end\n",
 985 |       "Epoch #7 start\n",
 986 |       "Epoch #7 end\n",
 987 |       "Epoch #8 start\n",
 988 |       "Epoch #8 end\n",
 989 |       "Epoch #9 start\n",
 990 |       "Epoch #9 end\n",
 991 |       "Epoch #10 start\n",
 992 |       "Epoch #10 end\n",
 993 |       "Epoch #11 start\n",
 994 |       "Epoch #11 end\n",
 995 |       "Epoch #12 start\n",
 996 |       "Epoch #12 end\n",
 997 |       "Epoch #13 start\n",
 998 |       "Epoch #13 end\n",
 999 |       "Epoch #14 start\n",
1000 |       "Epoch #14 end\n",
1001 |       "Epoch #15 start\n",
1002 |       "Epoch #15 end\n",
1003 |       "Epoch #16 start\n",
1004 |       "Epoch #16 end\n",
1005 |       "Epoch #17 start\n",
1006 |       "Epoch #17 end\n",
1007 |       "Epoch #18 start\n",
1008 |       "Epoch #18 end\n",
1009 |       "Epoch #19 start\n",
1010 |       "Epoch #19 end\n",
1011 |       "Epoch #20 start\n",
1012 |       "Epoch #20 end\n",
1013 |       "Epoch #21 start\n",
1014 |       "Epoch #21 end\n",
1015 |       "Epoch #22 start\n",
1016 |       "Epoch #22 end\n",
1017 |       "Epoch #23 start\n",
1018 |       "Epoch #23 end\n",
1019 |       "Epoch #24 start\n",
1020 |       "Epoch #24 end\n",
1021 |       "Epoch #25 start\n",
1022 |       "Epoch #25 end\n",
1023 |       "Epoch #26 start\n",
1024 |       "Epoch #26 end\n",
1025 |       "Epoch #27 start\n",
1026 |       "Epoch #27 end\n",
1027 |       "Epoch #28 start\n",
1028 |       "Epoch #28 end\n",
1029 |       "Epoch #29 start\n",
1030 |       "Epoch #29 end\n",
1031 |       "Epoch #30 start\n",
1032 |       "Epoch #30 end\n",
1033 |       "Epoch #31 start\n",
1034 |       "Epoch #31 end\n",
1035 |       "Epoch #32 start\n",
1036 |       "Epoch #32 end\n",
1037 |       "Epoch #33 start\n",
1038 |       "Epoch #33 end\n",
1039 |       "Epoch #34 start\n",
1040 |       "Epoch #34 end\n",
1041 |       "Epoch #35 start\n",
1042 |       "Epoch #35 end\n",
1043 |       "Epoch #36 start\n",
1044 |       "Epoch #36 end\n",
1045 |       "Epoch #37 start\n",
1046 |       "Epoch #37 end\n",
1047 |       "Epoch #38 start\n",
1048 |       "Epoch #38 end\n",
1049 |       "Epoch #39 start\n",
1050 |       "Epoch #39 end\n",
1051 |       "Epoch #40 start\n",
1052 |       "Epoch #40 end\n",
1053 |       "Epoch #41 start\n",
1054 |       "Epoch #41 end\n",
1055 |       "Epoch #42 start\n",
1056 |       "Epoch #42 end\n",
1057 |       "Epoch #43 start\n",
1058 |       "Epoch #43 end\n",
1059 |       "Epoch #44 start\n",
1060 |       "Epoch #44 end\n",
1061 |       "Epoch #45 start\n",
1062 |       "Epoch #45 end\n",
1063 |       "Epoch #46 start\n",
1064 |       "Epoch #46 end\n",
1065 |       "Epoch #47 start\n",
1066 |       "Epoch #47 end\n",
1067 |       "Epoch #48 start\n",
1068 |       "Epoch #48 end\n",
1069 |       "Epoch #49 start\n",
1070 |       "Epoch #49 end\n",
1071 |       "Epoch #50 start\n",
1072 |       "Epoch #50 end\n",
1073 |       "Epoch #51 start\n",
1074 |       "Epoch #51 end\n",
1075 |       "Epoch #52 start\n",
1076 |       "Epoch #52 end\n",
1077 |       "Epoch #53 start\n",
1078 |       "Epoch #53 end\n",
1079 |       "Epoch #54 start\n",
1080 |       "Epoch #54 end\n",
1081 |       "Epoch #55 start\n",
1082 |       "Epoch #55 end\n",
1083 |       "Epoch #56 start\n",
1084 |       "Epoch #56 end\n",
1085 |       "Epoch #57 start\n",
1086 |       "Epoch #57 end\n",
1087 |       "Epoch #58 start\n",
1088 |       "Epoch #58 end\n",
1089 |       "Epoch #59 start\n",
1090 |       "Epoch #59 end\n",
1091 |       "Epoch #60 start\n",
1092 |       "Epoch #60 end\n",
1093 |       "Epoch #61 start\n",
1094 |       "Epoch #61 end\n",
1095 |       "Epoch #62 start\n",
1096 |       "Epoch #62 end\n",
1097 |       "Epoch #63 start\n",
1098 |       "Epoch #63 end\n",
1099 |       "Epoch #64 start\n",
1100 |       "Epoch #64 end\n",
1101 |       "Epoch #65 start\n",
1102 |       "Epoch #65 end\n",
1103 |       "Epoch #66 start\n",
1104 |       "Epoch #66 end\n",
1105 |       "Epoch #67 start\n",
1106 |       "Epoch #67 end\n",
1107 |       "Epoch #68 start\n",
1108 |       "Epoch #68 end\n",
1109 |       "Epoch #69 start\n",
1110 |       "Epoch #69 end\n",
1111 |       "Epoch #70 start\n",
1112 |       "Epoch #70 end\n",
1113 |       "Epoch #71 start\n",
1114 |       "Epoch #71 end\n",
1115 |       "Epoch #72 start\n",
1116 |       "Epoch #72 end\n",
1117 |       "Epoch #73 start\n",
1118 |       "Epoch #73 end\n",
1119 |       "Epoch #74 start\n",
1120 |       "Epoch #74 end\n",
1121 |       "Epoch #75 start\n",
1122 |       "Epoch #75 end\n",
1123 |       "Epoch #76 start\n",
1124 |       "Epoch #76 end\n",
1125 |       "Epoch #77 start\n",
1126 |       "Epoch #77 end\n",
1127 |       "Epoch #78 start\n",
1128 |       "Epoch #78 end\n",
1129 |       "Epoch #79 start\n",
1130 |       "Epoch #79 end\n",
1131 |       "Epoch #80 start\n",
1132 |       "Epoch #80 end\n",
1133 |       "Epoch #81 start\n",
1134 |       "Epoch #81 end\n",
1135 |       "Epoch #82 start\n",
1136 |       "Epoch #82 end\n",
1137 |       "Epoch #83 start\n",
1138 |       "Epoch #83 end\n",
1139 |       "Epoch #84 start\n",
1140 |       "Epoch #84 end\n",
1141 |       "Epoch #85 start\n",
1142 |       "Epoch #85 end\n",
1143 |       "Epoch #86 start\n",
1144 |       "Epoch #86 end\n",
1145 |       "Epoch #87 start\n",
1146 |       "Epoch #87 end\n",
1147 |       "Epoch #88 start\n",
1148 |       "Epoch #88 end\n",
1149 |       "Epoch #89 start\n",
1150 |       "Epoch #89 end\n",
1151 |       "Epoch #90 start\n",
1152 |       "Epoch #90 end\n",
1153 |       "Epoch #91 start\n",
1154 |       "Epoch #91 end\n",
1155 |       "Epoch #92 start\n",
1156 |       "Epoch #92 end\n",
1157 |       "Epoch #93 start\n",
1158 |       "Epoch #93 end\n",
1159 |       "Epoch #94 start\n",
1160 |       "Epoch #94 end\n",
1161 |       "Epoch #95 start\n",
1162 |       "Epoch #95 end\n",
1163 |       "Epoch #96 start\n",
1164 |       "Epoch #96 end\n",
1165 |       "Epoch #97 start\n",
1166 |       "Epoch #97 end\n",
1167 |       "Epoch #98 start\n",
1168 |       "Epoch #98 end\n",
1169 |       "Epoch #99 start\n",
1170 |       "Epoch #99 end\n",
1171 |       "Epoch #100 start\n",
1172 |       "Epoch #100 end\n",
1173 |       "Epoch #101 start\n",
1174 |       "Epoch #101 end\n",
1175 |       "Epoch #102 start\n",
1176 |       "Epoch #102 end\n",
1177 |       "Epoch #103 start\n",
1178 |       "Epoch #103 end\n",
1179 |       "Epoch #104 start\n",
1180 |       "Epoch #104 end\n",
1181 |       "Epoch #105 start\n",
1182 |       "Epoch #105 end\n",
1183 |       "Epoch #106 start\n",
1184 |       "Epoch #106 end\n",
1185 |       "Epoch #107 start\n",
1186 |       "Epoch #107 end\n",
1187 |       "Epoch #108 start\n",
1188 |       "Epoch #108 end\n",
1189 |       "Epoch #109 start\n",
1190 |       "Epoch #109 end\n",
1191 |       "Epoch #110 start\n",
1192 |       "Epoch #110 end\n",
1193 |       "Epoch #111 start\n",
1194 |       "Epoch #111 end\n",
1195 |       "Epoch #112 start\n",
1196 |       "Epoch #112 end\n",
1197 |       "Epoch #113 start\n",
1198 |       "Epoch #113 end\n",
1199 |       "Epoch #114 start\n",
1200 |       "Epoch #114 end\n",
1201 |       "Epoch #115 start\n",
1202 |       "Epoch #115 end\n",
1203 |       "Epoch #116 start\n",
1204 |       "Epoch #116 end\n",
1205 |       "Epoch #117 start\n",
1206 |       "Epoch #117 end\n",
1207 |       "Epoch #118 start\n",
1208 |       "Epoch #118 end\n",
1209 |       "Epoch #119 start\n",
1210 |       "Epoch #119 end\n",
1211 |       "Epoch #120 start\n",
1212 |       "Epoch #120 end\n",
1213 |       "Epoch #121 start\n",
1214 |       "Epoch #121 end\n",
1215 |       "Epoch #122 start\n",
1216 |       "Epoch #122 end\n",
1217 |       "Epoch #123 start\n",
1218 |       "Epoch #123 end\n",
1219 |       "Epoch #124 start\n",
1220 |       "Epoch #124 end\n",
1221 |       "Epoch #125 start\n",
1222 |       "Epoch #125 end\n",
1223 |       "Epoch #126 start\n",
1224 |       "Epoch #126 end\n",
1225 |       "Epoch #127 start\n",
1226 |       "Epoch #127 end\n",
1227 |       "Epoch #128 start\n",
1228 |       "Epoch #128 end\n",
1229 |       "Epoch #129 start\n",
1230 |       "Epoch #129 end\n",
1231 |       "Epoch #130 start\n",
1232 |       "Epoch #130 end\n",
1233 |       "Epoch #131 start\n",
1234 |       "Epoch #131 end\n",
1235 |       "Epoch #132 start\n",
1236 |       "Epoch #132 end\n",
1237 |       "Epoch #133 start\n",
1238 |       "Epoch #133 end\n",
1239 |       "Epoch #134 start\n",
1240 |       "Epoch #134 end\n",
1241 |       "Epoch #135 start\n",
1242 |       "Epoch #135 end\n",
1243 |       "Epoch #136 start\n",
1244 |       "Epoch #136 end\n",
1245 |       "Epoch #137 start\n",
1246 |       "Epoch #137 end\n",
1247 |       "Epoch #138 start\n",
1248 |       "Epoch #138 end\n",
1249 |       "Epoch #139 start\n",
1250 |       "Epoch #139 end\n",
1251 |       "Epoch #140 start\n",
1252 |       "Epoch #140 end\n",
1253 |       "Epoch #141 start\n",
1254 |       "Epoch #141 end\n",
1255 |       "Epoch #142 start\n",
1256 |       "Epoch #142 end\n",
1257 |       "Epoch #143 start\n",
1258 |       "Epoch #143 end\n",
1259 |       "Epoch #144 start\n",
1260 |       "Epoch #144 end\n",
1261 |       "Epoch #145 start\n",
1262 |       "Epoch #145 end\n",
1263 |       "Epoch #146 start\n",
1264 |       "Epoch #146 end\n",
1265 |       "Epoch #147 start\n",
1266 |       "Epoch #147 end\n",
1267 |       "Epoch #148 start\n",
1268 |       "Epoch #148 end\n",
1269 |       "Epoch #149 start\n",
1270 |       "Epoch #149 end\n",
1271 |       "Epoch #150 start\n",
1272 |       "Epoch #150 end\n",
1273 |       "Epoch #151 start\n",
1274 |       "Epoch #151 end\n",
1275 |       "Epoch #152 start\n",
1276 |       "Epoch #152 end\n",
1277 |       "Epoch #153 start\n",
1278 |       "Epoch #153 end\n",
1279 |       "Epoch #154 start\n",
1280 |       "Epoch #154 end\n",
1281 |       "Epoch #155 start\n",
1282 |       "Epoch #155 end\n",
1283 |       "Epoch #156 start\n",
1284 |       "Epoch #156 end\n",
1285 |       "Epoch #157 start\n",
1286 |       "Epoch #157 end\n",
1287 |       "Epoch #158 start\n",
1288 |       "Epoch #158 end\n",
1289 |       "Epoch #159 start\n",
1290 |       "Epoch #159 end\n",
1291 |       "Epoch #160 start\n",
1292 |       "Epoch #160 end\n",
1293 |       "Epoch #161 start\n",
1294 |       "Epoch #161 end\n",
1295 |       "Epoch #162 start\n",
1296 |       "Epoch #162 end\n",
1297 |       "Epoch #163 start\n",
1298 |       "Epoch #163 end\n",
1299 |       "Epoch #164 start\n",
1300 |       "Epoch #164 end\n",
1301 |       "Epoch #165 start\n",
1302 |       "Epoch #165 end\n",
1303 |       "Epoch #166 start\n",
1304 |       "Epoch #166 end\n",
1305 |       "Epoch #167 start\n",
1306 |       "Epoch #167 end\n",
1307 |       "Epoch #168 start\n",
1308 |       "Epoch #168 end\n",
1309 |       "Epoch #169 start\n",
1310 |       "Epoch #169 end\n",
1311 |       "Epoch #170 start\n",
1312 |       "Epoch #170 end\n",
1313 |       "Epoch #171 start\n",
1314 |       "Epoch #171 end\n",
1315 |       "Epoch #172 start\n",
1316 |       "Epoch #172 end\n",
1317 |       "Epoch #173 start\n",
1318 |       "Epoch #173 end\n",
1319 |       "Epoch #174 start\n",
1320 |       "Epoch #174 end\n",
1321 |       "Epoch #175 start\n",
1322 |       "Epoch #175 end\n",
1323 |       "Epoch #176 start\n",
1324 |       "Epoch #176 end\n",
1325 |       "Epoch #177 start\n",
1326 |       "Epoch #177 end\n",
1327 |       "Epoch #178 start\n",
1328 |       "Epoch #178 end\n",
1329 |       "Epoch #179 start\n",
1330 |       "Epoch #179 end\n",
1331 |       "Epoch #180 start\n",
1332 |       "Epoch #180 end\n",
1333 |       "Epoch #181 start\n",
1334 |       "Epoch #181 end\n",
1335 |       "Epoch #182 start\n",
1336 |       "Epoch #182 end\n",
1337 |       "Epoch #183 start\n",
1338 |       "Epoch #183 end\n",
1339 |       "Epoch #184 start\n",
1340 |       "Epoch #184 end\n",
1341 |       "Epoch #185 start\n",
1342 |       "Epoch #185 end\n",
1343 |       "Epoch #186 start\n",
1344 |       "Epoch #186 end\n",
1345 |       "Epoch #187 start\n",
1346 |       "Epoch #187 end\n",
1347 |       "Epoch #188 start\n",
1348 |       "Epoch #188 end\n",
1349 |       "Epoch #189 start\n",
1350 |       "Epoch #189 end\n",
1351 |       "Epoch #190 start\n",
1352 |       "Epoch #190 end\n",
1353 |       "Epoch #191 start\n",
1354 |       "Epoch #191 end\n",
1355 |       "Epoch #192 start\n",
1356 |       "Epoch #192 end\n",
1357 |       "Epoch #193 start\n",
1358 |       "Epoch #193 end\n",
1359 |       "Epoch #194 start\n",
1360 |       "Epoch #194 end\n",
1361 |       "Epoch #195 start\n",
1362 |       "Epoch #195 end\n",
1363 |       "Epoch #196 start\n",
1364 |       "Epoch #196 end\n",
1365 |       "Epoch #197 start\n",
1366 |       "Epoch #197 end\n",
1367 |       "Epoch #198 start\n",
1368 |       "Epoch #198 end\n",
1369 |       "Epoch #199 start\n",
1370 |       "Epoch #199 end\n"
1371 |      ]
1372 |     }
1373 |    ],
1374 |    "source": [
1375 |     "#train model - final******** with 200 epochs\n",
1376 |     "epoch_logger = EpochLogger()\n",
1377 |     "## Train doc2vec model\n",
1378 |     "model1 = Doc2Vec(tagged_data, vector_size=20, window=2, min_count=1, workers=4, epochs = 200, callbacks=[epoch_logger])\n"
1379 |    ]
1380 |   },
1381 |   {
1382 |    "cell_type": "code",
1383 |    "execution_count": 28,
1384 |    "metadata": {},
1385 |    "outputs": [],
1386 |    "source": [
1387 |     "# Save trained doc2vec model\n",
1388 |     "model1.save(\"Model/my_doc2vec_v2.model\")"
1389 |    ]
1390 |   },
1391 |   {
1392 |    "cell_type": "code",
1393 |    "execution_count": 30,
1394 |    "metadata": {},
1395 |    "outputs": [],
1396 |    "source": [
1397 |     "## Load saved doc2vec model\n",
1398 |     "model1= Doc2Vec.load(\"Model/my_doc2vec_v2.model\")"
1399 |    ]
1400 |   },
1401 |   {
1402 |    "cell_type": "code",
1403 |    "execution_count": 31,
1404 |    "metadata": {},
1405 |    "outputs": [
1406 |     {
1407 |      "data": {
1408 |       "text/plain": [
1409 |        "38941"
1410 |       ]
1411 |      },
1412 |      "execution_count": 31,
1413 |      "metadata": {},
1414 |      "output_type": "execute_result"
1415 |     }
1416 |    ],
1417 |    "source": [
1418 |     "#confirm length (should be 38941)\n",
1419 |     "len(tokenized_doc)"
1420 |    ]
1421 |   },
1422 |   {
1423 |    "cell_type": "code",
1424 |    "execution_count": 35,
1425 |    "metadata": {},
1426 |    "outputs": [],
1427 |    "source": [
1428 |     "## Get vector value\n",
1429 |     "vec = np.empty([38941,20])\n",
1430 |     "\n",
1431 |     "for k,i in enumerate(tokenized_doc):\n",
1432 |     "    \n",
1433 |     "    #print(i)\n",
1434 |     "    vector = model1.infer_vector(i)\n",
1435 |     "    vec[k] = vector\n",
1436 |     "    #vec = np.append(vector)\n",
1437 |     "    #vecf = np.append(vec,vector)\n",
1438 |     "\n",
1439 |     "# reshape into 2D\n",
1440 |     "new_arr = np.reshape(vec,(-1,20))"
1441 |    ]
1442 |   },
1443 |   {
1444 |    "cell_type": "code",
1445 |    "execution_count": 36,
1446 |    "metadata": {},
1447 |    "outputs": [],
1448 |    "source": [
1449 |     "rng = range(1, 21)\n",
1450 |     "vec_df = pd.DataFrame(new_arr, columns=['vec_' + str(i) for i in rng])"
1451 |    ]
1452 |   },
1453 |   {
1454 |    "cell_type": "code",
1455 |    "execution_count": 37,
1456 |    "metadata": {},
1457 |    "outputs": [
1458 |     {
1459 |      "name": "stdout",
1460 |      "output_type": "stream",
1461 |      "text": [
1462 |       "<class 'pandas.core.frame.DataFrame'>\n",
1463 |       "RangeIndex: 38941 entries, 0 to 38940\n",
1464 |       "Data columns (total 20 columns):\n",
1465 |       " #   Column  Non-Null Count  Dtype  \n",
1466 |       "---  ------  --------------  -----  \n",
1467 |       " 0   vec_1   38941 non-null  float64\n",
1468 |       " 1   vec_2   38941 non-null  float64\n",
1469 |       " 2   vec_3   38941 non-null  float64\n",
1470 |       " 3   vec_4   38941 non-null  float64\n",
1471 |       " 4   vec_5   38941 non-null  float64\n",
1472 |       " 5   vec_6   38941 non-null  float64\n",
1473 |       " 6   vec_7   38941 non-null  float64\n",
1474 |       " 7   vec_8   38941 non-null  float64\n",
1475 |       " 8   vec_9   38941 non-null  float64\n",
1476 |       " 9   vec_10  38941 non-null  float64\n",
1477 |       " 10  vec_11  38941 non-null  float64\n",
1478 |       " 11  vec_12  38941 non-null  float64\n",
1479 |       " 12  vec_13  38941 non-null  float64\n",
1480 |       " 13  vec_14  38941 non-null  float64\n",
1481 |       " 14  vec_15  38941 non-null  float64\n",
1482 |       " 15  vec_16  38941 non-null  float64\n",
1483 |       " 16  vec_17  38941 non-null  float64\n",
1484 |       " 17  vec_18  38941 non-null  float64\n",
1485 |       " 18  vec_19  38941 non-null  float64\n",
1486 |       " 19  vec_20  38941 non-null  float64\n",
1487 |       "dtypes: float64(20)\n",
1488 |       "memory usage: 5.9 MB\n"
1489 |      ]
1490 |     }
1491 |    ],
1492 |    "source": [
1493 |     "vec_df.info()"
1494 |    ]
1495 |   },
1496 |   {
1497 |    "cell_type": "code",
1498 |    "execution_count": 38,
1499 |    "metadata": {},
1500 |    "outputs": [],
1501 |    "source": [
1502 |     "con_job_1 = pd.concat([job, vec_df], axis=1)"
1503 |    ]
1504 |   },
1505 |   {
1506 |    "cell_type": "code",
1507 |    "execution_count": 2,
1508 |    "metadata": {},
1509 |    "outputs": [],
1510 |    "source": [
1511 |     "#saving final csv with additional vectors to match with resume. \n",
1512 |     "con_job_1.to_csv('wip/con_job_1.csv', index=False)"
1513 |    ]
1514 |   },
1515 |   {
1516 |    "cell_type": "code",
1517 |    "execution_count": null,
1518 |    "metadata": {},
1519 |    "outputs": [],
1520 |    "source": []
1521 |   }
1522 |  ],
1523 |  "metadata": {
1524 |   "kernelspec": {
1525 |    "display_name": "Python 3",
1526 |    "language": "python",
1527 |    "name": "python3"
1528 |   },
1529 |   "language_info": {
1530 |    "codemirror_mode": {
1531 |     "name": "ipython",
1532 |     "version": 3
1533 |    },
1534 |    "file_extension": ".py",
1535 |    "mimetype": "text/x-python",
1536 |    "name": "python",
1537 |    "nbconvert_exporter": "python",
1538 |    "pygments_lexer": "ipython3",
1539 |    "version": "3.7.6"
1540 |   }
1541 |  },
1542 |  "nbformat": 4,
1543 |  "nbformat_minor": 4
1544 | }
1545 | 


--------------------------------------------------------------------------------