├── .gitignore
├── get-auto-complete.py
├── readme.md
└── webapp
└── index.html
/.gitignore:
--------------------------------------------------------------------------------
# Ignore everything
*

# But not these files...
!.gitignore
!get-auto-complete.py
!readme.md
!webapp
!webapp/index.html



# !work-from-home_07-09-2020.txt
# !Startup-developers_07-09-2020.txt
# !twitter-marketing_07-09-2020.txt
# etc...
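
# A possible alternative (not enabled here): keep every generated keyword list by
# whitelisting the google/ output folder that get-auto-complete.py writes to,
# instead of listing each file by name.
# !google/
# !google/*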
--------------------------------------------------------------------------------
/get-auto-complete.py:
--------------------------------------------------------------------------------
import sys
import time
import requests
import urllib.parse
from datetime import datetime
from urllib.parse import urlencode
import xml.etree.ElementTree as ET
from collections import OrderedDict


keyword_list = []

keyword_found = 0

keyword_searched = 0

base_url = 'http://suggestqueries.google.com/complete/search'

# Updates
# 1. folder name for keyword
# 2. child folder for date
# 3. file for all keywords
# 4. CSV file for common keywords
# 5. eliminate keywords with "tools" "hubspot" "jobs"
# 6. sort by search volume and keyword density


def create_doc_file(search, string, LONGSEARCH):
    print("\n-------LONG TAIL KEYWORD FINDER-------\n")
    print("Search Engine \t: ", search)
    print("Keyword \t: ", string)
    print("Long search \t: ", LONGSEARCH)
    print('\nCreating File...', end="")
    sys.stdout.flush()

    date_today = datetime.now().strftime("%d-%m-%Y")

    sub_folder = search
    if LONGSEARCH: string = string + ' [LONG]'

    file_name = (sub_folder + '/' + string + "_" + date_today).replace(' ', '-')+".txt"
    file = open(file_name, 'w+')

    print(' Created {}'.format(file_name))
    file.close()
    return file_name
    # file.write(keyword_list)

def write(keywords, file_name):
    # create a file and write
    # print(file_name)
    with open(file_name,"a+") as f:
        for keyword in keywords:
            f.write(keyword+"\n")


def string_search(file_name, search, string):
    # count how many keywords searched
    global keyword_searched
    keyword_searched = keyword_searched + 1

    keywords = get_keywords(search, string)
    if keywords is not None:
        write(keywords, file_name)
        return len(keywords)
    else:
        return 0

def get_keywords(search_engine, string):
    # print("parsing {} now".format(string))


    # time.sleep(1)

    # OLD CODE
    # encode the string
    # encoded_string = urllib.parse.quote(string)
    # encoded_url = base_url + encoded_string

    # NEW CODE
    base_url = 'http://suggestqueries.google.com/complete/search'

    search_dictionary = {
        'output':'toolbar',
        'gl': 'in',
        'hl': 'en',
        'q' : string,
    }

    youtube_parameter = '&ds=yt'
    JSON_param = '&client=firefox'
    JSON_detail_param = '&client=chrome'


    if search_engine == 'youtube':
        search_dictionary['ds'] = 'yt'

    # search_dictionary['client'] = 'chrome'

    encoded_url = base_url +'?'+ urlencode(search_dictionary)

    # print(encoded_url)

    # send a request
    r = requests.get(encoded_url)

    # read the request, and get all suggestions;
    # the toolbar output is XML, roughly:
    # <toplevel><CompleteSuggestion><suggestion data="..."/></CompleteSuggestion>...</toplevel>

    try:
        suggestions = ET.fromstring(r.content)
    except ET.ParseError:
        # the response was not valid XML, skip this query
        return None

    # record all suggestions
    keywords = []
    this_keyword = 0
    for suggestion_ in suggestions.iter('suggestion'):
        # count how many keywords found
        global keyword_found
        keyword_found = keyword_found + 1
        keyword = suggestion_.attrib.get('data')
        keywords.append(keyword)
        this_keyword = this_keyword + 1

        # print(type(suggestion_.attrib.value()))
    return keywords

def main(search_engine, string, file_name, LONGSEARCH):
    # initial string
    print("Finding longtail {} keywords for '{}'... ".format(search_engine, string), end="")
    sys.stdout.flush()
    found_core_keyword = string_search(file_name, search_engine, string)
    print("Found {}".format(found_core_keyword))

    for i in range(ord('A'), ord('Z') + 1):
        print("Finding longtail {} keywords for '{}' with {}... ".format(search_engine, string, chr(i)), end = "")
        sys.stdout.flush()
        # prefixed alphabet
        found_core_keyword_in_loop = 0
        found_core_keyword_in_loop = found_core_keyword_in_loop + string_search(file_name, search_engine, chr(i)+" "+string)
        if LONGSEARCH: found_core_keyword_in_loop = found_core_keyword_in_loop + string_search(file_name, search_engine, chr(i)+chr(i)+" "+string)

        # postfixed alphabet
        found_core_keyword_in_loop = found_core_keyword_in_loop + string_search(file_name, search_engine, string+" "+chr(i))
        if LONGSEARCH: found_core_keyword_in_loop = found_core_keyword_in_loop + string_search(file_name, search_engine, string+" "+chr(i)+chr(i))

        print("Found {}".format(found_core_keyword_in_loop))

    print("---REPORT---")
    print('Found {} keywords by searching {} keywords on {}\n'.format(keyword_found, keyword_searched, search_engine))


def common_keywords(file_name, string):

    # list of new keywords
    with open(file_name,"r") as f:
        keywords = f.readlines()
        keywords = [x.strip() for x in keywords]


    # original keyword, string. Split by words
    string_words = string.lower().split()
    # stop_words = ['on','of','the','if','it','a','an']

    print("\n-------Word Repeating Frequently-------\n")



    print('cleaning keywords... ', end = "")
    sys.stdout.flush()

    # This nested comprehension splits each suggestion into words
    # (e.g. 'this is a keyword' -> ['this', 'is', 'a', 'keyword']),
    # flattens the per-suggestion lists into one list of words, and drops
    # any word that already appears in the base keyword (string_words).
    cleaned_keywords = [item for sublist in [keyword.split() for keyword in keywords] for item in sublist if item not in string_words]


    # count how often each remaining word appears, then sort by count (most frequent first)
    repeat_words = {i: cleaned_keywords.count(i) for i in cleaned_keywords}
    repeat_words = OrderedDict(sorted(repeat_words.items(), key=lambda x: x[1], reverse=True))

    print('DONE')

    for word, count in repeat_words.items():
        print('{}\t{}'.format(count, word))

    print("\n--------------------------------------\n")
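

# A possible sketch for update 5 in the list at the top of this file (eliminate
# keywords containing words such as "tools", "hubspot", "jobs"). The BLOCKED_WORDS
# list and the function name are illustrative only; nothing calls this yet.
BLOCKED_WORDS = ['tools', 'hubspot', 'jobs']

def remove_blocked_keywords(keywords, blocked_words=BLOCKED_WORDS):
    # keep a keyword only if none of its words is in the blocked list
    return [kw for kw in keywords
            if not any(word in blocked_words for word in kw.lower().split())]

# e.g. inside string_search(): keywords = remove_blocked_keywords(keywords)
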
if __name__ == '__main__':
    string = "How to win accounts"
    search_engine = "google"
    LONGSEARCH = True
    # file_name = create_doc_file(search_engine, string, LONGSEARCH)
    # main(search_engine, string, file_name, LONGSEARCH)

    file_name = 'google/how-to-win-accounts_13-10-2020.txt'
    common_keywords(file_name, string)


    # print("Creating folders... ", end="")
    # sys.stdout.flush()
    # time.sleep(2) # fake process
    # print("DONE")

    # time.sleep(1)

    # print("Creating files... ", end="")
    # sys.stdout.flush()
    # time.sleep(2) # fake process
    # print("DONE")
    # Add progress bar using: https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Keyword Auto Complete

## What It Does

Gives you a list of suggested long-tail keywords for a seed keyword, pulled from Google (or YouTube) autocomplete.

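To generate a list yourself, edit the seed keyword at the bottom of `get-auto-complete.py` and run it with Python 3 (the `requests` package must be installed, and the output folder, e.g. `google/`, must already exist). The values below are only an example:

```python
# bottom of get-auto-complete.py
string = "your seed keyword"
search_engine = "google"   # or "youtube"
LONGSEARCH = True
file_name = create_doc_file(search_engine, string, LONGSEARCH)
main(search_engine, string, file_name, LONGSEARCH)
```
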
## Get Your List of Keywords

If you want a similar list for your keyword, you can fill out [this Google Form](https://forms.gle/AwgnbB4FQNL3933j9), and I will email the list to you.


There are three sample keyword lists, which you can find below.

## Sample Keywords

[Startup Developer](/Startup-developers_07-09-2020.txt) (68 Suggestions)

[Twitter marketing](/twitter-marketing_07-09-2020.txt) (222 Suggestions)

[Work from home](/work-from-home_07-09-2020.txt) (476 Suggestions)

_Updated: 7th September 2020_


## Future Version

1. Downloadable CSV file (a rough sketch is shown below)
2. Sort keywords by search volume
3. Sort keywords by keyword density

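The CSV export is not implemented yet; the sketch below shows one possible shape for it, assuming the current one-keyword-per-line `.txt` output. The `to_csv` helper and the file names are illustrative only.

```python
import csv

def to_csv(txt_path, csv_path):
    """Convert a generated keyword .txt list into a simple CSV."""
    with open(txt_path) as f:
        keywords = [line.strip() for line in f if line.strip()]
    with open(csv_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["keyword", "word_count"])
        for kw in keywords:
            writer.writerow([kw, len(kw.split())])

# to_csv("google/how-to-win-accounts_13-10-2020.txt", "google/how-to-win-accounts.csv")
```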
--------------------------------------------------------------------------------
/webapp/index.html:
--------------------------------------------------------------------------------
[Markup not recovered for this page; the surviving text is the page title "Keyword Auto generator" and a results table with the columns "rel" and "Keyword".]
--------------------------------------------------------------------------------