├── .gitignore
├── README.md
├── gsc_api
    ├── __init__.py
    ├── auth.py
    ├── indexing.py
    └── search_analytics.py
├── logs
    └── .gitignore
├── main.py
└── results
    └── .gitignore


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | *.txt
  7 | *.csv
  8 | *.json
  9 | *.log
 10 | 
 11 | draft/*
 12 | results/*
 13 | 
 14 | # C extensions
 15 | *.so
 16 | 
 17 | # Distribution / packaging
 18 | .Python
 19 | build/
 20 | develop-eggs/
 21 | dist/
 22 | downloads/
 23 | eggs/
 24 | .eggs/
 25 | lib/
 26 | lib64/
 27 | parts/
 28 | sdist/
 29 | var/
 30 | wheels/
 31 | pip-wheel-metadata/
 32 | share/python-wheels/
 33 | *.egg-info/
 34 | .installed.cfg
 35 | *.egg
 36 | MANIFEST
 37 | 
 38 | # PyInstaller
 39 | #  Usually these files are written by a python script from a template
 40 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 41 | *.manifest
 42 | *.spec
 43 | 
 44 | # Installer logs
 45 | pip-log.txt
 46 | pip-delete-this-directory.txt
 47 | 
 48 | # Unit test / coverage reports
 49 | htmlcov/
 50 | .tox/
 51 | .nox/
 52 | .coverage
 53 | .coverage.*
 54 | .cache
 55 | nosetests.xml
 56 | coverage.xml
 57 | *.cover
 58 | *.py,cover
 59 | .hypothesis/
 60 | .pytest_cache/
 61 | 
 62 | # Translations
 63 | *.mo
 64 | *.pot
 65 | 
 66 | # Django stuff:
 67 | *.log
 68 | local_settings.py
 69 | db.sqlite3
 70 | db.sqlite3-journal
 71 | 
 72 | # Flask stuff:
 73 | instance/
 74 | .webassets-cache
 75 | 
 76 | # Scrapy stuff:
 77 | .scrapy
 78 | 
 79 | # Sphinx documentation
 80 | docs/_build/
 81 | 
 82 | # PyBuilder
 83 | target/
 84 | 
 85 | # Jupyter Notebook
 86 | .ipynb_checkpoints
 87 | 
 88 | # IPython
 89 | profile_default/
 90 | ipython_config.py
 91 | 
 92 | # pyenv
 93 | .python-version
 94 | 
 95 | # pipenv
 96 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 97 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 98 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 99 | #   install all needed dependencies.
100 | #Pipfile.lock
101 | 
102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
103 | __pypackages__/
104 | 
105 | # Celery stuff
106 | celerybeat-schedule
107 | celerybeat.pid
108 | 
109 | # SageMath parsed files
110 | *.sage.py
111 | 
112 | # Environments
113 | .env
114 | .venv
115 | env/
116 | venv/
117 | ENV/
118 | env.bak/
119 | venv.bak/
120 | 
121 | # Spyder project settings
122 | .spyderproject
123 | .spyproject
124 | 
125 | # Rope project settings
126 | .ropeproject
127 | 
128 | # mkdocs documentation
129 | /site
130 | 
131 | # mypy
132 | .mypy_cache/
133 | .dmypy.json
134 | dmypy.json
135 | 
136 | # Pyre type checker
137 | .pyre/
138 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Google Search Console API by [DRKWNG](https://drkwng.rocks)
 2 | 
 3 | - **Search Analytics API** - get all (ALL!) keywords from Google Search Console.
 4 | - **URL Inspection API** - check URLs indexation status and other params for Google.
 5 | - **Indexing API** - send up to 200 URLs to Googlebot with URL_UPDATED or URL_DELETED param.
 6 | 
 7 | ## Activate API and create credentials
 8 | ### OAuth client ID (Search Analytics and URL Inspection)
 9 | 1. Go to [https://console.cloud.google.com/](https://console.cloud.google.com/) -> APIs & Services -> Credentials. If it is your first project in Cloud Console create one.
10 | 2. Click "Create Credentials" -> "OAuth client ID" (**Application type = Desktop app**) and click "Create".
11 | 3. Download JSON and put it in the same folder with the program.
12 | 4. Go to APIs & Services -> Library and activate "Google Search Console API" and "Indexing API".
13 | 5. Start the program and follow instructions in the console.
14 | 
15 | ## Run Program
16 | 1. Made on [Python 3.8.x](https://www.python.org/downloads/) (recommended) 
17 | Tick the "Add to PATH" option during the installation process. 
18 | 
19 | 2. **Install packages:**
20 | 
21 | `pip install --upgrade google-api-python-client, google-auth-oauthlib, google-auth`
22 | 
23 | _Type in this command into your terminal._
24 | 
25 | 3. Start Terminal in the program folder and type in:  
26 | `python main.py` or `python3 main.py`
27 |  
28 | 4. Enjoy😻
29 | 
30 | 
31 | I appreciate your bug reports and suggestions🖖
32 | 


--------------------------------------------------------------------------------
/gsc_api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drkwng/google-search-console-api/1234c54088746482354c77b3e99e57740a93e161/gsc_api/__init__.py


--------------------------------------------------------------------------------
/gsc_api/auth.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from google_auth_oauthlib.flow import InstalledAppFlow
 4 | from googleapiclient.discovery import build
 5 | from googleapiclient.errors import HttpError
 6 | 
 7 | from google.oauth2.service_account import Credentials
 8 | 
 9 | 
10 | class GoogleOAuth:
11 |     def __init__(self, file_path, scopes, service_name, version):
12 |         """
13 |         Google OAuth for Search Analytics keywords report and URL Inspect
14 |         :param file_path: oauth json file
15 |         :type file_path: str
16 |         :param scopes: e.g. ['https://www.googleapis.com/auth/webmasters']
17 |         :type scopes: list
18 |         :param service_name: e.g. 'searchconsole'
19 |         :type service_name: str
20 |         :param version: e.g. 'v1'
21 |         :type version: str
22 |         """
23 |         self.OAUTH_FILE_PATH = file_path
24 |         self.SCOPES = scopes
25 |         self.SERVICE_NAME = service_name
26 |         self.VERSION = version
27 | 
28 |     def auth(self):
29 |         flow = InstalledAppFlow.from_client_secrets_file(self.OAUTH_FILE_PATH, self.SCOPES)
30 |         credentials = flow.run_local_server(port=0)
31 | 
32 |         try:
33 |             service = build(self.SERVICE_NAME, self.VERSION,
34 |                             credentials=credentials, cache_discovery=False)
35 |             return service
36 | 
37 |         except HttpError as err:
38 |             logging.error(err)
39 | 
40 | 
41 | class GoogleServiceAccount:
42 |     def __init__(self, file_path, scopes, service_name, version):
43 |         """
44 |         Google Service Account Auth
45 |         :param file_path: service account json file
46 |         :type file_path: str
47 |         :param scopes: e.g. ["https://www.googleapis.com/auth/indexing"]
48 |         :type scopes: list
49 |         :param service_name: e.g. 'indexing'
50 |         :type service_name: str
51 |         :param version: e.g. 'v3'
52 |         :type version: str
53 |         """
54 |         self.SERVICE_ACCOUNT_FILE = file_path
55 |         self.SCOPES = scopes
56 |         self.SERVICE_NAME = service_name
57 |         self.VERSION = version
58 | 
59 |     def auth(self):
60 |         credentials = Credentials.from_service_account_file(
61 |             self.SERVICE_ACCOUNT_FILE, scopes=self.SCOPES
62 |         )
63 |         try:
64 |             client = build(self.SERVICE_NAME, self.VERSION,
65 |                            credentials=credentials, cache_discovery=False)
66 |             return client
67 | 
68 |         except HttpError as err:
69 |             logging.error(err)
70 | 
71 | 


--------------------------------------------------------------------------------
/gsc_api/indexing.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from .auth import GoogleOAuth
 3 | 
 4 | 
 5 | class CheckIndexation(GoogleOAuth):
 6 |     def __init__(self, _client_secret_file):
 7 |         """
 8 |         Check URL Indexation Status (and many other parameters)
 9 |         :param _client_secret_file: OAuth json file path
10 |         :type _client_secret_file: str
11 |         """
12 |         self.SCOPES = ['https://www.googleapis.com/auth/webmasters']
13 |         self.CLIENT_SECRET = _client_secret_file
14 |         self.SERVICE_NAME = 'searchconsole'
15 |         self.VERSION = 'v1'
16 | 
17 |         super().__init__(self.CLIENT_SECRET, self.SCOPES, self.SERVICE_NAME, self.VERSION)
18 |         self.service = self.auth()
19 | 
20 |     def exec_request(self, _url, _domain):
21 |         try:
22 |             request = {
23 |                 'inspectionUrl': _url,
24 |                 'siteUrl': _domain
25 |             }
26 |             return self.service.urlInspection().index().inspect(body=request).execute()
27 | 
28 |         except Exception as err:
29 |             logging.error(err)
30 |             return None
31 | 
32 |     def worker(self, _data):
33 |         response = {}
34 |         for key, value in _data.items():
35 |             for val in value:
36 |                 response[val] = self.exec_request(val, key)
37 |                 logging.info(f'Check URL {val} response: {response[val]}')
38 |         return response
39 | 
40 | 
41 | class Indexation(GoogleOAuth):
42 |     def __init__(self, _service_account_file):
43 |         """
44 |         Send URLs to Googlebot via Indexing API
45 |         :param _service_account_file: OAuth json file path
46 |         :type _service_account_file: str
47 |         """
48 |         self.CLIENT_SECRET = _service_account_file
49 |         self.SCOPES = ["https://www.googleapis.com/auth/indexing"]
50 |         self.SERVICE_NAME = 'indexing'
51 |         self.VERSION = 'v3'
52 | 
53 |         super().__init__(self.CLIENT_SECRET, self.SCOPES, self.SERVICE_NAME, self.VERSION)
54 |         self.service = self.auth()
55 | 
56 |     @staticmethod
57 |     def callback_callable(request_id, response, exception):
58 |         if exception is not None:
59 |             logging.info(f'{exception}')
60 |         else:
61 |             data = (
62 |                 response['urlNotificationMetadata']['latestUpdate']['notifyTime'],
63 |                 response['urlNotificationMetadata']['url'],
64 |                 response['urlNotificationMetadata']['latestUpdate']['type'],
65 |             )
66 |             logging.info(data)
67 | 
68 |     def worker(self, _urls, _method):
69 |         batch = self.service.new_batch_http_request(callback=self.callback_callable)
70 |         for url in _urls:
71 |             batch.add(self.service.urlNotifications().publish(
72 |                 body={"url": url, "type": _method}
73 |             ))
74 |         return batch.execute()
75 | 
76 | 


--------------------------------------------------------------------------------
/gsc_api/search_analytics.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .auth import GoogleOAuth
 4 | 
 5 | 
 6 | class GetData(GoogleOAuth):
 7 |     def __init__(self, _client_secret_file, _domain):
 8 |         """
 9 |         Get all keywords from Google Search Console (last 16 months)
10 |         :param _client_secret_file: OAuth json file path
11 |         :type _client_secret_file: str
12 |         :param _domain: Google Search Console resource name
13 |         :type _domain: str
14 |         """
15 |         self.SCOPES = ['https://www.googleapis.com/auth/webmasters']
16 |         self.CLIENT_SECRET = _client_secret_file
17 |         self.SERVICE_NAME = 'searchconsole'
18 |         self.VERSION = 'v1'
19 | 
20 |         self.domain = _domain
21 | 
22 |         super().__init__(self.CLIENT_SECRET, self.SCOPES, self.SERVICE_NAME, self.VERSION)
23 |         self.service = self.auth()
24 | 
25 |     def execute_request(self, start_row, start, end, dimensions, r_type, aggregate_by):
26 |         """
27 |         Executes a searchAnalytics.query request.
28 |         """
29 |         params = {
30 |             'type': r_type,
31 |             'startDate': start,
32 |             'endDate': end,
33 |             'dimensions': dimensions,
34 |             'aggregationType': aggregate_by,
35 |             'dataState': 'all',
36 |             'rowLimit': 25000,
37 |             'startRow': start_row
38 |         }
39 |         request = self.service.searchanalytics().query(siteUrl=self.domain, body=params).execute()
40 |         return request
41 | 
42 |     def worker(self, start, end, dimensions, r_type='web', aggregate_by='auto'):
43 |         start_row = 0
44 |         result = []
45 |         while True:
46 |             try:
47 |                 response = self.execute_request(
48 |                     start_row, start, end,
49 |                     dimensions, r_type, aggregate_by
50 |                 )
51 |                 result.append(response)
52 |                 if len(response['rows']) == 25000:
53 |                     start_row += 25000
54 |                 else:
55 |                     logging.info(f'We got {start_row + len(response["rows"])} keywords in total.')
56 |                     break
57 | 
58 |             except Exception as err:
59 |                 logging.error(err)
60 | 
61 |         return result
62 | 


--------------------------------------------------------------------------------
/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | */
3 | !.gitignore


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | ################################################
  2 | # Google Search Console & Indexing API Tools
  3 | # by @drkwng (https://drkwng.rocks)
  4 | ################################################
  5 | 
  6 | import os
  7 | import re
  8 | import csv
  9 | import logging
 10 | 
 11 | from datetime import date, timedelta
 12 | 
 13 | from gsc_api import indexing, search_analytics
 14 | 
 15 | 
 16 | def choose_tool():
 17 |     print('Choose tool (enter number) \n'
 18 |           '1 - Search Analytics (get Performance data from Google Search Console\n'
 19 |           '2 - Check URLs indexation and other params\n'
 20 |           '3 - Send URLs to Googlebot')
 21 |     while True:
 22 |         mode = input('').strip()
 23 |         if int(mode) not in [1, 2, 3]:
 24 |             print('Please enter the correct number (1, 2 or 3)\n')
 25 |         else:
 26 |             break
 27 |     return mode
 28 | 
 29 | 
 30 | def search_api_key(mask):
 31 |     r = re.compile(f"{mask}.*")
 32 |     files = os.listdir()
 33 |     key = list(filter(r.match, files))
 34 |     if len(key) > 0:
 35 |         key = key[0]
 36 |     else:
 37 |         while True:
 38 |             key = input(f'{mask} json key file not found. Please enter your json key filename below:\n')
 39 |             if key in files:
 40 |                 break
 41 |     return key
 42 | 
 43 | 
 44 | def normalize_resource_name():
 45 |     name = input('Enter the Google Search Console resource name\n').strip()
 46 |     if 'http' not in name:
 47 |         name = f'sc-domain:{name}'
 48 |     elif not name.endswith('/'):
 49 |         name += '/'
 50 |     return name
 51 | 
 52 | 
 53 | def get_file():
 54 |     while True:
 55 |         file = input('Enter the txt filename:\n').strip()
 56 |         if file in os.listdir():
 57 |             break
 58 |         else:
 59 |             print('Please enter a correct filename')
 60 |     return file
 61 | 
 62 | 
 63 | def keywords_to_csv(file, mode, dimensions, result):
 64 |     res_folder = '\\results\\'
 65 |     path = os.getcwd() + res_folder + file
 66 |     with open(path, mode, encoding='utf-8', newline='') as f:
 67 |         writer = csv.writer(f, delimiter=';')
 68 |         if mode == 'w':
 69 |             heading = dimensions
 70 |             heading += ['clicks', 'impressions', 'ctr', 'position']
 71 |             writer.writerow(heading)
 72 | 
 73 |         for elem in result['rows']:
 74 |             row = [key for key in elem['keys']]
 75 |             row += [elem['clicks'], elem['ctr'], elem['impressions'], elem['position']]
 76 |             writer.writerow(row)
 77 | 
 78 | 
 79 | def check_index_to_csv(file, result):
 80 |     res_folder = '\\results\\'
 81 |     path = os.getcwd() + res_folder + file
 82 |     with open(path, 'w', encoding='utf-8', newline='') as f:
 83 |         writer = csv.writer(f, delimiter=';')
 84 |         heading = ['url', 'coverageState', 'robotsTxtState', 'indexingState', 'lastCrawlTime',
 85 |                    'googleCanonical', 'userCanonical', 'mobileUsabilityResult']
 86 |         writer.writerow(heading)
 87 | 
 88 |         for key, value in result.items():
 89 |             row = [key]
 90 |             if value is not None:
 91 |                 row += [
 92 |                     value['inspectionResult']['indexStatusResult']['coverageState'],
 93 |                     value['inspectionResult']['indexStatusResult']['robotsTxtState'],
 94 |                     value['inspectionResult']['indexStatusResult']['indexingState'],
 95 |                     value['inspectionResult']['indexStatusResult']['lastCrawlTime'],
 96 |                 ]
 97 |                 try:
 98 |                     row += [
 99 |                         value['inspectionResult']['indexStatusResult']['googleCanonical'],
100 |                         value['inspectionResult']['indexStatusResult']['userCanonical']
101 |                     ]
102 |                 except KeyError:
103 |                     row += [None, None]
104 | 
105 |                 try:
106 |                     row.append(value['inspectionResult']['mobileUsabilityResult']['verdict'])
107 |                 except KeyError:
108 |                     row.append(None)
109 |             else:
110 |                 row.append(value)
111 | 
112 |             writer.writerow(row)
113 | 
114 | 
115 | def init_get_keywords(key):
116 |     resource = normalize_resource_name()
117 | 
118 |     today = date.today()
119 | 
120 |     while True:
121 |         num_months = input('Please enter number of months period (1-16) you want to get data '
122 |                            'or press Enter to get data with the default 16 months period:\n').strip()
123 |         if 16 > int(num_months) > 0:
124 |             start_date = str(today - timedelta(days=int(num_months)*30))
125 |             break
126 |         elif len(num_months) == 0:
127 |             start_date = str(today - timedelta(days=486))
128 |             print(f'Start date is: {start_date}')
129 |             break
130 |         else:
131 |             print("Please enter the number in 1 to 16 range or press Enter")
132 | 
133 |     end_date = str(date.today() - timedelta(days=1))
134 | 
135 |     available_dimensions = ["date", "query", "page", "country", "device", "search_appearance"]
136 |     while True:
137 |         dimensions = list(input(f"Please input one of the available dimensions \n"
138 |                                 f"{available_dimensions} divided by ',' WITHOUT SPACES\n"
139 |                                 f"or press Enter to get data with the 'query' dimension:\n").strip().split(','))
140 |         if set(dimensions).issubset(set(available_dimensions)):
141 |             break
142 |         elif len(dimensions) == 0:
143 |             dimensions = ['query', ]
144 |             print("You chose a 'query' dimension")
145 |             break
146 |         else:
147 |             print("Please enter correct dimensions or press Enter")
148 | 
149 |     get_keywords = search_analytics.GetData(key, resource)
150 |     print('Keywords parsing has started. \n'
151 |           'Please wait and stay calm ^_____^')
152 | 
153 |     result = get_keywords.worker(start_date, end_date, dimensions=dimensions)
154 | 
155 |     res_file = 'search_analytics.csv'
156 |     for num, elem in enumerate(result):
157 |         keywords_to_csv(res_file, 'w' if num == 0 else 'a', dimensions, elem)
158 | 
159 |     print(f'Done! Check the {res_file} file in "results/" folder')
160 | 
161 | 
162 | def init_indexation_check(key, file):
163 |     resource = normalize_resource_name()
164 |     data = {}
165 | 
166 |     with open(file, 'r', encoding='utf-8') as f:
167 |         data[resource] = [url.strip() for url in f]
168 | 
169 |     check_index = indexing.CheckIndexation(key)
170 |     print('URLs indexation check has started. \n'
171 |           'Please wait and stay calm ^_____^')
172 |     result = check_index.worker(data)
173 | 
174 |     res_file = 'check_urls.csv'
175 |     check_index_to_csv(res_file, result)
176 | 
177 |     print(f'Done! Check the {res_file} file in "results/" folder')
178 | 
179 | 
180 | def init_send_urls(key, file):
181 |     while True:
182 |         choose_msg = input('\nChoose one of methods (print number) and press Enter \n'
183 |                            '1 - URL_UPDATED\n'
184 |                            '2 - URL_DELETED:\n')
185 |         if '1' in choose_msg:
186 |             method = 'URL_UPDATED'
187 |             break
188 |         elif '2' in choose_msg:
189 |             method = 'URL_DELETED'
190 |             break
191 |         else:
192 |             print('Please enter correct number')
193 | 
194 |     with open(file, 'r', encoding='utf-8') as f:
195 |         urls = [url.strip() for url in f]
196 | 
197 |     index = indexing.Indexation(key)
198 |     print('URLs sending to Googlebot has started. \n'
199 |           'Please wait and stay calm ^_____^')
200 |     if 100 < len(urls) <= 200:
201 |         index.worker(urls[:99], method)
202 |         index.worker(urls[100:], method)
203 |         print(f'Done! Check the logs.log file in "logs/" folder')
204 |     elif len(urls) < 100:
205 |         index.worker(urls, method)
206 |         print(f'Done! Check the logs.log file in "logs/" folder')
207 |     else:
208 |         print('You are trying to send more than 200 URLs. \n'
209 |               'There is a 200 URLs quota =(')
210 |         exit()
211 | 
212 | 
213 | def main():
214 |     tool = choose_tool()
215 |     path = os.getcwd()
216 | 
217 |     logging.basicConfig(level=logging.INFO, filename=f'{path}/logs/logs.log')
218 | 
219 |     api_key = search_api_key('client_secret')
220 |     if int(tool) == 1:
221 |         init_get_keywords(api_key)
222 | 
223 |     elif int(tool) == 2:
224 |         file = get_file()
225 |         init_indexation_check(api_key, file)
226 | 
227 |     elif int(tool) == 3:
228 |         file = get_file()
229 |         init_send_urls(api_key, file)
230 | 
231 | 
232 | if __name__ == "__main__":
233 |     main()
234 | 
235 |     # Old clear not unique values func for Keywords Search Analytics results
236 |     # One day it will be back :-)
237 |     # def clear_not_unique(self):
238 |     #     with open(self.file, 'r', encoding='utf-8', newline='') as f:
239 |     #         reader = csv.reader(f, delimiter=';')
240 |     #         all_keys = {rows[0]: rows[1:] for rows in reader}
241 |     #     with open(self.file, 'w', encoding='utf-8', newline='') as f:
242 |     #         writer = csv.writer(f, delimiter=';')
243 |     #         for _key, _value in all_keys.items():
244 |     #             row = [_key, _value[0], _value[1], _value[2], _value[3], _value[4]]
245 |     #             writer.writerow(row)
246 | 


--------------------------------------------------------------------------------
/results/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | */
3 | !.gitignore


--------------------------------------------------------------------------------