├── README.md
├── LICENSE
├── Segmentation.ipynb
├── Log File Analysis.ipynb
├── Internal Link Analysis.ipynb
└── Google_PyTrends.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # python-for-seo
2 |
3 | Hello!
4 |
5 | This is a small collection of beginner-friendly Python scripts for performing data analysis for SEO. Please read [this article](https://www.searchenginejournal.com/7-example-projects-to-get-started-with-python-for-seo/389336/) to find out more.
6 |
7 | If you have any questions or would like some data to play around with, please feel free to reach out to me via email (ruth.everett@deepcrawl.com) or on Twitter [@rvtheverett](https://twitter.com/rvtheverett).
8 |
9 | I hope to add more scripts soon :)
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 rvth
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Segmentation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# Importing all needed libraries\n",
10 | "import pandas as pd\n",
11 | "from google.colab import files\n",
12 | "import io\n",
13 | "import re"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": null,
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "# Upload crawl data\n",
23 | "df = pd.read_csv('/Users/rutheverett/Downloads/file-path')"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "# Define your own custom regex segments\n",
33 | "\n",
34 | "segment_definitions = [\n",
35 | " [(r'\\/blog\\/'), 'Blog'],\n",
36 | " [(r'\\/technical-seo-library\\/'), 'Technical SEO Library'],\n",
37 | " [(r'\\/hangout-library\\/'), 'Hangout Library'],\n",
38 | " [(r'\\/guides\\/'), 'Guides'],\n",
39 | " [(r'\\/case-studies\\/'), 'Case Studies'],\n",
40 | " [(r'\\/why-'), 'Solutions'],\n",
41 | " ]\n",
42 | "\n",
43 | "use_segment_definitions = True\n",
44 | "\n",
45 | "def get_segment(url):\n",
46 | " \n",
47 | " if use_segment_definitions == True:\n",
48 | " for segment_definition in segment_definitions:\n",
49 | " if re.findall(segment_definition[0], url):\n",
50 | " return segment_definition[1]\n",
51 | " return 'Other'\n",
52 | "\n",
53 | "# Add a segment column, and assign each URL a segment \n",
54 | "df['segment'] = df['url'].apply(lambda x: get_segment(x))"
55 | ]
56 | },
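{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check: count how many URLs fell into each segment\n",
"# (a very large 'Other' bucket usually means a segment definition is missing)\n",
"df['segment'].value_counts()"
]
},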
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "# Function to create a segment from a URL\n",
64 | "def get_segment(url):\n",
65 | " \n",
66 | " slug = re.search(r'https?:\\/\\/.*?\\//?([^\\/]*)\\/', url)\n",
67 | " if slug:\n",
68 | " return slug.group(1)\n",
69 | " else:\n",
70 | " return 'None'\n",
71 | "\n",
72 | "# Add a segment column, and assign each URL a segment \n",
73 | "df['segment'] = df['url'].apply(lambda x: get_segment(x))"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "df.head(10)"
83 | ]
84 | }
85 | ],
86 | "metadata": {
87 | "kernelspec": {
88 | "display_name": "Python 3",
89 | "language": "python",
90 | "name": "python3"
91 | },
92 | "language_info": {
93 | "codemirror_mode": {
94 | "name": "ipython",
95 | "version": 3
96 | },
97 | "file_extension": ".py",
98 | "mimetype": "text/x-python",
99 | "name": "python",
100 | "nbconvert_exporter": "python",
101 | "pygments_lexer": "ipython3",
102 | "version": "3.7.6"
103 | }
104 | },
105 | "nbformat": 4,
106 | "nbformat_minor": 4
107 | }
108 |
--------------------------------------------------------------------------------
/Log File Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 18,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import requests\n",
12 | "import re\n",
13 | "import seaborn as sns\n",
14 | "import matplotlib.pyplot as plt"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 20,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "#upload data\n",
24 | "data = ('/Users/rutheverett/Downloads/www-deepcrawl-com_12-10-2020_All_Pages_basic.csv')\n",
25 | "#select colums \n",
26 | "columns = ['url',\n",
27 | " 'log_requests_total',\n",
28 | " 'log_requests_desktop',\n",
29 | " 'log_requests_mobile',\n",
30 | " 'indexable',\n",
31 | " 'http_status_code',\n",
32 | " 'primary_page',\n",
33 | " 'duplicate_page', \n",
34 | " 'search_console_total_clicks',\n",
35 | " 'level',\n",
36 | " 'links_in_count']\n",
37 | "#read in data colums \n",
38 | "df = pd.read_csv(data , usecols=columns)\n",
39 | "df.head(10)"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "#Segment URLs \n",
49 | "segment_definitions = [\n",
50 | " [(r'\\/blog\\/'), 'Blog'],\n",
51 | " [(r'\\/technical-seo-library\\/'), 'Technical SEO Library'],\n",
52 | " [(r'\\/hangout-library\\/'), 'Hangout Library'],\n",
53 | " [(r'\\/guides\\/'), 'Guides'],\n",
54 | " [(r'\\/case-studies\\/'), 'Case Studies'],\n",
55 | " [(r'\\/why-'), 'Solutions'],\n",
56 | " ]\n",
57 | "\n",
58 | "use_segment_definitions = True\n",
59 | "\n",
60 | "def get_segment(url):\n",
61 | " \n",
62 | " if use_segment_definitions == True:\n",
63 | " for segment_definition in segment_definitions:\n",
64 | " if re.findall(segment_definition[0], url):\n",
65 | " return segment_definition[1]\n",
66 | " return 'Other'\n",
67 | "\n",
68 | "df['segment'] = df['url'].apply(lambda x: get_segment(x))\n",
69 | "\n",
70 | "# Replace NaN with 0\n",
71 | "df['search_console_total_clicks'].replace(np.nan, 0, inplace=True)\n",
72 | "df['log_requests_total'].replace(np.nan, 0, inplace=True)\n",
73 | "df['log_requests_mobile'].replace(np.nan, 0, inplace=True)\n",
74 | "df['log_requests_desktop'].replace(np.nan, 0, inplace=True)\n",
75 | "df['level'].replace(np.nan, 0, inplace=True)"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "#Create pivot table with total log file requests for each segment\n",
85 | "total_log_requests = df.pivot_table(index='segment', values=['url', 'log_requests_total', 'log_requests_mobile', 'log_requests_desktop'], aggfunc={'url':len, 'log_requests_total':sum, 'log_requests_mobile':sum, 'log_requests_desktop':sum})\n",
86 | "total_log_requests['percent_of_total_log_requests']= (total_log_requests['log_requests_total']/total_log_requests['log_requests_total'].sum()).apply('{:.2%}'.format)\n",
87 | "#Reset Index\n",
88 | "new_total_log_requests = total_log_requests.reset_index('segment')\n",
89 | "new_total_log_requests"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {},
96 | "outputs": [],
97 | "source": [
98 | "#Create bar chart for total requests per category\n",
99 | "sns.barplot(data=new_pivot, x='segment', y='log_requests_total')\n",
100 | "plt.xticks(rotation=90)\n",
101 | "plt.show()"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "#Create bar chart for total mobile requests per category\n",
111 | "sns.barplot(data=new_pivot, x='segment', y='log_requests_mobile')\n",
112 | "plt.xticks(rotation=90)\n",
113 | "plt.show()"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "#Create bar chart for total desktop requests per category\n",
123 | "sns.barplot(data=new_pivot, x='segment', y='log_requests_desktop')\n",
124 | "plt.xticks(rotation=90)\n",
125 | "plt.show()"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {},
132 | "outputs": [],
133 | "source": [
134 | "#Create pivot table with total log file requests for indexability status \n",
135 | "indexable_log_file = df.pivot_table(index='indexable', values=['url', 'log_requests_total', 'log_requests_mobile', 'log_requests_desktop'], aggfunc={'url':len, 'log_requests_total':sum, 'log_requests_mobile':sum, 'log_requests_desktop':sum})\n",
136 | "indexable_log_file['percent_of_total_log_requests']= (indexable_log_file['log_requests_total']/indexable_log_file['log_requests_total'].sum()).apply('{:.2%}'.format)\n",
137 | "indexable_log_file"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "# Function to create a pivot table with a variable number of indexes\n",
147 | "\n",
148 | "def pivot_table(df, indexes):\n",
149 | " pivot_table = df.pivot_table(index=indexes, values=['url', 'log_requests_total'], aggfunc={'url':len, 'log_requests_total':sum})\n",
150 | "\n",
151 | " pivot_table = pivot_table.sort_values('log_requests_total', ascending=False)\n",
152 | " pivot_table['percent_of_total_log_requests']= (pivot_table['log_requests_total']/pivot_table['log_requests_total'].sum()).apply('{:.2%}'.format)\n",
153 | " pivot_table['average_log_requests_per_url']= (pivot_table['log_requests_total']/pivot_table['url']).apply('{:.1f}'.format)\n",
154 | " pivot_table['log_requests_total'] = (pivot_table['log_requests_total']).apply('{:,.0f}'.format)\n",
155 | " pivot_table['url'] = (pivot_table['url']).apply('{:,}'.format)\n",
156 | " pivot_table = pivot_table.rename({'url': 'url_count'}, axis='columns')\n",
157 | "\n",
158 | " return pivot_table"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "#Create pivot table with total log file requests for status code\n",
168 | "pivot_status = pivot_table(df, ['category', 'http_status_code'])\n",
169 | "pivot_status"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": null,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "#Create pivot table with total log file requests for 200 status code, indexable pages - you can change these to be any status code and either indexable or non-indexable\n",
179 | "df_200 = df[(df.http_status_code == 200)]\n",
180 | "\n",
181 | "pivot_indexable = pivot_table(df_200, ['category', 'indexable'])\n",
182 | "pivot_indexable"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "If you would like to see how log file requests have changed over 6 months: "
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {},
196 | "outputs": [],
197 | "source": [
198 | "#upload multiple dfs \n",
199 | "june_df = pd.read_csv(june , usecols=columns)\n",
200 | "july_df = pd.read_csv(july , usecols=columns)\n",
201 | "aug_df = pd.read_csv(aug , usecols=columns) \n",
202 | "sep_df = pd.read_csv(sep , usecols=columns)\n",
203 | "oct_df = pd.read_csv(oct , usecols=columns)\n",
204 | "nov_df = pd.read_csv(nov , usecols=columns)\n",
205 | "\n",
206 | "june_df['month'] = 'May'\n",
207 | "july_df['month'] = 'June'\n",
208 | "aug_df['month'] = 'July'\n",
209 | "sep_df['month'] = 'August'\n",
210 | "oct_df['month'] = 'September'\n",
211 | "nov_df['month'] = 'October'\n",
212 | "\n",
213 | "#concatenate dataframes together\n",
214 | "df_all = pd.concat(objs =[june_df, july_df, aug_df, sep_df, oct_df, nov_df], ignore_index=True)"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {},
221 | "outputs": [],
222 | "source": [
223 | "#create pivot table to store all data\n",
224 | "def pivot_table(df, indexes):\n",
225 | " pivot_table = df.pivot_table(index=indexes, values=['url', 'log_requests_total'], aggfunc={'url':len, 'log_requests_total':sum})\n",
226 | "\n",
227 | " pivot_table = pivot_table.sort_values('log_requests_total', ascending=False)\n",
228 | " pivot_table['percent_of_total_log_requests']= (pivot_table['log_requests_total']/pivot_table['log_requests_total'].sum()).apply('{:.2%}'.format)\n",
229 | " pivot_table['average_log_requests_per_url']= (pivot_table['log_requests_total']/pivot_table['url']).apply('{:.1f}'.format)\n",
230 | " pivot_table['log_requests_total'] = (pivot_table['log_requests_total']).apply('{:,.0f}'.format)\n",
231 | " pivot_table['url'] = (pivot_table['url']).apply('{:,}'.format)\n",
232 | " pivot_table = pivot_table.rename({'url': 'url_count'}, axis='columns')\n",
233 | "\n",
234 | " return pivot_table"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": null,
240 | "metadata": {},
241 | "outputs": [],
242 | "source": [
243 | "#create pivot table to get count of log file requests for each month\n",
244 | "pivot_table_total = df.pivot_table(index='month', values=['url', 'log_requests_total', 'log_requests_mobile', 'log_requests_desktop'], aggfunc={'url':len, 'log_requests_total':sum, 'log_requests_mobile':sum, 'log_requests_desktop':sum})\n",
245 | "pivot_table_total \n",
246 | "\n",
247 | "#reset index to get months in order\n",
248 | "new_index = ['May', 'June', 'July', 'August', 'September', 'October']\n",
249 | "new_pivot = pivot_table_total.reindex(new_index)\n",
250 | "new_pivot"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": null,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "#create line graph to show log requests over time \n",
260 | "sns.lineplot(x = \"month\", y = \"log_requests_total\", data=new_pivot)\n",
261 | "plt.show()"
262 | ]
263 | }
264 | ],
265 | "metadata": {
266 | "kernelspec": {
267 | "display_name": "Python 3",
268 | "language": "python",
269 | "name": "python3"
270 | },
271 | "language_info": {
272 | "codemirror_mode": {
273 | "name": "ipython",
274 | "version": 3
275 | },
276 | "file_extension": ".py",
277 | "mimetype": "text/x-python",
278 | "name": "python",
279 | "nbconvert_exporter": "python",
280 | "pygments_lexer": "ipython3",
281 | "version": "3.7.6"
282 | }
283 | },
284 | "nbformat": 4,
285 | "nbformat_minor": 4
286 | }
287 |
--------------------------------------------------------------------------------
/Internal Link Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import re\n",
11 | "import numpy as np\n",
12 | "import requests\n",
13 | "import urllib\n",
14 | "import io"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 3,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "#insert crawl csv \n",
24 | "data = ('/Users/rutheverett/Downloads/www-deepcrawl-com_12-10-2020_All_Pages_basic.csv')\n",
25 | "#update to include columns you would like to include from csv \n",
26 | "columns = ['url', 'level', 'http_status_code', 'indexable', 'links_in_count', 'followed_links_in_count', 'links_out_count', 'deeprank', 'backlink_count', 'backlink_domain_count']\n",
27 | "df = pd.read_csv(data , usecols=columns)"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 7,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/html": [
38 | "
\n",
39 | "\n",
52 | "
\n",
53 | " \n",
54 | " \n",
55 | " | \n",
56 | " deeprank | \n",
57 | " level | \n",
58 | " url | \n",
59 | " http_status_code | \n",
60 | " indexable | \n",
61 | " links_in_count | \n",
62 | " links_out_count | \n",
63 | " followed_links_in_count | \n",
64 | " backlink_count | \n",
65 | " backlink_domain_count | \n",
66 | " segment | \n",
67 | "
\n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " | 0 | \n",
72 | " 2.13 | \n",
73 | " 3 | \n",
74 | " https://www.deepcrawl.com/knowledge/technical-... | \n",
75 | " 200 | \n",
76 | " True | \n",
77 | " 3 | \n",
78 | " 88 | \n",
79 | " 3 | \n",
80 | " NaN | \n",
81 | " NaN | \n",
82 | " Technical SEO Library | \n",
83 | "
\n",
84 | " \n",
85 | " | 1 | \n",
86 | " 2.55 | \n",
87 | " 3 | \n",
88 | " https://www.deepcrawl.com/knowledge/guides/sch... | \n",
89 | " 200 | \n",
90 | " True | \n",
91 | " 6 | \n",
92 | " 114 | \n",
93 | " 6 | \n",
94 | " NaN | \n",
95 | " NaN | \n",
96 | " Guides | \n",
97 | "
\n",
98 | " \n",
99 | " | 2 | \n",
100 | " 2.19 | \n",
101 | " 3 | \n",
102 | " https://www.deepcrawl.com/knowledge/technical-... | \n",
103 | " 200 | \n",
104 | " True | \n",
105 | " 1 | \n",
106 | " 93 | \n",
107 | " 1 | \n",
108 | " NaN | \n",
109 | " NaN | \n",
110 | " Technical SEO Library | \n",
111 | "
\n",
112 | " \n",
113 | " | 3 | \n",
114 | " 2.51 | \n",
115 | " 3 | \n",
116 | " https://www.deepcrawl.com/knowledge/guides/seg... | \n",
117 | " 200 | \n",
118 | " True | \n",
119 | " 1 | \n",
120 | " 92 | \n",
121 | " 1 | \n",
122 | " NaN | \n",
123 | " NaN | \n",
124 | " Guides | \n",
125 | "
\n",
126 | " \n",
127 | " | 4 | \n",
128 | " 2.57 | \n",
129 | " 3 | \n",
130 | " https://www.deepcrawl.com/blog/events/go-red-c... | \n",
131 | " 200 | \n",
132 | " True | \n",
133 | " 13 | \n",
134 | " 108 | \n",
135 | " 13 | \n",
136 | " NaN | \n",
137 | " NaN | \n",
138 | " Blog | \n",
139 | "
\n",
140 | " \n",
141 | " | 5 | \n",
142 | " 2.07 | \n",
143 | " 3 | \n",
144 | " https://www.deepcrawl.com/knowledge/technical-... | \n",
145 | " 200 | \n",
146 | " True | \n",
147 | " 2 | \n",
148 | " 86 | \n",
149 | " 2 | \n",
150 | " NaN | \n",
151 | " NaN | \n",
152 | " Technical SEO Library | \n",
153 | "
\n",
154 | " \n",
155 | " | 6 | \n",
156 | " 2.07 | \n",
157 | " 3 | \n",
158 | " https://www.deepcrawl.com/knowledge/technical-... | \n",
159 | " 200 | \n",
160 | " True | \n",
161 | " 2 | \n",
162 | " 86 | \n",
163 | " 2 | \n",
164 | " NaN | \n",
165 | " NaN | \n",
166 | " Technical SEO Library | \n",
167 | "
\n",
168 | " \n",
169 | " | 7 | \n",
170 | " 2.59 | \n",
171 | " 3 | \n",
172 | " https://www.deepcrawl.com/knowledge/technical-... | \n",
173 | " 200 | \n",
174 | " True | \n",
175 | " 5 | \n",
176 | " 97 | \n",
177 | " 5 | \n",
178 | " NaN | \n",
179 | " NaN | \n",
180 | " Technical SEO Library | \n",
181 | "
\n",
182 | " \n",
183 | " | 8 | \n",
184 | " 2.55 | \n",
185 | " 3 | \n",
186 | " https://www.deepcrawl.com/knowledge/guides/ren... | \n",
187 | " 200 | \n",
188 | " True | \n",
189 | " 6 | \n",
190 | " 94 | \n",
191 | " 6 | \n",
192 | " NaN | \n",
193 | " NaN | \n",
194 | " Guides | \n",
195 | "
\n",
196 | " \n",
197 | " | 9 | \n",
198 | " 2.17 | \n",
199 | " 3 | \n",
200 | " https://www.deepcrawl.com/knowledge/technical-... | \n",
201 | " 200 | \n",
202 | " True | \n",
203 | " 2 | \n",
204 | " 100 | \n",
205 | " 2 | \n",
206 | " NaN | \n",
207 | " NaN | \n",
208 | " Technical SEO Library | \n",
209 | "
\n",
210 | " \n",
211 | "
\n",
212 | "
"
213 | ],
214 | "text/plain": [
215 | " deeprank level url \\\n",
216 | "0 2.13 3 https://www.deepcrawl.com/knowledge/technical-... \n",
217 | "1 2.55 3 https://www.deepcrawl.com/knowledge/guides/sch... \n",
218 | "2 2.19 3 https://www.deepcrawl.com/knowledge/technical-... \n",
219 | "3 2.51 3 https://www.deepcrawl.com/knowledge/guides/seg... \n",
220 | "4 2.57 3 https://www.deepcrawl.com/blog/events/go-red-c... \n",
221 | "5 2.07 3 https://www.deepcrawl.com/knowledge/technical-... \n",
222 | "6 2.07 3 https://www.deepcrawl.com/knowledge/technical-... \n",
223 | "7 2.59 3 https://www.deepcrawl.com/knowledge/technical-... \n",
224 | "8 2.55 3 https://www.deepcrawl.com/knowledge/guides/ren... \n",
225 | "9 2.17 3 https://www.deepcrawl.com/knowledge/technical-... \n",
226 | "\n",
227 | " http_status_code indexable links_in_count links_out_count \\\n",
228 | "0 200 True 3 88 \n",
229 | "1 200 True 6 114 \n",
230 | "2 200 True 1 93 \n",
231 | "3 200 True 1 92 \n",
232 | "4 200 True 13 108 \n",
233 | "5 200 True 2 86 \n",
234 | "6 200 True 2 86 \n",
235 | "7 200 True 5 97 \n",
236 | "8 200 True 6 94 \n",
237 | "9 200 True 2 100 \n",
238 | "\n",
239 | " followed_links_in_count backlink_count backlink_domain_count \\\n",
240 | "0 3 NaN NaN \n",
241 | "1 6 NaN NaN \n",
242 | "2 1 NaN NaN \n",
243 | "3 1 NaN NaN \n",
244 | "4 13 NaN NaN \n",
245 | "5 2 NaN NaN \n",
246 | "6 2 NaN NaN \n",
247 | "7 5 NaN NaN \n",
248 | "8 6 NaN NaN \n",
249 | "9 2 NaN NaN \n",
250 | "\n",
251 | " segment \n",
252 | "0 Technical SEO Library \n",
253 | "1 Guides \n",
254 | "2 Technical SEO Library \n",
255 | "3 Guides \n",
256 | "4 Blog \n",
257 | "5 Technical SEO Library \n",
258 | "6 Technical SEO Library \n",
259 | "7 Technical SEO Library \n",
260 | "8 Guides \n",
261 | "9 Technical SEO Library "
262 | ]
263 | },
264 | "execution_count": 7,
265 | "metadata": {},
266 | "output_type": "execute_result"
267 | }
268 | ],
269 | "source": [
270 | "#read first 10 rows\n",
271 | "df.head(10)"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 8,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "#set up segments \n",
281 | "segment_definitions = [\n",
282 | " [(r'\\/blog\\/'), 'Blog'],\n",
283 | " [(r'\\/technical-seo-library\\/'), 'Technical SEO Library'],\n",
284 | " [(r'\\/hangout-library\\/'), 'Hangout Library'],\n",
285 | " [(r'\\/guides\\/'), 'Guides'],\n",
286 | " [(r'\\/case-studies\\/'), 'Case Studies'],\n",
287 | " [(r'\\/why-'), 'Solutions'],\n",
288 | " ]\n",
289 | "\n",
290 | "use_segment_definitions = True\n",
291 | "\n",
292 | "def get_segment(url):\n",
293 | " \n",
294 | " if use_segment_definitions == True:\n",
295 | " for segment_definition in segment_definitions:\n",
296 | " if re.findall(segment_definition[0], url):\n",
297 | " return segment_definition[1]\n",
298 | " return 'Other'\n",
299 | "\n",
300 | "#apply segmentation to all URLs in dataframe \n",
301 | "df['segment'] = df['url'].apply(lambda x: get_segment(x))"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 10,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/html": [
312 | "\n",
313 | "\n",
326 | "
\n",
327 | " \n",
328 | " \n",
329 | " | \n",
330 | " followed_links_in_count | \n",
331 | " links_in_count | \n",
332 | " links_out_count | \n",
333 | " url | \n",
334 | "
\n",
335 | " \n",
336 | " | segment | \n",
337 | " | \n",
338 | " | \n",
339 | " | \n",
340 | " | \n",
341 | "
\n",
342 | " \n",
343 | " \n",
344 | " \n",
345 | " | Blog | \n",
346 | " 19742 | \n",
347 | " 19742 | \n",
348 | " 77002 | \n",
349 | " 737 | \n",
350 | "
\n",
351 | " \n",
352 | " | Case Studies | \n",
353 | " 117 | \n",
354 | " 117 | \n",
355 | " 924 | \n",
356 | " 10 | \n",
357 | "
\n",
358 | " \n",
359 | " | Guides | \n",
360 | " 4127 | \n",
361 | " 4127 | \n",
362 | " 4967 | \n",
363 | " 67 | \n",
364 | "
\n",
365 | " \n",
366 | " | Hangout Library | \n",
367 | " 8019 | \n",
368 | " 8019 | \n",
369 | " 45261 | \n",
370 | " 406 | \n",
371 | "
\n",
372 | " \n",
373 | " | Other | \n",
374 | " 37050 | \n",
375 | " 37050 | \n",
376 | " 5498 | \n",
377 | " 88 | \n",
378 | "
\n",
379 | " \n",
380 | " | Solutions | \n",
381 | " 5143 | \n",
382 | " 5143 | \n",
383 | " 392 | \n",
384 | " 6 | \n",
385 | "
\n",
386 | " \n",
387 | " | Technical SEO Library | \n",
388 | " 2699 | \n",
389 | " 2699 | \n",
390 | " 2924 | \n",
391 | " 33 | \n",
392 | "
\n",
393 | " \n",
394 | "
\n",
395 | "
"
396 | ],
397 | "text/plain": [
398 | " followed_links_in_count links_in_count \\\n",
399 | "segment \n",
400 | "Blog 19742 19742 \n",
401 | "Case Studies 117 117 \n",
402 | "Guides 4127 4127 \n",
403 | "Hangout Library 8019 8019 \n",
404 | "Other 37050 37050 \n",
405 | "Solutions 5143 5143 \n",
406 | "Technical SEO Library 2699 2699 \n",
407 | "\n",
408 | " links_out_count url \n",
409 | "segment \n",
410 | "Blog 77002 737 \n",
411 | "Case Studies 924 10 \n",
412 | "Guides 4967 67 \n",
413 | "Hangout Library 45261 406 \n",
414 | "Other 5498 88 \n",
415 | "Solutions 392 6 \n",
416 | "Technical SEO Library 2924 33 "
417 | ]
418 | },
419 | "execution_count": 10,
420 | "metadata": {},
421 | "output_type": "execute_result"
422 | }
423 | ],
424 | "source": [
425 | "#create pivot table to get a count of internal links to each segment \n",
426 | "total_internal_links = pd.pivot_table(df, index='segment', values=['url', 'links_in_count', 'followed_links_in_count', 'links_out_count'], aggfunc={'url':len, 'links_in_count':np.sum, 'followed_links_in_count':np.sum, 'links_out_count':np.sum})\n",
427 | "total_internal_links"
428 | ]
429 | },
430 | {
431 | "cell_type": "code",
432 | "execution_count": 15,
433 | "metadata": {},
434 | "outputs": [
435 | {
436 | "data": {
437 | "text/html": [
438 | "\n",
439 | "\n",
452 | "
\n",
453 | " \n",
454 | " \n",
455 | " | \n",
456 | " followed_links_in_count | \n",
457 | " links_in_count | \n",
458 | " links_out_count | \n",
459 | " url | \n",
460 | "
\n",
461 | " \n",
462 | " | segment | \n",
463 | " | \n",
464 | " | \n",
465 | " | \n",
466 | " | \n",
467 | "
\n",
468 | " \n",
469 | " \n",
470 | " \n",
471 | " | Blog | \n",
472 | " 26.8 | \n",
473 | " 26.8 | \n",
474 | " 104.5 | \n",
475 | " 737 | \n",
476 | "
\n",
477 | " \n",
478 | " | Case Studies | \n",
479 | " 11.7 | \n",
480 | " 11.7 | \n",
481 | " 92.4 | \n",
482 | " 10 | \n",
483 | "
\n",
484 | " \n",
485 | " | Guides | \n",
486 | " 61.6 | \n",
487 | " 61.6 | \n",
488 | " 74.1 | \n",
489 | " 67 | \n",
490 | "
\n",
491 | " \n",
492 | " | Hangout Library | \n",
493 | " 19.8 | \n",
494 | " 19.8 | \n",
495 | " 111.5 | \n",
496 | " 406 | \n",
497 | "
\n",
498 | " \n",
499 | " | Other | \n",
500 | " 421.0 | \n",
501 | " 421.0 | \n",
502 | " 62.5 | \n",
503 | " 88 | \n",
504 | "
\n",
505 | " \n",
506 | " | Solutions | \n",
507 | " 857.2 | \n",
508 | " 857.2 | \n",
509 | " 65.3 | \n",
510 | " 6 | \n",
511 | "
\n",
512 | " \n",
513 | " | Technical SEO Library | \n",
514 | " 81.8 | \n",
515 | " 81.8 | \n",
516 | " 88.6 | \n",
517 | " 33 | \n",
518 | "
\n",
519 | " \n",
520 | "
\n",
521 | "
"
522 | ],
523 | "text/plain": [
524 | " followed_links_in_count links_in_count links_out_count \\\n",
525 | "segment \n",
526 | "Blog 26.8 26.8 104.5 \n",
527 | "Case Studies 11.7 11.7 92.4 \n",
528 | "Guides 61.6 61.6 74.1 \n",
529 | "Hangout Library 19.8 19.8 111.5 \n",
530 | "Other 421.0 421.0 62.5 \n",
531 | "Solutions 857.2 857.2 65.3 \n",
532 | "Technical SEO Library 81.8 81.8 88.6 \n",
533 | "\n",
534 | " url \n",
535 | "segment \n",
536 | "Blog 737 \n",
537 | "Case Studies 10 \n",
538 | "Guides 67 \n",
539 | "Hangout Library 406 \n",
540 | "Other 88 \n",
541 | "Solutions 6 \n",
542 | "Technical SEO Library 33 "
543 | ]
544 | },
545 | "execution_count": 15,
546 | "metadata": {},
547 | "output_type": "execute_result"
548 | }
549 | ],
550 | "source": [
551 | "#create pivot table to get an of internal links to each segment \n",
552 | "average_internal_links = pd.pivot_table(df, index='segment', values=['url', 'links_in_count', 'followed_links_in_count', 'links_out_count'], aggfunc={'url':len, 'links_in_count':np.mean, 'followed_links_in_count':np.mean, 'links_out_count':np.mean})\n",
553 | "average_internal_links['followed_links_in_count'] = (average_internal_links['followed_links_in_count']).apply('{:.1f}'.format)\n",
554 | "average_internal_links['links_in_count'] = (average_internal_links['links_in_count']).apply('{:.1f}'.format)\n",
555 | "average_internal_links['links_out_count'] = (average_internal_links['links_out_count']).apply('{:.1f}'.format)\n",
556 | "average_internal_links"
557 | ]
558 | },
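{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional follow-up: list the indexable pages with the fewest internal links,\n",
"# a useful starting point when deciding where to add internal links\n",
"df[df['indexable'] == True].sort_values('links_in_count').head(10)"
]
},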
559 | {
560 | "cell_type": "code",
561 | "execution_count": 17,
562 | "metadata": {},
563 | "outputs": [
564 | {
565 | "data": {
566 | "text/html": [
567 | "\n",
568 | "\n",
581 | "
\n",
582 | " \n",
583 | " \n",
584 | " | \n",
585 | " deeprank | \n",
586 | " followed_links_in_count | \n",
587 | " level | \n",
588 | " links_in_count | \n",
589 | " links_out_count | \n",
590 | " url | \n",
591 | "
\n",
592 | " \n",
593 | " | segment | \n",
594 | " | \n",
595 | " | \n",
596 | " | \n",
597 | " | \n",
598 | " | \n",
599 | " | \n",
600 | "
\n",
601 | " \n",
602 | " \n",
603 | " \n",
604 | " | Blog | \n",
605 | " 1.7 | \n",
606 | " 26.8 | \n",
607 | " 4.9 | \n",
608 | " 26.8 | \n",
609 | " 104.5 | \n",
610 | " 737 | \n",
611 | "
\n",
612 | " \n",
613 | " | Case Studies | \n",
614 | " 3.9 | \n",
615 | " 11.7 | \n",
616 | " 2.2 | \n",
617 | " 11.7 | \n",
618 | " 92.4 | \n",
619 | " 10 | \n",
620 | "
\n",
621 | " \n",
622 | " | Guides | \n",
623 | " 1.8 | \n",
624 | " 61.6 | \n",
625 | " 3.8 | \n",
626 | " 61.6 | \n",
627 | " 74.1 | \n",
628 | " 67 | \n",
629 | "
\n",
630 | " \n",
631 | " | Hangout Library | \n",
632 | " 1.5 | \n",
633 | " 19.8 | \n",
634 | " 4.6 | \n",
635 | " 19.8 | \n",
636 | " 111.5 | \n",
637 | " 406 | \n",
638 | "
\n",
639 | " \n",
640 | " | Other | \n",
641 | " 2.9 | \n",
642 | " 421.0 | \n",
643 | " 3.5 | \n",
644 | " 421.0 | \n",
645 | " 62.5 | \n",
646 | " 88 | \n",
647 | "
\n",
648 | " \n",
649 | " | Solutions | \n",
650 | " 5.7 | \n",
651 | " 857.2 | \n",
652 | " 2.8 | \n",
653 | " 857.2 | \n",
654 | " 65.3 | \n",
655 | " 6 | \n",
656 | "
\n",
657 | " \n",
658 | " | Technical SEO Library | \n",
659 | " 2.3 | \n",
660 | " 81.8 | \n",
661 | " 3.2 | \n",
662 | " 81.8 | \n",
663 | " 88.6 | \n",
664 | " 33 | \n",
665 | "
\n",
666 | " \n",
667 | "
\n",
668 | "
"
669 | ],
670 | "text/plain": [
671 | " deeprank followed_links_in_count level links_in_count \\\n",
672 | "segment \n",
673 | "Blog 1.7 26.8 4.9 26.8 \n",
674 | "Case Studies 3.9 11.7 2.2 11.7 \n",
675 | "Guides 1.8 61.6 3.8 61.6 \n",
676 | "Hangout Library 1.5 19.8 4.6 19.8 \n",
677 | "Other 2.9 421.0 3.5 421.0 \n",
678 | "Solutions 5.7 857.2 2.8 857.2 \n",
679 | "Technical SEO Library 2.3 81.8 3.2 81.8 \n",
680 | "\n",
681 | " links_out_count url \n",
682 | "segment \n",
683 | "Blog 104.5 737 \n",
684 | "Case Studies 92.4 10 \n",
685 | "Guides 74.1 67 \n",
686 | "Hangout Library 111.5 406 \n",
687 | "Other 62.5 88 \n",
688 | "Solutions 65.3 6 \n",
689 | "Technical SEO Library 88.6 33 "
690 | ]
691 | },
692 | "execution_count": 17,
693 | "metadata": {},
694 | "output_type": "execute_result"
695 | }
696 | ],
697 | "source": [
698 | "#Additional step using Level and DeepRank to find averages for segments \n",
699 | "averages = pd.pivot_table(df, index='segment', values=['url', 'links_in_count', 'followed_links_in_count', 'links_out_count', 'deeprank', 'level'], aggfunc={'url':len, 'links_in_count':np.mean, 'followed_links_in_count':np.mean, 'links_out_count':np.mean, 'deeprank':np.mean, 'level':np.mean })\n",
700 | "averages['followed_links_in_count'] = (averages['followed_links_in_count']).apply('{:.1f}'.format)\n",
701 | "averages['links_in_count'] = (averages['links_in_count']).apply('{:.1f}'.format)\n",
702 | "averages['links_out_count'] = (averages['links_out_count']).apply('{:.1f}'.format)\n",
703 | "averages['deeprank'] = (averages['deeprank']).apply('{:.1f}'.format)\n",
704 | "averages['level'] = (averages['level']).apply('{:.1f}'.format)\n",
705 | "averages"
706 | ]
707 | }
708 | ],
709 | "metadata": {
710 | "kernelspec": {
711 | "display_name": "Python 3",
712 | "language": "python",
713 | "name": "python3"
714 | },
715 | "language_info": {
716 | "codemirror_mode": {
717 | "name": "ipython",
718 | "version": 3
719 | },
720 | "file_extension": ".py",
721 | "mimetype": "text/x-python",
722 | "name": "python",
723 | "nbconvert_exporter": "python",
724 | "pygments_lexer": "ipython3",
725 | "version": "3.7.6"
726 | }
727 | },
728 | "nbformat": 4,
729 | "nbformat_minor": 4
730 | }
731 |
--------------------------------------------------------------------------------
/Google_PyTrends.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Google PyTrends.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyMGgzeL+4WEsGTAZ02tWGnt",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
27 | {
28 | "cell_type": "code",
29 | "metadata": {
30 | "id": "fSO8-J6LwFJe",
31 | "colab": {
32 | "base_uri": "https://localhost:8080/",
33 | "height": 221
34 | },
35 | "outputId": "455f1e60-9d31-4d56-d3f1-8aab75ade8b7"
36 | },
37 | "source": [
38 | "!pip install pytrends\n",
39 | "from pytrends.request import TrendReq\n",
40 | "pytrends = TrendReq()\n",
41 | "import pandas as pd \n",
42 | "import time\n",
43 | "import datetime\n",
44 | "from datetime import datetime, date, time"
45 | ],
46 | "execution_count": null,
47 | "outputs": [
48 | {
49 | "output_type": "stream",
50 | "text": [
51 | "Requirement already satisfied: pytrends in /usr/local/lib/python3.6/dist-packages (4.7.3)\n",
52 | "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from pytrends) (2.23.0)\n",
53 | "Requirement already satisfied: lxml in /usr/local/lib/python3.6/dist-packages (from pytrends) (4.2.6)\n",
54 | "Requirement already satisfied: pandas>=0.25 in /usr/local/lib/python3.6/dist-packages (from pytrends) (1.0.5)\n",
55 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->pytrends) (1.24.3)\n",
56 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->pytrends) (2020.6.20)\n",
57 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->pytrends) (3.0.4)\n",
58 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->pytrends) (2.10)\n",
59 | "Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.25->pytrends) (2.8.1)\n",
60 | "Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.25->pytrends) (1.18.5)\n",
61 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.25->pytrends) (2018.9)\n",
62 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas>=0.25->pytrends) (1.15.0)\n"
63 | ],
64 | "name": "stdout"
65 | }
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {
71 | "id": "9L4sqRuxwWsr"
72 | },
73 | "source": [
74 | "Interest over Time\n"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "metadata": {
80 | "id": "PHapqWBUwMUp",
81 | "colab": {
82 | "base_uri": "https://localhost:8080/",
83 | "height": 450
84 | },
85 | "outputId": "aab7786f-c01b-401b-e58d-a0fde6c53922"
86 | },
87 | "source": [
88 | "kw_list = [\"labrador\", \"german shepherd\", \"staffordshire bull terrier\", \"rottweiler\", \"husky\"]\n",
89 | "pytrends.build_payload(kw_list, timeframe='today 3-m')\n",
90 | "over_time = pytrends.interest_over_time()\n",
91 | "over_time_df = pd.DataFrame(over_time)\n",
92 | "over_time_df.drop(columns= 'isPartial') "
93 | ],
94 | "execution_count": null,
95 | "outputs": [
96 | {
97 | "output_type": "execute_result",
98 | "data": {
99 | "text/html": [
100 | "\n",
101 | "\n",
114 | "
\n",
115 | " \n",
116 | " \n",
117 | " | \n",
118 | " labrador | \n",
119 | " german shepherd | \n",
120 | " staffordshire bull terrier | \n",
121 | " rottweiler | \n",
122 | " husky | \n",
123 | "
\n",
124 | " \n",
125 | " | date | \n",
126 | " | \n",
127 | " | \n",
128 | " | \n",
129 | " | \n",
130 | " | \n",
131 | "
\n",
132 | " \n",
133 | " \n",
134 | " \n",
135 | " | 2020-05-05 | \n",
136 | " 67 | \n",
137 | " 48 | \n",
138 | " 3 | \n",
139 | " 38 | \n",
140 | " 75 | \n",
141 | "
\n",
142 | " \n",
143 | " | 2020-05-06 | \n",
144 | " 67 | \n",
145 | " 49 | \n",
146 | " 4 | \n",
147 | " 36 | \n",
148 | " 77 | \n",
149 | "
\n",
150 | " \n",
151 | " | 2020-05-07 | \n",
152 | " 68 | \n",
153 | " 45 | \n",
154 | " 4 | \n",
155 | " 38 | \n",
156 | " 81 | \n",
157 | "
\n",
158 | " \n",
159 | " | 2020-05-08 | \n",
160 | " 70 | \n",
161 | " 46 | \n",
162 | " 5 | \n",
163 | " 38 | \n",
164 | " 82 | \n",
165 | "
\n",
166 | " \n",
167 | " | 2020-05-09 | \n",
168 | " 80 | \n",
169 | " 56 | \n",
170 | " 5 | \n",
171 | " 47 | \n",
172 | " 89 | \n",
173 | "
\n",
174 | " \n",
175 | " | ... | \n",
176 | " ... | \n",
177 | " ... | \n",
178 | " ... | \n",
179 | " ... | \n",
180 | " ... | \n",
181 | "
\n",
182 | " \n",
183 | " | 2020-07-29 | \n",
184 | " 66 | \n",
185 | " 41 | \n",
186 | " 4 | \n",
187 | " 38 | \n",
188 | " 74 | \n",
189 | "
\n",
190 | " \n",
191 | " | 2020-07-30 | \n",
192 | " 62 | \n",
193 | " 43 | \n",
194 | " 4 | \n",
195 | " 36 | \n",
196 | " 73 | \n",
197 | "
\n",
198 | " \n",
199 | " | 2020-07-31 | \n",
200 | " 63 | \n",
201 | " 45 | \n",
202 | " 4 | \n",
203 | " 39 | \n",
204 | " 75 | \n",
205 | "
\n",
206 | " \n",
207 | " | 2020-08-01 | \n",
208 | " 80 | \n",
209 | " 49 | \n",
210 | " 4 | \n",
211 | " 46 | \n",
212 | " 87 | \n",
213 | "
\n",
214 | " \n",
215 | " | 2020-08-02 | \n",
216 | " 85 | \n",
217 | " 50 | \n",
218 | " 4 | \n",
219 | " 51 | \n",
220 | " 91 | \n",
221 | "
\n",
222 | " \n",
223 | "
\n",
224 | "
90 rows × 5 columns
\n",
225 | "
"
226 | ],
227 | "text/plain": [
228 | " labrador german shepherd ... rottweiler husky\n",
229 | "date ... \n",
230 | "2020-05-05 67 48 ... 38 75\n",
231 | "2020-05-06 67 49 ... 36 77\n",
232 | "2020-05-07 68 45 ... 38 81\n",
233 | "2020-05-08 70 46 ... 38 82\n",
234 | "2020-05-09 80 56 ... 47 89\n",
235 | "... ... ... ... ... ...\n",
236 | "2020-07-29 66 41 ... 38 74\n",
237 | "2020-07-30 62 43 ... 36 73\n",
238 | "2020-07-31 63 45 ... 39 75\n",
239 | "2020-08-01 80 49 ... 46 87\n",
240 | "2020-08-02 85 50 ... 51 91\n",
241 | "\n",
242 | "[90 rows x 5 columns]"
243 | ]
244 | },
245 | "metadata": {
246 | "tags": []
247 | },
248 | "execution_count": 2
249 | }
250 | ]
251 | },
252 | {
253 | "cell_type": "markdown",
254 | "metadata": {
255 | "id": "b2nZpHS11rfr"
256 | },
257 | "source": [
258 | "Related Queries"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "metadata": {
264 | "id": "Cr1Hvyc83oUT"
265 | },
266 | "source": [
267 | "kw_list=['giant panda', 'tiger', 'polar bear', 'penguin']\n",
268 | "pytrends.build_payload(kw_list, timeframe='today 3-m')"
269 | ],
270 | "execution_count": null,
271 | "outputs": []
272 | },
273 | {
274 | "cell_type": "code",
275 | "metadata": {
276 | "id": "-TYnwuM-0tX4",
277 | "colab": {
278 | "base_uri": "https://localhost:8080/",
279 | "height": 1000
280 | },
281 | "outputId": "f49741fd-a69b-4d10-cf2f-3d179b3564b2"
282 | },
283 | "source": [
284 | "related_queries = pytrends.related_queries()\n",
285 | "related_queries.values()"
286 | ],
287 | "execution_count": null,
288 | "outputs": [
289 | {
290 | "output_type": "execute_result",
291 | "data": {
292 | "text/plain": [
293 | "dict_values([{'top': query value\n",
294 | "0 the giant panda 100\n",
295 | "1 giant panda 3d 85\n",
296 | "2 tiger 55\n",
297 | "3 giant panda facts 27\n",
298 | "4 red panda 25\n",
299 | "5 3d animals 25\n",
300 | "6 tiger 3d 25\n",
301 | "7 shark 23\n",
302 | "8 lion 23\n",
303 | "9 giant panda habitat 23\n",
304 | "10 duck 15\n",
305 | "11 alligator 13\n",
306 | "12 google 3d animals 10\n",
307 | "13 endangered species 8\n",
308 | "14 giant panda facts for kids 5\n",
309 | "15 giant panda fargo 3, 'rising': query value\n",
310 | "0 giant panda facts for kids 250\n",
311 | "1 giant panda habitat 50\n",
312 | "2 the giant panda 40}, {'top': query value\n",
313 | "0 the tiger 100\n",
314 | "1 tiger woods 74\n",
315 | "2 tiger king 68\n",
316 | "3 tiger shroff 40\n",
317 | "4 giant tiger 28\n",
318 | "5 tiger movie 23\n",
319 | "6 lion 20\n",
320 | "7 tiger eye 19\n",
321 | "8 white tiger 16\n",
322 | "9 tiger flying 16\n",
323 | "10 tiger phil 15\n",
324 | "11 tiger triumph 14\n",
325 | "12 triumph 13\n",
326 | "13 3d tiger 13\n",
327 | "14 black tiger 12\n",
328 | "15 eye of the tiger 12\n",
329 | "16 tiger onitsuka 12\n",
330 | "17 shark 11\n",
331 | "18 tiger shark 10\n",
332 | "19 daniel tiger 10\n",
333 | "20 cat 10\n",
334 | "21 tiger cat 10\n",
335 | "22 tiger video 10\n",
336 | "23 tiger 800 9\n",
337 | "24 romance of tiger and rose 9, 'rising': query value\n",
338 | "0 tiger woods peyton manning date 22550\n",
339 | "1 romance of tiger and rose 16400\n",
340 | "2 the romance of tiger and rose 15100\n",
341 | "3 the romance of tiger and rose dramacool 7150\n",
342 | "4 tiger phil peyton brady date 5950\n",
343 | "5 tiger king challenge bitlife 4500\n",
344 | "6 tiger phil match 2020 date 3450\n",
345 | "7 tiger vs phil match 2020 3350\n",
346 | "8 memorial tournament 2750\n",
347 | "9 tiger phil match 2250\n",
348 | "10 tiger luxx 2050\n",
349 | "11 tiger vs phil 1850\n",
350 | "12 tiger and phil match 1750\n",
351 | "13 the romance of tiger and rose ซับ ไทย 1550\n",
352 | "14 international tiger day 1400\n",
353 | "15 tiger phil 1200\n",
354 | "16 how to watch tiger vs phil 1150\n",
355 | "17 international tiger day 2020 1150\n",
356 | "18 tiger woods phil mickelson 1000\n",
357 | "19 phil mickelson 950\n",
358 | "20 nicolas cage tiger king 650\n",
359 | "21 flying tiger 2 600\n",
360 | "22 tiger brokers 550\n",
361 | "23 roy horn tiger attack video 500\n",
362 | "24 tiger muskie 450}, {'top': query value\n",
363 | "0 the polar bear 100\n",
364 | "1 polar bears 51\n",
365 | "2 bears 49\n",
366 | "3 minecraft polar bear 31\n",
367 | "4 grizzly bear 30\n",
368 | "5 white polar bear 22\n",
369 | "6 polar bear baby 20\n",
370 | "7 coca cola polar bear 19\n",
371 | "8 black bear 19\n",
372 | "9 panda 17\n",
373 | "10 polar bear dog 17\n",
374 | "11 tiger 16\n",
375 | "12 polar bear skin 16\n",
376 | "13 brown bear 14\n",
377 | "14 coca cola polar bear playing cards 14\n",
378 | "15 polar bear facts 14\n",
379 | "16 polar bear population 13\n",
380 | "17 polar bear habitat 13\n",
381 | "18 polar bear size 12\n",
382 | "19 tame polar bear minecraft 12\n",
383 | "20 lion 12\n",
384 | "21 polar bear fur 12\n",
385 | "22 polar bear coolers 12\n",
386 | "23 polar bear shot 11\n",
387 | "24 polar bear adaptations 10, 'rising': query value\n",
388 | "0 coca cola polar bear playing cards 149100\n",
389 | "1 polar bear leura 11650\n",
390 | "2 hyped polar bear 5650\n",
391 | "3 polar bear 45 cooler 4100\n",
392 | "4 coca cola polar bear 450\n",
393 | "5 polar bear coolers 350\n",
394 | "6 bonjour polar bear 350\n",
395 | "7 polar bear cooler 250\n",
396 | "8 polar bear hull 250\n",
397 | "9 polar bear provincial park 190\n",
398 | "10 adopt me polar bear 180\n",
399 | "11 polar bear heating and air 180\n",
400 | "12 hortensja polar bear 170\n",
401 | "13 shaved polar bear 170\n",
402 | "14 polar bear stuffed animal 150\n",
403 | "15 how much does a polar bear weigh joke 120\n",
404 | "16 do you know how much a polar bear weighs 120\n",
405 | "17 polar bear skin color 120\n",
406 | "18 polar bear toy 110\n",
407 | "19 polar bear roar 110\n",
408 | "20 wolverine kills polar bear 110\n",
409 | "21 the polar bear king 70\n",
410 | "22 polar bear skin colour 70\n",
411 | "23 polar bear size comparison 60\n",
412 | "24 maya the polar bear 60}, {'top': query value\n",
413 | "0 penguin club 100\n",
414 | "1 club 99\n",
415 | "2 penguin movie 30\n",
416 | "3 the penguin 25\n",
417 | "4 club penguin rewritten 11\n",
418 | "5 club penguin online 8\n",
419 | "6 penguin tamil movie 8\n",
420 | "7 penguin 2020 7\n",
421 | "8 super club penguin 7\n",
422 | "9 penguins 5\n",
423 | "10 batman penguin 5\n",
424 | "11 baby penguin 5\n",
425 | "12 emperor penguin 5\n",
426 | "13 penguin review 5\n",
427 | "14 google penguin 4\n",
428 | "15 penguin books 4\n",
429 | "16 penguin movie download 4\n",
430 | "17 penguin game 4\n",
431 | "18 free penguin 3\n",
432 | "19 penguin movie review 3\n",
433 | "20 penguin random house 3\n",
434 | "21 original penguin 3\n",
435 | "22 penguin tamil movie download 3\n",
436 | "23 penguin classics 3\n",
437 | "24 penguin island 3, 'rising': query value\n",
438 | "0 penguin tamil movie download 92100\n",
439 | "1 penguin movie telugu 62450\n",
440 | "2 penguin movie download in tamil 47800\n",
441 | "3 tamilrockers 47000\n",
442 | "4 penguin tamil movie 39750\n",
443 | "5 penguin movie amazon prime 29450\n",
444 | "6 penguin tamil full movie 28300\n",
445 | "7 penguin movie download in tamilrockers 21500\n",
446 | "8 tamilyogi 21450\n",
447 | "9 tamilyogi penguin 21000\n",
448 | "10 penguin movie rating 19600\n",
449 | "11 moviesda 17400\n",
450 | "12 ponmagal vandhal 14200\n",
451 | "13 penguin full movie in tamil 13450\n",
452 | "14 penguin tamil movie cast 10650\n",
453 | "15 kuttymovies 9550\n",
454 | "16 penguin movie review 9350\n",
455 | "17 penguin tamil movie trailer 8900\n",
456 | "18 penguin movie download 8350\n",
457 | "19 gulabo sitabo 6250\n",
458 | "20 baby penguin cm son 6200\n",
459 | "21 isaimini 5450\n",
460 | "22 tamilgun 4700\n",
461 | "23 movierulz 4050\n",
462 | "24 penguin keerthy suresh 4050}])"
463 | ]
464 | },
465 | "metadata": {
466 | "tags": []
467 | },
468 | "execution_count": 4
469 | }
470 | ]
471 | },
472 | {
473 | "cell_type": "code",
474 | "metadata": {
475 | "id": "UZ9glkas0uXS",
476 | "colab": {
477 | "base_uri": "https://localhost:8080/",
478 | "height": 142
479 | },
480 | "outputId": "92a8c913-b5e3-4439-f789-828a9f297d08"
481 | },
482 | "source": [
483 | "related_queries.get('giant panda').get('rising')"
484 | ],
485 | "execution_count": null,
486 | "outputs": [
487 | {
488 | "output_type": "execute_result",
489 | "data": {
490 | "text/html": [
491 | "\n",
492 | "\n",
505 | "
\n",
506 | " \n",
507 | " \n",
508 | " | \n",
509 | " query | \n",
510 | " value | \n",
511 | "
\n",
512 | " \n",
513 | " \n",
514 | " \n",
515 | " | 0 | \n",
516 | " giant panda facts for kids | \n",
517 | " 250 | \n",
518 | "
\n",
519 | " \n",
520 | " | 1 | \n",
521 | " giant panda habitat | \n",
522 | " 50 | \n",
523 | "
\n",
524 | " \n",
525 | " | 2 | \n",
526 | " the giant panda | \n",
527 | " 40 | \n",
528 | "
\n",
529 | " \n",
530 | "
\n",
531 | "
"
532 | ],
533 | "text/plain": [
534 | " query value\n",
535 | "0 giant panda facts for kids 250\n",
536 | "1 giant panda habitat 50\n",
537 | "2 the giant panda 40"
538 | ]
539 | },
540 | "metadata": {
541 | "tags": []
542 | },
543 | "execution_count": 5
544 | }
545 | ]
546 | },
547 | {
548 | "cell_type": "code",
549 | "metadata": {
550 | "id": "qVdQpZ2G03MR",
551 | "colab": {
552 | "base_uri": "https://localhost:8080/",
553 | "height": 824
554 | },
555 | "outputId": "17e1db83-8b88-4597-f24b-a17e623da461"
556 | },
557 | "source": [
558 | "related_queries.get('tiger').get('top')"
559 | ],
560 | "execution_count": null,
561 | "outputs": [
562 | {
563 | "output_type": "execute_result",
564 | "data": {
565 | "text/html": [
566 | "\n",
567 | "\n",
580 | "
\n",
581 | " \n",
582 | " \n",
583 | " | \n",
584 | " query | \n",
585 | " value | \n",
586 | "
\n",
587 | " \n",
588 | " \n",
589 | " \n",
590 | " | 0 | \n",
591 | " the tiger | \n",
592 | " 100 | \n",
593 | "
\n",
594 | " \n",
595 | " | 1 | \n",
596 | " tiger woods | \n",
597 | " 74 | \n",
598 | "
\n",
599 | " \n",
600 | " | 2 | \n",
601 | " tiger king | \n",
602 | " 68 | \n",
603 | "
\n",
604 | " \n",
605 | " | 3 | \n",
606 | " tiger shroff | \n",
607 | " 40 | \n",
608 | "
\n",
609 | " \n",
610 | " | 4 | \n",
611 | " giant tiger | \n",
612 | " 28 | \n",
613 | "
\n",
614 | " \n",
615 | " | 5 | \n",
616 | " tiger movie | \n",
617 | " 23 | \n",
618 | "
\n",
619 | " \n",
620 | " | 6 | \n",
621 | " lion | \n",
622 | " 20 | \n",
623 | "
\n",
624 | " \n",
625 | " | 7 | \n",
626 | " tiger eye | \n",
627 | " 19 | \n",
628 | "
\n",
629 | " \n",
630 | " | 8 | \n",
631 | " white tiger | \n",
632 | " 16 | \n",
633 | "
\n",
634 | " \n",
635 | " | 9 | \n",
636 | " tiger flying | \n",
637 | " 16 | \n",
638 | "
\n",
639 | " \n",
640 | " | 10 | \n",
641 | " tiger phil | \n",
642 | " 15 | \n",
643 | "
\n",
644 | " \n",
645 | " | 11 | \n",
646 | " tiger triumph | \n",
647 | " 14 | \n",
648 | "
\n",
649 | " \n",
650 | " | 12 | \n",
651 | " triumph | \n",
652 | " 13 | \n",
653 | "
\n",
654 | " \n",
655 | " | 13 | \n",
656 | " 3d tiger | \n",
657 | " 13 | \n",
658 | "
\n",
659 | " \n",
660 | " | 14 | \n",
661 | " black tiger | \n",
662 | " 12 | \n",
663 | "
\n",
664 | " \n",
665 | " | 15 | \n",
666 | " eye of the tiger | \n",
667 | " 12 | \n",
668 | "
\n",
669 | " \n",
670 | " | 16 | \n",
671 | " tiger onitsuka | \n",
672 | " 12 | \n",
673 | "
\n",
674 | " \n",
675 | " | 17 | \n",
676 | " shark | \n",
677 | " 11 | \n",
678 | "
\n",
679 | " \n",
680 | " | 18 | \n",
681 | " tiger shark | \n",
682 | " 10 | \n",
683 | "
\n",
684 | " \n",
685 | " | 19 | \n",
686 | " daniel tiger | \n",
687 | " 10 | \n",
688 | "
\n",
689 | " \n",
690 | " | 20 | \n",
691 | " cat | \n",
692 | " 10 | \n",
693 | "
\n",
694 | " \n",
695 | " | 21 | \n",
696 | " tiger cat | \n",
697 | " 10 | \n",
698 | "
\n",
699 | " \n",
700 | " | 22 | \n",
701 | " tiger video | \n",
702 | " 10 | \n",
703 | "
\n",
704 | " \n",
705 | " | 23 | \n",
706 | " tiger 800 | \n",
707 | " 9 | \n",
708 | "
\n",
709 | " \n",
710 | " | 24 | \n",
711 | " romance of tiger and rose | \n",
712 | " 9 | \n",
713 | "
\n",
714 | " \n",
715 | "
\n",
716 | "
"
717 | ],
718 | "text/plain": [
719 | " query value\n",
720 | "0 the tiger 100\n",
721 | "1 tiger woods 74\n",
722 | "2 tiger king 68\n",
723 | "3 tiger shroff 40\n",
724 | "4 giant tiger 28\n",
725 | "5 tiger movie 23\n",
726 | "6 lion 20\n",
727 | "7 tiger eye 19\n",
728 | "8 white tiger 16\n",
729 | "9 tiger flying 16\n",
730 | "10 tiger phil 15\n",
731 | "11 tiger triumph 14\n",
732 | "12 triumph 13\n",
733 | "13 3d tiger 13\n",
734 | "14 black tiger 12\n",
735 | "15 eye of the tiger 12\n",
736 | "16 tiger onitsuka 12\n",
737 | "17 shark 11\n",
738 | "18 tiger shark 10\n",
739 | "19 daniel tiger 10\n",
740 | "20 cat 10\n",
741 | "21 tiger cat 10\n",
742 | "22 tiger video 10\n",
743 | "23 tiger 800 9\n",
744 | "24 romance of tiger and rose 9"
745 | ]
746 | },
747 | "metadata": {
748 | "tags": []
749 | },
750 | "execution_count": 6
751 | }
752 | ]
753 | },
754 | {
755 | "cell_type": "markdown",
756 | "metadata": {
757 | "id": "RKOMZiyH1t1Z"
758 | },
759 | "source": [
760 | "Suggestions"
761 | ]
762 | },
763 | {
764 | "cell_type": "code",
765 | "metadata": {
766 | "id": "9HDCc7E53w12",
767 | "colab": {
768 | "base_uri": "https://localhost:8080/",
769 | "height": 204
770 | },
771 | "outputId": "d60a5fe8-bd5f-4bff-f863-af78bb6e02e5"
772 | },
773 | "source": [
774 | "suggestions = pytrends.suggestions(keyword='nintendo switch')\n",
775 | "suggestions_df = pd.DataFrame(suggestions)\n",
776 | "suggestions_df.drop(columns= 'mid') "
777 | ],
778 | "execution_count": null,
779 | "outputs": [
780 | {
781 | "output_type": "execute_result",
782 | "data": {
783 | "text/html": [
784 | "\n",
785 | "\n",
798 | "
\n",
799 | " \n",
800 | " \n",
801 | " | \n",
802 | " title | \n",
803 | " type | \n",
804 | "
\n",
805 | " \n",
806 | " \n",
807 | " \n",
808 | " | 0 | \n",
809 | " Nintendo Switch | \n",
810 | " Video game console | \n",
811 | "
\n",
812 | " \n",
813 | " | 1 | \n",
814 | " Nintendo Switch Online | \n",
815 | " Topic | \n",
816 | "
\n",
817 | " \n",
818 | " | 2 | \n",
819 | " Joy-Con | \n",
820 | " Game controller | \n",
821 | "
\n",
822 | " \n",
823 | " | 3 | \n",
824 | " Celeste | \n",
825 | " Video game | \n",
826 | "
\n",
827 | " \n",
828 | " | 4 | \n",
829 | " Nintendo Switch system software | \n",
830 | " Operating system | \n",
831 | "
\n",
832 | " \n",
833 | "
\n",
834 | "
"
835 | ],
836 | "text/plain": [
837 | " title type\n",
838 | "0 Nintendo Switch Video game console\n",
839 | "1 Nintendo Switch Online Topic\n",
840 | "2 Joy-Con Game controller\n",
841 | "3 Celeste Video game\n",
842 | "4 Nintendo Switch system software Operating system"
843 | ]
844 | },
845 | "metadata": {
846 | "tags": []
847 | },
848 | "execution_count": 31
849 | }
850 | ]
851 | },
852 | {
853 | "cell_type": "markdown",
854 | "metadata": {
855 | "id": "LRq5Pd8_S8e2"
856 | },
857 | "source": [
858 | "Trending Searches\n"
859 | ]
860 | },
861 | {
862 | "cell_type": "code",
863 | "metadata": {
864 | "id": "_LBLsKO1D0Q_",
865 | "colab": {
866 | "base_uri": "https://localhost:8080/",
867 | "height": 669
868 | },
869 | "outputId": "9d2fcc9e-633a-4f0d-9282-aa248aeb0d9f"
870 | },
871 | "source": [
872 | "trend = pytrends.trending_searches(pn='united_kingdom')\n",
873 | "uk_trend_df = pd.DataFrame(trend)\n",
874 | "uk_trend_df"
875 | ],
876 | "execution_count": null,
877 | "outputs": [
878 | {
879 | "output_type": "execute_result",
880 | "data": {
881 | "text/html": [
882 | "\n",
883 | "\n",
896 | "
\n",
897 | " \n",
898 | " \n",
899 | " | \n",
900 | " 0 | \n",
901 | "
\n",
902 | " \n",
903 | " \n",
904 | " \n",
905 | " | 0 | \n",
906 | " COVID-19 prevention | \n",
907 | "
\n",
908 | " \n",
909 | " | 1 | \n",
910 | " Aberdeen | \n",
911 | "
\n",
912 | " \n",
913 | " | 2 | \n",
914 | " Jamie Dornan | \n",
915 | "
\n",
916 | " \n",
917 | " | 3 | \n",
918 | " Gary Barlow | \n",
919 | "
\n",
920 | " \n",
921 | " | 4 | \n",
922 | " Angel Gomes | \n",
923 | "
\n",
924 | " \n",
925 | " | 5 | \n",
926 | " Football | \n",
927 | "
\n",
928 | " \n",
929 | " | 6 | \n",
930 | " Caroline Flack | \n",
931 | "
\n",
932 | " \n",
933 | " | 7 | \n",
934 | " FBG Duck | \n",
935 | "
\n",
936 | " \n",
937 | " | 8 | \n",
938 | " Jamal Lewis | \n",
939 | "
\n",
940 | " \n",
941 | " | 9 | \n",
942 | " BREAKING news | \n",
943 | "
\n",
944 | " \n",
945 | " | 10 | \n",
946 | " Hiroshima | \n",
947 | "
\n",
948 | " \n",
949 | " | 11 | \n",
950 | " Beirut | \n",
951 | "
\n",
952 | " \n",
953 | " | 12 | \n",
954 | " Brentford | \n",
955 | "
\n",
956 | " \n",
957 | " | 13 | \n",
958 | " Kemar Roofe | \n",
959 | "
\n",
960 | " \n",
961 | " | 14 | \n",
962 | " England vs Ireland | \n",
963 | "
\n",
964 | " \n",
965 | " | 15 | \n",
966 | " Virgin Atlantic | \n",
967 | "
\n",
968 | " \n",
969 | " | 16 | \n",
970 | " Will Young | \n",
971 | "
\n",
972 | " \n",
973 | " | 17 | \n",
974 | " Brian Black | \n",
975 | "
\n",
976 | " \n",
977 | " | 18 | \n",
978 | " Fall Guys | \n",
979 | "
\n",
980 | " \n",
981 | " | 19 | \n",
982 | " Jonathan Swan | \n",
983 | "
\n",
984 | " \n",
985 | "
\n",
986 | "
"
987 | ],
988 | "text/plain": [
989 | " 0\n",
990 | "0 COVID-19 prevention\n",
991 | "1 Aberdeen\n",
992 | "2 Jamie Dornan\n",
993 | "3 Gary Barlow\n",
994 | "4 Angel Gomes\n",
995 | "5 Football\n",
996 | "6 Caroline Flack\n",
997 | "7 FBG Duck\n",
998 | "8 Jamal Lewis\n",
999 | "9 BREAKING news\n",
1000 | "10 Hiroshima\n",
1001 | "11 Beirut\n",
1002 | "12 Brentford\n",
1003 | "13 Kemar Roofe\n",
1004 | "14 England vs Ireland\n",
1005 | "15 Virgin Atlantic\n",
1006 | "16 Will Young\n",
1007 | "17 Brian Black\n",
1008 | "18 Fall Guys\n",
1009 | "19 Jonathan Swan"
1010 | ]
1011 | },
1012 | "metadata": {
1013 | "tags": []
1014 | },
1015 | "execution_count": 26
1016 | }
1017 | ]
1018 | },
1019 | {
1020 | "cell_type": "markdown",
1021 | "metadata": {
1022 | "id": "0itrtRU8S-1D"
1023 | },
1024 | "source": [
1025 | "Interest by Region"
1026 | ]
1027 | },
1028 | {
1029 | "cell_type": "code",
1030 | "metadata": {
1031 | "id": "WWG1Dpm7Q1UE",
1032 | "colab": {
1033 | "base_uri": "https://localhost:8080/",
1034 | "height": 204
1035 | },
1036 | "outputId": "477192b9-35a8-4006-eaac-1d5e08319018"
1037 | },
1038 | "source": [
1039 | "pytrends.build_payload(kw_list=['coronavirus'], geo='GB') \n",
1040 | "region_df = pytrends.interest_by_region(resolution='REGION', inc_low_vol=True)\n",
1041 | "\n",
1042 | "region_df.head(10)"
1043 | ],
1044 | "execution_count": null,
1045 | "outputs": [
1046 | {
1047 | "output_type": "execute_result",
1048 | "data": {
1049 | "text/html": [
1050 | "\n",
1051 | "\n",
1064 | "
\n",
1065 | " \n",
1066 | " \n",
1067 | " | \n",
1068 | " coronavirus | \n",
1069 | "
\n",
1070 | " \n",
1071 | " | geoName | \n",
1072 | " | \n",
1073 | "
\n",
1074 | " \n",
1075 | " \n",
1076 | " \n",
1077 | " | England | \n",
1078 | " 97 | \n",
1079 | "
\n",
1080 | " \n",
1081 | " | Northern Ireland | \n",
1082 | " 96 | \n",
1083 | "
\n",
1084 | " \n",
1085 | " | Scotland | \n",
1086 | " 96 | \n",
1087 | "
\n",
1088 | " \n",
1089 | " | Wales | \n",
1090 | " 100 | \n",
1091 | "
\n",
1092 | " \n",
1093 | "
\n",
1094 | "
"
1095 | ],
1096 | "text/plain": [
1097 | " coronavirus\n",
1098 | "geoName \n",
1099 | "England 97\n",
1100 | "Northern Ireland 96\n",
1101 | "Scotland 96\n",
1102 | "Wales 100"
1103 | ]
1104 | },
1105 | "metadata": {
1106 | "tags": []
1107 | },
1108 | "execution_count": 32
1109 | }
1110 | ]
1111 | },
1112 | {
1113 | "cell_type": "code",
1114 | "metadata": {
1115 | "id": "-C7uHXfFSxbN",
1116 | "colab": {
1117 | "base_uri": "https://localhost:8080/",
1118 | "height": 204
1119 | },
1120 | "outputId": "b53182cc-4377-4aa5-ffd2-0c2fac64d4f0"
1121 | },
1122 | "source": [
1123 | "kw_list = [\"BTS\", \"Stray Kids\", \"Day 6\"]\n",
1124 | "pytrends.build_payload(kw_list)\n",
1125 | "kpop_regions_df = pytrends.interest_by_region(resolution='COUNTRY', inc_low_vol=True)\n",
1126 | "\n",
1127 | "kpop_regions_df.head(10)"
1128 | ],
1129 | "execution_count": null,
1130 | "outputs": [
1131 | {
1132 | "output_type": "execute_result",
1133 | "data": {
1134 | "text/html": [
1135 | "\n",
1136 | "\n",
1149 | "
\n",
1150 | " \n",
1151 | " \n",
1152 | " | \n",
1153 | " BTS | \n",
1154 | " Stray Kids | \n",
1155 | " Day 6 | \n",
1156 | "
\n",
1157 | " \n",
1158 | " | geoName | \n",
1159 | " | \n",
1160 | " | \n",
1161 | " | \n",
1162 | "
\n",
1163 | " \n",
1164 | " \n",
1165 | " \n",
1166 | " | England | \n",
1167 | " 82 | \n",
1168 | " 3 | \n",
1169 | " 15 | \n",
1170 | "
\n",
1171 | " \n",
1172 | " | Northern Ireland | \n",
1173 | " 80 | \n",
1174 | " 3 | \n",
1175 | " 17 | \n",
1176 | "
\n",
1177 | " \n",
1178 | " | Scotland | \n",
1179 | " 81 | \n",
1180 | " 3 | \n",
1181 | " 16 | \n",
1182 | "
\n",
1183 | " \n",
1184 | " | Wales | \n",
1185 | " 79 | \n",
1186 | " 3 | \n",
1187 | " 18 | \n",
1188 | "
\n",
1189 | " \n",
1190 | "
\n",
1191 | "
"
1192 | ],
1193 | "text/plain": [
1194 | " BTS Stray Kids Day 6\n",
1195 | "geoName \n",
1196 | "England 82 3 15\n",
1197 | "Northern Ireland 80 3 17\n",
1198 | "Scotland 81 3 16\n",
1199 | "Wales 79 3 18"
1200 | ]
1201 | },
1202 | "metadata": {
1203 | "tags": []
1204 | },
1205 | "execution_count": 33
1206 | }
1207 | ]
1208 | }
1209 | ]
1210 | }
--------------------------------------------------------------------------------