├── README.md
└── YouTube_Recommendations.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Customize-YouTube-Recommendations
2 |
3 | To create an API Key
4 | 1. Go to the Google Cloud Console and enable the YouTube Data API v3.
5 | 2. To access the enabled API, create credentials: APIs & Services --> Credentials --> API Keys. Then replace `your_api_key_here` in the notebook's `api_key` variable with the generated key.
6 |
7 | How to run this?
8 | 1. Download the notebook and open it in Jupyter Notebook.
9 | 2. Alternatively, click the "Open In Colab" button at the top of the .ipynb file and run it in Google Colab.
10 |
--------------------------------------------------------------------------------
/YouTube_Recommendations.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "YouTube Recommendations.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "authorship_tag": "ABX9TyNzEeNBF+Sp0UlXb0hXxhT/",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | }
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
28 | ""
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "metadata": {
34 | "id": "S7dR5LZikFjI"
35 | },
36 | "source": [
37 | "pip install google-api-python-client"
38 | ],
39 | "execution_count": null,
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "metadata": {
45 | "id": "8cluwEjrs3Wk"
46 | },
47 | "source": [
48 | "import pandas as pd\n",
49 | "from datetime import datetime, timedelta\n",
50 | "from tabulate import tabulate\n",
51 | "from IPython.display import display, HTML\n",
52 | "from apiclient.discovery import build\n",
53 | "\n",
54 | "api_key = 'your_api_key_here'"
55 | ],
56 | "execution_count": 2,
57 | "outputs": []
58 | },
59 | {
60 | "cell_type": "code",
61 | "metadata": {
62 | "id": "0GUPRiQctNsW"
63 | },
64 | "source": [
65 | "def get_start_date_string(search_period_days):\n",
66 | " #Returns string for start-date of search period\n",
67 | " search_start_date = datetime.today() - timedelta(search_period_days)\n",
68 | " date_string = datetime(year=search_start_date.year,month=search_start_date.month,\n",
69 | " day=search_start_date.day).strftime('%Y-%m-%dT%H:%M:%SZ')\n",
70 | " return date_string"
71 | ],
72 | "execution_count": 3,
73 | "outputs": []
74 | },
75 | {
76 | "cell_type": "code",
77 | "metadata": {
78 | "id": "5AFBdYzMtdco"
79 | },
80 | "source": [
81 | "#Parsing key information about videos\n",
82 | "\n",
83 | "def find_title(item):\n",
84 | " title = item['snippet']['title']\n",
85 | " return title\n",
86 | "\n",
87 | "def find_video_url(item):\n",
88 | " video_id = item['id']['videoId']\n",
89 | " video_url = \"https://www.youtube.com/watch?v=\" + video_id\n",
90 | " return video_url\n",
91 | "\n",
92 | "def find_viewcount(item, youtube):\n",
93 | " video_id = item['id']['videoId']\n",
94 | " video_statistics = youtube.videos().list(id=video_id,\n",
95 | " part='statistics').execute()\n",
96 | " viewcount = int(video_statistics['items'][0]['statistics']['viewCount'])\n",
97 | " return viewcount\n",
98 | "\n",
99 | "def find_channel_id(item):\n",
100 | " channel_id = item['snippet']['channelId']\n",
101 | " return channel_id\n",
102 | "\n",
103 | "def find_channel_url(item):\n",
104 | " channel_id = item['snippet']['channelId']\n",
105 | " channel_url = \"https://www.youtube.com/channel/\" + channel_id\n",
106 | " return channel_url\n",
107 | "\n",
108 | "def find_channel_title(channel_id, youtube):\n",
109 | " channel_search = youtube.channels().list(id=channel_id,\n",
110 | " part='brandingSettings').execute()\n",
111 | " channel_name = channel_search['items'][0]\\\n",
112 | " ['brandingSettings']['channel']['title']\n",
113 | " return channel_name\n",
114 | "\n",
115 | "def find_num_subscribers(channel_id, youtube):\n",
116 | " subs_search = youtube.channels().list(id=channel_id,\n",
117 | " part='statistics').execute()\n",
118 | " if subs_search['items'][0]['statistics']['hiddenSubscriberCount']:\n",
119 | " num_subscribers = 1000000\n",
120 | " else:\n",
121 | " num_subscribers = int(subs_search['items'][0]\\\n",
122 | " ['statistics']['subscriberCount'])\n",
123 | " return num_subscribers\n",
124 | "\n",
125 | "def view_to_sub_ratio(viewcount, num_subscribers):\n",
126 | " if num_subscribers == 0:\n",
127 | " return 0\n",
128 | " else:\n",
129 | " ratio = viewcount / num_subscribers\n",
130 | " return ratio\n",
131 | "\n",
132 | "def age(item):\n",
133 | " when_published = item['snippet']['publishedAt']\n",
134 | " when_published_datetime_object = datetime.strptime(when_published,\n",
135 | " '%Y-%m-%dT%H:%M:%SZ')\n",
136 | " today_date = datetime.today()\n",
137 | " days_since_published = int((today_date - when_published_datetime_object).days)\n",
138 | " if days_since_published == 0:\n",
139 | " days_since_published = 1\n",
140 | " return days_since_published\n",
141 | "\n",
142 | "def custom_score(viewcount, ratio, days_since_published):\n",
143 | " ratio = min(ratio, 5)\n",
144 | " score = (viewcount * ratio) / days_since_published\n",
145 | " return round(score)\n"
146 | ],
147 | "execution_count": 4,
148 | "outputs": []
149 | },
150 | {
151 | "cell_type": "code",
152 | "metadata": {
153 | "id": "lrSs3P1DupKt"
154 | },
155 | "source": [
156 | "def find_videos(search_terms, api_key, views_threshold, uploaded_since):\n",
157 | "\n",
158 | " # Initialise results dataframe\n",
159 | " dataframe = pd.DataFrame(columns=('Title', 'Link', 'Score',\n",
160 | " 'Views', 'Channel','Subscribers',\n",
161 | " 'View-Subscriber Ratio','Channel Link'))\n",
162 | "\n",
163 | " # Run search and populate dataframe\n",
164 | " search_results, youtube_api = search_api(search_terms, api_key,\n",
165 | " uploaded_since)\n",
166 | "\n",
167 | " results_df = populate_dataframe(search_results, youtube_api, dataframe,\n",
168 | " views_threshold)\n",
169 | "\n",
170 | " return results_df\n",
171 | "\n",
172 | "def search_api(search_terms, api_key, uploaded_since):\n",
173 | " \n",
174 | " # Initialise API call\n",
175 | " youtube_api = build('youtube', 'v3', developerKey = api_key)\n",
176 | "\n",
177 | " # Run search\n",
178 | " results = youtube_api.search().list(q=search_terms, part='snippet',\n",
179 | " type='video', order='viewCount', maxResults=50,\n",
180 | " publishedAfter=uploaded_since).execute()\n",
181 | "\n",
182 | " return results, youtube_api\n",
183 | "\n",
184 | "\n",
185 | "def populate_dataframe(results, youtube_api, df, views_threshold):\n",
186 | "\n",
187 | " # Loop over search results and add key information to dataframe\n",
188 | " i = 1\n",
189 | " for item in results['items']:\n",
190 | " viewcount = find_viewcount(item, youtube_api)\n",
191 | " if viewcount > views_threshold:\n",
192 | " title = find_title(item)\n",
193 | " video_url = find_video_url(item)\n",
194 | " channel_url = find_channel_url(item)\n",
195 | " channel_id = find_channel_id(item)\n",
196 | " channel_name = find_channel_title(channel_id, youtube_api)\n",
197 | " num_subs = find_num_subscribers(channel_id, youtube_api)\n",
198 | " ratio = view_to_sub_ratio(viewcount, num_subs)\n",
199 | " days_since_published = age(item)\n",
200 | " score = custom_score(viewcount, ratio, days_since_published)\n",
201 | " df.loc[i] = [title, video_url, score, viewcount, channel_name,\\\n",
202 | " num_subs, format(ratio, '.1f'), channel_url]\n",
203 | " i += 1\n",
204 | " return df\n"
205 | ],
206 | "execution_count": 5,
207 | "outputs": []
208 | },
209 | {
210 | "cell_type": "code",
211 | "metadata": {
212 | "id": "09DrmEs9s_rN"
213 | },
214 | "source": [
215 | "def search_each_term(search_terms, api_key, uploaded_since=get_start_date_string(7),\n",
216 | " views_threshold=5000, num_to_print=5):\n",
217 | " \n",
218 | " #Uses search term to execute API calls\n",
219 | " if type(search_terms) == str:\n",
220 | " search_terms = [search_terms]\n",
221 | "\n",
222 | " \n",
223 | " for index, search_term in enumerate(search_terms):\n",
224 | " df = find_videos(search_terms[index], api_key, views_threshold=views_threshold,\n",
225 | " uploaded_since = uploaded_since)\n",
226 | " df = df.sort_values(['Score'], ascending=[0])\n",
227 | " \n",
228 | " print(\"\\n\\nHi Sreelaya\\nHere are the videos you should watch this week\\n\\n\")\n",
229 | " print_top_videos(df, num_to_print) #Prints top 5 videos\n",
230 | " \n",
231 | " print(\"\\n\\nThese are all the results fetched...\\n\")\n",
232 | " pd.set_option('max_colwidth', 400)\n",
233 | "\n",
234 | " #Modifying Title and Channel columns as hyperlinks\n",
235 | " df['Title'] = '
| Title | Score | Views | Channel | Subscribers | View-Subscriber Ratio |
|---|---|---|---|---|---|
Linear Regression with Scikit Learn | Lesson 1 of 6 | Machine Learning with Python: Zero to GBMs | \n",
328 | " 1595 | \n", 329 | "10752 | \n", 330 | "Jovian | \n",
331 | " 14500 | \n", 332 | "0.7 | \n", 333 | "
XCiT: Cross-Covariance Image Transformers (Facebook AI Machine Learning Research Paper Explained) | \n",
336 | " 429 | \n", 337 | "8865 | \n", 338 | "Yannic Kilcher | \n",
339 | " 91500 | \n", 340 | "0.1 | \n", 341 | "
#55 Self-Supervised Vision Models (Dr. Ishan Misra - FAIR). | \n",
344 | " 400 | \n", 345 | "5153 | \n", 346 | "Machine Learning Street Talk | \n",
347 | " 16600 | \n", 348 | "0.3 | \n", 349 | "
Künstliche Intelligenz: Wie Machine Learning unsere Gesellschaft verändert | \n",
352 | " 304 | \n", 353 | "14721 | \n", 354 | "Florian Homm | \n",
355 | " 238000 | \n", 356 | "0.1 | \n", 357 | "
Sign Language Detection using ACTION RECOGNITION with Python | LSTM Deep Learning Model | \n",
360 | " 208 | \n", 361 | "5380 | \n", 362 | "Nicholas Renotte | \n",
363 | " 23200 | \n", 364 | "0.2 | \n", 365 | "
White Box Vs Black Box Models In Machine Learning- Data Science Interview Question | \n",
368 | " 47 | \n", 369 | "8469 | \n", 370 | "Krish Naik | \n",
371 | " 384000 | \n", 372 | "0.0 | \n", 373 | "
Deep Learning with Python Tutorial | Deep Learning Training | Edureka | DL Rewind - 1 | \n",
376 | " 5 | \n", 377 | "5252 | \n", 378 | "edureka! | \n",
379 | " 2710000 | \n", 380 | "0.0 | \n", 381 | "