├── README.md └── YouTube_Recommendations.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Customize-YouTube-Recommendations 2 | 3 | To create an API Key 4 | 1. Go to Google Console and enable YouTube Data API v3. 5 | 2. To access enabled APIs create credentials : APIs & Services --> Credentials --> API Keys 6 | 7 | How to run this? 8 | 1. Download it and open through Jupyter Notebook. 9 | 2. Simply click on Open In Colab button at the top of the .ipynb file and run through Google Colab. 10 | -------------------------------------------------------------------------------- /YouTube_Recommendations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "YouTube Recommendations.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "authorship_tag": "ABX9TyNzEeNBF+Sp0UlXb0hXxhT/", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "language_info": { 17 | "name": "python" 18 | } 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | " $\"Open$ " 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "metadata": { 34 | "id": "S7dR5LZikFjI" 35 | }, 36 | "source": [ 37 | "pip install google-api-python-client" 38 | ], 39 | "execution_count": null, 40 | "outputs": [] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "metadata": { 45 | "id": "8cluwEjrs3Wk" 46 | }, 47 | "source": [ 48 | "import pandas as pd\n", 49 | "from datetime import datetime, timedelta\n", 50 | "from tabulate import tabulate\n", 51 | "from IPython.display import display, HTML\n", 52 | "from apiclient.discovery import build\n", 53 | "\n", 54 | "api_key = 'your_api_key_here'" 55 | ], 56 | "execution_count": 2, 57 | "outputs": [] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "metadata": { 62 | "id": "0GUPRiQctNsW" 63 | }, 64 | "source": [ 65 | "def get_start_date_string(search_period_days):\n", 66 | " #Returns string for start-date of search period\n", 67 | " search_start_date = datetime.today() - timedelta(search_period_days)\n", 68 | " date_string = datetime(year=search_start_date.year,month=search_start_date.month,\n", 69 | " day=search_start_date.day).strftime('%Y-%m-%dT%H:%M:%SZ')\n", 70 | " return date_string" 71 | ], 72 | "execution_count": 3, 73 | "outputs": [] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "metadata": { 78 | "id": "5AFBdYzMtdco" 79 | }, 80 | "source": [ 81 | "#Parsing key information about vidoes\n", 82 | "\n", 83 | "def find_title(item):\n", 84 | " title = item['snippet']['title']\n", 85 | " return title\n", 86 | "\n", 87 | "def find_video_url(item):\n", 88 | " video_id = item['id']['videoId']\n", 89 | " video_url = \"https://www.youtube.com/watch?v=\" + video_id\n", 90 | " return video_url\n", 91 | "\n", 92 | "def find_viewcount(item, youtube):\n", 93 | " video_id = item['id']['videoId']\n", 94 | " video_statistics = youtube.videos().list(id=video_id,\n", 95 | " part='statistics').execute()\n", 96 | " viewcount = int(video_statistics['items'][0]['statistics']['viewCount'])\n", 97 | " return viewcount\n", 98 | "\n", 99 | "def find_channel_id(item):\n", 100 | " channel_id = item['snippet']['channelId']\n", 101 | " return channel_id\n", 102 | "\n", 103 | "def find_channel_url(item):\n", 104 | " channel_id = item['snippet']['channelId']\n", 105 | " channel_url = \"https://www.youtube.com/channel/\" + channel_id\n", 106 | " return channel_url\n", 107 | "\n", 108 | "def find_channel_title(channel_id, youtube):\n", 109 | " channel_search = youtube.channels().list(id=channel_id,\n", 110 | " part='brandingSettings').execute()\n", 111 | " channel_name = channel_search['items'][0]\\\n", 112 | " ['brandingSettings']['channel']['title']\n", 113 | " return channel_name\n", 114 | "\n", 115 | "def find_num_subscribers(channel_id, youtube):\n", 116 | " subs_search = youtube.channels().list(id=channel_id,\n", 117 | " part='statistics').execute()\n", 118 | " if subs_search['items'][0]['statistics']['hiddenSubscriberCount']:\n", 119 | " num_subscribers = 1000000\n", 120 | " else:\n", 121 | " num_subscribers = int(subs_search['items'][0]\\\n", 122 | " ['statistics']['subscriberCount'])\n", 123 | " return num_subscribers\n", 124 | "\n", 125 | "def view_to_sub_ratio(viewcount, num_subscribers):\n", 126 | " if num_subscribers == 0:\n", 127 | " return 0\n", 128 | " else:\n", 129 | " ratio = viewcount / num_subscribers\n", 130 | " return ratio\n", 131 | "\n", 132 | "def age(item):\n", 133 | " when_published = item['snippet']['publishedAt']\n", 134 | " when_published_datetime_object = datetime.strptime(when_published,\n", 135 | " '%Y-%m-%dT%H:%M:%SZ')\n", 136 | " today_date = datetime.today()\n", 137 | " days_since_published = int((today_date - when_published_datetime_object).days)\n", 138 | " if days_since_published == 0:\n", 139 | " days_since_published = 1\n", 140 | " return days_since_published\n", 141 | "\n", 142 | "def custom_score(viewcount, ratio, days_since_published):\n", 143 | " ratio = min(ratio, 5)\n", 144 | " score = (viewcount * ratio) / days_since_published\n", 145 | " return round(score)\n" 146 | ], 147 | "execution_count": 4, 148 | "outputs": [] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "metadata": { 153 | "id": "lrSs3P1DupKt" 154 | }, 155 | "source": [ 156 | "def find_videos(search_terms, api_key, views_threshold, uploaded_since):\n", 157 | "\n", 158 | " # Initialise results dataframe\n", 159 | " dataframe = pd.DataFrame(columns=('Title', 'Link', 'Score',\n", 160 | " 'Views', 'Channel','Subscribers',\n", 161 | " 'View-Subscriber Ratio','Channel Link'))\n", 162 | "\n", 163 | " # Run search and populate dataframe\n", 164 | " search_results, youtube_api = search_api(search_terms, api_key,\n", 165 | " uploaded_since)\n", 166 | "\n", 167 | " results_df = populate_dataframe(search_results, youtube_api, dataframe,\n", 168 | " views_threshold)\n", 169 | "\n", 170 | " return results_df\n", 171 | "\n", 172 | "def search_api(search_terms, api_key, uploaded_since):\n", 173 | " \n", 174 | " # Initialise API call\n", 175 | " youtube_api = build('youtube', 'v3', developerKey = api_key)\n", 176 | "\n", 177 | " # Run search\n", 178 | " results = youtube_api.search().list(q=search_terms, part='snippet',\n", 179 | " type='video', order='viewCount', maxResults=50,\n", 180 | " publishedAfter=uploaded_since).execute()\n", 181 | "\n", 182 | " return results, youtube_api\n", 183 | "\n", 184 | "\n", 185 | "def populate_dataframe(results, youtube_api, df, views_threshold):\n", 186 | "\n", 187 | " # Loop over search results and add key information to dataframe\n", 188 | " i = 1\n", 189 | " for item in results['items']:\n", 190 | " viewcount = find_viewcount(item, youtube_api)\n", 191 | " if viewcount > views_threshold:\n", 192 | " title = find_title(item)\n", 193 | " video_url = find_video_url(item)\n", 194 | " channel_url = find_channel_url(item)\n", 195 | " channel_id = find_channel_id(item)\n", 196 | " channel_name = find_channel_title(channel_id, youtube_api)\n", 197 | " num_subs = find_num_subscribers(channel_id, youtube_api)\n", 198 | " ratio = view_to_sub_ratio(viewcount, num_subs)\n", 199 | " days_since_published = age(item)\n", 200 | " score = custom_score(viewcount, ratio, days_since_published)\n", 201 | " df.loc[i] = [title, video_url, score, viewcount, channel_name,\\\n", 202 | " num_subs, format(ratio, '.1f'), channel_url]\n", 203 | " i += 1\n", 204 | " return df\n" 205 | ], 206 | "execution_count": 5, 207 | "outputs": [] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "metadata": { 212 | "id": "09DrmEs9s_rN" 213 | }, 214 | "source": [ 215 | "def search_each_term(search_terms, api_key, uploaded_since=get_start_date_string(7),\n", 216 | " views_threshold=5000, num_to_print=5):\n", 217 | " \n", 218 | " #Uses search term to execute API calls\n", 219 | " if type(search_terms) == str:\n", 220 | " search_terms = [search_terms]\n", 221 | "\n", 222 | " \n", 223 | " for index, search_term in enumerate(search_terms):\n", 224 | " df = find_videos(search_terms[index], api_key, views_threshold=views_threshold,\n", 225 | " uploaded_since = uploaded_since)\n", 226 | " df = df.sort_values(['Score'], ascending=[0])\n", 227 | " \n", 228 | " print(\"\\n\\nHi Sreelaya\\nHere are the videos you should watch this week\\n\\n\")\n", 229 | " print_top_videos(df, num_to_print) #Prints top 5 vidoes\n", 230 | " \n", 231 | " print(\"\\n\\nThese are all the results fetched...\\n\")\n", 232 | " pd.set_option('max_colwidth', 400)\n", 233 | "\n", 234 | " #Modifying Title and Channel columns as hyperlinks\n", 235 | " df['Title'] = '

'\n", 236 | " df['Channel'] = '

' \n", 237 | " final_df = df.drop(['Link', 'Channel Link'], axis=1)\n", 238 | "\n", 239 | "\n", 240 | " dfStyler = final_df.style.set_properties(subset=['Title'],**{'text-align': 'left'})\n", 241 | " dfStyler = final_df.style.set_properties(subset=['Score', 'Views', 'Channel','Subscribers',\n", 242 | " 'View-Subscriber Ratio'],**{'text-align': 'center'})\n", 243 | " dfStyler.set_table_styles([dict(selector='th', props=[('text-align', 'center')])]).hide_index()\n", 244 | " \n", 245 | " display(HTML(dfStyler.render()))\n", 246 | "\n", 247 | "def print_top_videos(df, num_to_print):\n", 248 | " if len(df) < num_to_print:\n", 249 | " num_to_print = len(df)\n", 250 | " if num_to_print == 0:\n", 251 | " print(\"No video results found\")\n", 252 | " else:\n", 253 | " for i in range(num_to_print):\n", 254 | " video = df.iloc[i]\n", 255 | " title = video['Title']\n", 256 | " link = video['Link']\n", 257 | " print(\"#{}\\nTitle: '{}' \\nURL: {} \\n\"\\\n", 258 | " .format(i+1, title, link))\n", 259 | "\n" 260 | ], 261 | "execution_count": 6, 262 | "outputs": [] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "metadata": { 267 | "colab": { 268 | "base_uri": "https://localhost:8080/", 269 | "height": 697 270 | }, 271 | "id": "Ar5U_OxQuXBd", 272 | "outputId": "5feeea7e-8771-48d6-f1c2-c70cae72177f" 273 | }, 274 | "source": [ 275 | "search_each_term(\"Machine Learning\", api_key, num_to_print=5) #search_each_term(\"Data Science\", api_key, '2021-01-11T00:00:00Z' )\n", 276 | " " 277 | ], 278 | "execution_count": 7, 279 | "outputs": [ 280 | { 281 | "output_type": "stream", 282 | "text": [ 283 | "\n", 284 | "\n", 285 | "Hi Sreelaya\n", 286 | "Here are the videos you should watch this week\n", 287 | "\n", 288 | "\n", 289 | "#1\n", 290 | "Title: 'Linear Regression with Scikit Learn | Lesson 1 of 6 | Machine Learning with Python: Zero to GBMs' \n", 291 | "URL: https://www.youtube.com/watch?v=CVszSgTWODE \n", 292 | "\n", 293 | "#2\n", 294 | "Title: 'XCiT: Cross-Covariance Image Transformers (Facebook AI Machine Learning Research Paper Explained)' \n", 295 | "URL: https://www.youtube.com/watch?v=g08NkNWmZTA \n", 296 | "\n", 297 | "#3\n", 298 | "Title: '#55 Self-Supervised Vision Models (Dr. Ishan Misra - FAIR).' \n", 299 | "URL: https://www.youtube.com/watch?v=EXJmodhu4_4 \n", 300 | "\n", 301 | "#4\n", 302 | "Title: 'Künstliche Intelligenz: Wie Machine Learning unsere Gesellschaft verändert' \n", 303 | "URL: https://www.youtube.com/watch?v=xhOeuqL2Pro \n", 304 | "\n", 305 | "#5\n", 306 | "Title: 'Sign Language Detection using ACTION RECOGNITION with Python | LSTM Deep Learning Model' \n", 307 | "URL: https://www.youtube.com/watch?v=doDUihpj6ro \n", 308 | "\n", 309 | "\n", 310 | "\n", 311 | "These are all the results fetched...\n", 312 | "\n" 313 | ], 314 | "name": "stdout" 315 | }, 316 | { 317 | "output_type": "display_data", 318 | "data": { 319 | "text/html": [ 320 | "\n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | "

Title	Score	Views	Channel	Subscribers	View-Subscriber Ratio
Linear Regression with Scikit Learn \| Lesson 1 of 6 \| Machine Learning with Python: Zero to GBMs	1595	10752	Jovian	14500	0.7
XCiT: Cross-Covariance Image Transformers (Facebook AI Machine Learning Research Paper Explained)	429	8865	Yannic Kilcher	91500	0.1
#55 Self-Supervised Vision Models (Dr. Ishan Misra - FAIR).	400	5153	Machine Learning Street Talk	16600	0.3
Künstliche Intelligenz: Wie Machine Learning unsere Gesellschaft verändert	304	14721	Florian Homm	238000	0.1
Sign Language Detection using ACTION RECOGNITION with Python \| LSTM Deep Learning Model	208	5380	Nicholas Renotte	23200	0.2
White Box Vs Black Box Models In Machine Learning- Data Science Interview Question	47	8469	Krish Naik	384000	0.0
Deep Learning with Python Tutorial \| Deep Learning Training \| Edureka \| DL Rewind - 1	5	5252	edureka!	2710000	0.0

" 383 | ], 384 | "text/plain": [ 385 | "" 386 | ] 387 | }, 388 | "metadata": { 389 | "tags": [] 390 | } 391 | } 392 | ] 393 | } 394 | ] 395 | } --------------------------------------------------------------------------------