71 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/utils/spotify_track.py:
--------------------------------------------------------------------------------
1 | import eyed3
2 | import requests
3 | from requests import Response
4 | import hashlib
5 | import datetime
6 | import os
7 | import shutil
8 | import json
9 | from utils.spotify_album import SpotifyAlbum
10 | from utils.spotify_artist import SpotifyArtist
11 | from utils.deezer_utils import Deezer
12 | from utils.utils import clean_file_path
13 | from exceptions import SpotifyTrackException
14 |
15 |
class SpotifyTrack:
    """A single Spotify track: metadata parsed from the Web API plus the
    downloaded audio/lyrics/artwork payloads.

    Instances are populated either by passing the raw API dict to the
    constructor or by calling load_from_data() later.
    """

    def __init__(self, track_data=None) -> None:
        """Create an empty track, optionally loading it from API data.

        track_data -- raw track JSON from the Spotify Web API (or a playlist
                      item wrapping it under a 'track' key); None leaves the
                      instance blank.
        """
        # All attributes are initialized per-instance here. The previous
        # version declared them as class attributes, so the mutable
        # `artists` list was shared between every instance.
        self.title = ''
        self.spotify_id = ''
        self.artist = None          # primary artist (first of `artists`)
        self.artists = []
        self.album = None           # SpotifyAlbum once loaded
        self.thumbnail_href = ''
        self.release_date = 0
        self.disc_number = 0
        self.track_number = 0       # previously missing a default entirely
        self.duration_ms = 0
        self.explicit = False
        self.href = ''
        self.popularity = 0
        self.audio = b''            # raw mp3 bytes after download()
        self.lyrics = ''
        self.thumbnail = b''        # fixes the old misspelled `thumnail`
        self.data_dump = ''         # raw API dict kept for the mp3 comment tag
        self.isrc = ''              # empty for Spotify "local files"
        if track_data is not None:
            self.load_from_data(track_data)

    def load_from_data(self, data):
        """Populate this track from a raw Spotify Web API track dict."""
        if 'track' in data:
            # Playlist items nest the track under a 'track' key.
            data = data['track']
        self.data_dump = data
        self.album = SpotifyAlbum(data['album'])
        self.title = data['name']
        self.spotify_id = data['id']
        self.artists = [SpotifyArtist(x) for x in data['artists']]
        self.artist = self.artists[0]
        self.thumbnail_href = self.album.thumbnail_href
        self.release_date = self.album.release_date
        self.track_number = data['track_number']
        self.duration_ms = data['duration_ms']
        self.explicit = data['explicit']
        self.href = data['href']
        self.popularity = data['popularity']
        if 'isrc' in data['external_ids']:
            # isrc is not available for local files
            self.isrc = data['external_ids']['isrc']

    def __str__(self) -> str:
        return f'SpotifyTrack< {self.title} >'

    def __repr__(self) -> str:
        return self.__str__()

    def get_lyrics(self, scraper) -> str:
        """Fetch lyrics for this track via the given SpotifyScraper."""
        if scraper is None:
            raise SpotifyTrackException('SCRAPER NOT AVAILABLE!')
        return scraper.get_lyrics(self.spotify_id)

    def download_thumbnail(self, scraper) -> bytes:
        """Download the album cover image bytes via the scraper's client."""
        return scraper.get(self.thumbnail_href).content

    def get_download_link(self, scraper) -> str:
        """Resolve a Deezer CDN download URL by matching this track's ISRC.

        Returns '' for local files (no ISRC). The scraper argument is kept
        for interface symmetry with the other methods; Deezer is queried
        directly.
        """
        if not self.isrc:
            return ''
        return Deezer.get_track_download_url(Deezer.get_track_data(Deezer.get_track_id_from_isrc(self.isrc)))[0]

    def download(self, scraper) -> bytes:
        """Download and decrypt this track's audio from Deezer.

        Raises SpotifyTrackException for local files, for link-resolution
        failures and for download/decryption failures.
        """
        if not self.isrc:
            raise SpotifyTrackException(f'Cannot download local file {self.title}!')
        try:
            download_link = self.get_download_link(scraper)
        except Exception as ex:
            raise SpotifyTrackException(f'Failed to get download url for {self.title} | Exception: {ex}')
        try:
            return Deezer.decrypt_download_data(requests.get(download_link, headers={'Accept':'*/*'}), self.isrc)
        except Exception as ex:
            raise SpotifyTrackException(f'Failed to download {self.title} | Exception: {ex}')

    def package_download(self, scraper):
        """Fetch audio, cover art and lyrics into instance attributes."""
        self.audio = self.download(scraper)
        self.thumbnail = self.download_thumbnail(scraper)
        self.lyrics = self.get_lyrics(scraper)

    def preview_title(self):
        """Human-readable 'Artist1, Artist2 - Title [Album]' string."""
        return f'{", ".join([x.name for x in self.artists])} - {self.title} [{self.album.title}]'

    def download_to_file(self, scraper, output_path: str):
        """Download everything and write a fully tagged mp3 under output_path.

        The file is staged in temp/ first so a failed download never leaves
        a partial file at the final location.
        """
        os.makedirs('temp', exist_ok=True)  # previously assumed to exist
        temp_file_path = f'temp/{hashlib.sha1(self.title.encode() + self.album.spotify_id.encode()).hexdigest()}.temp.mp3'
        self.package_download(scraper)
        with open(temp_file_path, 'wb') as f:
            f.write(self.audio)

        audio_file = eyed3.load(temp_file_path)
        audio_file.initTag(version=(2, 4, 0))  # version is important
        audio_file.tag.title = self.title
        audio_file.tag.artist = ';'.join([artist.name for artist in self.artists])
        audio_file.tag.album_artist = self.artists[0].name
        audio_file.tag.album = self.album.title
        audio_file.tag.original_release_date = datetime.datetime.fromtimestamp(self.album.release_date).year
        audio_file.tag.track_num = self.track_number
        audio_file.info.time_secs = self.duration_ms / 1000
        audio_file.tag.images.set(3, self.thumbnail, 'image/jpeg', u'cover')  # 3 = front cover
        audio_file.tag.lyrics.set(str(self.lyrics))
        # Keep the raw API dump in a comment frame for later re-processing.
        audio_file.tag.comments.set('', str(self.data_dump))

        audio_file.tag.save()

        full_output_path = output_path + '/' + clean_file_path(self.preview_title()) + '.mp3'
        os.makedirs(os.path.dirname(full_output_path), exist_ok=True)
        shutil.move(temp_file_path, full_output_path)
125 |
--------------------------------------------------------------------------------
/spotify_client.py:
--------------------------------------------------------------------------------
1 | from config import *
2 | from exceptions import SpotifyClientException
3 |
4 |
class SpotifyClient:
    """Authenticated HTTP client for Spotify's web-player APIs.

    Authentication is driven by the browser cookies sp_dc / sp_key, from
    which an access token and a client token are obtained and attached to
    every subsequent request.
    """

    _proxy = PROXY
    _client_token = ''
    _access_token = ''
    _client_id = ''
    __USER_AGENT = USER_AGENT
    _verify_ssl = VERIFY_SSL

    # Raw JSON of /v1/me for the authenticated user (set by get_me()).
    user_data = None

    def __init__(self, sp_dc=None, sp_key=None):
        """Store the cookies and immediately fetch a token pair (network I/O)."""
        self.dc = sp_dc
        self.key = sp_key
        # Base headers imitating the official web player.
        self.__HEADERS = {
            'User-Agent': self.__USER_AGENT,
            'Accept': 'application/json',
            'Origin': 'https://open.spotify.com',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'Referer': 'https://open.spotify.com/',
            'Te': 'trailers',
            'App-Platform': 'WebPlayer'
        }
        self.get_tokens(sp_dc, sp_key)

    def get_tokens(self, sp_dc=None, sp_key=None):
        """Fetch and cache a fresh access-token / client-token pair."""
        self._access_token, self._client_id = self.get_access_token(sp_dc=sp_dc, sp_key=sp_key)
        self._client_token = self.get_client_token(self._client_id)

        # NOTE(review): printing tokens leaks credentials to stdout/logs.
        print('Client token: ', self._client_token)
        print('Access token: ', self._access_token)

    def refresh_tokens(self):
        """Re-authenticate using the cookies supplied at construction time."""
        self.get_tokens(self.dc, self.key)

    def get_client_token(self, client_id: str):
        """Exchange the client id for a client token via clienttoken.spotify.com."""
        with requests.session() as session:
            session.proxies = self._proxy
            # BUG FIX: copy the base headers. Assigning the shared dict meant
            # the .update() calls in get()/post()/get_me() mutated it, leaking
            # stale Authorization/Client-Token headers into later sessions
            # (which is why the pops below were needed in the first place).
            session.headers = dict(self.__HEADERS)

            # Clear old tokens, otherwise we will get 400 Bad Request
            if 'client_token' in session.headers:
                session.headers.pop('client_token')
            if 'Authorization' in session.headers:
                session.headers.pop('Authorization')

            data = {
                "client_data": {
                    "client_version": "1.2.13.477.ga4363038",
                    "client_id": client_id,
                    "js_sdk_data":
                    {
                        "device_brand": "",
                        "device_id": "",
                        "device_model": "",
                        "device_type": "",
                        "os": "",
                        "os_version": ""
                    }
                }
            }

            response = session.post('https://clienttoken.spotify.com/v1/clienttoken', json=data, verify=self._verify_ssl)
            try:
                rj = response.json()
            except Exception as ex:
                print('Failed to parse client token response as json!', ex)
                # NOTE(review): exiting with status 0 signals success to the
                # shell despite the failure; a raised exception would be
                # cleaner, kept as-is to preserve behavior.
                exit(0)
            return rj['granted_token']['token']

    def get_access_token(self, keys=None, sp_dc=None, sp_key=None):
        """Obtain a web-player access token using the session cookies.

        Returns (access_token, client_id); falls back to the cached client
        id when Spotify reports it as 'unknown'.
        """
        with requests.session() as session:
            session.proxies = self._proxy
            session.headers = dict(self.__HEADERS)  # copy, see get_client_token
            cookie = {}
            if keys is not None:
                cookie = keys
            if sp_dc is not None:
                cookie['sp_dc'] = sp_dc
            if sp_key is not None:
                cookie['sp_key'] = sp_key
            response = session.get('https://open.spotify.com/get_access_token', verify=self._verify_ssl, cookies=cookie)
            try:
                rj = response.json()
            except Exception as ex:
                print('An error occured when generating an access token!', ex)
                exit(0)
            print('Access token is anon: ', rj['isAnonymous'])
            self.is_anonymous = rj['isAnonymous']
            # The conditional applies only to the second tuple element.
            return rj['accessToken'], rj['clientId'] if rj['clientId'].lower() != 'unknown' else self._client_id

    def get_me(self):
        """Fetch /v1/me, cache it in user_data, and validate the account.

        Raises SpotifyClientException when the keys are invalid or when the
        account is premium.
        """
        with requests.session() as session:
            session.proxies = self._proxy
            session.headers = dict(self.__HEADERS)  # copy, see get_client_token
            session.headers.update({
                'Client-Token': self._client_token,
                'Authorization': f'Bearer {self._access_token}'
            })

            response_json = session.get('https://api.spotify.com/v1/me', verify=self._verify_ssl).json()
            self.user_data = response_json
            if not 'product' in self.user_data:
                raise SpotifyClientException('Spotify client keys are invalid.\nVerify that you have entered valid SP_KEY & SP_DC values.')
            if self.user_data['product'] == 'premium':
                raise SpotifyClientException('THIS USER IS PREMIUM!')
            return response_json

    def get_premium_keys(self):
        """Scrape sp_dc/sp_key cookies published on a third-party web page.

        NOTE(review): fetching credentials from an untrusted external site is
        inherently fragile and risky — verify the page format before relying
        on this.
        """
        page = requests.get('https://www.rkstore.tn/2022/03/spotify-premium-cookies.html', verify=self._verify_ssl)
        root = html.document_fromstring(page.content)
        cookies_element = root.get_element_by_id('download_link')
        cookies = json.loads(cookies_element.text_content())
        prem_keys = {}
        for cookie in cookies:
            prem_keys[cookie['name']] = cookie['value']
        return prem_keys

    def get(self, url: str) -> Response:
        """Authenticated GET with the cached token pair."""
        with requests.session() as session:
            session.proxies = self._proxy
            session.headers = dict(self.__HEADERS)  # copy, see get_client_token
            session.headers.update({
                'Client-Token': self._client_token,
                'Authorization': f'Bearer {self._access_token}'
            })

            response = session.get(url, verify=self._verify_ssl)
            return response

    def post(self, url: str, payload=None) -> Response:
        """Authenticated POST with the cached token pair."""
        with requests.session() as session:
            session.proxies = self._proxy
            session.headers = dict(self.__HEADERS)  # copy, see get_client_token
            session.headers.update({
                'Client-Token': self._client_token,
                'Authorization': f'Bearer {self._access_token}'
            })

            response = session.post(url, verify=self._verify_ssl, data=payload)
            return response
147 |
148 |
--------------------------------------------------------------------------------
/utils/deezer_utils.py:
--------------------------------------------------------------------------------
1 | from config import *
2 | from exceptions import DeezerException
3 |
4 |
class Deezer:
    """Stateless helpers for locating, resolving and decrypting Deezer
    audio streams (used as the actual download backend for Spotify tracks).
    """

    # NOTE(review): hard-coded session cookies, including an 'arl' auth
    # token — these expire and belong in configuration, not source control.
    _cookies = {
        'dzr_uniq_id': 'dzr_uniq_id_frffc916344f831b489e3f366778a86b7a0f3a2f',
        'sid': 'fre1a5ee55bb5ebd4f8505add526aef95c47adf7',
        '_abck': 'C73904478BC37F15E7303B7140C34A1D~-1~YAAQvphmUrc22TWIAQAAxjnJaQktzRdJM/Z5JSO9mfO0N3a5a2jv1rvxchQJ+/438DyVm/nx+6lmw0PZL+S/zBD6rTRIsHiZzDHYGOL2JHskcx+qgFNFV3haB0NmrsRKzL48t0AfE+xh4uzKa1t6681eLEsxD2+XL4CLpP5dlj+ymhNqMFLY0eJ9fFCCGoXvLCSz8EXqD17PYcDD9DHDpGem7+JFNBfpMOtQuaynJh97LfFSwx/6uzpkjg/oO9cNZ1rfUk5Gy5WLkcz8hn4b6prZk1whzOhom5Zba6Vj1KOTY9DvT67udnGqlrau60nNnopoD1SBQNnFaGhGEV+6oUTCshYzMQ==~-1~-1~-1',
        'bm_sz':'A81B5CF520F243866A08F5D742986440~YAAQvphmUrg22TWIAQAAxjnJaRNH5QoYzzhPG/doMRczrBcZ8c/bzqsA+MMcCmvUHPtqKvixyokOz4OYzTlV6t8WzsLDAm5gsrf+9Ul9+GLxF/8EjLqXWNalyUDfkOI6tByxylzmM5qobXBE6YOrdBjYBrLqNh32vLej8JPLSoXV37F6iT1i3+TZpUZAf0EYPOoQLIHs5sZbmWtECvjMB0VE6qEeLsOam+BrLd7CupnL+aq/s3JcLPnQft/k2p0f3XUSjywe7DGXPfxitcIDRAYYG8cWoY2ohhU9KJqKNyFM8LQ=~4338228~3488051',
        'arl':'d4c0a94496e1193e04faf60bc5905f701d9a03c01f8aab3c19d96e82d622e930c1dc523dd78b0a88bfc416bad8096601d254c04d0e296d0e8e1f1be5df322d31ee5af48f6e782cff5b0c58b2f96c1980c7bb8755057c866c301752bf2f1da5b4',
    }

    @staticmethod
    def get_track_id_from_isrc(isrc: str) -> str:
        """Resolve a Deezer track id from an ISRC via the public API.

        Raises DeezerException when no Deezer track matches the ISRC.
        """
        try:
            return str(requests.get(f'https://api.deezer.com/2.0/track/isrc:{isrc}').json()['id'])
        except KeyError:
            raise DeezerException(f'Could not find deezer track by isrc: {isrc}')

    @staticmethod
    def get_track_data(song_id: str) -> dict:
        """Fetch md5_origin/media_version for a track from the private
        gw-light API; both are needed to derive the download URL.

        NOTE(review): the api_token query parameter is hard-coded and may
        expire — confirm it is still accepted.
        """
        resp = requests.post('https://www.deezer.com/ajax/gw-light.php?api_version=1.0&api_token=YTIQw7E4nLSiyzB7A3s0kcBa1p63TSl6&input=3&method=deezer.pageTrack', data='{"sng_id":"' + song_id +'"}', cookies=Deezer._cookies)
        track_json = resp.json()
        data = {}
        data['md5_origin'] = track_json['results']['DATA']['MD5_ORIGIN']
        data['media_version'] = track_json['results']['DATA']['MEDIA_VERSION']
        data['id'] = song_id
        return data

    @staticmethod
    def get_track_download_url(track, **kwargs):
        """Derive and validate the CDN download URL for the given track.

        Arguments:
            track {dict} -- dict with 'md5_origin', 'id' and 'media_version'
                            (as returned by get_track_data()).
        Keyword Arguments:
            fallback_qualities {list} -- quality codes to try when FLAC is
                            unavailable (default: track_formats.FALLBACK_QUALITIES).
        Raises:
            Exception -- if the track dict has no 'md5_origin'.
            ValueError -- if an invalid track argument was given.
            DeezerException -- if no quality yields a non-empty download.
        Returns:
            (url, quality) tuple.
        """

        # Decryption algo got from: https://git.fuwafuwa.moe/toad/ayeBot/src/branch/master/bot.py;
        # and https://notabug.org/deezpy-dev/Deezpy/src/master/deezpy.py
        # Huge thanks!

        quality = track_formats.FLAC
        fallback = True

        try:
            if not "md5_origin" in track:
                raise Exception(
                    "MD5 is needed to decrypt the download link.")

            md5_origin = track["md5_origin"]
            track_id = track["id"]
            media_version = track["media_version"]
        except ValueError:
            raise ValueError(
                "You have passed an invalid argument.")

        def decrypt_url(quality_code):
            # Build the '¤'-joined payload, MD5 it, then AES-ECB encrypt the
            # digest+payload to obtain the CDN path component.
            magic_char = "¤"
            step1 = magic_char.join((md5_origin,
                                     str(quality_code),
                                     track_id,
                                     media_version))
            m = hashlib.md5()
            m.update(bytes([ord(x) for x in step1]))

            step2 = m.hexdigest() + magic_char + step1 + magic_char
            step2 = step2.ljust(80, " ")

            cipher = Cipher(algorithms.AES(bytes('jo6aey6haid2Teih', 'ascii')),
                            modes.ECB(), default_backend())

            encryptor = cipher.encryptor()
            step3 = encryptor.update(bytes([ord(x) for x in step2])).hex()

            # CDN shard is selected by the first hex digit of md5_origin.
            cdn = track["md5_origin"][0]

            return f'https://e-cdns-proxy-{cdn}.dzcdn.net/mobile/1/{step3}'

        url = decrypt_url(track_formats.TRACK_FORMAT_MAP[quality]["code"])
        res = requests.get(url, stream=True)

        if not fallback or (res.status_code == 200 and int(res.headers["Content-length"]) > 0):
            res.close()
            return (url, quality)
        else:
            res.close()  # free the streamed connection before fallbacks

            if "fallback_qualities" in kwargs:
                fallback_qualities = kwargs["fallback_qualities"]
            else:
                fallback_qualities = track_formats.FALLBACK_QUALITIES

            for key in fallback_qualities:
                url = decrypt_url(
                    track_formats.TRACK_FORMAT_MAP[key]["code"])

                res = requests.get(
                    url, stream=True)

                if res.status_code == 200 and int(res.headers["Content-length"]) > 0:
                    res.close()
                    return (url, key)
                res.close()

            # BUG FIX: previously the function fell off the end and returned
            # None when every fallback failed, producing an opaque TypeError
            # at the caller ('NoneType' is not subscriptable). Fail loudly.
            raise DeezerException(f'No downloadable quality found for track id {track_id}')

    @staticmethod
    def get_blowfish_key(track_id):
        """Derive the per-track Blowfish key: byte-wise XOR of the two
        halves of md5(track_id) with a fixed secret.
        """
        secret = 'g4el58wc0zvf9na1'

        m = hashlib.md5()
        m.update(bytes([ord(x) for x in track_id]))
        id_md5 = m.hexdigest()

        blowfish_key = bytes(([(ord(id_md5[i]) ^ ord(id_md5[i+16]) ^ ord(secret[i]))
                               for i in range(16)]))

        return blowfish_key

    @staticmethod
    def decrypt_download_data(content: 'Response', isrc: str) -> bytes:
        """Decrypt a streamed Deezer download.

        The stream is split into 2048-byte chunks; the first chunk of every
        group of three is Blowfish-CBC encrypted (key derived from the track
        id, IV = bytes 0..7), the other two are plaintext.

        content -- streaming requests Response of the encrypted file
        isrc -- ISRC used to re-derive the track id and hence the key
        """
        chunk_size = 2048
        data_iter = content.iter_content(chunk_size)
        i = 0
        decrypted = b''
        blowfish_key = Deezer.get_blowfish_key(Deezer.get_track_id_from_isrc(isrc))
        for chunk in data_iter:
            if i % 3 > 0:
                # Chunks 1 and 2 of each triplet are not encrypted.
                decrypted += chunk
            elif len(chunk) < chunk_size:
                # A short encrypted-position chunk is the unencrypted tail.
                decrypted += chunk
                break
            else:
                cipher = Cipher(algorithms.Blowfish(blowfish_key),
                                modes.CBC(
                                    bytes([i for i in range(8)])),
                                default_backend())

                decryptor = cipher.decryptor()
                decrypted += decryptor.update(chunk) + decryptor.finalize()

            i += 1
        return decrypted
156 |
--------------------------------------------------------------------------------
/spotify_scraper.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import Generator
3 | from config import *
4 | from utils.spotify_track import SpotifyTrack
5 | from utils.spotify_album import SpotifyAlbum
6 | from utils.spotify_playlist import SpotifyPlaylist
7 | from utils.spotify_category import SpotifyCategory
8 | from utils.spotify_artist import SpotifyArtist
9 | from spotify_client import SpotifyClient
10 | from typing import List
11 |
12 |
class SpotifyScraper:
    """High-level scraping operations (playlists, albums, artists, users,
    categories) built on top of an authenticated SpotifyClient.
    """

    # Shared SpotifyClient instance used for all HTTP traffic.
    _client = None

    class IDTypes(Enum):
        """Kind of Spotify entity a link points to."""
        Playlist = 0
        Album = 1
        Artist = 2
        Track = 3
        User = 4
        Unknown = -1

    def __init__(self, sp_dc=None, sp_key=None, client=None) -> None:
        """Use an existing client, or build one from the sp_dc/sp_key cookies."""
        if client is not None:
            self._client = client
        else:
            self._client = SpotifyClient(sp_dc=sp_dc, sp_key=sp_key)

    def identify_link_type(self, link: str) -> IDTypes:
        """Classify a Spotify URL/URI by substring (case-insensitive)."""
        if 'playlist' in link.lower():
            return self.IDTypes.Playlist
        elif 'album' in link.lower():
            return self.IDTypes.Album
        elif 'artist' in link.lower():
            return self.IDTypes.Artist
        elif 'track' in link.lower():
            return self.IDTypes.Track
        elif 'user' in link.lower():
            return self.IDTypes.User
        return self.IDTypes.Unknown

    def extract_id_from_link(self, link: str) -> str:
        """Return the trailing ID component of a Spotify link.

        BUG FIX: the old rindex('/') version kept query strings
        ('.../track/<id>?si=...' -> '<id>?si=...') and returned '' for a
        trailing slash; both are handled now.
        """
        return link.rstrip('/').rsplit('/', 1)[-1].split('?')[0]

    def scrape_tracks(self, link: str, console=None) -> 'Generator[SpotifyTrack, None, None]':
        """Dispatch to the right scraper based on the link type.

        Returns a generator of SpotifyTrack (a one-element list for a plain
        track link); None for an unrecognized link.
        """
        id_type = self.identify_link_type(link)
        if id_type == self.IDTypes.Playlist:
            return self.scrape_playlist_tracks(self.extract_id_from_link(link))
        elif id_type == self.IDTypes.Album:
            return self.scrape_album_tracks(self.extract_id_from_link(link))
        elif id_type == self.IDTypes.Artist:
            return self.scrape_artist_tracks(self.extract_id_from_link(link), intense=True, console=console)
        elif id_type == self.IDTypes.Track:
            return [SpotifyTrack(self.get(f'https://api.spotify.com/v1/tracks/{self.extract_id_from_link(link)}').json())]
        elif id_type == self.IDTypes.User:
            return self.scrape_user_items(self.extract_id_from_link(link))

    def scrape_pagination(self, url: str) -> 'Generator[dict, None, None]':
        """Yield every item of a paginated endpoint, following 'next' pages.

        Note: the separator logic produces '?&limit=...' for bare URLs —
        harmless to the API, kept for byte-compatible URLs.
        """
        limit = 50
        offset = 0
        ret = self._client.get(f'{url}{"?" if "?" not in url else ""}&limit={limit}').json()
        for item in ret['items']:
            yield item
        while ret['next'] is not None:
            offset += limit
            ret = self._client.get(f'{url}{"?" if "?" not in url else ""}&offset={offset}&limit={limit}').json()
            for item in ret['items']:
                yield item

    def scrape_playlist(self, playlist_id: str) -> dict:
        """Raw playlist JSON from the Web API."""
        return self._client.get(f'https://api.spotify.com/v1/playlists/{playlist_id}').json()

    def scrape_playlist_tracks(self, playlist_id: str) -> 'Generator[SpotifyTrack, None, None]':
        """Yield every track of a playlist; optionally export the playlist
        metadata afterwards (controlled by settings)."""
        tracks = []
        for track in self.scrape_pagination(f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks?market=from_token'):
            spotify_track = SpotifyTrack(self.get(track['track']['href']).json())
            tracks.append(spotify_track)
            yield spotify_track
        if settings.AUTO_DOWNLOAD_PLAYLIST_METADATA:
            playlist = SpotifyPlaylist(playlist_id, tracks, self.get_playlist_data(playlist_id))
            playlist.export_to_file()

    def scrape_album(self, album_id: str) -> dict:
        """Raw album JSON from the Web API."""
        return self._client.get(f'https://api.spotify.com/v1/albums/{album_id}').json()

    def scrape_album_tracks(self, album_id: str) -> 'Generator[SpotifyTrack, None, None]':
        """Yield every track of an album (each re-fetched for full data)."""
        for track in self.scrape_pagination(f'https://api.spotify.com/v1/albums/{album_id}/tracks'):
            yield SpotifyTrack(self.get(track['href']).json())

    def scrape_artist(self, artist_id: str) -> dict:
        """Raw top-tracks JSON for an artist."""
        return self.get(f'https://api.spotify.com/v1/artists/{artist_id}/top-tracks?market=from_token').json()

    def scrape_artist_albums(self, artist_id: str) -> 'Generator[SpotifyAlbum, None, None]':
        """Yield every album of an artist as SpotifyAlbum objects."""
        for album in self.scrape_pagination(f'https://api.spotify.com/v1/artists/{artist_id}/albums?market=from_token'):
            yield SpotifyAlbum(album)

    def scrape_artist_tracks(self, artist_id: str, intense: bool=False, console=None) -> 'Generator[SpotifyTrack, None, None]':
        """Yield an artist's top tracks, their 'This Is' playlist, and —
        when intense — every track of every album."""
        tracks = self.scrape_artist(artist_id)['tracks']
        artist = SpotifyArtist(artist_data=tracks[0]['album']['artists'][0])
        for track_data in tracks:
            yield SpotifyTrack(track_data)
        for track in self.scrape_playlist_tracks(artist.get_this_is_playlist(self)):
            yield track
        if intense:
            for album in self.scrape_artist_albums(artist_id):
                for track in self.scrape_album_tracks(album.spotify_id):
                    yield track

    def get(self, url: str) -> 'Response':
        """Authenticated GET through the underlying client."""
        return self._client.get(url)

    def post(self, url: str, payload=None) -> 'Response':
        """Authenticated POST through the underlying client."""
        return self._client.post(url, payload=payload)

    def get_lyrics(self, track_id: str):
        """Lyrics JSON for a track, or '' on any failure (best-effort)."""
        try:
            return self.get(f'https://spclient.wg.spotify.com/color-lyrics/v2/track/{track_id}').json()
        except Exception:
            return ''

    def get_track_features(self, track_id: str):
        """Audio-features JSON for a track, or '' on any failure (best-effort)."""
        try:
            return self.get(f'https://api.spotify.com/v1/audio-features/{track_id}').json()
        except Exception:
            return ''

    def get_category_playlist_ids(self, category_id: str, limit=50, offset=0) -> list:
        """Collect up to `limit` playlist ids of a browse category, paging
        from `offset` until exhausted."""
        playlist_ids = []
        current_offset = offset
        has_next = True
        while len(playlist_ids) < limit and has_next:
            category_playlists_json = self.get_category_playlists(category_id, limit=50, offset=current_offset)
            has_next = category_playlists_json['playlists']['next'] is not None
            for playlist in category_playlists_json['playlists']['items']:
                if not playlist:
                    # The API sometimes returns null entries; skip them.
                    continue
                playlist_ids.append(playlist['id'])
        return playlist_ids

    def get_category_playlists(self, category_id: str, limit: int=50, offset: int=0) -> dict:
        """One page of playlists for a browse category."""
        data = self.get(f'https://api.spotify.com/v1/browse/categories/{category_id}/playlists/?limit={limit}&offset={offset}').json()
        return data

    def get_categories(self, limit=50) -> dict:
        """Browse categories JSON (country is hard-coded to IL)."""
        return self.get(f'https://api.spotify.com/v1/browse/categories/?limit={limit}&country=IL').json()

    def get_categories_full(self, query: str='') -> 'List[SpotifyCategory]':
        """Browse categories as objects, optionally filtered by name substring."""
        categories = self.get_categories()
        categories_data = []
        os.makedirs(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}/{settings.CATEGORY_METADATA_SUB_DIR}/', exist_ok=True)
        for category_json in categories['categories']['items']:
            if not query or query.lower() in category_json['name'].lower():
                category = SpotifyCategory(category_json)
                categories_data.append(category)
        return categories_data

    def get_playlist_data(self, playlist_id: str) -> dict:
        """Raw playlist JSON from the Web API."""
        return self.get(f'https://api.spotify.com/v1/playlists/{playlist_id}').json()

    def get_playlist(self, playlist_id: str) -> 'SpotifyPlaylist':
        """SpotifyPlaylist object with a lazy track generator attached."""
        playlist_data = self.get_playlist_data(playlist_id)
        tracks = self.scrape_playlist_tracks(playlist_id)
        return SpotifyPlaylist(spotify_id=playlist_id, tracks=tracks, data=playlist_data)

    def scrape_user_items(self, user_id: str) -> 'Generator[SpotifyTrack, None, None]':
        """Yield every track of every public playlist of a user."""
        for playlist in self.scrape_pagination(f'https://api.spotify.com/v1/users/{user_id}/playlists'):
            for track in self.scrape_playlist_tracks(playlist['id']):
                yield track
170 |
--------------------------------------------------------------------------------
/spotify_mass_download.py:
--------------------------------------------------------------------------------
1 | from threading import Thread, get_ident
2 | import pickle
3 | from typing import Generator
4 | from spotify_client import SpotifyClient
5 | from spotify_scraper import SpotifyScraper
6 | from config import *
7 | import base64
8 | from time import sleep
9 | from datetime import datetime
10 | import random
11 | from utils.utils import clean_file_path
12 | from utils.spotify_track import SpotifyTrack
13 |
# NOTE(review): importing this module has side effects — constructing the
# client immediately performs network requests (token fetch), exits the
# process on token-parse failure, and get_me() raises SpotifyClientException
# for invalid keys or premium accounts.
client = SpotifyClient(sp_key=SP_KEY, sp_dc=SP_DC)
client.get_me()
scraper = SpotifyScraper(client=client)

# Cross-thread bookkeeping: ids already processed in this run, and a counter
# of active full_download() calls that keeps save_globals_save_file() looping.
g_downloaded_artist_covers = []
g_downloaded_songs = []
g_keep_saving = 0
21 |
22 |
class Console:
    """In-memory, timestamped log buffer.

    Entries carry a CSS color string so a web frontend can render them;
    get() returns the accumulated list.
    """

    def __init__(self):
        # BUG FIX: the buffer was a class attribute, so every Console
        # instance shared (and appended to) the same list. Make it
        # per-instance.
        self.console_output = []

    def log(self, value: str):
        """Plain log entry (inherits the UI's default color)."""
        self.cout(value, 'inherit')

    def error(self, value: str):
        """Red error entry."""
        self.cout(value, 'rgba(255,30,30,0.9)')

    def info(self, value: str):
        """Cyan informational entry."""
        self.cout(value, 'rgba(30,255,255,0.9)')

    def happy(self, value: str):
        """Green success entry."""
        self.cout(value, 'rgba(30,255,30,0.9)')

    def cout(self, value: str, color: str):
        """Append a timestamped entry with an explicit CSS color."""
        self.console_output.append(
            {
                'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'value': value,
                'color': color,
            }
        )

    def get(self):
        """Return the full list of log entries (oldest first)."""
        return self.console_output
49 |
# Module-level singleton log buffer shared by every worker thread below.
console = Console()
51 |
52 |
def download_track_list(download_dir: str, track_list: 'Generator[SpotifyTrack, None, None]', recursive_artist: bool=False, recursive_album: bool=False, recursive: bool=False):
    """Download every track yielded by track_list into download_dir.

    Also saves each artist's cover image once per run, and optionally
    recurses into the track's album (recursive_album/recursive) and its
    primary artist's catalogue (recursive_artist/recursive). Per-track
    failures are logged and skipped; state is tracked in module globals
    shared across threads.
    """
    global g_downloaded_songs, g_downloaded_artist_covers
    my_thread_id = str(get_ident()).zfill(6)
    artist_images_download_dir = f'{download_dir}/{settings.ARTIST_IMAGES_SUB_DIR}'
    downloaded_count = 0
    for track in track_list:
        try:
            # Tokens expire during long runs; refresh every 20 tracks.
            if downloaded_count % 20 == 0:
                client.refresh_tokens()
            if track.spotify_id in g_downloaded_songs:
                console.info(f'Thread<{my_thread_id}> | Skipping already downloaded song: {track.title}')
                downloaded_count += 1
                continue
            # NOTE(review): no '/' between download_dir and the artist dir —
            # callers appear to rely on this layout; confirm before changing.
            track_path = f'{download_dir}{clean_file_path(track.artists[0].name)}/{clean_file_path(track.album.title)}'
            track.download_to_file(scraper, track_path)
            console.happy(f'Thread<{my_thread_id}> | Downloaded: {track.preview_title()}')
            g_downloaded_songs.append(track.spotify_id)
            if (recursive_album or recursive):
                download_track_list(download_dir=download_dir, track_list=scraper.scrape_album_tracks(track.album.spotify_id), recursive=False)

            # Save each artist's cover image once per run (best-effort).
            for artist in track.artists:
                if artist.spotify_id not in g_downloaded_artist_covers:
                    try:
                        artist_image = artist.download_image(scraper)
                        # Base64-encode the name so any character is filesystem-safe.
                        artist_name = base64.b64encode(artist.name.encode()).decode()
                        with open(f'{artist_images_download_dir}/{artist_name}.jpg', 'wb') as f:
                            f.write(artist_image)
                    except Exception as ex:
                        console.error(str(ex))
                    g_downloaded_artist_covers.append(artist.spotify_id)

            if (recursive_artist or recursive):
                download_track_list(download_dir=download_dir, track_list=scraper.scrape_artist_tracks(track.artist.spotify_id), recursive=False)
            if recursive_artist:
                # BUG FIX: scrape_artist_albums yields SpotifyAlbum objects,
                # not dicts — album['id'] raised TypeError. Also use the
                # track's primary artist instead of the `artist` variable
                # left over from the cover-image loop above.
                for album in scraper.scrape_artist_albums(track.artist.spotify_id):
                    download_track_list(download_dir=download_dir, track_list=scraper.scrape_album_tracks(album.spotify_id), recursive=False)
        except Exception as ex:
            console.error(f'Thread<{my_thread_id}> | Exception: {ex}')
        downloaded_count += 1
        if settings.VERBOSE_OUTPUTS:
            console.log(f'Thread<{my_thread_id}> | Processed {downloaded_count} tracks')
94 |
95 |
def save_globals_save_file():
    """Load persisted download-state globals, then periodically save them.

    Despite the name this first RESTORES g_downloaded_songs /
    g_downloaded_artist_covers from settings.GLOBALS_SAVE_FILE, then loops,
    rewriting the file every DOWNLOADS_FILE_SAVE_INTERVAL seconds while at
    least one full_download() is active (g_keep_saving > 0). Intended to run
    on its own thread alongside the downloaders.
    """
    global g_keep_saving, g_downloaded_artist_covers, g_downloaded_songs
    try:
        with open(settings.GLOBALS_SAVE_FILE, 'r') as f:
            # The file holds a JSON object whose 'songs'/'artists' values are
            # themselves JSON-encoded lists (double-encoded on save below).
            data = json.loads(f.read())
            g_downloaded_songs = json.loads(data['songs'])
            g_downloaded_artist_covers = json.loads(data['artists'])
            console.log(f'Loaded {len(g_downloaded_songs)} songs & {len(g_downloaded_artist_covers)} artists')
    except Exception as ex:
        # A missing file is fine (first run); a present-but-unreadable file
        # means saving would clobber real state, so bail out instead.
        console.error(f'Failed to load globals save file! Exception: {ex}')
        if os.path.exists(settings.GLOBALS_SAVE_FILE):
            console.error(f'To avoid data loss, SpotiFile will now exit.')
            exit(1)
    while g_keep_saving > 0:
        with open(settings.GLOBALS_SAVE_FILE, 'w') as f:
            g_downloaded_songs_json = json.dumps(g_downloaded_songs)
            g_downloaded_artist_covers_json = json.dumps(g_downloaded_artist_covers)
            data = {'songs':g_downloaded_songs_json, 'artists': g_downloaded_artist_covers_json }
            f.write( json.dumps(data) )
        if settings.VERBOSE_OUTPUTS:
            console.log('Saved globals file!')
        sleep(settings.DOWNLOADS_FILE_SAVE_INTERVAL)
118 |
119 |
def full_download(download_dir: str, identifier: str, recursive_artist: bool=False, recursive_album: bool=False, recursive: bool=False):
    """Scrape a Spotify link/ID and download everything it resolves to.

    download_dir -- destination root (artist images go to a subdirectory)
    identifier -- any Spotify link understood by scraper.scrape_tracks()
    recursive_* -- forwarded to download_track_list() to widen the crawl
    All exceptions are caught and logged; the function never raises.
    """
    global g_downloaded_songs, g_downloaded_artist_covers, g_keep_saving
    try:
        artist_images_download_dir = f'{download_dir}/{settings.ARTIST_IMAGES_SUB_DIR}'
        os.makedirs(artist_images_download_dir, exist_ok=True)
        os.makedirs(f'temp', exist_ok=True)

        g_keep_saving += 1
        try:
            client.refresh_tokens()
            console.log('Refreshed tokens!')

            console.log(f'Received scrape command on identifier: {identifier}, {recursive=}, {recursive_artist=}, {recursive_album=}')
            download_track_list(download_dir=download_dir, track_list=scraper.scrape_tracks(identifier, console=console), recursive=recursive, recursive_album=recursive_album, recursive_artist=recursive_artist)

            console.log(f'Completely done scraping identifier: {identifier}!')
        finally:
            # BUG FIX: the decrement used to run only on success, so any
            # exception left g_keep_saving elevated forever and the
            # save_globals_save_file() loop never terminated.
            g_keep_saving -= 1
    except Exception as ex:
        console.error(f'Full download exception: {ex}')
141 |
142 |
def download_category_playlists(category_id, category_index, category_ids, download_meta_data_only):
    """Export (and optionally fully download) every playlist of one browse
    category. category_index/category_ids are only used for progress logs.
    Per-playlist failures are logged and skipped.
    """
    playlist_ids = scraper.get_category_playlist_ids(category_id)
    # Randomize order so parallel runs spread their load.
    random.shuffle(playlist_ids)
    for playlist_index, playlist_id in enumerate(playlist_ids):
        console.log(f'Scraping playlist data from playlist {playlist_id} ({playlist_index + 1}/{len(playlist_ids)}) from category {category_id} ({category_index + 1}/{len(category_ids)})')
        try:
            playlist = scraper.get_playlist(playlist_id)
            playlist.export_to_file()
            if download_meta_data_only:
                continue
            full_download(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href, recursive=True, recursive_album=True, recursive_artist=True)
        except Exception as ex:
            console.error(f'Scraping categories exception: {ex}')
155 |
156 |
def download_all_categories_playlists(download_meta_data_only=True, query: str=''):
    """Spawn one thread per browse category and scrape its playlists.

    download_meta_data_only -- when True only playlist metadata is exported;
                               otherwise every playlist is fully downloaded.
    query -- optional case-insensitive substring filter on category names.
    Blocks until every category thread has finished.
    """
    client.refresh_tokens()
    os.makedirs(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}/{settings.PLAYLIST_METADATA_SUB_DIR}/', exist_ok=True)
    console.log(f'Scraping playlists from "{query}" categories')
    categories = scraper.get_categories_full(query=query)
    threads = []
    # Randomize order so parallel runs spread their load.
    random.shuffle(categories)
    for category_index, category in enumerate(categories):
        console.log(f'Scraping playlists from category {category.name} ({category_index + 1}/{len(categories)})')
        category.download_metadata(scraper=scraper)
        try:
            thread = Thread(target=download_category_playlists, args=(category.spotify_id, category_index, categories, download_meta_data_only))
            thread.start()
            threads.append(thread)
        except Exception as ex:
            console.error(f'Scraping categories exception: {ex}')

    # Idiom fix: join with a plain loop instead of building a throwaway
    # list via a side-effect comprehension.
    for thread in threads:
        thread.join()
175 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SpotiFile
2 | ## A simple and open source spotify scraper.
3 | *Python 3.8+*
4 |
5 | ---
6 | ## 2024 Update: Project has been archived!
7 | Due to possible misuse of SpotiFile, I have decided to archive this repo.
8 | The existing code will stay up - though it no longer works and is not suited to interact with Spotify's new API.
9 | If you do wish to revive this project, please first review [Spotify's developers' ToS](https://developer.spotify.com/terms).
10 |
11 | ---
12 |
13 | ## Quick Start
14 | Make sure you have python 3.8 or above.
15 | $ git clone https://github.com/Michael-K-Stein/SpotiFile.git
16 | $ cd SpotiFile
17 | Now open config.py and setup your SP_KEY (Spotify has renamed this to sp_adid) and SP_DC tokens ([see below](https://github.com/Michael-K-Stein/SpotiFile#sp_key--sp_dc-tokens))
18 | $ python main.py
19 |
20 | ---
21 |
22 | *DISCLAIMER: This script is intended for personal and non-commercial use only. The purpose of this script is to create datasets for training machine learning models. Any use of this script that violates Deezer's Terms of Use or infringes on its intellectual property rights is strictly prohibited. The writer of this script is not responsible for any illegal or unauthorized use of the script by third parties. Users of this script assume all responsibility for their actions and agree to use the script at their own risk.*
23 | *AVIS DE NON-RESPONSABILITÉ : Ce script est destiné à un usage personnel et non commercial uniquement. Le but de ce script est de créer des ensembles de données pour entraîner des modèles d'apprentissage automatique. Toute utilisation de ce script qui viole les Conditions d'utilisation de Deezer ou porte atteinte à ses droits de propriété intellectuelle est strictement interdite en vertu de la loi française. L'auteur de ce script n'est pas responsable de toute utilisation illégale ou non autorisée du script par des tiers. Les utilisateurs de ce script assument toutes les responsabilités de leurs actions et conviennent de l'utiliser à leurs propres risques.*
24 |
25 | ---
26 |
27 | ## What?
28 | SpotiFile is a script which allows users to simply and easily, using a web-gui, scrape on Spotify playlists, albums, artists, etc.
29 | More advanced usages can be done by importing the relevant classes (e.g.
30 | ```python
31 | from spotify_scraper import SpotifyScraper
32 | ```
33 | ) and then using IPython to access specific Spotify API features.
34 | ### Advantages
35 | The main advantage of using SpotiFile is that it completely circumvents all of Spotify's API call limits and restrictions. SpotiFile offers an API to communicate with Spotify's API as if it were a real user.
36 | This allows SpotiFile to download information en-masse quickly.
37 |
38 | ---
39 |
40 | ## Why?
41 | Downloading massive amounts of songs and meta data can help if you prefer listening to music offline, or if you are designing a music server which runs on an airgapped network.
42 | *We do not encourage music piracy nor condone any illegal activity. SpotiFile is a useful research tool. Usage of SpotiFile for other purposes is at the user's own risk. Be warned, we will not bear any responsibility for improper use of this educational software!*
43 | ### Proper and legitimate uses of SpotiFile:
44 | + Scraping tracks to create datasets for machine learning models.
45 | + Creating remixes (for personal use only!)
46 | + Downloading music which no longer falls under copyright law ([Generally, content whose original artist passed away over 70 years ago](https://www.copyright.gov/help/faq/faq-duration.html)).
47 | ### Please notice Spotify's User Guidelines, and make sure you understand them. See section 5;
48 | *The following is not permitted for any reason whatsoever in relation to the Services and the material or content made available through the Services, or any part thereof:
49 | 5. "crawling" or "scraping", whether manually or by automated means, or otherwise using any automated means (including bots, scrapers, and spiders), to view, access or collect information;*
50 | Usage of this "scraper" is in violation of Spotify's User Guidelines. By using this code, you assume responsibility - as *you* are the one "scraping" Spotify using automated means.
51 | ### Please notice Deezer's Terms of Use, and make sure you understand them. See article 8 - Intellectual property;
52 | *The Recordings on the Deezer Free Service are protected digital files by national and international copyright and neighboring rights. They may only therefore be listened to within a private or family setting. Any use for a non-private purpose will expose the Deezer Free User to civil and/or criminal proceedings. Any other use of the Recordings is strictly forbidden and more particularly any download or attempt to download, any transfer or attempt to transfer permanently or temporarily on the hard drive of a computer or any other device (notably music players), any burn or attempt to burn a CD or any other support are expressly forbidden. Any resale, exchange or renting of these files is strictly prohibited.*
53 | Storing, or attempting to store files from Deezer is strictly prohibited. Use this software only to create, for personal use, a custom streaming app. Notice that you can only use this streaming app in a private or family setting. By using this code, you assume responsibility to perform only legal actions - such as *streaming* music from Deezer for personal use.
54 | ### Do adhere to your local laws regarding intellectual property!
55 | #### Notice: Local law (where this was written) explicitly permits reverse engineering for non-commercial purposes.
56 |
57 | ---
58 |
59 | ## How?
60 | SpotiFile starts its life by authenticating as a normal Spotify user, and then performs a wide range of conventional and unconventional API calls to Spotify in order to retrieve relevant information.
61 | SpotiFile does not actually download audio from Spotify, since they use proper DRM encryption to protect against piracy. Rather, SpotiFile finds the relevant audio file on Deezer, using the copyright id (ironically). Then SpotiFile downloads the "encrypted" audio file from Deezer, which failed to implement DRM properly. Credit for reversing Deezer's encryption goes to https://git.fuwafuwa.moe/toad/ayeBot/src/branch/master/bot.py & https://notabug.org/deezpy-dev/Deezpy/src/master/deezpy.py & https://www.reddit.com/r/deemix/ (Original reversing algorithm has been taken down).
62 |
63 | ---
64 |
65 | ## Features
66 | + Authenticating as a legitimate Spotify user.
67 | + Scraping tracks from a playlist.
68 | + Scraping tracks from an album.
69 | + Scraping tracks from an artist.
70 | + Scraping playlists from a user.
71 | + Scraping playlists from a category.
72 | + Scraping a track from a track url.
73 | + Scraping artist images.
74 | + Scraping popular playlists' metadata and tracks.
75 | + Premium user token snatching (experimental).
76 | + Scraping song lyrics (time synced when possible).
77 | + Scraping track metadata.
78 | + Scraping category metadata.
79 |
80 | ---
81 |
82 | ## SP_KEY & SP_DC tokens
83 | Obtaining sp_dc and sp_key cookies (sp_key is now called sp_adid)
84 | SpotiFile uses two cookies to authenticate against Spotify in order to have access to the required services.
85 | *Shoutout to @fondberg for the explanation https://github.com/fondberg/spotcast*
86 |
87 | To obtain the cookies, these different methods can be used:
88 |
89 | ### Chrome based browser
90 | Open a new Incognito window at https://open.spotify.com and login to Spotify.
91 | Press Command+Option+I (Mac) or Control+Shift+I or F12. This should open the developer tools menu of your browser.
92 | Go into the application section.
93 | In the menu on the left go into Storage/Cookies/open.spotify.com.
94 | Find the sp_dc and sp_key and copy the values.
95 | Close the window without logging out (Otherwise the cookies are made invalid).
96 |
97 | ### Firefox based browser
98 | Open a new Incognito window at https://open.spotify.com and login to Spotify.
99 | Press Command+Option+I (Mac) or Control+Shift+I or F12. This should open the developer tools menu of your browser.
100 | Go into the Storage section. (You might have to click on the right arrows to reveal the section).
101 | Select the Cookies sub-menu and then https://open.spotify.com.
102 | Find the sp_dc and sp_key and copy the values.
103 | Close the window without logging out (Otherwise the cookies are made invalid).
104 |
105 | ---
106 |
107 | # Example usages:
108 | ## Using SpotiFile to create a song recommendation module based off song lyrics' semantic similarity:
109 | ```python
110 | from spotify_scraper import SpotifyScraper
111 | import nltk
112 | from nltk.corpus import stopwords
113 | from sklearn.feature_extraction.text import TfidfVectorizer
114 | from sklearn.metrics.pairwise import cosine_similarity
115 | import sys
116 |
117 |
118 | def semantic_similarity(paragraph1, paragraph2):
119 | # Preprocess text
120 | stop_words = set(stopwords.words('english'))
121 | paragraph1 = ' '.join([word.lower() for word in nltk.word_tokenize(paragraph1) if word.lower() not in stop_words])
122 | paragraph2 = ' '.join([word.lower() for word in nltk.word_tokenize(paragraph2) if word.lower() not in stop_words])
123 |
124 | # Compute similarity score
125 | tfidf_vectorizer = TfidfVectorizer()
126 | tfidf_matrix = tfidf_vectorizer.fit_transform([paragraph1, paragraph2])
127 | similarity_score = cosine_similarity(tfidf_matrix)[0][1]
128 |
129 | return similarity_score
130 |
131 |
132 | # Usage
133 | scraper = SpotifyScraper()
134 |
135 | lyrics1 = '\n'.join(x['words'] for x in scraper.get_lyrics(sys.argv[1])['lyrics']['lines'])
136 | lyrics2 = '\n'.join(x['words'] for x in scraper.get_lyrics(sys.argv[2])['lyrics']['lines'])
137 |
138 | sim = semantic_similarity(lyrics1, lyrics2)
139 |
140 | print(f'The similarity between the two tracks is: {sim}')
141 |
142 | ```
143 |
144 | ---
145 |
146 | ### Legal
147 | The use of a script to download music and lyrics from Deezer for personal use only, to create machine learning datasets for non-commercial use, is not illegal under French and Israeli law. The use of such a script falls under the doctrine of fair use or fair dealing, which allows individuals to make copies of copyrighted works for their own private and non-commercial use without requiring permission from the copyright owner.
148 |
149 | This interpretation is supported by precedent. In the case of Société Civile des Producteurs Phonographiques v. Delorme, the French Court of Cassation held that copying music for personal and non-commercial use is allowed under the doctrine of fair use. The court held that such copying did not infringe on the rights of the copyright owner as it did not compete with the original work or harm the market for the original work.
150 |
151 | Furthermore, the purpose of using the script is to create machine learning datasets for non-commercial use, which falls under the category of research and study. Many countries, including France and Israel, have exceptions to copyright infringement for the purposes of research and study, which allow individuals to use copyrighted works without the need for permission from the copyright owner.
152 |
153 | It is also worth noting that the script is not being used to distribute the copyrighted works to others or to make a profit, which reduces the likelihood of any significant harm to the copyright owner's rights.
154 |
155 | Finally, the disclaimer notice attached to the script explicitly states that the script is intended for personal and non-commercial use only, and that any use of the script that violates Deezer's Terms of Use or infringes on its intellectual property rights is strictly prohibited. The writer of the script has taken reasonable steps to ensure that users understand the limitations of the script and are aware that any unauthorized use is prohibited.
156 |
157 | In conclusion, the use of a script to download music and lyrics from Deezer for personal use only to create machine learning datasets for non-commercial use is legal under French and Israeli law. The doctrine of fair use and exceptions for research and study, as well as the absence of any significant harm to the copyright owner's rights and the presence of a clear disclaimer notice, support this interpretation.
158 |
--------------------------------------------------------------------------------