├── .gitignore
├── .travis.yml
├── README.md
├── beetsplug
    ├── __init__.py
    └── ydl.py
├── env.config.yml
├── env.develop
├── requirements.txt
├── setup.py
└── test


/.gitignore:
--------------------------------------------------------------------------------
 1 | .vscode
 2 | 
 3 | /build
 4 | /dist
 5 | /env/*
 6 | env.lib.db
 7 | *.egg-info
 8 | 
 9 | __pycache__
10 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 |   - "3.5"
4 |   - "3.6"
5 | install:
6 |   - python setup.py install
7 | script:
8 |   - ./test


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![Travis CI](https://api.travis-ci.org/vmassuchetto/beets-ydl.svg?branch=master)
  2 | 
  3 | # beets ydl
  4 | 
  5 | Download audio from youtube-dl sources and import into beets
  6 | 
  7 |     $ beet ydl "https://www.youtube.com/watch?v=wW6ykueIhX8"
  8 | 
  9 |     $ beet ls short music for short people
 10 | 
 11 |     59 Times the Pain - Short Music for Short People - We Want the Kids
 12 |     7 Seconds - Short Music for Short People - F.O.F.O.D.
 13 |     88 Fingers Louie - Short Music for Short People - All My Friends Are in Popular Bands
 14 |     Adrenalin O.D. - Short Music for Short People - Your Kung Fu Is Old... And Now You Must Die!
 15 |     Aerobitch - Short Music for Short People - Steamroller Blues
 16 |     [...]
 17 | 
 18 | ## Installation
 19 | 
 20 |     pip install beets-ydl
 21 | 
 22 | Then enable the `ydl` plugin in your `config.yaml` file.
 23 | 
 24 | ## Configuration
 25 | 
 26 | Available options and default values on `config.yaml`:
 27 | 
 28 | ```yml
 29 | plugins: ydl
 30 | 
 31 | ydl:
 32 |     download: True         # download files from sources after getting information,
 33 |     split_files: True       # try to split album files into separate tracks,
 34 |     import: True           # import files into beets after downloading and splitting,
 35 |     youtubedl_options: {}  # youtube-dl available options -- https://git.io/fN0c7
 36 |     urls: []               # list of default urls to download when no arguments are provided, you
 37 |                            # can provide a playlist to get checked every time
 38 | ```
 39 | 
 40 | ## How it works
 41 | 
 42 | The plugin's main goal is to deliver an importable file set to the `beet import`
 43 | command, so it will download an audio file, look for a tracklist with track
 44 | times in the video description, split the file into per-track files, assign
 45 | some basic ID3 tags to them, and finally run `beet import` on
 46 | `${BEETS_CONFIG}/ydl-cache/${VIDEO_ID}` directory.
 47 | 
 48 | ## Tips
 49 | 
 50 | - The video title can mislead beets when it looks for the correct album; in this
 51 |   case you'll have to manually enter a search term
 52 | 
 53 | - Use the `bandcamp` plugin for better results
 54 | 
 55 | - Use a `.netrc` file to use your own YouTube playlists
 56 | 
 57 |   Security discussions apart, you can create a `~/.netrc` with credentials for
 58 |   youtube-dl to read.
 59 | 
 60 |       machine youtube login somelogin@gmail.com password somepassword
 61 | 
 62 |   Check [this entry](https://git.io/fN2TD) on youtube-dl docs for more
 63 |   information.
 64 | 
 65 |   Like this, you can download private playlists or your subscriptions:
 66 | 
 67 |       beet ydl "https://www.youtube.com/feed/subscriptions"
 68 | 
 69 | - Download and import later
 70 | 
 71 |   To download and split files without importing into beets:
 72 | 
 73 |       beet ydl "<source>" --keep-files --no-import
 74 | 
 75 |   And later, to import:
 76 | 
 77 |       beet ydl "<source>" --no-download --no-split-files
 78 | 
 79 |   Like this, you can download a big playlist and then run the beets import
 80 |   routine, which requires manual intervention.
 81 | 
 82 | - (possibly) enhance audio quality
 83 | 
 84 |   beets-ydl uses a proposed [192kbps extractor 'bestaudio'](https://git.io/fN2mJ)
 85 |   format because it is more likely that it will find separate audio files on
 86 |   sources. Some high quality videos might have better audio quality embedded, so
 87 |   it can also make sense to set a higher quality extractor:
 88 | 
 89 |   ```yaml
 90 |   ydl:
 91 |       youtubedl_options:
 92 |           format: 'best'
 93 |           postprocessors:
 94 |               - key: 'FFmpegExtractAudio'
 95 |                 preferredcodec: 'mp3'
 96 |                 preferredquality: '320'
 97 |                 nopostoverwrites: True
 98 |   ```
 99 | 
100 |   This can, however, end up with unnecessarily big files that have 320kbps as a
101 |   merely nominal quality. See [this discussion](https://askubuntu.com/q/634584).
102 | 
103 | ## Development
104 | 
105 | Execute the env script to get into a virtualenv.
106 | 
107 |     . ./env.develop
108 | 


--------------------------------------------------------------------------------
/beetsplug/__init__.py:
--------------------------------------------------------------------------------
# Declare `beetsplug` as a pkgutil-style namespace package: extend_path()
# adds every `beetsplug` directory found on sys.path to this package's
# __path__, so plugins shipped by separate distributions can share the name.
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)


--------------------------------------------------------------------------------
/beetsplug/ydl.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright 2016, Vinicius Massuchetto.
  3 | #
  4 | # Permission is hereby granted, free of charge, to any person obtaining
  5 | # a copy of this software and associated documentation files (the
  6 | # "Software"), to deal in the Software without restriction, including
  7 | # without limitation the rights to use, copy, modify, merge, publish,
  8 | # distribute, sublicense, and/or sell copies of the Software, and to
  9 | # permit persons to whom the Software is furnished to do so, subject to
 10 | # the following conditions:
 11 | #
 12 | # The above copyright notice and this permission notice shall be
 13 | # included in all copies or substantial portions of the Software.
 14 | 
 15 | from beets import config
 16 | from beets import ui
 17 | from beets.plugins import BeetsPlugin
 18 | from optparse import OptionParser
 19 | from pathlib import Path
 20 | from shutil import copyfile
 21 | from xdg import BaseDirectory
 22 | from youtube_dl import YoutubeDL
 23 | from hashlib import md5
 24 | import glob
 25 | import json
 26 | import os
 27 | import re
 28 | import shutil
 29 | import subprocess
 30 | import uuid
 31 | 
 32 | class Colors():
 33 |     INFO = '\033[94m'
 34 |     SUCCESS = '\033[92m'
 35 |     WARNING = '\033[93m'
 36 |     BOLD = '\033[1m'
 37 |     END = '\033[0m'
 38 | 
 39 | class YdlPlugin(BeetsPlugin):
 40 |     """A plugin for downloading music from YouTube and importing into beets.
 41 | 
 42 |     It tries to split album files if it can identify track times somewhere.
 43 |     """
 44 |     def __init__(self, *args, **kwargs):
 45 |         """Set default values
 46 | 
 47 |         `self.config['youtubedl_options']` is a dict with a lot of options
 48 |         available from youtube-dl: https://git.io/fN0c7
 49 |         """
 50 |         super(YdlPlugin, self).__init__()
 51 | 
 52 |         self.search_query = "https://www.youtube.com/results?search_query="
 53 |         self.config_dir = config.config_dir()
 54 |         self.cache_dir = self.config_dir + "/ydl-cache"
 55 |         self.outtmpl = self.cache_dir + "/%(id)s/%(id)s.%(ext)s"
 56 | 
 57 |         # Default options
 58 |         self._config = {
 59 |             'urls': [],
 60 |             'verbose': False,
 61 |             'youtubedl_options': {
 62 |                 'verbose': False,
 63 |                 'keepvideo': False,
 64 |                 'cachedir': self.cache_dir,
 65 |                 'outtmpl': self.outtmpl,
 66 |                 'restrictfilenames': True,
 67 |                 'ignoreerrors': True,
 68 |                 'nooverwrites': True,
 69 |                 'writethumbnail': True,
 70 |                 'quiet': True,
 71 |                 'usenetrc': os.path.exists(
 72 |                     os.path.join(str(Path.home()), ".netrc")),
 73 |                 'format': 'bestaudio/best',
 74 |                 'postprocessors': [{
 75 |                     'key': 'FFmpegExtractAudio',
 76 |                     'preferredcodec': 'mp3',
 77 |                     'preferredquality': '192',
 78 |                     'nopostoverwrites': True
 79 |                 }]
 80 |             }
 81 |         }
 82 |         self._config.update(self.config)
 83 |         self.config = self._config
 84 | 
 85 |         # be verbose if beets is verbose
 86 |         if not self.config.get('verbose'):
 87 |             self.config['verbose'] = True
 88 | 
 89 |     def commands(self):
 90 |         outer_class = self
 91 | 
 92 |         def ydl_func(lib, opts, args):
 93 |             """Parse args and download one source at a time to pass it to
 94 |             beets
 95 |             """
 96 |             for opt, value in opts.__dict__.items():
 97 |                 self.config[opt] = value
 98 | 
 99 |             if len(args) > 0:
100 |                 for arg in args:
101 |                     outer_class.youtubedl(lib, opts, arg)
102 |             elif self.config.get('urls') is not None:
103 |                 if self.config.get('verbose'):
104 |                     print("[ydl] Falling back to default urls")
105 |                 for url in self.config.get('urls'):
106 |                     outer_class.youtubedl(lib, opts, str(url))
107 | 
108 |         parser = OptionParser()
109 |         parser.add_option("--no-download", action="store_false",
110 |             default=True, dest="download", help="don't actually " + \
111 |                 "download files, only the descriptions")
112 |         parser.add_option("--no-split-files", action="store_false",
113 |             default=True, dest="split_files", help="don't try " + \
114 |                 "to split files when an album is identified")
115 |         parser.add_option("--no-import", action="store_false",
116 |             default=True, dest="import", help="do not import into " + \
117 |                 "beets after downloading and processing")
118 |         parser.add_option("-f", "--force-download", action="store_true",
119 |             default=False, dest="force_download", help="always download " + \
120 |                 "and overwrite files")
121 |         parser.add_option("-k", "--keep-files", action="store_true",
122 |             default=False, dest="keep_files", help="keep the files " + \
123 |                 "downloaded on cache, useful for caching or bulk importing")
124 |         parser.add_option("-w", "--write-dummy-mp3", action="store_true",
125 |             default=False, dest="write_dummy_mp3", help="write blank " + \
126 |                 "dummy mp3 files with valid ID3 information")
127 |         parser.add_option("-v", "--verbose", action="store_true",
128 |             dest="verbose", default=False, help="print processing " + \
129 |                 "information")
130 | 
131 |         ydl_cmd = ui.Subcommand('ydl', parser=parser,
132 |             help=u'Download music from YouTube')
133 |         ydl_cmd.func = ydl_func
134 | 
135 |         return [ydl_cmd]
136 | 
137 |     def youtubedl(self, lib, opts, arg):
138 |         """Calls YoutubeDL
139 | 
140 |         Call beets when finishes downloading the audio file. We don't implement
141 |         a YoutubeDL's post processor because we want to call beets for every
142 |         download, and not after downloading a lot of files.
143 | 
144 |         So we try to read `YoutubeDL.extract_info` entries and process them
145 |         with an internal `YoutubeDL.process_ie_result` method, that will
146 |         actually download the audio file.
147 |         """
148 |         if self.config.get('verbose'):
149 |             print("[ydl] Calling youtube-dl")
150 | 
151 |         youtubedl_config = self.config.get('youtubedl_options')
152 |         youtubedl_config['keepvideo'] = self.config.get('keep_files')
153 |         y = YoutubeDL(youtubedl_config)
154 | 
155 |         ie_result = y.extract_info(arg, download=False, process=False)
156 | 
157 |         if ie_result is None:
158 |             print("[ydl] Error: Failed to fetch file information.")
159 |             print("[ydl]   If this is not a network problem, try upgrading")
160 |             print("[ydl]   beets-ydl:")
161 |             print("[ydl]")
162 |             print("[ydl]     pip install -U beets-ydl")
163 |             print("[ydl]")
164 |             exit(1)
165 | 
166 |         if 'entries' in ie_result:
167 |             entries = ie_result['entries']
168 |         else:
169 |             entries = [ie_result]
170 | 
171 |         download = self.config.get('download')
172 |         if self.config.get('force_download'):
173 |             download = True
174 | 
175 |         for entry in entries:
176 |             items = [x for x in lib.items('ydl:' + entry['id'])] + \
177 |                 [x for x in lib.albums('ydl:' + entry['id'])]
178 | 
179 |             if len(items) > 0 and not self.config.get('force_download'):
180 |                 if self.config.get('verbose'):
181 |                     print('[ydl] Skipping item already in library:' + \
182 |                         ' %s [%s]' % (entry['title'], entry['id']))
183 |                 continue
184 | 
185 |             if self.config.get('verbose') and not download:
186 |                 print("[ydl] Skipping download: " + entry['id'])
187 | 
188 |             data = y.process_ie_result(entry, download=download)
189 |             if data:
190 |                 ie_result.update(data)
191 |                 self.info = ie_result
192 |                 self.process_item()
193 |             else:
194 |                 print("[ydl] No data for " + entry['id'])
195 | 
196 |     def is_in_library(self, entry, lib):
197 |         """Check if an `entry` is already in the `lib` beets library
198 |         """
199 |         if lib.items(('ydl_id', entry['id'])):
200 |             return True
201 |         else:
202 |             return False
203 | 
204 |     def get_file_path(self, ext):
205 |         return self.outtmpl % { 'id': self.info.get('id'), 'ext': ext }
206 | 
207 |     def is_album(self):
208 |         return self.fullalbum_stripped or len(self.tracks) > 1
209 | 
210 |     def process_item(self):
211 |         """Called after downloading source with YoutubeDL
212 | 
213 |         From here on, the plugin assumes its state according to what
214 |         is being downloaded.
215 |         """
216 |         print('[ydl] Processing item: ' + self.info.get('title'))
217 | 
218 |         ext = self.config.get('youtubedl_options')\
219 |                 ['postprocessors'][0]['preferredcodec']
220 |         self.audio_file = self.get_file_path(ext)
221 |         self.outdir, self.audio_file_ext = os.path.splitext(self.audio_file)
222 |         self.outdir = os.path.dirname(self.outdir)
223 | 
224 |         if self.config.get('verbose') and \
225 |             self.config.get('download') and \
226 |             not os.path.exists(self.audio_file):
227 |             print('[ydl] Error: Audio file not found: ' + self.audio_file)
228 |             exit(1)
229 | 
230 |         self.strip_fullalbum()
231 |         self.extract_tracks()
232 | 
233 |         if not self.is_album():
234 |             self.set_single_file_data()
235 | 
236 |         if self.config.get('verbose'):
237 |             print(self.get_tracklist())
238 | 
239 |         if self.config.get('write_dummy_mp3'):
240 |             self.write_dummy_mp3()
241 | 
242 |         if self.config.get('verbose') and self.is_album():
243 |             print("[ydl] URL is identified as an album")
244 |         else:
245 |             print("[ydl] URL is identified as a singleton")
246 | 
247 |         if self.config.get('split_files') \
248 |             and not self.config.get('write_dummy_mp3') \
249 |             and self.is_album():
250 |             self.split_file()
251 | 
252 |         if self.config.get('import'):
253 |             beet_cmd = self.get_beet_cmd()
254 |             if self.config.get('verbose'):
255 |                 print("[ydl] Running beets: " + ' '.join(beet_cmd))
256 |             subprocess.run(beet_cmd)
257 |         elif self.config.get('verbose'):
258 |             print('[ydl] Skipping import')
259 | 
260 |         if not self.config.get('keep_files'):
261 |             self.clean()
262 |         elif self.config.get('verbose') and self.config.get('keep_files'):
263 |             print('[ydl] Keeping downloaded files on ' + self.outdir)
264 | 
265 |     def get_beet_cmd(self):
266 |         beet_cmd = ['beet']
267 | 
268 |         if os.getenv('BEETS_ENV') == 'develop':
269 |             beet_cmd.extend(['-c', 'env.config.yml'])
270 | 
271 |         if self.config.get('verbose'):
272 |             beet_cmd.extend(['-v'])
273 | 
274 |         beet_cmd.extend(['import', '--set', 'ydl=' + self.info.get('id')])
275 | 
276 |         if not self.is_album():
277 |             beet_cmd.extend(['--singletons'])
278 | 
279 |         if os.path.exists(self.outdir):
280 |             beet_cmd.extend([self.outdir])
281 |         else:
282 |             beet_cmd.extend([self.audio_file])
283 | 
284 |         return beet_cmd
285 | 
286 |     def __exit__(self, exc_type, exc_value, traceback):
287 |         cache_size = self.config.get('cache_dir')
288 |         if cache_size > 0:
289 |             print("[ydl] " + cache_size + " in cache")
290 | 
291 |         if self.config.get('verbose'):
292 |             print('[ydl] Leaving')
293 | 
294 |     def clean(self):
295 |         """Deletes everything related to the present run.
296 |         """
297 |         files = glob.glob(self.outdir + '*')
298 |         for f in files:
299 |             if os.path.isdir(f):
300 |                 shutil.rmtree(f)
301 |             else:
302 |                 os.remove(f)
303 | 
304 |     def strip_fullalbum(self):
305 |         """Will remove '[Full Album]' entries on video title.
306 |         """
307 |         regex = re.compile(r'\S*?(fullalbum|full[^a-z]+album|album)\S*?',
308 |             re.IGNORECASE)
309 |         title = regex.sub('', self.info.get('title'))
310 |         if title != self.info.get('title'):
311 |             self.info['title'] = title
312 |             self.fullalbum_stripped = True
313 | 
314 |         self.fullalbum_stripped = False
315 | 
316 |     def split_file(self):
317 |         """Split downloaded file into multiple tracks
318 | 
319 |         Tries to parse metadata from the video description.
320 |         """
321 |         # @TODO check for overwrites according to options
322 | 
323 |         if self.config.get('verbose'):
324 |             print("[ydl] Splitting tracks")
325 | 
326 |         cmds = []
327 |         ffmpeg_cmd = ['ffmpeg', '-y', '-i', self.audio_file,
328 |             '-acodec', 'copy']
329 | 
330 |         if not os.path.exists(self.outdir):
331 |             os.mkdir(self.outdir)
332 | 
333 |         file_id = os.path.basename(os.path.normpath(self.outdir))
334 | 
335 |         for track in self.tracks:
336 |             opts = ['-ss', str(track['start']), '-to', str(track['end'])]
337 | 
338 |             for k in track.keys():
339 |                 opts.extend(['-metadata', '%s=%s' % (k, track[k])])
340 | 
341 |             outfile = '%s/%03d-%s%s' % (self.outdir,
342 |                 track['track'], file_id, self.audio_file_ext)
343 |             opts.extend([outfile])
344 | 
345 |             cmds.append(ffmpeg_cmd + opts)
346 | 
347 |         if len(cmds) > 0 and os.path.exists(self.audio_file):
348 |             print("[ydl] Running ffmpeg")
349 |             for cmd in cmds:
350 |                 subprocess.run(cmd, stdout=subprocess.PIPE,
351 |                     stderr=subprocess.PIPE)
352 |             os.remove(self.audio_file)
353 | 
354 |     def clean_str(self, s):
355 |         s = re.sub(r'[^0-9a-zA-Z ]', '', s)
356 |         s = re.sub(r'\s+', ' ', s)
357 |         s = s.strip()
358 | 
359 |         return s
360 | 
361 |     def get_common_metadata(self):
362 |         """Tries to translate metadata parsed from video description into file
363 |         metadata.
364 | 
365 |         Will also remove years from title.
366 |         """
367 |         metadata = {}
368 | 
369 |         year = self.get_year()
370 |         if year is not None:
371 |             metadata['year'] = year
372 | 
373 |         metadata['artist'], metadata['album'] = self.parse_title()
374 | 
375 |         return metadata
376 | 
377 |     def get_year(self):
378 |         year_regex = r'[^\S]?([12][0-9]{3})[^\S]?'
379 |         regex = re.compile(year_regex)
380 |         matches = regex.match(self.info.get('title'))
381 |         if matches:
382 |             self.info['title'] = re.sub(year_regex, '', self.info['title'])
383 |             year = matches.group(1)
384 |             return year
385 | 
386 |         return None
387 | 
388 |     def parse_title(self):
389 |         """Parse the title trying to find an "Artist - Album" pattern
390 |         """
391 |         seps_regex = r'(.*?)[-~|*%#](.*)'
392 |         regex = re.compile(seps_regex)
393 | 
394 |         if regex.match(self.info.get('title')):
395 |             art_alb = regex.findall(self.info.get('title'))
396 |             first = art_alb[0][0]
397 |             second = art_alb[0][1]
398 | 
399 |         # in beets we trust
400 |         else:
401 |             first = self.info.get('title')
402 |             second = self.info.get('title')
403 | 
404 |         return (self.clean_str(first), self.clean_str(second))
405 | 
406 |     def to_seconds(self, time):
407 |         """Convert MM:SS to seconds
408 |         """
409 |         secs = 0
410 |         parts = [int(s) for s in time.split(':')]
411 |         secs = parts[len(parts)-1]
412 |         secs += parts[len(parts)-2] * 60
413 |         if len(parts) > 2:
414 |             secs += parts[len(parts)-3] * 3600
415 | 
416 |         return secs
417 | 
418 |     def to_hms(self, seconds):
419 |         """Convert seconds to HH:MM:SS
420 |         """
421 |         seconds, sec = divmod(float(seconds), 60)
422 |         hr, min = divmod(seconds, 60)
423 | 
424 |         return "%d:%02d:%02d" % (hr, min, sec)
425 | 
426 |     def extract_tracks(self):
427 |         """Try different methods to extract tracks metadata
428 |         """
429 |         print("[ydl] Extracting tracks metadata")
430 | 
431 |         self.tracks = []
432 |         if os.path.exists(self.audio_file):
433 |             self.tracks = self.extract_tracks_from_chapters()
434 |         elif self.config.get('verbose'):
435 |             print("[ydl] Audio file not found, won't look for chapters")
436 | 
437 |         if len(self.tracks) == 0:
438 |             if self.config.get('verbose'):
439 |                 print("[ydl] Chapters not found, trying video description")
440 |             self.tracks = self.extract_tracktimes_from_string(
441 |                 self.info.get('description'))
442 | 
443 |         if len(self.tracks) > 0:
444 |             self.extract_tracks_cleanup()
445 | 
446 |         common_metadata = self.get_common_metadata()
447 | 
448 |         for i in range(0, len(self.tracks) - 1):
449 |             self.tracks[i].update(common_metadata)
450 | 
451 |     def get_tracklist(self):
452 |         output = []
453 |         if len(self.tracks) > 1:
454 |             for track in self.tracks:
455 |                 output.append("[ydl] %03d: %s (%s - %s)" % (
456 |                     track['track'],
457 |                     track['title'],
458 |                     self.to_hms(track['start']),
459 |                     self.to_hms(track['end'])))
460 |         else:
461 |             for track in self.tracks:
462 |                 output.append("[ydl] %s (%s - %s)" % (
463 |                     track['title'],
464 |                     self.to_hms(track['start']),
465 |                     self.to_hms(track['end'])))
466 | 
467 |         return "\n".join(output)
468 | 
    def extract_tracks_from_chapters(self):
        """Read chapters tags on file to find times and metadata

        Runs `ffprobe -i <audio file>` and scans what it wrote to stderr for
        chapter entries.

        Returns a list of dicts with `track` (chapter number plus one),
        `start` and `end` (kept as the strings captured from the output)
        and one cleaned metadata key/value pair per chapter.
        """
        tracks = []
        ffprobe_cmd = ['ffprobe', '-i', self.audio_file]
        # str() of the captured bytes yields their repr, so line breaks show
        # up as a literal backslash followed by "n" -- hence `\\n` below
        info = str(subprocess.run(ffprobe_cmd,
            stderr=subprocess.PIPE).stderr)

        # groups, in findall order: track, start, end, metadata, key, value;
        # a repeated group only keeps the text of its last repetition
        chapters_regex = r'\s+Chapter\s+' + \
            r'#(?P<track>[:0-9]+).*?' + \
            r'start\s+(?P<start>[0-9.]+).*?' + \
            r'end\s+(?P<end>[0-9.]+).*?' + \
            r'Metadata:(?P<metadata>\\n' + \
                r'\s+(?P<key>\S+)\s+:' + \
                r'\s+(?P<value>.*?)' + \
            r'\\n)+?'
        regex = re.compile(chapters_regex, re.DOTALL)
        for fields in re.findall(regex, info):
            # keep the digits of the chapter id and shift to 1-based numbers
            trackno = int(re.sub(r'[^0-9]', '', fields[0])) + 1

            track = {
                'track': trackno,
                'start': fields[1],
                'end': fields[2],
            }
            # fields[4] / fields[5] are the captured key and value; with six
            # groups this loop body runs exactly once
            index = 4
            while index < len(fields) - 1:
                track[self.clean_str(fields[index])] = \
                    self.clean_str(fields[index + 1])
                index += 2
            tracks.append(track)

        return tracks
502 | 
503 |     def extract_tracktimes_from_string(self, s):
504 |         """Try to find HH:MM patterns as track times on description
505 |         """
506 |         tracks_regex = \
507 |             r'^(.*?)(?P<time>[0-9]?[0-9]?:?[012345]?[0-9]:[012345][0-9])(.*)$'
508 |         regex = re.compile(tracks_regex, re.MULTILINE)
509 |         items = re.findall(regex, s)
510 | 
511 |         tracks = []
512 |         skipped = end = index = 0
513 |         indexes = len(items)
514 | 
515 |         while index < indexes:
516 |             start = self.to_seconds(items[index][1])
517 |             if index == indexes - 1:
518 |                 end = self.info.get('duration') # total file duration
519 |             else:
520 |                 end = self.to_seconds(items[index + 1][1]) - 0.05
521 | 
522 |             if start > end:
523 |                 print('[ydl] Skipping track %d: incorrect timing' % \
524 |                     int(index + 1))
525 |                 index += 1
526 |                 skipped += 1
527 |                 continue
528 | 
529 |             # track times can be at the beginning or end of the line
530 |             title = '%s %s' % (items[index][0], items[index][2])
531 |             title = self.clean_str(title)
532 | 
533 |             track = {
534 |                 'track': index - skipped + 1,
535 |                 'start': start,
536 |                 'end': end,
537 |                 'title': title
538 |             }
539 |             tracks.append(track)
540 |             index += 1
541 | 
542 |         return tracks
543 | 
544 |     def extract_tracks_cleanup(self):
545 |         """Clean tracks after extraction process
546 |         """
547 |         # remove track number from the beginning
548 |         regex = re.compile(r'^\s*?[0-9]+\s*?[^0-9a-zA-Z]*?\s*?')
549 |         for i in range(0, len(self.tracks)):
550 |             self.tracks[i]['title'] = \
551 |                 regex.sub('', self.tracks[i]['title']).strip()
552 | 
553 |     def set_single_file_data(self):
554 |         artist, title = self.parse_title()
555 |         self.tracks = [{
556 |             'artist': artist,
557 |             'title': title,
558 |             'start': 0,
559 |             'end': self.info.get('duration') - 0.05
560 |         }]
561 | 
562 |     def write_dummy_mp3(self):
563 |         """Create dummy mp3 files to test an import into beets
564 |         """
565 |         if not os.path.exists(self.outdir):
566 |             os.mkdir(self.outdir)
567 | 
568 |         if len(self.tracks) > 0:
569 |             self.write_dummy_mp3_tracks()
570 |         else:
571 |             self.write_dummy_mp3_file()
572 | 
573 |     def write_dummy_mp3_tracks(self):
574 |         for track in self.tracks:
575 |             self.write_dummy_mp3_file(track)
576 | 
577 |     def write_dummy_mp3_file(self, track=False):
578 |         if self.is_album():
579 |             outmp3 = '%s/%03d-%s%s' % (self.outdir,
580 |                 track['track'], self.info.get('id'), self.audio_file_ext)
581 |             outwav = '%s/%03d-%s%s' % (self.outdir,
582 |                 track['track'], self.info.get('id'), '.wav')
583 |             outdat = outwav + ".dat"
584 |         else:
585 |             outmp3 = '%s/%s%s' % (self.outdir,
586 |                 self.info.get('id'), self.audio_file_ext)
587 |             outwav = '%s/%s%s' % (self.outdir, self.info.get('id'), '.wav')
588 |             outdat = outwav + ".dat"
589 | 
590 |         with open(outdat, 'w') as out:
591 |             out.truncate()
592 |             out.write("; SampleRate 8000\n")
593 |             samples = (track['end'] - track['start']) * 8000
594 |             i = 0
595 |             for i in range(0, int(samples)):
596 |                 out.write("%f\t0\n" % (i / 8000))
597 |                 i += 1
598 | 
599 |         sox_cmd = ['sox', outdat, '-c', '2',
600 |                 '-r', '44100', '-e', 'signed-integer', outwav]
601 | 
602 |         ffmpeg_cmd = ['ffmpeg', '-y', '-i', outwav, '-vn', '-ar',
603 |             '44100', '-ac', '1', '-ab', '8k']
604 |         for k in track.keys():
605 |             if k == 'track':
606 |                 value = str(track[k])
607 |             else:
608 |                 value = '"' + str(track[k]) + '"'
609 |             ffmpeg_cmd.extend(['-metadata', '%s=%s' % (k, value)])
610 |         ffmpeg_cmd.append(outmp3)
611 | 
612 |         for cmd in (sox_cmd, ffmpeg_cmd):
613 |             subprocess.run(cmd, stderr=subprocess.PIPE,
614 |                 stdout=subprocess.PIPE)


--------------------------------------------------------------------------------
/env.config.yml:
--------------------------------------------------------------------------------
1 | plugins: ydl


--------------------------------------------------------------------------------
/env.develop:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Run this file like:
 4 | #
 5 | #  . ./env.develop
 6 | #
 7 | 
 8 | export PYTHONPATH="`pwd`:`pwd`/env/lib/python3.6/site-packages/"
 9 | export BEETS_ENV="develop"
10 | 
11 | if [ ! -d "`pwd`/env" ]; then
12 |     virtualenv env --distribute
13 | fi
14 | 
15 | source `pwd`/env/bin/activate


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beets
2 | youtube-dl
3 | pyxdg


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | with open("README.md", "r") as fh:
 4 |     long_description = fh.read()
 5 | 
 6 | setuptools.setup(
 7 |     name="beets-ydl",
 8 |     version="0.0.4",
 9 |     author="Vinicius Massuchetto",
10 |     description="Download audio from youtube-dl soures and import into beets",
11 |     long_description=long_description,
12 |     long_description_content_type="text/markdown",
13 |     url="https://github.com/vmassuchetto/beets-ydl",
14 |     packages=setuptools.find_packages(),
15 |     classifiers=[
16 |         "Programming Language :: Python :: 3.6",
17 |         "License :: OSI Approved :: MIT License",
18 |         "Operating System :: OS Independent",
19 |     ],
20 |     install_requires=[
21 |         'beets',
22 |         'youtube-dl',
23 |         'pyxdg',
24 |     ]
25 | )
26 | 


--------------------------------------------------------------------------------
/test:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from beetsplug.ydl import Colors
 4 | import os
 5 | import subprocess
 6 | from hashlib import md5
 7 | 
 8 | #
 9 | # To get a checksum: leave the first field blank, put an URL in the
10 | # second field, and then run the test
11 | #
12 | TESTS = [
13 |     ('4e4372f5d09d872b69654c81620bf6ac', 'https://www.youtube.com/watch?v=uMMUcxvWOkY'),
14 |     ('bb31adda714de9244de82ef6dbff806e',  'https://www.youtube.com/watch?v=Zi_XLOBDo_Y'),
15 |     ('ec734593c0a61678f8a9399f1325d180', 'https://www.youtube.com/watch?v=wW6ykueIhX8'),
16 | ]
17 | 
18 | dbfile = 'env.lib.db'
19 | ydl_cmd = ['beet', '-c', 'env.config.yml', '-l', dbfile, 'ydl', '--no-download',
20 |     '--no-import', '--verbose']
21 | 
22 | # so no cache messages will appear
23 | if os.path.exists(dbfile):
24 |     os.remove(dbfile)
25 | 
26 | failed = False
27 | 
28 | for checksum, source in TESTS:
29 |     cmd = ydl_cmd + [source]
30 |     print(Colors.INFO + '=> ' + Colors.END + \
31 |         Colors.BOLD + ' '.join(cmd) + Colors.END)
32 |     result = subprocess.run(cmd, stdout=subprocess.PIPE)
33 |     md5_result = md5(str(result.stdout).encode()).hexdigest()
34 | 
35 |     if checksum and md5_result == checksum:
36 |         output = Colors.SUCCESS + "   [OK] " + Colors.END + md5_result
37 |     elif checksum:
38 |         output = Colors.WARNING + "   [ERROR] " + Colors.END + \
39 |             md5_result + " <> " + checksum
40 |         failed = True
41 |     else:
42 |         output = Colors.INFO + "   [CHECKSUM] " + md5_result + Colors.END
43 |         print(result.stdout.decode("unicode_escape"))
44 |     print(output)
45 | 
46 | if failed:
47 |     exit(2)
48 | else:
49 |     exit(0)
50 | 


--------------------------------------------------------------------------------