From e844629839d17f7dc1a8758856ab6c401b2a7ebe Mon Sep 17 00:00:00 2001 From: Skullheadx <94652084+Skullheadx@users.noreply.github.com> Date: Mon, 14 Oct 2024 00:37:25 -0400 Subject: [PATCH] fix file names with bad chars --- ytdl/__main__.py | 15 +-------- ytdl/funcmodule.py | 83 ++++++++++++---------------------------------- 2 files changed, 23 insertions(+), 75 deletions(-) diff --git a/ytdl/__main__.py b/ytdl/__main__.py index aa323d0..5f29931 100644 --- a/ytdl/__main__.py +++ b/ytdl/__main__.py @@ -1,5 +1,5 @@ import sys -from .funcmodule import check_playlist, get_audio_metadata_stream, download_audio_stream, get_and_download +from .funcmodule import check_playlist, get_and_download import concurrent.futures @@ -27,19 +27,6 @@ def main(): links = check_playlist(links) assert len(links) > 0, "Should be at least one song in playlist" - audio_streams = [] - metadata_list = [] - - for link in links: - stream, metadata = get_audio_metadata_stream(link) - assert stream is not None, "was not able to get audio stream" - assert metadata is not None, "no metadata found" - audio_streams.append(stream) - metadata_list.append(metadata) - - assert len(audio_streams) > 0, "no audio streams found" - assert len(metadata_list) > 0, "no metadata found" - if mode == "-d": pass elif mode == "-a": diff --git a/ytdl/funcmodule.py b/ytdl/funcmodule.py index 18c837e..124283c 100644 --- a/ytdl/funcmodule.py +++ b/ytdl/funcmodule.py @@ -2,6 +2,7 @@ from pytubefix import YouTube, Playlist import requests import subprocess import os +import glob def check_playlist(links): @@ -14,20 +15,6 @@ def check_playlist(links): return links -def get_audio_metadata_stream(link): - yt = YouTube(link) - yt.check_availability() - print(f"Fetching stream for {yt.title}") - assert len(yt.streams.filter(only_audio=True)) > 0, "No available audio streams" - yield yt.streams.filter(only_audio=True).order_by("abr").last() - yield { - "title": yt.title, - "artist": yt.author, - "thumbnail_url": yt.thumbnail_url, - "publish_date": yt.publish_date, - "views": yt.views - } - def big_num_format(num): # https://stackoverflow.com/a/579376 magnitude = 0 while abs(num) >= 1000: @@ -36,74 +23,48 @@ def big_num_format(num): # https://stackoverflow.com/a/579376 return '%.1f%s' % (num, ['', 'K', 'M', 'B'][magnitude]) -def download_audio_stream(audio_stream, metadata): - print(f"Downloading audio stream for {audio_stream.title}") - audio_stream.download() - - # create thumbnail file - data = requests.get(metadata["thumbnail_url"]).content - thumbnail_filename = f'{audio_stream.title}.jpg' - with open(thumbnail_filename, 'wb') as f: - f.write(data) - - command = [ - 'ffmpeg', - '-i', audio_stream.default_filename, - '-i', thumbnail_filename, - '-map', '0', - '-map', '1', - '-metadata', f'title={audio_stream.title}', - '-metadata', f'artist={metadata["artist"]}', - '-metadata', f'date={metadata["publish_date"]}', - '-metadata', f'comment={big_num_format(metadata["views"]) + " views"}', - audio_stream.title + ".mp4", - '-y' - ] - subprocess.run(command) - - # clean up tmp files - os.remove(thumbnail_filename) - os.remove(audio_stream.default_filename) +def fix_filename(filename): + for i in ['/', ':', '*', '?', '"', '<', '>', '|']: + filename = filename.replace(i, '') + return filename def get_and_download(link): yt = YouTube(link) + if fix_filename(yt.title) + '.mp4' in glob.glob("*.mp4"): + print(f"{yt.title} is already downloaded") + return + yt.check_availability() print(f"Fetching stream for {yt.title}") + assert len(yt.streams.filter(only_audio=True)) > 0, "No available audio streams" audio_stream = yt.streams.filter(only_audio=True).order_by("abr").last() - metadata = { - "title": yt.title, - "artist": yt.author, - "thumbnail_url": yt.thumbnail_url, - "publish_date": yt.publish_date, - "views": yt.views - } - print(f"Downloading audio stream for {audio_stream.title}") - audio_stream.download() + print(f"Downloading audio stream for {yt.title}") + audio_stream.download(filename=fix_filename(audio_stream.default_filename), skip_existing=True) # create thumbnail file - data = requests.get(metadata["thumbnail_url"]).content - thumbnail_filename = f'{audio_stream.title}.jpg' + data = requests.get(yt.thumbnail_url).content + thumbnail_filename = f'{fix_filename(audio_stream.title)}.jpg' with open(thumbnail_filename, 'wb') as f: f.write(data) command = [ 'ffmpeg', - '-i', audio_stream.default_filename, + '-i', fix_filename(audio_stream.default_filename), '-i', thumbnail_filename, '-map', '0', '-map', '1', - '-metadata', f'title={audio_stream.title}', - '-metadata', f'artist={metadata["artist"]}', - '-metadata', f'date={metadata["publish_date"]}', - '-metadata', f'comment={big_num_format(metadata["views"]) + " views"}', - audio_stream.title + ".mp4", - '-y' + '-metadata', f'title={fix_filename(audio_stream.title)}', + '-metadata', f'artist={yt.author}', + '-metadata', f'date={yt.publish_date}', + '-metadata', f'comment={big_num_format(yt.views) + " views"}', + fix_filename(audio_stream.title) + ".mp4", + '-n' ] subprocess.run(command) # clean up tmp files os.remove(thumbnail_filename) - os.remove(audio_stream.default_filename) \ No newline at end of file + os.remove(fix_filename(audio_stream.default_filename)) -- 2.54.0