Squashed commit of the following:

author Skullheadx <94652084+Skullheadx@users.noreply.github.com>

Mon, 14 Oct 2024 19:57:02 +0000 (15:57 -0400)

committer Skullheadx <94652084+Skullheadx@users.noreply.github.com>

Mon, 14 Oct 2024 19:57:02 +0000 (15:57 -0400)
author Skullheadx <94652084+Skullheadx@users.noreply.github.com>
Mon, 14 Oct 2024 19:57:02 +0000 (15:57 -0400)
committer Skullheadx <94652084+Skullheadx@users.noreply.github.com>
Mon, 14 Oct 2024 19:57:02 +0000 (15:57 -0400)
diff --git a/.gitignore b/.gitignore

index 52078987624f0db53d334896575122330c119834..1c115a2bbc35be9865c9277ad8355147ca8ea0ae 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,19 @@ links.txt
  /.venv
  .DS_Store
  links.txt
+*.egg-info/
+*.pyc
+
+ytdl.egg-info/dependency_links.txt
+ytdl.egg-info/dependency_links.txt
+ytdl.egg-info/entry_points.txt
+ytdl.egg-info/SOURCES.txt
+ytdl.egg-info/top_level.txt
+ytdl/__main__.py
+ytdl/__pycache__/__init__.cpython-312.pyc
+ytdl/__pycache__/__main__.cpython-312.pyc
+ytdl/__pycache__/classmodule.cpython-312.pyc
+ytdl/__pycache__/funcmodule.cpython-312.pyc
+/ytdl.egg-info
+/ytdl/__pycache__
+*.mp4
diff --git a/README.md b/README.md

index 76e3731a03bb5265021ad8125f52354b56d88d9b..8f6da0bbf59b8aa1900930ce0e1b82a489717d6f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,7 +1,15 @@
-# youtube-downloader
+# Youtube Downloader - ytdl
  
-HOW TO USE
-- Enter links into the links_file.txt file each on a new line.
-  - You can even put links to playlists too!
-- Run the main.py file using python
-- Enjoy your newly downloaded videos in the "downloaded" folder. (audio and video streams are available in the respective folders in downloaded)
+## usage
+```shell
+ytdl "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+```
+Downloads the audio and video streams and stitches them together into a single file in the current directory. Playlists are detected automatically.
+
+- `-a` - audio only
+- `-v` - video only
+- `-av` - audio and video as separate files
+- `-f` - force replace if file exists
+
+# TODO:
+- [ ] figure out why -av takes so long compared to -a and -v
+\ No newline at end of file
diff --git a/dependencies.txt b/dependencies.txt

deleted file mode 100644 (file)

index 2fd322a..0000000
--- a/dependencies.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pytube
-ffmpeg-python
-\ No newline at end of file
diff --git a/install.sh b/install.sh

new file mode 100644 (file)

index 0000000..7895f27
--- /dev/null
+++ b/install.sh
@@ -0,0 +1 @@
+pip install -e .
+\ No newline at end of file
diff --git a/links.txt b/links.txt

deleted file mode 100644 (file)

index b00161d..0000000
--- a/links.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-https://www.youtube.com/watch?app=desktop&v=lW4KseyDqcY
-https://www.youtube.com/watch?v=nJ7ZCN0m14A
-https://www.youtube.com/watch?app=desktop&v=VvWX3vRRLME
-https://www.youtube.com/watch?v=iCx2nBfu54I
-https://www.youtube.com/watch?app=desktop&v=V2hrTDS4Ml4
-https://www.youtube.com/watch?app=desktop&v=_Sd11FWbvZ8
-https://www.youtube.com/watch?v=HDOIQZZoABg
-https://www.youtube.com/watch?v=j-ttaqEzXKM
-https://www.youtube.com/watch?app=desktop&v=UMiW3G1USHg#dialog
-https://www.youtube.com/watch?v=uPG77Gtn0Ws
-https://www.youtube.com/watch?app=desktop&v=7cKGmeTY9eg
-https://www.youtube.com/watch?app=desktop&v=Pfs9yAsRbaU
-https://www.youtube.com/watch?v=WxKbU98GLRY
-https://www.youtube.com/watch?v=9MzCxt1QpWg
-https://www.youtube.com/watch?v=jCaug9SkKEI
-https://www.youtube.com/watch?v=V19v3oNPixQ
-https://www.youtube.com/watch?v=NscXXbmAggI
-https://www.youtube.com/watch?v=iCx2nBfu54I
-https://www.youtube.com/watch?v=9pq-G57iSEQ
-https://www.youtube.com/watch?v=XVRno3Y1TX8
-https://www.youtube.com/watch?v=codyY_-AiXc
-https://www.youtube.com/watch?v=qZ2yWvholHw
-https://www.youtube.com/watch?v=wodcSTIxZtw
-https://www.youtube.com/watch?v=btKTE4eMPf4
-https://www.youtube.com/watch?v=kW-WE5zrJsg
-https://www.youtube.com/watch?v=i5M-WHDhQ4c
diff --git a/main.py b/main.py

deleted file mode 100644 (file)

index 5f5b3a1..0000000
--- a/main.py
+++ /dev/null
@@ -1,87 +0,0 @@
-from pytubefix import YouTube, Playlist
-from pytubefix.exceptions import VideoUnavailable
-import ffmpeg
-
-
-RES = ["1440p", "1080p", "720p", "480p", "360p", "240p", "144p"]
-ABR = ["160kbps", "128kbps", "70kbps", "50kbps", "48kbps"]
-target_res = 0
-target_abr=0
-
-failed_download = set()
-
-if __name__ == "__main__":
-
-    # get list of links from file
-    links = []
-    with open('links.txt', 'r') as f:
-        links = f.read().split('\n')
-        if links[-1] == "":
-            links = links[:-1]
-
-    for link in links:
-        if "playlist" in link:
-            p = Playlist(link)
-            for url in p.video_urls:
-                links.append(url)
-            links.remove(link)
-
-
-    # download links one by one
-    for link in links:
-        target_res = 0
-        target_abr = 0
-        video_success = True
-        audio_success = True
-
-        try:
-            yt = YouTube(link)
-            yt.streams
-
-        except VideoUnavailable:
-            print(f'Video {link} is unavaialable, skipping.')
-            failed_download.add(((yt.title, link)))
-        else:
-            video_streams = []            
-            while len(video_streams) == 0:
-                video_streams = yt.streams.filter(file_extension='mp4', res=RES[target_res]) # find available streams
-                if target_res + 1 < len(RES):
-                    target_res = target_res + 1
-                else:
-                    video_success = False
-                    break
-            if not video_success:
-                print(f"Unable to find video stream for {yt.title}")
-                failed_download.add(((yt.title, link)))
-
-                break
-            vstream = video_streams[0]
-
-            # audio
-            audio_streams = []
-            while len(audio_streams) == 0:
-                audio_streams = yt.streams.filter(only_audio=True, abr=ABR[target_abr]) # find available streams
-
-                if target_abr + 1 < len(RES):
-                    target_abr = target_abr + 1
-                else:
-                    audio_success = False
-                    break
-            if not audio_success:
-                print(f"Unable to find audio stream for {yt.title}")
-                failed_download.add(((yt.title, link)))
-
-                break
-            astream = audio_streams[0]
-
-            vstream.download(output_path="downloaded/video_only")
-            astream.download(output_path="downloaded/audio_only")
-
-            input_video = ffmpeg.input(f"downloaded/video_only/{vstream.default_filename}")
-            input_audio = ffmpeg.input(f"downloaded/audio_only/{astream.default_filename}")
-
-            ffmpeg.concat(input_video, input_audio, v=1, a=1).output(f'downloaded/{yt.title}.mp4').run()
-
-print("Failed Downloading:")
-print(failed_download)
-
diff --git a/requirements.txt b/requirements.txt

new file mode 100644 (file)

index 0000000..e287554
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+pytubefix~=8.0.0
+requests~=2.32.3
+ffmpeg
+\ No newline at end of file
diff --git a/setup.py b/setup.py

new file mode 100644 (file)

index 0000000..b8c2199
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,10 @@
+from setuptools import setup
+setup(
+    name = 'ytdl',
+    version = '0.1.0',
+    packages = ['ytdl'],
+    # Declared here so `pip install -e .` (install.sh) also installs the runtime deps.
+    install_requires = ['pytubefix~=8.0.0', 'requests~=2.32.3'],
+    entry_points = {
+        'console_scripts': ['ytdl = ytdl.__main__:main']
+    })
+\ No newline at end of file
diff --git a/ytdl.egg-info/PKG-INFO b/ytdl.egg-info/PKG-INFO

new file mode 100644 (file)

index 0000000..d54d315
--- /dev/null
+++ b/ytdl.egg-info/PKG-INFO
@@ -0,0 +1,4 @@
+Metadata-Version: 2.1
+Name: ytdl
+Version: 0.1.0
+License-File: LICENSE
diff --git a/ytdl.egg-info/SOURCES.txt b/ytdl.egg-info/SOURCES.txt

new file mode 100644 (file)

index 0000000..3a3b6f7
--- /dev/null
+++ b/ytdl.egg-info/SOURCES.txt
@@ -0,0 +1,11 @@
+LICENSE
+README.md
+setup.py
+ytdl/__init__.py
+ytdl/__main__.py
+ytdl/funcmodule.py
+ytdl.egg-info/PKG-INFO
+ytdl.egg-info/SOURCES.txt
+ytdl.egg-info/dependency_links.txt
+ytdl.egg-info/entry_points.txt
+ytdl.egg-info/top_level.txt
+\ No newline at end of file
diff --git a/ytdl.egg-info/dependency_links.txt b/ytdl.egg-info/dependency_links.txt

new file mode 100644 (file)

index 0000000..8b13789
--- /dev/null
+++ b/ytdl.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/ytdl.egg-info/entry_points.txt b/ytdl.egg-info/entry_points.txt

new file mode 100644 (file)

index 0000000..ab2a57f
--- /dev/null
+++ b/ytdl.egg-info/entry_points.txt
@@ -0,0 +1,2 @@
+[console_scripts]
+ytdl = ytdl.__main__:main
diff --git a/ytdl.egg-info/top_level.txt b/ytdl.egg-info/top_level.txt

new file mode 100644 (file)

index 0000000..763acad
--- /dev/null
+++ b/ytdl.egg-info/top_level.txt
@@ -0,0 +1 @@
+ytdl
diff --git a/ytdl/__init__.py b/ytdl/__init__.py

new file mode 100644 (file)

index 0000000..8b13789
--- /dev/null
+++ b/ytdl/__init__.py
@@ -0,0 +1 @@
+
diff --git a/ytdl/__main__.py b/ytdl/__main__.py

new file mode 100644 (file)

index 0000000..bc86369
--- /dev/null
+++ b/ytdl/__main__.py
@@ -0,0 +1,39 @@
+import sys
+from .funcmodule import check_playlist, download
+import concurrent.futures
+
+
+def main():
+    """Parse mode flags, -f and YouTube links from argv, then download every link concurrently."""
+    args = sys.argv[1:]
+    modes = ["-d", "-a", "-v", "-av"]
+    if not args:  # explicit exits replace asserts, which `python -O` strips out
+        sys.exit("no args :(")
+    links = []
+    mode = "-d"
+    force = False
+    for arg in args:
+        if arg in modes:
+            mode = arg
+        elif arg == '-f':  # force
+            force = True
+        elif "youtube" in arg or "youtu.be" in arg:
+            # One quoted argument may hold several space-separated links; drop empty pieces.
+            links.extend(part for part in arg.split(" ") if part)
+    if not links:
+        sys.exit("Should pass at least one link as arg")
+    print("Processing links")
+    print("Checking for playlists")
+    links = check_playlist(links)
+    if not links:
+        sys.exit("Should be at least one song in playlist")
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        futures = {executor.submit(download, link, mode, force): link for link in links}
+        for future in concurrent.futures.as_completed(futures):
+            if future.exception() is not None:  # otherwise worker errors vanish silently
+                print(f"Failed to download {futures[future]}: {future.exception()!r}")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/ytdl/__pycache__/__init__.cpython-312.pyc b/ytdl/__pycache__/__init__.cpython-312.pyc

new file mode 100644 (file)

index 0000000..a2889eb

Binary files /dev/null and b/ytdl/__pycache__/__init__.cpython-312.pyc differ
diff --git a/ytdl/__pycache__/__main__.cpython-312.pyc b/ytdl/__pycache__/__main__.cpython-312.pyc

new file mode 100644 (file)

index 0000000..0a297c4

Binary files /dev/null and b/ytdl/__pycache__/__main__.cpython-312.pyc differ
diff --git a/ytdl/__pycache__/classmodule.cpython-312.pyc b/ytdl/__pycache__/classmodule.cpython-312.pyc

new file mode 100644 (file)

index 0000000..626cbfd

Binary files /dev/null and b/ytdl/__pycache__/classmodule.cpython-312.pyc differ
diff --git a/ytdl/__pycache__/funcmodule.cpython-312.pyc b/ytdl/__pycache__/funcmodule.cpython-312.pyc

new file mode 100644 (file)

index 0000000..91e1dc7

Binary files /dev/null and b/ytdl/__pycache__/funcmodule.cpython-312.pyc differ
diff --git a/ytdl/funcmodule.py b/ytdl/funcmodule.py

new file mode 100644 (file)

index 0000000..53ddeee
--- /dev/null
+++ b/ytdl/funcmodule.py
@@ -0,0 +1,137 @@
+import glob
+import os
+import subprocess
+
+import requests
+from pytubefix import YouTube, Playlist
+
+
+def check_playlist(links):
+    """Return a new list where each playlist URL is replaced by its video URLs."""
+    expanded = []  # fresh list: editing `links` while iterating it skipped entries
+    for link in links:
+        # Playlist links fan out into their videos; other links pass through as-is.
+        is_playlist = "playlist" in link
+        expanded.extend(Playlist(link).video_urls if is_playlist else [link])
+    return expanded
+
+
+def big_num_format(num):  # https://stackoverflow.com/a/579376
+    """Abbreviate `num` to one decimal plus a K/M/B/T suffix, e.g. 1500 -> '1.5K'."""
+    suffixes, magnitude = ['', 'K', 'M', 'B', 'T'], 0
+    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:  # stay inside the table
+        magnitude, num = magnitude + 1, num / 1000.0
+    return '%.1f%s' % (num, suffixes[magnitude])
+
+
+def fix_filename(filename):
+    """Drop the characters Windows forbids in file names: \\ / : * ? " < > |"""
+    # One C-level pass; '\\' was missing before, so such titles became bogus paths.
+    return filename.translate(str.maketrans('', '', '\\/:*?"<>|'))
+
+
+def download_thumbnail(thumbnail_url, thumbnail_filename):  # save the bytes served at thumbnail_url to thumbnail_filename
+    data = requests.get(thumbnail_url).content  # NOTE(review): no timeout or HTTP status check on this request
+    with open(thumbnail_filename, 'wb') as f:
+        f.write(data)
+
+
+def download(link, mode, force=False):
+    """Download `link` in `mode` ('-a', '-v', '-av' or '-d') into the current directory.
+
+    A title whose output .mp4 is already present is skipped unless `force` is true.
+    """
+    yt = YouTube(link)
+    filename = fix_filename(yt.title)
+    if not force:
+        present = glob.glob("*.mp4")
+        if mode == '-av':
+            outputs = [filename + ' (audio only).mp4', filename + ' (video only).mp4']
+        else:
+            outputs = [filename + '.mp4']
+        if any(name in present for name in outputs):
+            print(f"{yt.title} is already downloaded")
+            return
+    yt.check_availability()
+    thumbnail_filename = f'{filename}.jpg'
+    download_thumbnail(yt.thumbnail_url, thumbnail_filename)
+    if mode in ('-a', '-v'):
+        download_single_stream(yt, filename, thumbnail_filename, mode)
+    elif mode in ('-av', '-d'):
+        download_double_stream(yt, filename, thumbnail_filename, mode)
+
+
+def convert_add_metadata(input1, input2, output, yt, m1=1, m2=0):
+    """Stream-copy two inputs into `output`.mp4 with ffmpeg, adding metadata from `yt`.
+
+    Input `m1` is mapped first, then `m2`; video stream `m2` is flagged attached_pic.
+    """
+    album = yt.title
+    details = yt.vid_info['videoDetails']
+    if 'keywords' in details.keys() and len(details['keywords']) > 2:
+        album = details['keywords'][-2]  # second-to-last keyword stands in for the album
+    tags = [
+        f'title={yt.title}',
+        f'artist={yt.author}',
+        f'comment={big_num_format(yt.views) + " views"}',
+        f'date={yt.publish_date}',
+        f'album={album}',
+    ]
+    command = ['ffmpeg', '-i', input1, '-i', input2, '-map', f'{m1}', '-map', f'{m2}']
+    command += ['-c', 'copy', f'-disposition:v:{m2}', 'attached_pic']
+    for tag in tags:
+        command += ['-metadata', tag]
+    subprocess.run(command + [output + ".mp4", '-y'])
+
+
+def download_single_stream(yt, filename, thumbnail_filename, mode):
+    """Download one audio ('-a') or video ('-v') stream (last by abr/resolution) and tag it as `filename`.mp4."""
+    print(f"Fetching stream for {yt.title}")
+    stream = None
+    if mode == "-a":
+        audio = yt.streams.filter(only_audio=True)
+        assert len(audio) > 0, "No available audio streams"
+        stream = audio.order_by("abr").last()
+    elif mode == "-v":
+        video = yt.streams.filter(only_video=True)
+        assert len(video) > 0, "No available video streams"
+        stream = video.order_by("resolution").last()
+    assert stream is not None, "mode is not valid"
+    print(f"Downloading stream for {yt.title}")
+    default_filename = "default " + fix_filename(stream.default_filename)
+    stream.download(filename=default_filename, skip_existing=True)
+    print(f"Adding metadata to {yt.title}")
+    convert_add_metadata(default_filename, thumbnail_filename, filename, yt)
+    print("Removing temporary files")
+    for leftover in (thumbnail_filename, default_filename):
+        os.remove(leftover)
+
+
+def download_double_stream(yt, filename, thumbnail_filename, mode):
+    """Download an audio and a video stream; tag them separately ('-av') or merged ('-d')."""
+    print(f"Fetching streams for {yt.title}")
+    audio_candidates = yt.streams.filter(only_audio=True)
+    assert len(audio_candidates) > 0, "No available audio streams"
+    audio_stream = audio_candidates.order_by("abr").last()
+    video_candidates = yt.streams.filter(only_video=True)
+    assert len(video_candidates) > 0, "No available video streams"
+    video_stream = video_candidates.order_by("resolution").last()
+
+    print(f"Downloading streams for {yt.title}")
+    audio_default_filename = "default audio " + fix_filename(audio_stream.default_filename)
+    video_default_filename = "default video " + fix_filename(video_stream.default_filename)
+    audio_stream.download(filename=audio_default_filename, skip_existing=True)
+    video_stream.download(filename=video_default_filename, skip_existing=True)
+    print(f"Adding metadata to {yt.title}")
+    leftovers = [thumbnail_filename, audio_default_filename, video_default_filename]
+    if mode == '-av':
+        convert_add_metadata(audio_default_filename, thumbnail_filename, filename + " (audio only)", yt)
+        convert_add_metadata(video_default_filename, thumbnail_filename, filename + " (video only)", yt)
+    elif mode == '-d':
+        # Merge audio and video into a temporary file first, then attach the thumbnail.
+        convert_add_metadata(audio_default_filename, video_default_filename, filename + "tmp", yt, m1=0, m2=1)
+        convert_add_metadata(filename + "tmp.mp4", thumbnail_filename, filename, yt)
+        leftovers.append(filename + "tmp" + ".mp4")
+    print("Removing temporary files")
+    for leftover in leftovers:
+        os.remove(leftover)
author	Skullheadx <94652084+Skullheadx@users.noreply.github.com>
	Mon, 14 Oct 2024 19:57:02 +0000 (15:57 -0400)
committer	Skullheadx <94652084+Skullheadx@users.noreply.github.com>
	Mon, 14 Oct 2024 19:57:02 +0000 (15:57 -0400)
.gitignore		patch \| blob \| history
README.md		patch \| blob \| history
dependencies.txt	[deleted file]	patch \| blob \| history
install.sh	[new file with mode: 0644]	patch \| blob
links.txt	[deleted file]	patch \| blob \| history
main.py	[deleted file]	patch \| blob \| history
requirements.txt	[new file with mode: 0644]	patch \| blob
setup.py	[new file with mode: 0644]	patch \| blob
ytdl.egg-info/PKG-INFO	[new file with mode: 0644]	patch \| blob
ytdl.egg-info/SOURCES.txt	[new file with mode: 0644]	patch \| blob
ytdl.egg-info/dependency_links.txt	[new file with mode: 0644]	patch \| blob
ytdl.egg-info/entry_points.txt	[new file with mode: 0644]	patch \| blob
ytdl.egg-info/top_level.txt	[new file with mode: 0644]	patch \| blob
ytdl/__init__.py	[new file with mode: 0644]	patch \| blob
ytdl/__main__.py	[new file with mode: 0644]	patch \| blob
ytdl/__pycache__/__init__.cpython-312.pyc	[new file with mode: 0644]	patch \| blob
ytdl/__pycache__/__main__.cpython-312.pyc	[new file with mode: 0644]	patch \| blob
ytdl/__pycache__/classmodule.cpython-312.pyc	[new file with mode: 0644]	patch \| blob
ytdl/__pycache__/funcmodule.cpython-312.pyc	[new file with mode: 0644]	patch \| blob
ytdl/funcmodule.py	[new file with mode: 0644]	patch \| blob