From 094466ec5a37e7bf9e1fa57e43d6a86fceb09049 Mon Sep 17 00:00:00 2001
From: konsthol
+ 12-01-2025 - Simple way to extend yt-dlp
19-12-2024 - The magic of Wake-On-LAN
@@ -125,9 +128,9 @@
I could be livestreaming at
-My Piped instance which you can use resides at
- +My Invidious instance which you can use, resides at
+My Gopher Hole for this homepage is accessible via
@@ -182,8 +185,8 @@ function action() {I accept donations for any reason
-Saturday 28/12/24 15:36:55 -3 +Saturday 19/04/25 20:47:56 +2
++DATE: Sun 12 Jan 2025 15:51 By: konsthol@pm.me
+
+ Lots of people use yt-dlp either directly or indirectly through mpv. It’s a + powerful tool that acts as a website scraper and it supports thousands of + websites. The website it’s mostly used for is, as the name suggests, YouTube. + Now, YouTube is a great resource, but usage through the website is quite + unpleasant, so lots of people opt to use alternative frontends like + Invidious or Piped. Lots of times you just want to use mpv to stream a YouTube + video by providing the link like: +
+++mpv https://youtube.com/watch?v=[VideoID]
+
+ That works like a charm, but what happens when you provide a link to an + alternative frontend? Well, it translates it to the aforementioned format in + order to work. But there are so many instances of Invidious and Piped, so how + does it know what to do? That was my question as well, since I use a + self-hosted Piped instance and it does not recognize the domain. Obviously. +
++ Thankfully, yt-dlp is an open source project so you can actually see what goes + on behind the scenes. In my case, I installed it with the Arch Linux package + manager and it resides at: +
+++/usr/lib/python3.13/site-packages/yt_dlp/
+
+ The way yt-dlp works is that it has a folder called “extractor” in that path + and in that folder there is a Python file for each supported website. In + YouTube’s case it’s youtube.py. I opened it and I saw this: +
+class YoutubeBaseInfoExtractor(InfoExtractor):
+ """Provide base functions for Youtube extractors"""
+
+ _RESERVED_NAMES = (
+ r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
+ r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
+ r'browse|oembed|get_video_info|iframe_api|s/player|source|'
+ r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
+
+ _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
+
+ # _NETRC_MACHINE = 'youtube'
+
+ # If True it will raise an error if no login info is provided
+ _LOGIN_REQUIRED = False
+
+ _INVIDIOUS_SITES = (
+ # invidious-redirect websites
+ r'(?:www\.)?redirect\.invidious\.io',
+ r'(?:(?:www|dev)\.)?invidio\.us',
+ # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
+ r'(?:www\.)?invidious\.pussthecat\.org',
+ r'(?:www\.)?invidious\.zee\.li',
+ [more instances here]
+ )
++ There is a class called YoutubeBaseInfoExtractor that has an array of + instances called _INVIDIOUS_SITES that uses a regex to catch every domain + there. Now, I saw on the GitHub page of yt-dlp a lot of people asking the + maintainers to add more instances to this list. Theoretically, you can also + just edit the file and add a domain so that it recognizes that one too. But, + in my personal opinion it’s never a good idea to edit upstream files because + as the program updates your changes will be overwritten. So I found another + way to deal with this. +
++ You see, yt-dlp is not just a command line utility. You can use it as a + library to make your own extractors for websites. The way you do that is by + creating your own plugins. In my case, I didn’t actually want to make a new + extractor but somehow extend an array of an already existing one. Not all + extractors use this method but since YouTube does, it would work. So I made + this file at this location: +
+++~/.config/yt-dlp/plugins/piped/yt_dlp_plugins/extractor/piped.py
+
The contents are simple:
+from yt_dlp.extractor.youtube import YoutubeBaseInfoExtractor, YoutubeIE
+
+class CustomYoutubeBaseInfoExtractor(YoutubeBaseInfoExtractor):
+ _INVIDIOUS_SITES = YoutubeBaseInfoExtractor._INVIDIOUS_SITES + (
+ r'(?:www\.)?piped\.konsthol\.eu',
+ )
+
+class PipedKonstholYoutubeIE(YoutubeIE, CustomYoutubeBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?piped\.konsthol\.eu/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
+ IE_NAME = 'piped.konsthol.eu'
+
++ We import the class that contains the array we need and the youtube extractor. + We make a new class in which we provide the one that has the array. We access + the array and add a new regex for our domain. Then we make a new class for the + extractor, provide the one we just created and the YouTube extractor class and + we tell it to work for URLs that look like the one we provided. In that way, + this pseudo extractor is activated when we provide a URL that looks like + this; it extends the actual YouTube extractor and activates that one, only + this time it works for our domain too. +
++ It’s amazing what you can do with open source software just by observing how a + program works. Now every time someone needs a new domain for an alternative + YouTube frontend added, instead of asking the developers to do that, using + this simple solution he/she can just add it to the plugin. +
+ + + + +Previous Log entries are archived here.
+ 12-01-2025 - Simple way to extend yt-dlp
+
19-12-2024 - The magic of Wake-On-LAN
17-08-2022 - wpgtk is just more convenient
12-09-2021 - Awesome file sharing toolhttps://konsthol.eu/rss.xml
DATE: Sun 12 Jan 2025 15:51 By: konsthol@pm.me
+ Lots of people use yt-dlp either directly or indirectly through mpv. It’s a
+ powerful tool that acts as a website scraper and it supports thousands of
+ websites. The website it’s mostly used for is, as the name suggests, YouTube.
+ Now, YouTube is a great resource, but usage through the website is quite
+ unpleasant, so lots of people opt to use alternative frontends like
+ Invidious or Piped. Lots of times you just want to use mpv to stream a YouTube
+ video by providing the link like:
+ mpv https://youtube.com/watch?v=[VideoID]
+ That works like a charm, but what happens when you provide a link to an
+ alternative frontend? Well, it translates it to the aforementioned format in
+ order to work. But there are so many instances of Invidious and Piped, so how
+ does it know what to do? That was my question as well, since I use a
+ self-hosted Piped instance and it does not recognize the domain. Obviously.
+
+ Thankfully, yt-dlp is an open source project so you can actually see what goes
+ on behind the scenes. In my case, I installed it with the Arch Linux package
+ manager and it resides at:
+ /usr/lib/python3.13/site-packages/yt_dlp/
+ The way yt-dlp works is that it has a folder called “extractor” in that path
+ and in that folder there is a Python file for each supported website. In
+ YouTube’s case it’s youtube.py. I opened it and I saw this:
+
+ There is a class called YoutubeBaseInfoExtractor that has an array of
+ instances called _INVIDIOUS_SITES that uses a regex to catch every domain
+ there. Now, I saw on the GitHub page of yt-dlp a lot of people asking the
+ maintainers to add more instances to this list. Theoretically, you can also
+ just edit the file and add a domain so that it recognizes that one too. But,
+ in my personal opinion it’s never a good idea to edit upstream files because
+ as the program updates your changes will be overwritten. So I found another
+ way to deal with this.
+
+ You see, yt-dlp is not just a command line utility. You can use it as a
+ library to make your own extractors for websites. The way you do that is by
+ creating your own plugins. In my case, I didn’t actually want to make a new
+ extractor but somehow extend an array of an already existing one. Not all
+ extractors use this method but since YouTube does, it would work. So I made
+ this file at this location:
+ ~/.config/yt-dlp/plugins/piped/yt_dlp_plugins/extractor/piped.py The contents are simple:
+ We import the class that contains the array we need and the youtube extractor.
+ We make a new class in which we provide the one that has the array. We access
+ the array and add a new regex for our domain. Then we make a new class for the
+ extractor, provide the one we just created and the YouTube extractor class and
+ we tell it to work for URLs that look like the one we provided. In that way,
+ this pseudo extractor is activated when we provide a URL that looks like
+ this; it extends the actual YouTube extractor and activates that one, only
+ this time it works for our domain too.
+
+ It’s amazing what you can do with open source software just by observing how a
+ program works. Now every time someone needs a new domain for an alternative
+ YouTube frontend added, instead of asking the developers to do that, using
+ this simple solution he/she can just add it to the plugin.
+Simple way to extend yt-dlp
+
+
+
+
+
+class YoutubeBaseInfoExtractor(InfoExtractor):
+ """Provide base functions for Youtube extractors"""
+
+ _RESERVED_NAMES = (
+ r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
+ r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
+ r'browse|oembed|get_video_info|iframe_api|s/player|source|'
+ r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
+
+ _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
+
+ # _NETRC_MACHINE = 'youtube'
+
+ # If True it will raise an error if no login info is provided
+ _LOGIN_REQUIRED = False
+
+ _INVIDIOUS_SITES = (
+ # invidious-redirect websites
+ r'(?:www\.)?redirect\.invidious\.io',
+ r'(?:(?:www|dev)\.)?invidio\.us',
+ # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
+ r'(?:www\.)?invidious\.pussthecat\.org',
+ r'(?:www\.)?invidious\.zee\.li',
+ [more instances here]
+ )
+
+
+from yt_dlp.extractor.youtube import YoutubeBaseInfoExtractor, YoutubeIE
+
+class CustomYoutubeBaseInfoExtractor(YoutubeBaseInfoExtractor):
+ _INVIDIOUS_SITES = YoutubeBaseInfoExtractor._INVIDIOUS_SITES + (
+ r'(?:www\.)?piped\.konsthol\.eu',
+ )
+
+class PipedKonstholYoutubeIE(YoutubeIE, CustomYoutubeBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?piped\.konsthol\.eu/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
+ IE_NAME = 'piped.konsthol.eu'
+