From 833fe8c9afe565332f1c8388ec95705ac3d3ce3b Mon Sep 17 00:00:00 2001
From: deepspy <pavel.deepspy@gmail.com>
Date: Mon, 24 Jun 2024 00:10:22 +0300
Subject: [PATCH 1/8] Added a basic shell for a kankids extractor.

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/kankids.py    | 44 ++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 youtube_dl/extractor/kankids.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 3da5f802093..c9def8240b4 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -560,6 +560,7 @@
 from .kakao import KakaoIE
 from .kaltura import KalturaIE
 from .kankan import KankanIE
+from .kankids import KanKidsIE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
new file mode 100644
index 00000000000..f6f5ae38cfd
--- /dev/null
+++ b/youtube_dl/extractor/kankids.py
@@ -0,0 +1,44 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+class KanKidsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?kankids\.org\.il/content/kids/(?P<category>[a-z]+)-main/p-(?P<id>[0-9]+)/(?P<season>\w+/)?$'
+    _TEST = {
+        'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/',
+        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+        'info_dict': {
+            'id': '42',
+            'ext': 'mp4',
+            'title': 'Video title goes here',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            # TODO more properties, either as:
+            # * A value
+            # * MD5 checksum; start the string with md5:
+            # * A regular expression; start the string with re:
+            # * Any Python type (for example int or float)
+        }
+    }
+
+    def _real_extract(self, url):
+        a = super()._match_valid_url(url)
+        print(a.groupdict())
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # TODO more code goes here, for example ...
+        # title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+        title = 'hi'
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/s1/89707/',
+            'ie_key': 'Generic',
+            '_type': 'url',
+            # 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+            # TODO more properties (see youtube_dl/extractor/common.py)
+            }
+

From d335e0beecfbff7a55ba3a35ef3688104821525a Mon Sep 17 00:00:00 2001
From: deepspy <pavel.deepspy@gmail.com>
Date: Mon, 24 Jun 2024 14:22:56 +0300
Subject: [PATCH 2/8] Extracted episode list.

---
 youtube_dl/extractor/kankids.py | 34 ++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
index f6f5ae38cfd..38866dc6f8a 100644
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -2,9 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+import re
 
 class KanKidsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?kankids\.org\.il/content/kids/(?P<category>[a-z]+)-main/p-(?P<id>[0-9]+)/(?P<season>\w+/)?$'
+    _VALID_URL = r'https?://(?:www\.)?kankids\.org\.il/content/kids/(?P<category>[a-z]+)-main/p-(?P<id>[0-9]+)/(?P<season>\w+)?/?$'
     _TEST = {
         'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/',
         'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
@@ -22,17 +23,32 @@ class KanKidsIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        a = super()._match_valid_url(url)
-        print(a.groupdict())
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        m = super()._match_valid_url(url)
+        series_id = m.group('id')
+        category = m.group('category')
+        playlist_season = m.group('season')
+        
+        webpage = self._download_webpage(url, series_id)
 
-        # TODO more code goes here, for example ...
-        # title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
-        title = 'hi'
+        series_title = self._html_search_regex(r'<title>(?P<title>.+) \|', webpage, 'title')
+
+        season = playlist_season if playlist_season else '(?P<season>\w+)'
+        playlist = set(re.findall(
+            r'href="/content/kids/' +   # Content dir
+            category + r'-main/' +      # Category
+            'p-' + series_id + '/' +    # Series
+            season + '/' +              # Season
+            '(?P<id>[0-9]+)/"' +        # Episode
+            '.+title="(?P<title>.+)"'   # Title
+            , webpage))
+            # , 'Episode list')
+        print('playlist:', playlist)
+
+        for season, video_id, title in playlist if not playlist_season else map(lambda episode: (playlist_season,) + episode, playlist):
+            pass
 
         return {
-            'id': video_id,
+            'id': series_id,
             'title': title,
             'description': self._og_search_description(webpage),
             'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/s1/89707/',

From 3fb423c0bfd87ea8ce7cabecf8bc99e180eef437 Mon Sep 17 00:00:00 2001
From: deepspy <pavel.deepspy@gmail.com>
Date: Mon, 24 Jun 2024 16:25:34 +0300
Subject: [PATCH 3/8] First working version of the kan kids extractor.

---
 youtube_dl/extractor/kankids.py | 40 ++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
index 38866dc6f8a..19c9baf378f 100644
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -4,8 +4,13 @@
 from .common import InfoExtractor
 import re
 
+CONTENT_DIR = r'/content/kids/'
+DOMAIN = r'kankids.org.il'
+
 class KanKidsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?kankids\.org\.il/content/kids/(?P<category>[a-z]+)-main/p-(?P<id>[0-9]+)/(?P<season>\w+)?/?$'
+    _VALID_URL = r'https?://(?:www\.)?' +\
+        DOMAIN.replace('.', '\.') + CONTENT_DIR +\
+        r'(?P<category>[a-z]+)-main/p-(?P<id>[0-9]+)/(?P<season>\w+)?/?$'
     _TEST = {
         'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/',
         'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
@@ -32,29 +37,32 @@ def _real_extract(self, url):
 
         series_title = self._html_search_regex(r'<title>(?P<title>.+) \|', webpage, 'title')
 
-        season = playlist_season if playlist_season else '(?P<season>\w+)'
+        season = playlist_season if playlist_season else r'(?P<season>\w+)'
+        content_dir = CONTENT_DIR + category + r'-main/'
         playlist = set(re.findall(
-            r'href="/content/kids/' +   # Content dir
-            category + r'-main/' +      # Category
-            'p-' + series_id + '/' +    # Series
-            season + '/' +              # Season
-            '(?P<id>[0-9]+)/"' +        # Episode
-            '.+title="(?P<title>.+)"'   # Title
+            r'href="' + content_dir +       # Content dir
+            r'p-' + series_id + r'/' +      # Series
+            season + r'/' +                 # Season
+            r'(?P<id>[0-9]+)/"' +           # Episode
+            r'.+title="(?P<title>.+)"'      # Title
             , webpage))
             # , 'Episode list')
         print('playlist:', playlist)
 
+        entries = []
+        content_dir = r'https://www.' + DOMAIN + content_dir
         for season, video_id, title in playlist if not playlist_season else map(lambda episode: (playlist_season,) + episode, playlist):
-            pass
+            entries.append(self.url_result(
+                content_dir + season + r'/' + video_id + r'/',
+                ie='Generic',
+                video_id=video_id,
+                video_title=title,
+                ))
 
         return {
+            '_type': 'playlist',
             'id': series_id,
-            'title': title,
-            'description': self._og_search_description(webpage),
-            'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/s1/89707/',
-            'ie_key': 'Generic',
-            '_type': 'url',
-            # 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
-            # TODO more properties (see youtube_dl/extractor/common.py)
+            'title': series_title,
+            'entries': entries,
             }
 

From c9265f6d609d3805e5c67ac615637a203ece80fc Mon Sep 17 00:00:00 2001
From: deepspy <pavel.deepspy@gmail.com>
Date: Mon, 24 Jun 2024 16:46:29 +0300
Subject: [PATCH 4/8] Fixed series title extraction when the page title uses
 "-" instead of "|" as the separator.

---
 youtube_dl/extractor/kankids.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
index 19c9baf378f..03051817c08 100644
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -35,7 +35,12 @@ def _real_extract(self, url):
         
         webpage = self._download_webpage(url, series_id)
 
-        series_title = self._html_search_regex(r'<title>(?P<title>.+) \|', webpage, 'title')
+        title_pattern = r'<title>(?P<title>.+) \|'
+        series_title = re.search(title_pattern, webpage)
+        if not series_title:
+            series_title = re.search(title_pattern[:-1] + r'-', webpage)
+        if series_title:
+            series_title = series_title.group('title')
 
         season = playlist_season if playlist_season else r'(?P<season>\w+)'
         content_dir = CONTENT_DIR + category + r'-main/'
@@ -47,7 +52,6 @@ def _real_extract(self, url):
             r'.+title="(?P<title>.+)"'      # Title
             , webpage))
             # , 'Episode list')
-        print('playlist:', playlist)
 
         entries = []
         content_dir = r'https://www.' + DOMAIN + content_dir

From 162eb5632b70a2b84a07517bea7ae50c2b21b63b Mon Sep 17 00:00:00 2001
From: deepspy <pavel.deepspy@gmail.com>
Date: Mon, 24 Jun 2024 17:40:43 +0300
Subject: [PATCH 5/8] Added unit tests for kankids extractor. Fixed matching
 of playlist ids that lack the "p-" prefix.

---
 youtube_dl/extractor/kankids.py | 40 +++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
index 03051817c08..906cf2a2d22 100644
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -10,28 +10,34 @@
 class KanKidsIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?' +\
         DOMAIN.replace('.', '\.') + CONTENT_DIR +\
-        r'(?P<category>[a-z]+)-main/p-(?P<id>[0-9]+)/(?P<season>\w+)?/?$'
-    _TEST = {
-        'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/',
-        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
-        'info_dict': {
-            'id': '42',
-            'ext': 'mp4',
-            'title': 'Video title goes here',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            # TODO more properties, either as:
-            # * A value
-            # * MD5 checksum; start the string with md5:
-            # * A regular expression; start the string with re:
-            # * Any Python type (for example int or float)
-        }
-    }
+        r'(?P<category>[a-z]+)-main/(?P<id>[\w\-0-9]+)/(?P<season>\w+)?/?$'
+    _TESTS = [
+        {
+            'url': 'https://www.kankids.org.il/content/kids/ktantanim-main/p-11732/',
+            'info_dict': {
+                '_type': 'playlist',
+                'id': 'p-11732',
+                'title': 'בית ספר לקוסמים',
+                },
+            'playlist_count': 60,
+        },
+        {
+            'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/cramel_main/s1/',
+            'info_dict': {
+                '_type': 'playlist',
+                'id': 'cramel_main',
+                'title': 'כראמל - עונה 1',
+                },
+            'playlist_count': 21,
+        },
+        ]
 
     def _real_extract(self, url):
         m = super()._match_valid_url(url)
         series_id = m.group('id')
         category = m.group('category')
         playlist_season = m.group('season')
+        print(m.groupdict())
         
         webpage = self._download_webpage(url, series_id)
 
@@ -46,7 +52,7 @@ def _real_extract(self, url):
         content_dir = CONTENT_DIR + category + r'-main/'
         playlist = set(re.findall(
             r'href="' + content_dir +       # Content dir
-            r'p-' + series_id + r'/' +      # Series
+            series_id + r'/' +              # Series
             season + r'/' +                 # Season
             r'(?P<id>[0-9]+)/"' +           # Episode
             r'.+title="(?P<title>.+)"'      # Title

From d3e980eaa526f2e7524c344cdef90abf85d1d55a Mon Sep 17 00:00:00 2001
From: deepspy <pavel.deepspy@gmail.com>
Date: Mon, 24 Jun 2024 22:11:06 +0300
Subject: [PATCH 6/8] Some cleanup of the kan kids extractor.

---
 youtube_dl/extractor/kankids.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
index 906cf2a2d22..ca6d36fa895 100644
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -37,7 +37,6 @@ def _real_extract(self, url):
         series_id = m.group('id')
         category = m.group('category')
         playlist_season = m.group('season')
-        print(m.groupdict())
         
         webpage = self._download_webpage(url, series_id)
 
@@ -57,7 +56,6 @@ def _real_extract(self, url):
             r'(?P<id>[0-9]+)/"' +           # Episode
             r'.+title="(?P<title>.+)"'      # Title
             , webpage))
-            # , 'Episode list')
 
         entries = []
         content_dir = r'https://www.' + DOMAIN + content_dir

From a4737bb7555d8f34a20168ab0a99e0a860727cca Mon Sep 17 00:00:00 2001
From: deepspy <pavel.deepspy@gmail.com>
Date: Mon, 24 Jun 2024 23:12:03 +0300
Subject: [PATCH 7/8] Fixed flake8 errors.

---
 youtube_dl/extractor/kankids.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
index ca6d36fa895..1bcc1d025c6 100644
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -7,6 +7,7 @@
 CONTENT_DIR = r'/content/kids/'
 DOMAIN = r'kankids.org.il'
 
+
 class KanKidsIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?' +\
         DOMAIN.replace('.', '\.') + CONTENT_DIR +\
@@ -18,7 +19,7 @@ class KanKidsIE(InfoExtractor):
                 '_type': 'playlist',
                 'id': 'p-11732',
                 'title': 'בית ספר לקוסמים',
-                },
+            },
             'playlist_count': 60,
         },
         {
@@ -27,17 +28,17 @@ class KanKidsIE(InfoExtractor):
                 '_type': 'playlist',
                 'id': 'cramel_main',
                 'title': 'כראמל - עונה 1',
-                },
+            },
             'playlist_count': 21,
         },
-        ]
+    ]
 
     def _real_extract(self, url):
         m = super()._match_valid_url(url)
         series_id = m.group('id')
         category = m.group('category')
         playlist_season = m.group('season')
-        
+
         webpage = self._download_webpage(url, series_id)
 
         title_pattern = r'<title>(?P<title>.+) \|'
@@ -54,8 +55,8 @@ def _real_extract(self, url):
             series_id + r'/' +              # Series
             season + r'/' +                 # Season
             r'(?P<id>[0-9]+)/"' +           # Episode
-            r'.+title="(?P<title>.+)"'      # Title
-            , webpage))
+            r'.+title="(?P<title>.+)"',     # Title
+            webpage))
 
         entries = []
         content_dir = r'https://www.' + DOMAIN + content_dir
@@ -65,12 +66,12 @@ def _real_extract(self, url):
                 ie='Generic',
                 video_id=video_id,
                 video_title=title,
-                ))
+            ))
 
         return {
             '_type': 'playlist',
             'id': series_id,
             'title': series_title,
             'entries': entries,
-            }
+        }
 

From 5b088ccdd9ebb83ccd32a0a0a2cacd537fd2e03e Mon Sep 17 00:00:00 2001
From: deepspy <pavel.deepspy@gmail.com>
Date: Wed, 3 Jul 2024 14:31:36 +0300
Subject: [PATCH 8/8] Fixed linter warnings.

---
 youtube_dl/extractor/kankids.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
index 1bcc1d025c6..397da5517b3 100644
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -10,7 +10,7 @@
 
 class KanKidsIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?' +\
-        DOMAIN.replace('.', '\.') + CONTENT_DIR +\
+        DOMAIN.replace('.', '\\.') + CONTENT_DIR +\
         r'(?P<category>[a-z]+)-main/(?P<id>[\w\-0-9]+)/(?P<season>\w+)?/?$'
     _TESTS = [
         {
@@ -51,11 +51,11 @@ def _real_extract(self, url):
         season = playlist_season if playlist_season else r'(?P<season>\w+)'
         content_dir = CONTENT_DIR + category + r'-main/'
         playlist = set(re.findall(
-            r'href="' + content_dir +       # Content dir
-            series_id + r'/' +              # Series
-            season + r'/' +                 # Season
-            r'(?P<id>[0-9]+)/"' +           # Episode
-            r'.+title="(?P<title>.+)"',     # Title
+            r'href="' + content_dir         # Content dir
+            + series_id + r'/'              # Series
+            + season + r'/'                 # Season
+            + r'(?P<id>[0-9]+)/"'           # Episode
+            + r'.+title="(?P<title>.+)"',   # Title
             webpage))
 
         entries = []
@@ -74,4 +74,3 @@ def _real_extract(self, url):
             'title': series_title,
             'entries': entries,
         }
-