-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathproject_functions.py
3073 lines (2621 loc) · 128 KB
/
project_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
"""
#
# Project by j54j6
# This program is used to make a private copy of youtube videos and potentially other websites
# supported by youtubedl
# Furthermore it supports automatic periodic checking of channels and auto downloading.
# It also checks if the downlaoded video already exists in the specified storage space and
# checks integrity of videos and redownload them if needed
#
#
# This file contains the "Project specific funtions" this means that all functions I cannot
# reuse in other projects
# like controls or checks are located inside this file.
#
"""
#Python modules
import os
import logging
import json
import pathlib
import hashlib
import re
from datetime import datetime
import urllib.parse as urlparse
import requests
import tldextract
import validators
import pytz
from prettytable import PrettyTable
from yt_dlp import YoutubeDL, DownloadError
#own modules
from database_manager import (check_table_exist, create_table, update_value,
insert_value, fetch_value, fetch_value_as_bool, delete_value, check_scheme_match)
from config_handler import config
# init logger
logger = logging.getLogger(__name__)
#Define buffer per Thread in Bytes for filehashing - Default 2GB = 2147483648
#If you have problems adding files decrease the value!
BUF_SIZE = 4096
################# MAIN
def start():
    """Refresh every subscription, then download any content that is missing.

    Return Values:
        - True: Success (All subscriptions updated and files downloaded)
        - False: Failed (There was an error either during update or download phase)
    """
    logger.info("Checking all subscriptions for updates")
    #Abort early if the metadata refresh failed; downloading would work on stale data
    if not update_subscriptions():
        logger.error("Error while updating subscriptions!")
        return False
    return download_missing()
################# Subscription related
def add_subscription(url:str, downloaded:int = None, last_checked = None, meta_data = None, output_format:list[str] = None):
    """ Add a subscription to the database

    Args:
        url: user-passed url of the channel/playlist to subscribe to
        downloaded: optional already-downloaded item count (used by import)
        last_checked: optional timestamp of the last check (used by import)
        meta_data: optional last metadata object (used by import)
        output_format: optional list of format-profile names for this subscription

    Return Values:
        - True: Success (Subscription successfully added to db, or it already existed)
        - False: Failed (Error while adding subscription. Most likly Ytdlp or SQLite error)
    """
    #Lazy check if the entry already exist in db before downloading metadata and doing stuff...
    subscription_exist = fetch_value("subscriptions",
                                     [
                                         {"subscription_path": url},
                                         {"passed_subscription_path": url}
                                     ], ["id", "scheme", "subscription_name"], True)
    if subscription_exist is not None:
        logger.info("%s subscription for %s already exists!",
                    subscription_exist[1], subscription_exist[2])
        return True
    #Validate the requested output formats (if any) before the expensive metadata fetch
    if output_format is not None:
        for desired_format in output_format:
            if not check_format_profile_exist(desired_format):
                logger.error("The specified format %s is not a valid format profile!", desired_format)
                return False
    subscription_obj = get_subscription_data_obj(url, downloaded, last_checked, meta_data, output_format)
    if not subscription_obj["status"]:
        logger.error("Error while creating subscription obj!")
        return False
    #Check if the formed (canonical) link is already in db - this url is always the same
    if subscription_obj["exist_in_db"]:
        # BUGFIX: the original read subscription_exist[1]/[2] here, but
        # subscription_exist is guaranteed to be None at this point (we
        # returned above otherwise), which raised a TypeError. Use the
        # freshly built subscription object instead.
        logger.info("%s subscription for %s already exists!",
                    subscription_obj["obj"]["scheme"],
                    subscription_obj["obj"]["subscription_name"])
        return True
    added_subscr = insert_value("subscriptions", subscription_obj["obj"])
    if not added_subscr:
        logger.error("Error while inserting subscription for %s into db! - Check log",
                     subscription_obj["obj"]["subscription_name"])
        return False
    logger.info("Subscription for %s successfully created.",
                subscription_obj["obj"]["subscription_name"])
    return True
def add_subscription_batch(file:str, output_format:list[str] = None):
    """ Add subscriptions to the database from a file (one url per line)

    Args:
        file: path to a text file containing one subscription url per line
        output_format: optional list of format-profile names applied to every entry

    Return Values:
        - True: Success (All subscriptions successfully added to db)
        - False: Failed (At least one line failed. Most likly Ytdlp or SQLite error)
    """
    file = os.path.abspath(file)
    if not os.path.isfile(file):
        logger.error("File %s doesn't exist!", file)
        return False
    failed = False
    with open(file, 'r', encoding="UTF-8") as input_file:
        for line in input_file:
            line = line.strip()
            # BUGFIX: output_format must be passed by keyword. Positionally it
            # was bound to add_subscription's second parameter ("downloaded").
            if not add_subscription(line, output_format=output_format):
                failed = True
    if not failed:
        logger.info("All subscriptions successfully added")
        return True
    logger.error("Error while adding subscription batch!")
    return False
def del_subscription(identifier:str):
    """ Remove a subscription record from the database.
    (This function does NOT remove the files!)

    The identifier may either be a url (matched against both the canonical
    and the user-passed subscription path) or a subscription name.

    Return Values:
        - True: Success (Subscription successfully deleted from db, or it never existed)
        - False: Failed (Error while removing subscription from db - Most likly SQL Error)
    """
    if validators.url(identifier):
        #Identifier is a url - probe both stored path columns
        by_formed_path = fetch_value("subscriptions", {"subscription_path": identifier},
                                     ["id"], True)
        by_passed_path = fetch_value("subscriptions",
                                     {"passed_subscription_path": identifier},
                                     ["id"], True)
        if by_formed_path is None and by_passed_path is None:
            logger.info("Subscription does not exist!")
            return True
        subscription_deleted = delete_value("subscriptions", [
            {"subscription_path": identifier},
            {"passed_subscription_path": identifier}])
    else:
        #Identifier is treated as the friendly subscription name
        by_name = fetch_value("subscriptions",
                              {"subscription_name": identifier},
                              ["id"], True)
        if by_name is None:
            logger.info("Subscription does not exist!")
            return True
        subscription_deleted = delete_value("subscriptions", {"subscription_name": identifier})
    if not subscription_deleted:
        logger.error("Error while removing subscription!")
        return False
    logger.info("Subscription removed.")
    return True
def list_subscriptions(scheme_filter:list=None):
    """This function list all subscriptions with prettyTables

    Args:
        scheme_filter: optional list of scheme names; when given, only
            subscriptions of those schemes are listed.

    Return Values:
        - True: Success (Subscription Table was printed to CLI)
        - False: Failed (Failed to fetch all data needed to build table. Most likly SQL Error)
    """
    columns = [
        "id",
        "subscription_name",
        "scheme",
        "subscription_content_count",
        "downloaded_content_count",
        "subscription_last_checked",
        "subscription_path",
        "output_format"
    ]
    if scheme_filter is None:
        logger.debug("List all subscriptions")
        subscriptions = fetch_value("subscriptions", None, columns,
                                    extra_sql="ORDER BY scheme")
    else:
        logger.debug("List subscriptions with Filter")
        conditions = [{"scheme": condition} for condition in scheme_filter]
        subscriptions = fetch_value("subscriptions", conditions, columns,
                                    extra_sql="ORDER BY scheme")
    if subscriptions is None:
        logger.error("Error while fetching DB data!")
        return False
    subscriptions_table = PrettyTable(
        ['ID', 'Name', 'Scheme', 'Avail. Videos', 'Downloaded Videos', 'Last checked', 'url', 'format'])
    subscriptions_table.align['ID'] = "c"
    subscriptions_table.align['Name'] = "l"
    subscriptions_table.align['Scheme'] = "l"
    subscriptions_table.align['Avail. Videos'] = "c"
    subscriptions_table.align['Downloaded Videos'] = "c"
    subscriptions_table.align['Last checked'] = "c"
    subscriptions_table.align['url'] = "l"
    subscriptions_table.align['format'] = "c"
    video_is = 0
    video_should = 0
    for index, subscription in enumerate(subscriptions):
        video_is += int(subscription[4])
        video_should += int(subscription[3])
        #A divider is drawn after the last row of each scheme group and after
        #the final row. (The original duplicated the row-building code in two
        #branches and logged a misleading "no divider needed" message in the
        #divider branch - collapsed here into a single add_row call.)
        is_last_row = index == len(subscriptions) - 1
        enable_divider = is_last_row or subscription[2] != subscriptions[index + 1][2]
        #NULL output_format means the global default format is used
        output_format = "global" if subscription[7] is None else subscription[7]
        subscriptions_table.add_row([
            subscription[0],
            subscription[1],
            subscription[2],
            subscription[3],
            subscription[4],
            subscription[5],
            subscription[6],
            output_format],
            divider=enable_divider)
    subscriptions_table.add_row(["Total: ",len(subscriptions),'',video_should,video_is,'','', ''])
    print(subscriptions_table)
    return True
def update_subscriptions():
    """ This function iterates over all subscriptions and update them.
    It will NOT download any files! It only refreshes the stored metadata,
    content counters and the "subscription_has_new_data" flag.

    Return Values:
        - True: Success (All subscriptions updated)
        - False: Failed (There was an error during updating the db. Most likly YT DLP or SQL Error)
    """
    subscriptions = fetch_value("subscriptions",
                                None,
                                [
                                    "scheme",
                                    "subscription_name",
                                    "subscription_path",
                                    "subscription_last_checked",
                                    "downloaded_content_count",
                                    "subscription_content_count",
                                    "id",
                                    "current_subscription_data"
                                ],
                                False,
                                "ORDER BY scheme")
    if not subscriptions:
        logger.error("Error while fetching subscription data! - Please check log.")
        return False
    error_during_process = False
    faulty_subscriptions = []
    faulty_messages = []
    current_time = get_current_time()
    if current_time == -1:
        #Time cant be fetched! - This will have effect on all subscriptions - abort...
        return False
    #Iterate over all subscriptions
    for subscription in subscriptions:
        #Fetch the current (live) metadata object of the subscription
        current_obj = get_subscription_data_obj(subscription[2])
        if not current_obj["status"]:
            logger.error("Error while fetching actual metadata for subscription %s",
                         subscription[1])
            # BUGFIX: the original fell through and later compared the empty
            # object's None counters, raising a TypeError. Record the failure
            # and skip this subscription instead.
            faulty_subscriptions.append(subscription[1])
            faulty_messages.append("Error while fetching subscription metadata!")
            error_during_process = True
            continue
        #Check if subscription needs to be checked at all
        check_interval = fetch_value("config",
                                     {"option_name": "subscription_check_delay"},
                                     ["option_value"], True)
        if not check_interval:
            logger.error("Error while fetching check interval value! - Continue")
        else:
            check_interval = check_interval[0]
            last_checked = subscription[3]
            current_time = get_current_time()
            time_since_last_check = (datetime.strptime(current_time, "%Y-%m-%d %H:%M:%S")
                                     - datetime.strptime(last_checked, "%Y-%m-%d %H:%M:%S"))
            # BUGFIX: timedelta.seconds ignores whole days (it wraps at 24h),
            # which made week-old subscriptions look freshly checked.
            # total_seconds() returns the real elapsed time.
            hours_since_last_check = time_since_last_check.total_seconds() / 3600
            if hours_since_last_check < int(check_interval):
                logger.info("Subscription %s was checked %s hours ago. Skip",
                            subscription[1], str(round(hours_since_last_check, 2)))
                continue
        #Compare stored and live item counts
        if (current_obj["obj"]["subscription_content_count"] == subscription[5] and
                not subscription[4] != current_obj["obj"]["subscription_content_count"]):
            #No update avail - Modify subscription bookkeeping data and continue
            table_updates = update_value(
                "subscriptions",
                {
                    "subscription_last_checked": current_time,
                    "last_subscription_data": subscription[7],
                    "current_subscription_data": current_obj["obj"]["current_subscription_data"],
                    "subscription_has_new_data": "0"
                },
                {"id": subscription[6]}
            )
        elif current_obj["obj"]["subscription_content_count"] < subscription[5]:
            #Less avail than before - just record a warning message...
            faulty_subscriptions.append(subscription[1])
            # BUGFIX: list.append() takes exactly one argument - the original
            # passed the message plus two format values and raised a
            # TypeError. Format the message before appending.
            faulty_messages.append(
                "Number of items is less than the last check! - "
                f"Last time: {subscription[5]}, "
                f"This time: {current_obj['obj']['subscription_content_count']}")
            #Update table
            table_updates = update_value(
                "subscriptions",
                {
                    "subscription_last_checked": current_time,
                    "last_subscription_data": subscription[7],
                    "current_subscription_data": current_obj["obj"]["current_subscription_data"],
                    "subscription_has_new_data": "0",
                    "subscription_content_count": current_obj["obj"]["subscription_content_count"]
                },
                {"id": subscription[6]}
            )
        else:
            #Updates avail
            logger.info("New content for %s availiable", current_obj["obj"]["subscription_name"])
            #Update table
            table_updates = update_value(
                "subscriptions",
                {
                    "subscription_last_checked": current_time,
                    "last_subscription_data": subscription[7],
                    "current_subscription_data": current_obj["obj"]["current_subscription_data"],
                    "subscription_has_new_data": "1",
                    "subscription_content_count": current_obj["obj"]["subscription_content_count"]
                },
                {"id": subscription[6]}
            )
        if not table_updates:
            logger.error("Error while updating table!")
            faulty_subscriptions.append(subscription[1])
            faulty_messages.append("Error while updating subscription!")
            error_during_process = True
            continue
        logger.info("Subscription %s successfully updated", subscription[1])
    if len(faulty_subscriptions) > 0:
        for index, subscription in enumerate(faulty_subscriptions):
            logger.warning("Subscription %s exited with an error! - Message: %s",
                           subscription, faulty_messages[index])
    if error_during_process:
        logger.error("Please check messages abve!")
        return False
    logger.info("All subscriptions updated!")
    return True
def export_subscriptions():
    """This functions exports all subscriptions saved in the db to a
    "subscriptions_export.json" file inside the configured base location.

    Return Values:
        - True: Success (Export file written)
        - False: Failed (DB fetch failed or the file could not be written)
    """
    subscriptions = fetch_value("subscriptions", None, ["subscription_path",
                                                        "subscription_last_checked",
                                                        "downloaded_content_count",
                                                        "last_subscription_data",
                                                        "subscription_name",
                                                        "output_format"])
    if subscriptions is None:
        logger.error("Error while fetching subscriptions")
        # BUGFIX: the original logged the error but then iterated over None,
        # raising a TypeError. Abort instead.
        return False
    exported_subscriptions:list = []
    for subscription in subscriptions:
        subscription_obj = {
            "subscription_path": subscription[0],
            "subscription_last_checked": subscription[1],
            "downloaded_content_count": subscription[2],
            "last_subscription_data": subscription[3],
            "subscription_name": subscription[4],
            "output_format": subscription[5]
        }
        exported_subscriptions.append(subscription_obj)
    base_path = fetch_value("config", {"option_name": "base_location"}, ["option_value"], True)
    if not base_path or not isinstance(base_path, tuple):
        logger.error("Error while fetching base path from config! - Use default (Partent directory)")
        base_path = "./"
    else:
        base_path = base_path[0]
    logger.info("Exported %i subscriptions. Create file at %s", len(exported_subscriptions), os.path.abspath(base_path))
    #Insert list into file.
    try:
        with open(os.path.join(os.path.abspath(base_path),
                               "subscriptions_export.json"),
                  encoding="UTF-8",
                  mode="w+") as subscription_file:
            subscriptions_as_json = json.dumps(exported_subscriptions)
            subscription_file.write(subscriptions_as_json)
        logger.info("Subscriptions exported")
    except FileNotFoundError as e:
        logger.error("Error while creating file! - Error: %s", e)
        return False
    except json.JSONDecodeError as e:
        logger.error("Error while parsing JSON array with subscriptions! - Error: %s", e)
        return False
    return True
def import_subscriptions(path="./", delelte_current_subscriptions=False):
    """ This function imports subscriptions based on a json file
    (generated by export_subscriptions function)

    Args:
        path: path to the JSON export file
        delelte_current_subscriptions: when True, all existing subscriptions
            are removed from the db before importing
            (parameter name kept as-is for caller compatibility)

    Return Values:
        - True: Success (All subscriptions imported)
        - False: Failed (File missing/invalid or at least one import failed)
    """
    #Consistency: this function previously logged via the root "logging"
    #module; it now uses the module-level "logger" like the rest of the file.
    if delelte_current_subscriptions:
        logger.info("Current subscriptions will be deleted before import!")
        if delete_value("subscriptions", None, True):
            logger.info("Subscriptions removed!")
        else:
            logger.error("Error while removing old subscriptions! - Abort")
            return False
    logger.info("Import subscriptions from %s", path)
    if not os.path.exists(path):
        logger.error("The given path does not exist!")
        return False
    try:
        with open(os.path.abspath(path), encoding="UTF-8", mode="r") as f:
            subscriptions = json.loads(f.read())
        #Iterate over the exported json array
        error_raised = False
        failed_imports = []
        for subscription in subscriptions:
            try:
                #output_format is stored as a JSON-encoded list (or empty)
                if subscription["output_format"] is not None and len(subscription["output_format"]) > 0:
                    format_list = json.loads(subscription["output_format"])
                else:
                    format_list = None
            except json.JSONDecodeError:
                logger.error("Error while inserting output format for subscription! - Use NONE!")
                format_list = None
            success = add_subscription(subscription["subscription_path"],
                                       subscription["downloaded_content_count"],
                                       subscription["subscription_last_checked"],
                                       subscription["last_subscription_data"],
                                       format_list)
            if not success:
                error_raised = True
                failed_imports.append(subscription["subscription_name"])
        if error_raised:
            for failed_import in failed_imports:
                logger.error("Error while importing %s to db!", failed_import)
            return False
        logger.info("All subscriptions successfully imported!")
        return True
    except FileNotFoundError as e:
        logger.error("Subscriptiuon file not found! - Error: %s", e)
        return False
    except json.JSONDecodeError as e:
        logger.error("Error while loading JSON File! - Error: %s", e)
        return False
### Subscription helper
def create_subscription_url(url:str, scheme:json):
    """ This function creates the subscription url which will be used to
    subscribe to a channel or anything else.

    The scheme's "subscription.url_blueprint" is a template containing
    placeholders like {scheme}, {subd}, {sld}, {tld}, {category},
    {subscription_name}, {subscription_url}; each available part extracted
    from *url* is substituted in, unused placeholders are stripped.

    Return Value:dict
    {
        "status": False,                 -> Operation success or failed (probe this!)
        "subscribable": True,            -> If status is False and this is False,
                                            the scheme does not support subscriptions
        "scheme": None,                  -> url scheme (e.g. https)
        "tld": None,                     -> tld of the url (e.g. com)
        "sld": None,                     -> sld of the url (e.g. reddit)
        "subd": None,                    -> subdomain of the url (e.g. www) can be empty!
        "subscription_name": None,       -> most likly playlist name / channel name
        "category_avail": False,         -> Are there categories available
        "category": None,                -> If available -> which category the url belongs to
        "subscription_url": None,        -> category-specific url fragment from the scheme
        "formed_subscription_url": None  -> the final, uniform subscription url
    }
    """
    logger.debug("Create subscription url for url %s", url)
    return_val = {
        "status": False,
        "subscribable": True,
        "scheme": None,
        "tld": None,
        "sld": None,
        "subd": None,
        "subscription_name": None,
        "category_avail": False,
        "category": None,
        "subscription_url": None,
        "formed_subscription_url": None
    }
    #Scheme must declare subscription support and provide a url blueprint
    if(not "subscription" in scheme or
       not "available" in scheme["subscription"] or
       scheme["subscription"]["available"] is not True or
       not "url_blueprint" in scheme["subscription"]):
        #Distinguish "explicitly not subscribable" from "malformed scheme"
        if("subscription" in scheme and
           "available" in scheme["subscription"] and
           scheme["subscription"]["available"] is not True):
            logger.info("Scheme %s does not support subscriptions!", scheme["schema_name"])
            return_val["subscribable"] = False
            return return_val
        logger.error("Scheme does not contain a subscription key or url blueprint!")
        return return_val
    #Check which parts are needed
    url_blueprint:str = scheme["subscription"]["url_blueprint"]
    #All {placeholder} tokens contained in the blueprint
    blueprint_data = re.findall(r'{\w*}',url_blueprint)
    tld_url_parts = tldextract.extract(url)
    parsed_url_parts = urlparse.urlparse(url)
    #NOTE(review): urlparse/tldextract return '' (not None) for missing
    #parts, so these "is not None" guards always pass - confirm intended.
    if parsed_url_parts.scheme is not None:
        logger.debug("Key scheme is in parsed urls")
        if "{scheme}" in blueprint_data:
            logger.debug("Key scheme is in the blueprint subscription link. Add it...")
            return_val["scheme"] = parsed_url_parts.scheme
    if tld_url_parts.subdomain is not None:
        logger.debug("Key subdomain is in parsed urls")
        if "{subd}" in blueprint_data:
            logger.debug("Key subd is in the blueprint subscription link. Add it...")
            return_val["subd"] = tld_url_parts.subdomain
    if tld_url_parts.domain is not None:
        logger.debug("Key domain is in parsed urls")
        if "{sld}" in blueprint_data:
            logger.debug("Key sld is in the blueprint subscription link. Add it...")
            return_val["sld"] = tld_url_parts.domain
    if tld_url_parts.suffix is not None:
        logger.debug("Key suffix is in parsed urls")
        if "{tld}" in blueprint_data:
            logger.debug("Key tld is in the blueprint subscription link. Add it...")
            return_val["tld"] = tld_url_parts.suffix
    #Check if the scheme supports categories
    if("categories" in scheme and
       "available" in scheme["categories"] and
       scheme["categories"]["available"] is True):
        logger.debug("Categories are available. Fetch data...")
        #extract the category from the url
        category = fetch_category_name(url, scheme)
        #Check if a category was fetched
        if category is not None:
            logger.debug("Category found")
            if "{category}" in blueprint_data:
                logger.debug("Key category is in the blueprint subscription link. Add it...")
                return_val["category_avail"] = True
                return_val["category"] = category
    #The subscription name (channel/playlist) is mandatory for the blueprint
    subscription_name = fetch_subscription_name(url, scheme)
    if not subscription_name:
        logger.error("Can't fetch subscription name! - Maybe you cannot subscribe to the url?")
        return return_val
    if "{subscription_name}" in blueprint_data:
        logger.debug("Key subscription_name is in the blueprint subscription link. Add it...")
        return_val["subscription_name"] = subscription_name
    if "{subscription_url}" in blueprint_data:
        logger.debug("Key subscription_url is in the blueprint subscription link. Add it...")
        #A category may define its own subscription url fragment
        if(return_val["category_avail"] and
           category in scheme["categories"]["categories"] and
           "subscription_url" in scheme["categories"]["categories"][category]):
            if(scheme["categories"]["categories"][category]["subscription_url"] is not False and
               scheme["categories"]["categories"][category]["subscription_url"] is not None and
               scheme["categories"]["categories"][category]["subscription_url"] != ""):
                logger.debug("Subscription url added...")
                return_val["subscription_url"] = scheme["categories"]["categories"][category]["subscription_url"]
    logger.debug("All url data prepared. Create Link")
    #Substitute each collected part into the blueprint; strip unused
    #"/{part}" segments so no empty path components remain
    supported_keys = ["scheme", "subd", "sld", "tld", "category",
                      "subscription_name", "subscription_url"]
    for part in supported_keys:
        if return_val[part] is not None and return_val[part] is not False:
            url_blueprint = url_blueprint.replace(f"{{{part}}}", return_val[part])
        else:
            url_blueprint = url_blueprint.replace(f"/{{{part}}}", "")
    #Any leftover brace means a placeholder was not resolved -> hard error
    if url_blueprint.find("{") != -1 or url_blueprint.find("}") != -1:
        logger.error("""Error while creating correct subscription url! -
                     Not all placeholders were replaced! - Url: %s""", url_blueprint)
        return return_val
    logger.debug("Url successfully created")
    return_val["formed_subscription_url"] = url_blueprint
    return_val["status"] = True
    return return_val
def get_subscription_data_obj(url:str, downloaded = None, last_checked=None, last_metadata=None, output_format=None):
    """ Returns a dict containing all information about a subscription (db obj)
    and also whether the url already exists in the db.

    Args:
        url: user-passed subscription url
        downloaded: optional already-downloaded item count (import use)
        last_checked: optional last-check timestamp (import use)
        last_metadata: optional last metadata object (import use)
        output_format: optional list of format profiles for this subscription

    Return Value: dict
    {
        "status": False,      -> Operation successfull? - Use this as probe!
        "exist_in_db": False, -> Does the subscription already exist?
        "obj": {              -> Subscription object, directly insertable via SQL engine
            "scheme", "subscription_name", "subscription_path",
            "passed_subscription_path", "subscription_content_count",
            "current_subscription_data", "last_subscription_data",
            "downloaded_content_count", "output_format"
        }
    }
    """
    subscription_entry:dict = {
        "status": False,
        "exist_in_db": False
    }
    #Pre-fill the object so callers always find the expected keys even on failure
    subscription_entry["obj"] = {
        "scheme": None,
        "subscription_name": None,
        "subscription_path": None,
        "passed_subscription_path": url,
        "subscription_content_count": None,
        "current_subscription_data": None,
        "last_subscription_data": None,
        "downloaded_content_count": None,
        "output_format": output_format
    }
    data = prepare_scheme_dst_data(url)
    if data["status"] is False or data["scheme"] is None:
        logger.error("The provided url is not supported!")
        return subscription_entry
    logger.debug("Used scheme for url is: %s", data["scheme"])
    subscription_data = create_subscription_url(url, data["scheme"])
    if not subscription_data["status"]:
        if subscription_data["subscribable"] is False:
            # BUGFIX: the scheme dict uses the key "schema_name" everywhere
            # else in this file - "scheme_name" raised a KeyError here.
            schema_name = data["scheme"]["schema_name"]
            logger.info("Can't add subscription - Scheme %s does not support subscriptions",
                        schema_name)
            return subscription_entry
        logger.error("Error while fetching subscription data!")
        return subscription_entry
    #Fetch flat playlist metadata (no media download) for the formed url
    metadata = get_metadata(subscription_data["formed_subscription_url"],
                            get_ydl_opts(data["dst_path"],
                                         {'quiet': False, 'extract_flat': 'in_playlist'}))
    if not metadata:
        logger.error("Error while fetching metadata for subscription! - Please check the log.")
        return subscription_entry
    if("playlist_count" not in metadata or
       "entries" not in metadata or
       "_type" not in metadata):
        logger.error("Fetched metadata does not contain all information needed! - Data: %s",
                     metadata)
        return subscription_entry
    obj = {}
    obj["scheme"] = data["scheme"]["schema_name"]
    obj["passed_subscription_path"] = url
    obj["subscription_name"] = subscription_data["subscription_name"]
    obj["subscription_path"] = subscription_data["formed_subscription_url"]
    obj["subscription_content_count"] = metadata["playlist_count"]
    obj["current_subscription_data"] = metadata
    obj["output_format"] = output_format
    #Optional fields only set when provided (e.g. by import_subscriptions)
    if downloaded is not None and downloaded > 0:
        obj["downloaded_content_count"] = downloaded
    if last_checked is not None:
        obj["subscription_last_checked"] = last_checked
    if last_metadata is not None:
        obj["last_subscription_data"] = last_metadata
    subscription_entry["obj"] = obj
    #The formed url is canonical, so it is the reliable duplicate probe
    entry_in_db = fetch_value("subscriptions",
                              {"subscription_path": subscription_data["formed_subscription_url"]},
                              ["id", "scheme", "subscription_name"], True)
    subscription_entry["exist_in_db"] = entry_in_db is not None
    subscription_entry["status"] = True
    return subscription_entry
def fetch_subscription_name(url:str, scheme:json):
    """ Extract the "target name" of a subscription from *url* - most likly
    the channel name or playlist name.

    The scheme's "subscription_name_locator" selects which path segment of
    the url holds the name. If the scheme lacks a "subscription" section,
    segment index 2 is used as a fallback (this should not normally happen).
    Some sites only use numeric ids in their urls; those would need YT DLP
    to resolve the real name, but for now this works without it.

    Return Value:str
        - None -> Nothing found (empty)
        - Subscription name string e.g j54j6
    """
    logger.debug("Fetch subscription name for url %s", url)
    if "subscription" in scheme:
        segment_index = scheme["subscription"]["subscription_name_locator"]
    else:
        #Fallback - this should not be used!
        segment_index = 2
    path_segments = urlparse.urlparse(url).path.split('/')
    try:
        return path_segments[segment_index]
    except IndexError:
        logger.error("No subscription name found!")
        return None
################# Download functions
def direct_download_batch(file:str, output_format:list[str] = None):
    """ This function represents the "manual" video download approach but using
    a batch file (one url per line). Each url is downloaded, hashed and
    registered via direct_download().

    Args:
        file: path to a text file containing one url per line
        output_format: optional list of format-profile names for every download

    Return Values:bool
        - True (Successfully downloaded all files and registered them in db)
        - False (Failed - either during download or registration / hashing)
    """
    file = os.path.abspath(file)
    if not os.path.isfile(file):
        logger.error("File %s doesn't exist!", file)
        return False
    failed = False
    with open(file, 'r', encoding="UTF-8") as input_file:
        for line in input_file:
            line = line.strip()
            # BUGFIX: output_format must be passed by keyword. Positionally it
            # was bound to direct_download's second parameter (own_file_data),
            # which made every batch download misbehave.
            if not direct_download(line, output_format=output_format):
                failed = True
    if not failed:
        logger.info("All files successfully downloaded")
        return True
    logger.error("Error while downloading batch!")
    return False
#This function is called from CLI
def direct_download(url:str, own_file_data:dict=None, output_format:list[str] = None):
    """ This function represents the "manual" video download approach
    You can pass an url and the file will be downlaoded, hashed and registered.

    Args:
        url: url of the single video/file to download
        own_file_data: optional pre-computed result of prepare_scheme_dst_data();
            when omitted, it is computed here from the url
        output_format: optional list of format-profile names for this download

    Return Values:bool
        - True (Successfully downloaded file and registered it in db)
        - False (Failed - either during download or registration / hashing)
    """
    #Line Break for Pylint #C0301
    logger.info("""Directly download content from %s -
                Check prerequisites and prepare download data""", url)
    if output_format is not None:
        logging.debug("Try to use passed format %s", output_format)
    #Resolve scheme/destination data unless the caller already supplied it
    if not own_file_data:
        prepared_data = prepare_scheme_dst_data(url)
    else:
        prepared_data = own_file_data
    if prepared_data["status"] != 1:
        logger.error("Error while preparing download! - Check log.")
        return False
    path = prepared_data["dst_path"]
    logger.info("File will be saved under: %s", path)
    downloaded = download_file(url=url, path=path, output_format=output_format)
    if not downloaded["status"]:
        logger.error("Error while downloading file from %s - Please check log!", url)
        return False
    full_file_path = downloaded["full_file_path"]
    metadata = downloaded["metadata"]
    logger.debug("Full File path is: %s", full_file_path)
    #Compute hash from file (used as duplicate probe in the items table)
    file_hash = create_hash_from_file(full_file_path)
    #Check if hash created successfully; on failure remove the downloaded file
    if not file_hash["status"] or file_hash["hash"] is None:
        logger.error("Error while creating hash from file! - Please check log. - Results: %s, %s",
                     file_hash["status"], file_hash["hash"])
        error_post_processing(full_file_path)
        return False
    #Check if hash is already in database
    #If hash is not in db -> Video is new -
    #If hash is in db video already exist.
    #NOTE(review): the original comment says "Check if the url is the same"
    #for the existing-hash case, but no such check is performed - the
    #function simply returns True and keeps the freshly downloaded copy on
    #disk without registering it. Confirm this is intended.
    hash_exist = fetch_value("items", {"file_hash": file_hash["hash"]}, None, True)
    if not hash_exist:
        video_registered = save_file_to_db(prepared_data,
                                           full_file_path,
                                           file_hash["hash"],
                                           {"url": [url]},
                                           metadata)
        if video_registered:
            logger.info("File successfully downlaoded.")
            return True
        logger.error("Error while register Video to db!")
        error_post_processing(full_file_path)
        return False
    return True
#This function will actually download a file...
def download_file(url, path, metadata=None, ignore_existing_url=False, output_format:list[str] = None):
"""This function downloads the file specified in url and also provides the prepared
file path from ydl
if the metadata parameter is None they will be fetched
Return Value:dict
{
"status": False, - Operation successfull? - Use it as probe!
"full_file_path": None, - The full file path to the file (absolute path)
including the filename
"filename": None,
"metadata": None - Metadata from the file
}
"""
return_val = {"status": False, "full_file_path": None, "filename": None, "metadata": None}
metadata = get_metadata(url, get_ydl_opts(path, None, output_format))
if metadata is None:
logging.error("Error while fetching metadata to check if video already exists in db! - Continue without checking")
return return_val
full_file_path = YoutubeDL(get_ydl_opts(path, None, output_format)).prepare_filename(metadata,
outtmpl=path +
'/%(title)s.%(ext)s')
filename = os.path.basename(full_file_path).split(os.path.sep)[-1]
return_val["full_file_path"] = full_file_path
return_val["filename"] = filename
if not ignore_existing_url:
#Check if video (path) is in db
logger.debug("Check if file already exists in db")
file_in_db = fetch_value("items", {"file_path": path, "file_name": filename}, ["file_path"], True)
if file_in_db is not None:
logging.info("Video already exists in DB! - check if url exist")
url_is_in_db = check_is_url_in_items_db(url, filename, path)
if not url_is_in_db["status"]:
logger.error("Error while checking if url is in db!")
#Since the file already exist there is no really need to download the file again. The url add is only a double check.
# So we will return true
return_val["status"] = True
return return_val
if not url_is_in_db["url_exist"]:
logger.debug("File is already in DB (name match) but url is not the same. Add url to entry!")
url_added = add_url_to_item_is_db(url_is_in_db["id"], url)
if not url_added:
logger.error("Error while adding url to file in DB!")
return_val["status"] = True
return return_val
return_val["status"] = True
return return_val
logger.info("File %s dont exist in DB", full_file_path)
logger.info("Downloading file from server")
try:
ydl_opts = get_ydl_opts(path, None, output_format)
#Fetch metadata if not passed
if metadata is None:
metadata = get_metadata(url, ydl_opts)
if not metadata or "title" not in metadata or "ext" not in metadata:
#Line Break for Pylint #C0301
logger.error("""Error while fetching metadata from target server! -
Metadata could not be fetched or key \"title\" / \"ext\" is missing""")
return return_val
with YoutubeDL(ydl_opts) as ydl:
value = ydl.download([url])
#https://github.com/yt-dlp/yt-dlp/issues/4262
if value == 0 or value == 1 or value == 100:
#Line Break for Pylint #C0301
full_file_path = YoutubeDL(ydl_opts).prepare_filename(metadata,
outtmpl=path +
'/%(title)s.%(ext)s')