-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexperimentyt.py
60 lines (43 loc) · 1.83 KB
/
experimentyt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import whisper
from pytube import YouTube
from transformers import pipeline
import os
from typing import List
import logging
import time
from tqdm import tqdm
def download_audio_from_youtube(url: str, video_name: str) -> str:
    """Download the first audio-only stream of a YouTube video.

    Args:
        url: Address of the YouTube video.
        video_name: Base name of the output file; ".mp3" is appended to it.

    Returns:
        The file name handed to the downloader (``video_name + ".mp3"``).

    Raises:
        ValueError: If the video offers no audio-only stream.
    """
    audio_stream = YouTube(url).streams.filter(only_audio=True).first()
    if audio_stream is None:
        # An empty stream query yields None; fail with a clear message
        # instead of an AttributeError on the download call below.
        raise ValueError(f"No audio-only stream available for {url}")

    # NOTE(review): the extension is always ".mp3" regardless of the stream's
    # actual container format - confirm downstream consumers do not rely on it.
    filename = video_name + ".mp3"
    audio_stream.download(filename=filename)
    return filename
def load_whisper_model(model_name: str = "medium"):
    """Load and return the Whisper model identified by *model_name*.

    Args:
        model_name: Name forwarded to ``whisper.load_model``; defaults to
            "medium".

    Returns:
        Whatever ``whisper.load_model`` returns for that name.
    """
    loaded_model = whisper.load_model(model_name)
    return loaded_model
def transcribe_audio_to_text(model, audio_path: str, language: str = "English"):
    """Transcribe an audio file with the given model.

    Args:
        model: Object exposing a ``transcribe`` method (e.g. the value
            returned by ``load_whisper_model``).
        audio_path: Path of the audio file to transcribe.
        language: Language forwarded to ``model.transcribe``.

    Returns:
        The value returned by ``model.transcribe``.
    """
    # Half precision is always disabled for the transcription call.
    transcription = model.transcribe(
        audio_path,
        language=language,
        fp16=False,
    )
    return transcription
def save_text_to_file(text: str, file_name: str) -> None:
    """Write *text* to *file_name* as UTF-8, replacing any existing content.

    This is a best-effort write: file-system failures are logged and
    swallowed rather than raised.

    Args:
        text: Content to write.
        file_name: Path of the file to create or overwrite.
    """
    try:
        # Explicit UTF-8 so non-ASCII text does not depend on the
        # platform's default encoding.
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(text)
    except OSError as e:
        # OSError covers IOError, FileNotFoundError and PermissionError.
        # Logged at ERROR so the failure is visible under default logging.
        logging.error("Error in file operation: %s", e)
def get_text(url: str, video_name: str) -> None:
    """Download a YouTube video's audio, transcribe it and save the text.

    The transcript is written to ``video_name + ".txt"``. A single progress
    bar advances once after each real stage completes (model load, download,
    transcription, save), so it reflects actual work instead of a timed
    simulation.

    Args:
        url: Address of the YouTube video.
        video_name: Base name for the downloaded audio and the transcript.
    """
    with tqdm(total=4, desc="Loading Model", unit="step", ascii=False) as pbar:
        model = load_whisper_model()
        pbar.update(1)

        pbar.set_description("Downloading Audio")
        audio_path = download_audio_from_youtube(url, video_name)
        pbar.update(1)

        pbar.set_description("Transcribing Audio")
        result = transcribe_audio_to_text(model, audio_path)
        pbar.update(1)

        pbar.set_description("Saving Text")
        save_text_to_file(result["text"], video_name + ".txt")
        pbar.update(1)

    print("Text extraction completed.")