-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun.py
213 lines (171 loc) · 8.08 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import ffmpeg
import whisper
import os
import subprocess
import argparse
def extract_audio(video_file, audio_file):
try:
# Extract the audio from the video file
ffmpeg.input(video_file).output(audio_file).run(overwrite_output=True)
print(f"Audio extracted to {audio_file}")
except ffmpeg.Error as e:
print(f"Error extracting audio: {e.stderr.decode()}")
def generate_transcript(audio_file, transcript_file):
try:
# Load the Whisper model
model = whisper.load_model("base") # You can use "small", "medium", "large" for more accuracy
result = model.transcribe(audio_file)
# Write transcript to a .txt file
with open(transcript_file, 'w', encoding='utf-8') as f:
for segment in result['segments']:
f.write(f"{segment['text']}\n") # Write only the text, no timestamps
print(f"Transcript saved to {transcript_file}")
except Exception as e:
print(f"Error generating transcript: {str(e)}")
# Main function 1
def generate_video_transcript(video_file, transcript_file):
audio_file = "extracted_audio.wav"
# Extract audio from the video
extract_audio(video_file, audio_file)
# Generate transcript using Whisper
generate_transcript(audio_file, transcript_file)
# Clean up: Remove the extracted audio file if you don't need it
if os.path.exists(audio_file):
os.remove(audio_file)
# Function to convert seconds to SRT time format
def seconds_to_srt_time(seconds):
h = int(seconds // 3600)
m = int((seconds % 3600) // 60)
s = int(seconds % 60)
ms = int((seconds - int(seconds)) * 1000)
return f"{h:02}:{m:02}:{s:02},{ms:03}"
# Generate SRT file from Whisper transcription
def generate_srt(transcription, output_srt_file):
with open(output_srt_file, 'w', encoding='utf-8') as f:
for i, segment in enumerate(transcription['segments'], start=1):
start_time = seconds_to_srt_time(segment['start'])
end_time = seconds_to_srt_time(segment['end'])
text = segment['text'].strip()
f.write(f"{i}\n")
f.write(f"{start_time} --> {end_time}\n")
f.write(f"{text}\n\n")
# main function 2
def add_caption(i_video_file, o_video_file=None, lang=None, srt_file=None, txt_file=None):
audio_file = "extracted_audio.wav"
if o_video_file is None:
o_video_file = i_video_file[0:-4] + '_captions.mp4'
if srt_file is None:
srt_file = i_video_file[0:-4] + '.srt'
# Step 1: Extract audio from the video
print(f"generating audio file ...")
extract_audio(i_video_file, audio_file)
# Load and run Whisper to get transcription with timestamps
model = whisper.load_model("base")
if lang is None:
result = model.transcribe(audio_file)
else:
result = model.transcribe(audio_file, language=lang)
# Create subtitles.srt file
# srt_file = "captions.srt"
print("generating srt file {srt_file} ...")
generate_srt(result, srt_file)
generate_srt(result, txt_file)
# Add subtitles to the video using ffmpeg
print("generating output file {o_video_file} ...")
subprocess.run(['ffmpeg', '-i', i_video_file, '-vf', f"subtitles={srt_file}", '-c:a', 'copy', o_video_file, '-y'])
# Clean up: Remove the extracted audio file if you don't need it
if os.path.exists(audio_file):
os.remove(audio_file)
def add_caption_from_srtfile(i_video_file, o_video_file=None, srt_file=None):
if o_video_file is None:
o_video_file = i_video_file[0:-4] + '_captions.mp4'
if srt_file is None:
srt_file = i_video_file[0:-4] + '.srt'
# Add subtitles to the video using ffmpeg
print("generating output file {o_video_file} ...")
subprocess.run(['ffmpeg', '-i', i_video_file, '-vf', f"subtitles={srt_file}", '-c:a', 'copy', o_video_file, '-y'])
def print_usage():
print("command options:")
print("")
print("command 1:")
print(f"python run.py 1 [video file name]")
print(f"for example:")
print(f"python run.py 1 ling_1.mp4")
print("")
print("command 2:")
print(f"python run.py 2 [video file name] --txt_file [translated_subtitle file name]")
print(f"if [translated_subtitle file name] is the same as [video file name] (ex: video file name is ling_1.mp4, translated_subtitle file name is ling_1.txt): ")
print(f"python run.py 2 ling_1.mp4")
print(f"or you can also use --txt_file to indicate the translated_subtitle file you want to ")
print(f"python run.py 2 ling_1.mp4 --txt_file ling_1.txt")
print("")
print("command 3:")
print(f"python run.py 3 [video file name] --srt_file [translated_subtitle file name]")
print(f"if [translated_subtitle file name] is the same as [video file name] (ex: video file name is ling_1.mp4, translated_subtitle file name is ling_1.srt): ")
print(f"python run.py 3 ling_1.mp4")
print(f"or you can also use --srt_file to indicate the translated_subtitle file you want to ")
print(f"python run.py 3 ling_1.mp4 --srt_file ling_1.srt")
def main():
# Create the ArgumentParser object
parser = argparse.ArgumentParser(description="arguments for translating software")
# Add a required integer argument
parser.add_argument('step', type=int, help="run command: 1, 2, or 3")
parser.add_argument('i_video_file', type=str, help="original file name")
# Add optional arguments with default values
parser.add_argument('--txt_file', type=str, default=None, help="subtitle file (.txt) name")
parser.add_argument('--srt_file', type=str, default=None, help="subtitle file (.srt) name")
# Parse the arguments
args = parser.parse_args()
# Access the arguments
step = args.step
i_video_file = args.i_video_file
if not os.path.exists(i_video_file):
print(f"找不到原始视频文件{i_video_file}; 指定正确路径")
quit()
if step == 1:
o_video_file = i_video_file[0:-4] + '_src_captions.mp4'
txt_file = i_video_file[0:-4] + '.txt'
srt_file = i_video_file[0:-4] + '.srt'
add_caption(i_video_file, o_video_file=o_video_file, lang='ko', srt_file=srt_file, txt_file=txt_file)
print(f"\n\n\n\nProcess complete! check whether {o_video_file} and {txt_file} were generated")
elif step == 2:
# txt file as caption
o_video_file = i_video_file[0:-4] + '_dst_captions.mp4'
if args.txt_file is None:
txt_file = i_video_file[0:-4] + '.txt'
else:
txt_file = args.txt_file
if not os.path.exists(txt_file):
print(f"cannot locate subtitle file {txt_file}; please make sure it is in the correct directory")
quit()
add_caption_from_srtfile(i_video_file, o_video_file=o_video_file, srt_file=txt_file)
print(f"\n\n\n\nProcess complete! Check whether {o_video_file} was generated")
elif step == 3:
# srt file as caption
o_video_file = i_video_file[0:-4] + '_dst_captions.mp4'
if args.srt_file is None:
srt_file = i_video_file[0:-4] + '.srt'
else:
srt_file = args.srt_file
if not os.path.exists(srt_file):
print(f"cannot locate subtitle file {srt_file}; please make sure it is in the correct directory")
quit()
add_caption_from_srtfile(i_video_file, o_video_file=o_video_file, srt_file=srt_file)
print(f"\n\n\n\nProcess complete! Check whether {o_video_file} was generated")
else:
print(f"Error: Invalid command!")
print_usage()
quit()
if __name__ == '__main__':
main()
# Usage 1
# video_file = "arthistory_1.mp4" # Replace with your video file
# transcript_file = "arthistory_1_transcript.txt" # Desired output transcript file
# generate_video_transcript(video_file, transcript_file)
# Usage 2
# video_file = "arthistory_1.mp4" # Replace with your video file
# o_video_file = "arthistory_1_caption.mp4" # Desired output transcript file
# video_file = "ling_1.mp4" # Replace with your video file
# o_video_file = "arthistory_1_caption.mp4" # Desired output transcript file
# add_caption(video_file,lang='ko')
# add_caption_from_srtfile(video_file)