Changements de paramètres pour la performance

This commit is contained in:
François Pelletier 2024-11-03 23:12:05 -05:00
parent 9e02b85fe6
commit b077e7a7ee

20
app.py
View file

@@ -5,6 +5,7 @@ from moviepy.editor import VideoFileClip
import ffmpeg
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperTokenizer
import tqdm
import torch # Add this line to import PyTorch
# Load Whisper model
@st.cache_resource
@@ -38,11 +39,19 @@ def transcribe_audio(audio_file, language, chunk_length=3):
chunk = audio_input[i:i+samples_per_chunk]
# Pad/trim audio chunk
chunk_input = processor.feature_extractor(chunk, sampling_rate=sr, return_tensors="pt").input_features
inputs = processor(chunk, sampling_rate=sr, return_tensors="pt")
input_features = inputs.input_features
# Generate attention mask
attention_mask = torch.ones_like(input_features)
# Generate token ids
forced_decoder_ids = tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
predicted_ids = model.generate(chunk_input, forced_decoder_ids=forced_decoder_ids)
predicted_ids = model.generate(
input_features,
forced_decoder_ids=forced_decoder_ids,
attention_mask=attention_mask
)
# Decode token ids to text
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
@@ -156,8 +165,11 @@ def convert_to_web_compatible(input_path):
(
ffmpeg
.input(input_path)
.output(output_path, vcodec='libx264', acodec='aac',
video_bitrate='1000k', audio_bitrate='128k')
.output(output_path,
vcodec='h264_videotoolbox', # Use VideoToolbox for hardware-accelerated encoding
acodec='aac',
video_bitrate='1000k',
audio_bitrate='128k')
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
)