Add Docker

François Pelletier 2024-10-14 13:17:17 -04:00
parent 26938f8294
commit 2f1be80a4b
5 changed files with 96 additions and 12 deletions

Dockerfile (new file, 32 lines)

@@ -0,0 +1,32 @@
# Use an official Python runtime as a parent image
FROM python:3.12-slim
# Install system dependencies
RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*
# Create a volume for the Whisper model
VOLUME /root/.cache/huggingface
# Make port 8501 available to the world outside this container
EXPOSE 8501
# Set the working directory in the container
WORKDIR /app
# Copy requirements.txt
COPY requirements.txt /app
# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Define environment variable
ENV NAME=ReelCaptionMaker
# Copy the current directory contents into the container at /app
COPY . /app
# Run app.py when the container launches
CMD ["streamlit", "run", "app.py"]

README.md

@@ -13,13 +13,15 @@ Before you begin, make sure you have the following installed on your computer:
1. Open your computer's terminal or command prompt.
2. Clone the repository:
```
git clone https://github.com/yourusername/reel-caption-maker.git
cd reel-caption-maker
```
### Using Python locally
3. Create a virtual environment:
- On Windows:
```
@@ -50,6 +52,33 @@ streamlit run app.py
3. Your default web browser should open automatically. If it doesn't, copy the URL shown in the terminal (usually http://localhost:8501) and paste it into your browser.
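To confirm the server is up without opening a browser, a quick optional check (assumes `curl` is installed):
```
curl -I http://localhost:8501
```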
### Using Docker
To run the Reel Caption Maker using Docker, follow these steps:
1. Ensure you have Docker installed on your system. If not, download and install it from [Docker's official website](https://www.docker.com/get-started).
2. Build the Docker image:
```
docker build -t reel-caption-maker .
```
3. Run the Docker container:
```
docker run -p 8501:8501 -v whisper_model:/root/.cache/huggingface reel-caption-maker
```
This command does the following:
- Maps port 8501 from the container to port 8501 on your host machine.
- Mounts a named volume `whisper_model` at `/root/.cache/huggingface` so the downloaded Whisper model persists across runs.
4. Open your web browser and navigate to `http://localhost:8501` to use the Reel Caption Maker.
- Note: the first time you run the container, it may take a few minutes to download the Whisper model. Subsequent runs are faster because the model is cached in the Docker volume (see the snippet after this list).
- To stop the container, use `Ctrl+C` in the terminal where it's running.
- For convenience, you can use the provided `docker-run.sh` script to start the container:
```
chmod +x docker-run.sh
./docker-run.sh
```
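To confirm the Whisper model is actually being cached between runs, you can inspect the named volume with the standard Docker CLI (not part of this commit):
```
docker volume ls | grep whisper_model
docker volume inspect whisper_model
```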
## Using Reel Caption Maker
1. Click "Browse files" to upload your video.

app.py (26 lines changed)

@@ -3,7 +3,7 @@ import os
import tempfile
from moviepy.editor import VideoFileClip
import ffmpeg
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperTokenizer
import tqdm
# Load Whisper model
@@ -19,39 +19,43 @@ def load_whisper_model():
processor, model = load_whisper_model()
def transcribe_audio(audio_file, language, chunk_length=3):
    if model is None or processor is None:
        st.error("Whisper model is not loaded. Cannot transcribe audio.")
        return []
    # Load audio
    audio_input, sr = AudioLoader.load_audio(audio_file)
    # Calculate number of samples per chunk
    samples_per_chunk = int(chunk_length * sr)
    # Get a tokenizer for the target language (used to build forced decoder prompts)
    tokenizer = WhisperTokenizer.from_pretrained(model.config._name_or_path, language=language)
    segments = []
    for i in tqdm.tqdm(range(0, len(audio_input), samples_per_chunk)):
        chunk = audio_input[i:i+samples_per_chunk]
        # Convert the chunk to log-mel input features (the extractor pads/trims to the model's window)
        chunk_input = processor.feature_extractor(chunk, sampling_rate=sr, return_tensors="pt").input_features
        # Generate token ids, forcing the target language and the "transcribe" task
        forced_decoder_ids = tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
        predicted_ids = model.generate(chunk_input, forced_decoder_ids=forced_decoder_ids)
        # Decode token ids to text
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
        start_time = i / sr
        end_time = min((i + samples_per_chunk) / sr, len(audio_input) / sr)
        segments.append({
            "start": start_time,
            "end": end_time,
            "text": transcription[0].strip()
        })
    return segments
def format_srt(segments):
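The hunk ends at `format_srt`'s signature, so its body is not shown here. As a rough sketch of what such a helper typically does with the `segments` produced above (a hypothetical implementation, not the commit's actual code):
```
def format_srt_sketch(segments):
    # Render segments as SRT blocks: index, "HH:MM:SS,mmm --> HH:MM:SS,mmm", text
    def ts(seconds):
        h, rem = divmod(int(seconds), 3600)
        m, s = divmod(rem, 60)
        ms = int((seconds - int(seconds)) * 1000)
        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
    blocks = []
    for n, seg in enumerate(segments, start=1):
        blocks.append(f"{n}\n{ts(seg['start'])} --> {ts(seg['end'])}\n{seg['text']}\n")
    return "\n".join(blocks)
```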

build-local.sh (new file, 1 line)

@@ -0,0 +1 @@
docker build -t reel-caption-maker .

docker-run.sh (new file, 18 lines)

@@ -0,0 +1,18 @@
#!/bin/bash
# Create a Docker network if it doesn't exist
docker network create reel-caption-network 2>/dev/null || true
# Remove existing container if it exists
docker rm -f reel-caption-maker 2>/dev/null || true
# Run the Docker container
docker run -d \
    --name reel-caption-maker \
    --network reel-caption-network \
    -p 8501:8501 \
    -v whisper_model:/root/.cache/huggingface \
    reel-caption-maker
# Print the container logs
docker logs -f reel-caption-maker
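A possible cleanup counterpart, using only standard Docker commands (not included in the commit); removing the volume also discards the cached Whisper model:
```
docker stop reel-caption-maker
docker rm reel-caption-maker
docker volume rm whisper_model   # optional: frees the cached model
```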