diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..1bc99af
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,32 @@
+# Use an official Python runtime as a parent image
+FROM python:3.12-slim
+
+# Install system dependencies (skip recommended extras to keep the image small)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg \
+    libsndfile1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a volume for the Whisper model
+VOLUME /root/.cache/huggingface
+
+# Make port 8501 available to the world outside this container
+EXPOSE 8501
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy requirements.txt
+COPY requirements.txt /app
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Define environment variable
+ENV NAME=ReelCaptionMaker
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Run app.py when the container launches
+CMD ["streamlit", "run", "app.py"]
\ No newline at end of file
diff --git a/README.md b/README.md
index e3dd49e..685fedf 100644
--- a/README.md
+++ b/README.md
@@ -13,13 +13,15 @@ Before you begin, make sure you have the following installed on your computer:
 
 1. Open your computer's terminal or command prompt.
 
-2. Clone the repository:# reel-caption-maker
+2. Clone the repository:
    ```
    git clone https://github.com/yourusername/reel-caption-maker.git
    cd reel-caption-maker
    ```
 
 
+### Using Python locally
+
 3. Create a virtual environment:
    - On Windows:
      ```
@@ -50,6 +52,33 @@ streamlit run app.py
 
 3. Your default web browser should open automatically. If it doesn't, copy the URL shown in the terminal (usually http://localhost:8501) and paste it into your browser.
 
+### Using Docker
+
+To run the Reel Caption Maker using Docker, follow these steps:
+
+1. Ensure you have Docker installed on your system. If not, you can download and install it from [Docker's official website](https://www.docker.com/get-started).
+2. Build the Docker image:
+   ```
+   docker build -t reel-caption-maker .
+   ```
+3. Run the Docker container:
+   ```
+   docker run -p 8501:8501 -v whisper_model:/root/.cache/huggingface reel-caption-maker
+   ```
+   This command does the following:
+   - Maps port 8501 from the container to port 8501 on your host machine.
+   - Creates a volume named `whisper_model` to persist the Whisper model data.
+
+4. Open your web browser and navigate to `http://localhost:8501` to use the Reel Caption Maker.
+   - Note: The first time you run the container, it may take a few minutes to download the Whisper model. Subsequent runs will be faster as the model will be cached in the Docker volume.
+   - To stop the container, use `Ctrl+C` in the terminal where it's running.
+   - For convenience, you can use the provided `docker-run.sh` script to start the container:
+     ```
+     chmod +x docker-run.sh
+     ./docker-run.sh
+     ```
+     The script starts the container in the background and follows its logs; `Ctrl+C` only stops the log output, so run `docker stop reel-caption-maker` to stop the container.
+
 ## Using Reel Caption Maker
 
 1. Click "Browse files" to upload your video.
diff --git a/app.py b/app.py
index db60a3e..7404248 100644
--- a/app.py
+++ b/app.py
@@ -3,7 +3,7 @@ import os
 import tempfile
 from moviepy.editor import VideoFileClip
 import ffmpeg
-from transformers import WhisperProcessor, WhisperForConditionalGeneration
+from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperTokenizer
 import tqdm
 
 # Load Whisper model
@@ -19,39 +19,55 @@ def load_whisper_model():
 
 processor, model = load_whisper_model()
 
-def transcribe_audio(audio_file, language, chunk_length=3):  # Changed default to 3 seconds
+def transcribe_audio(audio_file, language, chunk_length=3):
+    """Transcribe an audio file in fixed-length chunks with the loaded Whisper model.
+
+    Args:
+        audio_file: Audio source handed to ``AudioLoader.load_audio``.
+        language: Language used to build the Whisper decoder prompt.
+        chunk_length: Length of each chunk in seconds.
+
+    Returns:
+        A list of ``{"start", "end", "text"}`` dicts, one per chunk, with
+        times in seconds; an empty list if the model is not loaded.
+    """
     if model is None or processor is None:
         st.error("Whisper model is not loaded. Cannot transcribe audio.")
         return []
-    
+
     # Load audio
     audio_input, sr = AudioLoader.load_audio(audio_file)
-    
+
     # Calculate number of samples per chunk
     samples_per_chunk = int(chunk_length * sr)
-    
+
+    # The decoder prompt depends only on the language and task, so build it
+    # once here instead of once per chunk.
+    tokenizer = WhisperTokenizer.from_pretrained(model.config._name_or_path, language=language)
+    forced_decoder_ids = tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
+
     segments = []
     for i in tqdm.tqdm(range(0, len(audio_input), samples_per_chunk)):
         chunk = audio_input[i:i+samples_per_chunk]
-        
+
         # Pad/trim audio chunk
         chunk_input = processor.feature_extractor(chunk, sampling_rate=sr, return_tensors="pt").input_features
 
         # Generate token ids
-        predicted_ids = model.generate(chunk_input, language=language)
-        
+        predicted_ids = model.generate(chunk_input, forced_decoder_ids=forced_decoder_ids)
+
         # Decode token ids to text
         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-        
+
         start_time = i / sr
         end_time = min((i + samples_per_chunk) / sr, len(audio_input) / sr)
-        
+
         segments.append({
             "start": start_time,
             "end": end_time,
             "text": transcription[0].strip()
         })
-        
+
     return segments
 
 def format_srt(segments):
diff --git a/build-local.sh b/build-local.sh
new file mode 100644
index 0000000..1df2709
--- /dev/null
+++ b/build-local.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+docker build -t reel-caption-maker .
\ No newline at end of file
diff --git a/docker-run.sh b/docker-run.sh
new file mode 100644
index 0000000..01be9e9
--- /dev/null
+++ b/docker-run.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Stop at the first failing command so logs are never followed for a container that failed to start
+set -e
+
+# Create a Docker network if it doesn't exist
+docker network create reel-caption-network 2>/dev/null || true
+
+# Remove existing container if it exists
+docker rm -f reel-caption-maker 2>/dev/null || true
+
+# Run the Docker container
+docker run -d \
+    --name reel-caption-maker \
+    --network reel-caption-network \
+    -p 8501:8501 \
+    -v whisper_model:/root/.cache/huggingface \
+    reel-caption-maker
+
+# Follow the container logs (Ctrl+C stops following; the container keeps running)
+docker logs -f reel-caption-maker
\ No newline at end of file