/// Extraction of embedded subtitles mod embedded; /// Synthesis of subtitles from audio using whisper.cpp mod whisper; use std::{collections::BTreeMap, sync::mpsc, thread}; use ffmpeg::Rational; use relm4::{ComponentSender, Worker}; use crate::tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue, SubtitleTrack, TrackMetadata}; pub struct SubtitleExtractor {} #[derive(Debug)] pub enum SubtitleExtractorMsg { ExtractFromUrl { url: String, // the index of the audio stream on which to run a whisper transcription whisper_stream_index: Option, }, } #[derive(Debug)] pub enum SubtitleExtractorOutput { NewCue(StreamIndex, SubtitleCue), ExtractionComplete, } impl Worker for SubtitleExtractor { type Init = (); type Input = SubtitleExtractorMsg; type Output = SubtitleExtractorOutput; fn init(_init: Self::Init, _sender: ComponentSender) -> Self { Self {} } fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender) { match msg { SubtitleExtractorMsg::ExtractFromUrl { url, whisper_stream_index: whisper_audio_stream_ix, } => { self.handle_extract_from_url(url, whisper_audio_stream_ix, sender); } } } } impl SubtitleExtractor { fn handle_extract_from_url( &mut self, url: String, whisper_audio_stream_ix: Option, sender: ComponentSender, ) { // Clear existing tracks SUBTITLE_TRACKS.write().clear(); match self.extract_subtitles(&url, whisper_audio_stream_ix, sender.clone()) { Ok(_) => { log::info!("Subtitle extraction completed successfully"); sender .output(SubtitleExtractorOutput::ExtractionComplete) .unwrap(); } Err(e) => { log::error!("Subtitle extraction failed: {}", e); } } } fn extract_subtitles( &self, url: &str, whisper_audio_stream_ix: Option, sender: ComponentSender, ) -> anyhow::Result<()> { let mut input = ffmpeg::format::input(&url)?; let mut subtitle_extractors = BTreeMap::new(); // create extractor for each subtitle stream for stream in input.streams() { let stream_ix = stream.index(); if stream.parameters().medium() == ffmpeg::media::Type::Subtitle { let metadata = TrackMetadata::from_ffmpeg_stream(&stream); let track = SubtitleTrack { metadata, cues: Vec::new(), }; SUBTITLE_TRACKS.write().insert(stream_ix, track); let context = ffmpeg::codec::Context::from_parameters(stream.parameters())?; let (packet_tx, packet_rx) = mpsc::channel(); let time_base = stream.time_base(); let sender = sender.clone(); let join_handle = thread::spawn(move || { embedded::extract_embedded_subtitles( stream_ix, context, time_base, packet_rx, sender, ) }); subtitle_extractors.insert(stream_ix, (packet_tx, join_handle)); } } if let Some(stream_ix) = whisper_audio_stream_ix { let stream = input.stream(stream_ix).unwrap(); let mut metadata = TrackMetadata::from_ffmpeg_stream(&stream); metadata.title = Some(match metadata.title { Some(title) => format!("Auto-generated from audio (Whisper): {}", title), None => "Auto-generated from audio (Whisper)".to_string(), }); let track = SubtitleTrack { metadata, cues: Vec::new(), }; SUBTITLE_TRACKS.write().insert(stream_ix, track); let context = ffmpeg::codec::Context::from_parameters(stream.parameters())?; let (packet_tx, packet_rx) = mpsc::channel(); let time_base = stream.time_base(); let sender = sender.clone(); let join_handle = thread::spawn(move || { whisper::generate_whisper_subtitles( stream_ix, context, time_base, packet_rx, sender, ) }); subtitle_extractors.insert(stream_ix, (packet_tx, join_handle)); } // process packets for (stream, packet) in input.packets() { let stream_index = stream.index(); if let Some((packet_tx, _)) = subtitle_extractors.get_mut(&stream_index) { packet_tx.send(packet).unwrap(); } } // wait for extraction to complete for (_, (_, join_handle)) in subtitle_extractors { join_handle .join() .unwrap() .unwrap_or_else(|e| log::error!("error running subtitle extraction: {}", e)); } Ok(()) } }