diff options
Diffstat (limited to 'src/subtitle_extractor.rs')
| -rw-r--r-- | src/subtitle_extractor.rs | 209 |
1 files changed, 0 insertions, 209 deletions
diff --git a/src/subtitle_extractor.rs b/src/subtitle_extractor.rs deleted file mode 100644 index b628d73..0000000 --- a/src/subtitle_extractor.rs +++ /dev/null @@ -1,209 +0,0 @@ -use std::collections::BTreeMap; - -use anyhow::Result; - -use ffmpeg::Rational; -use log::{debug, error, info}; -use relm4::{ComponentSender, SharedState, Worker}; - -pub type StreamIndex = usize; - -#[derive(Debug, Clone)] -pub struct SubtitleCue { - pub start: gst::ClockTime, - pub end: gst::ClockTime, - pub text: String, -} - -#[derive(Debug, Clone)] -pub struct SubtitleTrack { - pub language: Option<isolang::Language>, - pub title: Option<String>, - pub cues: Vec<SubtitleCue>, -} - -pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new(); - -pub struct SubtitleExtractor {} - -#[derive(Debug)] -pub enum SubtitleExtractorMsg { - ExtractFromUrl(String), -} - -#[derive(Debug)] -pub enum SubtitleExtractorOutput { - NewOrUpdatedTrackMetadata(StreamIndex), - NewCue(StreamIndex, SubtitleCue), - ExtractionComplete, -} - -impl Worker for SubtitleExtractor { - type Init = (); - type Input = SubtitleExtractorMsg; - type Output = SubtitleExtractorOutput; - - fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self { - Self {} - } - - fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) { - match msg { - SubtitleExtractorMsg::ExtractFromUrl(url) => { - self.handle_extract_from_url(url, sender); - } - } - } -} - -impl SubtitleExtractor { - fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) { - // Clear existing tracks - TRACKS.write().clear(); - - // Try to extract subtitles using ffmpeg - match self.extract_subtitles_ffmpeg(&url, &sender) { - Ok(_) => { - info!("Subtitle extraction completed successfully"); - sender - .output(SubtitleExtractorOutput::ExtractionComplete) - .unwrap(); - } - Err(e) => { - error!("FFmpeg extraction failed: {}", e); - } - } - } - - fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> { - let mut input = ffmpeg::format::input(&url)?; - - let mut subtitle_decoders = BTreeMap::new(); - - // create decoder for each subtitle stream - for (stream_index, stream) in input.streams().enumerate() { - if stream.parameters().medium() == ffmpeg::media::Type::Subtitle { - let language_code = stream.metadata().get("language").map(|s| s.to_string()); - let title = stream.metadata().get("title").map(|s| s.to_string()); - - let track = SubtitleTrack { - language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)), - title, - cues: Vec::new(), - }; - - TRACKS.write().insert(stream_index, track); - - sender - .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata( - stream_index, - )) - .unwrap(); - - let context = - ffmpeg::codec::context::Context::from_parameters(stream.parameters())?; - if let Ok(decoder) = context.decoder().subtitle() { - subtitle_decoders.insert(stream_index, decoder); - debug!("Created decoder for subtitle stream {}", stream_index); - } else { - error!( - "Failed to create decoder for subtitle stream {}", - stream_index - ); - } - } - } - - // process packets - for (stream, packet) in input.packets() { - let stream_index = stream.index(); - - if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) { - let mut subtitle = ffmpeg::Subtitle::new(); - if decoder.decode(&packet, &mut subtitle).is_ok() { - if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base()) - { - if let Some(track) = TRACKS.write().get_mut(&stream_index) { - track.cues.push(cue.clone()); - } - - sender - .output(SubtitleExtractorOutput::NewCue(stream_index, cue)) - .unwrap(); - } - } - } - } - - Ok(()) - } - - fn subtitle_to_cue( - subtitle: &ffmpeg::Subtitle, - packet: &ffmpeg::Packet, - time_base: Rational, - ) -> Option<SubtitleCue> { - let time_to_clock_time = |time: i64| { - let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000) - / time_base.denominator() as i64; - gst::ClockTime::from_nseconds(nseconds as u64) - }; - - let text = subtitle - .rects() - .into_iter() - .map(|rect| match rect { - ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(), - ffmpeg::subtitle::Rect::Ass(ass) => { - Self::extract_dialogue_text(ass.get()).unwrap_or(String::new()) - } - _ => String::new(), - }) - .collect::<Vec<String>>() - .join("\n— "); - - let start = time_to_clock_time(packet.pts()?); - let end = time_to_clock_time(packet.pts()? + packet.duration()); - - Some(SubtitleCue { start, end, text }) - } - - fn extract_dialogue_text(dialogue_line: &str) -> Option<String> { - // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text - // we need the 9th field (Text), so split on comma but only take first 9 splits - // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433 - let text = dialogue_line.splitn(9, ',').last()?; - - // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc. - let mut result = String::new(); - let mut in_tag = false; - let mut char_iter = text.chars().peekable(); - - while let Some(c) = char_iter.next() { - if c == '{' && char_iter.peek() == Some(&'\\') { - in_tag = true; - } else if c == '}' { - in_tag = false; - } else if !in_tag { - // process line breaks and hard spaces - if c == '\\' { - match char_iter.peek() { - Some(&'N') => { - char_iter.next(); - result.push('\n'); - } - Some(&'n') | Some(&'h') => { - char_iter.next(); - result.push(' '); - } - _ => result.push(c), - } - } else { - result.push(c); - } - } - } - - Some(result) - } -} |