| author | Malte Voos <git@mal.tc> | 2025-11-14 15:30:49 +0100 |
|---|---|---|
| committer | Malte Voos <git@mal.tc> | 2025-11-14 15:30:49 +0100 |
| commit | a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e (patch) | |
| tree | 542b42d3316138043272faba42e0d1005f8403b6 /src | |
| parent | a42a73378b7c527a5e4600544b2d7a86d68c5aac (diff) | |
| download | lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.tar.gz, lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.zip | |
implement file/url open dialog
Diffstat (limited to 'src')
| -rw-r--r-- | src/app.rs | 84 |
| -rw-r--r-- | src/main.rs | 21 |
| -rw-r--r-- | src/open_dialog.rs | 328 |
| -rw-r--r-- | src/subtitle_extraction/embedded.rs | 118 |
| -rw-r--r-- | src/subtitle_extraction/mod.rs | 159 |
| -rw-r--r-- | src/subtitle_extraction/whisper.rs | 75 |
| -rw-r--r-- | src/subtitle_extractor.rs | 209 |
| -rw-r--r-- | src/subtitle_extractor_aishit.rs | 732 |
| -rw-r--r-- | src/subtitle_selection_dialog.rs | 214 |
| -rw-r--r-- | src/subtitle_view.rs | 1 |
| -rw-r--r-- | src/track_selector.rs | 188 |
| -rw-r--r-- | src/tracks.rs | 38 |
| -rw-r--r-- | src/transcript.rs | 4 |
| -rw-r--r-- | src/util/tracker.rs | 6 |
14 files changed, 1768 insertions, 409 deletions
diff --git a/src/app.rs b/src/app.rs
index 18f03e8..7aa5abd 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -2,26 +2,27 @@ use adw::prelude::*;
 use relm4::{WorkerController, prelude::*};
 
 use crate::{
+    open_dialog::{OpenDialog, OpenDialogMsg, OpenDialogOutput},
     player::{Player, PlayerMsg, PlayerOutput},
     preferences::{Preferences, PreferencesMsg},
-    subtitle_extractor::{
-        StreamIndex, SubtitleExtractor, SubtitleExtractorMsg, SubtitleExtractorOutput, TRACKS,
-    },
+    subtitle_extraction::{SubtitleExtractor, SubtitleExtractorMsg, SubtitleExtractorOutput},
     subtitle_selection_dialog::{
         SubtitleSelectionDialog, SubtitleSelectionDialogMsg, SubtitleSelectionDialogOutput,
     },
     subtitle_view::{SubtitleView, SubtitleViewMsg, SubtitleViewOutput},
+    tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue},
     transcript::{Transcript, TranscriptMsg, TranscriptOutput},
    util::OptionTracker,
 };
 
 pub struct App {
-    url: String,
     transcript: Controller<Transcript>,
     player: Controller<Player>,
     subtitle_view: Controller<SubtitleView>,
     extractor: WorkerController<SubtitleExtractor>,
+    preferences: Controller<Preferences>,
+    open_url_dialog: Controller<OpenDialog>,
     subtitle_selection_dialog: Controller<SubtitleSelectionDialog>,
 
     primary_stream_ix: Option<StreamIndex>,
@@ -37,20 +38,24 @@ pub struct App {
 
 #[derive(Debug)]
 pub enum AppMsg {
-    NewOrUpdatedTrackMetadata(StreamIndex),
-    NewCue(StreamIndex, crate::subtitle_extractor::SubtitleCue),
+    NewCue(StreamIndex, SubtitleCue),
     SubtitleExtractionComplete,
     PrimarySubtitleTrackSelected(Option<StreamIndex>),
     SecondarySubtitleTrackSelected(Option<StreamIndex>),
     PositionUpdate(gst::ClockTime),
     SetHoveringSubtitleCue(bool),
+    ShowUrlOpenDialog,
     ShowPreferences,
     ShowSubtitleSelectionDialog,
+    Play {
+        url: String,
+        whisper_stream_index: Option<StreamIndex>,
+    },
 }
 
 #[relm4::component(pub)]
 impl SimpleComponent for App {
-    type Init = String;
+    type Init = ();
     type Input = AppMsg;
     type Output = ();
 
@@ -61,10 +66,13 @@ impl SimpleComponent for App {
             set_default_width: 800,
             set_default_height: 600,
 
-            #[name(toolbar_view)]
             adw::ToolbarView {
                 add_top_bar = &adw::HeaderBar {
                     pack_start = &gtk::Button {
+                        set_label: "Open...",
+                        connect_clicked => AppMsg::ShowUrlOpenDialog,
+                    },
+                    pack_end = &gtk::Button {
                        set_icon_name: "settings-symbolic",
                        connect_clicked => AppMsg::ShowPreferences,
                    }
@@ -86,7 +94,7 @@ impl SimpleComponent for App {
     }
 
     fn init(
-        url: Self::Init,
+        _init: Self::Init,
         root: Self::Root,
         sender: ComponentSender<Self>,
     ) -> ComponentParts<Self> {
@@ -112,9 +120,6 @@
         let extractor = SubtitleExtractor::builder().detach_worker(()).forward(
             sender.input_sender(),
             |output| match output {
-                SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(stream_index) => {
-                    AppMsg::NewOrUpdatedTrackMetadata(stream_index)
-                }
                 SubtitleExtractorOutput::NewCue(stream_index, cue) => {
                     AppMsg::NewCue(stream_index, cue)
                 }
@@ -123,6 +128,18 @@
         );
         let preferences = Preferences::builder().launch(root.clone().into()).detach();
+        let open_url_dialog = OpenDialog::builder().launch(root.clone().into()).forward(
+            sender.input_sender(),
+            |output| match output {
+                OpenDialogOutput::Play {
+                    url,
+                    whisper_stream_index,
+                } => AppMsg::Play {
+                    url,
+                    whisper_stream_index,
+                },
+            },
+        );
         let subtitle_selection_dialog = SubtitleSelectionDialog::builder()
             .launch(root.clone().into())
             .forward(sender.input_sender(), |output| match output {
@@ -135,12 +152,13 @@
             });
 
         let model = Self {
-            url: url.clone(), // TODO remove clone
             player,
             transcript,
             subtitle_view,
             extractor,
+            preferences,
+            open_url_dialog,
             subtitle_selection_dialog,
 
             primary_stream_ix: None,
@@ -155,26 +173,14 @@
 
         let widgets = view_output!();
 
-        model
-            .player
-            .sender()
-            .send(PlayerMsg::SetUrl(url.clone()))
-            .unwrap();
-        model
-            .extractor
-            .sender()
-            .send(SubtitleExtractorMsg::ExtractFromUrl(url))
-            .unwrap();
-
         ComponentParts { model, widgets }
     }
 
-    fn update(&mut self, msg: Self::Input, _sender: ComponentSender<Self>) {
+    fn update(&mut self, message: Self::Input, _sender: ComponentSender<Self>) {
         self.primary_last_cue_ix.reset();
         self.secondary_last_cue_ix.reset();
 
-        match msg {
-            AppMsg::NewOrUpdatedTrackMetadata(_stream_index) => {}
+        match message {
             AppMsg::NewCue(stream_index, cue) => {
                 self.transcript
                     .sender()
@@ -257,6 +263,12 @@
                     self.autopaused = false;
                 }
             }
+            AppMsg::ShowUrlOpenDialog => {
+                self.open_url_dialog
+                    .sender()
+                    .send(OpenDialogMsg::Show)
+                    .unwrap();
+            }
             AppMsg::ShowPreferences => {
                 self.preferences
                     .sender()
@@ -269,6 +281,22 @@
                     .send(SubtitleSelectionDialogMsg::Show)
                     .unwrap();
             }
+            AppMsg::Play {
+                url,
+                whisper_stream_index,
+            } => {
+                self.player
+                    .sender()
+                    .send(PlayerMsg::SetUrl(url.clone()))
+                    .unwrap();
+                self.extractor
+                    .sender()
+                    .send(SubtitleExtractorMsg::ExtractFromUrl {
+                        url,
+                        whisper_stream_index,
+                    })
+                    .unwrap();
+            }
         }
     }
 }
@@ -279,7 +307,7 @@
         position: gst::ClockTime,
         last_cue_ix: &mut OptionTracker<usize>,
     ) -> Option<String> {
-        let lock = TRACKS.read();
+        let lock = SUBTITLE_TRACKS.read();
         let track = lock.get(&stream_ix)?;
 
         // try to find current cue quickly (should usually succeed during playback)
diff --git a/src/main.rs b/src/main.rs
index 0b7db50..01ca56d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,15 +1,16 @@
 mod app;
 mod cue_view;
+mod open_dialog;
 mod player;
 mod preferences;
-mod subtitle_extractor;
+mod subtitle_extraction;
 mod subtitle_selection_dialog;
 mod subtitle_view;
+mod track_selector;
+mod tracks;
 mod transcript;
 mod util;
 
-use std::env;
-
 use gtk::{CssProvider, STYLE_PROVIDER_PRIORITY_APPLICATION, gdk, glib};
 use relm4::RelmApp;
 
@@ -18,12 +19,12 @@ use crate::app::App;
 
 fn main() {
     env_logger::init();
 
-    let args: Vec<String> = env::args().collect();
-    if args.len() != 2 {
-        eprintln!("Usage: {} <video_url>", args[0]);
-        std::process::exit(1);
-    }
-    let video_url = args[1].clone();
+    // let args: Vec<String> = env::args().collect();
+    // if args.len() != 2 {
+    //     eprintln!("Usage: {} <video_url>", args[0]);
+    //     std::process::exit(1);
+    // }
+    // let video_url = args[1].clone();
 
     gtk::init().expect("Failed to initialize GTK");
     gst::init().expect("Failed to initialize GStreamer");
@@ -43,5 +44,5 @@ fn main() {
     relm4::RELM_THREADS.set(4).unwrap();
 
     let relm = RelmApp::new("tc.mal.lleap").with_args(vec![]);
-    relm.run::<App>(video_url);
+    relm.run::<App>(());
 }
diff --git a/src/open_dialog.rs b/src/open_dialog.rs
new file mode 100644
index 0000000..2f17c59
--- /dev/null
+++ b/src/open_dialog.rs
@@ -0,0 +1,328 @@
+use std::collections::BTreeMap;
+
+use adw::prelude::*;
+use gtk::gio;
+use gtk::glib::clone;
+use relm4::prelude::*;
+
+use crate::track_selector::{
+    TrackInfo, TrackSelector, TrackSelectorInit, TrackSelectorMsg, TrackSelectorOutput,
+};
+use crate::tracks::{StreamIndex, TrackMetadata};
+use crate::util::Tracker;
+
+pub struct OpenDialog {
+    parent_window: adw::ApplicationWindow,
+    dialog: adw::PreferencesDialog,
+    toast_overlay: Option<adw::ToastOverlay>,
+    navigation_view: Option<adw::NavigationView>,
+    whisper_track_selector: Controller<TrackSelector>,
+
+    url: Tracker<String>,
+    do_whisper_extraction: bool,
+    whisper_stream_index: Option<StreamIndex>,
+
+    metadata_command_running: bool,
+}
+
+#[derive(Debug)]
+pub enum OpenDialogMsg {
+    Show,
+    Next,
+    Cancel,
+    SelectFile,
+    FileSelected(gio::File),
+    UrlChanged(String),
+    SetDoWhisperExtraction(bool),
+    WhisperTrackSelected(Option<StreamIndex>),
+    Play,
+}
+
+#[derive(Debug)]
+pub enum OpenDialogOutput {
+    Play {
+        url: String,
+        whisper_stream_index: Option<StreamIndex>,
+    },
+}
+
+#[relm4::component(pub)]
+impl Component for OpenDialog {
+    type Init = adw::ApplicationWindow;
+    type Input = OpenDialogMsg;
+    type Output = OpenDialogOutput;
+    type CommandOutput = Result<BTreeMap<StreamIndex, TrackMetadata>, ffmpeg::Error>;
+
+    view! {
+        #[root]
+        adw::PreferencesDialog {
+            set_title: "Open URL",
+
+            #[wrap(Some)]
+            #[name(toast_overlay)]
+            set_child = &adw::ToastOverlay {
+                #[wrap(Some)]
+                #[name(navigation_view)]
+                set_child = &adw::NavigationView {
+                    add = &adw::NavigationPage {
+                        set_title: "Open File or Stream",
+
+                        #[wrap(Some)]
+                        set_child = &adw::ToolbarView {
+                            add_top_bar = &adw::HeaderBar {
+                                set_show_end_title_buttons: false,
+
+                                pack_start = &gtk::Button {
+                                    set_label: "Cancel",
+                                    connect_clicked => OpenDialogMsg::Cancel,
+                                },
+
+                                pack_end = &gtk::Button {
+                                    set_label: "Next",
+                                    #[watch]
+                                    set_sensitive: !(model.url.get().is_empty() || model.metadata_command_running),
+                                    connect_clicked => OpenDialogMsg::Next,
+                                    add_css_class: "suggested-action",
+                                },
+
+                                pack_end = &adw::Spinner {
+                                    #[watch]
+                                    set_visible: model.metadata_command_running,
+                                },
+                            },
+
+                            #[wrap(Some)]
+                            set_content = &adw::PreferencesPage {
+                                adw::PreferencesGroup {
+                                    set_title: "Open a file from your computer",
+                                    adw::ButtonRow {
+                                        set_title: "Select File",
+                                        connect_activated => OpenDialogMsg::SelectFile,
+                                    }
+                                },
+
+                                adw::PreferencesGroup {
+                                    set_title: "Or, enter a stream URL",
+                                    set_description: Some("Currently, only file:// and http(s):// URLs are officially supported, although other protocols may work as well."),
+
+                                    adw::EntryRow {
+                                        set_title: "URL",
+                                        #[track(model.url.is_dirty())]
+                                        set_text: model.url.get(),
+                                        connect_changed[sender] => move |entry| {
+                                            sender.input(OpenDialogMsg::UrlChanged(entry.text().to_string()));
+                                        },
+                                    }
+                                }
+                            }
+                        }
+                    },
+
+                    add = &adw::NavigationPage {
+                        set_tag = Some("playback_options"),
+                        set_title: "Playback Options",
+
+                        #[wrap(Some)]
+                        set_child = &adw::ToolbarView {
+                            add_top_bar = &adw::HeaderBar {
+                                set_show_end_title_buttons: false,
+
+                                pack_end = &gtk::Button {
+                                    connect_clicked => OpenDialogMsg::Play,
+                                    add_css_class: "suggested-action",
+
+                                    gtk::Label {
+                                        set_text: "Play",
+                                    }
+                                },
+                            },
+
+                            #[wrap(Some)]
+                            set_content = &adw::PreferencesPage {
+                                adw::PreferencesGroup {
+                                    adw::ExpanderRow {
+                                        set_title: "Generate subtitles from audio",
+                                        set_subtitle: "See also \"Whisper settings\" in Preferences",
+                                        set_show_enable_switch: true,
+                                        #[watch]
+                                        set_enable_expansion: model.do_whisper_extraction,
+                                        connect_enable_expansion_notify[sender] => move |expander_row| {
+                                            sender.input(OpenDialogMsg::SetDoWhisperExtraction(expander_row.enables_expansion()))
+                                        },
+
+                                        add_row: model.whisper_track_selector.widget(),
+                                    },
+                                },
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    fn init(
+        parent_window: Self::Init,
+        root: Self::Root,
+        sender: ComponentSender<Self>,
+    ) -> ComponentParts<Self> {
+        let whisper_track_selector = TrackSelector::builder()
+            .launch(TrackSelectorInit {
+                title: "Audio track",
+                subtitle: None,
+            })
+            .forward(sender.input_sender(), |output| match output {
+                TrackSelectorOutput::Changed(ix) => OpenDialogMsg::WhisperTrackSelected(ix),
+            });
+        let mut model = Self {
+            parent_window,
+            dialog: root.clone(),
+            toast_overlay: None,
+            navigation_view: None,
+            whisper_track_selector,
+
+            url: Tracker::new(String::new()),
+            do_whisper_extraction: false,
+            whisper_stream_index: None,
+
+            metadata_command_running: false,
+        };
+
+        let widgets = view_output!();
+
+        model.toast_overlay = Some(widgets.toast_overlay.clone());
+        model.navigation_view = Some(widgets.navigation_view.clone());
+
+        ComponentParts { model, widgets }
+    }
+
+    fn update(&mut self, message: Self::Input, sender: ComponentSender<Self>, _root: &Self::Root) {
+        match message {
+            OpenDialogMsg::Show => {
+                self.reset();
+                self.dialog.present(Some(&self.parent_window));
+            }
+            OpenDialogMsg::UrlChanged(url) => self.url.set_clean(url),
+            OpenDialogMsg::Next => self.fetch_metadata(sender),
+            OpenDialogMsg::Cancel => {
+                self.dialog.close();
+            }
+            OpenDialogMsg::SelectFile => {
+                let dialog = gtk::FileDialog::new();
+                dialog.open(
+                    Some(&self.parent_window),
+                    None as Option<&gio::Cancellable>,
+                    clone!(
+                        #[strong]
+                        sender,
+                        move |res| {
+                            if let Ok(file) = res {
+                                sender.input(OpenDialogMsg::FileSelected(file));
+                            }
+                        }
+                    ),
+                );
+            }
+            OpenDialogMsg::FileSelected(file) => {
+                self.url.set(file.uri().into());
+            }
+            OpenDialogMsg::Play => {
+                sender
+                    .output(OpenDialogOutput::Play {
+                        url: self.url.get().clone(),
+                        whisper_stream_index: if self.do_whisper_extraction {
+                            self.whisper_stream_index
+                        } else {
+                            None
+                        },
+                    })
+                    .unwrap();
+                self.dialog.close();
+            }
+            OpenDialogMsg::SetDoWhisperExtraction(val) => {
+                self.do_whisper_extraction = val;
+            }
+            OpenDialogMsg::WhisperTrackSelected(track_index) => {
+                self.whisper_stream_index = track_index;
+            }
+        }
+    }
+
+    // once we get all the audio track metadata, we update the whisper track
+    // dropdown
+    fn update_cmd(
+        &mut self,
+        message: Self::CommandOutput,
+        _sender: ComponentSender<Self>,
+        _root: &Self::Root,
+    ) {
+        self.metadata_command_running = false;
+
+        match message {
+            Ok(audio_tracks) => {
+                let list_model = gio::ListStore::new::<TrackInfo>();
+
+                for (&stream_index, track) in audio_tracks.iter() {
+                    let track_info = TrackInfo::new(
+                        stream_index,
+                        track.language.map(|lang| lang.to_name()),
+                        track.title.clone(),
+                    );
+                    list_model.append(&track_info);
+                }
+
+                self.whisper_track_selector
+                    .sender()
+                    .send(TrackSelectorMsg::SetListModel(list_model))
+                    .unwrap();
+
+                self.next();
+            }
+            Err(e) => {
+                let toast = adw::Toast::builder()
+                    .title(&format!("Error fetching stream metadata: {}", e))
+                    .build();
+
+                self.toast_overlay.as_ref().unwrap().add_toast(toast);
+            }
+        }
+    }
+}
+
+impl OpenDialog {
+    fn reset(&mut self) {
+        self.url.get_mut().clear();
+        self.do_whisper_extraction = false;
+        self.whisper_stream_index = None;
+    }
+
+    fn fetch_metadata(&mut self, sender: ComponentSender<Self>) {
+        let url = self.url.get().clone();
+
+        sender.spawn_oneshot_command(move || {
+            let input = ffmpeg::format::input(&url)?;
+
+            let audio_tracks = input
+                .streams()
+                .filter_map(|stream| {
+                    if stream.parameters().medium() == ffmpeg::media::Type::Audio {
+                        Some((stream.index(), TrackMetadata::from_ffmpeg_stream(&stream)))
+                    } else {
+                        None
+                    }
+                })
+                .collect::<BTreeMap<_, _>>();
+
+            Ok(audio_tracks)
+        });
+
+        self.metadata_command_running = true;
+    }
+
+    fn next(&self) {
+        self.navigation_view
+            .as_ref()
+            .unwrap()
+            .push_by_tag("playback_options");
+    }
+}
diff --git a/src/subtitle_extraction/embedded.rs b/src/subtitle_extraction/embedded.rs
new file mode 100644
index 0000000..5cdf813
--- /dev/null
+++ b/src/subtitle_extraction/embedded.rs
@@ -0,0 +1,118 @@
+use std::sync::mpsc;
+
+use anyhow::Context;
+
+use crate::subtitle_extraction::*;
+
+pub fn extract_embedded_subtitles(
+    // stream index to use when storing extracted subtitles, this index already
+    // has to be in TRACKS when this function is called!
+    stream_ix: StreamIndex,
+    context: ffmpeg::codec::Context,
+    time_base: ffmpeg::Rational,
+    packet_rx: mpsc::Receiver<ffmpeg::Packet>,
+    sender: ComponentSender<SubtitleExtractor>,
+) -> anyhow::Result<()> {
+    let mut decoder = context
+        .decoder()
+        .subtitle()
+        .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?;
+
+    while let Ok(packet) = packet_rx.recv() {
+        let mut subtitle = ffmpeg::Subtitle::new();
+        match decoder.decode(&packet, &mut subtitle) {
+            Ok(true) => {
+                if let Some(cue) = parse_subtitle(&subtitle, &packet, time_base) {
+                    SUBTITLE_TRACKS
+                        .write()
+                        .get_mut(&stream_ix)
+                        .unwrap()
+                        .cues
+                        .push(cue.clone());
+                    sender
+                        .output(SubtitleExtractorOutput::NewCue(stream_ix, cue))
+                        .unwrap();
+                } else {
+                    log::error!("error parsing subtitle at pts {:?}", packet.pts())
+                }
+            }
+            Ok(false) => {
+                log::debug!("got empty (?) subtitle, not sure if this should ever happen");
+            }
+            Err(e) => {
+                log::error!("error decoding subtitle: {:?}", e)
+            }
+        }
+    }
+
+    Ok(())
+}
+
+fn parse_subtitle(
+    subtitle: &ffmpeg::Subtitle,
+    packet: &ffmpeg::Packet,
+    time_base: Rational,
+) -> Option<SubtitleCue> {
+    let time_to_clock_time = |time: i64| {
+        let nseconds: i64 =
+            (time * time_base.numerator() as i64 * 1_000_000_000) / time_base.denominator() as i64;
+        gst::ClockTime::from_nseconds(nseconds as u64)
+    };
+
+    let text = subtitle
+        .rects()
+        .into_iter()
+        .map(|rect| match rect {
+            ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
+            ffmpeg::subtitle::Rect::Ass(ass) => {
+                extract_dialogue_text(ass.get()).unwrap_or(String::new())
+            }
+            _ => String::new(),
+        })
+        .collect::<Vec<String>>()
+        .join("\n— ");
+
+    let start = time_to_clock_time(packet.pts()?);
+    let end = time_to_clock_time(packet.pts()? + packet.duration());
+
+    Some(SubtitleCue { start, end, text })
+}
+
+fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
+    // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
+    // we need the 9th field (Text), so split on comma but only take first 9 splits
+    // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
+    let text = dialogue_line.splitn(9, ',').last()?;
+
+    // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
+    let mut result = String::new();
+    let mut in_tag = false;
+    let mut char_iter = text.chars().peekable();
+
+    while let Some(c) = char_iter.next() {
+        if c == '{' && char_iter.peek() == Some(&'\\') {
+            in_tag = true;
+        } else if c == '}' {
+            in_tag = false;
+        } else if !in_tag {
+            // process line breaks and hard spaces
+            if c == '\\' {
+                match char_iter.peek() {
+                    Some(&'N') => {
+                        char_iter.next();
+                        result.push('\n');
+                    }
+                    Some(&'n') | Some(&'h') => {
+                        char_iter.next();
+                        result.push(' ');
+                    }
+                    _ => result.push(c),
+                }
+            } else {
+                result.push(c);
+            }
+        }
+    }
+
+    Some(result)
+}
diff --git a/src/subtitle_extraction/mod.rs b/src/subtitle_extraction/mod.rs
new file mode 100644
index 0000000..9e7fff4
--- /dev/null
+++ b/src/subtitle_extraction/mod.rs
@@ -0,0 +1,159 @@
+/// Extraction of embedded subtitles
+mod embedded;
+/// Synthesis of subtitles from audio using whisper.cpp
+mod whisper;
+
+use std::{collections::BTreeMap, sync::mpsc, thread};
+
+use ffmpeg::Rational;
+use relm4::{ComponentSender, Worker};
+
+use crate::tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue, SubtitleTrack, TrackMetadata};
+
+pub struct SubtitleExtractor {}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorMsg {
+    ExtractFromUrl {
+        url: String,
+        // the index of the audio stream on which to run a whisper transcription
+        whisper_stream_index: Option<usize>,
+    },
+}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorOutput {
+    NewCue(StreamIndex, SubtitleCue),
+    ExtractionComplete,
+}
+
+impl Worker for SubtitleExtractor {
+    type Init = ();
+    type Input = SubtitleExtractorMsg;
+    type Output = SubtitleExtractorOutput;
+
+    fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
+        Self {}
+    }
+
+    fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
+        match msg {
+            SubtitleExtractorMsg::ExtractFromUrl {
+                url,
+                whisper_stream_index: whisper_audio_stream_ix,
+            } => {
+                self.handle_extract_from_url(url, whisper_audio_stream_ix, sender);
+            }
+        }
+    }
+}
+
+impl SubtitleExtractor {
+    fn handle_extract_from_url(
+        &mut self,
+        url: String,
+        whisper_audio_stream_ix: Option<usize>,
+        sender: ComponentSender<Self>,
+    ) {
+        // Clear existing tracks
+        SUBTITLE_TRACKS.write().clear();
+
+        match self.extract_subtitles(&url, whisper_audio_stream_ix, sender.clone()) {
+            Ok(_) => {
+                log::info!("Subtitle extraction completed successfully");
+                sender
+                    .output(SubtitleExtractorOutput::ExtractionComplete)
+                    .unwrap();
+            }
+            Err(e) => {
+                log::error!("Subtitle extraction failed: {}", e);
+            }
+        }
+    }
+
+    fn extract_subtitles(
+        &self,
+        url: &str,
+        whisper_audio_stream_ix: Option<usize>,
+        sender: ComponentSender<Self>,
+    ) -> anyhow::Result<()> {
+        let mut input = ffmpeg::format::input(&url)?;
+
+        let mut subtitle_extractors = BTreeMap::new();
+
+        // create extractor for each subtitle stream
+        for stream in input.streams() {
+            let stream_ix = stream.index();
+
+            if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
+                let metadata = TrackMetadata::from_ffmpeg_stream(&stream);
+                let track = SubtitleTrack {
+                    metadata,
+                    cues: Vec::new(),
+                };
+
+                SUBTITLE_TRACKS.write().insert(stream_ix, track);
+
+                let context = ffmpeg::codec::Context::from_parameters(stream.parameters())?;
+                let (packet_tx, packet_rx) = mpsc::channel();
+                let time_base = stream.time_base();
+                let sender = sender.clone();
+                let join_handle = thread::spawn(move || {
+                    embedded::extract_embedded_subtitles(
+                        stream_ix, context, time_base, packet_rx, sender,
+                    )
+                });
+
+                subtitle_extractors.insert(stream_ix, (packet_tx, join_handle));
+            }
+        }
+
+        if let Some(stream_ix) = whisper_audio_stream_ix {
+            let stream = input.stream(stream_ix).unwrap();
+
+            let mut metadata = TrackMetadata::from_ffmpeg_stream(&stream);
+            metadata.title = Some(match metadata.title {
+                Some(title) => format!("Auto-generated from audio (Whisper): {}", title),
+                None => "Auto-generated from audio (Whisper)".to_string(),
+            });
+
+            let track = SubtitleTrack {
+                metadata,
+                cues: Vec::new(),
+            };
+
+            SUBTITLE_TRACKS.write().insert(stream_ix, track);
+
+            let context = ffmpeg::codec::Context::from_parameters(stream.parameters())?;
+            let (packet_tx, packet_rx) = mpsc::channel();
+            let time_base = stream.time_base();
+            let sender = sender.clone();
+            let join_handle = thread::spawn(move || {
+                whisper::generate_whisper_subtitles(
+                    stream_ix, context, time_base, packet_rx, sender,
+                )
+            });
+
+            subtitle_extractors.insert(stream_ix, (packet_tx, join_handle));
+        }
+
+        // process packets
+        for (stream, packet) in input.packets() {
+            let stream_index = stream.index();
+
+            if let Some((packet_tx, _)) = subtitle_extractors.get_mut(&stream_index) {
+                packet_tx.send(packet).unwrap();
+            }
+        }
+
+        // wait for extraction to complete
+        for (_, (_, join_handle)) in subtitle_extractors {
+            join_handle
+                .join()
+                .unwrap()
+                .unwrap_or_else(|e| log::error!("error running subtitle extraction: {}", e));
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/subtitle_extraction/whisper.rs b/src/subtitle_extraction/whisper.rs
new file mode 100644
index 0000000..5622d6f
--- /dev/null
+++ b/src/subtitle_extraction/whisper.rs
@@ -0,0 +1,75 @@
+use std::sync::mpsc;
+
+use anyhow::Context;
+use ffmpeg::filter;
+
+use crate::{subtitle_extraction::*, tracks::StreamIndex};
+
+pub fn generate_whisper_subtitles(
+    // stream index to use when storing generated subtitles, this index
+    // already has to be in TRACKS when this function is called!
+    stream_ix: StreamIndex,
+    context: ffmpeg::codec::Context,
+    time_base: ffmpeg::Rational,
+    packet_rx: mpsc::Receiver<ffmpeg::Packet>,
+    sender: ComponentSender<SubtitleExtractor>,
+) -> anyhow::Result<()> {
+    let mut decoder = context
+        .decoder()
+        .audio()
+        .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?;
+
+    let mut filter = filter::Graph::new();
+
+    let abuffer_args = format!(
+        "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+        decoder.time_base(),
+        decoder.rate(),
+        decoder.format().name(),
+        decoder.channel_layout().bits()
+    );
+    let whisper_args = format!(
+        "model={}:queue={}:format=json",
+        "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin", 30
+    );
+    let filter_spec = format!("[src] whisper={} [sink]", whisper_args);
+
+    filter.add(&filter::find("abuffer").unwrap(), "src", &abuffer_args)?;
+    filter.add(&filter::find("abuffersink").unwrap(), "sink", "")?;
+    filter
+        .output("src", 0)?
+        .input("sink", 0)?
+        .parse(&filter_spec)?;
+    filter.validate()?;
+
+    let mut source_ctx = filter.get("src").unwrap();
+    let mut sink_ctx = filter.get("sink").unwrap();
+
+    while let Ok(packet) = packet_rx.recv() {
+        handle_packet(&mut decoder, source_ctx.source(), sink_ctx.sink(), packet)
+            .unwrap_or_else(|e| log::error!("error handling audio packet: {}", e))
+    }
+
+    Ok(())
+}
+
+fn handle_packet(
+    decoder: &mut ffmpeg::decoder::Audio,
+    mut source: filter::Source,
+    mut sink: filter::Sink,
+    packet: ffmpeg::Packet,
+) -> anyhow::Result<()> {
+    let mut in_frame = unsafe { ffmpeg::Frame::empty() };
+    decoder.send_packet(&packet)?;
+    decoder.receive_frame(&mut in_frame)?;
+    source.add(&in_frame)?;
+
+    let mut out_frame = unsafe { ffmpeg::Frame::empty() };
+    sink.frame(&mut out_frame)?;
+
+    if let Some(text) = out_frame.metadata().get("lavfi.whisper.text") {
+        println!("{}", text);
+    }
+
+    Ok(())
+}
diff --git a/src/subtitle_extractor.rs b/src/subtitle_extractor.rs
deleted file mode 100644
index b628d73..0000000
--- a/src/subtitle_extractor.rs
+++ /dev/null
@@ -1,209 +0,0 @@
-use std::collections::BTreeMap;
-
-use anyhow::Result;
-
-use ffmpeg::Rational;
-use log::{debug, error, info};
-use relm4::{ComponentSender, SharedState, Worker};
-
-pub type StreamIndex = usize;
-
-#[derive(Debug, Clone)]
-pub struct SubtitleCue {
-    pub start: gst::ClockTime,
-    pub end: gst::ClockTime,
-    pub text: String,
-}
-
-#[derive(Debug, Clone)]
-pub struct SubtitleTrack {
-    pub language: Option<isolang::Language>,
-    pub title: Option<String>,
-    pub cues: Vec<SubtitleCue>,
-}
-
-pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();
-
-pub struct SubtitleExtractor {}
-
-#[derive(Debug)]
-pub enum SubtitleExtractorMsg {
-    ExtractFromUrl(String),
-}
-
-#[derive(Debug)]
-pub enum SubtitleExtractorOutput {
-    NewOrUpdatedTrackMetadata(StreamIndex),
-    NewCue(StreamIndex, SubtitleCue),
-    ExtractionComplete,
-}
-
-impl Worker for SubtitleExtractor {
-    type Init = ();
-    type Input = SubtitleExtractorMsg;
-    type Output = SubtitleExtractorOutput;
-
-    fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
-        Self {}
-    }
-
-    fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
-        match msg {
-            SubtitleExtractorMsg::ExtractFromUrl(url) => {
-                self.handle_extract_from_url(url, sender);
-            }
-        }
-    }
-}
-
-impl SubtitleExtractor {
-    fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
-        // Clear existing tracks
-        TRACKS.write().clear();
-
-        // Try to extract subtitles using ffmpeg
-        match self.extract_subtitles_ffmpeg(&url, &sender) {
-            Ok(_) => {
-                info!("Subtitle extraction completed successfully");
-                sender
-                    .output(SubtitleExtractorOutput::ExtractionComplete)
-                    .unwrap();
-            }
-            Err(e) => {
-                error!("FFmpeg extraction failed: {}", e);
-            }
-        }
-    }
-
-    fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
-        let mut input = ffmpeg::format::input(&url)?;
-
-        let mut subtitle_decoders = BTreeMap::new();
-
-        // create decoder for each subtitle stream
-        for (stream_index, stream) in input.streams().enumerate() {
-            if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
-                let language_code = stream.metadata().get("language").map(|s| s.to_string());
-                let title = stream.metadata().get("title").map(|s| s.to_string());
-
-                let track = SubtitleTrack {
-                    language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
-                    title,
-                    cues: Vec::new(),
-                };
-
-                TRACKS.write().insert(stream_index, track);
-
-                sender
-                    .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
-                        stream_index,
-                    ))
-                    .unwrap();
-
-                let context =
-                    ffmpeg::codec::context::Context::from_parameters(stream.parameters())?;
-                if let Ok(decoder) = context.decoder().subtitle() {
-                    subtitle_decoders.insert(stream_index, decoder);
-                    debug!("Created decoder for subtitle stream {}", stream_index);
-                } else {
-                    error!(
-                        "Failed to create decoder for subtitle stream {}",
-                        stream_index
-                    );
-                }
-            }
-        }
-
-        // process packets
-        for (stream, packet) in input.packets() {
-            let stream_index = stream.index();
-
-            if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
-                let mut subtitle = ffmpeg::Subtitle::new();
-                if decoder.decode(&packet, &mut subtitle).is_ok() {
-                    if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
-                    {
-                        if let Some(track) = TRACKS.write().get_mut(&stream_index) {
-                            track.cues.push(cue.clone());
-                        }
-
-                        sender
-                            .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
-                            .unwrap();
-                    }
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    fn subtitle_to_cue(
-        subtitle: &ffmpeg::Subtitle,
-        packet: &ffmpeg::Packet,
-        time_base: Rational,
-    ) -> Option<SubtitleCue> {
-        let time_to_clock_time = |time: i64| {
-            let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000)
-                / time_base.denominator() as i64;
-            gst::ClockTime::from_nseconds(nseconds as u64)
-        };
-
-        let text = subtitle
-            .rects()
-            .into_iter()
-            .map(|rect| match rect {
-                ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
-                ffmpeg::subtitle::Rect::Ass(ass) => {
-                    Self::extract_dialogue_text(ass.get()).unwrap_or(String::new())
-                }
-                _ => String::new(),
-            })
-            .collect::<Vec<String>>()
-            .join("\n— ");
-
-        let start = time_to_clock_time(packet.pts()?);
-        let end = time_to_clock_time(packet.pts()? + packet.duration());
-
-        Some(SubtitleCue { start, end, text })
-    }
-
-    fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
-        // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
-        // we need the 9th field (Text), so split on comma but only take first 9 splits
-        // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
-        let text = dialogue_line.splitn(9, ',').last()?;
-
-        // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
-        let mut result = String::new();
-        let mut in_tag = false;
-        let mut char_iter = text.chars().peekable();
-
-        while let Some(c) = char_iter.next() {
-            if c == '{' && char_iter.peek() == Some(&'\\') {
-                in_tag = true;
-            } else if c == '}' {
-                in_tag = false;
-            } else if !in_tag {
-                // process line breaks and hard spaces
-                if c == '\\' {
-                    match char_iter.peek() {
-                        Some(&'N') => {
-                            char_iter.next();
-                            result.push('\n');
-                        }
-                        Some(&'n') | Some(&'h') => {
-                            char_iter.next();
-                            result.push(' ');
-                        }
-                        _ => result.push(c),
-                    }
-                } else {
-                    result.push(c);
-                }
-            }
-        }
-
-        Some(result)
-    }
-}
diff --git a/src/subtitle_extractor_aishit.rs b/src/subtitle_extractor_aishit.rs
new file mode 100644
index 0000000..c615f6c
--- /dev/null
+++ b/src/subtitle_extractor_aishit.rs
@@ -0,0 +1,732 @@
+use std::collections::BTreeMap;
+
+use anyhow::Result;
+
+use ffmpeg::Rational;
+use log::{debug, error, info, warn};
+use relm4::{ComponentSender, SharedState, Worker};
+
+pub type StreamIndex = usize;
+
+#[derive(Debug, Clone)]
+pub struct SubtitleCue {
+    pub start: gst::ClockTime,
+    pub end: gst::ClockTime,
+    pub text: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtitleTrack {
+    pub language: Option<isolang::Language>,
+    pub title: Option<String>,
+    pub cues: Vec<SubtitleCue>,
+    pub is_generated: bool, // true if generated from audio
+}
+
+pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();
+
+pub struct SubtitleExtractor {}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorMsg {
+    ExtractFromUrl(String),
+}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorOutput {
+    NewOrUpdatedTrackMetadata(StreamIndex),
+    NewCue(StreamIndex, SubtitleCue),
+    ExtractionComplete,
+}
+
+impl Worker for SubtitleExtractor {
+    type Init = ();
+    type Input = SubtitleExtractorMsg;
+    type Output = SubtitleExtractorOutput;
+
+    fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
+        Self {}
+    }
+
+    fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
+        match msg {
+            SubtitleExtractorMsg::ExtractFromUrl(url) => {
+                self.handle_extract_from_url(url, sender);
+            }
+        }
+    }
+}
+
+impl SubtitleExtractor {
+    fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
+        // Clear existing tracks
+        TRACKS.write().clear();
+
+        // Try to extract subtitles using ffmpeg
+        match self.extract_subtitles_ffmpeg(&url, &sender) {
+            Ok(_) => {
+                info!("Subtitle extraction completed successfully");
+                sender
+                    .output(SubtitleExtractorOutput::ExtractionComplete)
+                    .unwrap();
+            }
+            Err(e) => {
+                error!("FFmpeg extraction failed: {}", e);
+            }
+        }
+    }
+
+    fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
+        info!("Starting subtitle extraction from: {}", url);
+        let mut input = ffmpeg::format::input(&url)?;
+
+        // Log input format info
+        info!(
+            "Input format: {} ({} streams)",
+            input.format().name(),
+            input.streams().count()
+        );
+
+        // Check if whisper filter is available
+        if let Some(whisper_filter) = ffmpeg::filter::find("whisper") {
+            info!("Whisper filter found: {}", whisper_filter.name());
+        } else {
+            warn!("Whisper filter not found - audio transcription will be skipped");
+        }
+
+        let mut subtitle_decoders = BTreeMap::new();
+        let mut audio_decoder: Option<ffmpeg::decoder::Audio> = None;
+        let mut _whisper_filter_graph: Option<ffmpeg::filter::Graph> = None;
+        let mut whisper_source: Option<ffmpeg::filter::Context> = None;
+        let mut whisper_sink: Option<ffmpeg::filter::Context> = None;
+        let mut best_audio_stream_index: Option<usize> = None;
+
+        // Find best audio stream for whisper processing
+        if let Some(audio_stream) = input.streams().best(ffmpeg::media::Type::Audio) {
+            best_audio_stream_index = Some(audio_stream.index());
+
+            // Get audio parameters safely
+            let codec_id = audio_stream.parameters().id();
+            let channels = if let Ok(context) =
+                ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+            {
+                if let Ok(audio) = context.decoder().audio() {
+                    audio.channels()
+                } else {
+                    0
+                }
+            } else {
+                0
+            };
+            let sample_rate = if let Ok(context) =
+                ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+            {
+                if let Ok(audio) = context.decoder().audio() {
+                    audio.rate()
+                } else {
+                    0
+                }
+            } else {
+                0
+            };
+
+            info!(
+                "Found best audio stream: index {} (codec: {:?}, channels: {}, sample_rate: {})",
+                audio_stream.index(),
+                codec_id,
+                channels,
+                sample_rate
+            );
+        } else {
+            info!("No audio stream found for whisper processing");
+        }
+
+        // Set up whisper filter graph if we found an audio stream
+        if let Some(audio_index) = best_audio_stream_index {
+            info!("Setting up whisper filter for audio stream {}", audio_index);
+
+            let audio_stream = input.stream(audio_index).unwrap();
+            if let Ok(context) =
+                ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+            {
+                if let Ok(decoder) = context.decoder().audio() {
+                    // Get decoder properties before moving it
+                    let decoder_rate = decoder.rate();
+                    let decoder_format = decoder.format();
+                    let decoder_channel_layout = decoder.channel_layout().bits();
+
+                    audio_decoder = Some(decoder);
+
+                    // Set up whisper filter graph
+                    debug!("Creating whisper filter graph...");
+                    debug!(
+                        "Audio stream time_base: {}, decoder rate: {}, format: {:?}, channel_layout: 0x{:x}",
+                        audio_stream.time_base(),
+                        decoder_rate,
+                        decoder_format,
+                        decoder_channel_layout
+                    );
+                    match self.setup_whisper_filter(&audio_stream) {
+                        Ok((graph, source, sink)) => {
+                            info!("Whisper filter graph created successfully");
+                            _whisper_filter_graph = Some(graph);
+                            whisper_source = Some(source);
+                            whisper_sink = Some(sink);
+                            debug!("Whisper source and sink contexts stored");
+
+                            // Create a generated subtitle track
+                            let track = SubtitleTrack {
+                                language: Some(isolang::Language::from_639_1("en").unwrap_or_else(
+                                    || isolang::Language::from_639_3("eng").unwrap(),
+                                )),
+                                title: Some("Generated from Audio (Whisper)".to_string()),
+                                cues: Vec::new(),
+                                is_generated: true,
+                            };
+
+                            let whisper_stream_index = 1000; // Use high index for generated tracks
+                            TRACKS.write().insert(whisper_stream_index, track);
+
+                            sender
+                                .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+                                    whisper_stream_index,
+                                ))
+                                .unwrap();
+                        }
+                        Err(e) => {
+                            error!("Failed to setup whisper filter: {}", e);
+                            debug!("Whisper filter error details: {:?}", e);
+                            warn!(
+                                "Audio transcription will be skipped due to filter setup failure"
+                            );
+                        }
+                    }
+                }
+            }
+        }
+
+        // Create decoder for each subtitle stream
+        for (stream_index, stream) in input.streams().enumerate() {
+            if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
+                let language_code = stream.metadata().get("language").map(|s| s.to_string());
+                let title = stream.metadata().get("title").map(|s| s.to_string());
+
+                let track = SubtitleTrack {
+                    language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
+                    title,
+                    cues: Vec::new(),
+                    is_generated: false,
+                };
+
+                TRACKS.write().insert(stream_index, track);
+
+                sender
+                    .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+                        stream_index,
+                    ))
+                    .unwrap();
+
+                let context =
+                    ffmpeg::codec::context::Context::from_parameters(stream.parameters())?;
+                if let Ok(decoder) = context.decoder().subtitle() {
+                    subtitle_decoders.insert(stream_index, decoder);
+                    debug!("Created decoder for subtitle stream {}", stream_index);
+                } else {
+                    error!(
+                        "Failed to create decoder for subtitle stream {}",
+                        stream_index
+                    );
+                }
+            } else {
+                debug!(
+                    "Failed to create context for subtitle stream {}",
+                    stream_index
+                );
+            }
+        }
+
+        // Process packets
+        for (stream, packet) in input.packets() {
+            let stream_index = stream.index();
+
+            // Process subtitle packets
+            if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
+                let mut subtitle = ffmpeg::Subtitle::new();
+                if decoder.decode(&packet, &mut subtitle).is_ok() {
+                    if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
+                    {
+                        if let Some(track) = TRACKS.write().get_mut(&stream_index) {
+                            track.cues.push(cue.clone());
+                        }
+
+                        sender
+                            .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
+                            .unwrap();
+                    }
+                }
+            }
+
+            // Process audio packets for whisper
+            if Some(stream_index) == best_audio_stream_index {
+                debug!(
+                    "Processing audio packet for whisper (stream: {}, pts: {:?}, duration: {:?})",
+                    stream_index,
+                    packet.pts(),
+                    packet.duration()
+                );
+                debug!(
+                    "Audio decoder available: {}, Whisper source available: {}",
+                    audio_decoder.is_some(),
+                    whisper_source.is_some()
+                );
+                if let (Some(decoder), Some(source)) = (&mut audio_decoder, &mut whisper_source) {
+                    debug!("Both audio decoder and whisper source are available, processing...");
+                    // Send packet to audio decoder
+                    if let Err(e) = decoder.send_packet(&packet) {
+                        debug!("Failed to send packet to audio decoder: {}", e);
+                    }
+
+                    // Get decoded frames and send to whisper filter
+                    let mut frame = unsafe { ffmpeg::Frame::empty() };
+                    let mut frame_count = 0;
+                    while decoder.receive_frame(&mut frame).is_ok() {
+                        frame_count += 1;
+                        debug!(
+                            "Decoded audio frame {} (pts: {:?})",
+                            frame_count,
+                            frame.pts()
+                        );
+
+                        // Add frame to whisper filter
+                        if let Err(e) = source.source().add(&frame) {
+                            error!("Failed to add frame to whisper filter: {}", e);
+                        } else {
+                            debug!("Successfully added frame to whisper filter");
+                        }
+
+                        // Check for whisper output after adding each frame
+                        if let Some(sink) = &mut whisper_sink {
+                            self.check_whisper_output(sink, sender)?;
+                        }
+                    }
+                    if frame_count > 0 {
+                        debug!("Processed {} audio frames for whisper", frame_count);
+                    }
+                } else {
+                    debug!("Skipping audio packet - decoder or whisper source not available");
+                }
+            }
+        }
+
+        // Flush audio decoder and whisper filter
+        if let (Some(decoder), Some(source), Some(sink)) =
+            (&mut audio_decoder, &mut whisper_source, &mut whisper_sink)
+        {
+            info!("Flushing audio decoder and whisper filter...");
+            // Flush decoder
+            if let Err(e) = decoder.send_eof() {
+                debug!("Failed to send EOF to decoder: {}", e);
+            }
+            let mut frame = unsafe { ffmpeg::Frame::empty() };
+            let mut final_frame_count = 0;
+            while decoder.receive_frame(&mut frame).is_ok() {
+                final_frame_count += 1;
+                source.source().add(&frame).ok();
+            }
+            debug!("Flushed {} final frames from decoder", final_frame_count);
+
+            // Flush filter and get results
+            debug!("Flushing whisper filter...");
+            if let Err(e) = source.source().flush() {
+                error!("Failed to flush whisper filter: {}", e);
+            }
+
+            info!("Processing final whisper filter output...");
+            self.check_whisper_output(sink, sender)?;
+        }
+
+        Ok(())
+    }
+
+    fn setup_whisper_filter(
+        &self,
+        audio_stream: &ffmpeg::Stream,
+    ) -> Result<(
+        ffmpeg::filter::Graph,
+        ffmpeg::filter::Context,
+        ffmpeg::filter::Context,
+    )> {
+        debug!("Setting up whisper filter graph...");
+        let mut filter_graph = ffmpeg::filter::Graph::new();
+        debug!("Filter graph created successfully");
+
+        // Get audio parameters
+        debug!("Getting audio parameters...");
+        let time_base = audio_stream.time_base();
+        let audio_params = audio_stream.parameters();
+        debug!("Creating context from parameters...");
+        let context = ffmpeg::codec::context::Context::from_parameters(audio_params)?;
+        debug!("Getting audio decoder from context...");
+        let audio_decoder = context.decoder().audio()?;
+        debug!("Audio decoder created successfully");
+
+        // Create buffer source
+        let buffer_args = format!(
+            "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+            time_base,
+            audio_decoder.rate(),
+            audio_decoder.format().name(),
+            audio_decoder.channel_layout().bits()
+        );
+        debug!("Buffer args: {}", buffer_args);
+
+        debug!("Looking for abuffer filter...");
+        let abuffer_filter = ffmpeg::filter::find("abuffer")
+            .ok_or_else(|| anyhow::anyhow!("abuffer filter not found"))?;
+        debug!("abuffer filter found: {}", abuffer_filter.name());
+
+        debug!("Adding abuffer filter...");
+        match filter_graph.add(&abuffer_filter, "src", &buffer_args) {
+            Ok(_) => debug!("abuffer filter added successfully"),
+            Err(e) => {
+                error!("Failed to add abuffer filter: {}", e);
+                return Err(anyhow::anyhow!("Failed to add abuffer filter: {}", e));
+            }
+        }
+
+        // Create whisper filter with parameters
+        // Try absolute path and different parameter formats
+        let model_path = std::path::Path::new("./whisper-models/ggml-large-v3.bin");
+        let absolute_path = if model_path.exists() {
+            model_path
+                .canonicalize()
+                .map(|p| p.to_string_lossy().to_string())
+                .unwrap_or_else(|_| "./whisper-models/ggml-large-v3.bin".to_string())
+        } else {
+            warn!("Whisper model file not found at: {:?}", model_path);
+            "./whisper-models/ggml-large-v3.bin".to_string()
+        };
+
+        debug!("Model path exists: {}", model_path.exists());
+        debug!("Using absolute path: {}", absolute_path);
+
+        debug!("Looking for whisper filter...");
+        let whisper_filter = ffmpeg::filter::find("whisper").ok_or_else(|| {
+            error!("Whisper filter not found! Make sure FFmpeg was compiled with whisper support");
+            anyhow::anyhow!("Whisper filter not available")
+        })?;
+
+        debug!("Whisper filter found: {}", whisper_filter.name());
+        // We'll create the whisper filter through the parse method instead of adding it manually
+
+        // Create audio buffer sink for whisper output (whisper outputs audio + metadata)
+        debug!("Looking for abuffersink filter for audio output...");
+        let abuffersink_filter = ffmpeg::filter::find("abuffersink")
+            .ok_or_else(|| anyhow::anyhow!("abuffersink filter not found"))?;
+        debug!("abuffersink filter found: {}", abuffersink_filter.name());
+
+        debug!("Adding abuffersink filter...");
+        match filter_graph.add(&abuffersink_filter, "sink", "") {
+            Ok(_) => debug!("abuffersink filter added successfully"),
+            Err(e) => {
+                error!("Failed to add abuffersink filter: {}", e);
+                return Err(anyhow::anyhow!("Failed to add abuffersink filter: {}", e));
+            }
+        }
+
+        // Connect filters using the complete filter chain description
+        debug!("Connecting filter graph with complete chain: src -> whisper -> sink");
+
+        let filter_chain = format!(
+            "[src]whisper=model={}:queue=30:format=json[sink]",
+            "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin",
+            //"/Users/malte/repos/lleap/whisper-models/ggml-silero-v5.1.2.bin"
+        );
+        debug!("Using filter chain: {}", filter_chain);
+
+        if let Err(e) = filter_graph
+            .output("src", 0)
+            .and_then(|o| o.input("sink", 0))
+            .and_then(|i| i.parse(&filter_chain))
+        {
+            error!("Failed to connect filter graph: {}", e);
+            return Err(anyhow::anyhow!("Failed to connect filter graph: {}", e));
+        }
+        debug!("Filter graph connected successfully");
+
+        // Validate filter graph
+        debug!("Validating filter graph...");
+        match filter_graph.validate() {
+            Ok(_) => {
+                info!("Filter graph validated successfully");
+                debug!("Filter graph dump:\n{}", filter_graph.dump());
+            }
+            Err(e) => {
+                error!("Filter graph validation failed: {}", e);
+                debug!(
+                    "Filter graph dump before validation failure:\n{}",
+                    filter_graph.dump()
+                );
+                return Err(anyhow::anyhow!("Filter graph validation failed: {}", e));
+            }
+        }
+
+        debug!("Getting final source and sink contexts...");
+        let source_ctx = filter_graph
+            .get("src")
+            .ok_or_else(|| anyhow::anyhow!("Source context not found"))?;
+        let sink_ctx = filter_graph
+            .get("sink")
+            .ok_or_else(|| anyhow::anyhow!("Sink context not found"))?;
+        debug!("Final contexts retrieved successfully");
+
+        Ok((filter_graph, source_ctx, sink_ctx))
+    }
+
+    fn check_whisper_output(
+        &self,
+        sink: &mut ffmpeg::filter::Context,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        debug!("Attempting to read audio frames from whisper filter output...");
+
+        // The whisper filter outputs audio frames with subtitle data in "lavfi.whisper.text" metadata
+        let mut frame = unsafe { ffmpeg::Frame::empty() };
+        let mut output_count = 0;
+
+        while sink.sink().frame(&mut frame).is_ok() {
+            output_count += 1;
+            debug!(
+                "Received audio frame {} from whisper filter (pts: {:?})",
+                output_count,
+                frame.pts()
+            );
+
+            // Look specifically for lavfi.whisper.text metadata
+            if let Some(whisper_text) = frame.metadata().get("lavfi.whisper.text") {
+                info!("Found whisper transcription: {}", whisper_text);
+
+                let start_time = if let Some(pts) = frame.pts() {
+                    // Convert PTS to nanoseconds based on whisper filter's time base (16kHz)
+                    gst::ClockTime::from_nseconds((pts as u64 * 1_000_000_000) / 16000)
+                } else {
+                    gst::ClockTime::ZERO
+                };
+
+                // Log all available metadata keys to help debug
+                let metadata_entries: Vec<(String, String)> = frame
+                    .metadata()
+                    .iter()
+                    .map(|(k, v)| (k.to_string(), v.to_string()))
+                    .collect();
+                if !metadata_entries.is_empty() {
+                    let metadata_keys: Vec<String> =
+                        metadata_entries.iter().map(|(k, _)| k.clone()).collect();
+                    debug!("Frame metadata keys: {:?}", metadata_keys);
+                }
+
+                // Parse the whisper text (might be JSON format)
+                self.parse_whisper_text(whisper_text, start_time, sender)?;
+            }
+        }
+
+        if output_count > 0 {
+            info!("Processed {} frames from whisper filter", output_count);
+        } else {
+            debug!("No frames available from whisper filter");
+        }
+
+        Ok(())
+    }
+
+    fn parse_whisper_text(
+        &self,
+        whisper_text: &str,
+        base_time: gst::ClockTime,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        debug!("Parsing whisper text: {}", whisper_text);
+
+        // The whisper text might be in different formats depending on the filter configuration
+        // For now, treat it as plain text and create a single cue
+        let cue = SubtitleCue {
+            start: base_time,
+            end: base_time + gst::ClockTime::from_seconds(3), // Default 3 second duration
+            text: whisper_text.to_string(),
+        };
+
+        let whisper_stream_index = 1000;
+        if let Some(track) = TRACKS.write().get_mut(&whisper_stream_index) {
+            track.cues.push(cue.clone());
+        }
+
+        sender
+            .output(SubtitleExtractorOutput::NewCue(whisper_stream_index, cue))
+            .unwrap();
+
+        Ok(())
+    }
+
+    fn parse_whisper_subtitle_data(
+        &self,
+        subtitle_data: &str,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        // Parse SRT-format output from whisper
+        info!(
+            "Parsing whisper subtitle data ({} characters)",
+            subtitle_data.len()
+        );
+        debug!("Subtitle data content:\n{}", subtitle_data);
+        let lines: Vec<&str> = subtitle_data.lines().collect();
+        let mut i = 0;
+
+        while i < lines.len() {
+            // Skip subtitle number
+            if lines[i].trim().parse::<i32>().is_ok() {
+                i += 1;
+            }
+
+            // Parse timestamp line
+            if i < lines.len() {
+                if let Some((start, end)) = self.parse_srt_timestamp(lines[i]) {
+                    i += 1;
+
+                    // Collect text lines
+                    let mut text_lines = Vec::new();
+                    while i < lines.len() && !lines[i].trim().is_empty() {
+                        text_lines.push(lines[i].to_string());
+                        i += 1;
+                    }
+
+                    if !text_lines.is_empty() {
+                        let cue = SubtitleCue {
+                            start,
+                            end,
+                            text: text_lines.join("\n"),
+                        };
+
+                        let whisper_stream_index = 1000;
+                        if let Some(track) = TRACKS.write().get_mut(&whisper_stream_index) {
+                            track.cues.push(cue.clone());
+                        }
+
+                        sender
+                            .output(SubtitleExtractorOutput::NewCue(whisper_stream_index, cue))
+                            .unwrap();
+                    }
+                }
+            }
+            i += 1;
+        }
+
+        Ok(())
+    }
+
+    fn parse_srt_timestamp(&self, line: &str) -> Option<(gst::ClockTime, gst::ClockTime)> {
+        // Parse SRT timestamp format: "00:00:01,234 --> 00:00:05,678"
+        let parts: Vec<&str> = line.split(" --> ").collect();
+        if parts.len() != 2 {
+            return None;
+        }
+
+        let start = self.parse_srt_time(parts[0])?;
+        let end = self.parse_srt_time(parts[1])?;
+
+        Some((start, end))
+    }
+
+    fn parse_srt_time(&self, time_str: &str) -> Option<gst::ClockTime> {
+        // Parse SRT time format: "00:00:01,234"
+        let parts: Vec<&str> = time_str.split(',').collect();
+        if parts.len() != 2 {
+            return None;
+        }
+
+        let time_part = parts[0];
+        let millis: u32 = parts[1].parse().ok()?;
+
+        let time_components: Vec<&str> = time_part.split(':').collect();
+        if time_components.len() != 3 {
+            return None;
+        }
+
+        let hours: u32 = time_components[0].parse().ok()?;
+        let minutes: u32 = time_components[1].parse().ok()?;
+        let seconds: u32 = time_components[2].parse().ok()?;
+
+        let total_millis = hours * 3600000 + minutes * 60000 + seconds * 1000 + millis;
+        let nanoseconds = total_millis as u64 * 1_000_000;
+
+        Some(gst::ClockTime::from_nseconds(nanoseconds))
+    }
+
+    fn subtitle_to_cue(
+        subtitle: &ffmpeg::Subtitle,
+        packet: &ffmpeg::Packet,
+        time_base: Rational,
+    ) -> Option<SubtitleCue> {
+        let time_to_clock_time = |time: i64| {
+            let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000)
+                / time_base.denominator() as i64;
+            gst::ClockTime::from_nseconds(nseconds as u64)
+        };
+
+        let text = subtitle
+            .rects()
+            .into_iter()
+            .map(|rect| match rect {
+                ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
+                ffmpeg::subtitle::Rect::Ass(ass) => {
+                    Self::extract_dialogue_text(ass.get()).unwrap_or(String::new())
+                }
+                _ => String::new(),
+            })
+            .collect::<Vec<String>>()
+            .join("\n— ");
+
+        let start = time_to_clock_time(packet.pts()?);
+        let end = time_to_clock_time(packet.pts()? + packet.duration());
+
+        Some(SubtitleCue { start, end, text })
+    }
+
+    fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
+        // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
+        // we need the 9th field (Text), so split on comma but only take first 9 splits
+        // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
+        let text = dialogue_line.splitn(9, ',').last()?;
+
+        // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
+        let mut result = String::new();
+        let mut in_tag = false;
+        let mut char_iter = text.chars().peekable();
+
+        while let Some(c) = char_iter.next() {
+            if c == '{' && char_iter.peek() == Some(&'\\') {
+                in_tag = true;
+            } else if c == '}' {
+                in_tag = false;
+            } else if !in_tag {
+                // process line breaks and hard spaces
+                if c == '\\' {
+                    match char_iter.peek() {
+                        Some(&'N') => {
+                            char_iter.next();
+                            result.push('\n');
+                        }
+                        Some(&'n') | Some(&'h') => {
+                            char_iter.next();
+                            result.push(' ');
+                        }
+                        _ => result.push(c),
+                    }
+                } else {
+                    result.push(c);
+                }
+            }
+        }
+
+        Some(result)
+    }
+}
diff --git a/src/subtitle_selection_dialog.rs b/src/subtitle_selection_dialog.rs
index 0c7f1cd..6136d56 100644
--- a/src/subtitle_selection_dialog.rs
+++ b/src/subtitle_selection_dialog.rs
@@ -1,63 +1,18 @@
 use adw::prelude::*;
-use gtk::{gio, glib};
+use gtk::gio;
 use relm4::prelude::*;
 
-use crate::subtitle_extractor::{StreamIndex, TRACKS};
-use crate::util::Tracker;
-
-// Custom GObject wrapper for subtitle track information
-glib::wrapper! {
-    pub struct SubtitleTrackInfo(ObjectSubclass<imp::SubtitleTrackInfo>);
-}
-
-impl SubtitleTrackInfo {
-    pub fn new(
-        stream_index: StreamIndex,
-        language: Option<&'static str>,
-        title: Option<String>,
-    ) -> Self {
-        glib::Object::builder()
-            .property("stream-index", stream_index as i64)
-            .property("language", language.unwrap_or_default())
-            .property("title", title.unwrap_or_default())
-            .build()
-    }
-
-    pub fn get_stream_index(&self) -> StreamIndex {
-        let index: i64 = self.property("stream-index");
-        index as usize
-    }
-}
-
-mod imp {
-    use gtk::{glib, prelude::*, subclass::prelude::*};
-    use std::cell::RefCell;
-
-    #[derive(Default, glib::Properties)]
-    #[properties(wrapper_type = super::SubtitleTrackInfo)]
-    pub struct SubtitleTrackInfo {
-        #[property(get, set)]
-        stream_index: RefCell<i64>,
-        #[property(get, set)]
-        language: RefCell<String>,
-        #[property(get, set)]
-        title: RefCell<String>,
-    }
-
-    #[glib::object_subclass]
-    impl ObjectSubclass for SubtitleTrackInfo {
-        const NAME: &'static str = "SubtitleTrackInfo";
-        type Type = super::SubtitleTrackInfo;
-    }
-
-    #[glib::derived_properties]
-    impl ObjectImpl for SubtitleTrackInfo {}
-}
+use crate::track_selector::{
+    TrackInfo, TrackSelector, TrackSelectorInit, TrackSelectorMsg, TrackSelectorOutput,
+};
+use crate::tracks::{SUBTITLE_TRACKS, StreamIndex};
 
 pub struct SubtitleSelectionDialog {
     parent_window: adw::ApplicationWindow,
     dialog: adw::PreferencesDialog,
-    track_list_model: Tracker<gio::ListStore>,
+    track_list_model: gio::ListStore,
+    primary_selector: Controller<TrackSelector>,
+    secondary_selector: Controller<TrackSelector>,
     primary_track_ix: Option<StreamIndex>,
     secondary_track_ix: Option<StreamIndex>,
 }
@@ -91,79 +46,10 @@ impl SimpleComponent for SubtitleSelectionDialog {
             #[name(page)]
             adw::PreferencesPage {
                 adw::PreferencesGroup {
-                    #[name(primary_combo)]
-                    adw::ComboRow {
-                        set_title: "Primary Subtitle Track",
-                        set_subtitle: "Main subtitle track for learning",
-                        set_factory: Some(&track_factory),
-                        #[track(model.track_list_model.is_dirty())]
-                        set_model: Some(model.track_list_model.get()),
-                        #[track(model.track_list_model.is_dirty())]
-                        set_selected: model.primary_track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(model.track_list_model.get(), ix)),
-                        connect_selected_notify[sender] => move |combo| {
-                            let stream_index = get_stream_ix_from_combo(combo);
-                            sender.input(SubtitleSelectionDialogMsg::PrimaryTrackChanged(stream_index));
-                        },
-                    },
-
-                    #[name(secondary_combo)]
-                    adw::ComboRow {
-                        set_title: "Secondary Subtitle Track",
-                        set_subtitle: "Optional second track for comparison",
-                        set_factory: Some(&track_factory),
-                        #[track(model.track_list_model.is_dirty())]
-                        set_model: Some(model.track_list_model.get()),
-                        #[track(model.track_list_model.is_dirty())]
-                        set_selected: model.secondary_track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(model.track_list_model.get(), ix)),
-                        connect_selected_notify[sender] => move |combo| {
-                            let stream_index = get_stream_ix_from_combo(combo);
-                            sender.input(SubtitleSelectionDialogMsg::SecondaryTrackChanged(stream_index));
-                        },
-                    },
+                    model.primary_selector.widget(),
+                    model.secondary_selector.widget(),
                 }
             },
-
-            #[name(track_factory)]
-            gtk::SignalListItemFactory {
-                connect_setup => move |_, list_item| {
-                    let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
-                    let vbox = gtk::Box::new(gtk::Orientation::Vertical, 0);
-
-                    let language_label = gtk::Label::new(None);
-                    language_label.set_halign(gtk::Align::Start);
-                    language_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
-
-                    let title_label = gtk::Label::new(None);
-                    title_label.set_halign(gtk::Align::Start);
-                    title_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
-                    title_label.add_css_class("subtitle");
-
-                    vbox.append(&language_label);
-                    vbox.append(&title_label);
-                    list_item.set_child(Some(&vbox));
-                },
-                connect_bind => move |_, list_item| {
-                    let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
-                    let item = list_item.item().unwrap();
-                    let track_info = item.downcast_ref::<SubtitleTrackInfo>().unwrap();
-                    let vbox = list_item.child().unwrap().downcast::<gtk::Box>().unwrap();
-                    let language_label = vbox.first_child().unwrap().downcast::<gtk::Label>().unwrap();
-                    let title_label = vbox.last_child().unwrap().downcast::<gtk::Label>().unwrap();
-
-                    let language = track_info.language();
-                    let title = track_info.title();
-
-                    let language_text = if !language.is_empty() {
-                        &language
-                    } else {
-                        "Unknown Language"
-                    };
-
-                    language_label.set_text(&language_text);
-                    title_label.set_text(&title);
-                    title_label.set_visible(!title.is_empty());
-                },
-            },
     }
 
     fn init(
@@ -171,12 +57,33 @@
         root: Self::Root,
         sender: ComponentSender<Self>,
     ) -> ComponentParts<Self> {
-        let track_list_model = gio::ListStore::new::<SubtitleTrackInfo>();
+        let primary_selector = TrackSelector::builder()
+            .launch(TrackSelectorInit {
+                title: "Primary subtitle track",
+                subtitle: Some("Select your target language here"),
+            })
+            .forward(sender.input_sender(), |output| match output {
+                TrackSelectorOutput::Changed(ix) => {
+                    SubtitleSelectionDialogMsg::PrimaryTrackChanged(ix)
+                }
+            });
+        let secondary_selector = TrackSelector::builder()
+            .launch(TrackSelectorInit {
+                title: "Secondary subtitle track",
+                subtitle: Some("Pick a language you already know"),
+            })
+            .forward(sender.input_sender(), |output| match output {
+                TrackSelectorOutput::Changed(ix) => {
+                    SubtitleSelectionDialogMsg::SecondaryTrackChanged(ix)
+                }
+            });
 
         let model = Self {
             parent_window,
             dialog: root.clone(),
-            track_list_model: Tracker::new(track_list_model),
+            track_list_model: gio::ListStore::new::<TrackInfo>(),
+            primary_selector,
+            secondary_selector,
             primary_track_ix: None,
             secondary_track_ix: None,
         };
@@ -187,11 +94,23 @@
     }
 
     fn update(&mut self, msg: Self::Input, sender: ComponentSender<Self>) {
-        self.track_list_model.reset();
-
         match msg {
             SubtitleSelectionDialogMsg::Show => {
-                self.update_combo_models();
+                self.update_track_list_model();
+
+                self.primary_selector
+                    .sender()
+                    .send(TrackSelectorMsg::SetListModel(
+                        self.track_list_model.clone(),
+                    ))
+                    .unwrap();
+                self.secondary_selector
+                    .sender()
+                    .send(TrackSelectorMsg::SetListModel(
+                        self.track_list_model.clone(),
+                    ))
+                    .unwrap();
+
                 self.dialog.present(Some(&self.parent_window));
             }
             SubtitleSelectionDialogMsg::PrimaryTrackChanged(stream_index) => {
@@ -215,43 +134,20 @@
 }
 
 impl SubtitleSelectionDialog {
-    fn update_combo_models(&mut self) {
-        let tracks = TRACKS.read();
+    fn update_track_list_model(&mut self) {
+        let tracks = SUBTITLE_TRACKS.read();
 
         // Clear previous entries
-        self.track_list_model.get_mut().remove_all();
+        self.track_list_model.remove_all();
 
         // Add all available tracks
        for (&stream_index, track) in tracks.iter() {
-            let track_info = SubtitleTrackInfo::new(
+            let track_info = TrackInfo::new(
                 stream_index,
-                track.language.map(|lang| lang.to_name()),
-                track.title.clone(),
+                track.metadata.language.map(|lang| lang.to_name()),
+                track.metadata.title.clone(),
             );
-            self.track_list_model.get_mut().append(&track_info);
-        }
-    }
-}
-
-fn get_stream_ix_from_combo(combo: &adw::ComboRow) -> Option<StreamIndex> {
-    let ix = combo
-        .selected_item()?
-        .downcast_ref::<SubtitleTrackInfo>()
-        .unwrap()
-        .get_stream_index();
-
-    Some(ix)
-}
-
-fn get_list_ix_from_stream_ix(list_model: &gio::ListStore, stream_ix: StreamIndex) -> u32 {
-    for i in 0..list_model.n_items() {
-        if let Some(item) = list_model.item(i) {
-            if let Some(track_info) = item.downcast_ref::<SubtitleTrackInfo>() {
-                if track_info.get_stream_index() == stream_ix {
-                    return i;
-                }
-            }
+            self.track_list_model.append(&track_info);
         }
     }
-    panic!("Stream index {} not found in list model", stream_ix);
 }
diff --git a/src/subtitle_view.rs b/src/subtitle_view.rs
index dc48561..50494b8 100644
--- a/src/subtitle_view.rs
+++ b/src/subtitle_view.rs
@@ -1,6 +1,5 @@
 use crate::cue_view::{CueView, CueViewMsg, CueViewOutput};
 use crate::util::OptionTracker;
-use gtk::glib;
 use gtk::prelude::*;
 use relm4::prelude::*;
 
diff --git a/src/track_selector.rs b/src/track_selector.rs
new file mode 100644
index 0000000..5c56e4d
--- /dev/null
+++ b/src/track_selector.rs
@@ -0,0 +1,188 @@
+use adw::prelude::*;
+use gtk::{gio, glib};
+use relm4::prelude::*;
+
+use crate::tracks::StreamIndex;
+
+glib::wrapper! {
+    pub struct TrackInfo(ObjectSubclass<imp::TrackInfo>);
+}
+
+impl TrackInfo {
+    pub fn new(
+        stream_index: StreamIndex,
+        language: Option<&'static str>,
+        title: Option<String>,
+    ) -> Self {
+        glib::Object::builder()
+            .property("stream-index", stream_index as i64)
+            .property("language", language.unwrap_or_default())
+            .property("title", title.unwrap_or_default())
+            .build()
+    }
+
+    pub fn get_stream_index(&self) -> StreamIndex {
+        let index: i64 = self.property("stream-index");
+        index as usize
+    }
+}
+
+mod imp {
+    use gtk::{glib, prelude::*, subclass::prelude::*};
+    use std::cell::RefCell;
+
+    #[derive(Default, glib::Properties)]
+    #[properties(wrapper_type = super::TrackInfo)]
+    pub struct TrackInfo {
+        #[property(get, set)]
+        stream_index: RefCell<i64>,
+        #[property(get, set)]
+        language: RefCell<String>,
+        #[property(get, set)]
+        title: RefCell<String>,
+    }
+
+    #[glib::object_subclass]
+    impl ObjectSubclass for TrackInfo {
+        const NAME: &'static str = "TrackInfo";
+        type Type = super::TrackInfo;
+    }
+
+    #[glib::derived_properties]
+    impl ObjectImpl for TrackInfo {}
+}
+
+pub struct TrackSelector {
+    track_list_model: gio::ListStore,
+    track_ix: Option<StreamIndex>,
+}
+
+pub struct TrackSelectorInit {
+    pub title: &'static str,
+    pub subtitle: Option<&'static str>,
+}
+
+#[derive(Debug)]
+pub enum TrackSelectorMsg {
+    SetListModel(gio::ListStore),
+}
+
+#[derive(Debug)]
+pub enum TrackSelectorOutput {
+    Changed(Option<StreamIndex>),
+}
+
+#[relm4::component(pub)]
+impl SimpleComponent for TrackSelector {
+    type Init = TrackSelectorInit;
+    type Input = TrackSelectorMsg;
+    type Output = TrackSelectorOutput;
+
+    view!
{
+        #[root]
+        #[name(combo)]
+        adw::ComboRow {
+            set_title: init.title,
+            set_subtitle?: init.subtitle,
+            set_factory: Some(&track_factory),
+            #[watch]
+            set_model: Some(&model.track_list_model),
+            #[watch]
+            set_selected: model.track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(&model.track_list_model, ix)),
+            connect_selected_notify[sender] => move |combo| {
+                let stream_index = get_stream_ix_from_combo(combo);
+                sender.output(TrackSelectorOutput::Changed(stream_index)).unwrap();
+            },
+        },
+
+        #[name(track_factory)]
+        gtk::SignalListItemFactory {
+            connect_setup => move |_, list_item| {
+                let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
+                let vbox = gtk::Box::new(gtk::Orientation::Vertical, 0);
+
+                let language_label = gtk::Label::new(None);
+                language_label.set_halign(gtk::Align::Start);
+                language_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
+
+                let title_label = gtk::Label::new(None);
+                title_label.set_halign(gtk::Align::Start);
+                title_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
+                title_label.add_css_class("subtitle");
+
+                vbox.append(&language_label);
+                vbox.append(&title_label);
+                list_item.set_child(Some(&vbox));
+            },
+            connect_bind => move |_, list_item| {
+                let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
+                let item = list_item.item().unwrap();
+                let track_info = item.downcast_ref::<TrackInfo>().unwrap();
+                let vbox = list_item.child().unwrap().downcast::<gtk::Box>().unwrap();
+                let language_label = vbox.first_child().unwrap().downcast::<gtk::Label>().unwrap();
+                let title_label = vbox.last_child().unwrap().downcast::<gtk::Label>().unwrap();
+
+                let language = track_info.language();
+                let title = track_info.title();
+
+                let language_text = if !language.is_empty() {
+                    &language
+                } else {
+                    "Unknown Language"
+                };
+
+                language_label.set_text(language_text);
+                title_label.set_text(&title);
+                title_label.set_visible(!title.is_empty());
+            },
+        },
+    }
+
+    fn init(
+        init: Self::Init,
+        root: Self::Root,
+        sender: ComponentSender<Self>,
+    ) -> ComponentParts<Self> {
+        let track_list_model = gio::ListStore::new::<TrackInfo>();
+
+        let model = Self {
+            track_list_model,
+            track_ix: None,
+        };
+
+        let widgets = view_output!();
+
+        ComponentParts { model, widgets }
+    }
+
+    fn update(&mut self, msg: Self::Input, _sender: ComponentSender<Self>) {
+        match msg {
+            TrackSelectorMsg::SetListModel(list_model) => {
+                self.track_list_model = list_model;
+            }
+        }
+    }
+}
+
+fn get_stream_ix_from_combo(combo: &adw::ComboRow) -> Option<StreamIndex> {
+    let ix = combo
+        .selected_item()?
+ .downcast_ref::<TrackInfo>() + .unwrap() + .get_stream_index(); + + Some(ix) +} + +fn get_list_ix_from_stream_ix(list_model: &gio::ListStore, stream_ix: StreamIndex) -> u32 { + for i in 0..list_model.n_items() { + if let Some(item) = list_model.item(i) { + if let Some(track_info) = item.downcast_ref::<TrackInfo>() { + if track_info.get_stream_index() == stream_ix { + return i; + } + } + } + } + panic!("Stream index {} not found in list model", stream_ix); +} diff --git a/src/tracks.rs b/src/tracks.rs new file mode 100644 index 0000000..4d69e12 --- /dev/null +++ b/src/tracks.rs @@ -0,0 +1,38 @@ +use std::collections::BTreeMap; + +use relm4::SharedState; + +pub type StreamIndex = usize; + +#[derive(Debug, Clone)] +pub struct TrackMetadata { + pub language: Option<isolang::Language>, + pub title: Option<String>, +} + +#[derive(Debug, Clone)] +pub struct SubtitleTrack { + pub metadata: TrackMetadata, + pub cues: Vec<SubtitleCue>, +} + +#[derive(Debug, Clone)] +pub struct SubtitleCue { + pub start: gst::ClockTime, + pub end: gst::ClockTime, + pub text: String, +} + +pub static SUBTITLE_TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new(); + +impl TrackMetadata { + pub fn from_ffmpeg_stream(stream: &ffmpeg::Stream) -> Self { + let language_code = stream.metadata().get("language").map(|s| s.to_string()); + let title = stream.metadata().get("title").map(|s| s.to_string()); + + Self { + language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)), + title, + } + } +} diff --git a/src/transcript.rs b/src/transcript.rs index eb3459d..a8ae554 100644 --- a/src/transcript.rs +++ b/src/transcript.rs @@ -1,7 +1,7 @@ use gtk::{ListBox, pango::WrapMode, prelude::*}; use relm4::prelude::*; -use crate::subtitle_extractor::{StreamIndex, SubtitleCue, TRACKS}; +use crate::tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue}; #[derive(Debug)] pub enum SubtitleCueOutput { @@ -122,7 +122,7 @@ impl SimpleComponent for Transcript { self.active_cues.guard().clear(); if let Some(stream_ix) = stream_index { - let tracks = TRACKS.read(); + let tracks = SUBTITLE_TRACKS.read(); if let Some(track) = tracks.get(&stream_ix) { for cue in &track.cues { self.active_cues.guard().push_back(cue.clone()); diff --git a/src/util/tracker.rs b/src/util/tracker.rs index 66c30a9..69a1c5f 100644 --- a/src/util/tracker.rs +++ b/src/util/tracker.rs @@ -24,6 +24,12 @@ impl<T> Tracker<T> { self.inner = value; } + /// Sets the inner value to `value` and marks the tracker as clean. + pub fn set_clean(&mut self, value: T) { + self.dirty = false; + self.inner = value; + } + pub fn is_dirty(&self) -> bool { self.dirty } |
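For context on the `set_clean` addition: the doc comment implies a sibling setter that marks the tracker dirty (its tail, `self.inner = value;`, is visible in the hunk context), so `set_clean` covers updates that should not re-trigger `#[track(...)]`-guarded view code. A minimal sketch, assuming that setter is named `set`, that `reset` clears the dirty flag (as its former use at the top of `update` in subtitle_selection_dialog.rs suggests), and that the type is reachable at this path:

    use crate::util::tracker::Tracker; // path assumed from src/util/tracker.rs

    fn sync(tracker: &mut Tracker<Option<usize>>) {
        tracker.set(Some(3)); // user-driven change: tracked view code reacts
        assert!(tracker.is_dirty());

        tracker.reset(); // the next update() pass acknowledges the change

        tracker.set_clean(Some(3)); // silent re-sync: no redundant view work
        assert!(!tracker.is_dirty());
    }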
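More broadly, the new tracks.rs centralizes subtitle data in a process-wide relm4 `SharedState` keyed by `StreamIndex`: consumers like Transcript call `SUBTITLE_TRACKS.read()` directly, and messages carry only indices. The producer side is not shown in this diff; a hedged sketch of what filling the map could look like, with all names and call sites here being illustrative only:

    use crate::tracks::{SUBTITLE_TRACKS, SubtitleCue, SubtitleTrack, TrackMetadata};

    // Hypothetical producer, e.g. invoked from an extraction worker as cues
    // are decoded; `stream_index`, `metadata`, and `cue` would come from the
    // demuxer.
    fn publish_cue(stream_index: usize, metadata: TrackMetadata, cue: SubtitleCue) {
        let mut tracks = SUBTITLE_TRACKS.write(); // RwLock-style write guard
        tracks
            .entry(stream_index)
            .or_insert_with(|| SubtitleTrack {
                metadata,
                cues: Vec::new(),
            })
            .cues
            .push(cue);
    }

One consequence of this design: the selection dialog can rebuild its `gio::ListStore` from the same map on every Show message, which is exactly what `update_track_list_model` above does.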