aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMalte Voos <git@mal.tc>2025-11-14 15:30:49 +0100
committerMalte Voos <git@mal.tc>2025-11-14 15:30:49 +0100
commita8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e (patch)
tree542b42d3316138043272faba42e0d1005f8403b6
parenta42a73378b7c527a5e4600544b2d7a86d68c5aac (diff)
downloadlleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.tar.gz
lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.zip
implement file/url open dialog
-rw-r--r--src/app.rs84
-rw-r--r--src/main.rs21
-rw-r--r--src/open_dialog.rs328
-rw-r--r--src/subtitle_extraction/embedded.rs118
-rw-r--r--src/subtitle_extraction/mod.rs159
-rw-r--r--src/subtitle_extraction/whisper.rs75
-rw-r--r--src/subtitle_extractor.rs209
-rw-r--r--src/subtitle_extractor_aishit.rs732
-rw-r--r--src/subtitle_selection_dialog.rs214
-rw-r--r--src/subtitle_view.rs1
-rw-r--r--src/track_selector.rs188
-rw-r--r--src/tracks.rs38
-rw-r--r--src/transcript.rs4
-rw-r--r--src/util/tracker.rs6
14 files changed, 1768 insertions, 409 deletions
diff --git a/src/app.rs b/src/app.rs
index 18f03e8..7aa5abd 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -2,26 +2,27 @@ use adw::prelude::*;
use relm4::{WorkerController, prelude::*};
use crate::{
+ open_dialog::{OpenDialog, OpenDialogMsg, OpenDialogOutput},
player::{Player, PlayerMsg, PlayerOutput},
preferences::{Preferences, PreferencesMsg},
- subtitle_extractor::{
- StreamIndex, SubtitleExtractor, SubtitleExtractorMsg, SubtitleExtractorOutput, TRACKS,
- },
+ subtitle_extraction::{SubtitleExtractor, SubtitleExtractorMsg, SubtitleExtractorOutput},
subtitle_selection_dialog::{
SubtitleSelectionDialog, SubtitleSelectionDialogMsg, SubtitleSelectionDialogOutput,
},
subtitle_view::{SubtitleView, SubtitleViewMsg, SubtitleViewOutput},
+ tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue},
transcript::{Transcript, TranscriptMsg, TranscriptOutput},
util::OptionTracker,
};
pub struct App {
- url: String,
transcript: Controller<Transcript>,
player: Controller<Player>,
subtitle_view: Controller<SubtitleView>,
extractor: WorkerController<SubtitleExtractor>,
+
preferences: Controller<Preferences>,
+ open_url_dialog: Controller<OpenDialog>,
subtitle_selection_dialog: Controller<SubtitleSelectionDialog>,
primary_stream_ix: Option<StreamIndex>,
@@ -37,20 +38,24 @@ pub struct App {
#[derive(Debug)]
pub enum AppMsg {
- NewOrUpdatedTrackMetadata(StreamIndex),
- NewCue(StreamIndex, crate::subtitle_extractor::SubtitleCue),
+ NewCue(StreamIndex, SubtitleCue),
SubtitleExtractionComplete,
PrimarySubtitleTrackSelected(Option<StreamIndex>),
SecondarySubtitleTrackSelected(Option<StreamIndex>),
PositionUpdate(gst::ClockTime),
SetHoveringSubtitleCue(bool),
+ ShowUrlOpenDialog,
ShowPreferences,
ShowSubtitleSelectionDialog,
+ Play {
+ url: String,
+ whisper_stream_index: Option<StreamIndex>,
+ },
}
#[relm4::component(pub)]
impl SimpleComponent for App {
- type Init = String;
+ type Init = ();
type Input = AppMsg;
type Output = ();
@@ -61,10 +66,13 @@ impl SimpleComponent for App {
set_default_width: 800,
set_default_height: 600,
- #[name(toolbar_view)]
adw::ToolbarView {
add_top_bar = &adw::HeaderBar {
pack_start = &gtk::Button {
+ set_label: "Open...",
+ connect_clicked => AppMsg::ShowUrlOpenDialog,
+ },
+ pack_end = &gtk::Button {
set_icon_name: "settings-symbolic",
connect_clicked => AppMsg::ShowPreferences,
}
@@ -86,7 +94,7 @@ impl SimpleComponent for App {
}
fn init(
- url: Self::Init,
+ _init: Self::Init,
root: Self::Root,
sender: ComponentSender<Self>,
) -> ComponentParts<Self> {
@@ -112,9 +120,6 @@ impl SimpleComponent for App {
let extractor = SubtitleExtractor::builder().detach_worker(()).forward(
sender.input_sender(),
|output| match output {
- SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(stream_index) => {
- AppMsg::NewOrUpdatedTrackMetadata(stream_index)
- }
SubtitleExtractorOutput::NewCue(stream_index, cue) => {
AppMsg::NewCue(stream_index, cue)
}
@@ -123,6 +128,18 @@ impl SimpleComponent for App {
);
let preferences = Preferences::builder().launch(root.clone().into()).detach();
+ let open_url_dialog = OpenDialog::builder().launch(root.clone().into()).forward(
+ sender.input_sender(),
+ |output| match output {
+ OpenDialogOutput::Play {
+ url,
+ whisper_stream_index,
+ } => AppMsg::Play {
+ url,
+ whisper_stream_index,
+ },
+ },
+ );
let subtitle_selection_dialog = SubtitleSelectionDialog::builder()
.launch(root.clone().into())
.forward(sender.input_sender(), |output| match output {
@@ -135,12 +152,13 @@ impl SimpleComponent for App {
});
let model = Self {
- url: url.clone(), // TODO remove clone
player,
transcript,
subtitle_view,
extractor,
+
preferences,
+ open_url_dialog,
subtitle_selection_dialog,
primary_stream_ix: None,
@@ -155,26 +173,14 @@ impl SimpleComponent for App {
let widgets = view_output!();
- model
- .player
- .sender()
- .send(PlayerMsg::SetUrl(url.clone()))
- .unwrap();
- model
- .extractor
- .sender()
- .send(SubtitleExtractorMsg::ExtractFromUrl(url))
- .unwrap();
-
ComponentParts { model, widgets }
}
- fn update(&mut self, msg: Self::Input, _sender: ComponentSender<Self>) {
+ fn update(&mut self, message: Self::Input, _sender: ComponentSender<Self>) {
self.primary_last_cue_ix.reset();
self.secondary_last_cue_ix.reset();
- match msg {
- AppMsg::NewOrUpdatedTrackMetadata(_stream_index) => {}
+ match message {
AppMsg::NewCue(stream_index, cue) => {
self.transcript
.sender()
@@ -257,6 +263,12 @@ impl SimpleComponent for App {
self.autopaused = false;
}
}
+ AppMsg::ShowUrlOpenDialog => {
+ self.open_url_dialog
+ .sender()
+ .send(OpenDialogMsg::Show)
+ .unwrap();
+ }
AppMsg::ShowPreferences => {
self.preferences
.sender()
@@ -269,6 +281,22 @@ impl SimpleComponent for App {
.send(SubtitleSelectionDialogMsg::Show)
.unwrap();
}
+ AppMsg::Play {
+ url,
+ whisper_stream_index,
+ } => {
+ self.player
+ .sender()
+ .send(PlayerMsg::SetUrl(url.clone()))
+ .unwrap();
+ self.extractor
+ .sender()
+ .send(SubtitleExtractorMsg::ExtractFromUrl {
+ url,
+ whisper_stream_index,
+ })
+ .unwrap();
+ }
}
}
}
@@ -279,7 +307,7 @@ impl App {
position: gst::ClockTime,
last_cue_ix: &mut OptionTracker<usize>,
) -> Option<String> {
- let lock = TRACKS.read();
+ let lock = SUBTITLE_TRACKS.read();
let track = lock.get(&stream_ix)?;
// try to find current cue quickly (should usually succeed during playback)
diff --git a/src/main.rs b/src/main.rs
index 0b7db50..01ca56d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,15 +1,16 @@
mod app;
mod cue_view;
+mod open_dialog;
mod player;
mod preferences;
-mod subtitle_extractor;
+mod subtitle_extraction;
mod subtitle_selection_dialog;
mod subtitle_view;
+mod track_selector;
+mod tracks;
mod transcript;
mod util;
-use std::env;
-
use gtk::{CssProvider, STYLE_PROVIDER_PRIORITY_APPLICATION, gdk, glib};
use relm4::RelmApp;
@@ -18,12 +19,12 @@ use crate::app::App;
fn main() {
env_logger::init();
- let args: Vec<String> = env::args().collect();
- if args.len() != 2 {
- eprintln!("Usage: {} <video_url>", args[0]);
- std::process::exit(1);
- }
- let video_url = args[1].clone();
+ // let args: Vec<String> = env::args().collect();
+ // if args.len() != 2 {
+ // eprintln!("Usage: {} <video_url>", args[0]);
+ // std::process::exit(1);
+ // }
+ // let video_url = args[1].clone();
gtk::init().expect("Failed to initialize GTK");
gst::init().expect("Failed to initialize GStreamer");
@@ -43,5 +44,5 @@ fn main() {
relm4::RELM_THREADS.set(4).unwrap();
let relm = RelmApp::new("tc.mal.lleap").with_args(vec![]);
- relm.run::<App>(video_url);
+ relm.run::<App>(());
}
diff --git a/src/open_dialog.rs b/src/open_dialog.rs
new file mode 100644
index 0000000..2f17c59
--- /dev/null
+++ b/src/open_dialog.rs
@@ -0,0 +1,328 @@
+use std::collections::BTreeMap;
+
+use adw::prelude::*;
+use gtk::gio;
+use gtk::glib::clone;
+use relm4::prelude::*;
+
+use crate::track_selector::{
+ TrackInfo, TrackSelector, TrackSelectorInit, TrackSelectorMsg, TrackSelectorOutput,
+};
+use crate::tracks::{StreamIndex, TrackMetadata};
+use crate::util::Tracker;
+
+pub struct OpenDialog {
+ parent_window: adw::ApplicationWindow,
+ dialog: adw::PreferencesDialog,
+ toast_overlay: Option<adw::ToastOverlay>,
+ navigation_view: Option<adw::NavigationView>,
+ whisper_track_selector: Controller<TrackSelector>,
+
+ url: Tracker<String>,
+ do_whisper_extraction: bool,
+ whisper_stream_index: Option<StreamIndex>,
+
+ metadata_command_running: bool,
+}
+
+#[derive(Debug)]
+pub enum OpenDialogMsg {
+ Show,
+ Next,
+ Cancel,
+ SelectFile,
+ FileSelected(gio::File),
+ UrlChanged(String),
+ SetDoWhisperExtraction(bool),
+ WhisperTrackSelected(Option<StreamIndex>),
+ Play,
+}
+
+#[derive(Debug)]
+pub enum OpenDialogOutput {
+ Play {
+ url: String,
+ whisper_stream_index: Option<StreamIndex>,
+ },
+}
+
+#[relm4::component(pub)]
+impl Component for OpenDialog {
+ type Init = adw::ApplicationWindow;
+ type Input = OpenDialogMsg;
+ type Output = OpenDialogOutput;
+ type CommandOutput = Result<BTreeMap<StreamIndex, TrackMetadata>, ffmpeg::Error>;
+
+ view! {
+ #[root]
+ adw::PreferencesDialog {
+ set_title: "Open URL",
+
+ #[wrap(Some)]
+ #[name(toast_overlay)]
+ set_child = &adw::ToastOverlay {
+ #[wrap(Some)]
+ #[name(navigation_view)]
+ set_child = &adw::NavigationView {
+ add = &adw::NavigationPage {
+ set_title: "Open File or Stream",
+
+ #[wrap(Some)]
+ set_child = &adw::ToolbarView {
+ add_top_bar = &adw::HeaderBar {
+ set_show_end_title_buttons: false,
+
+ pack_start = &gtk::Button {
+ set_label: "Cancel",
+ connect_clicked => OpenDialogMsg::Cancel,
+ },
+
+ pack_end = &gtk::Button {
+ set_label: "Next",
+ #[watch]
+ set_sensitive: !(model.url.get().is_empty() || model.metadata_command_running),
+ connect_clicked => OpenDialogMsg::Next,
+ add_css_class: "suggested-action",
+ },
+
+ pack_end = &adw::Spinner {
+ #[watch]
+ set_visible: model.metadata_command_running,
+ },
+ },
+
+ #[wrap(Some)]
+ set_content = &adw::PreferencesPage {
+ adw::PreferencesGroup {
+ set_title: "Open a file from your computer",
+ adw::ButtonRow {
+ set_title: "Select File",
+ connect_activated => OpenDialogMsg::SelectFile,
+ }
+ },
+
+ adw::PreferencesGroup {
+ set_title: "Or, enter a stream URL",
+ set_description: Some("Currently, only file:// and http(s):// URLs are officially supported, although other protocols may work as well."),
+
+ adw::EntryRow {
+ set_title: "URL",
+ #[track(model.url.is_dirty())]
+ set_text: model.url.get(),
+ connect_changed[sender] => move |entry| {
+ sender.input(OpenDialogMsg::UrlChanged(entry.text().to_string()));
+ },
+ }
+ }
+ }
+ }
+ },
+
+ add = &adw::NavigationPage {
+ set_tag = Some("playback_options"),
+ set_title: "Playback Options",
+
+ #[wrap(Some)]
+ set_child = &adw::ToolbarView {
+ add_top_bar = &adw::HeaderBar {
+ set_show_end_title_buttons: false,
+
+ pack_end = &gtk::Button {
+ connect_clicked => OpenDialogMsg::Play,
+ add_css_class: "suggested-action",
+
+ gtk::Label {
+ set_text: "Play",
+ }
+ },
+ },
+
+ #[wrap(Some)]
+ set_content = &adw::PreferencesPage {
+ adw::PreferencesGroup {
+ adw::ExpanderRow {
+ set_title: "Generate subtitles from audio",
+ set_subtitle: "See also \"Whisper settings\" in Preferences",
+ set_show_enable_switch: true,
+ #[watch]
+ set_enable_expansion: model.do_whisper_extraction,
+ connect_enable_expansion_notify[sender] => move |expander_row| {
+ sender.input(OpenDialogMsg::SetDoWhisperExtraction(expander_row.enables_expansion()))
+ },
+
+ add_row: model.whisper_track_selector.widget(),
+ },
+ },
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ fn init(
+ parent_window: Self::Init,
+ root: Self::Root,
+ sender: ComponentSender<Self>,
+ ) -> ComponentParts<Self> {
+ let whisper_track_selector = TrackSelector::builder()
+ .launch(TrackSelectorInit {
+ title: "Audio track",
+ subtitle: None,
+ })
+ .forward(sender.input_sender(), |output| match output {
+ TrackSelectorOutput::Changed(ix) => OpenDialogMsg::WhisperTrackSelected(ix),
+ });
+ let mut model = Self {
+ parent_window,
+ dialog: root.clone(),
+ toast_overlay: None,
+ navigation_view: None,
+ whisper_track_selector,
+
+ url: Tracker::new(String::new()),
+ do_whisper_extraction: false,
+ whisper_stream_index: None,
+
+ metadata_command_running: false,
+ };
+
+ let widgets = view_output!();
+
+ model.toast_overlay = Some(widgets.toast_overlay.clone());
+ model.navigation_view = Some(widgets.navigation_view.clone());
+
+ ComponentParts { model, widgets }
+ }
+
+ fn update(&mut self, message: Self::Input, sender: ComponentSender<Self>, _root: &Self::Root) {
+ match message {
+ OpenDialogMsg::Show => {
+ self.reset();
+ self.dialog.present(Some(&self.parent_window));
+ }
+ OpenDialogMsg::UrlChanged(url) => self.url.set_clean(url),
+ OpenDialogMsg::Next => self.fetch_metadata(sender),
+ OpenDialogMsg::Cancel => {
+ self.dialog.close();
+ }
+ OpenDialogMsg::SelectFile => {
+ let dialog = gtk::FileDialog::new();
+ dialog.open(
+ Some(&self.parent_window),
+ None as Option<&gio::Cancellable>,
+ clone!(
+ #[strong]
+ sender,
+ move |res| {
+ if let Ok(file) = res {
+ sender.input(OpenDialogMsg::FileSelected(file));
+ }
+ }
+ ),
+ );
+ }
+ OpenDialogMsg::FileSelected(file) => {
+ self.url.set(file.uri().into());
+ }
+ OpenDialogMsg::Play => {
+ sender
+ .output(OpenDialogOutput::Play {
+ url: self.url.get().clone(),
+ whisper_stream_index: if self.do_whisper_extraction {
+ self.whisper_stream_index
+ } else {
+ None
+ },
+ })
+ .unwrap();
+ self.dialog.close();
+ }
+ OpenDialogMsg::SetDoWhisperExtraction(val) => {
+ self.do_whisper_extraction = val;
+ }
+ OpenDialogMsg::WhisperTrackSelected(track_index) => {
+ self.whisper_stream_index = track_index;
+ }
+ }
+ }
+
+ // once we get all the audio track metadata, we update the whisper track
+ // dropdown
+ fn update_cmd(
+ &mut self,
+ message: Self::CommandOutput,
+ _sender: ComponentSender<Self>,
+ _root: &Self::Root,
+ ) {
+ self.metadata_command_running = false;
+
+ match message {
+ Ok(audio_tracks) => {
+ let list_model = gio::ListStore::new::<TrackInfo>();
+
+ for (&stream_index, track) in audio_tracks.iter() {
+ let track_info = TrackInfo::new(
+ stream_index,
+ track.language.map(|lang| lang.to_name()),
+ track.title.clone(),
+ );
+ list_model.append(&track_info);
+ }
+
+ self.whisper_track_selector
+ .sender()
+ .send(TrackSelectorMsg::SetListModel(list_model))
+ .unwrap();
+
+ self.next();
+ }
+ Err(e) => {
+ let toast = adw::Toast::builder()
+ .title(&format!("Error fetching stream metadata: {}", e))
+ .build();
+
+ self.toast_overlay.as_ref().unwrap().add_toast(toast);
+ }
+ }
+ }
+}
+
+impl OpenDialog {
+ fn reset(&mut self) {
+ self.url.get_mut().clear();
+ self.do_whisper_extraction = false;
+ self.whisper_stream_index = None;
+ }
+
+ fn fetch_metadata(&mut self, sender: ComponentSender<Self>) {
+ let url = self.url.get().clone();
+
+ sender.spawn_oneshot_command(move || {
+ let input = ffmpeg::format::input(&url)?;
+
+ let audio_tracks = input
+ .streams()
+ .filter_map(|stream| {
+ if stream.parameters().medium() == ffmpeg::media::Type::Audio {
+ Some((stream.index(), TrackMetadata::from_ffmpeg_stream(&stream)))
+ } else {
+ None
+ }
+ })
+ .collect::<BTreeMap<_, _>>();
+
+ Ok(audio_tracks)
+ });
+
+ self.metadata_command_running = true;
+ }
+
+ fn next(&self) {
+ self.navigation_view
+ .as_ref()
+ .unwrap()
+ .push_by_tag("playback_options");
+ }
+}
diff --git a/src/subtitle_extraction/embedded.rs b/src/subtitle_extraction/embedded.rs
new file mode 100644
index 0000000..5cdf813
--- /dev/null
+++ b/src/subtitle_extraction/embedded.rs
@@ -0,0 +1,118 @@
+use std::sync::mpsc;
+
+use anyhow::Context;
+
+use crate::subtitle_extraction::*;
+
+pub fn extract_embedded_subtitles(
+ // stream index to use when storing extracted subtitles, this index already
+ // has to be in TRACKS when this function is called!
+ stream_ix: StreamIndex,
+ context: ffmpeg::codec::Context,
+ time_base: ffmpeg::Rational,
+ packet_rx: mpsc::Receiver<ffmpeg::Packet>,
+ sender: ComponentSender<SubtitleExtractor>,
+) -> anyhow::Result<()> {
+ let mut decoder = context
+ .decoder()
+ .subtitle()
+ .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?;
+
+ while let Ok(packet) = packet_rx.recv() {
+ let mut subtitle = ffmpeg::Subtitle::new();
+ match decoder.decode(&packet, &mut subtitle) {
+ Ok(true) => {
+ if let Some(cue) = parse_subtitle(&subtitle, &packet, time_base) {
+ SUBTITLE_TRACKS
+ .write()
+ .get_mut(&stream_ix)
+ .unwrap()
+ .cues
+ .push(cue.clone());
+ sender
+ .output(SubtitleExtractorOutput::NewCue(stream_ix, cue))
+ .unwrap();
+ } else {
+ log::error!("error parsing subtitle at pts {:?}", packet.pts())
+ }
+ }
+ Ok(false) => {
+ log::debug!("got empty (?) subtitle, not sure if this should ever happen");
+ }
+ Err(e) => {
+ log::error!("error decoding subtitle: {:?}", e)
+ }
+ }
+ }
+
+ Ok(())
+}
+
+fn parse_subtitle(
+ subtitle: &ffmpeg::Subtitle,
+ packet: &ffmpeg::Packet,
+ time_base: Rational,
+) -> Option<SubtitleCue> {
+ let time_to_clock_time = |time: i64| {
+ let nseconds: i64 =
+ (time * time_base.numerator() as i64 * 1_000_000_000) / time_base.denominator() as i64;
+ gst::ClockTime::from_nseconds(nseconds as u64)
+ };
+
+ let text = subtitle
+ .rects()
+ .into_iter()
+ .map(|rect| match rect {
+ ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
+ ffmpeg::subtitle::Rect::Ass(ass) => {
+ extract_dialogue_text(ass.get()).unwrap_or(String::new())
+ }
+ _ => String::new(),
+ })
+ .collect::<Vec<String>>()
+ .join("\n— ");
+
+ let start = time_to_clock_time(packet.pts()?);
+ let end = time_to_clock_time(packet.pts()? + packet.duration());
+
+ Some(SubtitleCue { start, end, text })
+}
+
+fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
+ // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
+ // we need the 9th field (Text), so split on comma but only take first 9 splits
+ // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
+ let text = dialogue_line.splitn(9, ',').last()?;
+
+ // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
+ let mut result = String::new();
+ let mut in_tag = false;
+ let mut char_iter = text.chars().peekable();
+
+ while let Some(c) = char_iter.next() {
+ if c == '{' && char_iter.peek() == Some(&'\\') {
+ in_tag = true;
+ } else if c == '}' {
+ in_tag = false;
+ } else if !in_tag {
+ // process line breaks and hard spaces
+ if c == '\\' {
+ match char_iter.peek() {
+ Some(&'N') => {
+ char_iter.next();
+ result.push('\n');
+ }
+ Some(&'n') | Some(&'h') => {
+ char_iter.next();
+ result.push(' ');
+ }
+ _ => result.push(c),
+ }
+ } else {
+ result.push(c);
+ }
+ }
+ }
+
+ Some(result)
+}
diff --git a/src/subtitle_extraction/mod.rs b/src/subtitle_extraction/mod.rs
new file mode 100644
index 0000000..9e7fff4
--- /dev/null
+++ b/src/subtitle_extraction/mod.rs
@@ -0,0 +1,159 @@
+/// Extraction of embedded subtitles
+mod embedded;
+/// Synthesis of subtitles from audio using whisper.cpp
+mod whisper;
+
+use std::{collections::BTreeMap, sync::mpsc, thread};
+
+use ffmpeg::Rational;
+use relm4::{ComponentSender, Worker};
+
+use crate::tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue, SubtitleTrack, TrackMetadata};
+
+pub struct SubtitleExtractor {}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorMsg {
+ ExtractFromUrl {
+ url: String,
+ // the index of the audio stream on which to run a whisper transcription
+ whisper_stream_index: Option<usize>,
+ },
+}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorOutput {
+ NewCue(StreamIndex, SubtitleCue),
+ ExtractionComplete,
+}
+
+impl Worker for SubtitleExtractor {
+ type Init = ();
+ type Input = SubtitleExtractorMsg;
+ type Output = SubtitleExtractorOutput;
+
+ fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
+ Self {}
+ }
+
+ fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
+ match msg {
+ SubtitleExtractorMsg::ExtractFromUrl {
+ url,
+ whisper_stream_index: whisper_audio_stream_ix,
+ } => {
+ self.handle_extract_from_url(url, whisper_audio_stream_ix, sender);
+ }
+ }
+ }
+}
+
+impl SubtitleExtractor {
+ fn handle_extract_from_url(
+ &mut self,
+ url: String,
+ whisper_audio_stream_ix: Option<usize>,
+ sender: ComponentSender<Self>,
+ ) {
+ // Clear existing tracks
+ SUBTITLE_TRACKS.write().clear();
+
+ match self.extract_subtitles(&url, whisper_audio_stream_ix, sender.clone()) {
+ Ok(_) => {
+ log::info!("Subtitle extraction completed successfully");
+ sender
+ .output(SubtitleExtractorOutput::ExtractionComplete)
+ .unwrap();
+ }
+ Err(e) => {
+ log::error!("Subtitle extraction failed: {}", e);
+ }
+ }
+ }
+
+ fn extract_subtitles(
+ &self,
+ url: &str,
+ whisper_audio_stream_ix: Option<usize>,
+ sender: ComponentSender<Self>,
+ ) -> anyhow::Result<()> {
+ let mut input = ffmpeg::format::input(&url)?;
+
+ let mut subtitle_extractors = BTreeMap::new();
+
+ // create extractor for each subtitle stream
+ for stream in input.streams() {
+ let stream_ix = stream.index();
+
+ if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
+ let metadata = TrackMetadata::from_ffmpeg_stream(&stream);
+ let track = SubtitleTrack {
+ metadata,
+ cues: Vec::new(),
+ };
+
+ SUBTITLE_TRACKS.write().insert(stream_ix, track);
+
+ let context = ffmpeg::codec::Context::from_parameters(stream.parameters())?;
+ let (packet_tx, packet_rx) = mpsc::channel();
+ let time_base = stream.time_base();
+ let sender = sender.clone();
+ let join_handle = thread::spawn(move || {
+ embedded::extract_embedded_subtitles(
+ stream_ix, context, time_base, packet_rx, sender,
+ )
+ });
+
+ subtitle_extractors.insert(stream_ix, (packet_tx, join_handle));
+ }
+ }
+
+ if let Some(stream_ix) = whisper_audio_stream_ix {
+ let stream = input.stream(stream_ix).unwrap();
+
+ let mut metadata = TrackMetadata::from_ffmpeg_stream(&stream);
+ metadata.title = Some(match metadata.title {
+ Some(title) => format!("Auto-generated from audio (Whisper): {}", title),
+ None => "Auto-generated from audio (Whisper)".to_string(),
+ });
+
+ let track = SubtitleTrack {
+ metadata,
+ cues: Vec::new(),
+ };
+
+ SUBTITLE_TRACKS.write().insert(stream_ix, track);
+
+ let context = ffmpeg::codec::Context::from_parameters(stream.parameters())?;
+ let (packet_tx, packet_rx) = mpsc::channel();
+ let time_base = stream.time_base();
+ let sender = sender.clone();
+ let join_handle = thread::spawn(move || {
+ whisper::generate_whisper_subtitles(
+ stream_ix, context, time_base, packet_rx, sender,
+ )
+ });
+
+ subtitle_extractors.insert(stream_ix, (packet_tx, join_handle));
+ }
+
+ // process packets
+ for (stream, packet) in input.packets() {
+ let stream_index = stream.index();
+
+ if let Some((packet_tx, _)) = subtitle_extractors.get_mut(&stream_index) {
+ packet_tx.send(packet).unwrap();
+ }
+ }
+
+ // wait for extraction to complete
+ for (_, (_, join_handle)) in subtitle_extractors {
+ join_handle
+ .join()
+ .unwrap()
+ .unwrap_or_else(|e| log::error!("error running subtitle extraction: {}", e));
+ }
+
+ Ok(())
+ }
+}
diff --git a/src/subtitle_extraction/whisper.rs b/src/subtitle_extraction/whisper.rs
new file mode 100644
index 0000000..5622d6f
--- /dev/null
+++ b/src/subtitle_extraction/whisper.rs
@@ -0,0 +1,75 @@
+use std::sync::mpsc;
+
+use anyhow::Context;
+use ffmpeg::filter;
+
+use crate::{subtitle_extraction::*, tracks::StreamIndex};
+
+pub fn generate_whisper_subtitles(
+ // stream index to use when storing generated subtitles, this index
+ // already has to be in TRACKS when this function is called!
+ stream_ix: StreamIndex,
+ context: ffmpeg::codec::Context,
+ time_base: ffmpeg::Rational,
+ packet_rx: mpsc::Receiver<ffmpeg::Packet>,
+ sender: ComponentSender<SubtitleExtractor>,
+) -> anyhow::Result<()> {
+ let mut decoder = context
+ .decoder()
+ .audio()
+ .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?;
+
+ let mut filter = filter::Graph::new();
+
+ let abuffer_args = format!(
+ "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+ decoder.time_base(),
+ decoder.rate(),
+ decoder.format().name(),
+ decoder.channel_layout().bits()
+ );
+ let whisper_args = format!(
+ "model={}:queue={}:format=json",
+ "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin", 30
+ );
+ let filter_spec = format!("[src] whisper={} [sink]", whisper_args);
+
+ filter.add(&filter::find("abuffer").unwrap(), "src", &abuffer_args)?;
+ filter.add(&filter::find("abuffersink").unwrap(), "sink", "")?;
+ filter
+ .output("src", 0)?
+ .input("sink", 0)?
+ .parse(&filter_spec)?;
+ filter.validate()?;
+
+ let mut source_ctx = filter.get("src").unwrap();
+ let mut sink_ctx = filter.get("sink").unwrap();
+
+ while let Ok(packet) = packet_rx.recv() {
+ handle_packet(&mut decoder, source_ctx.source(), sink_ctx.sink(), packet)
+ .unwrap_or_else(|e| log::error!("error handling audio packet: {}", e))
+ }
+
+ Ok(())
+}
+
+fn handle_packet(
+ decoder: &mut ffmpeg::decoder::Audio,
+ mut source: filter::Source,
+ mut sink: filter::Sink,
+ packet: ffmpeg::Packet,
+) -> anyhow::Result<()> {
+ let mut in_frame = unsafe { ffmpeg::Frame::empty() };
+ decoder.send_packet(&packet)?;
+ decoder.receive_frame(&mut in_frame)?;
+ source.add(&in_frame)?;
+
+ let mut out_frame = unsafe { ffmpeg::Frame::empty() };
+ sink.frame(&mut out_frame)?;
+
+ if let Some(text) = out_frame.metadata().get("lavfi.whisper.text") {
+ println!("{}", text);
+ }
+
+ Ok(())
+}
diff --git a/src/subtitle_extractor.rs b/src/subtitle_extractor.rs
deleted file mode 100644
index b628d73..0000000
--- a/src/subtitle_extractor.rs
+++ /dev/null
@@ -1,209 +0,0 @@
-use std::collections::BTreeMap;
-
-use anyhow::Result;
-
-use ffmpeg::Rational;
-use log::{debug, error, info};
-use relm4::{ComponentSender, SharedState, Worker};
-
-pub type StreamIndex = usize;
-
-#[derive(Debug, Clone)]
-pub struct SubtitleCue {
- pub start: gst::ClockTime,
- pub end: gst::ClockTime,
- pub text: String,
-}
-
-#[derive(Debug, Clone)]
-pub struct SubtitleTrack {
- pub language: Option<isolang::Language>,
- pub title: Option<String>,
- pub cues: Vec<SubtitleCue>,
-}
-
-pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();
-
-pub struct SubtitleExtractor {}
-
-#[derive(Debug)]
-pub enum SubtitleExtractorMsg {
- ExtractFromUrl(String),
-}
-
-#[derive(Debug)]
-pub enum SubtitleExtractorOutput {
- NewOrUpdatedTrackMetadata(StreamIndex),
- NewCue(StreamIndex, SubtitleCue),
- ExtractionComplete,
-}
-
-impl Worker for SubtitleExtractor {
- type Init = ();
- type Input = SubtitleExtractorMsg;
- type Output = SubtitleExtractorOutput;
-
- fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
- Self {}
- }
-
- fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
- match msg {
- SubtitleExtractorMsg::ExtractFromUrl(url) => {
- self.handle_extract_from_url(url, sender);
- }
- }
- }
-}
-
-impl SubtitleExtractor {
- fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
- // Clear existing tracks
- TRACKS.write().clear();
-
- // Try to extract subtitles using ffmpeg
- match self.extract_subtitles_ffmpeg(&url, &sender) {
- Ok(_) => {
- info!("Subtitle extraction completed successfully");
- sender
- .output(SubtitleExtractorOutput::ExtractionComplete)
- .unwrap();
- }
- Err(e) => {
- error!("FFmpeg extraction failed: {}", e);
- }
- }
- }
-
- fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
- let mut input = ffmpeg::format::input(&url)?;
-
- let mut subtitle_decoders = BTreeMap::new();
-
- // create decoder for each subtitle stream
- for (stream_index, stream) in input.streams().enumerate() {
- if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
- let language_code = stream.metadata().get("language").map(|s| s.to_string());
- let title = stream.metadata().get("title").map(|s| s.to_string());
-
- let track = SubtitleTrack {
- language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
- title,
- cues: Vec::new(),
- };
-
- TRACKS.write().insert(stream_index, track);
-
- sender
- .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
- stream_index,
- ))
- .unwrap();
-
- let context =
- ffmpeg::codec::context::Context::from_parameters(stream.parameters())?;
- if let Ok(decoder) = context.decoder().subtitle() {
- subtitle_decoders.insert(stream_index, decoder);
- debug!("Created decoder for subtitle stream {}", stream_index);
- } else {
- error!(
- "Failed to create decoder for subtitle stream {}",
- stream_index
- );
- }
- }
- }
-
- // process packets
- for (stream, packet) in input.packets() {
- let stream_index = stream.index();
-
- if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
- let mut subtitle = ffmpeg::Subtitle::new();
- if decoder.decode(&packet, &mut subtitle).is_ok() {
- if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
- {
- if let Some(track) = TRACKS.write().get_mut(&stream_index) {
- track.cues.push(cue.clone());
- }
-
- sender
- .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
- .unwrap();
- }
- }
- }
- }
-
- Ok(())
- }
-
- fn subtitle_to_cue(
- subtitle: &ffmpeg::Subtitle,
- packet: &ffmpeg::Packet,
- time_base: Rational,
- ) -> Option<SubtitleCue> {
- let time_to_clock_time = |time: i64| {
- let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000)
- / time_base.denominator() as i64;
- gst::ClockTime::from_nseconds(nseconds as u64)
- };
-
- let text = subtitle
- .rects()
- .into_iter()
- .map(|rect| match rect {
- ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
- ffmpeg::subtitle::Rect::Ass(ass) => {
- Self::extract_dialogue_text(ass.get()).unwrap_or(String::new())
- }
- _ => String::new(),
- })
- .collect::<Vec<String>>()
- .join("\n— ");
-
- let start = time_to_clock_time(packet.pts()?);
- let end = time_to_clock_time(packet.pts()? + packet.duration());
-
- Some(SubtitleCue { start, end, text })
- }
-
- fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
- // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
- // we need the 9th field (Text), so split on comma but only take first 9 splits
- // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
- let text = dialogue_line.splitn(9, ',').last()?;
-
- // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
- let mut result = String::new();
- let mut in_tag = false;
- let mut char_iter = text.chars().peekable();
-
- while let Some(c) = char_iter.next() {
- if c == '{' && char_iter.peek() == Some(&'\\') {
- in_tag = true;
- } else if c == '}' {
- in_tag = false;
- } else if !in_tag {
- // process line breaks and hard spaces
- if c == '\\' {
- match char_iter.peek() {
- Some(&'N') => {
- char_iter.next();
- result.push('\n');
- }
- Some(&'n') | Some(&'h') => {
- char_iter.next();
- result.push(' ');
- }
- _ => result.push(c),
- }
- } else {
- result.push(c);
- }
- }
- }
-
- Some(result)
- }
-}
diff --git a/src/subtitle_extractor_aishit.rs b/src/subtitle_extractor_aishit.rs
new file mode 100644
index 0000000..c615f6c
--- /dev/null
+++ b/src/subtitle_extractor_aishit.rs
@@ -0,0 +1,732 @@
+use std::collections::BTreeMap;
+
+use anyhow::Result;
+
+use ffmpeg::Rational;
+use log::{debug, error, info, warn};
+use relm4::{ComponentSender, SharedState, Worker};
+
/// Index of a stream inside the opened media container.
/// Generated (non-container) tracks reuse this key space with a high index.
pub type StreamIndex = usize;

/// A single timed subtitle line.
#[derive(Debug, Clone)]
pub struct SubtitleCue {
    // Start of the cue on the playback timeline.
    pub start: gst::ClockTime,
    // End of the cue on the playback timeline.
    pub end: gst::ClockTime,
    // Plain display text (ASS override tags already stripped).
    pub text: String,
}

/// One subtitle track: metadata plus the cues decoded so far.
#[derive(Debug, Clone)]
pub struct SubtitleTrack {
    // Language as declared by container metadata, if any.
    pub language: Option<isolang::Language>,
    // Human-readable title from container metadata, if any.
    pub title: Option<String>,
    pub cues: Vec<SubtitleCue>,
    pub is_generated: bool, // true if generated from audio
}

/// Global, shared map of all known subtitle tracks, keyed by stream index.
/// Written by the extractor worker and read by the UI components.
pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();

/// Stateless relm4 worker that demuxes a media URL and fills `TRACKS`.
pub struct SubtitleExtractor {}

/// Commands accepted by the extractor worker.
#[derive(Debug)]
pub enum SubtitleExtractorMsg {
    ExtractFromUrl(String),
}

/// Progress events emitted by the extractor worker.
#[derive(Debug)]
pub enum SubtitleExtractorOutput {
    NewOrUpdatedTrackMetadata(StreamIndex),
    NewCue(StreamIndex, SubtitleCue),
    ExtractionComplete,
}
+
+impl Worker for SubtitleExtractor {
+ type Init = ();
+ type Input = SubtitleExtractorMsg;
+ type Output = SubtitleExtractorOutput;
+
+ fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
+ Self {}
+ }
+
+ fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
+ match msg {
+ SubtitleExtractorMsg::ExtractFromUrl(url) => {
+ self.handle_extract_from_url(url, sender);
+ }
+ }
+ }
+}
+
+impl SubtitleExtractor {
+ fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
+ // Clear existing tracks
+ TRACKS.write().clear();
+
+ // Try to extract subtitles using ffmpeg
+ match self.extract_subtitles_ffmpeg(&url, &sender) {
+ Ok(_) => {
+ info!("Subtitle extraction completed successfully");
+ sender
+ .output(SubtitleExtractorOutput::ExtractionComplete)
+ .unwrap();
+ }
+ Err(e) => {
+ error!("FFmpeg extraction failed: {}", e);
+ }
+ }
+ }
+
+ fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
+ info!("Starting subtitle extraction from: {}", url);
+ let mut input = ffmpeg::format::input(&url)?;
+
+ // Log input format info
+ info!(
+ "Input format: {} ({} streams)",
+ input.format().name(),
+ input.streams().count()
+ );
+
+ // Check if whisper filter is available
+ if let Some(whisper_filter) = ffmpeg::filter::find("whisper") {
+ info!("Whisper filter found: {}", whisper_filter.name());
+ } else {
+ warn!("Whisper filter not found - audio transcription will be skipped");
+ }
+
+ let mut subtitle_decoders = BTreeMap::new();
+ let mut audio_decoder: Option<ffmpeg::decoder::Audio> = None;
+ let mut _whisper_filter_graph: Option<ffmpeg::filter::Graph> = None;
+ let mut whisper_source: Option<ffmpeg::filter::Context> = None;
+ let mut whisper_sink: Option<ffmpeg::filter::Context> = None;
+ let mut best_audio_stream_index: Option<usize> = None;
+
+ // Find best audio stream for whisper processing
+ if let Some(audio_stream) = input.streams().best(ffmpeg::media::Type::Audio) {
+ best_audio_stream_index = Some(audio_stream.index());
+
+ // Get audio parameters safely
+ let codec_id = audio_stream.parameters().id();
+ let channels = if let Ok(context) =
+ ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+ {
+ if let Ok(audio) = context.decoder().audio() {
+ audio.channels()
+ } else {
+ 0
+ }
+ } else {
+ 0
+ };
+ let sample_rate = if let Ok(context) =
+ ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+ {
+ if let Ok(audio) = context.decoder().audio() {
+ audio.rate()
+ } else {
+ 0
+ }
+ } else {
+ 0
+ };
+
+ info!(
+ "Found best audio stream: index {} (codec: {:?}, channels: {}, sample_rate: {})",
+ audio_stream.index(),
+ codec_id,
+ channels,
+ sample_rate
+ );
+ } else {
+ info!("No audio stream found for whisper processing");
+ }
+
+ // Set up whisper filter graph if we found an audio stream
+ if let Some(audio_index) = best_audio_stream_index {
+ info!("Setting up whisper filter for audio stream {}", audio_index);
+
+ let audio_stream = input.stream(audio_index).unwrap();
+ if let Ok(context) =
+ ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+ {
+ if let Ok(decoder) = context.decoder().audio() {
+ // Get decoder properties before moving it
+ let decoder_rate = decoder.rate();
+ let decoder_format = decoder.format();
+ let decoder_channel_layout = decoder.channel_layout().bits();
+
+ audio_decoder = Some(decoder);
+
+ // Set up whisper filter graph
+ debug!("Creating whisper filter graph...");
+ debug!(
+ "Audio stream time_base: {}, decoder rate: {}, format: {:?}, channel_layout: 0x{:x}",
+ audio_stream.time_base(),
+ decoder_rate,
+ decoder_format,
+ decoder_channel_layout
+ );
+ match self.setup_whisper_filter(&audio_stream) {
+ Ok((graph, source, sink)) => {
+ info!("Whisper filter graph created successfully");
+ _whisper_filter_graph = Some(graph);
+ whisper_source = Some(source);
+ whisper_sink = Some(sink);
+ debug!("Whisper source and sink contexts stored");
+
+ // Create a generated subtitle track
+ let track = SubtitleTrack {
+ language: Some(isolang::Language::from_639_1("en").unwrap_or_else(
+ || isolang::Language::from_639_3("eng").unwrap(),
+ )),
+ title: Some("Generated from Audio (Whisper)".to_string()),
+ cues: Vec::new(),
+ is_generated: true,
+ };
+
+ let whisper_stream_index = 1000; // Use high index for generated tracks
+ TRACKS.write().insert(whisper_stream_index, track);
+
+ sender
+ .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+ whisper_stream_index,
+ ))
+ .unwrap();
+ }
+ Err(e) => {
+ error!("Failed to setup whisper filter: {}", e);
+ debug!("Whisper filter error details: {:?}", e);
+ warn!(
+ "Audio transcription will be skipped due to filter setup failure"
+ );
+ }
+ }
+ }
+ }
+ }
+
+ // Create decoder for each subtitle stream
+ for (stream_index, stream) in input.streams().enumerate() {
+ if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
+ let language_code = stream.metadata().get("language").map(|s| s.to_string());
+ let title = stream.metadata().get("title").map(|s| s.to_string());
+
+ let track = SubtitleTrack {
+ language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
+ title,
+ cues: Vec::new(),
+ is_generated: false,
+ };
+
+ TRACKS.write().insert(stream_index, track);
+
+ sender
+ .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+ stream_index,
+ ))
+ .unwrap();
+
+ let context =
+ ffmpeg::codec::context::Context::from_parameters(stream.parameters())?;
+ if let Ok(decoder) = context.decoder().subtitle() {
+ subtitle_decoders.insert(stream_index, decoder);
+ debug!("Created decoder for subtitle stream {}", stream_index);
+ } else {
+ error!(
+ "Failed to create decoder for subtitle stream {}",
+ stream_index
+ );
+ }
+ } else {
+ debug!(
+ "Failed to create context for subtitle stream {}",
+ stream_index
+ );
+ }
+ }
+
+ // Process packets
+ for (stream, packet) in input.packets() {
+ let stream_index = stream.index();
+
+ // Process subtitle packets
+ if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
+ let mut subtitle = ffmpeg::Subtitle::new();
+ if decoder.decode(&packet, &mut subtitle).is_ok() {
+ if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
+ {
+ if let Some(track) = TRACKS.write().get_mut(&stream_index) {
+ track.cues.push(cue.clone());
+ }
+
+ sender
+ .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
+ .unwrap();
+ }
+ }
+ }
+
+ // Process audio packets for whisper
+ if Some(stream_index) == best_audio_stream_index {
+ debug!(
+ "Processing audio packet for whisper (stream: {}, pts: {:?}, duration: {:?})",
+ stream_index,
+ packet.pts(),
+ packet.duration()
+ );
+ debug!(
+ "Audio decoder available: {}, Whisper source available: {}",
+ audio_decoder.is_some(),
+ whisper_source.is_some()
+ );
+ if let (Some(decoder), Some(source)) = (&mut audio_decoder, &mut whisper_source) {
+ debug!("Both audio decoder and whisper source are available, processing...");
+ // Send packet to audio decoder
+ if let Err(e) = decoder.send_packet(&packet) {
+ debug!("Failed to send packet to audio decoder: {}", e);
+ }
+
+ // Get decoded frames and send to whisper filter
+ let mut frame = unsafe { ffmpeg::Frame::empty() };
+ let mut frame_count = 0;
+ while decoder.receive_frame(&mut frame).is_ok() {
+ frame_count += 1;
+ debug!(
+ "Decoded audio frame {} (pts: {:?})",
+ frame_count,
+ frame.pts()
+ );
+
+ // Add frame to whisper filter
+ if let Err(e) = source.source().add(&frame) {
+ error!("Failed to add frame to whisper filter: {}", e);
+ } else {
+ debug!("Successfully added frame to whisper filter");
+ }
+
+ // Check for whisper output after adding each frame
+ if let Some(sink) = &mut whisper_sink {
+ self.check_whisper_output(sink, sender)?;
+ }
+ }
+ if frame_count > 0 {
+ debug!("Processed {} audio frames for whisper", frame_count);
+ }
+ } else {
+ debug!("Skipping audio packet - decoder or whisper source not available");
+ }
+ }
+ }
+
+ // Flush audio decoder and whisper filter
+ if let (Some(decoder), Some(source), Some(sink)) =
+ (&mut audio_decoder, &mut whisper_source, &mut whisper_sink)
+ {
+ info!("Flushing audio decoder and whisper filter...");
+ // Flush decoder
+ if let Err(e) = decoder.send_eof() {
+ debug!("Failed to send EOF to decoder: {}", e);
+ }
+ let mut frame = unsafe { ffmpeg::Frame::empty() };
+ let mut final_frame_count = 0;
+ while decoder.receive_frame(&mut frame).is_ok() {
+ final_frame_count += 1;
+ source.source().add(&frame).ok();
+ }
+ debug!("Flushed {} final frames from decoder", final_frame_count);
+
+ // Flush filter and get results
+ debug!("Flushing whisper filter...");
+ if let Err(e) = source.source().flush() {
+ error!("Failed to flush whisper filter: {}", e);
+ }
+
+ info!("Processing final whisper filter output...");
+ self.check_whisper_output(sink, sender)?;
+ }
+
+ Ok(())
+ }
+
+ fn setup_whisper_filter(
+ &self,
+ audio_stream: &ffmpeg::Stream,
+ ) -> Result<(
+ ffmpeg::filter::Graph,
+ ffmpeg::filter::Context,
+ ffmpeg::filter::Context,
+ )> {
+ debug!("Setting up whisper filter graph...");
+ let mut filter_graph = ffmpeg::filter::Graph::new();
+ debug!("Filter graph created successfully");
+
+ // Get audio parameters
+ debug!("Getting audio parameters...");
+ let time_base = audio_stream.time_base();
+ let audio_params = audio_stream.parameters();
+ debug!("Creating context from parameters...");
+ let context = ffmpeg::codec::context::Context::from_parameters(audio_params)?;
+ debug!("Getting audio decoder from context...");
+ let audio_decoder = context.decoder().audio()?;
+ debug!("Audio decoder created successfully");
+
+ // Create buffer source
+ let buffer_args = format!(
+ "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+ time_base,
+ audio_decoder.rate(),
+ audio_decoder.format().name(),
+ audio_decoder.channel_layout().bits()
+ );
+ debug!("Buffer args: {}", buffer_args);
+
+ debug!("Looking for abuffer filter...");
+ let abuffer_filter = ffmpeg::filter::find("abuffer")
+ .ok_or_else(|| anyhow::anyhow!("abuffer filter not found"))?;
+ debug!("abuffer filter found: {}", abuffer_filter.name());
+
+ debug!("Adding abuffer filter...");
+ match filter_graph.add(&abuffer_filter, "src", &buffer_args) {
+ Ok(_) => debug!("abuffer filter added successfully"),
+ Err(e) => {
+ error!("Failed to add abuffer filter: {}", e);
+ return Err(anyhow::anyhow!("Failed to add abuffer filter: {}", e));
+ }
+ }
+
+ // Create whisper filter with parameters
+ // Try absolute path and different parameter formats
+ let model_path = std::path::Path::new("./whisper-models/ggml-large-v3.bin");
+ let absolute_path = if model_path.exists() {
+ model_path
+ .canonicalize()
+ .map(|p| p.to_string_lossy().to_string())
+ .unwrap_or_else(|_| "./whisper-models/ggml-large-v3.bin".to_string())
+ } else {
+ warn!("Whisper model file not found at: {:?}", model_path);
+ "./whisper-models/ggml-large-v3.bin".to_string()
+ };
+
+ debug!("Model path exists: {}", model_path.exists());
+ debug!("Using absolute path: {}", absolute_path);
+
+ debug!("Looking for whisper filter...");
+ let whisper_filter = ffmpeg::filter::find("whisper").ok_or_else(|| {
+ error!("Whisper filter not found! Make sure FFmpeg was compiled with whisper support");
+ anyhow::anyhow!("Whisper filter not available")
+ })?;
+
+ debug!("Whisper filter found: {}", whisper_filter.name());
+ // We'll create the whisper filter through the parse method instead of adding it manually
+
+ // Create audio buffer sink for whisper output (whisper outputs audio + metadata)
+ debug!("Looking for abuffersink filter for audio output...");
+ let abuffersink_filter = ffmpeg::filter::find("abuffersink")
+ .ok_or_else(|| anyhow::anyhow!("abuffersink filter not found"))?;
+ debug!("abuffersink filter found: {}", abuffersink_filter.name());
+
+ debug!("Adding abuffersink filter...");
+ match filter_graph.add(&abuffersink_filter, "sink", "") {
+ Ok(_) => debug!("abuffersink filter added successfully"),
+ Err(e) => {
+ error!("Failed to add abuffersink filter: {}", e);
+ return Err(anyhow::anyhow!("Failed to add abuffersink filter: {}", e));
+ }
+ }
+
+ // Connect filters using the complete filter chain description
+ debug!("Connecting filter graph with complete chain: src -> whisper -> sink");
+
+ let filter_chain = format!(
+ "[src]whisper=model={}:queue=30:format=json[sink]",
+ "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin",
+ //"/Users/malte/repos/lleap/whisper-models/ggml-silero-v5.1.2.bin"
+ );
+ debug!("Using filter chain: {}", filter_chain);
+
+ if let Err(e) = filter_graph
+ .output("src", 0)
+ .and_then(|o| o.input("sink", 0))
+ .and_then(|i| i.parse(&filter_chain))
+ {
+ error!("Failed to connect filter graph: {}", e);
+ return Err(anyhow::anyhow!("Failed to connect filter graph: {}", e));
+ }
+ debug!("Filter graph connected successfully");
+
+ // Validate filter graph
+ debug!("Validating filter graph...");
+ match filter_graph.validate() {
+ Ok(_) => {
+ info!("Filter graph validated successfully");
+ debug!("Filter graph dump:\n{}", filter_graph.dump());
+ }
+ Err(e) => {
+ error!("Filter graph validation failed: {}", e);
+ debug!(
+ "Filter graph dump before validation failure:\n{}",
+ filter_graph.dump()
+ );
+ return Err(anyhow::anyhow!("Filter graph validation failed: {}", e));
+ }
+ }
+
+ debug!("Getting final source and sink contexts...");
+ let source_ctx = filter_graph
+ .get("src")
+ .ok_or_else(|| anyhow::anyhow!("Source context not found"))?;
+ let sink_ctx = filter_graph
+ .get("sink")
+ .ok_or_else(|| anyhow::anyhow!("Sink context not found"))?;
+ debug!("Final contexts retrieved successfully");
+
+ Ok((filter_graph, source_ctx, sink_ctx))
+ }
+
    /// Drain all frames currently available on the whisper filter's sink and
    /// forward any transcription text found in frame metadata as cues.
    ///
    /// # Errors
    /// Propagates errors from `parse_whisper_text`; running out of frames is
    /// not an error (the sink loop simply ends).
    fn check_whisper_output(
        &self,
        sink: &mut ffmpeg::filter::Context,
        sender: &ComponentSender<Self>,
    ) -> Result<()> {
        debug!("Attempting to read audio frames from whisper filter output...");

        // The whisper filter outputs audio frames with subtitle data in "lavfi.whisper.text" metadata
        let mut frame = unsafe { ffmpeg::Frame::empty() };
        let mut output_count = 0;

        while sink.sink().frame(&mut frame).is_ok() {
            output_count += 1;
            debug!(
                "Received audio frame {} from whisper filter (pts: {:?})",
                output_count,
                frame.pts()
            );

            // Look specifically for lavfi.whisper.text metadata
            if let Some(whisper_text) = frame.metadata().get("lavfi.whisper.text") {
                info!("Found whisper transcription: {}", whisper_text);

                let start_time = if let Some(pts) = frame.pts() {
                    // Convert PTS to nanoseconds based on whisper filter's time base (16kHz)
                    // NOTE(review): the 16000 divisor assumes the filter's
                    // output time base is 1/16000 — confirm against the actual
                    // sink time base. Also, a negative pts would wrap in the
                    // `as u64` cast — TODO confirm pts is always >= 0 here.
                    gst::ClockTime::from_nseconds((pts as u64 * 1_000_000_000) / 16000)
                } else {
                    gst::ClockTime::ZERO
                };

                // Log all available metadata keys to help debug
                let metadata_entries: Vec<(String, String)> = frame
                    .metadata()
                    .iter()
                    .map(|(k, v)| (k.to_string(), v.to_string()))
                    .collect();
                if !metadata_entries.is_empty() {
                    let metadata_keys: Vec<String> =
                        metadata_entries.iter().map(|(k, _)| k.clone()).collect();
                    debug!("Frame metadata keys: {:?}", metadata_keys);
                }

                // Parse the whisper text (might be JSON format)
                self.parse_whisper_text(whisper_text, start_time, sender)?;
            }
        }

        if output_count > 0 {
            info!("Processed {} frames from whisper filter", output_count);
        } else {
            debug!("No frames available from whisper filter");
        }

        Ok(())
    }
+
+ fn parse_whisper_text(
+ &self,
+ whisper_text: &str,
+ base_time: gst::ClockTime,
+ sender: &ComponentSender<Self>,
+ ) -> Result<()> {
+ debug!("Parsing whisper text: {}", whisper_text);
+
+ // The whisper text might be in different formats depending on the filter configuration
+ // For now, treat it as plain text and create a single cue
+ let cue = SubtitleCue {
+ start: base_time,
+ end: base_time + gst::ClockTime::from_seconds(3), // Default 3 second duration
+ text: whisper_text.to_string(),
+ };
+
+ let whisper_stream_index = 1000;
+ if let Some(track) = TRACKS.write().get_mut(&whisper_stream_index) {
+ track.cues.push(cue.clone());
+ }
+
+ sender
+ .output(SubtitleExtractorOutput::NewCue(whisper_stream_index, cue))
+ .unwrap();
+
+ Ok(())
+ }
+
    /// Parse a blob of SRT-formatted subtitle text (index line, timestamp
    /// line, text lines, blank separator) and emit each entry as a cue on the
    /// generated whisper track (index 1000).
    ///
    /// NOTE(review): no call site for this method is visible in this file —
    /// it may be dead code; confirm before removing.
    fn parse_whisper_subtitle_data(
        &self,
        subtitle_data: &str,
        sender: &ComponentSender<Self>,
    ) -> Result<()> {
        // Parse SRT-format output from whisper
        info!(
            "Parsing whisper subtitle data ({} characters)",
            subtitle_data.len()
        );
        debug!("Subtitle data content:\n{}", subtitle_data);
        let lines: Vec<&str> = subtitle_data.lines().collect();
        let mut i = 0;

        while i < lines.len() {
            // Skip subtitle number
            if lines[i].trim().parse::<i32>().is_ok() {
                i += 1;
            }

            // Parse timestamp line
            if i < lines.len() {
                if let Some((start, end)) = self.parse_srt_timestamp(lines[i]) {
                    i += 1;

                    // Collect text lines
                    let mut text_lines = Vec::new();
                    while i < lines.len() && !lines[i].trim().is_empty() {
                        text_lines.push(lines[i].to_string());
                        i += 1;
                    }

                    if !text_lines.is_empty() {
                        let cue = SubtitleCue {
                            start,
                            end,
                            text: text_lines.join("\n"),
                        };

                        let whisper_stream_index = 1000;
                        if let Some(track) = TRACKS.write().get_mut(&whisper_stream_index) {
                            track.cues.push(cue.clone());
                        }

                        sender
                            .output(SubtitleExtractorOutput::NewCue(whisper_stream_index, cue))
                            .unwrap();
                    }
                }
            }
            // Advance past the line we are on (the blank separator after a
            // parsed entry, or an unparseable line).
            i += 1;
        }

        Ok(())
    }
+
+ fn parse_srt_timestamp(&self, line: &str) -> Option<(gst::ClockTime, gst::ClockTime)> {
+ // Parse SRT timestamp format: "00:00:01,234 --> 00:00:05,678"
+ let parts: Vec<&str> = line.split(" --> ").collect();
+ if parts.len() != 2 {
+ return None;
+ }
+
+ let start = self.parse_srt_time(parts[0])?;
+ let end = self.parse_srt_time(parts[1])?;
+
+ Some((start, end))
+ }
+
+ fn parse_srt_time(&self, time_str: &str) -> Option<gst::ClockTime> {
+ // Parse SRT time format: "00:00:01,234"
+ let parts: Vec<&str> = time_str.split(',').collect();
+ if parts.len() != 2 {
+ return None;
+ }
+
+ let time_part = parts[0];
+ let millis: u32 = parts[1].parse().ok()?;
+
+ let time_components: Vec<&str> = time_part.split(':').collect();
+ if time_components.len() != 3 {
+ return None;
+ }
+
+ let hours: u32 = time_components[0].parse().ok()?;
+ let minutes: u32 = time_components[1].parse().ok()?;
+ let seconds: u32 = time_components[2].parse().ok()?;
+
+ let total_millis = hours * 3600000 + minutes * 60000 + seconds * 1000 + millis;
+ let nanoseconds = total_millis as u64 * 1_000_000;
+
+ Some(gst::ClockTime::from_nseconds(nanoseconds))
+ }
+
+ fn subtitle_to_cue(
+ subtitle: &ffmpeg::Subtitle,
+ packet: &ffmpeg::Packet,
+ time_base: Rational,
+ ) -> Option<SubtitleCue> {
+ let time_to_clock_time = |time: i64| {
+ let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000)
+ / time_base.denominator() as i64;
+ gst::ClockTime::from_nseconds(nseconds as u64)
+ };
+
+ let text = subtitle
+ .rects()
+ .into_iter()
+ .map(|rect| match rect {
+ ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
+ ffmpeg::subtitle::Rect::Ass(ass) => {
+ Self::extract_dialogue_text(ass.get()).unwrap_or(String::new())
+ }
+ _ => String::new(),
+ })
+ .collect::<Vec<String>>()
+ .join("\n— ");
+
+ let start = time_to_clock_time(packet.pts()?);
+ let end = time_to_clock_time(packet.pts()? + packet.duration());
+
+ Some(SubtitleCue { start, end, text })
+ }
+
+ fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
+ // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
+ // we need the 9th field (Text), so split on comma but only take first 9 splits
+ // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
+ let text = dialogue_line.splitn(9, ',').last()?;
+
+ // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
+ let mut result = String::new();
+ let mut in_tag = false;
+ let mut char_iter = text.chars().peekable();
+
+ while let Some(c) = char_iter.next() {
+ if c == '{' && char_iter.peek() == Some(&'\\') {
+ in_tag = true;
+ } else if c == '}' {
+ in_tag = false;
+ } else if !in_tag {
+ // process line breaks and hard spaces
+ if c == '\\' {
+ match char_iter.peek() {
+ Some(&'N') => {
+ char_iter.next();
+ result.push('\n');
+ }
+ Some(&'n') | Some(&'h') => {
+ char_iter.next();
+ result.push(' ');
+ }
+ _ => result.push(c),
+ }
+ } else {
+ result.push(c);
+ }
+ }
+ }
+
+ Some(result)
+ }
+}
diff --git a/src/subtitle_selection_dialog.rs b/src/subtitle_selection_dialog.rs
index 0c7f1cd..6136d56 100644
--- a/src/subtitle_selection_dialog.rs
+++ b/src/subtitle_selection_dialog.rs
@@ -1,63 +1,18 @@
use adw::prelude::*;
-use gtk::{gio, glib};
+use gtk::gio;
use relm4::prelude::*;
-use crate::subtitle_extractor::{StreamIndex, TRACKS};
-use crate::util::Tracker;
-
-// Custom GObject wrapper for subtitle track information
-glib::wrapper! {
- pub struct SubtitleTrackInfo(ObjectSubclass<imp::SubtitleTrackInfo>);
-}
-
-impl SubtitleTrackInfo {
- pub fn new(
- stream_index: StreamIndex,
- language: Option<&'static str>,
- title: Option<String>,
- ) -> Self {
- glib::Object::builder()
- .property("stream-index", stream_index as i64)
- .property("language", language.unwrap_or_default())
- .property("title", title.unwrap_or_default())
- .build()
- }
-
- pub fn get_stream_index(&self) -> StreamIndex {
- let index: i64 = self.property("stream-index");
- index as usize
- }
-}
-
-mod imp {
- use gtk::{glib, prelude::*, subclass::prelude::*};
- use std::cell::RefCell;
-
- #[derive(Default, glib::Properties)]
- #[properties(wrapper_type = super::SubtitleTrackInfo)]
- pub struct SubtitleTrackInfo {
- #[property(get, set)]
- stream_index: RefCell<i64>,
- #[property(get, set)]
- language: RefCell<String>,
- #[property(get, set)]
- title: RefCell<String>,
- }
-
- #[glib::object_subclass]
- impl ObjectSubclass for SubtitleTrackInfo {
- const NAME: &'static str = "SubtitleTrackInfo";
- type Type = super::SubtitleTrackInfo;
- }
-
- #[glib::derived_properties]
- impl ObjectImpl for SubtitleTrackInfo {}
-}
+use crate::track_selector::{
+ TrackInfo, TrackSelector, TrackSelectorInit, TrackSelectorMsg, TrackSelectorOutput,
+};
+use crate::tracks::{SUBTITLE_TRACKS, StreamIndex};
pub struct SubtitleSelectionDialog {
parent_window: adw::ApplicationWindow,
dialog: adw::PreferencesDialog,
- track_list_model: Tracker<gio::ListStore>,
+ track_list_model: gio::ListStore,
+ primary_selector: Controller<TrackSelector>,
+ secondary_selector: Controller<TrackSelector>,
primary_track_ix: Option<StreamIndex>,
secondary_track_ix: Option<StreamIndex>,
}
@@ -91,79 +46,10 @@ impl SimpleComponent for SubtitleSelectionDialog {
#[name(page)]
adw::PreferencesPage {
adw::PreferencesGroup {
- #[name(primary_combo)]
- adw::ComboRow {
- set_title: "Primary Subtitle Track",
- set_subtitle: "Main subtitle track for learning",
- set_factory: Some(&track_factory),
- #[track(model.track_list_model.is_dirty())]
- set_model: Some(model.track_list_model.get()),
- #[track(model.track_list_model.is_dirty())]
- set_selected: model.primary_track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(model.track_list_model.get(), ix)),
- connect_selected_notify[sender] => move |combo| {
- let stream_index = get_stream_ix_from_combo(combo);
- sender.input(SubtitleSelectionDialogMsg::PrimaryTrackChanged(stream_index));
- },
- },
-
- #[name(secondary_combo)]
- adw::ComboRow {
- set_title: "Secondary Subtitle Track",
- set_subtitle: "Optional second track for comparison",
- set_factory: Some(&track_factory),
- #[track(model.track_list_model.is_dirty())]
- set_model: Some(model.track_list_model.get()),
- #[track(model.track_list_model.is_dirty())]
- set_selected: model.secondary_track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(model.track_list_model.get(), ix)),
- connect_selected_notify[sender] => move |combo| {
- let stream_index = get_stream_ix_from_combo(combo);
- sender.input(SubtitleSelectionDialogMsg::SecondaryTrackChanged(stream_index));
- },
- },
+ model.primary_selector.widget(),
+ model.secondary_selector.widget(),
}
},
-
- #[name(track_factory)]
- gtk::SignalListItemFactory {
- connect_setup => move |_, list_item| {
- let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
- let vbox = gtk::Box::new(gtk::Orientation::Vertical, 0);
-
- let language_label = gtk::Label::new(None);
- language_label.set_halign(gtk::Align::Start);
- language_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
-
- let title_label = gtk::Label::new(None);
- title_label.set_halign(gtk::Align::Start);
- title_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
- title_label.add_css_class("subtitle");
-
- vbox.append(&language_label);
- vbox.append(&title_label);
- list_item.set_child(Some(&vbox));
- },
- connect_bind => move |_, list_item| {
- let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
- let item = list_item.item().unwrap();
- let track_info = item.downcast_ref::<SubtitleTrackInfo>().unwrap();
- let vbox = list_item.child().unwrap().downcast::<gtk::Box>().unwrap();
- let language_label = vbox.first_child().unwrap().downcast::<gtk::Label>().unwrap();
- let title_label = vbox.last_child().unwrap().downcast::<gtk::Label>().unwrap();
-
- let language = track_info.language();
- let title = track_info.title();
-
- let language_text = if !language.is_empty() {
- &language
- } else {
- "Unknown Language"
- };
-
- language_label.set_text(&language_text);
- title_label.set_text(&title);
- title_label.set_visible(!title.is_empty());
- },
- },
}
fn init(
@@ -171,12 +57,33 @@ impl SimpleComponent for SubtitleSelectionDialog {
root: Self::Root,
sender: ComponentSender<Self>,
) -> ComponentParts<Self> {
- let track_list_model = gio::ListStore::new::<SubtitleTrackInfo>();
+ let primary_selector = TrackSelector::builder()
+ .launch(TrackSelectorInit {
+ title: "Primary subtitle track",
+ subtitle: Some("Select your target language here"),
+ })
+ .forward(sender.input_sender(), |output| match output {
+ TrackSelectorOutput::Changed(ix) => {
+ SubtitleSelectionDialogMsg::PrimaryTrackChanged(ix)
+ }
+ });
+ let secondary_selector = TrackSelector::builder()
+ .launch(TrackSelectorInit {
+ title: "Secondary subtitle track",
+ subtitle: Some("Pick a language you already know"),
+ })
+ .forward(sender.input_sender(), |output| match output {
+ TrackSelectorOutput::Changed(ix) => {
+ SubtitleSelectionDialogMsg::SecondaryTrackChanged(ix)
+ }
+ });
let model = Self {
parent_window,
dialog: root.clone(),
- track_list_model: Tracker::new(track_list_model),
+ track_list_model: gio::ListStore::new::<TrackInfo>(),
+ primary_selector,
+ secondary_selector,
primary_track_ix: None,
secondary_track_ix: None,
};
@@ -187,11 +94,23 @@ impl SimpleComponent for SubtitleSelectionDialog {
}
fn update(&mut self, msg: Self::Input, sender: ComponentSender<Self>) {
- self.track_list_model.reset();
-
match msg {
SubtitleSelectionDialogMsg::Show => {
- self.update_combo_models();
+ self.update_track_list_model();
+
+ self.primary_selector
+ .sender()
+ .send(TrackSelectorMsg::SetListModel(
+ self.track_list_model.clone(),
+ ))
+ .unwrap();
+ self.secondary_selector
+ .sender()
+ .send(TrackSelectorMsg::SetListModel(
+ self.track_list_model.clone(),
+ ))
+ .unwrap();
+
self.dialog.present(Some(&self.parent_window));
}
SubtitleSelectionDialogMsg::PrimaryTrackChanged(stream_index) => {
@@ -215,43 +134,20 @@ impl SimpleComponent for SubtitleSelectionDialog {
}
impl SubtitleSelectionDialog {
- fn update_combo_models(&mut self) {
- let tracks = TRACKS.read();
+ fn update_track_list_model(&mut self) {
+ let tracks = SUBTITLE_TRACKS.read();
// Clear previous entries
- self.track_list_model.get_mut().remove_all();
+ self.track_list_model.remove_all();
// Add all available tracks
for (&stream_index, track) in tracks.iter() {
- let track_info = SubtitleTrackInfo::new(
+ let track_info = TrackInfo::new(
stream_index,
- track.language.map(|lang| lang.to_name()),
- track.title.clone(),
+ track.metadata.language.map(|lang| lang.to_name()),
+ track.metadata.title.clone(),
);
- self.track_list_model.get_mut().append(&track_info);
- }
- }
-}
-
-fn get_stream_ix_from_combo(combo: &adw::ComboRow) -> Option<StreamIndex> {
- let ix = combo
- .selected_item()?
- .downcast_ref::<SubtitleTrackInfo>()
- .unwrap()
- .get_stream_index();
-
- Some(ix)
-}
-
-fn get_list_ix_from_stream_ix(list_model: &gio::ListStore, stream_ix: StreamIndex) -> u32 {
- for i in 0..list_model.n_items() {
- if let Some(item) = list_model.item(i) {
- if let Some(track_info) = item.downcast_ref::<SubtitleTrackInfo>() {
- if track_info.get_stream_index() == stream_ix {
- return i;
- }
- }
+ self.track_list_model.append(&track_info);
}
}
- panic!("Stream index {} not found in list model", stream_ix);
}
diff --git a/src/subtitle_view.rs b/src/subtitle_view.rs
index dc48561..50494b8 100644
--- a/src/subtitle_view.rs
+++ b/src/subtitle_view.rs
@@ -1,6 +1,5 @@
use crate::cue_view::{CueView, CueViewMsg, CueViewOutput};
use crate::util::OptionTracker;
-use gtk::glib;
use gtk::prelude::*;
use relm4::prelude::*;
diff --git a/src/track_selector.rs b/src/track_selector.rs
new file mode 100644
index 0000000..5c56e4d
--- /dev/null
+++ b/src/track_selector.rs
@@ -0,0 +1,188 @@
+use adw::prelude::*;
+use gtk::{gio, glib};
+use relm4::prelude::*;
+
+use crate::tracks::StreamIndex;
+
+glib::wrapper! {
+ pub struct TrackInfo(ObjectSubclass<imp::TrackInfo>);
+}
+
+impl TrackInfo {
+ pub fn new(
+ stream_index: StreamIndex,
+ language: Option<&'static str>,
+ title: Option<String>,
+ ) -> Self {
+ glib::Object::builder()
+ .property("stream-index", stream_index as i64)
+ .property("language", language.unwrap_or_default())
+ .property("title", title.unwrap_or_default())
+ .build()
+ }
+
+ pub fn get_stream_index(&self) -> StreamIndex {
+ let index: i64 = self.property("stream-index");
+ index as usize
+ }
+}
+
+mod imp {
+ use gtk::{glib, prelude::*, subclass::prelude::*};
+ use std::cell::RefCell;
+
+ #[derive(Default, glib::Properties)]
+ #[properties(wrapper_type = super::TrackInfo)]
+ pub struct TrackInfo {
+ #[property(get, set)]
+ stream_index: RefCell<i64>,
+ #[property(get, set)]
+ language: RefCell<String>,
+ #[property(get, set)]
+ title: RefCell<String>,
+ }
+
+ #[glib::object_subclass]
+ impl ObjectSubclass for TrackInfo {
+ const NAME: &'static str = "TrackInfo";
+ type Type = super::TrackInfo;
+ }
+
+ #[glib::derived_properties]
+ impl ObjectImpl for TrackInfo {}
+}
+
+pub struct TrackSelector {
+ track_list_model: gio::ListStore,
+ track_ix: Option<StreamIndex>,
+}
+
+pub struct TrackSelectorInit {
+ pub title: &'static str,
+ pub subtitle: Option<&'static str>,
+}
+
+#[derive(Debug)]
+pub enum TrackSelectorMsg {
+ SetListModel(gio::ListStore),
+}
+
+#[derive(Debug)]
+pub enum TrackSelectorOutput {
+ Changed(Option<StreamIndex>),
+}
+
+#[relm4::component(pub)]
+impl SimpleComponent for TrackSelector {
+ type Init = TrackSelectorInit;
+ type Input = TrackSelectorMsg;
+ type Output = TrackSelectorOutput;
+
+ view! {
+ #[root]
+ #[name(primary_combo)]
+ adw::ComboRow {
+ set_title: init.title,
+ set_subtitle?: init.subtitle,
+ set_factory: Some(&track_factory),
+ #[watch]
+ set_model: Some(&model.track_list_model),
+ #[watch]
+ set_selected: model.track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(&model.track_list_model, ix)),
+ connect_selected_notify[sender] => move |combo| {
+ let stream_index = get_stream_ix_from_combo(combo);
+ sender.output(TrackSelectorOutput::Changed(stream_index)).unwrap();
+ },
+ },
+
+ #[name(track_factory)]
+ gtk::SignalListItemFactory {
+ connect_setup => move |_, list_item| {
+ let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
+ let vbox = gtk::Box::new(gtk::Orientation::Vertical, 0);
+
+ let language_label = gtk::Label::new(None);
+ language_label.set_halign(gtk::Align::Start);
+ language_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
+
+ let title_label = gtk::Label::new(None);
+ title_label.set_halign(gtk::Align::Start);
+ title_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
+ title_label.add_css_class("subtitle");
+
+ vbox.append(&language_label);
+ vbox.append(&title_label);
+ list_item.set_child(Some(&vbox));
+ },
+ connect_bind => move |_, list_item| {
+ let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
+ let item = list_item.item().unwrap();
+ let track_info = item.downcast_ref::<TrackInfo>().unwrap();
+ let vbox = list_item.child().unwrap().downcast::<gtk::Box>().unwrap();
+ let language_label = vbox.first_child().unwrap().downcast::<gtk::Label>().unwrap();
+ let title_label = vbox.last_child().unwrap().downcast::<gtk::Label>().unwrap();
+
+ let language = track_info.language();
+ let title = track_info.title();
+
+ let language_text = if !language.is_empty() {
+ &language
+ } else {
+ "Unknown Language"
+ };
+
+ language_label.set_text(language_text);
+ title_label.set_text(&title);
+ title_label.set_visible(!title.is_empty());
+ },
+ },
+ }
+
+ fn init(
+ init: Self::Init,
+ root: Self::Root,
+ sender: ComponentSender<Self>,
+ ) -> ComponentParts<Self> {
+ let track_list_model = gio::ListStore::new::<TrackInfo>();
+
+ let model = Self {
+ track_list_model,
+ track_ix: None,
+ };
+
+ let widgets = view_output!();
+
+ ComponentParts { model, widgets }
+ }
+
+ fn update(&mut self, msg: Self::Input, _sender: ComponentSender<Self>) {
+ match msg {
+ TrackSelectorMsg::SetListModel(list_model) => {
+ self.track_list_model = list_model;
+ }
+ }
+ }
+}
+
+fn get_stream_ix_from_combo(combo: &adw::ComboRow) -> Option<StreamIndex> {
+ let ix = combo
+ .selected_item()?
+ .downcast_ref::<TrackInfo>()
+ .unwrap()
+ .get_stream_index();
+
+ Some(ix)
+}
+
+fn get_list_ix_from_stream_ix(list_model: &gio::ListStore, stream_ix: StreamIndex) -> u32 {
+ for i in 0..list_model.n_items() {
+ if let Some(item) = list_model.item(i) {
+ if let Some(track_info) = item.downcast_ref::<TrackInfo>() {
+ if track_info.get_stream_index() == stream_ix {
+ return i;
+ }
+ }
+ }
+ }
+ gtk::INVALID_LIST_POSITION
+}
diff --git a/src/tracks.rs b/src/tracks.rs
new file mode 100644
index 0000000..4d69e12
--- /dev/null
+++ b/src/tracks.rs
@@ -0,0 +1,38 @@
+use std::collections::BTreeMap;
+
+use relm4::SharedState;
+
+pub type StreamIndex = usize;
+
+#[derive(Debug, Clone)]
+pub struct TrackMetadata {
+ pub language: Option<isolang::Language>,
+ pub title: Option<String>,
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtitleTrack {
+ pub metadata: TrackMetadata,
+ pub cues: Vec<SubtitleCue>,
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtitleCue {
+ pub start: gst::ClockTime,
+ pub end: gst::ClockTime,
+ pub text: String,
+}
+
+pub static SUBTITLE_TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();
+
+impl TrackMetadata {
+ pub fn from_ffmpeg_stream(stream: &ffmpeg::Stream) -> Self {
+ let language_code = stream.metadata().get("language").map(|s| s.to_string());
+ let title = stream.metadata().get("title").map(|s| s.to_string());
+
+ Self {
+ language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
+ title,
+ }
+ }
+}
diff --git a/src/transcript.rs b/src/transcript.rs
index eb3459d..a8ae554 100644
--- a/src/transcript.rs
+++ b/src/transcript.rs
@@ -1,7 +1,7 @@
use gtk::{ListBox, pango::WrapMode, prelude::*};
use relm4::prelude::*;
-use crate::subtitle_extractor::{StreamIndex, SubtitleCue, TRACKS};
+use crate::tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue};
#[derive(Debug)]
pub enum SubtitleCueOutput {
@@ -122,7 +122,7 @@ impl SimpleComponent for Transcript {
self.active_cues.guard().clear();
if let Some(stream_ix) = stream_index {
- let tracks = TRACKS.read();
+ let tracks = SUBTITLE_TRACKS.read();
if let Some(track) = tracks.get(&stream_ix) {
for cue in &track.cues {
self.active_cues.guard().push_back(cue.clone());
diff --git a/src/util/tracker.rs b/src/util/tracker.rs
index 66c30a9..69a1c5f 100644
--- a/src/util/tracker.rs
+++ b/src/util/tracker.rs
@@ -24,6 +24,12 @@ impl<T> Tracker<T> {
self.inner = value;
}
+ /// Sets the inner value to `value` and marks the tracker as clean.
+ pub fn set_clean(&mut self, value: T) {
+ self.dirty = false;
+ self.inner = value;
+ }
+
pub fn is_dirty(&self) -> bool {
self.dirty
}