 src/app.rs                           |  84
 src/main.rs                          |  21
 src/open_dialog.rs                   | 328
 src/subtitle_extraction/embedded.rs  | 118
 src/subtitle_extraction/mod.rs       | 159
 src/subtitle_extraction/whisper.rs   |  75
 src/subtitle_extractor.rs            | 209
 src/subtitle_extractor_aishit.rs     | 732
 src/subtitle_selection_dialog.rs     | 214
 src/subtitle_view.rs                 |   1
 src/track_selector.rs                | 188
 src/tracks.rs                        |  38
 src/transcript.rs                    |   4
 src/util/tracker.rs                  |   6
 14 files changed, 1768 insertions(+), 409 deletions(-)
diff --git a/src/app.rs b/src/app.rs
index 18f03e8..7aa5abd 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -2,26 +2,27 @@ use adw::prelude::*;
 use relm4::{WorkerController, prelude::*};
 
 use crate::{
+    open_dialog::{OpenDialog, OpenDialogMsg, OpenDialogOutput},
     player::{Player, PlayerMsg, PlayerOutput},
     preferences::{Preferences, PreferencesMsg},
-    subtitle_extractor::{
-        StreamIndex, SubtitleExtractor, SubtitleExtractorMsg, SubtitleExtractorOutput, TRACKS,
-    },
+    subtitle_extraction::{SubtitleExtractor, SubtitleExtractorMsg, SubtitleExtractorOutput},
     subtitle_selection_dialog::{
         SubtitleSelectionDialog, SubtitleSelectionDialogMsg, SubtitleSelectionDialogOutput,
     },
     subtitle_view::{SubtitleView, SubtitleViewMsg, SubtitleViewOutput},
+    tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue},
     transcript::{Transcript, TranscriptMsg, TranscriptOutput},
     util::OptionTracker,
 };
 
 pub struct App {
-    url: String,
     transcript: Controller<Transcript>,
     player: Controller<Player>,
     subtitle_view: Controller<SubtitleView>,
     extractor: WorkerController<SubtitleExtractor>,
+
     preferences: Controller<Preferences>,
+    open_url_dialog: Controller<OpenDialog>,
     subtitle_selection_dialog: Controller<SubtitleSelectionDialog>,
 
     primary_stream_ix: Option<StreamIndex>,
@@ -37,20 +38,24 @@ pub struct App {
 
 #[derive(Debug)]
 pub enum AppMsg {
-    NewOrUpdatedTrackMetadata(StreamIndex),
-    NewCue(StreamIndex, crate::subtitle_extractor::SubtitleCue),
+    NewCue(StreamIndex, SubtitleCue),
     SubtitleExtractionComplete,
     PrimarySubtitleTrackSelected(Option<StreamIndex>),
     SecondarySubtitleTrackSelected(Option<StreamIndex>),
     PositionUpdate(gst::ClockTime),
     SetHoveringSubtitleCue(bool),
+    ShowUrlOpenDialog,
     ShowPreferences,
     ShowSubtitleSelectionDialog,
+    Play {
+        url: String,
+        whisper_stream_index: Option<StreamIndex>,
+    },
 }
 
 #[relm4::component(pub)]
 impl SimpleComponent for App {
-    type Init = String;
+    type Init = ();
     type Input = AppMsg;
     type Output = ();
 
@@ -61,10 +66,13 @@ impl SimpleComponent for App {
             set_default_width: 800,
             set_default_height: 600,
 
-            #[name(toolbar_view)]
             adw::ToolbarView {
                 add_top_bar = &adw::HeaderBar {
                     pack_start = &gtk::Button {
+                        set_label: "Open...",
+                        connect_clicked => AppMsg::ShowUrlOpenDialog,
+                    },
+                    pack_end = &gtk::Button {
                         set_icon_name: "settings-symbolic",
                         connect_clicked => AppMsg::ShowPreferences,
                     }
@@ -86,7 +94,7 @@ impl SimpleComponent for App {
     }
 
     fn init(
-        url: Self::Init,
+        _init: Self::Init,
         root: Self::Root,
         sender: ComponentSender<Self>,
     ) -> ComponentParts<Self> {
@@ -112,9 +120,6 @@ impl SimpleComponent for App {
         let extractor = SubtitleExtractor::builder().detach_worker(()).forward(
             sender.input_sender(),
             |output| match output {
-                SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(stream_index) => {
-                    AppMsg::NewOrUpdatedTrackMetadata(stream_index)
-                }
                 SubtitleExtractorOutput::NewCue(stream_index, cue) => {
                     AppMsg::NewCue(stream_index, cue)
                 }
@@ -123,6 +128,18 @@ impl SimpleComponent for App {
         );
 
         let preferences = Preferences::builder().launch(root.clone().into()).detach();
+        let open_url_dialog = OpenDialog::builder().launch(root.clone().into()).forward(
+            sender.input_sender(),
+            |output| match output {
+                OpenDialogOutput::Play {
+                    url,
+                    whisper_stream_index,
+                } => AppMsg::Play {
+                    url,
+                    whisper_stream_index,
+                },
+            },
+        );
         let subtitle_selection_dialog = SubtitleSelectionDialog::builder()
             .launch(root.clone().into())
             .forward(sender.input_sender(), |output| match output {
@@ -135,12 +152,13 @@ impl SimpleComponent for App {
             });
 
         let model = Self {
-            url: url.clone(), // TODO remove clone
             player,
             transcript,
             subtitle_view,
             extractor,
+
             preferences,
+            open_url_dialog,
             subtitle_selection_dialog,
 
             primary_stream_ix: None,
@@ -155,26 +173,14 @@ impl SimpleComponent for App {
 
         let widgets = view_output!();
 
-        model
-            .player
-            .sender()
-            .send(PlayerMsg::SetUrl(url.clone()))
-            .unwrap();
-        model
-            .extractor
-            .sender()
-            .send(SubtitleExtractorMsg::ExtractFromUrl(url))
-            .unwrap();
-
         ComponentParts { model, widgets }
     }
 
-    fn update(&mut self, msg: Self::Input, _sender: ComponentSender<Self>) {
+    fn update(&mut self, message: Self::Input, _sender: ComponentSender<Self>) {
         self.primary_last_cue_ix.reset();
         self.secondary_last_cue_ix.reset();
 
-        match msg {
-            AppMsg::NewOrUpdatedTrackMetadata(_stream_index) => {}
+        match message {
             AppMsg::NewCue(stream_index, cue) => {
                 self.transcript
                     .sender()
@@ -257,6 +263,12 @@ impl SimpleComponent for App {
                     self.autopaused = false;
                 }
             }
+            AppMsg::ShowUrlOpenDialog => {
+                self.open_url_dialog
+                    .sender()
+                    .send(OpenDialogMsg::Show)
+                    .unwrap();
+            }
             AppMsg::ShowPreferences => {
                 self.preferences
                     .sender()
@@ -269,6 +281,22 @@ impl SimpleComponent for App {
                     .send(SubtitleSelectionDialogMsg::Show)
                     .unwrap();
             }
+            AppMsg::Play {
+                url,
+                whisper_stream_index,
+            } => {
+                self.player
+                    .sender()
+                    .send(PlayerMsg::SetUrl(url.clone()))
+                    .unwrap();
+                self.extractor
+                    .sender()
+                    .send(SubtitleExtractorMsg::ExtractFromUrl {
+                        url,
+                        whisper_stream_index,
+                    })
+                    .unwrap();
+            }
         }
     }
 }
@@ -279,7 +307,7 @@ impl App {
         position: gst::ClockTime,
         last_cue_ix: &mut OptionTracker<usize>,
     ) -> Option<String> {
-        let lock = TRACKS.read();
+        let lock = SUBTITLE_TRACKS.read();
         let track = lock.get(&stream_ix)?;
 
         // try to find current cue quickly (should usually succeed during playback)
diff --git a/src/main.rs b/src/main.rs
index 0b7db50..01ca56d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,15 +1,16 @@
 mod app;
 mod cue_view;
+mod open_dialog;
 mod player;
 mod preferences;
-mod subtitle_extractor;
+mod subtitle_extraction;
 mod subtitle_selection_dialog;
 mod subtitle_view;
+mod track_selector;
+mod tracks;
 mod transcript;
 mod util;
 
-use std::env;
-
 use gtk::{CssProvider, STYLE_PROVIDER_PRIORITY_APPLICATION, gdk, glib};
 use relm4::RelmApp;
 
@@ -18,12 +19,12 @@ use crate::app::App;
 fn main() {
     env_logger::init();
 
-    let args: Vec<String> = env::args().collect();
-    if args.len() != 2 {
-        eprintln!("Usage: {} <video_url>", args[0]);
-        std::process::exit(1);
-    }
-    let video_url = args[1].clone();
+    // let args: Vec<String> = env::args().collect();
+    // if args.len() != 2 {
+    //     eprintln!("Usage: {} <video_url>", args[0]);
+    //     std::process::exit(1);
+    // }
+    // let video_url = args[1].clone();
 
     gtk::init().expect("Failed to initialize GTK");
     gst::init().expect("Failed to initialize GStreamer");
@@ -43,5 +44,5 @@ fn main() {
     relm4::RELM_THREADS.set(4).unwrap();
 
     let relm = RelmApp::new("tc.mal.lleap").with_args(vec![]);
-    relm.run::<App>(video_url);
+    relm.run::<App>(());
 }
diff --git a/src/open_dialog.rs b/src/open_dialog.rs
new file mode 100644
index 0000000..2f17c59
--- /dev/null
+++ b/src/open_dialog.rs
@@ -0,0 +1,328 @@
+use std::collections::BTreeMap;
+
+use adw::prelude::*;
+use gtk::gio;
+use gtk::glib::clone;
+use relm4::prelude::*;
+
+use crate::track_selector::{
+    TrackInfo, TrackSelector, TrackSelectorInit, TrackSelectorMsg, TrackSelectorOutput,
+};
+use crate::tracks::{StreamIndex, TrackMetadata};
+use crate::util::Tracker;
+
+pub struct OpenDialog {
+    parent_window: adw::ApplicationWindow,
+    dialog: adw::PreferencesDialog,
+    toast_overlay: Option<adw::ToastOverlay>,
+    navigation_view: Option<adw::NavigationView>,
+    whisper_track_selector: Controller<TrackSelector>,
+
+    url: Tracker<String>,
+    do_whisper_extraction: bool,
+    whisper_stream_index: Option<StreamIndex>,
+
+    metadata_command_running: bool,
+}
+
+#[derive(Debug)]
+pub enum OpenDialogMsg {
+    Show,
+    Next,
+    Cancel,
+    SelectFile,
+    FileSelected(gio::File),
+    UrlChanged(String),
+    SetDoWhisperExtraction(bool),
+    WhisperTrackSelected(Option<StreamIndex>),
+    Play,
+}
+
+#[derive(Debug)]
+pub enum OpenDialogOutput {
+    Play {
+        url: String,
+        whisper_stream_index: Option<StreamIndex>,
+    },
+}
+
+#[relm4::component(pub)]
+impl Component for OpenDialog {
+    type Init = adw::ApplicationWindow;
+    type Input = OpenDialogMsg;
+    type Output = OpenDialogOutput;
+    type CommandOutput = Result<BTreeMap<StreamIndex, TrackMetadata>, ffmpeg::Error>;
+
+    view! {
+        #[root]
+        adw::PreferencesDialog {
+            set_title: "Open URL",
+
+            #[wrap(Some)]
+            #[name(toast_overlay)]
+            set_child = &adw::ToastOverlay {
+                #[wrap(Some)]
+                #[name(navigation_view)]
+                set_child = &adw::NavigationView {
+                    add = &adw::NavigationPage {
+                        set_title: "Open File or Stream",
+
+                        #[wrap(Some)]
+                        set_child = &adw::ToolbarView {
+                            add_top_bar = &adw::HeaderBar {
+                                set_show_end_title_buttons: false,
+
+                                pack_start = &gtk::Button {
+                                    set_label: "Cancel",
+                                    connect_clicked => OpenDialogMsg::Cancel,
+                                },
+
+                                pack_end = &gtk::Button {
+                                    set_label: "Next",
+                                    #[watch]
+                                    set_sensitive: !(model.url.get().is_empty() || model.metadata_command_running),
+                                    connect_clicked => OpenDialogMsg::Next,
+                                    add_css_class: "suggested-action",
+                                },
+
+                                pack_end = &adw::Spinner {
+                                    #[watch]
+                                    set_visible: model.metadata_command_running,
+                                },
+                            },
+
+                            #[wrap(Some)]
+                            set_content = &adw::PreferencesPage {
+                                adw::PreferencesGroup {
+                                    set_title: "Open a file from your computer",
+                                    adw::ButtonRow {
+                                        set_title: "Select File",
+                                        connect_activated => OpenDialogMsg::SelectFile,
+                                    }
+                                },
+
+                                adw::PreferencesGroup {
+                                    set_title: "Or, enter a stream URL",
+                                    set_description: Some("Currently, only file:// and http(s):// URLs are officially supported, although other protocols may work as well."),
+
+                                    adw::EntryRow {
+                                        set_title: "URL",
+                                        #[track(model.url.is_dirty())]
+                                        set_text: model.url.get(),
+                                        connect_changed[sender] => move |entry| {
+                                            sender.input(OpenDialogMsg::UrlChanged(entry.text().to_string()));
+                                        },
+                                    }
+                                }
+                            }
+                        }
+                    },
+
+                    add = &adw::NavigationPage {
+                        set_tag: Some("playback_options"),
+                        set_title: "Playback Options",
+
+                        #[wrap(Some)]
+                        set_child = &adw::ToolbarView {
+                            add_top_bar = &adw::HeaderBar {
+                                set_show_end_title_buttons: false,
+
+                                pack_end = &gtk::Button {
+                                    connect_clicked => OpenDialogMsg::Play,
+                                    add_css_class: "suggested-action",
+
+                                    gtk::Label {
+                                        set_text: "Play",
+                                    }
+                                },
+                            },
+
+                            #[wrap(Some)]
+                            set_content = &adw::PreferencesPage {
+                                adw::PreferencesGroup {
+                                    adw::ExpanderRow {
+                                        set_title: "Generate subtitles from audio",
+                                        set_subtitle: "See also \"Whisper settings\" in Preferences",
+                                        set_show_enable_switch: true,
+                                        #[watch]
+                                        set_enable_expansion: model.do_whisper_extraction,
+                                        connect_enable_expansion_notify[sender] => move |expander_row| {
+                                            sender.input(OpenDialogMsg::SetDoWhisperExtraction(expander_row.enables_expansion()))
+                                        },
+
+                                        add_row: model.whisper_track_selector.widget(),
+                                    },
+                                },
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    fn init(
+        parent_window: Self::Init,
+        root: Self::Root,
+        sender: ComponentSender<Self>,
+    ) -> ComponentParts<Self> {
+        let whisper_track_selector = TrackSelector::builder()
+            .launch(TrackSelectorInit {
+                title: "Audio track",
+                subtitle: None,
+            })
+            .forward(sender.input_sender(), |output| match output {
+                TrackSelectorOutput::Changed(ix) => OpenDialogMsg::WhisperTrackSelected(ix),
+            });
+        let mut model = Self {
+            parent_window,
+            dialog: root.clone(),
+            toast_overlay: None,
+            navigation_view: None,
+            whisper_track_selector,
+
+            url: Tracker::new(String::new()),
+            do_whisper_extraction: false,
+            whisper_stream_index: None,
+
+            metadata_command_running: false,
+        };
+
+        let widgets = view_output!();
+
+        model.toast_overlay = Some(widgets.toast_overlay.clone());
+        model.navigation_view = Some(widgets.navigation_view.clone());
+
+        ComponentParts { model, widgets }
+    }
+
+    fn update(&mut self, message: Self::Input, sender: ComponentSender<Self>, _root: &Self::Root) {
+        match message {
+            OpenDialogMsg::Show => {
+                self.reset();
+                self.dialog.present(Some(&self.parent_window));
+            }
+            OpenDialogMsg::UrlChanged(url) => self.url.set_clean(url),
+            OpenDialogMsg::Next => self.fetch_metadata(sender),
+            OpenDialogMsg::Cancel => {
+                self.dialog.close();
+            }
+            OpenDialogMsg::SelectFile => {
+                let dialog = gtk::FileDialog::new();
+                dialog.open(
+                    Some(&self.parent_window),
+                    None as Option<&gio::Cancellable>,
+                    clone!(
+                        #[strong]
+                        sender,
+                        move |res| {
+                            if let Ok(file) = res {
+                                sender.input(OpenDialogMsg::FileSelected(file));
+                            }
+                        }
+                    ),
+                );
+            }
+            OpenDialogMsg::FileSelected(file) => {
+                self.url.set(file.uri().into());
+            }
+            OpenDialogMsg::Play => {
+                sender
+                    .output(OpenDialogOutput::Play {
+                        url: self.url.get().clone(),
+                        whisper_stream_index: if self.do_whisper_extraction {
+                            self.whisper_stream_index
+                        } else {
+                            None
+                        },
+                    })
+                    .unwrap();
+                self.dialog.close();
+            }
+            OpenDialogMsg::SetDoWhisperExtraction(val) => {
+                self.do_whisper_extraction = val;
+            }
+            OpenDialogMsg::WhisperTrackSelected(track_index) => {
+                self.whisper_stream_index = track_index;
+            }
+        }
+    }
+
+    // once the audio track metadata has been fetched, populate the whisper
+    // track dropdown and advance to the playback options page
+    fn update_cmd(
+        &mut self,
+        message: Self::CommandOutput,
+        _sender: ComponentSender<Self>,
+        _root: &Self::Root,
+    ) {
+        self.metadata_command_running = false;
+
+        match message {
+            Ok(audio_tracks) => {
+                let list_model = gio::ListStore::new::<TrackInfo>();
+
+                for (&stream_index, track) in audio_tracks.iter() {
+                    let track_info = TrackInfo::new(
+                        stream_index,
+                        track.language.map(|lang| lang.to_name()),
+                        track.title.clone(),
+                    );
+                    list_model.append(&track_info);
+                }
+
+                self.whisper_track_selector
+                    .sender()
+                    .send(TrackSelectorMsg::SetListModel(list_model))
+                    .unwrap();
+
+                self.next();
+            }
+            Err(e) => {
+                let toast = adw::Toast::builder()
+                    .title(&format!("Error fetching stream metadata: {}", e))
+                    .build();
+
+                self.toast_overlay.as_ref().unwrap().add_toast(toast);
+            }
+        }
+    }
+}
+
+impl OpenDialog {
+    fn reset(&mut self) {
+        self.url.get_mut().clear();
+        self.do_whisper_extraction = false;
+        self.whisper_stream_index = None;
+    }
+
+    fn fetch_metadata(&mut self, sender: ComponentSender<Self>) {
+        let url = self.url.get().clone();
+
+        sender.spawn_oneshot_command(move || {
+            let input = ffmpeg::format::input(&url)?;
+
+            let audio_tracks = input
+                .streams()
+                .filter_map(|stream| {
+                    if stream.parameters().medium() == ffmpeg::media::Type::Audio {
+                        Some((stream.index(), TrackMetadata::from_ffmpeg_stream(&stream)))
+                    } else {
+                        None
+                    }
+                })
+                .collect::<BTreeMap<_, _>>();
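+            // relm4 delivers this closure's return value to update_cmd as
+            // Self::CommandOutput once the oneshot command finishes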
+
+            Ok(audio_tracks)
+        });
+
+        self.metadata_command_running = true;
+    }
+
+    fn next(&self) {
+        self.navigation_view
+            .as_ref()
+            .unwrap()
+            .push_by_tag("playback_options");
+    }
+}
diff --git a/src/subtitle_extraction/embedded.rs b/src/subtitle_extraction/embedded.rs
new file mode 100644
index 0000000..5cdf813
--- /dev/null
+++ b/src/subtitle_extraction/embedded.rs
@@ -0,0 +1,118 @@
+use std::sync::mpsc;
+
+use anyhow::Context;
+
+use crate::subtitle_extraction::*;
+
+pub fn extract_embedded_subtitles(
+    // stream index to use when storing extracted subtitles; this index must
+    // already be present in SUBTITLE_TRACKS when this function is called!
+    stream_ix: StreamIndex,
+    context: ffmpeg::codec::Context,
+    time_base: ffmpeg::Rational,
+    packet_rx: mpsc::Receiver<ffmpeg::Packet>,
+    sender: ComponentSender<SubtitleExtractor>,
+) -> anyhow::Result<()> {
+    let mut decoder = context
+        .decoder()
+        .subtitle()
+        .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?;
+
+    while let Ok(packet) = packet_rx.recv() {
+        let mut subtitle = ffmpeg::Subtitle::new();
+        match decoder.decode(&packet, &mut subtitle) {
+            Ok(true) => {
+                if let Some(cue) = parse_subtitle(&subtitle, &packet, time_base) {
+                    SUBTITLE_TRACKS
+                        .write()
+                        .get_mut(&stream_ix)
+                        .unwrap()
+                        .cues
+                        .push(cue.clone());
+                    sender
+                        .output(SubtitleExtractorOutput::NewCue(stream_ix, cue))
+                        .unwrap();
+                } else {
+                    log::error!("error parsing subtitle at pts {:?}", packet.pts())
+                }
+            }
+            Ok(false) => {
+                log::debug!("decoder produced no subtitle for this packet; unclear whether this should ever happen");
+            }
+            Err(e) => {
+                log::error!("error decoding subtitle: {:?}", e)
+            }
+        }
+    }
+
+    Ok(())
+}
+
+fn parse_subtitle(
+    subtitle: &ffmpeg::Subtitle,
+    packet: &ffmpeg::Packet,
+    time_base: Rational,
+) -> Option<SubtitleCue> {
+    let time_to_clock_time = |time: i64| {
+        let nseconds: i64 =
+            (time * time_base.numerator() as i64 * 1_000_000_000) / time_base.denominator() as i64;
+        gst::ClockTime::from_nseconds(nseconds as u64)
+    };
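+    // illustrative example (hypothetical values, not from any stream): with
+    // time_base = 1/1000 and pts = 1500, nseconds =
+    // (1500 * 1 * 1_000_000_000) / 1000 = 1_500_000_000, i.e. 1.5 s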
+
+    let text = subtitle
+        .rects()
+        .into_iter()
+        .map(|rect| match rect {
+            ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
+            ffmpeg::subtitle::Rect::Ass(ass) => {
+                extract_dialogue_text(ass.get()).unwrap_or(String::new())
+            }
+            _ => String::new(),
+        })
+        .collect::<Vec<String>>()
+        .join("\n— ");
+
+    let start = time_to_clock_time(packet.pts()?);
+    let end = time_to_clock_time(packet.pts()? + packet.duration());
+
+    Some(SubtitleCue { start, end, text })
+}
+
+fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
+    // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
+    // we need the 9th field (Text), so split on comma but only take first 9 splits
+    // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
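+    // e.g. a (hypothetical) payload "1,0,Default,,0,0,0,,{\i1}Hello{\i0}\Nworld"
+    // has Text = "{\i1}Hello{\i0}\Nworld", which the loop below strips down to
+    // "Hello\nworld"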
+    let text = dialogue_line.splitn(9, ',').last()?;
+
+    // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
+    let mut result = String::new();
+    let mut in_tag = false;
+    let mut char_iter = text.chars().peekable();
+
+    while let Some(c) = char_iter.next() {
+        if c == '{' && char_iter.peek() == Some(&'\\') {
+            in_tag = true;
+        } else if c == '}' {
+            in_tag = false;
+        } else if !in_tag {
+            // process line breaks and hard spaces
+            if c == '\\' {
+                match char_iter.peek() {
+                    Some(&'N') => {
+                        char_iter.next();
+                        result.push('\n');
+                    }
+                    Some(&'n') | Some(&'h') => {
+                        char_iter.next();
+                        result.push(' ');
+                    }
+                    _ => result.push(c),
+                }
+            } else {
+                result.push(c);
+            }
+        }
+    }
+
+    Some(result)
+}
diff --git a/src/subtitle_extraction/mod.rs b/src/subtitle_extraction/mod.rs
new file mode 100644
index 0000000..9e7fff4
--- /dev/null
+++ b/src/subtitle_extraction/mod.rs
@@ -0,0 +1,159 @@
+/// Extraction of embedded subtitles
+mod embedded;
+/// Synthesis of subtitles from audio using whisper.cpp
+mod whisper;
+
+use std::{collections::BTreeMap, sync::mpsc, thread};
+
+use ffmpeg::Rational;
+use relm4::{ComponentSender, Worker};
+
+use crate::tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue, SubtitleTrack, TrackMetadata};
+
+pub struct SubtitleExtractor {}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorMsg {
+    ExtractFromUrl {
+        url: String,
+        // the index of the audio stream on which to run a whisper transcription
+        whisper_stream_index: Option<usize>,
+    },
+}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorOutput {
+    NewCue(StreamIndex, SubtitleCue),
+    ExtractionComplete,
+}
+
+impl Worker for SubtitleExtractor {
+    type Init = ();
+    type Input = SubtitleExtractorMsg;
+    type Output = SubtitleExtractorOutput;
+
+    fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
+        Self {}
+    }
+
+    fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
+        match msg {
+            SubtitleExtractorMsg::ExtractFromUrl {
+                url,
+                whisper_stream_index: whisper_audio_stream_ix,
+            } => {
+                self.handle_extract_from_url(url, whisper_audio_stream_ix, sender);
+            }
+        }
+    }
+}
+
+impl SubtitleExtractor {
+    fn handle_extract_from_url(
+        &mut self,
+        url: String,
+        whisper_audio_stream_ix: Option<usize>,
+        sender: ComponentSender<Self>,
+    ) {
+        // Clear existing tracks
+        SUBTITLE_TRACKS.write().clear();
+
+        match self.extract_subtitles(&url, whisper_audio_stream_ix, sender.clone()) {
+            Ok(_) => {
+                log::info!("Subtitle extraction completed successfully");
+                sender
+                    .output(SubtitleExtractorOutput::ExtractionComplete)
+                    .unwrap();
+            }
+            Err(e) => {
+                log::error!("Subtitle extraction failed: {}", e);
+            }
+        }
+    }
+
+    fn extract_subtitles(
+        &self,
+        url: &str,
+        whisper_audio_stream_ix: Option<usize>,
+        sender: ComponentSender<Self>,
+    ) -> anyhow::Result<()> {
+        let mut input = ffmpeg::format::input(&url)?;
+
+        let mut subtitle_extractors = BTreeMap::new();
+
+        // create extractor for each subtitle stream
+        for stream in input.streams() {
+            let stream_ix = stream.index();
+
+            if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
+                let metadata = TrackMetadata::from_ffmpeg_stream(&stream);
+                let track = SubtitleTrack {
+                    metadata,
+                    cues: Vec::new(),
+                };
+
+                SUBTITLE_TRACKS.write().insert(stream_ix, track);
+
+                let context = ffmpeg::codec::Context::from_parameters(stream.parameters())?;
+                let (packet_tx, packet_rx) = mpsc::channel();
+                let time_base = stream.time_base();
+                let sender = sender.clone();
+                let join_handle = thread::spawn(move || {
+                    embedded::extract_embedded_subtitles(
+                        stream_ix, context, time_base, packet_rx, sender,
+                    )
+                });
+
+                subtitle_extractors.insert(stream_ix, (packet_tx, join_handle));
+            }
+        }
+
+        if let Some(stream_ix) = whisper_audio_stream_ix {
+            let stream = input.stream(stream_ix).unwrap();
+
+            let mut metadata = TrackMetadata::from_ffmpeg_stream(&stream);
+            metadata.title = Some(match metadata.title {
+                Some(title) => format!("Auto-generated from audio (Whisper): {}", title),
+                None => "Auto-generated from audio (Whisper)".to_string(),
+            });
+
+            let track = SubtitleTrack {
+                metadata,
+                cues: Vec::new(),
+            };
+
+            SUBTITLE_TRACKS.write().insert(stream_ix, track);
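+            // NOTE: the audio stream's index doubles as the key for this
+            // generated track; it can't clash with the embedded subtitle
+            // entries above because ffmpeg stream indices are unique per input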
+
+            let context = ffmpeg::codec::Context::from_parameters(stream.parameters())?;
+            let (packet_tx, packet_rx) = mpsc::channel();
+            let time_base = stream.time_base();
+            let sender = sender.clone();
+            let join_handle = thread::spawn(move || {
+                whisper::generate_whisper_subtitles(
+                    stream_ix, context, time_base, packet_rx, sender,
+                )
+            });
+
+            subtitle_extractors.insert(stream_ix, (packet_tx, join_handle));
+        }
+
+        // demux packets and fan them out to the per-stream extractor threads
+        for (stream, packet) in input.packets() {
+            let stream_index = stream.index();
+
+            if let Some((packet_tx, _)) = subtitle_extractors.get_mut(&stream_index) {
+                packet_tx.send(packet).unwrap();
+            }
+        }
+
+        // wait for extraction to complete: destructuring drops each packet_tx,
+        // closing its channel so the worker's recv() loop ends before we join
+        for (_, (_, join_handle)) in subtitle_extractors {
+            join_handle
+                .join()
+                .unwrap()
+                .unwrap_or_else(|e| log::error!("error running subtitle extraction: {}", e));
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/subtitle_extraction/whisper.rs b/src/subtitle_extraction/whisper.rs
new file mode 100644
index 0000000..5622d6f
--- /dev/null
+++ b/src/subtitle_extraction/whisper.rs
@@ -0,0 +1,75 @@
+use std::sync::mpsc;
+
+use anyhow::Context;
+use ffmpeg::filter;
+
+use crate::{subtitle_extraction::*, tracks::StreamIndex};
+
+pub fn generate_whisper_subtitles(
+    // stream index to use when storing generated subtitles; this index must
+    // already be present in SUBTITLE_TRACKS when this function is called!
+    stream_ix: StreamIndex,
+    context: ffmpeg::codec::Context,
+    time_base: ffmpeg::Rational,
+    packet_rx: mpsc::Receiver<ffmpeg::Packet>,
+    sender: ComponentSender<SubtitleExtractor>,
+) -> anyhow::Result<()> {
+    let mut decoder = context
+        .decoder()
+        .audio()
+        .with_context(|| format!("error creating audio decoder for stream {}", stream_ix))?;
+
+    let mut filter = filter::Graph::new();
+
+    let abuffer_args = format!(
+        "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+        decoder.time_base(),
+        decoder.rate(),
+        decoder.format().name(),
+        decoder.channel_layout().bits()
+    );
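+    // e.g. for a typical 44.1 kHz stereo fltp stream this expands to something
+    // like "time_base=1/44100:sample_rate=44100:sample_fmt=fltp:channel_layout=0x3"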
+    let whisper_args = format!(
+        "model={}:queue={}:format=json",
+        "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin", 30
+    );
+    let filter_spec = format!("[src] whisper={} [sink]", whisper_args);
+
+    filter.add(&filter::find("abuffer").unwrap(), "src", &abuffer_args)?;
+    filter.add(&filter::find("abuffersink").unwrap(), "sink", "")?;
+    filter
+        .output("src", 0)?
+        .input("sink", 0)?
+        .parse(&filter_spec)?;
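+    // output("src", 0) / input("sink", 0) bind the abuffer and abuffersink
+    // added above to the [src] / [sink] labels referenced in filter_spec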
+    filter.validate()?;
+
+    let mut source_ctx = filter.get("src").unwrap();
+    let mut sink_ctx = filter.get("sink").unwrap();
+
+    while let Ok(packet) = packet_rx.recv() {
+        handle_packet(&mut decoder, source_ctx.source(), sink_ctx.sink(), packet)
+            .unwrap_or_else(|e| log::error!("error handling audio packet: {}", e))
+    }
+
+    Ok(())
+}
+
+fn handle_packet(
+    decoder: &mut ffmpeg::decoder::Audio,
+    mut source: filter::Source,
+    mut sink: filter::Sink,
+    packet: ffmpeg::Packet,
+) -> anyhow::Result<()> {
+    let mut in_frame = unsafe { ffmpeg::Frame::empty() };
+    decoder.send_packet(&packet)?;
+    decoder.receive_frame(&mut in_frame)?;
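+    // NOTE: this assumes the common 1:1 packet/frame case; a packet can in
+    // principle yield zero or several frames, which this path doesn't handle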
+    source.add(&in_frame)?;
+
+    let mut out_frame = unsafe { ffmpeg::Frame::empty() };
+    sink.frame(&mut out_frame)?;
+
+    if let Some(text) = out_frame.metadata().get("lavfi.whisper.text") {
+        println!("{}", text);
+    }
+
+    Ok(())
+}
diff --git a/src/subtitle_extractor.rs b/src/subtitle_extractor.rs
deleted file mode 100644
index b628d73..0000000
--- a/src/subtitle_extractor.rs
+++ /dev/null
@@ -1,209 +0,0 @@
-use std::collections::BTreeMap;
-
-use anyhow::Result;
-
-use ffmpeg::Rational;
-use log::{debug, error, info};
-use relm4::{ComponentSender, SharedState, Worker};
-
-pub type StreamIndex = usize;
-
-#[derive(Debug, Clone)]
-pub struct SubtitleCue {
-    pub start: gst::ClockTime,
-    pub end: gst::ClockTime,
-    pub text: String,
-}
-
-#[derive(Debug, Clone)]
-pub struct SubtitleTrack {
-    pub language: Option<isolang::Language>,
-    pub title: Option<String>,
-    pub cues: Vec<SubtitleCue>,
-}
-
-pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();
-
-pub struct SubtitleExtractor {}
-
-#[derive(Debug)]
-pub enum SubtitleExtractorMsg {
-    ExtractFromUrl(String),
-}
-
-#[derive(Debug)]
-pub enum SubtitleExtractorOutput {
-    NewOrUpdatedTrackMetadata(StreamIndex),
-    NewCue(StreamIndex, SubtitleCue),
-    ExtractionComplete,
-}
-
-impl Worker for SubtitleExtractor {
-    type Init = ();
-    type Input = SubtitleExtractorMsg;
-    type Output = SubtitleExtractorOutput;
-
-    fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
-        Self {}
-    }
-
-    fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
-        match msg {
-            SubtitleExtractorMsg::ExtractFromUrl(url) => {
-                self.handle_extract_from_url(url, sender);
-            }
-        }
-    }
-}
-
-impl SubtitleExtractor {
-    fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
-        // Clear existing tracks
-        TRACKS.write().clear();
-
-        // Try to extract subtitles using ffmpeg
-        match self.extract_subtitles_ffmpeg(&url, &sender) {
-            Ok(_) => {
-                info!("Subtitle extraction completed successfully");
-                sender
-                    .output(SubtitleExtractorOutput::ExtractionComplete)
-                    .unwrap();
-            }
-            Err(e) => {
-                error!("FFmpeg extraction failed: {}", e);
-            }
-        }
-    }
-
-    fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
-        let mut input = ffmpeg::format::input(&url)?;
-
-        let mut subtitle_decoders = BTreeMap::new();
-
-        // create decoder for each subtitle stream
-        for (stream_index, stream) in input.streams().enumerate() {
-            if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
-                let language_code = stream.metadata().get("language").map(|s| s.to_string());
-                let title = stream.metadata().get("title").map(|s| s.to_string());
-
-                let track = SubtitleTrack {
-                    language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
-                    title,
-                    cues: Vec::new(),
-                };
-
-                TRACKS.write().insert(stream_index, track);
-
-                sender
-                    .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
-                        stream_index,
-                    ))
-                    .unwrap();
-
-                let context =
-                    ffmpeg::codec::context::Context::from_parameters(stream.parameters())?;
-                if let Ok(decoder) = context.decoder().subtitle() {
-                    subtitle_decoders.insert(stream_index, decoder);
-                    debug!("Created decoder for subtitle stream {}", stream_index);
-                } else {
-                    error!(
-                        "Failed to create decoder for subtitle stream {}",
-                        stream_index
-                    );
-                }
-            }
-        }
-
-        // process packets
-        for (stream, packet) in input.packets() {
-            let stream_index = stream.index();
-
-            if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
-                let mut subtitle = ffmpeg::Subtitle::new();
-                if decoder.decode(&packet, &mut subtitle).is_ok() {
-                    if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
-                    {
-                        if let Some(track) = TRACKS.write().get_mut(&stream_index) {
-                            track.cues.push(cue.clone());
-                        }
-
-                        sender
-                            .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
-                            .unwrap();
-                    }
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    fn subtitle_to_cue(
-        subtitle: &ffmpeg::Subtitle,
-        packet: &ffmpeg::Packet,
-        time_base: Rational,
-    ) -> Option<SubtitleCue> {
-        let time_to_clock_time = |time: i64| {
-            let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000)
-                / time_base.denominator() as i64;
-            gst::ClockTime::from_nseconds(nseconds as u64)
-        };
-
-        let text = subtitle
-            .rects()
-            .into_iter()
-            .map(|rect| match rect {
-                ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
-                ffmpeg::subtitle::Rect::Ass(ass) => {
-                    Self::extract_dialogue_text(ass.get()).unwrap_or(String::new())
-                }
-                _ => String::new(),
-            })
-            .collect::<Vec<String>>()
-            .join("\n— ");
-
-        let start = time_to_clock_time(packet.pts()?);
-        let end = time_to_clock_time(packet.pts()? + packet.duration());
-
-        Some(SubtitleCue { start, end, text })
-    }
-
-    fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
-        // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
-        // we need the 9th field (Text), so split on comma but only take first 9 splits
-        // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
-        let text = dialogue_line.splitn(9, ',').last()?;
-
-        // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
-        let mut result = String::new();
-        let mut in_tag = false;
-        let mut char_iter = text.chars().peekable();
-
-        while let Some(c) = char_iter.next() {
-            if c == '{' && char_iter.peek() == Some(&'\\') {
-                in_tag = true;
-            } else if c == '}' {
-                in_tag = false;
-            } else if !in_tag {
-                // process line breaks and hard spaces
-                if c == '\\' {
-                    match char_iter.peek() {
-                        Some(&'N') => {
-                            char_iter.next();
-                            result.push('\n');
-                        }
-                        Some(&'n') | Some(&'h') => {
-                            char_iter.next();
-                            result.push(' ');
-                        }
-                        _ => result.push(c),
-                    }
-                } else {
-                    result.push(c);
-                }
-            }
-        }
-
-        Some(result)
-    }
-}
diff --git a/src/subtitle_extractor_aishit.rs b/src/subtitle_extractor_aishit.rs
new file mode 100644
index 0000000..c615f6c
--- /dev/null
+++ b/src/subtitle_extractor_aishit.rs
@@ -0,0 +1,732 @@
+use std::collections::BTreeMap;
+
+use anyhow::Result;
+
+use ffmpeg::Rational;
+use log::{debug, error, info, warn};
+use relm4::{ComponentSender, SharedState, Worker};
+
+pub type StreamIndex = usize;
+
+#[derive(Debug, Clone)]
+pub struct SubtitleCue {
+    pub start: gst::ClockTime,
+    pub end: gst::ClockTime,
+    pub text: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtitleTrack {
+    pub language: Option<isolang::Language>,
+    pub title: Option<String>,
+    pub cues: Vec<SubtitleCue>,
+    pub is_generated: bool, // true if generated from audio
+}
+
+pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();
+
+pub struct SubtitleExtractor {}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorMsg {
+    ExtractFromUrl(String),
+}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorOutput {
+    NewOrUpdatedTrackMetadata(StreamIndex),
+    NewCue(StreamIndex, SubtitleCue),
+    ExtractionComplete,
+}
+
+impl Worker for SubtitleExtractor {
+    type Init = ();
+    type Input = SubtitleExtractorMsg;
+    type Output = SubtitleExtractorOutput;
+
+    fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
+        Self {}
+    }
+
+    fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
+        match msg {
+            SubtitleExtractorMsg::ExtractFromUrl(url) => {
+                self.handle_extract_from_url(url, sender);
+            }
+        }
+    }
+}
+
+impl SubtitleExtractor {
+    fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
+        // Clear existing tracks
+        TRACKS.write().clear();
+
+        // Try to extract subtitles using ffmpeg
+        match self.extract_subtitles_ffmpeg(&url, &sender) {
+            Ok(_) => {
+                info!("Subtitle extraction completed successfully");
+                sender
+                    .output(SubtitleExtractorOutput::ExtractionComplete)
+                    .unwrap();
+            }
+            Err(e) => {
+                error!("FFmpeg extraction failed: {}", e);
+            }
+        }
+    }
+
+    fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
+        info!("Starting subtitle extraction from: {}", url);
+        let mut input = ffmpeg::format::input(&url)?;
+
+        // Log input format info
+        info!(
+            "Input format: {} ({} streams)",
+            input.format().name(),
+            input.streams().count()
+        );
+
+        // Check if whisper filter is available
+        if let Some(whisper_filter) = ffmpeg::filter::find("whisper") {
+            info!("Whisper filter found: {}", whisper_filter.name());
+        } else {
+            warn!("Whisper filter not found - audio transcription will be skipped");
+        }
+
+        let mut subtitle_decoders = BTreeMap::new();
+        let mut audio_decoder: Option<ffmpeg::decoder::Audio> = None;
+        let mut _whisper_filter_graph: Option<ffmpeg::filter::Graph> = None;
+        let mut whisper_source: Option<ffmpeg::filter::Context> = None;
+        let mut whisper_sink: Option<ffmpeg::filter::Context> = None;
+        let mut best_audio_stream_index: Option<usize> = None;
+
+        // Find best audio stream for whisper processing
+        if let Some(audio_stream) = input.streams().best(ffmpeg::media::Type::Audio) {
+            best_audio_stream_index = Some(audio_stream.index());
+
+            // Get audio parameters safely
+            let codec_id = audio_stream.parameters().id();
+            let channels = if let Ok(context) =
+                ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+            {
+                if let Ok(audio) = context.decoder().audio() {
+                    audio.channels()
+                } else {
+                    0
+                }
+            } else {
+                0
+            };
+            let sample_rate = if let Ok(context) =
+                ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+            {
+                if let Ok(audio) = context.decoder().audio() {
+                    audio.rate()
+                } else {
+                    0
+                }
+            } else {
+                0
+            };
+
+            info!(
+                "Found best audio stream: index {} (codec: {:?}, channels: {}, sample_rate: {})",
+                audio_stream.index(),
+                codec_id,
+                channels,
+                sample_rate
+            );
+        } else {
+            info!("No audio stream found for whisper processing");
+        }
+
+        // Set up whisper filter graph if we found an audio stream
+        if let Some(audio_index) = best_audio_stream_index {
+            info!("Setting up whisper filter for audio stream {}", audio_index);
+
+            let audio_stream = input.stream(audio_index).unwrap();
+            if let Ok(context) =
+                ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+            {
+                if let Ok(decoder) = context.decoder().audio() {
+                    // Get decoder properties before moving it
+                    let decoder_rate = decoder.rate();
+                    let decoder_format = decoder.format();
+                    let decoder_channel_layout = decoder.channel_layout().bits();
+
+                    audio_decoder = Some(decoder);
+
+                    // Set up whisper filter graph
+                    debug!("Creating whisper filter graph...");
+                    debug!(
+                        "Audio stream time_base: {}, decoder rate: {}, format: {:?}, channel_layout: 0x{:x}",
+                        audio_stream.time_base(),
+                        decoder_rate,
+                        decoder_format,
+                        decoder_channel_layout
+                    );
+                    match self.setup_whisper_filter(&audio_stream) {
+                        Ok((graph, source, sink)) => {
+                            info!("Whisper filter graph created successfully");
+                            _whisper_filter_graph = Some(graph);
+                            whisper_source = Some(source);
+                            whisper_sink = Some(sink);
+                            debug!("Whisper source and sink contexts stored");
+
+                            // Create a generated subtitle track
+                            let track = SubtitleTrack {
+                                language: Some(isolang::Language::from_639_1("en").unwrap_or_else(
+                                    || isolang::Language::from_639_3("eng").unwrap(),
+                                )),
+                                title: Some("Generated from Audio (Whisper)".to_string()),
+                                cues: Vec::new(),
+                                is_generated: true,
+                            };
+
+                            let whisper_stream_index = 1000; // Use high index for generated tracks
+                            TRACKS.write().insert(whisper_stream_index, track);
+
+                            sender
+                                .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+                                    whisper_stream_index,
+                                ))
+                                .unwrap();
+                        }
+                        Err(e) => {
+                            error!("Failed to setup whisper filter: {}", e);
+                            debug!("Whisper filter error details: {:?}", e);
+                            warn!(
+                                "Audio transcription will be skipped due to filter setup failure"
+                            );
+                        }
+                    }
+                }
+            }
+        }
+
+        // Create decoder for each subtitle stream
+        for (stream_index, stream) in input.streams().enumerate() {
+            if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
+                let language_code = stream.metadata().get("language").map(|s| s.to_string());
+                let title = stream.metadata().get("title").map(|s| s.to_string());
+
+                let track = SubtitleTrack {
+                    language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
+                    title,
+                    cues: Vec::new(),
+                    is_generated: false,
+                };
+
+                TRACKS.write().insert(stream_index, track);
+
+                sender
+                    .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+                        stream_index,
+                    ))
+                    .unwrap();
+
+                let context =
+                    ffmpeg::codec::context::Context::from_parameters(stream.parameters())?;
+                if let Ok(decoder) = context.decoder().subtitle() {
+                    subtitle_decoders.insert(stream_index, decoder);
+                    debug!("Created decoder for subtitle stream {}", stream_index);
+                } else {
+                    error!(
+                        "Failed to create decoder for subtitle stream {}",
+                        stream_index
+                    );
+                }
+            } else {
+                debug!(
+                    "Failed to create context for subtitle stream {}",
+                    stream_index
+                );
+            }
+        }
+
+        // Process packets
+        for (stream, packet) in input.packets() {
+            let stream_index = stream.index();
+
+            // Process subtitle packets
+            if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
+                let mut subtitle = ffmpeg::Subtitle::new();
+                if decoder.decode(&packet, &mut subtitle).is_ok() {
+                    if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
+                    {
+                        if let Some(track) = TRACKS.write().get_mut(&stream_index) {
+                            track.cues.push(cue.clone());
+                        }
+
+                        sender
+                            .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
+                            .unwrap();
+                    }
+                }
+            }
+
+            // Process audio packets for whisper
+            if Some(stream_index) == best_audio_stream_index {
+                debug!(
+                    "Processing audio packet for whisper (stream: {}, pts: {:?}, duration: {:?})",
+                    stream_index,
+                    packet.pts(),
+                    packet.duration()
+                );
+                debug!(
+                    "Audio decoder available: {}, Whisper source available: {}",
+                    audio_decoder.is_some(),
+                    whisper_source.is_some()
+                );
+                if let (Some(decoder), Some(source)) = (&mut audio_decoder, &mut whisper_source) {
+                    debug!("Both audio decoder and whisper source are available, processing...");
+                    // Send packet to audio decoder
+                    if let Err(e) = decoder.send_packet(&packet) {
+                        debug!("Failed to send packet to audio decoder: {}", e);
+                    }
+
+                    // Get decoded frames and send to whisper filter
+                    let mut frame = unsafe { ffmpeg::Frame::empty() };
+                    let mut frame_count = 0;
+                    while decoder.receive_frame(&mut frame).is_ok() {
+                        frame_count += 1;
+                        debug!(
+                            "Decoded audio frame {} (pts: {:?})",
+                            frame_count,
+                            frame.pts()
+                        );
+
+                        // Add frame to whisper filter
+                        if let Err(e) = source.source().add(&frame) {
+                            error!("Failed to add frame to whisper filter: {}", e);
+                        } else {
+                            debug!("Successfully added frame to whisper filter");
+                        }
+
+                        // Check for whisper output after adding each frame
+                        if let Some(sink) = &mut whisper_sink {
+                            self.check_whisper_output(sink, sender)?;
+                        }
+                    }
+                    if frame_count > 0 {
+                        debug!("Processed {} audio frames for whisper", frame_count);
+                    }
+                } else {
+                    debug!("Skipping audio packet - decoder or whisper source not available");
+                }
+            }
+        }
+
+        // Flush audio decoder and whisper filter
+        if let (Some(decoder), Some(source), Some(sink)) =
+            (&mut audio_decoder, &mut whisper_source, &mut whisper_sink)
+        {
+            info!("Flushing audio decoder and whisper filter...");
+            // Flush decoder
+            if let Err(e) = decoder.send_eof() {
+                debug!("Failed to send EOF to decoder: {}", e);
+            }
+            let mut frame = unsafe { ffmpeg::Frame::empty() };
+            let mut final_frame_count = 0;
+            while decoder.receive_frame(&mut frame).is_ok() {
+                final_frame_count += 1;
+                source.source().add(&frame).ok();
+            }
+            debug!("Flushed {} final frames from decoder", final_frame_count);
+
+            // Flush filter and get results
+            debug!("Flushing whisper filter...");
+            if let Err(e) = source.source().flush() {
+                error!("Failed to flush whisper filter: {}", e);
+            }
+
+            info!("Processing final whisper filter output...");
+            self.check_whisper_output(sink, sender)?;
+        }
+
+        Ok(())
+    }
+
+    fn setup_whisper_filter(
+        &self,
+        audio_stream: &ffmpeg::Stream,
+    ) -> Result<(
+        ffmpeg::filter::Graph,
+        ffmpeg::filter::Context,
+        ffmpeg::filter::Context,
+    )> {
+        debug!("Setting up whisper filter graph...");
+        let mut filter_graph = ffmpeg::filter::Graph::new();
+        debug!("Filter graph created successfully");
+
+        // Get audio parameters
+        debug!("Getting audio parameters...");
+        let time_base = audio_stream.time_base();
+        let audio_params = audio_stream.parameters();
+        debug!("Creating context from parameters...");
+        let context = ffmpeg::codec::context::Context::from_parameters(audio_params)?;
+        debug!("Getting audio decoder from context...");
+        let audio_decoder = context.decoder().audio()?;
+        debug!("Audio decoder created successfully");
+
+        // Create buffer source
+        let buffer_args = format!(
+            "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+            time_base,
+            audio_decoder.rate(),
+            audio_decoder.format().name(),
+            audio_decoder.channel_layout().bits()
+        );
+        debug!("Buffer args: {}", buffer_args);
+
+        debug!("Looking for abuffer filter...");
+        let abuffer_filter = ffmpeg::filter::find("abuffer")
+            .ok_or_else(|| anyhow::anyhow!("abuffer filter not found"))?;
+        debug!("abuffer filter found: {}", abuffer_filter.name());
+
+        debug!("Adding abuffer filter...");
+        match filter_graph.add(&abuffer_filter, "src", &buffer_args) {
+            Ok(_) => debug!("abuffer filter added successfully"),
+            Err(e) => {
+                error!("Failed to add abuffer filter: {}", e);
+                return Err(anyhow::anyhow!("Failed to add abuffer filter: {}", e));
+            }
+        }
+
+        // Create whisper filter with parameters
+        // Try absolute path and different parameter formats
+        let model_path = std::path::Path::new("./whisper-models/ggml-large-v3.bin");
+        let absolute_path = if model_path.exists() {
+            model_path
+                .canonicalize()
+                .map(|p| p.to_string_lossy().to_string())
+                .unwrap_or_else(|_| "./whisper-models/ggml-large-v3.bin".to_string())
+        } else {
+            warn!("Whisper model file not found at: {:?}", model_path);
+            "./whisper-models/ggml-large-v3.bin".to_string()
+        };
+
+        debug!("Model path exists: {}", model_path.exists());
+        debug!("Using absolute path: {}", absolute_path);
+
+        debug!("Looking for whisper filter...");
+        let whisper_filter = ffmpeg::filter::find("whisper").ok_or_else(|| {
+            error!("Whisper filter not found! Make sure FFmpeg was compiled with whisper support");
+            anyhow::anyhow!("Whisper filter not available")
+        })?;
+
+        debug!("Whisper filter found: {}", whisper_filter.name());
+        // We'll create the whisper filter through the parse method instead of adding it manually
+
+        // Create audio buffer sink for whisper output (whisper outputs audio + metadata)
+        debug!("Looking for abuffersink filter for audio output...");
+        let abuffersink_filter = ffmpeg::filter::find("abuffersink")
+            .ok_or_else(|| anyhow::anyhow!("abuffersink filter not found"))?;
+        debug!("abuffersink filter found: {}", abuffersink_filter.name());
+
+        debug!("Adding abuffersink filter...");
+        match filter_graph.add(&abuffersink_filter, "sink", "") {
+            Ok(_) => debug!("abuffersink filter added successfully"),
+            Err(e) => {
+                error!("Failed to add abuffersink filter: {}", e);
+                return Err(anyhow::anyhow!("Failed to add abuffersink filter: {}", e));
+            }
+        }
+
+        // Connect filters using the complete filter chain description
+        debug!("Connecting filter graph with complete chain: src -> whisper -> sink");
+
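+        // libavfilter graph notation: "[src]" / "[sink]" are the labelled pads of the
+        // abuffer and abuffersink contexts added above. The option meanings below are
+        // assumptions based on the FFmpeg whisper filter docs: `model` points at the
+        // GGML weights, `queue` bounds how much audio is buffered per inference pass,
+        // and `format` controls the shape of the emitted transcription metadata.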
+        let filter_chain = format!(
+            "[src]whisper=model={}:queue=30:format=json[sink]",
+            absolute_path,
+            //"/Users/malte/repos/lleap/whisper-models/ggml-silero-v5.1.2.bin"
+        );
+        debug!("Using filter chain: {}", filter_chain);
+
+        if let Err(e) = filter_graph
+            .output("src", 0)
+            .and_then(|o| o.input("sink", 0))
+            .and_then(|i| i.parse(&filter_chain))
+        {
+            error!("Failed to connect filter graph: {}", e);
+            return Err(anyhow::anyhow!("Failed to connect filter graph: {}", e));
+        }
+        debug!("Filter graph connected successfully");
+
+        // Validate filter graph
+        debug!("Validating filter graph...");
+        match filter_graph.validate() {
+            Ok(_) => {
+                info!("Filter graph validated successfully");
+                debug!("Filter graph dump:\n{}", filter_graph.dump());
+            }
+            Err(e) => {
+                error!("Filter graph validation failed: {}", e);
+                debug!(
+                    "Filter graph dump before validation failure:\n{}",
+                    filter_graph.dump()
+                );
+                return Err(anyhow::anyhow!("Filter graph validation failed: {}", e));
+            }
+        }
+
+        debug!("Getting final source and sink contexts...");
+        let source_ctx = filter_graph
+            .get("src")
+            .ok_or_else(|| anyhow::anyhow!("Source context not found"))?;
+        let sink_ctx = filter_graph
+            .get("sink")
+            .ok_or_else(|| anyhow::anyhow!("Sink context not found"))?;
+        debug!("Final contexts retrieved successfully");
+
+        Ok((filter_graph, source_ctx, sink_ctx))
+    }
+
+    fn check_whisper_output(
+        &self,
+        sink: &mut ffmpeg::filter::Context,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        debug!("Attempting to read audio frames from whisper filter output...");
+
+        // The whisper filter outputs audio frames with subtitle data in "lavfi.whisper.text" metadata
+        let mut frame = unsafe { ffmpeg::Frame::empty() };
+        let mut output_count = 0;
+
+        while sink.sink().frame(&mut frame).is_ok() {
+            output_count += 1;
+            debug!(
+                "Received audio frame {} from whisper filter (pts: {:?})",
+                output_count,
+                frame.pts()
+            );
+
+            // Look specifically for lavfi.whisper.text metadata
+            if let Some(whisper_text) = frame.metadata().get("lavfi.whisper.text") {
+                info!("Found whisper transcription: {}", whisper_text);
+
+                let start_time = if let Some(pts) = frame.pts() {
+                    // Convert PTS to nanoseconds based on whisper filter's time base (16kHz)
+                    gst::ClockTime::from_nseconds((pts as u64 * 1_000_000_000) / 16000)
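+                    // e.g. (illustrative): pts = 32_000 at 16 kHz
+                    // -> 32_000 * 1_000_000_000 / 16_000 ns = 2 s.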
+                } else {
+                    gst::ClockTime::ZERO
+                };
+
+                // Log all available metadata keys to help debug
+                let metadata_entries: Vec<(String, String)> = frame
+                    .metadata()
+                    .iter()
+                    .map(|(k, v)| (k.to_string(), v.to_string()))
+                    .collect();
+                if !metadata_entries.is_empty() {
+                    let metadata_keys: Vec<String> =
+                        metadata_entries.iter().map(|(k, _)| k.clone()).collect();
+                    debug!("Frame metadata keys: {:?}", metadata_keys);
+                }
+
+                // Parse the whisper text (might be JSON format)
+                self.parse_whisper_text(whisper_text, start_time, sender)?;
+            }
+        }
+
+        if output_count > 0 {
+            info!("Processed {} frames from whisper filter", output_count);
+        } else {
+            debug!("No frames available from whisper filter");
+        }
+
+        Ok(())
+    }
+
+    fn parse_whisper_text(
+        &self,
+        whisper_text: &str,
+        base_time: gst::ClockTime,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        debug!("Parsing whisper text: {}", whisper_text);
+
+        // The whisper text might be in different formats depending on the filter configuration
+        // For now, treat it as plain text and create a single cue
+        let cue = SubtitleCue {
+            start: base_time,
+            end: base_time + gst::ClockTime::from_seconds(3), // Default 3 second duration
+            text: whisper_text.to_string(),
+        };
+
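+        // Synthetic stream index reserved for whisper-generated cues; assumes real
+        // container stream indices never reach 1000.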
+        let whisper_stream_index = 1000;
+        if let Some(track) = SUBTITLE_TRACKS.write().get_mut(&whisper_stream_index) {
+            track.cues.push(cue.clone());
+        }
+
+        sender
+            .output(SubtitleExtractorOutput::NewCue(whisper_stream_index, cue))
+            .unwrap();
+
+        Ok(())
+    }
+
+    fn parse_whisper_subtitle_data(
+        &self,
+        subtitle_data: &str,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        // Parse SRT-format output from whisper
+        info!(
+            "Parsing whisper subtitle data ({} characters)",
+            subtitle_data.len()
+        );
+        debug!("Subtitle data content:\n{}", subtitle_data);
+        let lines: Vec<&str> = subtitle_data.lines().collect();
+        let mut i = 0;
+
+        while i < lines.len() {
+            // Skip subtitle number
+            if lines[i].trim().parse::<i32>().is_ok() {
+                i += 1;
+            }
+
+            // Parse timestamp line
+            if i < lines.len() {
+                if let Some((start, end)) = self.parse_srt_timestamp(lines[i]) {
+                    i += 1;
+
+                    // Collect text lines
+                    let mut text_lines = Vec::new();
+                    while i < lines.len() && !lines[i].trim().is_empty() {
+                        text_lines.push(lines[i].to_string());
+                        i += 1;
+                    }
+
+                    if !text_lines.is_empty() {
+                        let cue = SubtitleCue {
+                            start,
+                            end,
+                            text: text_lines.join("\n"),
+                        };
+
+                        let whisper_stream_index = 1000;
+                        if let Some(track) = SUBTITLE_TRACKS.write().get_mut(&whisper_stream_index) {
+                            track.cues.push(cue.clone());
+                        }
+
+                        sender
+                            .output(SubtitleExtractorOutput::NewCue(whisper_stream_index, cue))
+                            .unwrap();
+                    }
+                }
+            }
+            i += 1;
+        }
+
+        Ok(())
+    }
+
+    fn parse_srt_timestamp(&self, line: &str) -> Option<(gst::ClockTime, gst::ClockTime)> {
+        // Parse SRT timestamp format: "00:00:01,234 --> 00:00:05,678"
+        let parts: Vec<&str> = line.split(" --> ").collect();
+        if parts.len() != 2 {
+            return None;
+        }
+
+        let start = self.parse_srt_time(parts[0])?;
+        let end = self.parse_srt_time(parts[1])?;
+
+        Some((start, end))
+    }
+
+    fn parse_srt_time(&self, time_str: &str) -> Option<gst::ClockTime> {
+        // Parse SRT time format: "00:00:01,234"
+        let parts: Vec<&str> = time_str.split(',').collect();
+        if parts.len() != 2 {
+            return None;
+        }
+
+        let time_part = parts[0];
+        let millis: u32 = parts[1].parse().ok()?;
+
+        let time_components: Vec<&str> = time_part.split(':').collect();
+        if time_components.len() != 3 {
+            return None;
+        }
+
+        let hours: u32 = time_components[0].parse().ok()?;
+        let minutes: u32 = time_components[1].parse().ok()?;
+        let seconds: u32 = time_components[2].parse().ok()?;
+
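+        // Worked example: "01:02:03,456" parses to
+        // 1*3_600_000 + 2*60_000 + 3*1_000 + 456 = 3_723_456 ms.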
+        let total_millis = hours as u64 * 3_600_000
+            + minutes as u64 * 60_000
+            + seconds as u64 * 1_000
+            + millis as u64;
+        let nanoseconds = total_millis * 1_000_000;
+
+        Some(gst::ClockTime::from_nseconds(nanoseconds))
+    }
+
+    fn subtitle_to_cue(
+        subtitle: &ffmpeg::Subtitle,
+        packet: &ffmpeg::Packet,
+        time_base: Rational,
+    ) -> Option<SubtitleCue> {
+        let time_to_clock_time = |time: i64| {
+            let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000)
+                / time_base.denominator() as i64;
+            gst::ClockTime::from_nseconds(nseconds as u64)
+        };
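+        // e.g. with time_base = 1/1000 (millisecond ticks), pts = 1_500 maps to
+        // 1_500_000_000 ns = 1.5 s.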
+
+        let text = subtitle
+            .rects()
+            .into_iter()
+            .map(|rect| match rect {
+                ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
+                ffmpeg::subtitle::Rect::Ass(ass) => {
+                    Self::extract_dialogue_text(ass.get()).unwrap_or(String::new())
+                }
+                _ => String::new(),
+            })
+            .collect::<Vec<String>>()
+            .join("\n— ");
+
+        let start = time_to_clock_time(packet.pts()?);
+        let end = time_to_clock_time(packet.pts()? + packet.duration());
+
+        Some(SubtitleCue { start, end, text })
+    }
+
+    fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
+        // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
+        // we need the 9th field (Text), so split on comma but only take first 9 splits
+        // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
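+        // e.g. (hypothetical packet): "0,0,Default,,0,0,0,,{\i1}Hola{\i0}\Nmundo"
+        // -> Text field "{\i1}Hola{\i0}\Nmundo", cleaned up below to "Hola\nmundo".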
+        let text = dialogue_line.splitn(9, ',').last()?;
+
+        // remove ASS override codes (formatting tags) like {\b1}, {\i1}, {\c&Hffffff&}, etc.
+        let mut result = String::new();
+        let mut in_tag = false;
+        let mut char_iter = text.chars().peekable();
+
+        while let Some(c) = char_iter.next() {
+            if c == '{' && char_iter.peek() == Some(&'\\') {
+                in_tag = true;
+            } else if c == '}' {
+                in_tag = false;
+            } else if !in_tag {
+                // process line breaks and hard spaces
+                if c == '\\' {
+                    match char_iter.peek() {
+                        Some(&'N') => {
+                            char_iter.next();
+                            result.push('\n');
+                        }
+                        Some(&'n') | Some(&'h') => {
+                            char_iter.next();
+                            result.push(' ');
+                        }
+                        _ => result.push(c),
+                    }
+                } else {
+                    result.push(c);
+                }
+            }
+        }
+
+        Some(result)
+    }
+}
diff --git a/src/subtitle_selection_dialog.rs b/src/subtitle_selection_dialog.rs
index 0c7f1cd..6136d56 100644
--- a/src/subtitle_selection_dialog.rs
+++ b/src/subtitle_selection_dialog.rs
@@ -1,63 +1,18 @@
 use adw::prelude::*;
-use gtk::{gio, glib};
+use gtk::gio;
 use relm4::prelude::*;
 
-use crate::subtitle_extractor::{StreamIndex, TRACKS};
-use crate::util::Tracker;
-
-// Custom GObject wrapper for subtitle track information
-glib::wrapper! {
-    pub struct SubtitleTrackInfo(ObjectSubclass<imp::SubtitleTrackInfo>);
-}
-
-impl SubtitleTrackInfo {
-    pub fn new(
-        stream_index: StreamIndex,
-        language: Option<&'static str>,
-        title: Option<String>,
-    ) -> Self {
-        glib::Object::builder()
-            .property("stream-index", stream_index as i64)
-            .property("language", language.unwrap_or_default())
-            .property("title", title.unwrap_or_default())
-            .build()
-    }
-
-    pub fn get_stream_index(&self) -> StreamIndex {
-        let index: i64 = self.property("stream-index");
-        index as usize
-    }
-}
-
-mod imp {
-    use gtk::{glib, prelude::*, subclass::prelude::*};
-    use std::cell::RefCell;
-
-    #[derive(Default, glib::Properties)]
-    #[properties(wrapper_type = super::SubtitleTrackInfo)]
-    pub struct SubtitleTrackInfo {
-        #[property(get, set)]
-        stream_index: RefCell<i64>,
-        #[property(get, set)]
-        language: RefCell<String>,
-        #[property(get, set)]
-        title: RefCell<String>,
-    }
-
-    #[glib::object_subclass]
-    impl ObjectSubclass for SubtitleTrackInfo {
-        const NAME: &'static str = "SubtitleTrackInfo";
-        type Type = super::SubtitleTrackInfo;
-    }
-
-    #[glib::derived_properties]
-    impl ObjectImpl for SubtitleTrackInfo {}
-}
+use crate::track_selector::{
+    TrackInfo, TrackSelector, TrackSelectorInit, TrackSelectorMsg, TrackSelectorOutput,
+};
+use crate::tracks::{SUBTITLE_TRACKS, StreamIndex};
 
 pub struct SubtitleSelectionDialog {
     parent_window: adw::ApplicationWindow,
     dialog: adw::PreferencesDialog,
-    track_list_model: Tracker<gio::ListStore>,
+    track_list_model: gio::ListStore,
+    primary_selector: Controller<TrackSelector>,
+    secondary_selector: Controller<TrackSelector>,
     primary_track_ix: Option<StreamIndex>,
     secondary_track_ix: Option<StreamIndex>,
 }
@@ -91,79 +46,10 @@ impl SimpleComponent for SubtitleSelectionDialog {
         #[name(page)]
         adw::PreferencesPage {
             adw::PreferencesGroup {
-                #[name(primary_combo)]
-                adw::ComboRow {
-                    set_title: "Primary Subtitle Track",
-                    set_subtitle: "Main subtitle track for learning",
-                    set_factory: Some(&track_factory),
-                    #[track(model.track_list_model.is_dirty())]
-                    set_model: Some(model.track_list_model.get()),
-                    #[track(model.track_list_model.is_dirty())]
-                    set_selected: model.primary_track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(model.track_list_model.get(), ix)),
-                    connect_selected_notify[sender] => move |combo| {
-                        let stream_index = get_stream_ix_from_combo(combo);
-                        sender.input(SubtitleSelectionDialogMsg::PrimaryTrackChanged(stream_index));
-                    },
-                },
-
-                #[name(secondary_combo)]
-                adw::ComboRow {
-                    set_title: "Secondary Subtitle Track",
-                    set_subtitle: "Optional second track for comparison",
-                    set_factory: Some(&track_factory),
-                    #[track(model.track_list_model.is_dirty())]
-                    set_model: Some(model.track_list_model.get()),
-                    #[track(model.track_list_model.is_dirty())]
-                    set_selected: model.secondary_track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(model.track_list_model.get(), ix)),
-                    connect_selected_notify[sender] => move |combo| {
-                        let stream_index = get_stream_ix_from_combo(combo);
-                        sender.input(SubtitleSelectionDialogMsg::SecondaryTrackChanged(stream_index));
-                    },
-                },
+                model.primary_selector.widget(),
+                model.secondary_selector.widget(),
             }
         },
-
-        #[name(track_factory)]
-        gtk::SignalListItemFactory {
-            connect_setup => move |_, list_item| {
-                let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
-                let vbox = gtk::Box::new(gtk::Orientation::Vertical, 0);
-
-                let language_label = gtk::Label::new(None);
-                language_label.set_halign(gtk::Align::Start);
-                language_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
-
-                let title_label = gtk::Label::new(None);
-                title_label.set_halign(gtk::Align::Start);
-                title_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
-                title_label.add_css_class("subtitle");
-
-                vbox.append(&language_label);
-                vbox.append(&title_label);
-                list_item.set_child(Some(&vbox));
-            },
-            connect_bind => move |_, list_item| {
-                let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
-                let item = list_item.item().unwrap();
-                let track_info = item.downcast_ref::<SubtitleTrackInfo>().unwrap();
-                let vbox = list_item.child().unwrap().downcast::<gtk::Box>().unwrap();
-                let language_label = vbox.first_child().unwrap().downcast::<gtk::Label>().unwrap();
-                let title_label = vbox.last_child().unwrap().downcast::<gtk::Label>().unwrap();
-
-                let language = track_info.language();
-                let title = track_info.title();
-
-                let language_text = if !language.is_empty() {
-                    &language
-                } else {
-                    "Unknown Language"
-                };
-
-                language_label.set_text(&language_text);
-                title_label.set_text(&title);
-                title_label.set_visible(!title.is_empty());
-            },
-        },
     }
 
     fn init(
@@ -171,12 +57,33 @@ impl SimpleComponent for SubtitleSelectionDialog {
         root: Self::Root,
         sender: ComponentSender<Self>,
     ) -> ComponentParts<Self> {
-        let track_list_model = gio::ListStore::new::<SubtitleTrackInfo>();
+        let primary_selector = TrackSelector::builder()
+            .launch(TrackSelectorInit {
+                title: "Primary subtitle track",
+                subtitle: Some("Select your target language here"),
+            })
+            .forward(sender.input_sender(), |output| match output {
+                TrackSelectorOutput::Changed(ix) => {
+                    SubtitleSelectionDialogMsg::PrimaryTrackChanged(ix)
+                }
+            });
+        let secondary_selector = TrackSelector::builder()
+            .launch(TrackSelectorInit {
+                title: "Secondary subtitle track",
+                subtitle: Some("Pick a language you already know"),
+            })
+            .forward(sender.input_sender(), |output| match output {
+                TrackSelectorOutput::Changed(ix) => {
+                    SubtitleSelectionDialogMsg::SecondaryTrackChanged(ix)
+                }
+            });
 
         let model = Self {
             parent_window,
             dialog: root.clone(),
-            track_list_model: Tracker::new(track_list_model),
+            track_list_model: gio::ListStore::new::<TrackInfo>(),
+            primary_selector,
+            secondary_selector,
             primary_track_ix: None,
             secondary_track_ix: None,
         };
@@ -187,11 +94,23 @@ impl SimpleComponent for SubtitleSelectionDialog {
     }
 
     fn update(&mut self, msg: Self::Input, sender: ComponentSender<Self>) {
-        self.track_list_model.reset();
-
         match msg {
             SubtitleSelectionDialogMsg::Show => {
-                self.update_combo_models();
+                self.update_track_list_model();
+
+                self.primary_selector
+                    .sender()
+                    .send(TrackSelectorMsg::SetListModel(
+                        self.track_list_model.clone(),
+                    ))
+                    .unwrap();
+                self.secondary_selector
+                    .sender()
+                    .send(TrackSelectorMsg::SetListModel(
+                        self.track_list_model.clone(),
+                    ))
+                    .unwrap();
+
                 self.dialog.present(Some(&self.parent_window));
             }
             SubtitleSelectionDialogMsg::PrimaryTrackChanged(stream_index) => {
@@ -215,43 +134,20 @@ impl SimpleComponent for SubtitleSelectionDialog {
 }
 
 impl SubtitleSelectionDialog {
-    fn update_combo_models(&mut self) {
-        let tracks = TRACKS.read();
+    fn update_track_list_model(&mut self) {
+        let tracks = SUBTITLE_TRACKS.read();
 
         // Clear previous entries
-        self.track_list_model.get_mut().remove_all();
+        self.track_list_model.remove_all();
 
         // Add all available tracks
         for (&stream_index, track) in tracks.iter() {
-            let track_info = SubtitleTrackInfo::new(
+            let track_info = TrackInfo::new(
                 stream_index,
-                track.language.map(|lang| lang.to_name()),
-                track.title.clone(),
+                track.metadata.language.map(|lang| lang.to_name()),
+                track.metadata.title.clone(),
             );
-            self.track_list_model.get_mut().append(&track_info);
-        }
-    }
-}
-
-fn get_stream_ix_from_combo(combo: &adw::ComboRow) -> Option<StreamIndex> {
-    let ix = combo
-        .selected_item()?
-        .downcast_ref::<SubtitleTrackInfo>()
-        .unwrap()
-        .get_stream_index();
-
-    Some(ix)
-}
-
-fn get_list_ix_from_stream_ix(list_model: &gio::ListStore, stream_ix: StreamIndex) -> u32 {
-    for i in 0..list_model.n_items() {
-        if let Some(item) = list_model.item(i) {
-            if let Some(track_info) = item.downcast_ref::<SubtitleTrackInfo>() {
-                if track_info.get_stream_index() == stream_ix {
-                    return i;
-                }
-            }
+            self.track_list_model.append(&track_info);
         }
     }
-    panic!("Stream index {} not found in list model", stream_ix);
 }
diff --git a/src/subtitle_view.rs b/src/subtitle_view.rs
index dc48561..50494b8 100644
--- a/src/subtitle_view.rs
+++ b/src/subtitle_view.rs
@@ -1,6 +1,5 @@
 use crate::cue_view::{CueView, CueViewMsg, CueViewOutput};
 use crate::util::OptionTracker;
-use gtk::glib;
 use gtk::prelude::*;
 use relm4::prelude::*;
 
diff --git a/src/track_selector.rs b/src/track_selector.rs
new file mode 100644
index 0000000..5c56e4d
--- /dev/null
+++ b/src/track_selector.rs
@@ -0,0 +1,188 @@
+use adw::prelude::*;
+use gtk::{gio, glib};
+use relm4::prelude::*;
+
+use crate::tracks::StreamIndex;
+
+glib::wrapper! {
+    pub struct TrackInfo(ObjectSubclass<imp::TrackInfo>);
+}
+
+impl TrackInfo {
+    pub fn new(
+        stream_index: StreamIndex,
+        language: Option<&'static str>,
+        title: Option<String>,
+    ) -> Self {
+        glib::Object::builder()
+            .property("stream-index", stream_index as i64)
+            .property("language", language.unwrap_or_default())
+            .property("title", title.unwrap_or_default())
+            .build()
+    }
+
+    pub fn get_stream_index(&self) -> StreamIndex {
+        let index: i64 = self.property("stream-index");
+        index as usize
+    }
+}
+
+mod imp {
+    use gtk::{glib, prelude::*, subclass::prelude::*};
+    use std::cell::RefCell;
+
+    #[derive(Default, glib::Properties)]
+    #[properties(wrapper_type = super::TrackInfo)]
+    pub struct TrackInfo {
+        #[property(get, set)]
+        stream_index: RefCell<i64>,
+        #[property(get, set)]
+        language: RefCell<String>,
+        #[property(get, set)]
+        title: RefCell<String>,
+    }
+
+    #[glib::object_subclass]
+    impl ObjectSubclass for TrackInfo {
+        const NAME: &'static str = "TrackInfo";
+        type Type = super::TrackInfo;
+    }
+
+    #[glib::derived_properties]
+    impl ObjectImpl for TrackInfo {}
+}
+
+pub struct TrackSelector {
+    track_list_model: gio::ListStore,
+    track_ix: Option<StreamIndex>,
+}
+
+pub struct TrackSelectorInit {
+    pub title: &'static str,
+    pub subtitle: Option<&'static str>,
+}
+
+#[derive(Debug)]
+pub enum TrackSelectorMsg {
+    SetListModel(gio::ListStore),
+}
+
+#[derive(Debug)]
+pub enum TrackSelectorOutput {
+    Changed(Option<StreamIndex>),
+}
+
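+// Usage sketch (see subtitle_selection_dialog.rs in this commit): launch with
+// TrackSelector::builder().launch(TrackSelectorInit { title, subtitle }) and
+// forward TrackSelectorOutput::Changed into the parent component's input messages.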
+#[relm4::component(pub)]
+impl SimpleComponent for TrackSelector {
+    type Init = TrackSelectorInit;
+    type Input = TrackSelectorMsg;
+    type Output = TrackSelectorOutput;
+
+    view! {
+        #[root]
+        #[name(combo)]
+        adw::ComboRow {
+            set_title: init.title,
+            set_subtitle?: init.subtitle,
+            set_factory: Some(&track_factory),
+            #[watch]
+            set_model: Some(&model.track_list_model),
+            #[watch]
+            set_selected: model.track_ix.map_or(gtk::INVALID_LIST_POSITION, |ix| get_list_ix_from_stream_ix(&model.track_list_model, ix)),
+            connect_selected_notify[sender] => move |combo| {
+                let stream_index = get_stream_ix_from_combo(combo);
+                sender.output(TrackSelectorOutput::Changed(stream_index)).unwrap();
+            },
+        },
+
+        #[name(track_factory)]
+        gtk::SignalListItemFactory {
+            connect_setup => move |_, list_item| {
+                let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
+                let vbox = gtk::Box::new(gtk::Orientation::Vertical, 0);
+
+                let language_label = gtk::Label::new(None);
+                language_label.set_halign(gtk::Align::Start);
+                language_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
+
+                let title_label = gtk::Label::new(None);
+                title_label.set_halign(gtk::Align::Start);
+                title_label.set_ellipsize(gtk::pango::EllipsizeMode::End);
+                title_label.add_css_class("subtitle");
+
+                vbox.append(&language_label);
+                vbox.append(&title_label);
+                list_item.set_child(Some(&vbox));
+            },
+            connect_bind => move |_, list_item| {
+                let list_item = list_item.downcast_ref::<gtk::ListItem>().unwrap();
+                let item = list_item.item().unwrap();
+                let track_info = item.downcast_ref::<TrackInfo>().unwrap();
+                let vbox = list_item.child().unwrap().downcast::<gtk::Box>().unwrap();
+                let language_label = vbox.first_child().unwrap().downcast::<gtk::Label>().unwrap();
+                let title_label = vbox.last_child().unwrap().downcast::<gtk::Label>().unwrap();
+
+                let language = track_info.language();
+                let title = track_info.title();
+
+                let language_text = if !language.is_empty() {
+                    &language
+                } else {
+                    "Unknown Language"
+                };
+
+                language_label.set_text(language_text);
+                title_label.set_text(&title);
+                title_label.set_visible(!title.is_empty());
+            },
+        },
+    }
+
+    fn init(
+        init: Self::Init,
+        root: Self::Root,
+        sender: ComponentSender<Self>,
+    ) -> ComponentParts<Self> {
+        let track_list_model = gio::ListStore::new::<TrackInfo>();
+
+        let model = Self {
+            track_list_model,
+            track_ix: None,
+        };
+
+        let widgets = view_output!();
+
+        ComponentParts { model, widgets }
+    }
+
+    fn update(&mut self, msg: Self::Input, _sender: ComponentSender<Self>) {
+        match msg {
+            TrackSelectorMsg::SetListModel(list_model) => {
+                self.track_list_model = list_model;
+            }
+        }
+    }
+}
+
+fn get_stream_ix_from_combo(combo: &adw::ComboRow) -> Option<StreamIndex> {
+    let ix = combo
+        .selected_item()?
+        .downcast_ref::<TrackInfo>()
+        .unwrap()
+        .get_stream_index();
+
+    Some(ix)
+}
+
+fn get_list_ix_from_stream_ix(list_model: &gio::ListStore, stream_ix: StreamIndex) -> u32 {
+    for i in 0..list_model.n_items() {
+        if let Some(item) = list_model.item(i) {
+            if let Some(track_info) = item.downcast_ref::<TrackInfo>() {
+                if track_info.get_stream_index() == stream_ix {
+                    return i;
+                }
+            }
+        }
+    }
+    panic!("Stream index {} not found in list model", stream_ix);
+}
diff --git a/src/tracks.rs b/src/tracks.rs
new file mode 100644
index 0000000..4d69e12
--- /dev/null
+++ b/src/tracks.rs
@@ -0,0 +1,38 @@
+use std::collections::BTreeMap;
+
+use relm4::SharedState;
+
+pub type StreamIndex = usize;
+
+#[derive(Debug, Clone)]
+pub struct TrackMetadata {
+    pub language: Option<isolang::Language>,
+    pub title: Option<String>,
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtitleTrack {
+    pub metadata: TrackMetadata,
+    pub cues: Vec<SubtitleCue>,
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtitleCue {
+    pub start: gst::ClockTime,
+    pub end: gst::ClockTime,
+    pub text: String,
+}
+
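+/// Process-wide registry of extracted subtitle tracks, keyed by stream index.
+/// Extraction workers fill it through `SUBTITLE_TRACKS.write()`; UI components
+/// read it back via `SUBTITLE_TRACKS.read()` (usage pattern as in transcript.rs).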
+pub static SUBTITLE_TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();
+
+impl TrackMetadata {
+    pub fn from_ffmpeg_stream(stream: &ffmpeg::Stream) -> Self {
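+        // Containers usually tag streams with ISO 639-2/B language codes
+        // (e.g. "ger" for German); isolang resolves those to Language values.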
+        let language_code = stream.metadata().get("language").map(|s| s.to_string());
+        let title = stream.metadata().get("title").map(|s| s.to_string());
+
+        Self {
+            language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
+            title,
+        }
+    }
+}
diff --git a/src/transcript.rs b/src/transcript.rs
index eb3459d..a8ae554 100644
--- a/src/transcript.rs
+++ b/src/transcript.rs
@@ -1,7 +1,7 @@
 use gtk::{ListBox, pango::WrapMode, prelude::*};
 use relm4::prelude::*;
 
-use crate::subtitle_extractor::{StreamIndex, SubtitleCue, TRACKS};
+use crate::tracks::{SUBTITLE_TRACKS, StreamIndex, SubtitleCue};
 
 #[derive(Debug)]
 pub enum SubtitleCueOutput {
@@ -122,7 +122,7 @@ impl SimpleComponent for Transcript {
                 self.active_cues.guard().clear();
 
                 if let Some(stream_ix) = stream_index {
-                    let tracks = TRACKS.read();
+                    let tracks = SUBTITLE_TRACKS.read();
                     if let Some(track) = tracks.get(&stream_ix) {
                         for cue in &track.cues {
                             self.active_cues.guard().push_back(cue.clone());
diff --git a/src/util/tracker.rs b/src/util/tracker.rs
index 66c30a9..69a1c5f 100644
--- a/src/util/tracker.rs
+++ b/src/util/tracker.rs
@@ -24,6 +24,12 @@ impl<T> Tracker<T> {
         self.inner = value;
     }
 
+    /// Sets the inner value to `value` and marks the tracker as clean.
+    pub fn set_clean(&mut self, value: T) {
+        self.dirty = false;
+        self.inner = value;
+    }
+
     pub fn is_dirty(&self) -> bool {
         self.dirty
     }