summary refs log tree commit diff
path: root/src/subtitle_extraction/whisper.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/subtitle_extraction/whisper.rs')
-rw-r--r-- src/subtitle_extraction/whisper.rs 75
1 files changed, 75 insertions, 0 deletions
diff --git a/src/subtitle_extraction/whisper.rs b/src/subtitle_extraction/whisper.rs
new file mode 100644
index 0000000..5622d6f
--- /dev/null
+++ b/src/subtitle_extraction/whisper.rs
@@ -0,0 +1,75 @@
+use std::sync::mpsc;
+
+use anyhow::Context;
+use ffmpeg::filter;
+
+use crate::{subtitle_extraction::*, tracks::StreamIndex};
+
+/// Decodes the audio packets of one stream and feeds them through FFmpeg's
+/// `whisper` filter, printing every transcription the filter attaches to a frame.
+///
+/// The function blocks on `packet_rx` and returns once the sending side of the
+/// channel has been dropped. A failure while handling a single packet is
+/// logged and does not abort the loop.
+///
+/// # Arguments
+///
+/// * `stream_ix` - stream index to use when storing generated subtitles; this
+///   index already has to be in TRACKS when this function is called. At the
+///   moment it is only used in the decoder error message.
+/// * `context` - codec context the audio decoder is created from.
+/// * `time_base` - currently unused.
+/// * `packet_rx` - channel delivering the audio packets to transcribe.
+/// * `sender` - currently unused.
+///
+/// # Errors
+///
+/// Returns an error if the audio decoder cannot be created or if any step of
+/// building the `abuffer -> whisper -> abuffersink` filter graph fails.
+pub fn generate_whisper_subtitles(
+    stream_ix: StreamIndex,
+    context: ffmpeg::codec::Context,
+    time_base: ffmpeg::Rational,
+    packet_rx: mpsc::Receiver<ffmpeg::Packet>,
+    sender: ComponentSender<SubtitleExtractor>,
+) -> anyhow::Result<()> {
+    // TODO: make the model location and queue length configurable instead of
+    // hard-coding a developer-local path.
+    const WHISPER_MODEL_PATH: &str =
+        "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin";
+    const WHISPER_QUEUE: u32 = 30;
+
+    let mut decoder = context
+        .decoder()
+        .audio()
+        .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?;
+
+    // NOTE(review): the `time_base` parameter is not used; the buffer source is
+    // configured with the decoder's own time base. Confirm that this matches
+    // the timestamps of the packets received on `packet_rx`.
+    let abuffer_args = format!(
+        "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+        decoder.time_base(),
+        decoder.rate(),
+        decoder.format().name(),
+        decoder.channel_layout().bits()
+    );
+    let whisper_args = format!(
+        "model={}:queue={}:format=json",
+        WHISPER_MODEL_PATH, WHISPER_QUEUE
+    );
+    let filter_spec = format!("[src] whisper={} [sink]", whisper_args);
+
+    // Look the endpoint filters up without panicking: a missing filter is a
+    // property of the linked FFmpeg build, not a programming error.
+    let abuffer = filter::find("abuffer").context("FFmpeg filter `abuffer` is not available")?;
+    let abuffersink =
+        filter::find("abuffersink").context("FFmpeg filter `abuffersink` is not available")?;
+
+    let mut graph = filter::Graph::new();
+    graph
+        .add(&abuffer, "src", &abuffer_args)
+        .with_context(|| format!("error adding abuffer source with args `{}`", abuffer_args))?;
+    graph
+        .add(&abuffersink, "sink", "")
+        .context("error adding abuffersink to the filter graph")?;
+    graph
+        .output("src", 0)?
+        .input("sink", 0)?
+        .parse(&filter_spec)
+        .with_context(|| format!("error parsing filter spec `{}`", filter_spec))?;
+    graph
+        .validate()
+        .context("error validating the whisper filter graph")?;
+
+    let mut source_ctx = graph
+        .get("src")
+        .context("filter graph has no `src` filter")?;
+    let mut sink_ctx = graph
+        .get("sink")
+        .context("filter graph has no `sink` filter")?;
+
+    // `iter()` ends as soon as every sender of the channel has been dropped.
+    for packet in packet_rx.iter() {
+        if let Err(e) = handle_packet(&mut decoder, source_ctx.source(), sink_ctx.sink(), packet) {
+            log::error!("error handling audio packet: {}", e);
+        }
+    }
+
+    Ok(())
+}
+
+/// Decodes one audio packet, pushes every resulting frame into the filter
+/// graph and prints the `lavfi.whisper.text` metadata of each filtered frame
+/// that carries it.
+///
+/// A packet can decode to zero, one or several frames, and the sink can have
+/// zero or more frames ready, so both sides are drained in a loop. "Needs more
+/// input" (`EAGAIN`) and end-of-stream end the respective loop and are not
+/// treated as errors.
+///
+/// # Errors
+///
+/// Returns an error if the packet cannot be sent to the decoder, if a frame
+/// cannot be added to the buffer source, or if decoding or filtering fails
+/// with anything other than `EAGAIN`/EOF.
+fn handle_packet(
+    decoder: &mut ffmpeg::decoder::Audio,
+    mut source: filter::Source,
+    mut sink: filter::Sink,
+    packet: ffmpeg::Packet,
+) -> anyhow::Result<()> {
+    decoder.send_packet(&packet)?;
+
+    // Feed every frame the decoder has ready into the filter graph.
+    let mut decoded = ffmpeg::frame::Audio::empty();
+    loop {
+        match decoder.receive_frame(&mut decoded) {
+            Ok(()) => source.add(&decoded)?,
+            Err(e) if is_drained(&e) => break,
+            Err(e) => return Err(e.into()),
+        }
+    }
+
+    // Pull every frame the filter graph has finished with.
+    loop {
+        // A fresh frame per pull, so nothing from a previous frame is carried over.
+        let mut filtered = ffmpeg::frame::Audio::empty();
+        match sink.frame(&mut filtered) {
+            Ok(()) => {
+                if let Some(text) = filtered.metadata().get("lavfi.whisper.text") {
+                    println!("{}", text);
+                }
+            }
+            Err(e) if is_drained(&e) => break,
+            Err(e) => return Err(e.into()),
+        }
+    }
+
+    Ok(())
+}
+
+/// Returns `true` for the errors that only signal "nothing more available
+/// right now": `EAGAIN` and end-of-stream.
+fn is_drained(err: &ffmpeg::Error) -> bool {
+    match err {
+        ffmpeg::Error::Eof => true,
+        ffmpeg::Error::Other { errno } => *errno == ffmpeg::util::error::EAGAIN,
+        _ => false,
+    }
+}