diff options
| author | Malte Voos <git@mal.tc> | 2025-11-14 15:30:49 +0100 |
|---|---|---|
| committer | Malte Voos <git@mal.tc> | 2025-11-14 15:30:49 +0100 |
| commit | a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e (patch) | |
| tree | 542b42d3316138043272faba42e0d1005f8403b6 /src/subtitle_extraction/whisper.rs | |
| parent | a42a73378b7c527a5e4600544b2d7a86d68c5aac (diff) | |
| download | lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.tar.gz lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.zip | |
implement file/url open dialog
Diffstat (limited to 'src/subtitle_extraction/whisper.rs')
| -rw-r--r-- | src/subtitle_extraction/whisper.rs | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/src/subtitle_extraction/whisper.rs b/src/subtitle_extraction/whisper.rs new file mode 100644 index 0000000..5622d6f --- /dev/null +++ b/src/subtitle_extraction/whisper.rs @@ -0,0 +1,75 @@ +use std::sync::mpsc; + +use anyhow::Context; +use ffmpeg::filter; + +use crate::{subtitle_extraction::*, tracks::StreamIndex}; + +pub fn generate_whisper_subtitles( + // stream index to use when storing generated subtitles, this index + // already has to be in TRACKS when this function is called! + stream_ix: StreamIndex, + context: ffmpeg::codec::Context, + time_base: ffmpeg::Rational, + packet_rx: mpsc::Receiver<ffmpeg::Packet>, + sender: ComponentSender<SubtitleExtractor>, +) -> anyhow::Result<()> { + let mut decoder = context + .decoder() + .audio() + .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?; + + let mut filter = filter::Graph::new(); + + let abuffer_args = format!( + "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}", + decoder.time_base(), + decoder.rate(), + decoder.format().name(), + decoder.channel_layout().bits() + ); + let whisper_args = format!( + "model={}:queue={}:format=json", + "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin", 30 + ); + let filter_spec = format!("[src] whisper={} [sink]", whisper_args); + + filter.add(&filter::find("abuffer").unwrap(), "src", &abuffer_args)?; + filter.add(&filter::find("abuffersink").unwrap(), "sink", "")?; + filter + .output("src", 0)? + .input("sink", 0)? + .parse(&filter_spec)?; + filter.validate()?; + + let mut source_ctx = filter.get("src").unwrap(); + let mut sink_ctx = filter.get("sink").unwrap(); + + while let Ok(packet) = packet_rx.recv() { + handle_packet(&mut decoder, source_ctx.source(), sink_ctx.sink(), packet) + .unwrap_or_else(|e| log::error!("error handling audio packet: {}", e)) + } + + Ok(()) +} + +fn handle_packet( + decoder: &mut ffmpeg::decoder::Audio, + mut source: filter::Source, + mut sink: filter::Sink, + packet: ffmpeg::Packet, +) -> anyhow::Result<()> { + let mut in_frame = unsafe { ffmpeg::Frame::empty() }; + decoder.send_packet(&packet)?; + decoder.receive_frame(&mut in_frame)?; + source.add(&in_frame)?; + + let mut out_frame = unsafe { ffmpeg::Frame::empty() }; + sink.frame(&mut out_frame)?; + + if let Some(text) = out_frame.metadata().get("lavfi.whisper.text") { + println!("{}", text); + } + + Ok(()) +} |