use std::sync::mpsc; use anyhow::Context; use ffmpeg::filter; use crate::{subtitle_extraction::*, tracks::StreamIndex}; pub fn generate_whisper_subtitles( // stream index to use when storing generated subtitles, this index // already has to be in TRACKS when this function is called! stream_ix: StreamIndex, context: ffmpeg::codec::Context, time_base: ffmpeg::Rational, packet_rx: mpsc::Receiver, sender: ComponentSender, ) -> anyhow::Result<()> { let mut decoder = context .decoder() .audio() .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?; let mut filter = filter::Graph::new(); let abuffer_args = format!( "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}", decoder.time_base(), decoder.rate(), decoder.format().name(), decoder.channel_layout().bits() ); let whisper_args = format!( "model={}:queue={}:format=json", "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin", 30 ); let filter_spec = format!("[src] whisper={} [sink]", whisper_args); filter.add(&filter::find("abuffer").unwrap(), "src", &abuffer_args)?; filter.add(&filter::find("abuffersink").unwrap(), "sink", "")?; filter .output("src", 0)? .input("sink", 0)? .parse(&filter_spec)?; filter.validate()?; let mut source_ctx = filter.get("src").unwrap(); let mut sink_ctx = filter.get("sink").unwrap(); while let Ok(packet) = packet_rx.recv() { handle_packet(&mut decoder, source_ctx.source(), sink_ctx.sink(), packet) .unwrap_or_else(|e| log::error!("error handling audio packet: {}", e)) } Ok(()) } fn handle_packet( decoder: &mut ffmpeg::decoder::Audio, mut source: filter::Source, mut sink: filter::Sink, packet: ffmpeg::Packet, ) -> anyhow::Result<()> { let mut in_frame = unsafe { ffmpeg::Frame::empty() }; decoder.send_packet(&packet)?; decoder.receive_frame(&mut in_frame)?; source.add(&in_frame)?; let mut out_frame = unsafe { ffmpeg::Frame::empty() }; sink.frame(&mut out_frame)?; if let Some(text) = out_frame.metadata().get("lavfi.whisper.text") { println!("{}", text); } Ok(()) }