1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
use std::sync::mpsc;
use anyhow::Context;
use ffmpeg::filter;
use crate::{subtitle_extraction::*, tracks::StreamIndex};
pub fn generate_whisper_subtitles(
// stream index to use when storing generated subtitles, this index
// already has to be in TRACKS when this function is called!
stream_ix: StreamIndex,
context: ffmpeg::codec::Context,
time_base: ffmpeg::Rational,
packet_rx: mpsc::Receiver<ffmpeg::Packet>,
sender: ComponentSender<SubtitleExtractor>,
) -> anyhow::Result<()> {
let mut decoder = context
.decoder()
.audio()
.with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?;
let mut filter = filter::Graph::new();
let abuffer_args = format!(
"time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
decoder.time_base(),
decoder.rate(),
decoder.format().name(),
decoder.channel_layout().bits()
);
let whisper_args = format!(
"model={}:queue={}:format=json",
"/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin", 30
);
let filter_spec = format!("[src] whisper={} [sink]", whisper_args);
filter.add(&filter::find("abuffer").unwrap(), "src", &abuffer_args)?;
filter.add(&filter::find("abuffersink").unwrap(), "sink", "")?;
filter
.output("src", 0)?
.input("sink", 0)?
.parse(&filter_spec)?;
filter.validate()?;
let mut source_ctx = filter.get("src").unwrap();
let mut sink_ctx = filter.get("sink").unwrap();
while let Ok(packet) = packet_rx.recv() {
handle_packet(&mut decoder, source_ctx.source(), sink_ctx.sink(), packet)
.unwrap_or_else(|e| log::error!("error handling audio packet: {}", e))
}
Ok(())
}
fn handle_packet(
decoder: &mut ffmpeg::decoder::Audio,
mut source: filter::Source,
mut sink: filter::Sink,
packet: ffmpeg::Packet,
) -> anyhow::Result<()> {
let mut in_frame = unsafe { ffmpeg::Frame::empty() };
decoder.send_packet(&packet)?;
decoder.receive_frame(&mut in_frame)?;
source.add(&in_frame)?;
let mut out_frame = unsafe { ffmpeg::Frame::empty() };
sink.frame(&mut out_frame)?;
if let Some(text) = out_frame.metadata().get("lavfi.whisper.text") {
println!("{}", text);
}
Ok(())
}
|