summary refs log tree commit diff
path: root/src/subtitle_extractor.rs
blob: b628d736d2326e0abf08a40fcc36c9dea5fd80fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
use std::collections::BTreeMap;

use anyhow::Result;

use ffmpeg::Rational;
use log::{debug, error, info};
use relm4::{ComponentSender, SharedState, Worker};

/// Index of a stream within the opened media input.
///
/// Used as the key of [`TRACKS`] and carried in [`SubtitleExtractorOutput`] messages.
pub type StreamIndex = usize;

/// A single subtitle entry together with the interval during which it is shown.
#[derive(Debug, Clone)]
pub struct SubtitleCue {
    /// Start of the cue, derived from the packet's presentation timestamp.
    pub start: gst::ClockTime,
    /// End of the cue, derived from the packet's presentation timestamp plus its duration.
    pub end: gst::ClockTime,
    /// Text of the cue; the texts of multiple subtitle rects are joined with `"\n— "`.
    pub text: String,
}

/// Metadata and accumulated cues of one subtitle stream.
#[derive(Debug, Clone)]
pub struct SubtitleTrack {
    /// Language parsed from the stream's `language` metadata entry; `None` if the entry is
    /// missing or could not be parsed.
    pub language: Option<isolang::Language>,
    /// Value of the stream's `title` metadata entry, if any.
    pub title: Option<String>,
    /// Cues extracted so far, in the order their packets were read.
    pub cues: Vec<SubtitleCue>,
}

/// Shared map of all subtitle tracks found by the extractor, keyed by stream index.
///
/// It is cleared at the start of every extraction and filled while the streams and packets
/// of the input are processed.
pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();

/// Worker that extracts subtitle tracks and cues from a media URL using FFmpeg.
///
/// The struct itself holds no state; results are published through [`TRACKS`] and
/// [`SubtitleExtractorOutput`] messages.
pub struct SubtitleExtractor {}

/// Input messages understood by [`SubtitleExtractor`].
#[derive(Debug)]
pub enum SubtitleExtractorMsg {
    /// Discard all previously extracted tracks and extract the subtitles of the given URL.
    ExtractFromUrl(String),
}

/// Output messages emitted by [`SubtitleExtractor`] while it works.
#[derive(Debug)]
pub enum SubtitleExtractorOutput {
    /// A track was inserted into [`TRACKS`] under the given stream index.
    NewOrUpdatedTrackMetadata(StreamIndex),
    /// A cue was decoded for the given stream; it has also been appended to the track
    /// stored in [`TRACKS`] when that track exists.
    NewCue(StreamIndex, SubtitleCue),
    /// The extraction ran to the end without returning an error.
    ExtractionComplete,
}

impl Worker for SubtitleExtractor {
    type Init = ();
    type Input = SubtitleExtractorMsg;
    type Output = SubtitleExtractorOutput;

    fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
        Self {}
    }

    fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
        match msg {
            SubtitleExtractorMsg::ExtractFromUrl(url) => {
                self.handle_extract_from_url(url, sender);
            }
        }
    }
}

impl SubtitleExtractor {
    /// Runs a full subtitle extraction for `url`.
    ///
    /// All tracks currently stored in [`TRACKS`] are dropped first. When the extraction
    /// succeeds, `ExtractionComplete` is sent; when it fails, the error is logged and no
    /// output message is sent.
    ///
    /// # Panics
    ///
    /// Panics if the `ExtractionComplete` message cannot be delivered.
    fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
        // Start from a clean slate: forget the tracks of any previous input.
        TRACKS.write().clear();

        // FFmpeg does the actual work; bail out early if it reports a failure.
        if let Err(e) = self.extract_subtitles_ffmpeg(&url, &sender) {
            error!("FFmpeg extraction failed: {}", e);
            return;
        }

        info!("Subtitle extraction completed successfully");
        let delivery = sender.output(SubtitleExtractorOutput::ExtractionComplete);
        delivery.unwrap();
    }

    /// Opens `url` with FFmpeg and decodes every subtitle stream it contains.
    ///
    /// For each subtitle stream a [`SubtitleTrack`] is stored in [`TRACKS`] under its stream
    /// index and announced with `NewOrUpdatedTrackMetadata`. Afterwards all packets of the
    /// input are read; every cue decoded from a subtitle packet is appended to its track and
    /// announced with `NewCue`.
    ///
    /// # Errors
    ///
    /// Returns an error if the input cannot be opened. A subtitle stream whose decoder cannot
    /// be set up is logged and skipped; its (cue-less) track entry stays in [`TRACKS`].
    ///
    /// # Panics
    ///
    /// Panics if an output message cannot be delivered.
    fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
        let mut input = ffmpeg::format::input(&url)?;

        let mut subtitle_decoders = BTreeMap::new();

        // create decoder for each subtitle stream
        for (stream_index, stream) in input.streams().enumerate() {
            if stream.parameters().medium() != ffmpeg::media::Type::Subtitle {
                continue;
            }

            let language_code = stream.metadata().get("language").map(|s| s.to_string());
            let title = stream.metadata().get("title").map(|s| s.to_string());

            let track = SubtitleTrack {
                language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
                title,
                cues: Vec::new(),
            };

            TRACKS.write().insert(stream_index, track);

            sender
                .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
                    stream_index,
                ))
                .unwrap();

            // Treat a failure to build the codec context the same way as a failure to open the
            // decoder: log it and carry on, so one broken stream does not abort the extraction
            // of all the others.
            let decoder = ffmpeg::codec::context::Context::from_parameters(stream.parameters())
                .and_then(|context| context.decoder().subtitle());
            match decoder {
                Ok(decoder) => {
                    subtitle_decoders.insert(stream_index, decoder);
                    debug!("Created decoder for subtitle stream {}", stream_index);
                }
                Err(e) => {
                    error!(
                        "Failed to create decoder for subtitle stream {}: {}",
                        stream_index, e
                    );
                }
            }
        }

        // process packets
        for (stream, packet) in input.packets() {
            let stream_index = stream.index();

            // Packets of streams without a decoder (non-subtitle or failed setup) are ignored.
            if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
                let mut subtitle = ffmpeg::Subtitle::new();
                // `decode` reports `Ok(false)` when the packet was consumed without producing a
                // subtitle; only `Ok(true)` means `subtitle` holds data worth turning into a cue.
                if let Ok(true) = decoder.decode(&packet, &mut subtitle) {
                    if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
                    {
                        if let Some(track) = TRACKS.write().get_mut(&stream_index) {
                            track.cues.push(cue.clone());
                        }

                        sender
                            .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
                            .unwrap();
                    }
                }
            }
        }

        Ok(())
    }

    fn subtitle_to_cue(
        subtitle: &ffmpeg::Subtitle,
        packet: &ffmpeg::Packet,
        time_base: Rational,
    ) -> Option<SubtitleCue> {
        let time_to_clock_time = |time: i64| {
            let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000)
                / time_base.denominator() as i64;
            gst::ClockTime::from_nseconds(nseconds as u64)
        };

        let text = subtitle
            .rects()
            .into_iter()
            .map(|rect| match rect {
                ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
                ffmpeg::subtitle::Rect::Ass(ass) => {
                    Self::extract_dialogue_text(ass.get()).unwrap_or(String::new())
                }
                _ => String::new(),
            })
            .collect::<Vec<String>>()
            .join("\n— ");

        let start = time_to_clock_time(packet.pts()?);
        let end = time_to_clock_time(packet.pts()? + packet.duration());

        Some(SubtitleCue { start, end, text })
    }

    /// Extracts the plain dialogue text from an ASS event line.
    ///
    /// The line is expected to have the nine comma-separated fields
    /// `ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text`. From the `Text` field,
    /// override blocks starting with `{\` (for example `{\b1}` or `{\c&Hffffff&}`) are removed,
    /// `\N` becomes a line break, and `\n` and `\h` become a space. Everything else, including
    /// braces that do not belong to an override block, is kept as is.
    ///
    /// Returns `None` when the line has fewer than nine fields.
    fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
        // ASS dialogue format: ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text
        // Text is the 9th field and may itself contain commas, so split into at most 9 parts.
        // `nth(8)` rather than `last()`: a line with fewer fields must not hand back one of the
        // metadata fields as if it were dialogue.
        // see also https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
        let text = dialogue_line.splitn(9, ',').nth(8)?;

        // The output is never longer than the input, so one allocation is enough.
        let mut result = String::with_capacity(text.len());
        let mut in_tag = false;
        let mut char_iter = text.chars().peekable();

        while let Some(c) = char_iter.next() {
            match c {
                // an override block only starts with `{` directly followed by a backslash
                '{' if !in_tag && char_iter.peek() == Some(&'\\') => in_tag = true,
                // `}` closes an open override block; outside of one it is ordinary text
                '}' if in_tag => in_tag = false,
                // everything inside an override block is formatting and gets dropped
                _ if in_tag => {}
                // line breaks and hard spaces
                '\\' => match char_iter.peek() {
                    Some(&'N') => {
                        char_iter.next();
                        result.push('\n');
                    }
                    Some(&'n') | Some(&'h') => {
                        char_iter.next();
                        result.push(' ');
                    }
                    // any other backslash sequence is left untouched
                    _ => result.push(c),
                },
                _ => result.push(c),
            }
        }

        Some(result)
    }
}