避免重复的 segment
This commit is contained in:
parent
a7046eba8c
commit
479049501b
@@ -104,9 +104,9 @@ impl WhisperEngine {
|
|||||||
let vad_coverage = speech_coverage_ratio(&normalized_ranges, total_seconds);
|
let vad_coverage = speech_coverage_ratio(&normalized_ranges, total_seconds);
|
||||||
let should_retry_full_audio = !audio.is_empty()
|
let should_retry_full_audio = !audio.is_empty()
|
||||||
&& (segments.is_empty()
|
&& (segments.is_empty()
|
||||||
|| vad_coverage < 0.72
|
|| vad_coverage < 0.60
|
||||||
|| vad_end + 2.5 < total_seconds
|
|| vad_end + 5.0 < total_seconds
|
||||||
|| (total_seconds > 45.0 && vad_text_len < (total_seconds / 2.4) as usize));
|
|| (total_seconds > 60.0 && vad_text_len < (total_seconds / 3.0) as usize));
|
||||||
|
|
||||||
if should_retry_full_audio {
|
if should_retry_full_audio {
|
||||||
on_log(format!(
|
on_log(format!(
|
||||||
@@ -141,6 +141,12 @@ impl WhisperEngine {
|
|||||||
))?;
|
))?;
|
||||||
segments = full_audio_segments;
|
segments = full_audio_segments;
|
||||||
} else {
|
} else {
|
||||||
|
on_log(format!(
|
||||||
|
"whisper: keeping VAD-based transcript (vad_segments={}, full_segments={})",
|
||||||
|
segments.len(),
|
||||||
|
full_audio_segments.len()
|
||||||
|
))?;
|
||||||
|
on_reset_segments()?;
|
||||||
segments.iter().cloned().try_for_each(&mut on_segment)?;
|
segments.iter().cloned().try_for_each(&mut on_segment)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -327,10 +333,10 @@ fn should_prefer_full_audio(
|
|||||||
let vad_end = last_end(vad_segments);
|
let vad_end = last_end(vad_segments);
|
||||||
let full_end = last_end(full_audio_segments);
|
let full_end = last_end(full_audio_segments);
|
||||||
|
|
||||||
full_text_len > vad_text_len + vad_text_len / 5
|
full_text_len > vad_text_len + vad_text_len * 3 / 5
|
||||||
|| full_audio_segments.len() > vad_segments.len() + 2
|
|| full_audio_segments.len() > vad_segments.len() + 5
|
||||||
|| full_end > vad_end + 2.0
|
|| full_end > vad_end + 5.0
|
||||||
|| (total_seconds > 30.0 && full_end + 1.5 >= total_seconds && vad_end + 3.0 < total_seconds)
|
|| (total_seconds > 60.0 && full_end + 1.5 >= total_seconds && vad_end + 5.0 < total_seconds)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_source_language<'a>(
|
fn resolve_source_language<'a>(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user