避免重复的 segment
This commit is contained in:
parent
a7046eba8c
commit
479049501b
@@ -104,9 +104,9 @@ impl WhisperEngine {
|
||||
let vad_coverage = speech_coverage_ratio(&normalized_ranges, total_seconds);
|
||||
let should_retry_full_audio = !audio.is_empty()
|
||||
&& (segments.is_empty()
|
||||
|| vad_coverage < 0.72
|
||||
|| vad_end + 2.5 < total_seconds
|
||||
|| (total_seconds > 45.0 && vad_text_len < (total_seconds / 2.4) as usize));
|
||||
|| vad_coverage < 0.60
|
||||
|| vad_end + 5.0 < total_seconds
|
||||
|| (total_seconds > 60.0 && vad_text_len < (total_seconds / 3.0) as usize));
|
||||
|
||||
if should_retry_full_audio {
|
||||
on_log(format!(
|
||||
@@ -141,6 +141,12 @@ impl WhisperEngine {
|
||||
))?;
|
||||
segments = full_audio_segments;
|
||||
} else {
|
||||
on_log(format!(
|
||||
"whisper: keeping VAD-based transcript (vad_segments={}, full_segments={})",
|
||||
segments.len(),
|
||||
full_audio_segments.len()
|
||||
))?;
|
||||
on_reset_segments()?;
|
||||
segments.iter().cloned().try_for_each(&mut on_segment)?;
|
||||
}
|
||||
}
|
||||
@@ -327,10 +333,10 @@ fn should_prefer_full_audio(
|
||||
let vad_end = last_end(vad_segments);
|
||||
let full_end = last_end(full_audio_segments);
|
||||
|
||||
full_text_len > vad_text_len + vad_text_len / 5
|
||||
|| full_audio_segments.len() > vad_segments.len() + 2
|
||||
|| full_end > vad_end + 2.0
|
||||
|| (total_seconds > 30.0 && full_end + 1.5 >= total_seconds && vad_end + 3.0 < total_seconds)
|
||||
full_text_len > vad_text_len + vad_text_len * 3 / 5
|
||||
|| full_audio_segments.len() > vad_segments.len() + 5
|
||||
|| full_end > vad_end + 5.0
|
||||
|| (total_seconds > 60.0 && full_end + 1.5 >= total_seconds && vad_end + 5.0 < total_seconds)
|
||||
}
|
||||
|
||||
fn resolve_source_language<'a>(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user