From 0247f7f510323948d6080bcfde3c0e5c63be63f6 Mon Sep 17 00:00:00 2001 From: kura Date: Fri, 1 May 2026 19:52:32 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=AF=A6=E7=BB=86=E8=BF=9B?= =?UTF-8?q?=E5=BA=A6=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-tauri/src/audio.rs | 84 ++++++++++++-- src-tauri/src/models.rs | 22 ++++ src-tauri/src/task.rs | 152 ++++++++++++++++++++++++- src-tauri/src/vad.rs | 44 ++++++-- src/components/SubtitleEditor.vue | 6 +- src/components/TaskQueue.vue | 179 +++++++++++++++++++++++------- src/lib/types.ts | 9 ++ src/locales/en.ts | 7 ++ src/locales/zh-CN.ts | 7 ++ src/stores/tasks.ts | 3 + src/style.css | 100 ++++++++++++++++- 11 files changed, 539 insertions(+), 74 deletions(-) diff --git a/src-tauri/src/audio.rs b/src-tauri/src/audio.rs index d2a2888..c9256cc 100644 --- a/src-tauri/src/audio.rs +++ b/src-tauri/src/audio.rs @@ -1,5 +1,6 @@ use std::{ fs, + io::BufRead, path::{Path, PathBuf}, process::Command, }; @@ -9,7 +10,12 @@ use anyhow::{anyhow, Context, Result}; pub struct AudioPipeline; impl AudioPipeline { - pub fn extract_to_wav(ffmpeg_path: &Path, input_path: &str, workspace: &Path) -> Result { + pub fn extract_to_wav( + ffmpeg_path: &Path, + input_path: &str, + workspace: &Path, + on_progress: F, + ) -> Result { fs::create_dir_all(workspace) .with_context(|| format!("failed to create workspace: {}", workspace.display()))?; @@ -22,7 +28,7 @@ impl AudioPipeline { } } - let output = command + let mut child = command .arg("-y") .arg("-i") .arg(input_path) @@ -33,21 +39,43 @@ impl AudioPipeline { .arg("-f") .arg("wav") .arg(&output_path) - .output() + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() .with_context(|| format!("failed to launch ffmpeg: {}", ffmpeg_path.display()))?; - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); - if stderr.is_empty() { - return Err(anyhow!("ffmpeg exited with status: {}", output.status)); + let stderr = child.stderr.take().unwrap(); + let reader = std::io::BufReader::new(stderr); + + let mut total_duration_secs: Option = None; + let mut last_progress = 0.0f32; + + for line in reader.lines() { + let line = line?; + + if total_duration_secs.is_none() { + if let Some(dur) = parse_ffmpeg_duration(&line) { + total_duration_secs = Some(dur); + } + } + + if let Some(current_time) = parse_ffmpeg_time(&line) { + if let Some(total) = total_duration_secs { + let ratio = (current_time / total).clamp(0.0, 1.0) as f32; + if (ratio - last_progress).abs() >= 0.01 { + last_progress = ratio; + on_progress(ratio); + } + } } - return Err(anyhow!( - "ffmpeg exited with status: {} | stderr: {}", - output.status, - stderr - )); } + let status = child.wait().with_context(|| "ffmpeg process failed to wait")?; + if !status.success() { + return Err(anyhow!("ffmpeg exited with status: {}", status)); + } + + on_progress(1.0); Ok(output_path) } @@ -76,3 +104,35 @@ impl AudioPipeline { Ok(samples) } } + +fn parse_ffmpeg_duration(line: &str) -> Option { + let pos = line.find("Duration: ")?; + let rest = &line[pos + 10..]; + let end = rest.find(|c: char| c == ',' || c == ' ')?; + let time_str = &rest[..end]; + let parts: Vec<&str> = time_str.split(':').collect(); + if parts.len() == 3 { + let h: f64 = parts[0].parse().ok()?; + let m: f64 = parts[1].parse().ok()?; + let s: f64 = parts[2].parse().ok()?; + Some(h * 3600.0 + m * 60.0 + s) + } else { + None + } +} + +fn parse_ffmpeg_time(line: &str) -> Option { + let pos = line.find("time=")?; + let rest = &line[pos + 5..]; + let end = rest.find(|c: char| !c.is_digit(10) && c != ':' && c != '.').unwrap_or(rest.len()); + let time_str = &rest[..end]; + let parts: Vec<&str> = time_str.split(':').collect(); + if parts.len() == 3 { + let h: f64 = parts[0].parse().ok()?; + let m: f64 = parts[1].parse().ok()?; + let s: f64 = parts[2].parse().ok()?; + Some(h * 3600.0 + m * 60.0 + s) + } else { + None + } +} diff --git a/src-tauri/src/models.rs b/src-tauri/src/models.rs index 435dd51..305e1d2 100644 --- a/src-tauri/src/models.rs +++ b/src-tauri/src/models.rs @@ -37,6 +37,26 @@ pub struct SubtitleSegment { pub translated_text: Option, } +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SubStageProgress { + pub extracting: f32, + pub vad: f32, + pub transcribing: f32, + pub translating: f32, +} + +impl Default for SubStageProgress { + fn default() -> Self { + Self { + extracting: 0.0, + vad: 0.0, + transcribing: 0.0, + translating: 0.0, + } + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct SubtitleTask { @@ -51,6 +71,7 @@ pub struct SubtitleTask { pub progress: f32, pub segments: Vec, pub error: Option, + pub sub_stage_progress: SubStageProgress, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -73,6 +94,7 @@ pub struct ProgressEvent { pub status: TaskStatus, pub progress: f32, pub message: String, + pub sub_stage_progress: SubStageProgress, } #[derive(Debug, Clone, Serialize)] diff --git a/src-tauri/src/task.rs b/src-tauri/src/task.rs index 48e54e9..64a0fa7 100644 --- a/src-tauri/src/task.rs +++ b/src-tauri/src/task.rs @@ -11,8 +11,8 @@ use crate::{ audio::AudioPipeline, models::{ DefaultModelPaths, ErrorEvent, LogEvent, OutputMode, ProgressEvent, - ResetSegmentsEvent, StartTaskPayload, SubtitleSegment, SubtitleTask, - TargetLanguage, TaskStatus, TranslationConfig, + ResetSegmentsEvent, StartTaskPayload, SubStageProgress, SubtitleSegment, + SubtitleTask, TargetLanguage, TaskStatus, TranslationConfig, }, state::AppState, subtitle::{render, SubtitleFormat}, @@ -68,6 +68,7 @@ pub async fn start_task( progress: 0.0, segments: Vec::new(), error: None, + sub_stage_progress: SubStageProgress::default(), }; state.upsert_task(task.clone())?; @@ -172,13 +173,60 @@ async fn run_pipeline( set_status(&window, &app_state, &mut task, TaskStatus::Extracting, 5.0, "正在抽取音频")?; emit_log(&window, &task.id, format!("task: input file={}", payload.file_path))?; emit_log(&window, &task.id, format!("audio: ffmpeg={}", ffmpeg_path.display()))?; - let wav_path = AudioPipeline::extract_to_wav(&ffmpeg_path, &payload.file_path, &workspace)?; + + let window_for_extract = window.clone(); + let task_id_for_extract = task.id.clone(); + let wav_path = AudioPipeline::extract_to_wav( + &ffmpeg_path, + &payload.file_path, + &workspace, + move |ratio: f32| { + let overall = 5.0 + ratio.clamp(0.0, 1.0) * 10.0; + let sub = SubStageProgress { + extracting: ratio.clamp(0.0, 1.0) * 100.0, + vad: 0.0, + transcribing: 0.0, + translating: 0.0, + }; + let _ = window_for_extract.emit( + "task:progress", + ProgressEvent { + task_id: task_id_for_extract.clone(), + status: TaskStatus::Extracting, + progress: overall, + message: "正在抽取音频".to_string(), + sub_stage_progress: sub, + }, + ); + }, + )?; emit_log(&window, &task.id, format!("audio: normalized wav={}", wav_path.display()))?; set_status(&window, &app_state, &mut task, TaskStatus::VadProcessing, 15.0, "正在分析语音片段")?; let samples = AudioPipeline::load_wav_f32(&wav_path)?; let vad = VadEngine::new(payload.vad_model_path.clone(), VadConfig::default())?; - let speech_ranges = vad.detect_segments(&samples).await; + + let window_for_vad = window.clone(); + let task_id_for_vad = task.id.clone(); + let speech_ranges = vad.detect_segments(&samples, move |ratio: f32| { + let overall = 15.0 + ratio.clamp(0.0, 1.0) * 15.0; + let sub = SubStageProgress { + extracting: 100.0, + vad: ratio.clamp(0.0, 1.0) * 100.0, + transcribing: 0.0, + translating: 0.0, + }; + let _ = window_for_vad.emit( + "task:progress", + ProgressEvent { + task_id: task_id_for_vad.clone(), + status: TaskStatus::VadProcessing, + progress: overall, + message: "正在分析语音片段".to_string(), + sub_stage_progress: sub, + }, + ); + }).await; emit_log( &window, &task.id, @@ -239,6 +287,12 @@ async fn run_pipeline( &speech_ranges, |ratio| { let progress = 30.0 + ratio.clamp(0.0, 1.0) * 40.0; + let sub = SubStageProgress { + extracting: 100.0, + vad: 100.0, + transcribing: ratio.clamp(0.0, 1.0) * 100.0, + translating: 0.0, + }; window.emit( "task:progress", ProgressEvent { @@ -246,6 +300,7 @@ async fn run_pipeline( status: TaskStatus::Transcribing, progress, message: "正在执行 Whisper".to_string(), + sub_stage_progress: sub, }, )?; Ok(()) @@ -320,6 +375,33 @@ async fn incremental_translate( let context_size = translator.context_size().min(5); let mut all_segments: Vec = Vec::new(); let mut buffer: Vec = Vec::new(); + let mut translated_count: usize = 0; + + let emit_translate_progress = |window: &Window, task_id: &str, done: usize, total: usize| -> Result<()> { + let ratio = if total > 0 { + (done as f32 / total as f32).clamp(0.0, 1.0) + } else { + 0.0 + }; + let overall = 70.0 + ratio * 25.0; + let sub = SubStageProgress { + extracting: 100.0, + vad: 100.0, + transcribing: 100.0, + translating: ratio * 100.0, + }; + window.emit( + "task:progress", + ProgressEvent { + task_id: task_id.to_string(), + status: TaskStatus::Translating, + progress: overall, + message: "正在生成译文".to_string(), + sub_stage_progress: sub, + }, + )?; + Ok(()) + }; while let Some(segment) = rx.recv().await { all_segments.push(segment.clone()); @@ -340,6 +422,8 @@ async fn incremental_translate( .translate_batch_with_retries(context, &batch, target_lang_name(target_lang)) .await?; + translated_count += rows.len(); + emit_log(window, task_id, format!("translation: batch done, translated={}", rows.len()))?; for row in rows { @@ -360,6 +444,8 @@ async fn incremental_translate( ); } } + + emit_translate_progress(window, task_id, translated_count, all_segments.len())?; } } @@ -379,6 +465,8 @@ async fn incremental_translate( .translate_batch_with_retries(context, &batch, target_lang_name(target_lang)) .await?; + translated_count += rows.len(); + for row in rows { if let Some(original) = batch.iter().find(|item| item.id == row.id) { let mut emitted = original.clone(); @@ -397,8 +485,28 @@ async fn incremental_translate( ); } } + + emit_translate_progress(window, task_id, translated_count, all_segments.len())?; } + // Translation complete + let sub = SubStageProgress { + extracting: 100.0, + vad: 100.0, + transcribing: 100.0, + translating: 100.0, + }; + window.emit( + "task:progress", + ProgressEvent { + task_id: task_id.to_string(), + status: TaskStatus::Translating, + progress: 95.0, + message: "译文生成完毕".to_string(), + sub_stage_progress: sub, + }, + )?; + Ok(()) } @@ -432,6 +540,34 @@ fn set_status( ) -> Result<()> { task.status = status.clone(); task.progress = progress; + + // Mark completed sub-stages as 100% based on current stage. + // Only mark stages that have fully finished before this one. + match &status { + TaskStatus::Extracting => { + // extracting just started, no previous stage to mark + } + TaskStatus::VadProcessing => { + task.sub_stage_progress.extracting = 100.0; + } + TaskStatus::Transcribing => { + task.sub_stage_progress.extracting = 100.0; + task.sub_stage_progress.vad = 100.0; + } + TaskStatus::Translating => { + task.sub_stage_progress.extracting = 100.0; + task.sub_stage_progress.vad = 100.0; + task.sub_stage_progress.transcribing = 100.0; + } + TaskStatus::Completed => { + task.sub_stage_progress.extracting = 100.0; + task.sub_stage_progress.vad = 100.0; + task.sub_stage_progress.transcribing = 100.0; + task.sub_stage_progress.translating = 100.0; + } + TaskStatus::Queued | TaskStatus::Failed => {} + } + state.upsert_task(task.clone())?; window.emit( "task:progress", @@ -440,6 +576,7 @@ fn set_status( status, progress, message: message.to_string(), + sub_stage_progress: task.sub_stage_progress.clone(), }, )?; Ok(()) @@ -551,6 +688,12 @@ pub async fn retry_translation( }, |ratio| { let progress = 5.0 + ratio.clamp(0.0, 1.0) * 90.0; + let sub = SubStageProgress { + extracting: 100.0, + vad: 100.0, + transcribing: 100.0, + translating: ratio.clamp(0.0, 1.0) * 100.0, + }; let _ = window_for_progress.emit( "task:progress", ProgressEvent { @@ -558,6 +701,7 @@ pub async fn retry_translation( status: TaskStatus::Translating, progress, message: "正在生成译文".to_string(), + sub_stage_progress: sub, }, ); }, diff --git a/src-tauri/src/vad.rs b/src-tauri/src/vad.rs index 77fea74..47334ef 100644 --- a/src-tauri/src/vad.rs +++ b/src-tauri/src/vad.rs @@ -47,12 +47,17 @@ impl VadEngine { Ok(Self { model_path, config }) } - pub async fn detect_segments(&self, samples: &[f32]) -> Vec<(f32, f32)> { + pub async fn detect_segments( + &self, + samples: &[f32], + on_progress: F, + ) -> Vec<(f32, f32)> { if self.model_path.is_some() { let samples_owned = samples.to_vec(); let model_path = self.model_path.clone().unwrap(); let config = self.config.clone(); let timeout_secs = self.config.timeout_seconds; + let on_progress_onnx = on_progress.clone(); match tokio::time::timeout( Duration::from_secs(timeout_secs), @@ -64,7 +69,7 @@ impl VadEngine { return None; } }; - Self::detect_with_onnx(&mut session, &samples_owned, &config).ok() + Self::detect_with_onnx(&mut session, &samples_owned, &config, on_progress_onnx).ok() }), ) .await @@ -86,7 +91,7 @@ impl VadEngine { } } - let ranges = self.detect_segments_with_energy(samples); + let ranges = self.detect_segments_with_energy(samples, &on_progress); eprintln!("vad: energy detection found {} speech ranges", ranges.len()); ranges } @@ -98,17 +103,20 @@ impl VadEngine { .with_context(|| format!("failed to load silero vad model: {}", model_path.display())) } - fn detect_with_onnx( + fn detect_with_onnx( session: &mut Session, samples: &[f32], config: &VadConfig, + on_progress: F, ) -> Result> { let chunk_size = 512usize; + let total_chunks = (samples.len() + chunk_size - 1) / chunk_size; let mut state = Array3::::zeros((2, 1, 128)); let sr = Array1::::from_vec(vec![config.sample_rate as i64]); let mut speech_probabilities = Vec::new(); + let mut last_progress = 0.0f32; - for chunk in samples.chunks(chunk_size) { + for (chunk_idx, chunk) in samples.chunks(chunk_size).enumerate() { let mut padded = vec![0.0_f32; chunk_size]; padded[..chunk.len()].copy_from_slice(chunk); let input = Array2::from_shape_vec((1, chunk_size), padded) @@ -139,23 +147,45 @@ impl VadEngine { .context("failed to rebuild vad state")?; } } + + let ratio = (chunk_idx + 1) as f32 / total_chunks as f32; + if (ratio - last_progress).abs() >= 0.02 { + last_progress = ratio; + on_progress(ratio); + } } + on_progress(1.0); Ok(Self::merge_probabilities(&speech_probabilities, chunk_size, config)) } - fn detect_segments_with_energy(&self, samples: &[f32]) -> Vec<(f32, f32)> { + fn detect_segments_with_energy( + &self, + samples: &[f32], + on_progress: &F, + ) -> Vec<(f32, f32)> { let frame_size = (self.config.sample_rate / 50).max(1); + let total_frames = (samples.len() + frame_size - 1) / frame_size; let mut energies = Vec::new(); - for chunk in samples.chunks(frame_size) { + let mut last_progress = 0.0f32; + + for (frame_idx, chunk) in samples.chunks(frame_size).enumerate() { let energy = chunk.iter().map(|sample| sample.abs()).sum::() / chunk.len() as f32; energies.push(energy); + + let ratio = (frame_idx + 1) as f32 / total_frames as f32; + if (ratio - last_progress).abs() >= 0.02 { + last_progress = ratio; + on_progress(ratio); + } } if energies.is_empty() { return Vec::new(); } + on_progress(1.0); + let dynamic_threshold = self.dynamic_energy_threshold(&energies); eprintln!( "vad: using energy fallback, frames={}, threshold={:.5}", diff --git a/src/components/SubtitleEditor.vue b/src/components/SubtitleEditor.vue index dcca131..c181309 100644 --- a/src/components/SubtitleEditor.vue +++ b/src/components/SubtitleEditor.vue @@ -1,5 +1,5 @@ diff --git a/src/lib/types.ts b/src/lib/types.ts index a7f22ab..ab792dd 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -19,6 +19,13 @@ export interface SubtitleSegment { translatedText?: string | null } +export interface SubStageProgress { + extracting: number + vad: number + transcribing: number + translating: number +} + export interface SubtitleTask { id: string filePath: string @@ -31,6 +38,7 @@ export interface SubtitleTask { progress: number segments: SubtitleSegment[] error?: string | null + subStageProgress: SubStageProgress } export interface TranslationConfig { @@ -62,6 +70,7 @@ export interface ProgressEvent { status: TaskStatus progress: number message: string + subStageProgress: SubStageProgress } export interface SegmentEvent { diff --git a/src/locales/en.ts b/src/locales/en.ts index de930b1..dc2e7bb 100644 --- a/src/locales/en.ts +++ b/src/locales/en.ts @@ -54,6 +54,7 @@ export default { retry: 'Retry', retryTranslate: 'Retry Translation', delete: 'Delete', + queuedHint: 'Waiting in queue...', status: { queued: 'Queued', extracting: 'Extracting', @@ -63,6 +64,12 @@ export default { completed: 'Completed', failed: 'Failed', }, + subStage: { + extracting: 'Extract Audio', + vad: 'Voice Detection', + transcribing: 'Transcribe', + translating: 'Translate', + }, }, editor: { title: 'Workspace', diff --git a/src/locales/zh-CN.ts b/src/locales/zh-CN.ts index 41270ee..937d4e1 100644 --- a/src/locales/zh-CN.ts +++ b/src/locales/zh-CN.ts @@ -54,6 +54,7 @@ export default { retry: '重试', retryTranslate: '重试翻译', delete: '移除', + queuedHint: '正在排队等待...', status: { queued: '排队中', extracting: '抽取', @@ -63,6 +64,12 @@ export default { completed: '完成', failed: '失败', }, + subStage: { + extracting: '音频抽取', + vad: '语音检测', + transcribing: '语音识别', + translating: '翻译', + }, }, editor: { title: '字幕工作区', diff --git a/src/stores/tasks.ts b/src/stores/tasks.ts index 176f99b..5b6ab8f 100644 --- a/src/stores/tasks.ts +++ b/src/stores/tasks.ts @@ -51,6 +51,9 @@ export const useTaskStore = defineStore('tasks', { if (!task) return task.status = payload.status task.progress = payload.progress + if (payload.subStageProgress) { + Object.assign(task.subStageProgress, payload.subStageProgress) + } }) const segmentUnlisten = await listen('task:segment', ({ payload }) => { diff --git a/src/style.css b/src/style.css index cbc6d23..3a94faa 100644 --- a/src/style.css +++ b/src/style.css @@ -378,14 +378,10 @@ textarea { color: var(--c-text-tertiary); } -.list-stack, -.segment-list { +.list-stack { display: flex; flex-direction: column; gap: 8px; -} - -.list-stack { min-height: 0; overflow: auto; } @@ -393,7 +389,7 @@ textarea { .segment-list { min-height: 0; flex: 1 1 auto; - overflow: auto; + overflow-y: auto; padding-right: 2px; will-change: scroll-position; } @@ -402,6 +398,10 @@ textarea { min-width: 100%; } +.segment-item + .segment-item { + margin-top: 8px; +} + .task-item, .segment-item { width: 100%; @@ -488,16 +488,104 @@ textarea { color: #fff; } +.task-item-wrapper { + width: 100%; +} + .task-item { position: relative; border-left: 3px solid transparent; } +.task-item.expanded { + border-color: var(--c-border-hover); + background: rgba(26, 26, 46, 0.02); +} + .task-item:hover .delete-button { opacity: 1; pointer-events: auto; } +.task-name-row { + display: flex; + align-items: center; + gap: 6px; + min-width: 0; + flex: 1; +} + +.expand-toggle { + flex-shrink: 0; + width: 18px; + height: 18px; + display: flex; + align-items: center; + justify-content: center; + border: none; + background: transparent; + color: var(--c-text-tertiary); + cursor: pointer; + padding: 0; + border-radius: var(--radius-sm); + transition: transform var(--transition), color var(--transition), background var(--transition); +} + +.expand-toggle:hover { + color: var(--c-text); + background: var(--c-focus); +} + +.expand-toggle.expanded { + transform: rotate(90deg); +} + +.sub-stages { + margin-top: 10px; + padding-top: 8px; + border-top: 1px solid var(--c-border); + display: flex; + flex-direction: column; + gap: 6px; +} + +.sub-stage { + display: flex; + flex-direction: column; + gap: 2px; +} + +.sub-stage-label { + display: flex; + align-items: center; + justify-content: space-between; + font-size: 11px; + color: var(--c-text-secondary); +} + +.sub-stage-pct { + font-family: "SF Mono", "Fira Code", "Cascadia Code", monospace; + font-size: 10px; + color: var(--c-text-tertiary); +} + +.progress.sub-progress { + height: 2px; + margin-top: 0; +} + +.progress-bar.sub-progress-bar.done { + background: var(--c-success); + opacity: 0.7; +} + +.queued-hint { + font-size: 11px; + color: var(--c-text-tertiary); + font-style: italic; + padding: 2px 0; +} + .delete-button { position: absolute; top: 8px;