初始化
This commit is contained in:
parent
836e754c34
commit
e0057c7060
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
/dist
|
||||
/node_modules
|
||||
/src-tauri/target/
|
||||
**/*.rs.bk
|
||||
/src-tauri/model/
|
||||
21
README.md
21
README.md
@ -23,21 +23,22 @@
|
||||
## 当前实现说明
|
||||
|
||||
- 当前仓库已补齐完整工程骨架与核心数据流。
|
||||
- 由于本机环境缺少 Rust 工具链,本次未能执行 `cargo check` 或 `tauri dev`。
|
||||
- `whisper.rs` 目前提供了稳定的接口与任务路由,但真实 `whisper-rs` 推理仍需在安装 Rust 后继续接入具体模型调用。
|
||||
- `vad.rs` 已提供可用的 VAD 模块 API,默认实现为能量检测回退;如果你后续放入 Silero ONNX,可在此模块内替换为 `ort` 推理。
|
||||
- 前端 `npm run build` 已通过,Rust 侧 `cargo check` 已通过。
|
||||
- `whisper.rs` 已接入真实 `whisper-rs`,会基于 VAD 片段逐段转录;目标语言为英文时启用 Whisper 原生 `translate`。
|
||||
- `vad.rs` 已接入 `ort` 版 Silero VAD 推理入口;当模型缺失或推理失败时,会自动回退到能量检测,保证链路不断。
|
||||
|
||||
## 运行前准备
|
||||
|
||||
1. 安装 Rust 工具链。
|
||||
2. 安装 `ffmpeg`,并确保可通过命令行直接调用。
|
||||
3. 安装前端依赖:
|
||||
2. 安装 `cmake`,`whisper-rs-sys` 在首次编译时需要它。
|
||||
3. 安装 `ffmpeg`,并确保可通过命令行直接调用。
|
||||
4. 安装前端依赖:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
4. 如需中文翻译,配置环境变量:
|
||||
5. 如需中文翻译,配置环境变量:
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=https://your-openai-compatible-endpoint/v1
|
||||
@ -45,7 +46,9 @@ export OPENAI_API_KEY=your_api_key
|
||||
export OPENAI_MODEL=gpt-4o-mini
|
||||
```
|
||||
|
||||
5. 启动桌面应用:
|
||||
6. 若要真正启用 ONNX Runtime 推理,请确保本机存在可被 `ort` 动态加载的 ONNX Runtime 库,或按你的部署方式提供运行库。
|
||||
|
||||
7. 启动桌面应用:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
@ -53,6 +56,6 @@ npm run dev
|
||||
|
||||
## 下一步建议
|
||||
|
||||
- 将 `src-tauri/src/whisper.rs` 的占位实现替换为真实 `whisper-rs` 推理。
|
||||
- 在 `src-tauri/src/vad.rs` 接入 Silero VAD ONNX Runtime。
|
||||
- 为 `src-tauri/src/vad.rs` 补模型输入名自适应和更多异常日志。
|
||||
- 加入文件选择器、任务恢复、批量导出与测试用例。
|
||||
- 为 `whisper-rs` 增加硬件加速参数与模型配置面板。
|
||||
|
||||
2869
package-lock.json
generated
Normal file
2869
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@ -5,15 +5,18 @@
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"tauri": "tauri dev",
|
||||
"build": "vue-tsc --noEmit && vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@tauri-apps/api": "^2.0.0",
|
||||
"@tauri-apps/plugin-dialog": "^2.0.0",
|
||||
"pinia": "^2.1.7",
|
||||
"vue": "^3.5.13"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tauri-apps/cli": "^2.0.4",
|
||||
"@vitejs/plugin-vue": "^5.2.1",
|
||||
"autoprefixer": "^10.4.20",
|
||||
"postcss": "^8.4.49",
|
||||
|
||||
5796
src-tauri/Cargo.lock
generated
Normal file
5796
src-tauri/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
@ -13,13 +13,16 @@ tauri-build = { version = "2.0.3", features = [] }
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
hound = "3.5"
|
||||
ndarray = "0.17.2"
|
||||
ort = { version = "2.0.0-rc.9", features = ["ndarray", "load-dynamic"] }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
tauri = { version = "2.0.6", features = [] }
|
||||
tauri-plugin-dialog = "2.4.2"
|
||||
tempfile = "3.14"
|
||||
thiserror = "2.0"
|
||||
tokio = { version = "1.42", features = ["macros", "rt-multi-thread", "time"] }
|
||||
uuid = { version = "1.11", features = ["serde", "v4"] }
|
||||
walkdir = "2.5"
|
||||
|
||||
whisper-rs = "0.16"
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
"permissions": [
|
||||
"core:event:default",
|
||||
"core:path:default",
|
||||
"core:window:default"
|
||||
"core:window:default",
|
||||
"dialog:default"
|
||||
]
|
||||
}
|
||||
|
||||
13
src-tauri/exports/2.找春天.ass
Normal file
13
src-tauri/exports/2.找春天.ass
Normal file
@ -0,0 +1,13 @@
|
||||
[Script Info]
|
||||
ScriptType: v4.00+
|
||||
Collisions: Normal
|
||||
PlayResX: 1280
|
||||
PlayResY: 720
|
||||
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
Style: Default,Arial,22,&H00FFFFFF,&H000000FF,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,1,2,32,32,24,1
|
||||
|
||||
[Events]
|
||||
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
67
src-tauri/exports/2.找春天.srt
Normal file
67
src-tauri/exports/2.找春天.srt
Normal file
@ -0,0 +1,67 @@
|
||||
1
|
||||
00:00:00,000 --> 00:00:02,000
|
||||
课文部分
|
||||
|
||||
2
|
||||
00:00:02,000 --> 00:00:03,700
|
||||
第二课
|
||||
|
||||
3
|
||||
00:00:03,700 --> 00:00:05,700
|
||||
找春天
|
||||
|
||||
4
|
||||
00:00:17,680 --> 00:00:21,280
|
||||
春天来了,春天来了!
|
||||
|
||||
5
|
||||
00:00:21,280 --> 00:00:30,280
|
||||
我们几个孩子,脱掉棉袄,冲出家门,奔向田野,去寻找春天。
|
||||
|
||||
6
|
||||
00:00:31,420 --> 00:00:40,420
|
||||
春天,像個害羞的小姑娘,遮遮掩掩,躲躲藏藏。
|
||||
|
||||
7
|
||||
00:00:40,640 --> 00:00:43,640
|
||||
我們仔細的找啊,找啊!
|
||||
|
||||
8
|
||||
00:00:43,640 --> 00:00:50,640
|
||||
小草從地下探出頭來,那是春天的眉毛吧!
|
||||
|
||||
9
|
||||
00:00:50,640 --> 00:00:58,640
|
||||
早開的野花,一朵兩朵,那是春天的眼睛吧!
|
||||
|
||||
10
|
||||
00:00:58,640 --> 00:01:04,640
|
||||
樹木吐出點點嫩芽,那是春天的音符吧!
|
||||
|
||||
11
|
||||
00:01:05,640 --> 00:01:12,640
|
||||
解凍的小溪叮叮咚咚,那是春天的琴聲吧!
|
||||
|
||||
12
|
||||
00:01:13,940 --> 00:01:22,440
|
||||
春天来了,我们看到了他,我们听到了他,我们闻到了他。
|
||||
|
||||
13
|
||||
00:01:23,020 --> 00:01:25,020
|
||||
我們觸到了它
|
||||
|
||||
14
|
||||
00:01:25,020 --> 00:01:29,120
|
||||
它在柳枝上當秋千
|
||||
|
||||
15
|
||||
00:01:29,120 --> 00:01:31,920
|
||||
在風箏尾巴上搖啊搖
|
||||
|
||||
16
|
||||
00:01:31,920 --> 00:01:35,120
|
||||
它在喜鵲、杜鵑嘴裡叫
|
||||
|
||||
17
|
||||
00:01:35,120 --> 00:01:38,920
|
||||
在桃花、杏花枝頭笑
|
||||
2
src-tauri/exports/2.找春天.vtt
Normal file
2
src-tauri/exports/2.找春天.vtt
Normal file
@ -0,0 +1,2 @@
|
||||
WEBVTT
|
||||
|
||||
74
src-tauri/exports/IS_Intro.srt
Normal file
74
src-tauri/exports/IS_Intro.srt
Normal file
@ -0,0 +1,74 @@
|
||||
1
|
||||
00:00:01,710 --> 00:00:10,630
|
||||
Inkscribe works on Mac or Windows, and while this demo is on the Mac version, the software
|
||||
Inkscribe适用于Mac或Windows,虽然这个演示是在Mac版本上进行的,但软件在两个版本上几乎完全相同。
|
||||
|
||||
2
|
||||
00:00:10,630 --> 00:00:11,990
|
||||
is pretty much identical on both versions.
|
||||
使用Inkscribe非常简单。启动应用程序,将音频或视频文件拖放到Inkscribe的媒体窗口上,点击播放,然后开始输入。
|
||||
|
||||
3
|
||||
00:00:13,150 --> 00:00:17,150
|
||||
Using Inkscribe couldn't be simpler. Launch the application,
|
||||
你好。我是Rare先生。
|
||||
|
||||
4
|
||||
00:00:17,150 --> 00:00:21,270
|
||||
drag an audio or video file onto Inkscribe's media window,
|
||||
就是这么简单。
|
||||
|
||||
5
|
||||
00:00:21,270 --> 00:00:25,390
|
||||
click play, and start typing.
|
||||
你可以在Inkscribe的文本区域中输入任何内容。
|
||||
|
||||
6
|
||||
00:00:25,390 --> 00:00:29,590
|
||||
Hello. I'm Mr. Rare.
|
||||
你也可以在任何你想标记事情发生的时间插入时间码,稍后,你可以点击时间码跳转到视频的精确位置。
|
||||
|
||||
7
|
||||
00:00:29,590 --> 00:00:30,590
|
||||
It's that easy.
|
||||
Inkscribe不会为你输入,但它广泛的自定义键盘快捷键使其超级容易输入笔记和文稿,而你的双手永远不会离开键盘。
|
||||
|
||||
8
|
||||
00:00:31,110 --> 00:00:34,610
|
||||
You can type anything you want in Inkscribe's text area.
|
||||
你可以在Inkscribe的文本区域输入任何你想要的内容。
|
||||
|
||||
9
|
||||
00:00:34,610 --> 00:00:39,110
|
||||
You can also insert time codes anywhere you want to mark when things happen,
|
||||
你也可以在任何你想要的位置插入时间码,以标记事情发生的时间,
|
||||
|
||||
10
|
||||
00:00:39,110 --> 00:00:48,110
|
||||
and later, you can click on the time codes to jump to that exact point in your video.
|
||||
稍后,你可以点击时间码跳转到视频中的确切位置。
|
||||
|
||||
11
|
||||
00:00:49,410 --> 00:00:54,770
|
||||
Inkscribe does not do the typing for you, but its wide selection of custom keyboard shortcuts
|
||||
Inkscribe不会替你输入,但它丰富的自定义键盘快捷键
|
||||
|
||||
12
|
||||
00:00:54,770 --> 00:01:00,690
|
||||
makes it super easy to type notes and transcripts, without your hands ever leaving the keyboard.
|
||||
使得输入笔记和文稿变得超级容易,而你的双手始终不会离开键盘。
|
||||
|
||||
13
|
||||
00:01:00,690 --> 00:01:05,290
|
||||
You can learn more about Inkscribe and find additional tutorials on Inkscribe's many features
|
||||
您可以了解更多关于Inkscribe,并在Inkscribe.com上找到关于其许多功能的额外教程。
|
||||
|
||||
14
|
||||
00:01:05,290 --> 00:01:07,330
|
||||
at Inkscribe.com.
|
||||
感谢观看。
|
||||
|
||||
15
|
||||
00:01:07,330 --> 00:01:08,130
|
||||
Thanks for watching.
|
||||
感谢观看。
|
||||
1
src-tauri/gen/schemas/acl-manifests.json
Normal file
1
src-tauri/gen/schemas/acl-manifests.json
Normal file
File diff suppressed because one or more lines are too long
1
src-tauri/gen/schemas/capabilities.json
Normal file
1
src-tauri/gen/schemas/capabilities.json
Normal file
@ -0,0 +1 @@
|
||||
{"default":{"identifier":"default","description":"Default capability for CrossSubtitle-AI","local":true,"windows":["main"],"permissions":["core:event:default","core:path:default","core:window:default","dialog:default"]}}
|
||||
2310
src-tauri/gen/schemas/desktop-schema.json
Normal file
2310
src-tauri/gen/schemas/desktop-schema.json
Normal file
File diff suppressed because it is too large
Load Diff
2310
src-tauri/gen/schemas/macOS-schema.json
Normal file
2310
src-tauri/gen/schemas/macOS-schema.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src-tauri/icons/icon.png
Normal file
BIN
src-tauri/icons/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 104 B |
@ -51,6 +51,7 @@ fn error_to_string(error: anyhow::Error) -> String {
|
||||
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
||||
pub fn run() {
|
||||
tauri::Builder::default()
|
||||
.plugin(tauri_plugin_dialog::init())
|
||||
.manage(AppState::default())
|
||||
.invoke_handler(tauri::generate_handler![
|
||||
start_subtitle_task,
|
||||
|
||||
@ -19,6 +19,13 @@ pub enum TargetLanguage {
|
||||
En,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum OutputMode {
|
||||
Source,
|
||||
Translate,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SubtitleSegment {
|
||||
@ -38,6 +45,8 @@ pub struct SubtitleTask {
|
||||
pub file_name: String,
|
||||
pub source_lang: Option<String>,
|
||||
pub target_lang: TargetLanguage,
|
||||
pub output_mode: OutputMode,
|
||||
pub bilingual_output: bool,
|
||||
pub status: TaskStatus,
|
||||
pub progress: f32,
|
||||
pub segments: Vec<SubtitleSegment>,
|
||||
@ -50,6 +59,9 @@ pub struct StartTaskPayload {
|
||||
pub file_path: String,
|
||||
pub source_lang: Option<String>,
|
||||
pub target_lang: TargetLanguage,
|
||||
pub output_mode: OutputMode,
|
||||
pub bilingual_output: bool,
|
||||
pub translation_config: Option<TranslationConfig>,
|
||||
pub whisper_model_path: Option<String>,
|
||||
pub vad_model_path: Option<String>,
|
||||
}
|
||||
|
||||
@ -53,15 +53,15 @@ impl Default for AssStyle {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn render(segments: &[SubtitleSegment], format: SubtitleFormat) -> String {
|
||||
pub fn render(segments: &[SubtitleSegment], format: SubtitleFormat, bilingual: bool) -> String {
|
||||
match format {
|
||||
SubtitleFormat::Srt => render_srt(segments),
|
||||
SubtitleFormat::Vtt => render_vtt(segments),
|
||||
SubtitleFormat::Ass => render_ass(segments, AssStyle::default()),
|
||||
SubtitleFormat::Srt => render_srt(segments, bilingual),
|
||||
SubtitleFormat::Vtt => render_vtt(segments, bilingual),
|
||||
SubtitleFormat::Ass => render_ass(segments, AssStyle::default(), bilingual),
|
||||
}
|
||||
}
|
||||
|
||||
fn render_srt(segments: &[SubtitleSegment]) -> String {
|
||||
fn render_srt(segments: &[SubtitleSegment], bilingual: bool) -> String {
|
||||
segments
|
||||
.iter()
|
||||
.enumerate()
|
||||
@ -71,14 +71,14 @@ fn render_srt(segments: &[SubtitleSegment]) -> String {
|
||||
index + 1,
|
||||
format_timestamp(segment.start, ","),
|
||||
format_timestamp(segment.end, ","),
|
||||
segment.translated_text.as_deref().unwrap_or(&segment.source_text)
|
||||
compose_subtitle_text(segment, bilingual)
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
fn render_vtt(segments: &[SubtitleSegment]) -> String {
|
||||
fn render_vtt(segments: &[SubtitleSegment], bilingual: bool) -> String {
|
||||
let body = segments
|
||||
.iter()
|
||||
.map(|segment| {
|
||||
@ -86,7 +86,7 @@ fn render_vtt(segments: &[SubtitleSegment]) -> String {
|
||||
"{} --> {}\n{}\n",
|
||||
format_timestamp(segment.start, "."),
|
||||
format_timestamp(segment.end, "."),
|
||||
segment.translated_text.as_deref().unwrap_or(&segment.source_text)
|
||||
compose_subtitle_text(segment, bilingual)
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
@ -95,7 +95,7 @@ fn render_vtt(segments: &[SubtitleSegment]) -> String {
|
||||
format!("WEBVTT\n\n{}", body)
|
||||
}
|
||||
|
||||
fn render_ass(segments: &[SubtitleSegment], style: AssStyle) -> String {
|
||||
fn render_ass(segments: &[SubtitleSegment], style: AssStyle, bilingual: bool) -> String {
|
||||
let header = format!(
|
||||
"[Script Info]\nScriptType: v4.00+\nCollisions: Normal\nPlayResX: 1280\nPlayResY: 720\n\n[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\nStyle: {},{},{},{},&H000000FF,{},&H64000000,0,0,0,0,100,100,0,0,1,2,1,2,32,32,24,1\n\n[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
|
||||
style.name, style.font_name, style.font_size, style.primary_colour, style.outline_colour
|
||||
@ -104,11 +104,7 @@ fn render_ass(segments: &[SubtitleSegment], style: AssStyle) -> String {
|
||||
let body = segments
|
||||
.iter()
|
||||
.map(|segment| {
|
||||
let text = segment
|
||||
.translated_text
|
||||
.as_deref()
|
||||
.unwrap_or(&segment.source_text)
|
||||
.replace('\n', "\\N");
|
||||
let text = compose_subtitle_text(segment, bilingual).replace('\n', "\\N");
|
||||
format!(
|
||||
"Dialogue: 0,{}, {},{},{},0,0,0,,{}",
|
||||
format_ass_timestamp(segment.start),
|
||||
@ -124,6 +120,16 @@ fn render_ass(segments: &[SubtitleSegment], style: AssStyle) -> String {
|
||||
format!("{header}{body}\n")
|
||||
}
|
||||
|
||||
fn compose_subtitle_text(segment: &SubtitleSegment, bilingual: bool) -> String {
|
||||
match (bilingual, segment.translated_text.as_deref()) {
|
||||
(true, Some(translated)) if !translated.trim().is_empty() => {
|
||||
format!("{}\n{}", segment.source_text.trim(), translated.trim())
|
||||
}
|
||||
(_, Some(translated)) if !translated.trim().is_empty() => translated.trim().to_string(),
|
||||
_ => segment.source_text.trim().to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn format_timestamp(seconds: f32, separator: &str) -> String {
|
||||
let millis = (seconds * 1000.0).round() as u64;
|
||||
let hours = millis / 3_600_000;
|
||||
|
||||
@ -10,7 +10,7 @@ use uuid::Uuid;
|
||||
use crate::{
|
||||
audio::AudioPipeline,
|
||||
models::{
|
||||
ErrorEvent, ProgressEvent, StartTaskPayload, SubtitleSegment, SubtitleTask, TargetLanguage,
|
||||
ErrorEvent, OutputMode, ProgressEvent, StartTaskPayload, SubtitleSegment, SubtitleTask,
|
||||
TaskStatus, TranslationConfig,
|
||||
},
|
||||
state::AppState,
|
||||
@ -20,12 +20,27 @@ use crate::{
|
||||
whisper::WhisperEngine,
|
||||
};
|
||||
|
||||
const DEFAULT_WHISPER_MODEL: &str =
|
||||
"/Users/kura/Documents/work/tauri/CrossSubtitle/src-tauri/model/ggml-small-q5_1.bin";
|
||||
const DEFAULT_VAD_MODEL: &str =
|
||||
"/Users/kura/Documents/work/tauri/CrossSubtitle/src-tauri/model/silero_vad.onnx";
|
||||
|
||||
pub async fn start_task(
|
||||
app: tauri::AppHandle,
|
||||
window: Window,
|
||||
state: tauri::State<'_, AppState>,
|
||||
payload: StartTaskPayload,
|
||||
mut payload: StartTaskPayload,
|
||||
) -> Result<SubtitleTask> {
|
||||
if payload.whisper_model_path.as_deref().is_none_or(str::is_empty) {
|
||||
payload.whisper_model_path = Some(DEFAULT_WHISPER_MODEL.to_string());
|
||||
}
|
||||
if payload.vad_model_path.as_deref().is_none_or(str::is_empty) {
|
||||
payload.vad_model_path = Some(DEFAULT_VAD_MODEL.to_string());
|
||||
}
|
||||
if payload.source_lang.as_deref().is_none_or(str::is_empty) {
|
||||
payload.source_lang = Some("auto".to_string());
|
||||
}
|
||||
|
||||
let file_path = PathBuf::from(&payload.file_path);
|
||||
let task = SubtitleTask {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
@ -37,6 +52,8 @@ pub async fn start_task(
|
||||
file_path: payload.file_path.clone(),
|
||||
source_lang: payload.source_lang.clone(),
|
||||
target_lang: payload.target_lang.clone(),
|
||||
output_mode: payload.output_mode.clone(),
|
||||
bilingual_output: payload.bilingual_output,
|
||||
status: TaskStatus::Queued,
|
||||
progress: 0.0,
|
||||
segments: Vec::new(),
|
||||
@ -74,6 +91,7 @@ async fn run_pipeline(
|
||||
) -> Result<()> {
|
||||
let app_state = app.state::<AppState>();
|
||||
let workspace = std::env::temp_dir().join("crosssubtitle-ai").join(&task.id);
|
||||
let should_translate = matches!(payload.output_mode, OutputMode::Translate);
|
||||
|
||||
set_status(&window, &app_state, &mut task, TaskStatus::Extracting, 8.0, "正在抽取音频")?;
|
||||
let wav_path = AudioPipeline::extract_to_wav(&payload.file_path, &workspace)?;
|
||||
@ -85,12 +103,27 @@ async fn run_pipeline(
|
||||
|
||||
set_status(&window, &app_state, &mut task, TaskStatus::Transcribing, 45.0, "正在执行 Whisper")?;
|
||||
let whisper = WhisperEngine::new(payload.whisper_model_path.clone());
|
||||
let task_id_for_progress = task.id.clone();
|
||||
let mut segments = whisper.infer_segments(
|
||||
&wav_path,
|
||||
&task.id,
|
||||
task.source_lang.as_deref(),
|
||||
&task.target_lang,
|
||||
should_translate,
|
||||
&speech_ranges,
|
||||
|ratio| {
|
||||
let progress = 45.0 + ratio.clamp(0.0, 1.0) * 27.0;
|
||||
window.emit(
|
||||
"task:progress",
|
||||
ProgressEvent {
|
||||
task_id: task_id_for_progress.clone(),
|
||||
status: TaskStatus::Transcribing,
|
||||
progress,
|
||||
message: "正在执行 Whisper".to_string(),
|
||||
},
|
||||
)?;
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
|
||||
for segment in &segments {
|
||||
@ -106,23 +139,26 @@ async fn run_pipeline(
|
||||
task.segments = segments.clone();
|
||||
app_state.upsert_task(task.clone())?;
|
||||
|
||||
if matches!(task.target_lang, TargetLanguage::Zh) {
|
||||
if let Some(config) = load_translation_config() {
|
||||
set_status(&window, &app_state, &mut task, TaskStatus::Translating, 72.0, "正在生成中文译文")?;
|
||||
let translator = Translator::new(config)?;
|
||||
segments = translator.translate_to_zh(&segments).await?;
|
||||
task.segments = segments.clone();
|
||||
app_state.upsert_task(task.clone())?;
|
||||
if should_translate {
|
||||
let config = payload
|
||||
.translation_config
|
||||
.clone()
|
||||
.or_else(load_translation_config)
|
||||
.ok_or_else(|| anyhow::anyhow!("翻译模式需要填写 LLM API 配置,或设置 OPENAI_API_BASE / OPENAI_API_KEY"))?;
|
||||
set_status(&window, &app_state, &mut task, TaskStatus::Translating, 72.0, "正在生成译文")?;
|
||||
let translator = Translator::new(config)?;
|
||||
segments = translator.translate_segments(&segments, &task.target_lang).await?;
|
||||
task.segments = segments.clone();
|
||||
app_state.upsert_task(task.clone())?;
|
||||
|
||||
for segment in segments {
|
||||
window.emit(
|
||||
"task:segment",
|
||||
crate::models::SegmentEvent {
|
||||
task_id: task.id.clone(),
|
||||
segment,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
for segment in segments {
|
||||
window.emit(
|
||||
"task:segment",
|
||||
crate::models::SegmentEvent {
|
||||
task_id: task.id.clone(),
|
||||
segment,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -180,9 +216,10 @@ pub fn list_tasks(state: tauri::State<'_, AppState>) -> Result<Vec<SubtitleTask>
|
||||
pub fn export_task(state: tauri::State<'_, AppState>, task_id: String, format: String) -> Result<String> {
|
||||
let task = state.get_task(&task_id)?;
|
||||
let format = SubtitleFormat::try_from(format.as_str())?;
|
||||
let content = render(&task.segments, format);
|
||||
let content = render(&task.segments, format, task.bilingual_output);
|
||||
|
||||
let stem = PathBuf::from(&task.file_name)
|
||||
let file_name_path = PathBuf::from(&task.file_name);
|
||||
let stem = file_name_path
|
||||
.file_stem()
|
||||
.and_then(|item| item.to_str())
|
||||
.unwrap_or("subtitle");
|
||||
|
||||
@ -5,7 +5,7 @@ use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::models::{SubtitleSegment, TranslationConfig};
|
||||
use crate::models::{SubtitleSegment, TargetLanguage, TranslationConfig};
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct ChatCompletionRequest {
|
||||
@ -44,10 +44,12 @@ struct ChatMessageContent {
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TranslationResponse {
|
||||
translations: Vec<TranslatedRow>,
|
||||
translations: Option<Vec<TranslatedRow>>,
|
||||
items: Option<Vec<TranslatedRow>>,
|
||||
results: Option<Vec<TranslatedRow>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
struct TranslatedRow {
|
||||
id: String,
|
||||
text: String,
|
||||
@ -68,17 +70,27 @@ impl Translator {
|
||||
Ok(Self { client, config })
|
||||
}
|
||||
|
||||
pub async fn translate_to_zh(&self, segments: &[SubtitleSegment]) -> Result<Vec<SubtitleSegment>> {
|
||||
pub async fn translate_segments(
|
||||
&self,
|
||||
segments: &[SubtitleSegment],
|
||||
target_language: &TargetLanguage,
|
||||
) -> Result<Vec<SubtitleSegment>> {
|
||||
let batch_size = self.config.batch_size.clamp(10, 15);
|
||||
let context_size = self.config.context_size.min(5);
|
||||
let mut translated = segments.to_vec();
|
||||
let target_language_name = match target_language {
|
||||
TargetLanguage::Zh => "简体中文",
|
||||
TargetLanguage::En => "英文",
|
||||
};
|
||||
|
||||
for batch_start in (0..segments.len()).step_by(batch_size) {
|
||||
let batch_end = (batch_start + batch_size).min(segments.len());
|
||||
let context_start = batch_start.saturating_sub(context_size);
|
||||
let context = &segments[context_start..batch_start];
|
||||
let batch = &segments[batch_start..batch_end];
|
||||
let rows = self.request_translation(context, batch).await?;
|
||||
let rows = self
|
||||
.translate_batch_with_retries(context, batch, target_language_name)
|
||||
.await?;
|
||||
|
||||
for row in rows {
|
||||
if let Some(segment) = translated.iter_mut().find(|item| item.id == row.id) {
|
||||
@ -90,10 +102,64 @@ impl Translator {
|
||||
Ok(translated)
|
||||
}
|
||||
|
||||
async fn translate_batch_with_retries(
|
||||
&self,
|
||||
context: &[SubtitleSegment],
|
||||
batch: &[SubtitleSegment],
|
||||
target_language_name: &str,
|
||||
) -> Result<Vec<TranslatedRow>> {
|
||||
let mut collected = Vec::<TranslatedRow>::new();
|
||||
let mut pending = batch.to_vec();
|
||||
|
||||
for retry in 0..3 {
|
||||
if pending.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
if retry > 0 {
|
||||
eprintln!(
|
||||
"translation retry: attempt={}, missing_segments={}",
|
||||
retry + 1,
|
||||
pending
|
||||
.iter()
|
||||
.map(|segment| segment.id.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
);
|
||||
}
|
||||
|
||||
let rows = self
|
||||
.request_translation(context, &pending, target_language_name, retry)
|
||||
.await?;
|
||||
merge_rows(&mut collected, rows);
|
||||
|
||||
let translated_ids = collected
|
||||
.iter()
|
||||
.map(|row| row.id.as_str())
|
||||
.collect::<std::collections::HashSet<_>>();
|
||||
pending.retain(|segment| !translated_ids.contains(segment.id.as_str()));
|
||||
}
|
||||
|
||||
if !pending.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"translation missing segments after retries: {}",
|
||||
pending
|
||||
.iter()
|
||||
.map(|segment| segment.id.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
));
|
||||
}
|
||||
|
||||
Ok(order_rows(batch, &collected))
|
||||
}
|
||||
|
||||
async fn request_translation(
|
||||
&self,
|
||||
context: &[SubtitleSegment],
|
||||
batch: &[SubtitleSegment],
|
||||
target_language_name: &str,
|
||||
retry: usize,
|
||||
) -> Result<Vec<TranslatedRow>> {
|
||||
let context_text = if context.is_empty() {
|
||||
"无".to_string()
|
||||
@ -125,8 +191,11 @@ impl Translator {
|
||||
ChatMessage {
|
||||
role: "user".to_string(),
|
||||
content: format!(
|
||||
"把以下字幕翻译成简体中文。上下文:\n{}\n\n待翻译片段:\n{}\n\n请返回 {{\"translations\":[{{\"id\":\"seg-0001\",\"text\":\"译文\"}}]}}",
|
||||
context_text, batch_text
|
||||
"{}把以下字幕翻译成{}。保持专有名词、角色称呼和上下文一致。必须逐条返回所有待翻译片段,禁止遗漏、合并、拆分或改写 id。上下文:\n{}\n\n待翻译片段:\n{}\n\n请返回 {{\"translations\":[{{\"id\":\"seg-0001\",\"text\":\"译文\"}}]}}",
|
||||
retry_prompt_prefix(retry),
|
||||
target_language_name,
|
||||
context_text,
|
||||
batch_text
|
||||
),
|
||||
},
|
||||
],
|
||||
@ -136,6 +205,14 @@ impl Translator {
|
||||
"{}/chat/completions",
|
||||
self.config.api_base.trim_end_matches('/')
|
||||
);
|
||||
let request_json = serde_json::to_string_pretty(&request)
|
||||
.context("failed to serialize translation request")?;
|
||||
eprintln!(
|
||||
"translation request url: {}\ntranslation request headers: Authorization: Bearer {}\ntranslation request body:\n{}",
|
||||
url,
|
||||
mask_secret(&self.config.api_key),
|
||||
request_json
|
||||
);
|
||||
|
||||
let mut last_error: Option<anyhow::Error> = None;
|
||||
for attempt in 0..3 {
|
||||
@ -150,7 +227,10 @@ impl Translator {
|
||||
match response {
|
||||
Ok(response) => {
|
||||
let response = response.error_for_status().context("translation http error")?;
|
||||
let payload: ChatCompletionResponse = response.json().await.context("invalid response body")?;
|
||||
let raw_text = response.text().await.context("invalid response body")?;
|
||||
eprintln!("translation raw response:\n{}", raw_text);
|
||||
let payload: ChatCompletionResponse =
|
||||
serde_json::from_str(&raw_text).context("invalid response body")?;
|
||||
let content = payload
|
||||
.choices
|
||||
.first()
|
||||
@ -158,9 +238,9 @@ impl Translator {
|
||||
.message
|
||||
.content
|
||||
.clone();
|
||||
let rows: TranslationResponse =
|
||||
serde_json::from_str(&content).context("translation json parse failed")?;
|
||||
return Ok(rows.translations);
|
||||
let rows = parse_translation_response(&content)
|
||||
.with_context(|| format!("translation json parse failed: {}", preview(&content)))?;
|
||||
return Ok(rows);
|
||||
}
|
||||
Err(error) => {
|
||||
last_error = Some(error.into());
|
||||
@ -172,3 +252,193 @@ impl Translator {
|
||||
Err(last_error.unwrap_or_else(|| anyhow!("translation request failed")))
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the text placed in front of the translation prompt for a given round.
///
/// Round `0` (the initial request) gets no prefix. Every later round gets a
/// fixed notice telling the model that this is a gap-filling retry and that
/// each pending id must appear exactly once.
fn retry_prompt_prefix(retry: usize) -> &'static str {
    match retry {
        0 => "",
        _ => "这是补漏重试。你上次漏掉了部分片段,这次只需要返回当前待翻译片段对应的结果,确保每个 id 都出现且只出现一次。\n\n",
    }
}
|
||||
|
||||
/// Folds freshly returned rows into the accumulated list, keyed by row id.
///
/// A row whose id is not yet present is appended as-is. A row whose id already
/// exists overwrites the stored text only when the new text is non-blank, so a
/// blank retry answer never erases an earlier translation.
fn merge_rows(collected: &mut Vec<TranslatedRow>, rows: Vec<TranslatedRow>) {
    for incoming in rows {
        match collected.iter().position(|known| known.id == incoming.id) {
            Some(index) => {
                if !incoming.text.trim().is_empty() {
                    collected[index].text = incoming.text;
                }
            }
            None => collected.push(incoming),
        }
    }
}
|
||||
|
||||
/// Returns the translated rows arranged in the same order as `batch`.
///
/// For each segment the first row with a matching id is cloned into the
/// output. Segments without a matching row are skipped, and rows whose id does
/// not occur in `batch` are dropped.
fn order_rows(batch: &[SubtitleSegment], rows: &[TranslatedRow]) -> Vec<TranslatedRow> {
    let mut ordered = Vec::new();
    for segment in batch {
        if let Some(matching) = rows.iter().find(|row| row.id == segment.id) {
            ordered.push(matching.clone());
        }
    }
    ordered
}
|
||||
|
||||
fn parse_translation_response(content: &str) -> Result<Vec<TranslatedRow>> {
|
||||
let candidates = [
|
||||
content.trim().to_string(),
|
||||
strip_code_fence(content),
|
||||
strip_think_block(content),
|
||||
extract_json_after_think(content).unwrap_or_default(),
|
||||
extract_last_json_object(content).unwrap_or_default(),
|
||||
extract_json_object(content).unwrap_or_default(),
|
||||
];
|
||||
|
||||
for candidate in candidates {
|
||||
if candidate.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(response) = serde_json::from_str::<TranslationResponse>(&candidate) {
|
||||
if let Some(rows) = response
|
||||
.translations
|
||||
.or(response.items)
|
||||
.or(response.results)
|
||||
.filter(|rows| !rows.is_empty())
|
||||
{
|
||||
return Ok(rows);
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(rows) = serde_json::from_str::<Vec<TranslatedRow>>(&candidate) {
|
||||
if !rows.is_empty() {
|
||||
return Ok(rows);
|
||||
}
|
||||
}
|
||||
|
||||
let loose_rows = extract_rows_loose(&candidate);
|
||||
if !loose_rows.is_empty() {
|
||||
return Ok(loose_rows);
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow!("unable to parse translation response"))
|
||||
}
|
||||
|
||||
/// Removes a surrounding Markdown code fence from `content`.
///
/// The input is trimmed first. If it does not start with three backticks it is
/// returned as-is (trimmed). Otherwise the leading "```json", "```JSON" or
/// "```" markers and any trailing "```" markers are stripped and the remainder
/// is trimmed again.
fn strip_code_fence(content: &str) -> String {
    let body = content.trim();

    let unfenced = if body.starts_with("```") {
        let opened = body
            .trim_start_matches("```json")
            .trim_start_matches("```JSON")
            .trim_start_matches("```");
        opened.trim_end_matches("```").trim()
    } else {
        body
    };

    unfenced.to_string()
}
|
||||
|
||||
/// Cuts out the widest `{ ... }` span in `content`.
///
/// The span runs from the first `{` to the last `}`. Returns `None` when
/// either brace is missing or the last `}` comes before the first `{`.
fn extract_json_object(content: &str) -> Option<String> {
    let open = content.find('{')?;
    let close = content.rfind('}')?;
    if close <= open {
        return None;
    }
    Some(content[open..=close].trim().to_string())
}
|
||||
|
||||
/// Drops everything up to and including the last `</think>` tag.
///
/// When the tag is absent the whole input is kept. The result is trimmed in
/// both cases.
fn strip_think_block(content: &str) -> String {
    const CLOSING_TAG: &str = "</think>";

    let remainder = match content.rfind(CLOSING_TAG) {
        Some(tag_start) => &content[tag_start + CLOSING_TAG.len()..],
        None => content,
    };

    remainder.trim().to_string()
}
|
||||
|
||||
fn extract_json_after_think(content: &str) -> Option<String> {
|
||||
let stripped = strip_think_block(content);
|
||||
extract_last_json_object(&stripped)
|
||||
}
|
||||
|
||||
/// Extracts the last complete top-level `{ ... }` object found in `content`.
///
/// The text is scanned left to right while tracking brace depth. Braces that
/// sit inside a double-quoted string within an object are ignored, and
/// backslash escapes are honoured. The span of the most recent object whose
/// braces balance back to depth zero is returned, so a nested reply such as
/// `{"translations":[{...},{...}]}` comes back whole rather than as a
/// fragment starting at its innermost `{`.
///
/// If no balanced object exists (for example a stray `{` earlier in the text
/// is never closed), the function falls back to pairing the last `}` with the
/// nearest `{` before it.
///
/// Returns `None` when `content` contains no `{ ... }` span at all.
fn extract_last_json_object(content: &str) -> Option<String> {
    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;
    let mut open_at: Option<usize> = None;
    let mut last_balanced: Option<(usize, usize)> = None;

    // Scanning bytes is safe: `{`, `}`, `"` and `\` are ASCII and never occur
    // inside a multi-byte UTF-8 sequence, so recorded offsets are boundaries.
    for (index, byte) in content.bytes().enumerate() {
        if in_string {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                in_string = false;
            }
            continue;
        }

        match byte {
            // Quotes in prose outside any object are not JSON strings.
            b'"' if depth > 0 => in_string = true,
            b'{' => {
                if depth == 0 {
                    open_at = Some(index);
                }
                depth += 1;
            }
            b'}' if depth > 0 => {
                depth -= 1;
                if depth == 0 {
                    if let Some(start) = open_at.take() {
                        last_balanced = Some((start, index));
                    }
                }
            }
            _ => {}
        }
    }

    if let Some((start, end)) = last_balanced {
        return Some(content[start..=end].trim().to_string());
    }

    // Fallback: nearest `{` before the final `}`.
    let end = content.rfind('}')?;
    let start = content[..=end].rfind('{')?;
    (end > start).then(|| content[start..=end].trim().to_string())
}
|
||||
|
||||
/// Produces a single-line excerpt of `content` for error messages.
///
/// Newlines become spaces and the result is cut to at most 240 characters.
fn preview(content: &str) -> String {
    content
        .chars()
        .map(|ch| if ch == '\n' { ' ' } else { ch })
        .take(240)
        .collect()
}
|
||||
|
||||
/// Masks a secret for logging, keeping only its first and last four characters.
///
/// Secrets of eight characters or fewer are fully replaced by `"****"`.
/// Longer secrets are rendered as `<first 4>****<last 4>`.
///
/// Length and slicing are measured in characters rather than bytes, so a
/// secret containing multi-byte UTF-8 characters cannot cause a panic from
/// slicing in the middle of a character.
fn mask_secret(secret: &str) -> String {
    let chars: Vec<char> = secret.chars().collect();
    if chars.len() <= 8 {
        return "****".to_string();
    }

    let head: String = chars[..4].iter().collect();
    let tail: String = chars[chars.len() - 4..].iter().collect();
    format!("{}****{}", head, tail)
}
|
||||
|
||||
fn extract_rows_loose(content: &str) -> Vec<TranslatedRow> {
|
||||
let mut rows = Vec::new();
|
||||
let mut cursor = 0usize;
|
||||
|
||||
while let Some(id_key_pos) = content[cursor..].find("\"id\"") {
|
||||
let id_key_pos = cursor + id_key_pos;
|
||||
let Some((id, after_id)) = extract_field_value(content, id_key_pos, "id") else {
|
||||
cursor = id_key_pos + 4;
|
||||
continue;
|
||||
};
|
||||
let Some(text_key_rel) = content[after_id..].find("\"text\"") else {
|
||||
cursor = after_id;
|
||||
continue;
|
||||
};
|
||||
let text_key_pos = after_id + text_key_rel;
|
||||
let Some((text, after_text)) = extract_field_value(content, text_key_pos, "text") else {
|
||||
cursor = text_key_pos + 6;
|
||||
continue;
|
||||
};
|
||||
|
||||
rows.push(TranslatedRow { id, text });
|
||||
cursor = after_text;
|
||||
}
|
||||
|
||||
rows
|
||||
}
|
||||
|
||||
fn extract_field_value(content: &str, key_pos: usize, key: &str) -> Option<(String, usize)> {
|
||||
let search_start = key_pos + key.len() + 2;
|
||||
let colon_rel = content[search_start..].find(':')?;
|
||||
let after_colon = search_start + colon_rel + 1;
|
||||
let first_quote_rel = content[after_colon..].find('"')?;
|
||||
let value_start = after_colon + first_quote_rel;
|
||||
let (value, next_index) = parse_json_string(content, value_start)?;
|
||||
Some((value, next_index))
|
||||
}
|
||||
|
||||
fn parse_json_string(content: &str, start_quote: usize) -> Option<(String, usize)> {
|
||||
let bytes = content.as_bytes();
|
||||
if *bytes.get(start_quote)? != b'"' {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut end = start_quote + 1;
|
||||
let mut escaped = false;
|
||||
while let Some(&byte) = bytes.get(end) {
|
||||
if escaped {
|
||||
escaped = false;
|
||||
end += 1;
|
||||
continue;
|
||||
}
|
||||
match byte {
|
||||
b'\\' => {
|
||||
escaped = true;
|
||||
end += 1;
|
||||
}
|
||||
b'"' => {
|
||||
let raw = &content[start_quote..=end];
|
||||
let parsed = serde_json::from_str::<String>(raw).ok()?;
|
||||
return Some((parsed, end + 1));
|
||||
}
|
||||
_ => end += 1,
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
@ -1,6 +1,12 @@
|
||||
use std::path::PathBuf;
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
sync::mpsc,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use ndarray::{Array1, Array2, Array3};
|
||||
use ort::{session::Session, value::TensorRef};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct VadConfig {
|
||||
@ -15,16 +21,15 @@ impl Default for VadConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
sample_rate: 16_000,
|
||||
threshold: 0.015,
|
||||
min_speech_ms: 250,
|
||||
min_silence_ms: 180,
|
||||
pad_ms: 120,
|
||||
threshold: 0.01,
|
||||
min_speech_ms: 180,
|
||||
min_silence_ms: 320,
|
||||
pad_ms: 220,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct VadEngine {
|
||||
#[allow(dead_code)]
|
||||
model_path: Option<PathBuf>,
|
||||
config: VadConfig,
|
||||
}
|
||||
@ -42,23 +47,130 @@ impl VadEngine {
|
||||
}
|
||||
|
||||
/// Detects speech ranges in `samples`.
///
/// When a model path is configured, ONNX-based detection runs on a worker
/// thread and is given up to three seconds to answer. Its result is used only
/// if it is a non-empty list; an empty result, an error, a timeout, or a
/// disconnected worker all fall through to the energy-based detector.
///
/// NOTE(review): on timeout the worker's handle has already been dropped, so the
/// thread is neither joined nor cancelled and keeps running the inference in
/// the background. The three-second budget is also independent of the audio
/// length — confirm this is adequate for long recordings.
pub fn detect_segments(&self, samples: &[f32]) -> Vec<(f32, f32)> {
    if let Some(model_path) = &self.model_path {
        // Owned copies are moved into the worker so it does not borrow `self`
        // or the caller's sample slice.
        let model_path = model_path.clone();
        let samples = samples.to_vec();
        let config = self.config.clone();
        let (sender, receiver) = mpsc::channel();

        std::thread::spawn(move || {
            let engine = VadEngine {
                model_path: Some(model_path.clone()),
                config,
            };
            let result = engine.detect_segments_with_onnx(&samples, &model_path);
            // The receiver may already be gone after a timeout; ignore that.
            let _ = sender.send(result);
        });

        match receiver.recv_timeout(Duration::from_secs(3)) {
            Ok(Ok(result)) if !result.is_empty() => return result,
            // Model ran but produced no ranges: fall back without logging.
            Ok(Ok(_)) => {}
            Ok(Err(error)) => {
                eprintln!("silero vad failed, falling back to energy detection: {error:#}");
            }
            Err(mpsc::RecvTimeoutError::Timeout) => {
                eprintln!("silero vad timed out, falling back to energy detection");
            }
            Err(mpsc::RecvTimeoutError::Disconnected) => {
                eprintln!("silero vad worker disconnected, falling back to energy detection");
            }
        }
    }

    self.detect_segments_with_energy(samples)
}
|
||||
|
||||
/// Runs the ONNX VAD model at `model_path` over `samples` and merges the
/// per-chunk scores into speech ranges.
///
/// A new session is built and the model file is loaded on every call. The
/// audio is fed in 512-sample chunks (the last chunk is zero-padded), together
/// with the configured sample rate and a `(2, 1, 128)` state tensor that starts
/// at zero and is replaced by the model's second output after each chunk.
///
/// # Errors
/// Returns an error if the session cannot be built, the model cannot be
/// loaded, a tensor cannot be created, inference fails, or an output cannot be
/// read as `f32`.
///
/// NOTE(review): inputs are passed positionally as (audio, sample rate, state);
/// confirm this matches the input order declared by the model file in use.
fn detect_segments_with_onnx(&self, samples: &[f32], model_path: &Path) -> Result<Vec<(f32, f32)>> {
    let mut session = Session::builder()
        .context("failed to build onnx session")?
        .commit_from_file(model_path)
        .with_context(|| format!("failed to load silero vad model: {}", model_path.display()))?;

    let chunk_size = 512usize;
    // Recurrent state carried from one chunk to the next.
    let mut state = Array3::<f32>::zeros((2, 1, 128));
    let sr = Array1::<i64>::from_vec(vec![self.config.sample_rate as i64]);
    let mut speech_probabilities = Vec::new();

    for chunk in samples.chunks(chunk_size) {
        // Zero-pad so every input tensor has exactly `chunk_size` samples.
        let mut padded = vec![0.0_f32; chunk_size];
        padded[..chunk.len()].copy_from_slice(chunk);
        let input = Array2::from_shape_vec((1, chunk_size), padded)
            .context("failed to build vad input tensor")?;

        let outputs = session
            .run(ort::inputs![
                TensorRef::from_array_view(input.view())?,
                TensorRef::from_array_view(sr.view())?,
                TensorRef::from_array_view(state.view())?
            ])
            .context("silero vad inference failed")?;

        let first = &outputs[0];
        let (_, probs) = first
            .try_extract_tensor::<f32>()
            .context("failed to extract vad probabilities")?;
        // Use the largest value in the first output as this chunk's score
        // (never below 0.0 because the fold starts there).
        let probability = probs
            .iter()
            .copied()
            .fold(0.0_f32, f32::max);
        speech_probabilities.push(probability);

        if outputs.len() > 1 {
            let second = &outputs[1];
            let (_, next_state) = second
                .try_extract_tensor::<f32>()
                .context("failed to extract vad state")?;
            // Only adopt the new state when its element count matches; otherwise
            // the previous state is silently kept.
            if next_state.len() == state.len() {
                state = Array3::from_shape_vec((2, 1, 128), next_state.to_vec())
                    .context("failed to rebuild vad state")?;
            }
        }
    }

    // NOTE(review): the merge step compares these scores against
    // `config.threshold`, the same setting the energy fallback scales —
    // confirm one value suits both detectors.
    Ok(self.merge_probabilities(&speech_probabilities, chunk_size))
}
|
||||
|
||||
fn detect_segments_with_energy(&self, samples: &[f32]) -> Vec<(f32, f32)> {
|
||||
let frame_size = (self.config.sample_rate / 50).max(1);
|
||||
let mut energies = Vec::new();
|
||||
for chunk in samples.chunks(frame_size) {
|
||||
let energy = chunk.iter().map(|sample| sample.abs()).sum::<f32>() / chunk.len() as f32;
|
||||
energies.push(energy);
|
||||
}
|
||||
|
||||
if energies.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let dynamic_threshold = self.dynamic_energy_threshold(&energies);
|
||||
eprintln!(
|
||||
"vad: using energy fallback, frames={}, threshold={:.5}",
|
||||
energies.len(),
|
||||
dynamic_threshold
|
||||
);
|
||||
self.merge_probabilities_with_threshold(&energies, frame_size, dynamic_threshold)
|
||||
}
|
||||
|
||||
fn merge_probabilities(&self, frames: &[f32], frame_size: usize) -> Vec<(f32, f32)> {
|
||||
self.merge_probabilities_with_threshold(frames, frame_size, self.config.threshold)
|
||||
}
|
||||
|
||||
fn merge_probabilities_with_threshold(
|
||||
&self,
|
||||
frames: &[f32],
|
||||
frame_size: usize,
|
||||
threshold: f32,
|
||||
) -> Vec<(f32, f32)> {
|
||||
let min_speech_frames = (self.config.min_speech_ms / 20).max(1);
|
||||
let min_silence_frames = (self.config.min_silence_ms / 20).max(1);
|
||||
let pad_seconds = self.config.pad_ms as f32 / 1000.0;
|
||||
|
||||
let mut frames = Vec::new();
|
||||
for chunk in samples.chunks(frame_size) {
|
||||
let energy = chunk.iter().map(|sample| sample.abs()).sum::<f32>() / chunk.len() as f32;
|
||||
frames.push(energy);
|
||||
}
|
||||
|
||||
let mut result = Vec::new();
|
||||
let mut start_frame: Option<usize> = None;
|
||||
let mut silent_frames = 0usize;
|
||||
|
||||
for (index, energy) in frames.iter().enumerate() {
|
||||
if *energy >= self.config.threshold {
|
||||
for (index, probability) in frames.iter().enumerate() {
|
||||
if *probability >= threshold {
|
||||
if start_frame.is_none() {
|
||||
start_frame = Some(index);
|
||||
}
|
||||
@ -90,11 +202,32 @@ impl VadEngine {
|
||||
}
|
||||
}
|
||||
|
||||
if result.is_empty() && !samples.is_empty() {
|
||||
let total_seconds = samples.len() as f32 / self.config.sample_rate as f32;
|
||||
if result.is_empty() && !frames.is_empty() {
|
||||
let total_seconds = (frames.len() * frame_size) as f32 / self.config.sample_rate as f32;
|
||||
result.push((0.0, total_seconds));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn dynamic_energy_threshold(&self, energies: &[f32]) -> f32 {
|
||||
let mut sorted = energies.to_vec();
|
||||
sorted.sort_by(|left, right| left.total_cmp(right));
|
||||
|
||||
let mean = energies.iter().sum::<f32>() / energies.len() as f32;
|
||||
let p70 = percentile(&sorted, 0.70);
|
||||
let p90 = percentile(&sorted, 0.90);
|
||||
let adaptive = (mean + (p90 - mean) * 0.18).max(p70 * 0.72);
|
||||
|
||||
adaptive.max(self.config.threshold * 0.45)
|
||||
}
|
||||
}
|
||||
|
||||
/// Nearest-rank percentile lookup on an already sorted slice.
///
/// `ratio` is clamped to `0.0..=1.0` and mapped onto the index range
/// `0..=len-1`, rounding to the nearest index. Returns `0.0` for an empty
/// slice.
fn percentile(sorted: &[f32], ratio: f32) -> f32 {
    let Some(last) = sorted.len().checked_sub(1) else {
        return 0.0;
    };

    let position = (last as f32 * ratio.clamp(0.0, 1.0)).round() as usize;
    sorted[position.min(last)]
}
|
||||
|
||||
@ -1,15 +1,12 @@
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use whisper_rs::{
|
||||
FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters,
|
||||
};
|
||||
|
||||
use crate::models::{SubtitleSegment, TargetLanguage};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum WhisperTaskKind {
|
||||
Transcribe,
|
||||
Translate,
|
||||
}
|
||||
|
||||
/// Whisper transcription engine bound to an optional local model file.
pub struct WhisperEngine {
    /// Path to the local model file; `infer_segments` returns an error when
    /// this is `None`.
    model_path: Option<String>,
}
|
||||
@ -19,14 +16,19 @@ impl WhisperEngine {
|
||||
Self { model_path }
|
||||
}
|
||||
|
||||
pub fn infer_segments(
|
||||
pub fn infer_segments<F>(
|
||||
&self,
|
||||
wav_path: &Path,
|
||||
task_id: &str,
|
||||
source_lang: Option<&str>,
|
||||
target_lang: &TargetLanguage,
|
||||
should_translate: bool,
|
||||
speech_ranges: &[(f32, f32)],
|
||||
) -> Result<Vec<SubtitleSegment>> {
|
||||
mut on_progress: F,
|
||||
) -> Result<Vec<SubtitleSegment>>
|
||||
where
|
||||
F: FnMut(f32) -> Result<()>,
|
||||
{
|
||||
let Some(model_path) = &self.model_path else {
|
||||
return Err(anyhow!(
|
||||
"whisper model path is missing. Please provide a local ggml model path for task {}",
|
||||
@ -38,34 +40,264 @@ impl WhisperEngine {
|
||||
return Err(anyhow!("whisper model not found: {model_path}"));
|
||||
}
|
||||
|
||||
let _wav_path = wav_path;
|
||||
let _task_kind = match target_lang {
|
||||
TargetLanguage::En => WhisperTaskKind::Translate,
|
||||
TargetLanguage::Zh => WhisperTaskKind::Transcribe,
|
||||
};
|
||||
let audio = load_audio_f32(wav_path)?;
|
||||
let total_seconds = audio.len() as f32 / 16_000.0;
|
||||
let normalized_ranges = normalize_speech_ranges(speech_ranges, audio.len());
|
||||
let context = WhisperContext::new_with_params(
|
||||
model_path,
|
||||
WhisperContextParameters::default(),
|
||||
)
|
||||
.with_context(|| format!("failed to load whisper model: {model_path}"))?;
|
||||
let mut state = context.create_state().context("failed to create whisper state")?;
|
||||
|
||||
let mut segments = Vec::new();
|
||||
for (index, (start, end)) in speech_ranges.iter().enumerate() {
|
||||
segments.push(SubtitleSegment {
|
||||
id: format!("seg-{:04}", index + 1),
|
||||
task_id: task_id.to_string(),
|
||||
start: *start,
|
||||
end: *end,
|
||||
source_text: format!(
|
||||
"[待接入 whisper-rs] {} -> {},源语言:{},模型:{}",
|
||||
start,
|
||||
end,
|
||||
source_lang.unwrap_or("auto"),
|
||||
model_path
|
||||
),
|
||||
translated_text: if matches!(target_lang, TargetLanguage::En) {
|
||||
Some("Pending native Whisper translation".to_string())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
});
|
||||
eprintln!(
|
||||
"whisper: processing {} speech ranges (normalized from {}), coverage={:.1}%",
|
||||
normalized_ranges.len(),
|
||||
speech_ranges.len(),
|
||||
speech_coverage_ratio(&normalized_ranges, total_seconds) * 100.0
|
||||
);
|
||||
for (range_index, (start, end)) in normalized_ranges.iter().enumerate() {
|
||||
let clip = slice_audio(&audio, *start, *end);
|
||||
if clip.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let progress_base = range_index as f32 / normalized_ranges.len().max(1) as f32;
|
||||
let progress_span = 1.0 / normalized_ranges.len().max(1) as f32;
|
||||
on_progress(progress_base)?;
|
||||
|
||||
let clip_segments = transcribe_clip(
|
||||
&mut state,
|
||||
&clip,
|
||||
range_index,
|
||||
*start,
|
||||
*end,
|
||||
task_id,
|
||||
source_lang,
|
||||
target_lang,
|
||||
should_translate,
|
||||
segments.len(),
|
||||
)?;
|
||||
segments.extend(clip_segments);
|
||||
|
||||
on_progress((progress_base + progress_span).min(1.0))?;
|
||||
}
|
||||
|
||||
let vad_text_len = text_len(&segments);
|
||||
let vad_end = last_end(&segments);
|
||||
let vad_coverage = speech_coverage_ratio(&normalized_ranges, total_seconds);
|
||||
let should_retry_full_audio = !audio.is_empty()
|
||||
&& (segments.is_empty()
|
||||
|| vad_coverage < 0.72
|
||||
|| vad_end + 2.5 < total_seconds
|
||||
|| (total_seconds > 45.0 && vad_text_len < (total_seconds / 2.4) as usize));
|
||||
|
||||
if should_retry_full_audio {
|
||||
eprintln!(
|
||||
"whisper: VAD result looks incomplete, retrying full audio (segments={}, chars={}, end={:.2}s/{:.2}s, coverage={:.1}%)",
|
||||
segments.len(),
|
||||
vad_text_len,
|
||||
vad_end,
|
||||
total_seconds,
|
||||
vad_coverage * 100.0
|
||||
);
|
||||
let full_audio_segments = transcribe_clip(
|
||||
&mut state,
|
||||
&audio,
|
||||
0,
|
||||
0.0,
|
||||
total_seconds,
|
||||
task_id,
|
||||
source_lang,
|
||||
target_lang,
|
||||
should_translate,
|
||||
0,
|
||||
)?;
|
||||
|
||||
if should_prefer_full_audio(&segments, &full_audio_segments, total_seconds) {
|
||||
eprintln!(
|
||||
"whisper: using full-audio transcript (vad_segments={}, full_segments={})",
|
||||
segments.len(),
|
||||
full_audio_segments.len()
|
||||
);
|
||||
segments = full_audio_segments;
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("whisper: total emitted segments={}", segments.len());
|
||||
Ok(segments)
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs Whisper on one audio clip and converts its output into subtitle
/// segments positioned on the full-audio timeline.
///
/// * `state` - Whisper state reused across clips.
/// * `clip` - samples of the clip to transcribe.
/// * `range_index` - zero-based index of the clip, used only for logging.
/// * `start` - clip start; added to every segment's local timestamps.
/// * `end` - clip end; used only for logging.
/// * `task_id` - copied into every emitted segment.
/// * `source_lang` - language passed to Whisper when present; otherwise the
///   language parameter is left at its default.
/// * `segment_offset` - number of segments already emitted, so ids continue
///   the `seg-NNNN` numbering.
///
/// Segments whose trimmed text is empty are dropped. `translated_text` is
/// always `None`.
///
/// NOTE(review): `_target_lang` and `_should_translate` are accepted but never
/// read, and translation is always disabled via `set_translate(false)` —
/// confirm whether native Whisper translation is meant to be wired in here.
///
/// # Errors
/// Fails if inference fails, a reported segment cannot be accessed, or its text
/// cannot be read.
#[allow(clippy::too_many_arguments)]
fn transcribe_clip(
    state: &mut whisper_rs::WhisperState,
    clip: &[f32],
    range_index: usize,
    start: f32,
    end: f32,
    task_id: &str,
    source_lang: Option<&str>,
    _target_lang: &TargetLanguage,
    _should_translate: bool,
    segment_offset: usize,
) -> Result<Vec<SubtitleSegment>> {
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
    // Thread count is fixed at four regardless of the host machine.
    params.set_n_threads(4);
    // Silence Whisper's own console output; progress is logged below instead.
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);
    params.set_token_timestamps(false);
    params.set_translate(false);
    if let Some(lang) = source_lang {
        params.set_language(Some(lang));
    }

    state.full(params, clip).context("whisper inference failed")?;

    let num_segments = state.full_n_segments();
    eprintln!(
        "whisper: range #{}, {:.2}-{:.2}s, samples={}, segments={}",
        range_index + 1,
        start,
        end,
        clip.len(),
        num_segments
    );

    let mut results = Vec::new();
    for offset in 0..num_segments {
        let segment = state
            .get_segment(offset)
            .ok_or_else(|| anyhow!("failed to access whisper segment {offset}"))?;
        let text = segment
            .to_str_lossy()
            .context("failed to get whisper segment text")?
            .trim()
            .to_string();
        if text.is_empty() {
            continue;
        }
        eprintln!("whisper text: {}", text);

        // Presumably Whisper reports timestamps in hundredths of a second, hence
        // the division by 100 to get seconds — TODO confirm against whisper-rs.
        let local_start = segment.start_timestamp() as f32 / 100.0;
        let local_end = segment.end_timestamp() as f32 / 100.0;

        results.push(SubtitleSegment {
            // Ids continue from the caller's running count; skipped empty
            // segments do not consume a number.
            id: format!("seg-{:04}", segment_offset + results.len() + 1),
            task_id: task_id.to_string(),
            // Shift clip-local times onto the full-audio timeline.
            start: start + local_start,
            end: start + local_end,
            source_text: text.clone(),
            translated_text: None,
        });
    }

    Ok(results)
}
|
||||
|
||||
/// Coalesces VAD ranges into larger clips and pads them for transcription.
///
/// * With no ranges, the whole audio (`total_samples` at 16 kHz) is returned as
///   a single range.
/// * Otherwise a range is folded into the currently open one when the gap
///   between them is at most 1.2 s, or when the open range is still shorter
///   than 8 s; in that case the open range's end becomes the new range's end.
/// * Every resulting range is widened by 0.35 s on both sides, with the start
///   clamped to 0 and the end clamped to the total duration.
fn normalize_speech_ranges(ranges: &[(f32, f32)], total_samples: usize) -> Vec<(f32, f32)> {
    let total_seconds = total_samples as f32 / 16_000.0;

    let Some((&first, rest)) = ranges.split_first() else {
        return vec![(0.0, total_seconds)];
    };

    let mut merged = Vec::new();
    let mut open = first;
    for &(start, end) in rest {
        let open_duration = open.1 - open.0;
        let gap = start - open.1;
        let absorb = gap <= 1.2 || open_duration < 8.0;

        if !absorb {
            merged.push(open);
            open = (start, end);
            continue;
        }
        open.1 = end;
    }
    merged.push(open);

    merged
        .into_iter()
        .map(|(start, end)| {
            let padded_start = (start - 0.35).max(0.0);
            let padded_end = (end + 0.35).min(total_seconds);
            (padded_start, padded_end)
        })
        .collect()
}
|
||||
|
||||
fn load_audio_f32(path: &Path) -> Result<Vec<f32>> {
|
||||
let reader = hound::WavReader::open(path)
|
||||
.with_context(|| format!("failed to open wav file: {}", path.display()))?;
|
||||
let spec = reader.spec();
|
||||
if spec.sample_rate != 16_000 {
|
||||
return Err(anyhow!("whisper expects 16k audio, got {}", spec.sample_rate));
|
||||
}
|
||||
if spec.channels != 1 {
|
||||
return Err(anyhow!("whisper expects mono audio, got {}", spec.channels));
|
||||
}
|
||||
|
||||
let samples = reader
|
||||
.into_samples::<i16>()
|
||||
.map(|sample| sample.map(|value| value as f32 / i16::MAX as f32).map_err(anyhow::Error::from))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(samples)
|
||||
}
|
||||
|
||||
/// Copies the samples between `start` and `end` (seconds, at 16 kHz).
///
/// The start index is rounded down and the end index rounded up; both are
/// clamped to the audio length. An inverted range yields an empty vector.
fn slice_audio(audio: &[f32], start: f32, end: f32) -> Vec<f32> {
    let len = audio.len();
    let begin = ((start * 16_000.0).floor() as usize).min(len);
    let finish = ((end * 16_000.0).ceil() as usize).min(len);

    match audio.get(begin..finish) {
        Some(window) => window.to_vec(),
        None => Vec::new(),
    }
}
|
||||
|
||||
/// Fraction of `total_seconds` covered by `ranges`, clamped to `0.0..=1.0`.
///
/// Range lengths are summed as-is (overlaps are counted twice); inverted
/// ranges contribute nothing. Returns `0.0` when `total_seconds` is not
/// positive.
fn speech_coverage_ratio(ranges: &[(f32, f32)], total_seconds: f32) -> f32 {
    if total_seconds <= 0.0 {
        return 0.0;
    }

    let covered: f32 = ranges
        .iter()
        .map(|&(start, end)| {
            let span = end - start;
            span.max(0.0)
        })
        .sum();

    let ratio = covered / total_seconds;
    ratio.clamp(0.0, 1.0)
}
|
||||
|
||||
fn text_len(segments: &[SubtitleSegment]) -> usize {
|
||||
segments
|
||||
.iter()
|
||||
.map(|segment| segment.source_text.chars().count())
|
||||
.sum()
|
||||
}
|
||||
|
||||
fn last_end(segments: &[SubtitleSegment]) -> f32 {
|
||||
segments
|
||||
.iter()
|
||||
.map(|segment| segment.end)
|
||||
.fold(0.0_f32, f32::max)
|
||||
}
|
||||
|
||||
fn should_prefer_full_audio(
|
||||
vad_segments: &[SubtitleSegment],
|
||||
full_audio_segments: &[SubtitleSegment],
|
||||
total_seconds: f32,
|
||||
) -> bool {
|
||||
if full_audio_segments.is_empty() {
|
||||
return vad_segments.is_empty();
|
||||
}
|
||||
if vad_segments.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
let vad_text_len = text_len(vad_segments);
|
||||
let full_text_len = text_len(full_audio_segments);
|
||||
let vad_end = last_end(vad_segments);
|
||||
let full_end = last_end(full_audio_segments);
|
||||
|
||||
full_text_len > vad_text_len + vad_text_len / 5
|
||||
|| full_audio_segments.len() > vad_segments.len() + 2
|
||||
|| full_end > vad_end + 2.0
|
||||
|| (total_seconds > 30.0 && full_end + 1.5 >= total_seconds && vad_end + 3.0 < total_seconds)
|
||||
}
|
||||
|
||||
142
src/App.vue
142
src/App.vue
@ -1,15 +1,28 @@
|
||||
<script setup lang="ts">
|
||||
import { computed, onMounted, ref } from 'vue'
|
||||
import { open } from '@tauri-apps/plugin-dialog'
|
||||
import TaskQueue from './components/TaskQueue.vue'
|
||||
import SubtitleEditor from './components/SubtitleEditor.vue'
|
||||
import { useTaskStore } from './stores/tasks'
|
||||
import type { TargetLanguage } from './lib/types'
|
||||
import type { OutputMode, TargetLanguage, TranslationConfig } from './lib/types'
|
||||
|
||||
const DEFAULT_WHISPER_MODEL = '/Users/kura/Documents/work/tauri/CrossSubtitle/src-tauri/model/ggml-small-q5_1.bin'
|
||||
const DEFAULT_VAD_MODEL = '/Users/kura/Documents/work/tauri/CrossSubtitle/src-tauri/model/silero_vad.onnx'
|
||||
|
||||
const taskStore = useTaskStore()
|
||||
const targetLang = ref<TargetLanguage>('zh')
|
||||
const outputMode = ref<OutputMode>('translate')
|
||||
const sourceLang = ref('auto')
|
||||
const whisperModelPath = ref('')
|
||||
const vadModelPath = ref('')
|
||||
const bilingualOutput = ref(true)
|
||||
const whisperModelPath = ref(DEFAULT_WHISPER_MODEL)
|
||||
const vadModelPath = ref(DEFAULT_VAD_MODEL)
|
||||
const translationConfig = ref<TranslationConfig>({
|
||||
apiBase: localStorage.getItem('llm.apiBase') ?? '',
|
||||
apiKey: localStorage.getItem('llm.apiKey') ?? '',
|
||||
model: localStorage.getItem('llm.model') ?? 'gpt-4o-mini',
|
||||
batchSize: Number(localStorage.getItem('llm.batchSize') ?? '12'),
|
||||
contextSize: Number(localStorage.getItem('llm.contextSize') ?? '3'),
|
||||
})
|
||||
const pending = ref(false)
|
||||
const feedback = ref('')
|
||||
|
||||
@ -19,38 +32,83 @@ onMounted(() => {
|
||||
taskStore.initialize()
|
||||
})
|
||||
|
||||
async function handleFiles(event: Event) {
|
||||
const input = event.target as HTMLInputElement
|
||||
const files = Array.from(input.files ?? [])
|
||||
if (files.length === 0) return
|
||||
/**
 * Saves the current translation settings to `localStorage` under the
 * `llm.*` keys so they can be restored on the next launch.
 *
 * NOTE(review): the API key is stored in plain text alongside the other
 * settings — confirm this is acceptable for the desktop build.
 */
function persistTranslationConfig() {
  const { apiBase, apiKey, model, batchSize, contextSize } = translationConfig.value
  const entries: Array<[string, string]> = [
    ['llm.apiBase', apiBase],
    ['llm.apiKey', apiKey],
    ['llm.model', model],
    ['llm.batchSize', String(batchSize)],
    ['llm.contextSize', String(contextSize)],
  ]

  for (const [key, value] of entries) {
    localStorage.setItem(key, value)
  }
}
|
||||
|
||||
async function submitFiles(filePaths: string[]) {
|
||||
pending.value = true
|
||||
feedback.value = ''
|
||||
|
||||
try {
|
||||
for (const file of files) {
|
||||
const filePath = (file as File & { path?: string }).path
|
||||
if (!filePath) {
|
||||
throw new Error('当前运行环境未暴露本地文件路径,请在 Tauri 桌面环境中使用此功能。')
|
||||
}
|
||||
|
||||
for (const filePath of filePaths) {
|
||||
await taskStore.startTask({
|
||||
filePath,
|
||||
sourceLang: sourceLang.value === 'auto' ? null : sourceLang.value,
|
||||
targetLang: targetLang.value,
|
||||
outputMode: outputMode.value,
|
||||
bilingualOutput: bilingualOutput.value,
|
||||
translationConfig: outputMode.value === 'translate' ? translationConfig.value : null,
|
||||
whisperModelPath: whisperModelPath.value || null,
|
||||
vadModelPath: vadModelPath.value || null,
|
||||
})
|
||||
}
|
||||
feedback.value = `已提交 ${files.length} 个任务。`
|
||||
feedback.value = `已提交 ${filePaths.length} 个任务。`
|
||||
} catch (error) {
|
||||
feedback.value = error instanceof Error ? error.message : '任务提交失败'
|
||||
} finally {
|
||||
pending.value = false
|
||||
input.value = ''
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Opens the native file dialog for media files and submits whatever the user
 * picked as new tasks.
 *
 * The translation settings are persisted before the dialog opens. Cancelling
 * the dialog (or an empty selection) is a no-op. Any error raised along the
 * way is shown in the feedback line instead of being rethrown.
 */
async function handlePickFiles() {
  try {
    feedback.value = ''
    persistTranslationConfig()

    const mediaFilter = {
      name: '媒体文件',
      extensions: ['mp3', 'wav', 'm4a', 'flac', 'mp4', 'mkv', 'mov', 'avi'],
    }
    const selected = await open({
      multiple: true,
      directory: false,
      title: '选择音视频文件',
      filters: [mediaFilter],
    })
    if (!selected) return

    // The dialog result may be a single path or a list of paths.
    const filePaths = Array.isArray(selected) ? selected : [selected]
    if (filePaths.length > 0) {
      await submitFiles(filePaths)
    }
  } catch (error) {
    if (error instanceof Error) {
      feedback.value = `打开文件对话框失败:${error.message}`
    } else {
      feedback.value = '打开文件对话框失败'
    }
  }
}
|
||||
|
||||
/**
 * Change handler for the hidden `<input type="file">`.
 *
 * Uses the `path` property of each selected file when the runtime exposes
 * one. If no file carries a path, it falls back to the native file dialog.
 * The input is reset afterwards.
 */
async function handleFiles(event: Event) {
  const input = event.target as HTMLInputElement

  const filePaths: string[] = []
  for (const file of Array.from(input.files ?? [])) {
    const { path } = file as File & { path?: string }
    if (path) {
      filePaths.push(path)
    }
  }

  if (filePaths.length === 0) {
    await handlePickFiles()
  } else {
    await submitFiles(filePaths)
  }
  input.value = ''
}
|
||||
|
||||
async function handleExport(format: 'srt' | 'vtt' | 'ass') {
|
||||
if (!selectedTask.value) return
|
||||
const output = await taskStore.exportTask(selectedTask.value.id, format)
|
||||
@ -74,30 +132,76 @@ async function handleExport(format: 'srt' | 'vtt' | 'ass') {
|
||||
<div class="glass rounded-[2rem] p-6 shadow-float">
|
||||
<h2 class="font-display text-2xl font-semibold text-white">新建任务</h2>
|
||||
<div class="mt-5 grid gap-4">
|
||||
<label class="text-sm text-slate-200">
|
||||
输出模式
|
||||
<select v-model="outputMode" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none">
|
||||
<option value="source">原文字幕</option>
|
||||
<option value="translate">翻译字幕</option>
|
||||
</select>
|
||||
</label>
|
||||
<label class="flex items-center gap-3 rounded-2xl border border-white/10 bg-slate-950/40 px-4 py-3 text-sm text-slate-200">
|
||||
<input v-model="bilingualOutput" type="checkbox" class="h-4 w-4" />
|
||||
导出双语字幕
|
||||
</label>
|
||||
<label class="text-sm text-slate-200">
|
||||
目标语言
|
||||
<select v-model="targetLang" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none">
|
||||
<select
|
||||
v-model="targetLang"
|
||||
class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none"
|
||||
:disabled="outputMode === 'source'"
|
||||
>
|
||||
<option value="zh">中文</option>
|
||||
<option value="en">英文</option>
|
||||
</select>
|
||||
</label>
|
||||
<label class="text-sm text-slate-200">
|
||||
源语言
|
||||
<input v-model="sourceLang" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" placeholder="auto / en / ja / zh" />
|
||||
<input v-model="sourceLang" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" placeholder="auto / zh / en / ja" />
|
||||
</label>
|
||||
<template v-if="outputMode === 'translate'">
|
||||
<label class="text-sm text-slate-200">
|
||||
LLM API Base
|
||||
<input v-model="translationConfig.apiBase" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" placeholder="https://api.openai.com/v1" />
|
||||
</label>
|
||||
<label class="text-sm text-slate-200">
|
||||
LLM API Key
|
||||
<input v-model="translationConfig.apiKey" type="password" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" placeholder="sk-..." />
|
||||
</label>
|
||||
<label class="text-sm text-slate-200">
|
||||
LLM Model
|
||||
<input v-model="translationConfig.model" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" placeholder="gpt-4o-mini" />
|
||||
</label>
|
||||
<div class="grid gap-4 md:grid-cols-2">
|
||||
<label class="text-sm text-slate-200">
|
||||
批大小
|
||||
<input v-model.number="translationConfig.batchSize" type="number" min="10" max="15" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" />
|
||||
</label>
|
||||
<label class="text-sm text-slate-200">
|
||||
上下文条数
|
||||
<input v-model.number="translationConfig.contextSize" type="number" min="0" max="5" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" />
|
||||
</label>
|
||||
</div>
|
||||
</template>
|
||||
<label class="text-sm text-slate-200">
|
||||
Whisper 模型路径
|
||||
<input v-model="whisperModelPath" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" placeholder="/models/ggml-large-v3-turbo.bin" />
|
||||
<input v-model="whisperModelPath" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" :placeholder="DEFAULT_WHISPER_MODEL" />
|
||||
</label>
|
||||
<label class="text-sm text-slate-200">
|
||||
VAD 模型路径
|
||||
<input v-model="vadModelPath" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" placeholder="/models/silero_vad.onnx(可选)" />
|
||||
<input v-model="vadModelPath" class="mt-2 w-full rounded-2xl border border-white/10 bg-slate-950/70 px-4 py-3 text-white outline-none" :placeholder="DEFAULT_VAD_MODEL" />
|
||||
</label>
|
||||
<label class="rounded-[1.75rem] border border-dashed border-cyan-300/30 bg-cyan-300/5 p-5 text-center text-sm text-slate-200 transition hover:border-cyan-300/60 hover:bg-cyan-300/10">
|
||||
<input class="hidden" type="file" multiple @change="handleFiles" />
|
||||
<span v-if="pending">任务提交中...</span>
|
||||
<span v-else>点击选择音视频文件,支持多文件排队</span>
|
||||
</label>
|
||||
<button
|
||||
class="rounded-full bg-cyan-400 px-4 py-3 text-sm font-medium text-slate-950 transition hover:bg-cyan-300"
|
||||
type="button"
|
||||
@click="handlePickFiles"
|
||||
>
|
||||
使用原生文件对话框选择文件
|
||||
</button>
|
||||
<p class="min-h-6 text-sm text-emerald-300">{{ feedback }}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -8,6 +8,7 @@ export type TaskStatus =
|
||||
| 'failed'
|
||||
|
||||
export type TargetLanguage = 'zh' | 'en'
|
||||
export type OutputMode = 'source' | 'translate'
|
||||
|
||||
export interface SubtitleSegment {
|
||||
id: string
|
||||
@ -24,16 +25,29 @@ export interface SubtitleTask {
|
||||
fileName: string
|
||||
sourceLang?: string | null
|
||||
targetLang: TargetLanguage
|
||||
outputMode: OutputMode
|
||||
bilingualOutput: boolean
|
||||
status: TaskStatus
|
||||
progress: number
|
||||
segments: SubtitleSegment[]
|
||||
error?: string | null
|
||||
}
|
||||
|
||||
/** Settings for the LLM endpoint used to translate subtitles. */
export interface TranslationConfig {
  /** Base URL of the LLM API. */
  apiBase: string
  /** API key sent to the LLM endpoint. */
  apiKey: string
  /** Model identifier requested from the endpoint. */
  model: string
  /** Batch size for translation requests — presumably subtitle rows per request; confirm against the backend. */
  batchSize: number
  /** Number of context rows — presumably neighbouring subtitles supplied with each batch; confirm against the backend. */
  contextSize: number
}
|
||||
|
||||
/** Payload describing one subtitle task to start. */
export interface StartTaskPayload {
  /** Local path of the media file to process. */
  filePath: string
  /** Source language code; `null` or omitted when not specified. */
  sourceLang?: string | null
  /** Language to translate into. */
  targetLang: TargetLanguage
  /** Whether to produce source-only or translated subtitles. */
  outputMode: OutputMode
  /** Whether exported subtitles should be bilingual. */
  bilingualOutput: boolean
  /** LLM translation settings; `null` or omitted when not supplied. */
  translationConfig?: TranslationConfig | null
  /** Path to the Whisper model file; `null` or omitted when not set. */
  whisperModelPath?: string | null
  /** Path to the VAD model file; `null` or omitted when not set. */
  vadModelPath?: string | null
}
|
||||
|
||||
@ -4,6 +4,7 @@ import vue from '@vitejs/plugin-vue'
|
||||
export default defineConfig({
|
||||
plugins: [vue()],
|
||||
server: {
|
||||
host: '127.0.0.1',
|
||||
port: 1420,
|
||||
strictPort: true,
|
||||
},
|
||||
|
||||
Loading…
Reference in New Issue
Block a user