SquirrelConfig/others/script/rime/rime.go

145 lines
3.3 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package rime
import (
"bufio"
"fmt"
mapset "github.com/deckarep/golang-set/v2"
"log"
"os"
"os/user"
"path"
"path/filepath"
"strconv"
"strings"
"time"
)
// lemma holds the components of a single dictionary entry.
type lemma struct {
text string // Chinese characters of the entry
code string // encoding of the entry
weight int // weight of the entry
}
// Package-level settings: the dictionary marker, file paths under the Rime
// configuration directory, and the sets built from the dictionary files.
// Every value is computed during package initialization; the *Set values
// are produced by calling readToSet on the corresponding path.
var (
mark = "# +_+" // marker in a dictionary file; checking or sorting starts from this line
RimeDir = getRimeDir() // Rime configuration directory
EmojiMapPath = filepath.Join(RimeDir, "others/emoji-map.txt")
EmojiPath = filepath.Join(RimeDir, "opencc/emoji.txt")
HanziPath = filepath.Join(RimeDir, "cn_dicts/8105.dict.yaml")
BasePath = filepath.Join(RimeDir, "cn_dicts/base.dict.yaml")
ExtPath = filepath.Join(RimeDir, "cn_dicts/ext.dict.yaml")
TencentPath = filepath.Join(RimeDir, "cn_dicts/tencent.dict.yaml")
HanziSet = readToSet(HanziPath) // set built from the file at HanziPath
BaseSet = readToSet(BasePath) // set built from the file at BasePath
ExtSet = readToSet(ExtPath) // set built from the file at ExtPath
TencentSet = readToSet(TencentPath) // set built from the file at TencentPath
// NOTE(review): the three declarations below all use the identifier TXT,
// which Go rejects as a redeclaration inside one var block. The original
// names presumably carried a distinguishing (possibly non-ASCII) prefix
// that was lost when this text was extracted - confirm against upstream.
TXT = filepath.Join(RimeDir, "others/script/rime/需要注音.txt")
TXT = filepath.Join(RimeDir, "others/script/rime/错别字.txt")
TXT = filepath.Join(RimeDir, "others/script/rime/汉字拼音映射.txt")
)
// getRimeDir returns the macOS Rime configuration directory, i.e.
// "Library/Rime" joined onto the current user's home directory.
// If the current user cannot be looked up, the error is logged and the
// program exits.
func getRimeDir() string {
	currentUser, lookupErr := user.Current()
	if lookupErr != nil {
		log.Fatalln(lookupErr)
	}
	home := currentUser.HomeDir
	return filepath.Join(home, "Library/Rime")
}
// readToSet reads the dictionary file at dictPath into a set that is used
// for checking or sorting.
//
// Every line up to and including the first line that starts with mark is
// skipped. For each following line, the text before the first tab (or the
// whole line when it contains no tab) is added to the set.
//
// Failing to open the file, or any error reported by the scanner while
// reading it, is logged and terminates the program.
func readToSet(dictPath string) mapset.Set[string] {
	set := mapset.NewSet[string]()

	file, err := os.Open(dictPath)
	if err != nil {
		log.Fatalln(err)
	}
	defer file.Close()

	sc := bufio.NewScanner(file)
	isMark := false
	for sc.Scan() {
		line := sc.Text()
		if !isMark {
			if strings.HasPrefix(line, mark) {
				isMark = true
			}
			continue
		}
		text, _, _ := strings.Cut(line, "\t")
		set.Add(text)
	}
	// Scan also stops on a read error or an over-long line, not only at
	// EOF. Without this check such a failure would silently yield a
	// truncated set.
	if err := sc.Err(); err != nil {
		log.Fatalln(err)
	}
	return set
}
// printlnTimeCost prints content and the time elapsed since start via
// printfTimeCost, then terminates the output line with a newline.
func printlnTimeCost(content string, start time.Time) {
	printfTimeCost(content, start)
	fmt.Print("\n")
}
// printfTimeCost prints content, a tab, and the number of seconds elapsed
// since start with two decimal places followed by "s". No trailing newline
// is written.
func printfTimeCost(content string, start time.Time) {
	elapsed := time.Since(start)
	fmt.Printf("%s\t%.2fs", content, elapsed.Seconds())
}
// contains reports whether item is equal to at least one element of arr.
// It returns false for a nil or empty slice.
func contains(arr []string, item string) bool {
	for i := 0; i < len(arr); i++ {
		if arr[i] == item {
			return true
		}
	}
	return false
}
// AddWeight rewrites the dictionary file at dictPath in place so that every
// non-empty line after the marker line (the first line starting with mark)
// ends with the given weight. The original comment names the ext and
// tencent dictionaries as the intended targets.
//
// For each such line:
//   - if the line has more than one tab-separated field and the last field
//     parses as an integer, that field is replaced by weight;
//   - otherwise a tab and weight are appended to the line.
//
// Lines up to and including the marker, and empty lines, are left as they
// are. A failure to read or write the file is logged and terminates the
// program; in that case the timing line is not printed.
func AddWeight(dictPath string, weight int) {
	// Console output. Deferred so that it is printed when the function
	// returns; the arguments (including time.Now()) are evaluated right
	// here, so the reported duration covers the whole run.
	defer printlnTimeCost("加权重\t"+path.Base(dictPath), time.Now())

	// Read the whole file and split it into lines.
	file, err := os.ReadFile(dictPath)
	if err != nil {
		log.Fatal(err)
	}
	lines := strings.Split(string(file), "\n")
	weightField := strconv.Itoa(weight)

	isMark := false
	for i, line := range lines {
		if !isMark {
			if strings.HasPrefix(line, mark) {
				isMark = true
			}
			continue
		}
		// Skip empty lines.
		if line == "" {
			continue
		}

		parts := strings.Split(line, "\t")
		last := len(parts) - 1
		// Only a field that follows the text column can be a weight. A
		// line consisting of a single numeric field is an entry whose text
		// happens to be digits and must not be overwritten.
		if last > 0 {
			if _, err := strconv.Atoi(parts[last]); err == nil {
				lines[i] = strings.Join(parts[:last], "\t") + "\t" + weightField
				continue
			}
		}
		lines[i] = line + "\t" + weightField
	}

	// Write the result back to the same file.
	resultString := strings.Join(lines, "\n")
	err = os.WriteFile(dictPath, []byte(resultString), 0644)
	if err != nil {
		log.Fatal(err)
	}
}