SquirrelConfig/others/script/rime/others.go

148 lines
3.0 KiB
Go
Raw Normal View History

2024-03-20 03:18:06 +00:00
package rime
import (
"bufio"
"fmt"
mapset "github.com/deckarep/golang-set/v2"
"log"
"os"
"strconv"
"strings"
"unicode/utf8"
)
// Temp is a scratch entry point for temporary, ad-hoc helper calls.
// Every call in its body is currently commented out, so calling it does nothing.
func Temp() {
// GeneratePinyinTest("你的行动力")
// GeneratePinyinTest("都挺长的")
// GeneratePinyinTest("血条长")
// findP(BasePath, "血")
// Pinyin(ExtPath)
// AddWeight(ExtPath, 100)
}
// polyphone reports, for every character in the table at HanziPath that has
// more than one reading, whether each reading takes part in automatic pinyin
// annotation.
//
// Lines up to and including the first line equal to "..." are skipped as
// header. After that, every line that is neither empty nor starts with "#"
// must consist of exactly three tab-separated fields: character, pinyin and an
// integer weight.
//
// A reading is marked automatic when it carries the character's highest weight
// or when its weight is more than 5% of that highest weight. For each
// polyphonic character the character is printed on one line, followed by one
// "pinyin weight isAuto" line per reading. Characters are reported in the order
// they first appear in the file.
//
// Any I/O error or malformed line terminates the program via log.Fatalln.
func polyphone() {
	file, err := os.Open(HanziPath)
	if err != nil {
		log.Fatalln(err)
	}
	defer file.Close()

	type py struct {
		pinyin string
		weight int
		isAuto bool // whether the reading takes part in automatic annotation
	}

	// m collects every reading per character; order remembers first appearance
	// so the report is deterministic (map iteration order is random).
	m := make(map[string][]py)
	var order []string

	sc := bufio.NewScanner(file)
	isMark := false
	for sc.Scan() {
		line := sc.Text()
		if !isMark {
			// Still inside the header: skip until the "..." marker has been seen.
			if line == "..." {
				isMark = true
			}
			continue
		}
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		parts := strings.Split(line, "\t")
		if len(parts) != 3 {
			log.Fatalln("len(parts) != 3", line)
		}
		hanzi, pinyin := parts[0], parts[1]
		// A weight that does not parse would silently become 0 and distort the
		// ratio computed below, so treat it like any other malformed line.
		weight, err := strconv.Atoi(parts[2])
		if err != nil {
			log.Fatalln("invalid weight", line, err)
		}
		if _, seen := m[hanzi]; !seen {
			order = append(order, hanzi)
		}
		m[hanzi] = append(m[hanzi], py{pinyin: pinyin, weight: weight})
	}
	// Scan returns false on read errors and over-long lines as well as on EOF.
	if err := sc.Err(); err != nil {
		log.Fatalln(err)
	}

	for _, hanzi := range order {
		pys := m[hanzi]
		if len(pys) == 1 {
			continue
		}

		// Highest weight among the readings (never below 0).
		maxWeight := 0
		for _, p := range pys {
			if p.weight > maxWeight {
				maxWeight = p.weight
			}
		}

		// pys shares its backing array with m[hanzi], so this updates the map entry.
		for i := range pys {
			w := pys[i].weight
			pys[i].isAuto = w == maxWeight || float64(w)/float64(maxWeight) > 0.05
		}

		fmt.Println(hanzi)
		for _, p := range pys {
			fmt.Println(p.pinyin, p.weight, p.isAuto)
		}
	}
}
// findP removes from the dictionary file at dictPath every entry whose text
// (first tab-separated field) contains ch and is at least three runes long.
// The removed lines are written to "1.txt" in the current working directory,
// and the dictionary file is rewritten in place with the remaining lines.
//
// Lines up to and including the first line equal to mark are kept untouched,
// as are empty lines and lines starting with "#". Every other line must have
// exactly three tab-separated fields.
//
// Any I/O error or malformed line terminates the program via log.Fatalln. The
// dictionary is only truncated after the whole file has been read
// successfully, so a read failure never leaves a partially rewritten file.
func findP(dictPath string, ch string) {
	file, err := os.OpenFile(dictPath, os.O_RDWR, 0666)
	if err != nil {
		log.Fatalln(err)
	}
	defer file.Close()

	outFile, err := os.Create("1.txt")
	if err != nil {
		log.Fatalln(err)
	}
	defer outFile.Close()
	// bufio.Writer keeps the first write error and returns it from Flush, so
	// the individual WriteString calls below are checked once at Flush time.
	removed := bufio.NewWriter(outFile)

	var lines []string // lines to keep, in original order
	isMark := false
	sc := bufio.NewScanner(file)
	set := mapset.NewSet[string]() // texts of the entries kept so far
	for sc.Scan() {
		line := sc.Text()
		if !isMark {
			// Header section: keep everything up to and including the marker.
			lines = append(lines, line)
			if line == mark {
				isMark = true
			}
			continue
		}
		if line == "" || strings.HasPrefix(line, "#") {
			lines = append(lines, line)
			continue
		}
		parts := strings.Split(line, "\t")
		if len(parts) != 3 {
			log.Fatalln("len(parts) != 3", line)
		}
		text := parts[0]
		// NOTE(review): only kept texts are added to set, and a text that meets
		// the first two conditions is never kept, so !set.Contains(text) appears
		// to always hold here. The check is preserved unchanged.
		if strings.Contains(text, ch) && utf8.RuneCountInString(text) >= 3 && !set.Contains(text) {
			removed.WriteString(line + "\n")
		} else {
			set.Add(text)
			lines = append(lines, line)
		}
	}
	// Scan returns false on read errors and over-long lines as well as on EOF.
	// In that case lines is incomplete and must not overwrite the dictionary.
	if err := sc.Err(); err != nil {
		log.Fatalln(err)
	}
	if err := removed.Flush(); err != nil {
		log.Fatalln(err)
	}

	// Rewrite the dictionary in place from the kept lines.
	if err := file.Truncate(0); err != nil {
		log.Fatalln(err)
	}
	if _, err := file.Seek(0, 0); err != nil {
		log.Fatalln(err)
	}
	w := bufio.NewWriter(file)
	for _, line := range lines {
		w.WriteString(line + "\n")
	}
	if err := w.Flush(); err != nil {
		log.Fatalln(err)
	}
	fmt.Println("done")
}