SeqCombGo/parser.go

120 lines
2.5 KiB
Go
Raw Normal View History

2022-01-01 16:21:41 +08:00
package main
import (
"flag"
"fmt"
fas_parser "gocomb/src"
2022-01-03 12:38:07 +08:00
"os"
"strings"
2022-01-03 12:38:07 +08:00
"text/template"
2022-01-01 16:21:41 +08:00
)
// dna holds what was parsed from a single input file.
type dna struct {
	// name is the input file path as given on the command line.
	name string
	// min_dna is the first value returned by fas_parser.Fas_parser for the
	// file; main treats its keys as row (taxon) names and its values as
	// the sequence text for that row.
	min_dna map[string]string
	// count is the second value returned by fas_parser.Fas_parser; main
	// uses it as the number of characters (columns) this file contributes.
	count int
}
// charset describes the column range one input file occupies in the
// combined matrix. Fields are exported so text/template can read them.
type charset struct {
	// Name is the name of the input file the range belongs to.
	Name string
	// From is the 1-based index of the first column of the range.
	From int
	// To is the 1-based index of the last column of the range (inclusive).
	To int
}
type tmpl_data struct {
2022-01-04 13:55:05 +08:00
Ntax int
Nchar int
Matrix map[string]string
Charset []charset
2022-01-03 12:38:07 +08:00
}
2022-01-01 16:21:41 +08:00
func main() {
2022-01-03 12:38:07 +08:00
// 读取命令行,这里一定要是指针
file_export := flag.String("o", "a.nex", "files name wait to out")
2022-01-01 16:21:41 +08:00
flag.Parse()
file_names := flag.Args() // []string{"foo", "bar"}
2022-01-04 13:55:05 +08:00
fmt.Println("[ export here ]", *file_export)
2022-01-03 12:38:07 +08:00
2022-01-04 16:59:37 +08:00
// 遍历文件得到基本数据
sum_nex := make([]dna, 0, 5)
for k, v := range file_names {
i, j := fas_parser.Fas_parser(v)
new_nex := dna{v, i, j}
sum_nex = append(sum_nex, new_nex)
2022-01-04 16:59:37 +08:00
fmt.Println("[ working A ]", k+1, v)
2022-01-03 12:38:07 +08:00
}
// 整合若干文件的统计
sum_charset := []charset{}
for k, v := range sum_nex {
n := v.name
f := 1
if k != 0 {
2022-01-05 16:26:59 +08:00
f = sum_charset[k-1].To + 1
}
2022-01-05 16:26:59 +08:00
t := f + v.count - 1
2022-01-04 16:59:37 +08:00
fmt.Println("[ working B ]", n, f, t)
new_charset := charset{n, f, t}
sum_charset = append(sum_charset, new_charset)
}
2022-01-04 16:59:37 +08:00
// fmt.Println(sum_charset)
2022-01-03 12:38:07 +08:00
// dna 的整合
2022-01-04 16:59:37 +08:00
ntax := 0
2022-01-04 13:55:05 +08:00
nchar := sum_charset[len(sum_charset)-1].To
2022-01-04 16:59:37 +08:00
sum_dna := make(map[string][]string)
2022-01-05 16:36:26 +08:00
for _, v := range sum_nex {
2022-01-04 16:59:37 +08:00
for k1 := range v.min_dna {
_, has := sum_dna[k1]
if !has {
sum_dna[k1] = make([]string, len(sum_charset))
2022-01-05 16:36:26 +08:00
ntax ++
2022-01-04 16:59:37 +08:00
}
2022-01-03 12:38:07 +08:00
}
2022-01-04 16:59:37 +08:00
}
for k, v := range sum_nex {
for _, v1 := range v.min_dna {
for k2 := range sum_dna {
if _, ok := v.min_dna[k2]; ok {
sum_dna[k2][k] = v1
} else {
sum_dna[k2][k] = strings.Repeat("?", v.count)
}
}
}
}
2022-01-04 16:59:37 +08:00
// fmt.Println(sum_dna)
matrix := make(map[string]string, ntax)
for k := range sum_dna {
matrix[k] = strings.Join(sum_dna[k], "")
}
// 准备发射到模板的数据
last_data := tmpl_data{ntax, nchar, matrix, sum_charset}
// fmt.Println(last_data)
2022-01-03 12:38:07 +08:00
// 读取模板
nex_tmpl, err := template.New("nex").Parse(fas_parser.Nex_tmpl)
if err != nil {
fmt.Println("[ tmpl err ]", err)
return
2022-01-01 16:21:41 +08:00
}
// 覆盖创建要写入的 nex 文件
2022-01-04 13:55:05 +08:00
new_file, err := os.OpenFile(*file_export, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0666)
2022-01-01 16:21:41 +08:00
if err != nil {
fmt.Println("[ create or open file error ]", err)
return
2022-01-01 16:21:41 +08:00
}
defer new_file.Close()
2022-01-01 16:21:41 +08:00
// 写入 nex 模板
2022-01-04 13:55:05 +08:00
err = nex_tmpl.Execute(new_file, last_data)
2022-01-04 16:59:37 +08:00
if err != nil {
fmt.Println("[ err at tmpl exec ]", err)
return
2022-01-01 16:21:41 +08:00
}
}