SeqCombGo/parser.go

57 lines
933 B
Go
Raw Permalink Normal View History

2022-01-13 20:12:32 +08:00
package main
import (
"fmt"
"io/ioutil"
2022-01-14 17:20:42 +08:00
"strings"
2022-01-13 20:12:32 +08:00
)
2022-01-18 21:44:24 +08:00
// dna holds the sequences parsed from one FASTA (.fas) file.
type dna struct {
	name  string            // file name, as passed to fas_parser
	count int               // length of the sequence of the last sample read by fas_parser
	min   map[string]string // sample ID (header text after '>') -> lowercased sequence
}
// 读取fas文件
2022-01-15 19:51:43 +08:00
// read fas format files
2022-01-13 20:12:32 +08:00
func fas_parser(file_name string) dna {
// 读取文件
f, err := ioutil.ReadFile("./" + file_name)
if err != nil {
fmt.Println(err)
return dna{"", 0, nil}
}
count := 0
2022-01-15 19:51:43 +08:00
//sequence lines amount
2022-01-13 20:12:32 +08:00
i := 0 // acgt行计数
2022-01-15 19:51:43 +08:00
//samples amount
2022-01-13 20:12:32 +08:00
j := -1 // 标题行计数
seq := make(map[string]string)
2022-01-14 17:20:42 +08:00
indid := ""
2022-01-13 20:12:32 +08:00
for k, v := range f {
switch v {
case '>':
2022-01-14 17:20:42 +08:00
j = k + 1
2022-01-13 20:12:32 +08:00
case '\n':
if j != -1 {
2022-01-14 17:20:42 +08:00
indid = string(f[j:k])
2022-01-13 20:12:32 +08:00
i = k + 1
j = -1
continue
}
2022-01-14 17:20:42 +08:00
seq[indid] = seq[indid] + strings.ToLower(string(f[i:k]))
2022-01-13 20:12:32 +08:00
i = k + 1
}
}
2022-01-14 17:20:42 +08:00
count = len(seq[indid])
2022-01-13 20:12:32 +08:00
// for k1, v1 := range seq {
// fmt.Println(k1)
// fmt.Println(v1)
// }
// fmt.Println(count)
return dna{file_name, count, seq}
}