2022-01-13 20:12:32 +08:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"io/ioutil"
|
2022-01-14 17:20:42 +08:00
|
|
|
"strings"
|
2022-01-13 20:12:32 +08:00
|
|
|
)
|
2022-01-18 21:44:24 +08:00
|
|
|
// dna holds the contents of one parsed fas (FASTA-style) file.
type dna struct {
	// name is the file name the data was read from.
	name string
	// count is the length, in bytes, of the sequence belonging to the
	// last sample header found in the file (as filled in by fas_parser).
	count int
	// min maps each sample identifier (the header text following '>')
	// to its lower-cased sequence.
	min map[string]string
}
|
|
|
|
|
|
|
|
// 读取fas文件
|
2022-01-15 19:51:43 +08:00
|
|
|
// read fas format files
|
2022-01-13 20:12:32 +08:00
|
|
|
func fas_parser(file_name string) dna {
|
|
|
|
|
|
|
|
// 读取文件
|
|
|
|
f, err := ioutil.ReadFile("./" + file_name)
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println(err)
|
|
|
|
return dna{"", 0, nil}
|
|
|
|
}
|
|
|
|
|
|
|
|
count := 0
|
2022-01-15 19:51:43 +08:00
|
|
|
//sequence lines amount
|
2022-01-13 20:12:32 +08:00
|
|
|
i := 0 // acgt行计数
|
2022-01-15 19:51:43 +08:00
|
|
|
//samples amount
|
2022-01-13 20:12:32 +08:00
|
|
|
j := -1 // 标题行计数
|
|
|
|
seq := make(map[string]string)
|
2022-01-14 17:20:42 +08:00
|
|
|
indid := ""
|
2022-01-13 20:12:32 +08:00
|
|
|
|
|
|
|
for k, v := range f {
|
|
|
|
switch v {
|
|
|
|
case '>':
|
2022-01-14 17:20:42 +08:00
|
|
|
j = k + 1
|
2022-01-13 20:12:32 +08:00
|
|
|
case '\n':
|
|
|
|
if j != -1 {
|
2022-01-14 17:20:42 +08:00
|
|
|
indid = string(f[j:k])
|
2022-01-13 20:12:32 +08:00
|
|
|
i = k + 1
|
|
|
|
j = -1
|
|
|
|
continue
|
|
|
|
}
|
2022-01-14 17:20:42 +08:00
|
|
|
seq[indid] = seq[indid] + strings.ToLower(string(f[i:k]))
|
2022-01-13 20:12:32 +08:00
|
|
|
i = k + 1
|
|
|
|
}
|
|
|
|
}
|
2022-01-14 17:20:42 +08:00
|
|
|
count = len(seq[indid])
|
2022-01-13 20:12:32 +08:00
|
|
|
// for k1, v1 := range seq {
|
|
|
|
// fmt.Println(k1)
|
|
|
|
// fmt.Println(v1)
|
|
|
|
// }
|
|
|
|
// fmt.Println(count)
|
|
|
|
return dna{file_name, count, seq}
|
|
|
|
}
|