commit
f09afd591b
10 changed files with 249 additions and 181 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
a.nex
|
||||
test.go
|
16
a.nex
Normal file
16
a.nex
Normal file
|
@ -0,0 +1,16 @@
|
|||
#NEXUS
|
||||
BEGIN DATA;
|
||||
DIMENSIONS NTAX=4 NCHAR=2031;
|
||||
FORMAT DATATYPE=DNA GAP=- MISSING=?;
|
||||
MATRIX
|
||||
'Bradybaena circulus circulus' -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------aactatatg-gtatcatatatagctatttctgctcaatg-taatataaatagccgcagtactctgactgtgctaaggtagcataatcatttggcttataattgaagtctagtatgaaagaagatatgggagttaactgtttcctaaacgtttacttaatttacttagggggtgaaaatacccccacaaacataatagacgagaagacccttgaaatttttagtata---attttaaatcgtgctttttgttggggcgacaaggtagcatagtaaacctactaagtggttttattagaacaaaattgtatgaataattaaattactcaagggataacagcataatattttaaagtttgtgacctcgatgttggactaggacaatatagtttaaaagactattatttttgctctgttcg---------------------tatatattatttggtgtttggtgtgggatagttggtacaggtttaagattattgattcgaatagagttaggtagttctggtgttatagcagaagagcatttctacaatgttattgtaacagctcatgcttttgtaataattttttttatagttatgccaatcatgattggaggttttggaaattgaatagtaccgttgttgattggggcacccgatatgagctttccacgtataaacaatataaggttttgattgttacccccttcttttcttctattaattagaagtagtctagtagaaggcggtgcagggaccggttgaacagtgtatcctccacttagctcacttgtaggtcataggagagctgccgtagacttagcaatcttttctcttcatttggctgggatatcatcaattttaggtgcaatcaattttattacaactatttttaatatacgagccccaggaataactatggaacgtgttagactgtttgtttgatctattttagtgacagtgtttcttttattact-----------------------------------------------------------------------------tatatattatttggtgtttggtgtgggatagttggtacaggtttaagattattgattcgaatagagttaggtagttctggtgttatagcagaagagcatttctacaatgttattgtaacagctcatgcttttgtaataattttttttatagttatgccaatcatgattggaggttttggaaattgaatagtaccgttgttgattggggcacccgatatgagctttccacgtataaacaatataaggttttgattgttacccccttcttttcttctattaattagaagtagtctagtagaaggcggtgcagggaccggttgaacagtgtatcctccacttagctcacttgtaggtcataggagagctgccgtagacttagcaatcttttctcttcatttggctgggatatcatcaattttaggtgcaatcaattttattacaactatttttaatatacgagccccaggaataactatggaacgtgttagactgtttgtttgatctattttagtgacagtgtttcttttattact---------------
--------------------------------------------------------------
|
||||
'Bradybaena phaeogramma phaeogramma' -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------aactatatg-gtatcatatatagctatttctgctcaatg-taatataaatagccgcagtactctgactgtgctaaggtagcataatcatttggcttataattgaagtctagtatgaaagaagatatgggagttaactgtttcctaaacgtttacttaatttacttagggggtgaaaatacccccacaaacataatagacgagaagacccttgaaatttttagtata---attttaaatcgtgctttttgttggggcgacaaggtagcatagtaaacctactaagtggttttattagaacaaaattgtatgaataattaaattactcaagggataacagcataatattttaaagtttgtgacctcgatgttggactaggacaatatagtttaaaagactattatttttgctctgttcg---------------------tatatattatttggtgtttggtgtgggatagttggtacaggtttaagattattgattcgaatagagttaggtagttctggtgttatagcagaagagcatttctacaatgttattgtaacagctcatgcttttgtaataattttttttatagttatgccaatcatgattggaggttttggaaattgaatagtaccgttgttgattggggcacccgatatgagctttccacgtataaacaatataaggttttgattgttacccccttcttttcttctattaattagaagtagtctagtagaaggcggtgcagggaccggttgaacagtgtatcctccacttagctcacttgtaggtcataggagagctgccgtagacttagcaatcttttctcttcatttggctgggatatcatcaattttaggtgcaatcaattttattacaactatttttaatatacgagccccaggaataactatggaacgtgttagactgtttgtttgatctattttagtgacagtgtttcttttattact-----------------------------------------------------------------------------tatatattatttggtgtttggtgtgggatagttggtacaggtttaagattattgattcgaatagagttaggtagttctggtgttatagcagaagagcatttctacaatgttattgtaacagctcatgcttttgtaataattttttttatagttatgccaatcatgattggaggttttggaaattgaatagtaccgttgttgattggggcacccgatatgagctttccacgtataaacaatataaggttttgattgttacccccttcttttcttctattaattagaagtagtctagtagaaggcggtgcagggaccggttgaacagtgtatcctccacttagctcacttgtaggtcataggagagctgccgtagacttagcaatcttttctcttcatttggctgggatatcatcaattttaggtgcaatcaattttattacaactatttttaatatacgagccccaggaataactatggaacgtgttagactgtttgtttgatctattttagtgacagtgtttcttttattact---------
--------------------------------------------------------------------
|
||||
'Bradybaena similaris' -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------aactatatg-gtatcatatatagctatttctgctcaatg-taatataaatagccgcagtactctgactgtgctaaggtagcataatcatttggcttataattgaagtctagtatgaaagaagatatgggagttaactgtttcctaaacgtttacttaatttacttagggggtgaaaatacccccacaaacataatagacgagaagacccttgaaatttttagtata---attttaaatcgtgctttttgttggggcgacaaggtagcatagtaaacctactaagtggttttattagaacaaaattgtatgaataattaaattactcaagggataacagcataatattttaaagtttgtgacctcgatgttggactaggacaatatagtttaaaagactattatttttgctctgttcg---------------------??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
??????????????????????????????????????????????????????
|
||||
'Bradybaena virgo virgo' ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????tatatattatttggtgtttggtgtgggatagttggtacaggtttaagattattgattcgaatagagttaggtagttctggtgttatagcagaagagcatttctacaatgttattgtaacagctcatgcttttgtaataattttttttatagttatgccaatcatgattggaggttttggaaattgaatagtaccgttgttgattggggcacccgatatgagctttccacgtataaacaatataaggttttgattgttacccccttcttttcttctattaattagaagtagtctagtagaaggcggtgcagggaccggttgaacagtgtatcctccacttagctcacttgtaggtcataggagagctgccgtagacttagcaatcttttctcttcatttggctgggatatcatcaattttaggtgcaatcaattttattacaactatttttaatatacgagccccaggaataactatggaacgtgttagactgtttgtttgatctattttagtgacagtgtttcttttattact-----------------------------------------------------------------------------tatatattatttggtgtttggtgtgggatagttggtacaggtttaagattattgattcgaatagagttaggtagttctggtgttatagcagaagagcatttctacaatgttattgtaacagctcatgcttttgtaataattttttttatagttatgccaatcatgattggaggttttggaaattgaatagtaccgttgttgattggggcacccgatatgagctttccacgtataaacaatataaggttttgattgttacccccttcttttcttctattaattagaagtagtctagtagaaggcggtgcagggaccggttgaacagtgtatcctccacttagctcacttgtaggtcataggagagctgccgtagacttagcaatcttttctcttcatttggctgggatatcatcaattttaggtgcaatcaattttattacaactatttttaatatacgagccccaggaataactatggaacgtgttagactgtttgtttgatctattttagtgacagtgtttcttttattact---------------------
--------------------------------------------------------
|
||||
;
|
||||
END;
|
||||
BEGIN SETS;
|
||||
CHARSET ./data/16S.fas = 1-811;
|
||||
CHARSET ./data/CO1.fas = 812-1421;
|
||||
CHARSET ./data/CO1.mega.fas = 1422-2031;
|
||||
END;
|
49
count.go
Normal file
49
count.go
Normal file
|
@ -0,0 +1,49 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// charset names one contiguous column range of the combined alignment.
// fas_count fills From and To as 1-based, inclusive positions.
type charset struct {
	Name     string // label derived from the input file name
	From, To int    // first and last column of the range
}
|
||||
|
||||
// 遍历文件得到基本数据
|
||||
func fas_sum() []dna {
|
||||
sum := []dna{}
|
||||
for i, f := range file_input {
|
||||
sum = append(sum, fas_parser(f))
|
||||
fmt.Println("[ working A ]", i+1, f)
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// 整合若干文件的统计
|
||||
func fas_count(sum_nex []dna) []charset {
|
||||
fas_charset := []charset{}
|
||||
for k, v := range sum_nex {
|
||||
n := fas_name(v.name)
|
||||
f := 1
|
||||
if k != 0 {
|
||||
f = fas_charset[k-1].To + 1
|
||||
}
|
||||
t := f + v.count - 1
|
||||
fmt.Println("[ working B ]", n, f, t)
|
||||
new_charset := charset{n, f, t}
|
||||
fas_charset = append(fas_charset, new_charset)
|
||||
}
|
||||
fmt.Println(fas_charset)
|
||||
return fas_charset
|
||||
}
|
||||
|
||||
// fasNamePattern matches the first "<word>.<word>" occurrence in a string and
// captures the word in front of the dot. Compiled once at package scope
// instead of on every call.
var fasNamePattern = regexp.MustCompile(`(\w+)\.\w+`)

// fas_name derives a short charset label from an input file name: the run of
// word characters immediately before the first ".ext"-like suffix, e.g.
// "./data/16S.fas" -> "16S" and "CO1.mega.fas" -> "CO1".
//
// When old_name contains no such pattern (no extension, or the empty name
// that a failed parse produces) it is returned unchanged instead of
// panicking on an empty match slice.
func fas_name(old_name string) string {
	match := fasNamePattern.FindStringSubmatch(old_name)
	if match == nil {
		return old_name
	}
	// match[0] is the whole match, match[1] the captured base name.
	return match[1]
}
|
20
flag.go
Normal file
20
flag.go
Normal file
|
@ -0,0 +1,20 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// file_output is the path of the NEXUS file to write; dna_flag sets it from
// the "-o" command-line flag.
var file_output string

// file_input holds the positional command-line arguments, i.e. the input
// file names; dna_flag fills it after flag parsing.
var file_input []string
|
||||
|
||||
func dna_flag() {
|
||||
flag.StringVar(&file_output, "o", "a.nex", "files name wait to out")
|
||||
flag.Parse()
|
||||
file_input = flag.Args() // []string{"foo", "bar"}
|
||||
fmt.Println("==============")
|
||||
fmt.Println("[input file:]", file_input)
|
||||
fmt.Println("[output file:]", file_output)
|
||||
}
|
33
gocomb.go
Normal file
33
gocomb.go
Normal file
|
@ -0,0 +1,33 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
type tmpl_data struct {
|
||||
Ntax int
|
||||
Nchar int
|
||||
Matrix map[string]string
|
||||
Charset []charset
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
||||
dna_flag()
|
||||
|
||||
sum_nex := fas_sum()
|
||||
|
||||
sum_charset := fas_count(sum_nex)
|
||||
|
||||
sum_dna, ntax, nchar := fas_mix(sum_nex, sum_charset)
|
||||
|
||||
matrix := make(map[string]string, ntax)
|
||||
for k := range sum_dna {
|
||||
matrix[k] = strings.Join(sum_dna[k], "")
|
||||
}
|
||||
|
||||
// 准备发射到模板的数据
|
||||
last_data := tmpl_data{ntax, nchar, matrix, sum_charset}
|
||||
// fmt.Println(last_data)
|
||||
do_impl(last_data)
|
||||
}
|
33
mix.go
Normal file
33
mix.go
Normal file
|
@ -0,0 +1,33 @@
|
|||
package main
|
||||
|
||||
import "strings"
|
||||
|
||||
func fas_mix(sum_nex []dna, sum_charset []charset) (map[string][]string, int, int) {
|
||||
// dna 的整合
|
||||
ntax := 0
|
||||
nchar := sum_charset[len(sum_charset)-1].To
|
||||
sum_dna := make(map[string][]string)
|
||||
for _, v := range sum_nex {
|
||||
for k1 := range v.min {
|
||||
_, has := sum_dna[k1]
|
||||
if !has {
|
||||
sum_dna[k1] = make([]string, len(sum_charset))
|
||||
ntax++
|
||||
}
|
||||
}
|
||||
}
|
||||
for k, v := range sum_nex {
|
||||
for _, v1 := range v.min {
|
||||
for k2 := range sum_dna {
|
||||
if _, ok := v.min[k2]; ok {
|
||||
sum_dna[k2][k] = v1
|
||||
} else {
|
||||
sum_dna[k2][k] = strings.Repeat("?", v.count)
|
||||
// 之前就没写错吗
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// fmt.Println(sum_dna)
|
||||
return sum_dna, ntax, nchar
|
||||
}
|
|
@ -1,20 +1,15 @@
|
|||
package fas_parser
|
||||
|
||||
const Nex_tmpl = `#NEXUS
|
||||
#NEXUS
|
||||
BEGIN DATA;
|
||||
DIMENSIONS NTAX={{ .Ntax }} NCHAR={{ .Nchar }};
|
||||
FORMAT DATATYPE=DNA GAP=- MISSING=?;
|
||||
MATRIX
|
||||
{{- range $k, $v := .Matrix }}
|
||||
'{{ $k }}' {{ $v }}{{ end }}
|
||||
'{{ $k }}' {{ $v }}
|
||||
{{- end }}
|
||||
;
|
||||
END;
|
||||
BEGIN SETS;
|
||||
{{- range $_, $i := .Charset }}
|
||||
CHARSET {{ $i.Name }} = {{ $i.From }}-{{ $i.To }};
|
||||
{{- end }}
|
||||
END;
|
||||
`
|
||||
|
||||
// 最后那个 $i 好像有问题
|
||||
// {{/* $k| printf "%-40s" */}}
|
||||
END;
|
172
parser.go
172
parser.go
|
@ -1,119 +1,53 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
fas_parser "gocomb/src"
|
||||
"os"
|
||||
"strings"
|
||||
"text/template"
|
||||
)
|
||||
|
||||
// dna holds what was parsed from one input file.
type dna struct {
	name    string            // input file name as given on the command line
	min_dna map[string]string // taxon name -> sequence
	count   int               // number of columns this file contributes
}
|
||||
|
||||
// charset names one contiguous column range of the combined alignment;
// main fills From and To as 1-based, inclusive positions.
type charset struct {
	Name     string // label of the range (the input file name)
	From, To int    // first and last column of the range
}
|
||||
|
||||
type tmpl_data struct {
|
||||
Ntax int
|
||||
Nchar int
|
||||
Matrix map[string]string
|
||||
Charset []charset
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
||||
// 读取命令行,这里一定要是指针
|
||||
file_export := flag.String("o", "a.nex", "files name wait to out")
|
||||
flag.Parse()
|
||||
file_names := flag.Args() // []string{"foo", "bar"}
|
||||
fmt.Println("[ export here ]", *file_export)
|
||||
|
||||
// 遍历文件得到基本数据
|
||||
sum_nex := make([]dna, 0, 5)
|
||||
for k, v := range file_names {
|
||||
i, j := fas_parser.Fas_parser(v)
|
||||
new_nex := dna{v, i, j}
|
||||
sum_nex = append(sum_nex, new_nex)
|
||||
fmt.Println("[ working A ]", k+1, v)
|
||||
}
|
||||
|
||||
// 整合若干文件的统计
|
||||
sum_charset := []charset{}
|
||||
for k, v := range sum_nex {
|
||||
n := v.name
|
||||
f := 1
|
||||
if k != 0 {
|
||||
f = sum_charset[k-1].To + 1
|
||||
}
|
||||
t := f + v.count - 1
|
||||
fmt.Println("[ working B ]", n, f, t)
|
||||
new_charset := charset{n, f, t}
|
||||
sum_charset = append(sum_charset, new_charset)
|
||||
}
|
||||
// fmt.Println(sum_charset)
|
||||
|
||||
// dna 的整合
|
||||
ntax := 0
|
||||
nchar := sum_charset[len(sum_charset)-1].To
|
||||
sum_dna := make(map[string][]string)
|
||||
for _, v := range sum_nex {
|
||||
for k1 := range v.min_dna {
|
||||
_, has := sum_dna[k1]
|
||||
if !has {
|
||||
sum_dna[k1] = make([]string, len(sum_charset))
|
||||
ntax ++
|
||||
}
|
||||
}
|
||||
}
|
||||
for k, v := range sum_nex {
|
||||
for _, v1 := range v.min_dna {
|
||||
for k2 := range sum_dna {
|
||||
if _, ok := v.min_dna[k2]; ok {
|
||||
sum_dna[k2][k] = v1
|
||||
} else {
|
||||
sum_dna[k2][k] = strings.Repeat("?", v.count)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// fmt.Println(sum_dna)
|
||||
|
||||
matrix := make(map[string]string, ntax)
|
||||
for k := range sum_dna {
|
||||
matrix[k] = strings.Join(sum_dna[k], "")
|
||||
}
|
||||
|
||||
// 准备发射到模板的数据
|
||||
last_data := tmpl_data{ntax, nchar, matrix, sum_charset}
|
||||
// fmt.Println(last_data)
|
||||
|
||||
// 读取模板
|
||||
nex_tmpl, err := template.New("nex").Parse(fas_parser.Nex_tmpl)
|
||||
if err != nil {
|
||||
fmt.Println("[ tmpl err ]", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 覆盖创建要写入的 nex 文件
|
||||
new_file, err := os.OpenFile(*file_export, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0666)
|
||||
if err != nil {
|
||||
fmt.Println("[ create or open file error ]", err)
|
||||
return
|
||||
}
|
||||
defer new_file.Close()
|
||||
|
||||
// 写入 nex 模板
|
||||
err = nex_tmpl.Execute(new_file, last_data)
|
||||
if err != nil {
|
||||
fmt.Println("[ err at tmpl exec ]", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// dna holds what fas_parser extracted from one input file.
type dna struct {
	name  string            // input file name as passed to fas_parser
	count int               // sequence length reported by fas_parser
	min   map[string]string // taxon name -> sequence
}
|
||||
|
||||
// 读取fas文件
|
||||
func fas_parser(file_name string) dna {
|
||||
|
||||
// 读取文件
|
||||
f, err := ioutil.ReadFile("./" + file_name)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return dna{"", 0, nil}
|
||||
}
|
||||
|
||||
count := 0
|
||||
i := 0 // acgt行计数
|
||||
j := -1 // 标题行计数
|
||||
seq := make(map[string]string)
|
||||
indid := ""
|
||||
|
||||
for k, v := range f {
|
||||
switch v {
|
||||
case '>':
|
||||
j = k + 1
|
||||
case '\n':
|
||||
if j != -1 {
|
||||
indid = string(f[j:k])
|
||||
i = k + 1
|
||||
j = -1
|
||||
continue
|
||||
}
|
||||
seq[indid] = seq[indid] + strings.ToLower(string(f[i:k]))
|
||||
i = k + 1
|
||||
}
|
||||
}
|
||||
count = len(seq[indid])
|
||||
// for k1, v1 := range seq {
|
||||
// fmt.Println(k1)
|
||||
// fmt.Println(v1)
|
||||
// }
|
||||
// fmt.Println(count)
|
||||
return dna{file_name, count, seq}
|
||||
}
|
||||
|
|
|
@ -1,53 +0,0 @@
|
|||
package fas_parser
|
||||
|
||||
import(
|
||||
"io/ioutil"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Fas_parser reads a FASTA file and returns a map from taxon name to
// sequence together with the length of the first record's sequence.
//
// A line starting with '>' opens a record; the rest of that line is the
// taxon name. The following non-empty lines are concatenated, unchanged,
// into that record's sequence. On a read error the error is printed and
// (nil, 0) is returned.
//
// The previous byte-scanning version only recognised the bytes a, c, g, t
// and '-' as sequence data, so a '\r' or any other symbol inside a sequence
// line switched it into header mode and corrupted the following records. It
// also dropped a final line without a trailing newline and could not open
// absolute paths because it prefixed "./". This version splits on lines
// instead.
func Fas_parser(file_name string) (map[string]string, int) {
	f, err := ioutil.ReadFile(file_name)
	if err != nil {
		fmt.Println(err)
		return nil, 0
	}

	seq := make(map[string]string)
	section := ""    // name of the record currently being read
	first := ""      // name of the first record; its length is the count
	started := false // true once the first header line has been seen

	for start := 0; start < len(f); {
		// Find the end of the current line (newline or end of file).
		end := start
		for end < len(f) && f[end] != '\n' {
			end++
		}
		line := f[start:end]
		start = end + 1

		// Tolerate CRLF line endings.
		if n := len(line); n > 0 && line[n-1] == '\r' {
			line = line[:n-1]
		}

		switch {
		case len(line) == 0:
			// Blank line: nothing to record.
		case line[0] == '>':
			section = string(line[1:])
			if !started {
				first, started = section, true
			}
		case started:
			seq[section] += string(line)
		}
	}

	return seq, len(seq[first])
}
|
39
tmpl.go
Normal file
39
tmpl.go
Normal file
|
@ -0,0 +1,39 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"text/template"
|
||||
)
|
||||
|
||||
func do_impl(last_data tmpl_data) {
|
||||
|
||||
f, err := ioutil.ReadFile("nex.tmpl")
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
// 读取模板
|
||||
nex_tmpl, err := template.New("nex").Parse(string(f))
|
||||
if err != nil {
|
||||
fmt.Println("[ tmpl err ]", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 覆盖创建要写入的 nex 文件
|
||||
new_file, err := os.OpenFile(file_output, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0666)
|
||||
if err != nil {
|
||||
fmt.Println("[ create or open file error ]", err)
|
||||
return
|
||||
}
|
||||
defer new_file.Close()
|
||||
|
||||
// 写入 nex 模板
|
||||
err = nex_tmpl.Execute(new_file, last_data)
|
||||
if err != nil {
|
||||
fmt.Println("[ err at tmpl exec ]", err)
|
||||
return
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue