From df8400617956bd0c62964dfddcba5be86df5df6a Mon Sep 17 00:00:00 2001 From: Guoyi Zhang Date: Fri, 5 Jul 2024 11:53:06 +1000 Subject: [PATCH] add: more comments --- RGBEPP.sh | 10 ++++++++++ sortdiamond.cpp | 3 +++ 2 files changed, 13 insertions(+) diff --git a/RGBEPP.sh b/RGBEPP.sh index 4b2f408..6c5e87e 100644 --- a/RGBEPP.sh +++ b/RGBEPP.sh @@ -303,6 +303,7 @@ if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "pre" ]; then check_var "ARG_L" check_path "PathSortdiamond" + # extract fasta file from diamond balst style output for (( i=0; i<$length_fn; i++ )); do $PathSortdiamond $DirMap/${full_names[$i]}.m8 $DirPre/${full_names[$i]}.fasta done @@ -316,9 +317,12 @@ if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "split" ]; then check_var "ARG_L" check_path "PathSplitfasta" + # split fasta into different folders based on sequence names for (( i=0; i<$length_fn; i++ )); do $PathSplitfsata ${full_names[$i]}.fasta done + + # mv to destdir find . -mindepth 1 -maxdepth 1 -type d -exec mv {} ../$DirSplit \; cd - fi @@ -330,13 +334,18 @@ if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "merge" ]; then mkdir -p $DirMerge cd $DirSplit + + # merge different taxa sequences in same gene to one fasta for (( i=0; i<$length_gn; i++ )) do cd ${genes[$i]} cat * > ../${genes[$i]}.fasta cd .. done + + # mv to destdir mv *.fasta ../$DirMerge + cd - fi @@ -354,6 +363,7 @@ if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "align" ]; then mkdir -p $DirAlign/AA && mkdir -p $DirAlign/NT cd $DirMerge + # align the sequence based on codon parallel -j $ARG_T java -jar $PathMacse -prog alignSequences -seq {}.fasta -out_AA ../$DirAlign/AA/{}.fasta -out_NT ../$DirAlign/NT/{}.fasta ::: "${genes[@]}" cd - diff --git a/sortdiamond.cpp b/sortdiamond.cpp index 3141dac..75cf8b9 100644 --- a/sortdiamond.cpp +++ b/sortdiamond.cpp @@ -80,7 +80,9 @@ void readInputFile(const string &filename, while (iss >> fields[i] && i < 20) { i++; } + // subject seq id string key = fields[1]; + // bit score double value = stod(fields[11]); // Check if the key already exists in the map if (max_map.find(key) != max_map.end()) { @@ -106,6 +108,7 @@ void processMap(const map> &max_map, while (iss >> fields[i] && i < 20) { i++; } + // check if qstart is larger than qend if (stoi(fields[6]) > stoi(fields[7])) { fields[17] = revcomp(fields[17]); }