fix: fasta space issue; polish: readfas, readnex

This commit is contained in:
kuoi 2023-03-21 01:19:39 +08:00
parent c32f318f85
commit d81bfbd8d9

View file

@ -3,6 +3,7 @@
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector>
using namespace std; using namespace std;
@ -77,48 +78,53 @@ Sample readFas(char* itn) {
int ntax, nchar, lnum; int ntax, nchar, lnum;
ifstream matrixfile; ifstream matrixfile;
matrixfile.open(itn); matrixfile.open(itn);
// read the file once into a vector so its lines can be reused several times
vector<string> file_content;
// check line number and taxa number // check line number and taxa number
ntax = 0; ntax = 0;
string temln; string temln;
for (lnum = 0; getline(matrixfile, temln); lnum++) { for (lnum = 0; getline(matrixfile, temln); lnum++) {
file_content.push_back(temln);
if (temln[0] == '>') { if (temln[0] == '>') {
ntax++; ntax++;
} }
} }
matrixfile.clear(); matrixfile.close();
matrixfile.seekg(0);
// check the nchar // check the nchar
string str_a, str_b; string str_i;
int r = lnum / ntax; int r;
if (file_content.size() != 0) {
r = file_content.size() / ntax;
} else {
cout << "MiMi:\tInput file contains 0 line" << endl;
exit(0);
}
for (int i = 0; i < r; i++) { for (int i = 0; i < r; i++) {
getline(matrixfile, str_a);
if (i > 0) { if (i > 0) {
str_b = str_b + str_a; file_content[i] = del_space(file_content[i]);
str_i = str_i + file_content[i];
} }
} }
nchar = str_b.length(); nchar = str_i.length();
matrixfile.clear();
matrixfile.seekg(0);
// create class // create class
Sample sam(ntax, nchar); Sample sam(ntax, nchar);
// get class // get class
string str_c; for (int a = 0, b = 0; a < (int)file_content.size(); a++) {
for (int a = 1, b = 0; a <= lnum; a++) { if ((a + 1) % r == 1) {
if (a % r == 1) { sam.taxas[b] = file_content[a];
getline(matrixfile, sam.taxas[b]);
sam.taxas[b].erase(0, 1); sam.taxas[b].erase(0, 1);
sam.taxas[b] = rep_space(sam.taxas[b]);
} }
if (a % r > 1) { if ((a + 1) % r > 1) {
getline(matrixfile, str_c); file_content[a] = del_space(file_content[a]);
sam.chars[b] = sam.chars[b] + str_c; sam.chars[b] = sam.chars[b] + file_content[a];
} }
if (a % r == 0) { if ((a + 1) % r == 0) {
getline(matrixfile, str_c); file_content[a] = del_space(file_content[a]);
sam.chars[b] = sam.chars[b] + str_c; sam.chars[b] = sam.chars[b] + file_content[a];
b++; b++;
} }
} }
matrixfile.close();
return sam; return sam;
} }
@ -164,15 +170,21 @@ Sample readNex(char* itn) {
// open file // open file
ifstream matrixfile; ifstream matrixfile;
matrixfile.open(itn); matrixfile.open(itn);
// some tem // some var
string snall, stri, str_a, str_b; string snall, stri, str_a, str_b;
bool found_ntax = false, found_nchar = false, found_equal = false; bool found_ntax = false, found_nchar = false, found_equal = false;
char x = '='; char x = '=';
// get line number and read line to vector
int lnum; int lnum;
unsigned int eulnum; unsigned int eulnum;
vector<string> file_content;
while (getline(matrixfile, snall)) {
file_content.push_back(snall);
}
matrixfile.close();
// getline line by line // getline line by line
for (lnum = 0; getline(matrixfile, snall); lnum++) { for (lnum = 0; lnum < (int)file_content.size(); lnum++) {
str_a = to_lower(snall); str_a = to_lower(file_content[lnum]);
str_b = add_space(x, str_a); str_b = add_space(x, str_a);
istringstream istr(str_b); istringstream istr(str_b);
// convert to words // convert to words
@ -202,26 +214,17 @@ Sample readNex(char* itn) {
} }
} }
} }
// go back
matrixfile.clear();
matrixfile.seekg(0);
// create class // create class
Sample sam(ntax, nchar); Sample sam(ntax, nchar);
// some temp, z is line number, l is the string array number // read line by line, limit line number
unsigned int z = 0;
int l = 0; int l = 0;
// read line by line for (unsigned int z = eulnum; z < (eulnum + sam.ntax); z++) {
while (getline(matrixfile, snall)) {
// convert to word // convert to word
istringstream istr(snall); istringstream istr(file_content[z]);
// limit the read line number
if ((z > (eulnum - 1)) && (z < (eulnum + sam.ntax))) {
istr >> sam.taxas[l]; istr >> sam.taxas[l];
istr >> sam.chars[l]; istr >> sam.chars[l];
l++; l++;
} }
z++;
}
return sam; return sam;
} }
string add_space(char x, string str_old) { string add_space(char x, string str_old) {
@ -293,6 +296,7 @@ void writeNex(class Sample sam, char* otn) {
matrixfile << "#NEXUS\nBegin data;\n\tDimensions nchar=" << sam.nchar matrixfile << "#NEXUS\nBegin data;\n\tDimensions nchar=" << sam.nchar
<< " ntax=" << sam.ntax << ";\n\tFormat datatype=" << datatype << " ntax=" << sam.ntax << ";\n\tFormat datatype=" << datatype
<< " missing=? gap=-;\n\tMatrix" << endl; << " missing=? gap=-;\n\tMatrix" << endl;
for (unsigned int i2 = 0; i2 < sam.ntax; i2++) { for (unsigned int i2 = 0; i2 < sam.ntax; i2++) {
matrixfile << "\t\t" << sam.taxas[i2] << "\t" << sam.chars[i2] << endl; matrixfile << "\t\t" << sam.taxas[i2] << "\t" << sam.chars[i2] << endl;
} }
@ -456,18 +460,14 @@ Sample read_input(char* itn, int intype) {
void write_output(class Sample sam, char* otn, int outype) { void write_output(class Sample sam, char* otn, int outype) {
ofstream matrixfile(otn); ofstream matrixfile(otn);
if (matrixfile.is_open()) { if (matrixfile.is_open()) {
for (unsigned int i = 0; i < sam.ntax; i++) {
sam.chars[i] = del_space(sam.chars[i]);
sam.taxas[i] = rep_space(sam.taxas[i]);
}
} else {
cout << "MiMi:\tOutput file can't be open" << endl;
exit(0);
}
if (outype == 1) writeFas(sam, otn); if (outype == 1) writeFas(sam, otn);
if (outype == 2) writeNex(sam, otn); if (outype == 2) writeNex(sam, otn);
if (outype == 3) writePhy(sam, otn); if (outype == 3) writePhy(sam, otn);
if (outype == 4) writeTnt(sam, otn); if (outype == 4) writeTnt(sam, otn);
} else {
cout << "MiMi:\tOutput file can't be open" << endl;
exit(0);
}
} }
bool checkalign(class Sample sam) { bool checkalign(class Sample sam) {