2023-03-20 14:45:56 +08:00
|
|
|
|
#include <algorithm>
|
2023-02-03 01:33:26 +08:00
|
|
|
|
#include <fstream>
|
2023-03-20 14:45:56 +08:00
|
|
|
|
#include <iostream>
|
|
|
|
|
#include <sstream>
|
2023-02-03 01:33:26 +08:00
|
|
|
|
#include <string>
|
2023-03-21 01:19:39 +08:00
|
|
|
|
#include <vector>
|
2023-02-03 01:33:26 +08:00
|
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Parsed command-line settings: the numeric format codes of the input and
// output files (as returned by checkextension) and pointers to both file
// names. The name pointers are stored as given; nothing is copied.
class Basic_arg {
 public:
  // Format codes; both default to 0 via in-class initialisers.
  int intype = 0;
  int outype = 0;
  // Input and output file names.
  char* itn;
  char* otn;

  Basic_arg(int in_code, int out_code, char* in_name, char* out_name)
      : intype(in_code), outype(out_code), itn(in_name), otn(out_name) {}
};
|
2023-03-19 02:17:39 +08:00
|
|
|
|
|
|
|
|
|
// Character matrix held in memory: `ntax` taxa, each with a name (taxas[i])
// and a sequence (chars[i]); `nchar` is the declared sequence length.
//
// Both arrays are heap-allocated with `ntax` elements and owned by the
// object. Copies are deep and the destructor frees the storage, so passing a
// Sample by value or assigning one over another neither leaks the arrays nor
// leaves two objects sharing them.
class Sample {
 public:
  unsigned ntax, nchar;
  std::string *taxas, *chars;

  // Allocates `ntax` empty names and `ntax` empty sequences.
  Sample(unsigned ntax, unsigned nchar)
      : ntax(ntax),
        nchar(nchar),
        taxas(new std::string[ntax]),
        chars(new std::string[ntax]) {}

  // Deep copy: the new object gets its own arrays.
  Sample(const Sample& other)
      : ntax(other.ntax),
        nchar(other.nchar),
        taxas(new std::string[other.ntax]),
        chars(new std::string[other.ntax]) {
    std::copy(other.taxas, other.taxas + ntax, taxas);
    std::copy(other.chars, other.chars + ntax, chars);
  }

  // Move: steals the arrays and leaves `other` empty (ntax == 0, no storage).
  Sample(Sample&& other) noexcept
      : ntax(other.ntax),
        nchar(other.nchar),
        taxas(other.taxas),
        chars(other.chars) {
    other.ntax = 0;
    other.taxas = nullptr;
    other.chars = nullptr;
  }

  // Unified copy/move assignment: the argument is already a private copy (or
  // a moved-in temporary), so its arrays can simply be adopted. Taking the
  // argument by value also makes self-assignment safe.
  Sample& operator=(Sample other) noexcept {
    delete[] taxas;
    delete[] chars;
    ntax = other.ntax;
    nchar = other.nchar;
    taxas = other.taxas;
    chars = other.chars;
    other.ntax = 0;
    other.taxas = nullptr;
    other.chars = nullptr;
    return *this;
  }

  ~Sample() {
    delete[] taxas;
    delete[] chars;
  }
};
|
2023-02-03 01:33:26 +08:00
|
|
|
|
|
2023-03-20 18:34:38 +08:00
|
|
|
|
Basic_arg procargs(int nargs, char** arg);
|
2023-03-20 14:30:42 +08:00
|
|
|
|
Sample read_input(char* itn, int intype);
|
2023-02-03 01:33:26 +08:00
|
|
|
|
void show_help(int help_num);
|
2023-03-20 14:30:42 +08:00
|
|
|
|
Sample readFas(char* itn);
|
|
|
|
|
Sample readPhy(char* itn);
|
|
|
|
|
Sample readTnt(char* itn);
|
|
|
|
|
Sample readNex(char* itn);
|
|
|
|
|
void write_output(class Sample sam, char* otn, int outype);
|
|
|
|
|
void writeFas(class Sample sam, char* otn);
|
|
|
|
|
void writePhy(class Sample sam, char* otn);
|
|
|
|
|
void writeTnt(class Sample sam, char* otn);
|
|
|
|
|
void writeNex(class Sample sam, char* otn);
|
2023-03-20 03:27:06 +08:00
|
|
|
|
bool isNum(string strnum);
|
2023-03-20 14:30:42 +08:00
|
|
|
|
bool checkalign(class Sample sam);
|
2023-03-20 03:27:06 +08:00
|
|
|
|
string to_lower(string stri);
|
2023-03-20 04:09:09 +08:00
|
|
|
|
string add_space(char x, string str_old);
|
2023-03-20 21:40:40 +08:00
|
|
|
|
string rep_space(string str_old);
|
|
|
|
|
string del_space(string str_old);
|
2023-03-20 04:09:09 +08:00
|
|
|
|
string checktype(string str);
|
|
|
|
|
int countfre(string str, char c);
|
2023-03-20 14:30:42 +08:00
|
|
|
|
int checkextension(string str);
|
2023-03-20 14:45:56 +08:00
|
|
|
|
|
|
|
|
|
// Read a PHYLIP-style matrix from the file named `itn`.
//
// The first line holds the number of taxa followed by the number of
// characters. Each following line contributes one taxon: its first
// whitespace-separated token is the name, the second the sequence.
//
// A missing or malformed header is reported and ends the program (exit code 0,
// like the other diagnostics in this file) instead of letting std::stoi throw.
// If the file ends before all taxa were read, the remaining entries stay
// empty rather than re-parsing the previous line.
Sample readPhy(char* itn) {
  std::ifstream matrixfile(itn);

  // Header: "<ntax> <nchar>".
  std::string line;
  std::getline(matrixfile, line);
  std::istringstream header(line);
  int ntax = 0;
  int nchar = 0;
  if (!(header >> ntax >> nchar) || ntax < 0 || nchar < 0) {
    std::cout << "MiMi:\tInvalid phylip header, taxa and character numbers "
                 "are expected"
              << std::endl;
    exit(0);
  }
  Sample sam(ntax, nchar);

  // One taxon per line; stop as soon as the file runs out of lines.
  for (unsigned int row = 0;
       row < sam.ntax && std::getline(matrixfile, line); row++) {
    std::istringstream fields(line);
    fields >> sam.taxas[row];
    fields >> sam.chars[row];
  }
  matrixfile.close();
  return sam;
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Read a FASTA matrix from the file named `itn`.
//
// Every line starting with '>' opens a new record. The rest of that line,
// with spaces replaced by '_' (rep_space), is the taxon name. All following
// lines up to the next '>' are stripped of spaces (del_space) and
// concatenated into the sequence. Records may span different numbers of
// lines. Lines before the first '>' are ignored.
//
// nchar is taken from the length of the first record's sequence. An empty
// file, or a file without any '>' line, is reported and ends the program.
Sample readFas(char* itn) {
  std::ifstream matrixfile(itn);

  // Keep the file in memory: the taxa must be counted before the matrix can
  // be allocated, and the second pass reuses the same lines.
  std::vector<std::string> file_content;
  unsigned int ntax = 0;
  std::string line;
  while (std::getline(matrixfile, line)) {
    if (!line.empty() && line[0] == '>') {
      ntax++;
    }
    file_content.push_back(line);
  }
  matrixfile.close();

  if (file_content.empty()) {
    std::cout << "MiMi:\tInput file contains 0 line" << std::endl;
    exit(0);
  }
  if (ntax == 0) {
    std::cout << "MiMi:\tInput file contains no '>' name line" << std::endl;
    exit(0);
  }

  Sample sam(ntax, 0);
  unsigned int taxon = 0;
  bool in_record = false;  // becomes true at the first '>' line
  for (std::size_t i = 0; i < file_content.size(); i++) {
    const std::string& current = file_content[i];
    if (!current.empty() && current[0] == '>') {
      if (in_record) {
        taxon++;
      }
      in_record = true;
      sam.taxas[taxon] = rep_space(current.substr(1));
    } else if (in_record) {
      sam.chars[taxon] += del_space(current);
    }
  }

  // checkalign later compares every sequence against this length.
  sam.nchar = sam.chars[0].length();
  return sam;
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Read a TNT-style matrix from the file named `itn`.
//
// Lines are skipped until one is found whose first two whitespace-separated
// tokens are both non-negative numbers: the first is taken as the character
// count, the second as the taxon count. Each following line contributes one
// taxon (first token = name, second token = sequence).
//
// If no such header line exists the problem is reported and the program ends,
// instead of looping forever at end of file. If the file ends before all taxa
// were read, the remaining entries stay empty.
Sample readTnt(char* itn) {
  std::ifstream matrixfile(itn);

  // Locate the "<nchar> <ntax>" line.
  std::string line;
  int ntax = 0;
  int nchar = 0;
  bool found_header = false;
  while (!found_header && std::getline(matrixfile, line)) {
    std::istringstream istr(line);
    std::string snchar, sntax;
    if (!(istr >> snchar >> sntax)) {
      continue;  // fewer than two tokens on this line
    }
    if (!isNum(snchar) || !isNum(sntax)) {
      continue;
    }
    nchar = std::stoi(snchar);
    ntax = std::stoi(sntax);
    found_header = (nchar >= 0 && ntax >= 0);
  }
  if (!found_header) {
    std::cout << "MiMi:\tInput file has no line with character and taxa "
                 "numbers"
              << std::endl;
    exit(0);
  }

  Sample sam(ntax, nchar);

  // One taxon per line; stop as soon as the file runs out of lines.
  for (unsigned int row = 0;
       row < sam.ntax && std::getline(matrixfile, line); row++) {
    std::istringstream fields(line);
    fields >> sam.taxas[row];
    fields >> sam.chars[row];
  }
  matrixfile.close();
  return sam;
}
|
|
|
|
|
|
2023-03-20 03:27:06 +08:00
|
|
|
|
// Returns true when `strnum` is a base-10 integer as understood by strtol,
// i.e. strtol consumes the string up to its terminating NUL.
// An empty string is not a number: strtol converts nothing and leaves the end
// pointer on the terminator, so it has to be rejected explicitly.
bool isNum(std::string strnum) {
  if (strnum.empty()) {
    return false;
  }
  char* end = nullptr;
  strtol(strnum.c_str(), &end, 10);
  return *end == '\0';
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Read a NEXUS matrix from the file named `itn`.
//
// The file is scanned word by word (lower-cased, with '=' split off as its own
// word). The value following "ntax =" and "nchar =" gives the dimensions; a
// trailing ';' on the value is dropped. The line after the last line holding
// the word "matrix" is the first data row, and each of the next `ntax` lines
// contributes one taxon (first token = name, second token = sequence).
//
// Missing ntax/nchar/matrix keywords or a non-numeric dimension are reported
// and end the program, instead of using uninitialised values. Data rows past
// the end of the file are left empty instead of indexing out of bounds.
Sample readNex(char* itn) {
  std::ifstream matrixfile(itn);

  // Read the whole file once; it is scanned for keywords and then indexed.
  std::vector<std::string> file_content;
  std::string line;
  while (std::getline(matrixfile, line)) {
    file_content.push_back(line);
  }
  matrixfile.close();

  // Converts a dimension value such as "12" or "12;" to a non-negative int.
  auto parse_count = [](std::string word) -> int {
    if (!word.empty() && word.back() == ';') {
      word.pop_back();
    }
    if (word.empty() || !isNum(word) || std::stoi(word) < 0) {
      std::cout << "MiMi:\tInvalid ntax or nchar value in nexus file"
                << std::endl;
      exit(0);
    }
    return std::stoi(word);
  };

  int ntax = 0;
  int nchar = 0;
  std::size_t first_row = 0;
  bool have_ntax = false, have_nchar = false, have_matrix = false;
  // Scanner state: which keyword was seen and whether '=' has been seen.
  bool found_ntax = false, found_nchar = false, found_equal = false;

  for (std::size_t lnum = 0; lnum < file_content.size(); lnum++) {
    std::istringstream istr(add_space('=', to_lower(file_content[lnum])));
    std::string word;
    while (istr >> word) {
      if (word == "ntax") {
        found_ntax = true;
      } else if (word == "nchar") {
        found_nchar = true;
      } else if (word == "=") {
        found_equal = true;
      } else if (found_ntax && found_equal) {
        ntax = parse_count(word);
        have_ntax = true;
        found_equal = false;
        found_ntax = false;
      } else if (found_nchar && found_equal) {
        nchar = parse_count(word);
        have_nchar = true;
        found_equal = false;
        found_nchar = false;
      } else if (word == "matrix") {
        first_row = lnum + 1;
        have_matrix = true;
      }
    }
  }

  if (!have_ntax || !have_nchar || !have_matrix) {
    std::cout << "MiMi:\tNexus file must define ntax, nchar and matrix"
              << std::endl;
    exit(0);
  }

  Sample sam(ntax, nchar);

  // One taxon per line, never reading past the last line of the file.
  for (unsigned int row = 0;
       row < sam.ntax && first_row + row < file_content.size(); row++) {
    std::istringstream fields(file_content[first_row + row]);
    fields >> sam.taxas[row];
    fields >> sam.chars[row];
  }
  return sam;
}
|
2023-03-21 01:36:27 +08:00
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Returns a copy of `str_old` in which every occurrence of the character `x`
// is surrounded by one space on each side; all other characters are copied
// unchanged.
std::string add_space(char x, std::string str_old) {
  std::string padded;
  for (const char ch : str_old) {
    if (ch == x) {
      padded += ' ';
      padded += ch;
      padded += ' ';
    } else {
      padded += ch;
    }
  }
  return padded;
}
|
|
|
|
|
|
2023-03-20 21:40:40 +08:00
|
|
|
|
// Returns `str_old` with every space character (' ') removed. Other
// whitespace such as tabs is kept.
std::string del_space(std::string str_old) {
  str_old.erase(std::remove(str_old.begin(), str_old.end(), ' '),
                str_old.end());
  return str_old;
}
|
|
|
|
|
|
|
|
|
|
// Returns `str_old` with every space character (' ') replaced by an
// underscore. The length of the string is unchanged.
std::string rep_space(std::string str_old) {
  std::replace(str_old.begin(), str_old.end(), ' ', '_');
  return str_old;
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Returns a lower-cased copy of `stri`.
// Each character is passed to tolower as an unsigned char: handing it a
// negative plain char (any byte above 0x7F where char is signed) is undefined
// behaviour.
std::string to_lower(std::string stri) {
  std::transform(stri.begin(), stri.end(), stri.begin(),
                 [](unsigned char c) {
                   return static_cast<char>(::tolower(c));
                 });
  return stri;
}
|
2023-02-03 01:33:26 +08:00
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
void writeFas(class Sample sam, char* otn) {
|
2023-03-20 14:30:42 +08:00
|
|
|
|
ofstream matrixfile(otn);
|
2023-03-20 18:34:38 +08:00
|
|
|
|
for (unsigned int i = 0; i < sam.ntax; i++) {
|
2023-03-20 14:45:56 +08:00
|
|
|
|
matrixfile << ">" << sam.taxas[i] << endl;
|
|
|
|
|
matrixfile << sam.chars[i] << endl;
|
|
|
|
|
}
|
2023-03-20 14:30:42 +08:00
|
|
|
|
matrixfile.close();
|
2023-03-19 02:17:39 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
void writePhy(class Sample sam, char* otn) {
|
2023-03-20 14:30:42 +08:00
|
|
|
|
ofstream matrixfile(otn);
|
2023-03-20 14:45:56 +08:00
|
|
|
|
matrixfile << sam.ntax << " " << sam.nchar << endl;
|
2023-03-20 18:34:38 +08:00
|
|
|
|
for (unsigned int i = 0; i < sam.ntax; i++) {
|
2023-03-20 14:45:56 +08:00
|
|
|
|
matrixfile << sam.taxas[i] << "\t" << sam.chars[i] << endl;
|
|
|
|
|
}
|
2023-03-20 14:30:42 +08:00
|
|
|
|
matrixfile.close();
|
2023-03-19 02:17:39 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
void writeNex(class Sample sam, char* otn) {
|
2023-03-20 14:30:42 +08:00
|
|
|
|
ofstream matrixfile(otn);
|
2023-03-20 04:09:09 +08:00
|
|
|
|
string datatype;
|
|
|
|
|
datatype = checktype(sam.chars[0]);
|
2023-03-20 19:36:26 +08:00
|
|
|
|
matrixfile << "#NEXUS\nBegin data;\n\tDimensions nchar=" << sam.nchar
|
|
|
|
|
<< " ntax=" << sam.ntax << ";\n\tFormat datatype=" << datatype
|
|
|
|
|
<< " missing=? gap=-;\n\tMatrix" << endl;
|
2023-03-21 01:19:39 +08:00
|
|
|
|
|
2023-03-20 18:34:38 +08:00
|
|
|
|
for (unsigned int i2 = 0; i2 < sam.ntax; i2++) {
|
2023-03-20 14:45:56 +08:00
|
|
|
|
matrixfile << "\t\t" << sam.taxas[i2] << "\t" << sam.chars[i2] << endl;
|
|
|
|
|
}
|
2023-03-20 19:36:26 +08:00
|
|
|
|
matrixfile << "\t;\nEnd;" << endl;
|
2023-03-20 14:30:42 +08:00
|
|
|
|
matrixfile.close();
|
2023-03-19 02:17:39 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Guess the NEXUS datatype of the sequence `str` from its composition.
// Returns "dna" when more than 70% of the characters are a/c/t/g (counted
// case-insensitively by countfre), otherwise "standard" when more than 70%
// are 0/1/2, otherwise "protein".
std::string checktype(std::string str) {
  const char nucleotides[] = {'a', 'c', 't', 'g'};
  const char states[] = {'0', '1', '2'};

  // Tallies are kept as float so the ratio below is a floating-point division.
  float dna = 0;
  for (const char symbol : nucleotides) {
    dna += countfre(str, symbol);
  }
  float standard = 0;
  for (const char symbol : states) {
    standard += countfre(str, symbol);
  }

  // Share of the whole sequence taken by each alphabet.
  if ((dna / str.length()) > 0.7) {
    return "dna";
  }
  if ((standard / str.length()) > 0.7) {
    return "standard";
  }
  return "protein";
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Number of occurrences of `c` in the lower-cased form of `str`. Only the
// string is lower-cased, so an upper-case `c` matches nothing that was a
// letter.
int countfre(std::string str, char c) {
  const std::string lowered = to_lower(str);
  return static_cast<int>(std::count(lowered.begin(), lowered.end(), c));
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
void writeTnt(class Sample sam, char* otn) {
|
2023-03-20 14:30:42 +08:00
|
|
|
|
ofstream matrixfile(otn);
|
2023-03-20 19:36:26 +08:00
|
|
|
|
matrixfile << "xread\n\' \'\n" << sam.nchar << " " << sam.ntax << endl;
|
2023-03-20 18:34:38 +08:00
|
|
|
|
for (unsigned int i = 0; i < sam.ntax; i++) {
|
2023-03-20 14:45:56 +08:00
|
|
|
|
matrixfile << sam.taxas[i] << "\t" << sam.chars[i] << endl;
|
|
|
|
|
}
|
2023-10-11 21:49:54 +08:00
|
|
|
|
matrixfile << "\n;\nproc / ;" << endl;
|
2023-03-20 14:30:42 +08:00
|
|
|
|
matrixfile.close();
|
2023-03-19 02:17:39 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-03-20 18:34:38 +08:00
|
|
|
|
// Parse the command line (`nargs` entries in `arg`, entry 0 being the program
// name) into a Basic_arg.
//
//   no arguments     print the short banner and exit
//   -h / --help      print the full help and return a Basic_arg whose format
//                    codes are 0, which main treats as "nothing to convert"
//   -i / --input  F  input file; its format code comes from checkextension
//   -o / --output F  output file; its format code comes from checkextension
//
// Any other argument, or a missing input or output file, is reported and ends
// the program. Input and output are tracked separately, so repeating one
// option can no longer stand in for the other and leave a file name pointer
// uninitialised.
Basic_arg procargs(int nargs, char** arg) {
  int intype = 0;
  int outype = 0;
  char* itn = nullptr;
  char* otn = nullptr;

  // No argument at all: show the banner only.
  if (nargs == 1) {
    show_help(0);
    exit(0);
  }

  for (int i = 1; i < nargs; i++) {
    const std::string para(arg[i]);
    if (para == "-h" || para == "--help") {
      show_help(1);
      return Basic_arg(0, 0, nullptr, nullptr);
    } else if (para == "-i" || para == "--input") {
      if ((i + 1) < nargs) {
        i++;
        itn = arg[i];
        intype = checkextension(std::string(arg[i]));
      } else {
        std::cout << "MiMi:\tInput file name must be defined" << std::endl;
      }
    } else if (para == "-o" || para == "--output") {
      if ((i + 1) < nargs) {
        i++;
        otn = arg[i];
        outype = checkextension(std::string(arg[i]));
      } else {
        std::cout << "MiMi:\tOutput file name must be defined" << std::endl;
      }
    } else {
      std::cout << "MiMi:\tUnknown arguments, please use -h to check"
                << std::endl;
      exit(0);
    }
  }

  if (itn == nullptr || otn == nullptr) {
    std::cout << "MiMi:\tInput and Output can't be empty" << std::endl;
    exit(0);
  }
  return Basic_arg(intype, outype, itn, otn);
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Map the extension of the file name `str` (the text after its last '.',
// compared case-insensitively) to a format code:
//   1 = fas / fasta, 2 = nex / nexus, 3 = phy / phylip, 4 = tnt / ss.
// A name without any '.' or with an unknown extension is reported and ends
// the program.
int checkextension(std::string str) {
  // rfind reports "not found" as npos, not as 0: position 0 is a valid match
  // (a name such as ".fas"), so the result must be compared against npos.
  const std::string::size_type dot = str.rfind('.');
  if (dot == std::string::npos) {
    std::cout << "MiMi:\tPlease specify the extension name" << std::endl;
    exit(0);
  }
  const std::string extension = to_lower(str.substr(dot + 1));

  int type = 0;
  if ((extension == "fas") || (extension == "fasta")) {
    type = 1;
  } else if ((extension == "nex") || (extension == "nexus")) {
    type = 2;
  } else if ((extension == "phy") || (extension == "phylip")) {
    type = 3;
  } else if ((extension == "tnt") || (extension == "ss")) {
    type = 4;
  } else {
    std::cout << "MiMi:\tUnknown format" << std::endl;
    exit(0);
  }
  return type;
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Print usage information to standard output: the short banner when
// `help_num` is 0, the full help (logo, arguments, accepted formats) for any
// other value.
void show_help(int help_num) {
  if (help_num != 0) {
    std::cout << "\n /l、 \t. . . .\n(゚、 。 7 \t|\\/|*|\\/|*\n l ~ヽ "
                 " \t| ||| ||\n じしf_,)ノ\t| ||| ||\n\nMorphology into "
                 "Molecules into\nGPL;\tGuoyi "
                 "Zhang;\t2023\n\nArguments:\n-h\t--help;\n-i\t--input\t\t${"
                 "filename};\n-o\t--output\t${filename};\n\nAccepted "
                 "formats:\nfas\tfasta;\nnex\tnexus;\nphy\tphylip;\ntnt\tss;"
              << std::endl;
    return;
  }
  std::cout << "MiMi, GPL, Guoyi Zhang, 2023.\nPlease use -h to see more help"
            << std::endl;
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
// Load the matrix stored in the file named `itn`, using the reader selected
// by the format code `intype` (1 = readFas, 2 = readNex, 3 = readPhy,
// 4 = readTnt). Any other code yields an empty Sample. If the file cannot be
// opened a message is printed and the program ends.
Sample read_input(char* itn, int intype) {
  Sample sam(0, 0);

  // Opened only to find out whether the file is readable; the readers open
  // it again themselves.
  std::ifstream probe;
  probe.open(itn);
  if (!probe.is_open()) {
    std::cout << "MiMi:\tInput file can't be open" << std::endl;
    exit(0);
  }

  switch (intype) {
    case 1:
      sam = readFas(itn);
      break;
    case 2:
      sam = readNex(itn);
      break;
    case 3:
      sam = readPhy(itn);
      break;
    case 4:
      sam = readTnt(itn);
      break;
    default:
      break;
  }
  return sam;
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
void write_output(class Sample sam, char* otn, int outype) {
|
2023-03-20 14:30:42 +08:00
|
|
|
|
ofstream matrixfile(otn);
|
|
|
|
|
if (matrixfile.is_open()) {
|
2023-03-21 01:19:39 +08:00
|
|
|
|
if (outype == 1) writeFas(sam, otn);
|
|
|
|
|
if (outype == 2) writeNex(sam, otn);
|
|
|
|
|
if (outype == 3) writePhy(sam, otn);
|
|
|
|
|
if (outype == 4) writeTnt(sam, otn);
|
2023-03-20 14:30:42 +08:00
|
|
|
|
} else {
|
2023-03-20 18:34:38 +08:00
|
|
|
|
cout << "MiMi:\tOutput file can't be open" << endl;
|
2023-03-20 14:30:42 +08:00
|
|
|
|
exit(0);
|
|
|
|
|
}
|
2023-02-03 01:33:26 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
bool checkalign(class Sample sam) {
|
2023-03-20 18:34:38 +08:00
|
|
|
|
int a = 0, b = 0;
|
|
|
|
|
unsigned int x = 0;
|
2023-03-20 14:30:42 +08:00
|
|
|
|
a = sam.nchar;
|
2023-03-20 04:09:09 +08:00
|
|
|
|
bool aligned = true;
|
2023-03-20 18:34:38 +08:00
|
|
|
|
for (unsigned int i = 0; i < sam.ntax; i++) {
|
2023-03-20 14:45:56 +08:00
|
|
|
|
b = sam.chars[i].length();
|
|
|
|
|
if (a == b) {
|
|
|
|
|
x++;
|
|
|
|
|
}
|
2023-03-20 14:30:42 +08:00
|
|
|
|
}
|
2023-03-20 14:45:56 +08:00
|
|
|
|
if (x != sam.ntax) {
|
|
|
|
|
aligned = false;
|
2023-03-20 04:09:09 +08:00
|
|
|
|
}
|
|
|
|
|
return aligned;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-20 14:45:56 +08:00
|
|
|
|
int main(int argc, char** argv) {
|
2023-03-20 18:34:38 +08:00
|
|
|
|
Basic_arg arguvar = procargs(argc, argv);
|
2023-03-20 14:45:56 +08:00
|
|
|
|
if (arguvar.intype != 0 && arguvar.outype != 0) {
|
|
|
|
|
Sample sam = read_input(arguvar.itn, arguvar.intype);
|
|
|
|
|
cout << "MiMi:\tInput\tfinished" << endl;
|
|
|
|
|
if (!checkalign(sam)) {
|
|
|
|
|
cout << "MiMi:\tInput file should be aligned" << endl;
|
|
|
|
|
exit(0);
|
|
|
|
|
}
|
|
|
|
|
write_output(sam, arguvar.otn, arguvar.outype);
|
|
|
|
|
cout << "MiMi:\tOutput\tfinished" << endl;
|
2023-03-19 02:17:39 +08:00
|
|
|
|
}
|
2023-03-20 14:30:42 +08:00
|
|
|
|
return 0;
|
2023-02-03 01:33:26 +08:00
|
|
|
|
}
|