add: checkalign function and reformat argument handling

This commit is contained in:
kuoi 2023-03-20 14:30:42 +08:00
parent c080629b63
commit 5a337fd47a
1 changed files with 219 additions and 206 deletions

425
main.cpp
View File

@ -2,14 +2,19 @@
#include <fstream> #include <fstream>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector>
#include <algorithm> #include <algorithm>
using namespace std; using namespace std;
// Parsed command-line settings handed from procargs() to main().
//
// intype / outype are format codes as produced by checkextension():
// 0 = not set, 1 = fasta, 2 = nexus, 3 = phylip, 4 = tnt.
// itn / otn are the input and output file names; the pointers are only
// stored here, never allocated or freed by this class.
class Basic_arg{
public:
    int intype = 0;
    int outype = 0;
    char *itn = nullptr;
    char *otn = nullptr;

    Basic_arg(int intype, int outype, char* itn, char *otn):
        intype(intype), outype(outype),
        itn(itn), otn(otn)
    {}
};
class Sample { class Sample {
public: public:
@ -20,77 +25,72 @@ public:
taxas = new string[ntax]; taxas = new string[ntax];
chars = new string[ntax]; chars = new string[ntax];
}; };
// ~Sample(){
// delete[] taxas;
// delete[] chars;
// }
}; };
// Forward declarations (post-commit signatures: file names and format codes
// are passed explicitly instead of being read from globals).

// Command line handling.
Basic_arg procargs (int nargs, char ** arg, char* itn, char* otn);
void show_help(int help_num);
int checkextension(std::string str);

// Readers: one per supported input format, dispatched by read_input().
Sample read_input(char* itn, int intype);
Sample readFas(char* itn);
Sample readPhy(char* itn);
Sample readTnt(char* itn);
Sample readNex(char* itn);

// Writers: one per supported output format, dispatched by write_output().
void write_output(class Sample sam, char* otn, int outype);
void writeFas(class Sample sam, char* otn);
void writePhy(class Sample sam, char* otn);
void writeTnt(class Sample sam, char* otn);
void writeNex(class Sample sam, char* otn);

// Helpers.
bool isNum(std::string strnum);
bool checkalign(class Sample sam);
std::string to_lower(std::string stri);
std::string add_space(char x, std::string str_old);
std::string checktype(std::string str);
int countfre(std::string str, char c);
// Read a PHYLIP-style matrix from the file named itn.
//
// The first line supplies two whitespace-separated fields: the number of
// taxa, then the number of characters. Each of the following ntax lines
// supplies a taxon name followed by its sequence; only the first two
// whitespace-separated tokens of a line are used.
//
// Returns a Sample filled with the names and sequences that were read.
// Note: std::stoi throws if a header field does not start with a number,
// and the stream is not checked for a successful open here.
Sample readPhy(char* itn){
    std::ifstream matrixfile(itn);

    // Header line: "<ntax> <nchar>".
    std::string line;
    std::getline(matrixfile, line);
    std::istringstream header(line);
    std::string sntax, snseq;
    header >> sntax >> snseq;
    const int ntax = std::stoi(sntax);
    const int nchar = std::stoi(snseq);

    Sample sam(ntax, nchar);

    // One taxon per line: "<name> <sequence>".
    for (int lennum = 0; lennum < sam.ntax; lennum++) {
        std::getline(matrixfile, line);
        std::istringstream fields(line);
        fields >> sam.taxas[lennum] >> sam.chars[lennum];
    }

    // The stream closes itself when it goes out of scope.
    return sam;
}
Sample readFas(){ Sample readFas(char* itn){
int ntax, nchar, lnum; int ntax, nchar, lnum;
ifstream seqfile; ifstream matrixfile;
seqfile.open(fn); matrixfile.open(itn);
// check line number and taxa number // check line number and taxa number
ntax = 0; ntax = 0;
string temln; string temln;
for(lnum=0;getline(seqfile,temln);lnum++){ for(lnum=0;getline(matrixfile,temln);lnum++){
if(temln[0]=='>'){ if(temln[0]=='>'){
ntax++; ntax++;
} }
} }
cout << "lnum= " << lnum << ", ntax= " << ntax << endl; matrixfile.clear();
seqfile.clear(); matrixfile.seekg(0);
seqfile.seekg(0);
// check the nchar // check the nchar
string *str_a = new string; string *str_a = new string;
string *str_b = new string; string *str_b = new string;
int r = lnum/ntax; int r = lnum/ntax;
for (int i=0; i<r;i++){ for (int i=0; i<r;i++){
getline(seqfile, *str_a); getline(matrixfile, *str_a);
if(i>0){ if(i>0){
*str_b = *str_b + *str_a; *str_b = *str_b + *str_a;
} }
@ -100,42 +100,41 @@ Sample readFas(){
str_a = nullptr; str_a = nullptr;
delete str_b; delete str_b;
str_b = nullptr; str_b = nullptr;
cout << "ntax= " << ntax << ", nchar= " << nchar << endl; matrixfile.clear();
seqfile.clear(); matrixfile.seekg(0);
seqfile.seekg(0);
// create class // create class
Sample sam(ntax, nchar); Sample sam(ntax, nchar);
// get class // get class
string *str_c = new string; string *str_c = new string;
for (int a=1, b=0;a<=lnum;a++){ for (int a=1, b=0;a<=lnum;a++){
if(a%r==1){ if(a%r==1){
getline(seqfile,sam.taxas[b]); getline(matrixfile,sam.taxas[b]);
sam.taxas[b].erase(0,1); sam.taxas[b].erase(0,1);
} }
if(a%r>1){ if(a%r>1){
getline(seqfile,*str_c); getline(matrixfile,*str_c);
sam.chars[b] = sam.chars[b] + *str_c; sam.chars[b] = sam.chars[b] + *str_c;
} }
if(a%r==0){ if(a%r==0){
getline(seqfile,*str_c); getline(matrixfile,*str_c);
sam.chars[b] = sam.chars[b] + *str_c; sam.chars[b] = sam.chars[b] + *str_c;
b++; b++;
} }
} }
delete str_c; delete str_c;
str_c = nullptr; str_c = nullptr;
seqfile.close(); matrixfile.close();
return sam; return sam;
} }
Sample readTnt(){ Sample readTnt(char* itn){
int ntax, nchar; int ntax, nchar;
ifstream seqfile; ifstream matrixfile;
seqfile.open(fn); matrixfile.open(itn);
// get nchar and ntax // get nchar and ntax
string stri, sntax, snchar; string stri, sntax, snchar;
for(int i=0;i<1;){ for(int i=0;i<1;){
getline(seqfile,stri); getline(matrixfile,stri);
istringstream istr(stri); istringstream istr(stri);
istr >> snchar; istr >> sntax; istr >> snchar; istr >> sntax;
if(isNum(sntax) && isNum(snchar)){ if(isNum(sntax) && isNum(snchar)){
@ -149,12 +148,11 @@ Sample readTnt(){
// get class // get class
int lennum; int lennum;
for(lennum=0;lennum<sam.ntax;lennum++){ for(lennum=0;lennum<sam.ntax;lennum++){
getline(seqfile,stri); getline(matrixfile,stri);
istringstream istr(stri); istringstream istr(stri);
istr >> sam.taxas[lennum]; istr >> sam.chars[lennum]; istr >> sam.taxas[lennum]; istr >> sam.chars[lennum];
cout << "tax" << lennum << " is " << sam.taxas[lennum] << "; seq" << lennum << " is " << sam.chars[lennum] << endl;
} }
seqfile.close(); matrixfile.close();
return sam; return sam;
} }
@ -164,90 +162,67 @@ bool isNum(string strnum) {
return *p == 0; return *p == 0;
} }
Sample readNex(){ Sample readNex(char* itn){
int ntax, nchar; int ntax, nchar;
// open file // open file
ifstream seqfile; ifstream matrixfile;
seqfile.open(fn); matrixfile.open(itn);
// some tem // some tem
string snall, stri, str_a, str_b; string snall, stri, str_a, str_b;
bool found = false; bool found = false, found_ntax = false, found_nchar = false, found_equal = false;
bool found_ntax = false;
bool found_nchar = false;
bool found_equal = false;
char x = '='; char x = '=';
int lnum, e, eulnum; int lnum, eulnum;
e = 0;
// getline line by line // getline line by line
for(lnum=0;getline(seqfile,snall);lnum++){ for(lnum=0;getline(matrixfile,snall);lnum++){
str_a = to_lower(snall); str_a = to_lower(snall);
str_b = add_space(x,str_a); str_b = add_space(x,str_a);
istringstream istr(str_b); istringstream istr(str_b);
// convert to words // convert to words
// e will enter the ntax/nchar function just after statisfy the `=`
while(istr>> stri){ while(istr>> stri){
if(stri=="dimensions"){ if(stri=="dimensions"){
found = true; found = true;
} else if(stri=="ntax"){
found_ntax = true;
} else if(stri=="nchar"){
found_nchar = true;
} else if(stri=="="){
found_equal = true;
} else if (found_ntax&&found_equal){
if(stri.back()==';'){
stri.pop_back();
found = false;
}
ntax = stoi(stri);
found_equal = false;
found_ntax = false;
} else if (found_nchar&&found_equal){
if(stri.back()==';'){
stri.pop_back();
found = false;
}
nchar = stoi(stri);
found_equal = false;
found_nchar = false;
} else if (stri=="matrix"){
eulnum = lnum+1;
} }
if(stri=="ntax"){
found_ntax = true;
}
if(stri=="nchar"){
found_nchar = true;
}
if(stri=="="){
found_equal = true;
}
if (found_ntax&&found_equal){
e++;
if(e>1){
if(stri.back()==';'){
stri.pop_back();
found = false;
}
ntax = stoi(stri);
found_equal = false;
found_ntax = false;
e=0;
}
}
if (found_nchar&&found_equal){
e++;
if(e>1){
if(stri.back()==';'){
stri.pop_back();
found = false;
}
nchar = stoi(stri);
found_equal = false;
found_nchar = false;
e=0;
}
}
if(stri=="matrix"){
// get the position of matrix
eulnum = lnum+1;
}
} }
} }
// go back // go back
seqfile.clear(); matrixfile.clear();
seqfile.seekg(0); matrixfile.seekg(0);
// create class // create class
Sample sam(ntax,nchar); Sample sam(ntax,nchar);
// some temp, z is line number, l is the string arrary number // some temp, z is line number, l is the string arrary number
int z=0; int l=0; int z=0; int l=0;
// read line by line // read line by line
while(getline(seqfile,snall)){ while(getline(matrixfile,snall)){
// convert to word // convert to word
istringstream istr(snall); istringstream istr(snall);
// limit the read line number // limit the read line number
if(z>(eulnum-1)&&z<(eulnum+sam.ntax)){ if(z>(eulnum-1)&&z<(eulnum+sam.ntax)){
istr >> sam.taxas[l]; istr >> sam.taxas[l];
istr >> sam.chars[l]; istr >> sam.chars[l];
cout << "tax" << l << " is " << sam.taxas[l] << "; seq" << l << " is " << sam.chars[l] << "; l=" << l << endl;
l++; l++;
} }
z++; z++;
@ -272,58 +247,43 @@ string to_lower(string stri){
return stri; return stri;
} }
// Write sam to the file otn in FASTA form: for every taxon, a ">name" line
// followed by one line holding its sequence.
//
// The open is not checked here; write_output() probes the path before it
// dispatches to this function.
void writeFas(class Sample sam, char* otn){
    std::ofstream matrixfile(otn);
    for (int i = 0; i < sam.ntax; i++) {
        // '\n' instead of endl: same bytes, without a flush per line.
        matrixfile << '>' << sam.taxas[i] << '\n'
                   << sam.chars[i] << '\n';
    }
    // The stream flushes and closes itself when it goes out of scope.
}
// Write sam to the file otn in PHYLIP form: a "<ntax> <nchar>" header line,
// then one "<name>\t<sequence>" line per taxon.
//
// The open is not checked here; write_output() probes the path before it
// dispatches to this function.
void writePhy(class Sample sam, char* otn){
    std::ofstream matrixfile(otn);
    matrixfile << sam.ntax << ' ' << sam.nchar << '\n';
    for (int i = 0; i < sam.ntax; i++) {
        // '\n' instead of endl: same bytes, without a flush per line.
        matrixfile << sam.taxas[i] << '\t' << sam.chars[i] << '\n';
    }
    // The stream flushes and closes itself when it goes out of scope.
}
void writeNex(class Sample sam){ void writeNex(class Sample sam, char* otn){
ofstream outputFile(otn); ofstream matrixfile(otn);
string datatype; string datatype;
datatype = checktype(sam.chars[0]); datatype = checktype(sam.chars[0]);
if (outputFile.is_open()) { matrixfile << "#NEXUS" << endl;
outputFile << "#NEXUS" << endl; matrixfile << "Begin data;" << endl << "\tDimensions nchar=" << sam.nchar << " ntax=" << sam.ntax << ";" << endl << "\tFormat datatype=" << datatype << " missing=? gap=-;" << endl << "\tMatrix" << endl;
// outputFile << "Begin TAXA;" << endl << "\tDimensions ntax=" << sam.ntax << ";" << endl << "\tTaxLabels";
// for(int i1=0;i1<sam.ntax;i1++){
// outputFile << " " << sam.taxas[i1];
// }
// outputFile << ";" << endl << "End;" << endl << endl;
outputFile << "Begin data;" << endl << "\tDimensions nchar=" << sam.nchar << " ntax=" << sam.ntax << ";" << endl << "\tFormat datatype=" << datatype << " missing=? gap=-;" << endl << "\tMatrix" << endl;
for(int i2=0;i2<sam.ntax;i2++){ for(int i2=0;i2<sam.ntax;i2++){
outputFile << "\t\t" << sam.taxas[i2] << "\t" << sam.chars[i2] << endl; matrixfile << "\t\t" << sam.taxas[i2] << "\t" << sam.chars[i2] << endl;
} }
outputFile << "\t;" << endl << "End;" << endl; matrixfile << "\t;" << endl << "End;" << endl;
} else { matrixfile.close();
cout << "File can't be written" << endl;
}
outputFile.close();
} }
string checktype(string str){ string checktype(string str){
// some var
float a, c, t, g, zero, one, two, dna, standard; float a, c, t, g, zero, one, two, dna, standard;
char ca='a', cc='c', ct='t', cg='g', czero='0', cone='1', ctwo='2'; char ca='a', cc='c', ct='t', cg='g', czero='0', cone='1', ctwo='2';
string datatype; string datatype;
// count fre
a = countfre(str, ca); a = countfre(str, ca);
c = countfre(str, cc); c = countfre(str, cc);
t = countfre(str, ct); t = countfre(str, ct);
@ -331,10 +291,10 @@ string checktype(string str){
zero = countfre(str, czero); zero = countfre(str, czero);
one = countfre(str, cone); one = countfre(str, cone);
two = countfre(str, ctwo); two = countfre(str, ctwo);
// summary dna or morphology
dna = a+c+t+g; dna = a+c+t+g;
standard = zero+one+two; standard = zero+one+two;
// use percentage to test
if((dna/str.length())>0.7){ if((dna/str.length())>0.7){
datatype = "dna"; datatype = "dna";
} else if ((standard/str.length())>0.7){ } else if ((standard/str.length())>0.7){
@ -352,107 +312,160 @@ int countfre(string str, char c){
return num; return num;
} }
// Write sam to the file otn in TNT form: "xread", an (empty) quoted title
// line, a "<nchar> <ntax>" line, one "<name>\t<sequence>" line per taxon and
// the closing "proc / ;".
//
// The open is not checked here; write_output() probes the path before it
// dispatches to this function.
void writeTnt(class Sample sam, char* otn){
    std::ofstream matrixfile(otn);
    matrixfile << "xread" << '\n' << "' '" << '\n';
    // TNT lists the character count first, then the taxon count.
    matrixfile << sam.nchar << " " << sam.ntax << '\n';
    for (int i = 0; i < sam.ntax; i++) {
        matrixfile << sam.taxas[i] << "\t" << sam.chars[i] << '\n';
    }
    matrixfile << "proc / ;" << '\n';
    // The stream flushes and closes itself when it goes out of scope.
}
void procargs (int nargs, char ** arg){ //*arg 视为整体,是字符串指针 Basic_arg procargs (int nargs, char ** arg, char* itn, char* otn){
int i; int i, sta = 0, intype=0, outype=0;
char * cp;//cp 字符指针 *p 第一个char string para, inputfile, outputfile;
//string ext1, ext2;
//no arg, show help //no arg, show help
if (nargs==1){ if (nargs==1){
show_help(0); show_help(0);
exit(0); exit(0);
sta=2;
} }
//recognize arg //recognize arg
for (i=1;i<nargs;i++){ for (i=1;i<nargs;i++){
cp = *(arg+i); //to string
//error arg string para (arg[i]);
if (*cp != '-'){ if ( para =="-h"| para=="--help" ) {
cout << "Unknown arguments, please use -h to check" << endl; show_help(1);
sta=2;
}
else if ( para =="-i"| para=="--input"){
i++;
itn=arg[i];
string inputfile (arg[i]);
intype = checkextension(inputfile);
sta++;
}
else if ( para =="-o"| para=="--output"){
i++;
otn=arg[i];
string outputfile (arg[i]);
outype = checkextension(outputfile);
sta++;
} else {
cout << "MiMi\tUnknown arguments, please use -h to check" << endl;
exit(0); exit(0);
} }
//check arg
switch (*++cp){//先++
case 'f':
cp++;
if (*cp=='f') {intype=1; cout << "intype is fasta, " << intype << endl;}
if (*cp=='n') intype=2;
if (*cp=='p') {intype=3; cout << "intype is phylip, " << intype << endl;}
if (*cp=='t') {intype=4;}
break;
case 'h': show_help(1); break;
case 'i': cp++; fn = cp; break;
case 'o': cp++; otn = cp; break;
case 't':
cp++;
if (*cp=='f') outype=1;
if (*cp=='n') outype=2;
if (*cp=='p') {outype=3; cout << "outype is phylip, " << intype << endl;}
if (*cp=='t') outype=4;
break;
default: cout << "Unrecognized agruments " << *(arg+i) << endl; exit(0);
}
} }
if(sta!=2){
cout << "MiMi\tInput and Output can't be empty" << endl;
exit(0);
}
Basic_arg arguvar(intype,outype,itn,otn);
return arguvar;
}
int checkextension(string str){
int loc, type;
string extension;
loc = str.rfind('.');
if(loc){
extension = str.substr(loc+1);
} else {
cout << "MiMi\tPlease sepecifc the extension name" << endl;
exit(0);
}
extension = to_lower(extension);
if(extension=="fas"|extension=="fasta"){
type = 1;
} else if (extension=="nex"|extension=="nexus"){
type = 2;
} else if (extension=="phy"|extension=="phylip"){
type = 3;
} else if (extension=="tnt"|extension=="ss"){
type = 4;
} else {
cout << "MiMi\tUnknown format" << endl;
exit(0);
}
return type;
} }
// Print usage information to standard output.
//
// help_num == 0 : one-line banner pointing at -h.
// anything else : logo, credits, the option list and the accepted file
//                 extensions.
void show_help (int help_num){
    if (help_num == 0) {
        std::cout << "MiMi, GPL, Guoyi Zhang, 2023.\nPlease use -h to see more help" << std::endl;
        return;
    }

    // Logo.
    std::cout << "\n l、 \t. . . .\n(゚、 。 \t|\\/|*|\\/|*\n l ~ヽ \t| ||| ||\n じしf_,)\t| ||| ||\n" << std::endl;
    // Name and credits.
    std::cout << "Morphology into Molecules into\n" << "GPL\tGuoyi\tZhang,\t2023\n" << std::endl;
    // Options.
    std::cout << "-h\t--help;\n-i\t--input\t\t${filename};\n-o\t--output\t${filename};\n" << std::endl;
    // Extensions understood for input and output files.
    std::cout << "Accepted format:\nfas\tfasta;\nnex\tnexus\nphy\tphylip\ntnt\tss" << std::endl;
}
// Read the matrix in file itn using the reader that matches intype
// (1 = fasta, 2 = nexus, 3 = phylip, 4 = tnt).
//
// Exits the process (status 0) with a message when the file cannot be
// opened. For any other intype an empty Sample (0 taxa, 0 characters) is
// returned.
Sample read_input (char* itn, int intype){
    // Probe the path first so an unreadable file gives a clear message. The
    // probe is closed before the reader opens the file itself.
    {
        std::ifstream probe(itn);
        if (!probe.is_open()) {
            std::cout << "MiMi\tInput file can't be open" << std::endl;
            exit(0);
        }
    }

    switch (intype) {
        case 1: return readFas(itn);
        case 2: return readNex(itn);
        case 3: return readPhy(itn);
        case 4: return readTnt(itn);
        // Explicit zero sizes: the placeholder must not be built from
        // uninitialised counts.
        default: return Sample(0, 0);
    }
}
void write_output (class Sample sam){ void write_output (class Sample sam, char* otn, int outype){
if (outype==1) writeFas(sam); ofstream matrixfile(otn);
if (outype==2) writeNex(sam); if (matrixfile.is_open()) {
if (outype==3) writePhy(sam); if (outype==1) writeFas(sam,otn);
if (outype==4) writeTnt(sam); if (outype==2) writeNex(sam,otn);
if (outype==3) writePhy(sam,otn);
if (outype==4) writeTnt(sam,otn);
} else {
cout << "MiMi\tOutput file can't be open" << endl;
exit(0);
}
} }
// Report whether the matrix is aligned, i.e. whether every one of the
// sam.ntax sequences has exactly sam.nchar characters.
//
// Returns true for a Sample without taxa.
bool checkalign(class Sample sam){
    for (int i = 0; i < sam.ntax; i++) {
        // The first sequence of the wrong length settles the answer.
        if (static_cast<int>(sam.chars[i].length()) != sam.nchar) {
            return false;
        }
    }
    return true;
}
int main(int argc, char **argv){ int main(int argc, char **argv){
procargs (argc, argv); char *itn, *otn;
if(intype!=0&&outype!=0){ Basic_arg arguvar = procargs (argc, argv, itn, otn);
Sample sam = read_input(); if(arguvar.intype!=0&&arguvar.outype!=0){
write_output(sam); Sample sam = read_input(arguvar.itn,arguvar.intype);
cout << "MiMi:\tInput\tfinished" << endl;
if(!checkalign(sam)){
cout << "MiMi:\tInput file should be aligned" << endl;
exit(0);
} }
int i; write_output(sam,arguvar.otn,arguvar.outype);
for (i = 0; i < argc; i++) cout << "argument "<< i << " is " << *(argv+i) << endl; cout << "MiMi:\tOutput\tfinished" << endl;
return 1; }
return 0;
} }