polish: use struct and map
This commit is contained in:
parent
69bf191a83
commit
9e7602de4b
1 changed files with 76 additions and 55 deletions
131
sortdiamond.cpp
131
sortdiamond.cpp
|
@ -10,6 +10,15 @@ using namespace std;
|
||||||
const int n_intarr = 5;
|
const int n_intarr = 5;
|
||||||
bool use_bitscore = true;
|
bool use_bitscore = true;
|
||||||
|
|
||||||
|
// Struct to store the data fields
|
||||||
|
struct SeqData {
|
||||||
|
string sseqid;
|
||||||
|
int qstart;
|
||||||
|
int qend;
|
||||||
|
double bit_or_e;
|
||||||
|
string qseq;
|
||||||
|
};
|
||||||
|
|
||||||
// Function to generate reverse complement of a DNA sequence
|
// Function to generate reverse complement of a DNA sequence
|
||||||
string revcomp(const string &seq) {
|
string revcomp(const string &seq) {
|
||||||
string revseq;
|
string revseq;
|
||||||
|
@ -67,8 +76,7 @@ string revcomp(const string &seq) {
|
||||||
return revseq;
|
return revseq;
|
||||||
}
|
}
|
||||||
|
|
||||||
void readInputFile(const string &filename,
|
void readInputFile(const string &filename, vector<SeqData> &data_vector,
|
||||||
map<string, pair<double, string>> &max_map,
|
|
||||||
const int *intnums, const int intmax) {
|
const int *intnums, const int intmax) {
|
||||||
ifstream infile(filename);
|
ifstream infile(filename);
|
||||||
if (!infile) {
|
if (!infile) {
|
||||||
|
@ -84,59 +92,69 @@ void readInputFile(const string &filename,
|
||||||
while (iss >> fields[i] && i < (intmax + 1)) {
|
while (iss >> fields[i] && i < (intmax + 1)) {
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
// subject seq id
|
// Extract fields
|
||||||
string seqid = fields[intnums[0]];
|
SeqData data;
|
||||||
double bitE = stod(fields[intnums[3]]);
|
data.sseqid = fields[intnums[0]];
|
||||||
if (use_bitscore) {
|
data.qstart = stoi(fields[intnums[1]]);
|
||||||
// bit score
|
data.qend = stoi(fields[intnums[2]]);
|
||||||
// Check if the seqid already exists in the map
|
data.bit_or_e = stod(fields[intnums[3]]);
|
||||||
if (max_map.find(seqid) != max_map.end()) {
|
data.qseq = fields[intnums[4]];
|
||||||
// If the new bitscore is greater, update the
|
|
||||||
// map
|
// Store the data
|
||||||
if (bitE > max_map[seqid].first) {
|
data_vector.push_back(data);
|
||||||
max_map[seqid] = make_pair(bitE, line);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// If the seqid does not exist, insert the new
|
|
||||||
// seqid-bitE pair
|
|
||||||
max_map[seqid] = make_pair(bitE, line);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// evalue
|
|
||||||
// Check if the seqid already exists in the map
|
|
||||||
if (max_map.find(seqid) != max_map.end()) {
|
|
||||||
// If the new evalue is greater, update the
|
|
||||||
// map
|
|
||||||
if (bitE < max_map[seqid].first) {
|
|
||||||
max_map[seqid] = make_pair(bitE, line);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// If the seqid does not exist, insert the new
|
|
||||||
// seqid-bitE pair
|
|
||||||
max_map[seqid] = make_pair(bitE, line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
infile.close();
|
infile.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
void processMap(const map<string, pair<double, string>> &max_map,
|
void processCompare(const vector<SeqData> &data_vector,
|
||||||
vector<pair<string, string>> &result, const int *intnums,
|
map<string, SeqData> &best_map) {
|
||||||
const int intmax) {
|
for (const auto &data : data_vector) {
|
||||||
for (const auto &entry : max_map) {
|
double bitE = data.bit_or_e;
|
||||||
istringstream iss(entry.second.second);
|
const string &sseqid = data.sseqid;
|
||||||
vector<string> fields(intmax + 1);
|
|
||||||
// string fields[intmax + 1];
|
if (use_bitscore) {
|
||||||
int i = 0;
|
// bit score
|
||||||
while (iss >> fields[i] && i < (intmax + 1)) {
|
// Check if the seqid already exists in the map
|
||||||
i++;
|
if (best_map.find(sseqid) != best_map.end()) {
|
||||||
|
// If the new bitscore is greater, update the
|
||||||
|
// map
|
||||||
|
if (bitE > best_map[sseqid].bit_or_e) {
|
||||||
|
best_map[sseqid] = data;
|
||||||
}
|
}
|
||||||
// check if qstart is larger than qend
|
} else {
|
||||||
if (stoi(fields[intnums[1]]) > stoi(fields[intnums[2]])) {
|
// If the seqid does not exist, insert the new
|
||||||
fields[intnums[4]] = revcomp(fields[intnums[4]]);
|
// seqid-bitE pair
|
||||||
|
best_map[sseqid] = data;
|
||||||
}
|
}
|
||||||
result.push_back(
|
} else {
|
||||||
make_pair(">" + fields[intnums[0]], fields[intnums[4]]));
|
// evalue
|
||||||
|
// Check if the seqid already exists in the map
|
||||||
|
if (best_map.find(sseqid) != best_map.end()) {
|
||||||
|
// If the new evalue is greater, update the
|
||||||
|
// map
|
||||||
|
if (bitE < best_map[sseqid].bit_or_e) {
|
||||||
|
best_map[sseqid] = data;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If the seqid does not exist, insert the new
|
||||||
|
// seqid-bitE pair
|
||||||
|
best_map[sseqid] = data;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void processRevert(const map<string, SeqData> &best_map,
|
||||||
|
vector<pair<string, string>> &result) {
|
||||||
|
for (const auto &entry : best_map) {
|
||||||
|
const SeqData &data = entry.second;
|
||||||
|
|
||||||
|
// Check if qstart is larger than qend
|
||||||
|
string qseq = data.qseq;
|
||||||
|
if (data.qstart > data.qend) {
|
||||||
|
qseq = revcomp(qseq);
|
||||||
|
}
|
||||||
|
result.push_back(make_pair(">" + data.sseqid, qseq));
|
||||||
}
|
}
|
||||||
sort(result.begin(), result.end());
|
sort(result.begin(), result.end());
|
||||||
}
|
}
|
||||||
|
@ -148,7 +166,6 @@ void writeOutputFile(const string &filename,
|
||||||
cerr << "Error opening output file: " << filename << endl;
|
cerr << "Error opening output file: " << filename << endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &entry : result) {
|
for (const auto &entry : result) {
|
||||||
outfile << entry.first << "\n" << entry.second << "\n";
|
outfile << entry.first << "\n" << entry.second << "\n";
|
||||||
}
|
}
|
||||||
|
@ -188,12 +205,10 @@ int main(int argc, char *argv[]) {
|
||||||
use_bitscore = true;
|
use_bitscore = true;
|
||||||
} else if (tmpstri == "evalue") {
|
} else if (tmpstri == "evalue") {
|
||||||
use_bitscore = false;
|
use_bitscore = false;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
cout << "Unknown argument: " << argv[4] << endl;
|
cout << "Unknown argument: " << argv[4] << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (argc != 3) {
|
} else if (argc != 3) {
|
||||||
cerr << "Usage: " << argv[0]
|
cerr << "Usage: " << argv[0]
|
||||||
<< " <input_file> <output_file> "
|
<< " <input_file> <output_file> "
|
||||||
|
@ -208,11 +223,17 @@ int main(int argc, char *argv[]) {
|
||||||
string in_name = argv[1];
|
string in_name = argv[1];
|
||||||
string ot_name = argv[2];
|
string ot_name = argv[2];
|
||||||
|
|
||||||
map<string, pair<double, string>> max_map;
|
// get useful information from reading input file
|
||||||
readInputFile(in_name, max_map, intnums, intmax);
|
vector<SeqData> data_vector;
|
||||||
|
readInputFile(in_name, data_vector, intnums, intmax);
|
||||||
|
|
||||||
|
// calculate best bitscore or evalue
|
||||||
|
map<string, SeqData> best_map;
|
||||||
|
processCompare(data_vector, best_map);
|
||||||
|
|
||||||
|
// write the result
|
||||||
vector<pair<string, string>> result;
|
vector<pair<string, string>> result;
|
||||||
processMap(max_map, result, intnums, intmax);
|
processRevert(best_map, result);
|
||||||
|
|
||||||
writeOutputFile(ot_name, result);
|
writeOutputFile(ot_name, result);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue