commit c601a8b65f1832439e64e37d3137fe0cf5ed10b3
Author: Guoyi Zhang
Date:   Tue May 7 12:33:05 2024 +0800

    init

diff --git a/gene_marker.cpp b/gene_marker.cpp
new file mode 100644
index 0000000..ee03857
--- /dev/null
+++ b/gene_marker.cpp
@@ -0,0 +1,88 @@
+/*
+ * Author: Guoyi Zhang
+ * Date: 07 May 2024
+ * License: GPLv2
+ * Function: Generate fasta file which `m`
+ *           means selected position by Gblocks
+ *           from Gblocks html report file.
+ * */
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <regex>
+
+bool checkArguments(int argc, char* argv[]) {
+    if (argc != 3) {
+        std::cerr << "Usage: " << argv[0]
+                  << " input_file output_file\n";
+        return false;
+    }
+    return true;
+}
+
+bool openFiles(std::ifstream& input, std::ofstream& output,
+               const std::string& inputFilename,
+               const std::string& outputFilename) {
+    input.open(inputFilename);
+    if (!input.is_open()) {
+        std::cerr << "Unable to open input file: " << inputFilename
+                  << std::endl;
+        return false;
+    }
+
+    output.open(outputFilename);
+    if (!output.is_open()) {
+        std::cerr << "Unable to open output file: " << outputFilename
+                  << std::endl;
+        input.close();
+        return false;
+    }
+
+    return true;
+}
+
+void closeFiles(std::ifstream& input, std::ofstream& output) {
+    input.close();
+    output.close();
+}
+
+void processFile(const std::string& inputFilename,
+                 const std::string& outputFilename) {
+    std::ifstream input;
+    std::ofstream output;
+
+    if (!openFiles(input, output, inputFilename, outputFilename)) {
+        return;
+    }
+
+    output << ">marker" << std::endl;
+
+    std::regex pattern("class=BL|^\\s{70}");
+    std::string line;
+    while (std::getline(input, line)) {
+        if (std::regex_search(line, pattern)) {
+            std::string processed_line = std::regex_replace(
+                line,
+                std::regex("<span class=BL>|</span>|^\\s{17}"), "");
+            std::replace(processed_line.begin(),
+                         processed_line.end(), ' ', '-');
+            std::replace(processed_line.begin(),
+                         processed_line.end(), '#', 'm');
+            output << processed_line << std::endl;
+        }
+    }
+
+    closeFiles(input, output);
+}
+
+int main(int argc, char* argv[]) {
+    if (!checkArguments(argc, argv)) {
+        return 1;
+    }
+
+    processFile(argv[1], argv[2]);
+
+    return 0;
+}
+
diff --git a/trim_marker.cpp b/trim_marker.cpp
new file mode 100644
index 0000000..8c04ce1
--- /dev/null
+++ b/trim_marker.cpp
@@ -0,0 +1,115 @@
+/*
+ * Author: Guoyi Zhang
+ * Date: 07 May 2024
+ * License: GPLv2
+ * Function: Trim the multi-sequence-
+ *           alignment based on the marker
+ *           which `m` indicates selected
+ *           position.
+ * */
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+// Function: check the arg
+void check_argument_count(int argc, char* argv[]) {
+    if (argc != 4) {
+        std::cerr
+            << "Usage: " << argv[0]
+            << " <marker_file> <input_fasta_file> <output_fasta_file>"
+            << std::endl;
+        exit(1);
+    }
+}
+
+// Function: check the file
+void check_file_exists(const std::string& file) {
+    std::ifstream infile(file);
+    if (!infile.good()) {
+        std::cerr << "Error: file " << file << " doesn't exsit"
+                  << std::endl;
+        exit(1);
+    }
+}
+
+// Function: get the positions in marker file
+std::vector<int> read_positions(const std::string& file) {
+    std::ifstream infile(file);
+    std::string line;
+    std::vector<int> positions;
+    int index = 1;
+    while (std::getline(infile, line)) {
+        if (line[0] != '>') {
+            for (char c : line) {
+                if (c == 'm' || c == 'M') {
+                    positions.push_back(index);
+                }
+                index++;
+            }
+        }
+    }
+    return positions;
+}
+
+// Function: trimming the sequences in input_fasta_file
+void extract_characters(const std::string& line, const std::string& file,
+                        const std::vector<int> positions) {
+    std::ofstream outfile(file, std::ios::app);
+    std::string new_line;
+    for (int pos : positions) {
+        if (pos <= line.length()) {
+            new_line += line[pos - 1];  // start from 0
+        }
+    }
+    for (size_t i = 0; i < new_line.length(); i += 60) {
+        int num_chars =
+            std::min(static_cast<int>(new_line.length() - i), 60);
+        outfile << new_line.substr(i, num_chars) << std::endl;
+    }
+    outfile.close();
+}
+
+// Function: parse the fasta multi-lines format
+void process_fasta_file(const std::string& fasta_file,
+                        const std::string& output_file,
+                        const std::vector<int> positions) {
+    std::ifstream infile(fasta_file);
+    std::string line;
+    std::string prev_line = "";
+    bool prev_line_is_header = false;
+    while (std::getline(infile, line)) {
+        if (line[0] == '>') {
+            if (!prev_line_is_header) {
+                extract_characters(prev_line, output_file,
+                                   positions);
+            }
+            std::ofstream outfile(output_file, std::ios::app);
+            outfile << line << std::endl;
+            outfile.close();
+            prev_line_is_header = true;
+        } else {
+            if (prev_line_is_header) {
+                prev_line = "";
+                prev_line_is_header = false;
+            }
+            prev_line += line;
+        }
+    }
+    extract_characters(prev_line, output_file, positions);
+}
+
+int main(int argc, char* argv[]) {
+    check_argument_count(argc, argv);
+
+    for (int i = 1; i <= 2; i++) {
+        check_file_exists(argv[i]);
+    }
+
+    std::vector<int> positions = read_positions(argv[1]);
+
+    process_fasta_file(argv[2], argv[3], positions);
+
+    return 0;
+}