2024-09-08 02:48:54 +08:00
|
|
|
#!/usr/bin/env rdmd
|
|
|
|
|
|
|
|
import std.stdio;
|
|
|
|
import std.file;
|
|
|
|
import std.process;
|
|
|
|
import std.algorithm;
|
|
|
|
import std.conv;
|
|
|
|
import std.array;
|
|
|
|
import std.path;
|
2024-09-09 01:19:42 +08:00
|
|
|
import std.parallelism;
|
2024-09-09 09:33:50 +08:00
|
|
|
import std.regex;
|
2024-09-08 02:48:54 +08:00
|
|
|
|
|
|
|
/**
 * Print the program banner and the command-line usage to stdout.
 *
 * Params:
 *   pkgver = version string inserted after "Version: "
 */
void show_help(string pkgver) {
    // The whole help text is a multi-line string literal split around pkgver,
    // so its continuation lines must stay at column 0 to keep the output
    // unchanged. The first line is the banner wrapped in ANSI colour escapes.
    // The gene option is documented as "--gene" because that is the long
    // spelling the argument parser in main() accepts.
    writeln("\t\t\t\t\t\033[0;47;31mR\033[0m\033[0;47;92mG\033[0m\033[0;47;94mB\033[0m\033[0;47m \033[0m\033[0;47;33mE\033[0m\033[0;47;94mP\033[0m\033[0;47;33mP\033[0m
\t\t\tReference Genome based Exon Phylogeny Pipeline
Version: ", pkgver, "
License: GPL-2.0-only
Author: Guoyi Zhang
-c\t--config\tconfig file for software path (optional)
-f\t--functions\tfunctions type (optional): all clean assembly
\t \t map postmap varcall consen codon align trim
-g\t--gene\t\tgene file path (optional, if -r is specified)
-h\t--help\t\tshow this information
-l\t--list\t\tlist file path
-m\t--memory\tmemory settings (optional, default 16 GB)
-r\t--reference\treference genome path
-t\t--threads\tthreads setting (optional, default 8 threads)
--codon\t\tOnly use the codon region (optional)
--fastp\t\tFastp path (optional)
--spades\t\tSpades python path (optional)
--diamond\t\tDiamond python path (optional)
--sortdiamond\tSortDiamond python path (optional)
--bowtie2\t\tBowtie2 path (optional)
--samtools\t\tSamtools path (optional)
--bcftools\t\tBcftools path (optional)
--exonerate\t\tExonerate path (optional)
--macse\t\tMacse jarfile path (optional)
--delstop\t\tDelstop path (optional)
--trimal\t\tTrimal path (optional)
for example: ./RGBEPP -f all -l list -t 8 -r reference.fasta \n");
}
|
|
|
|
|
2024-09-11 10:13:14 +08:00
|
|
|
/**
 * Check whether a Java runtime can be launched.
 *
 * Runs `java -version`. A message is written to stdout when the check fails.
 *
 * Returns: true when the command starts and exits with status 0,
 *          false when it exits non-zero or cannot be started at all.
 */
bool testJava() {
    try {
        if (execute(["java", "-version"]).status == 0) {
            return true;
        }
    } catch (ProcessException e) {
        // execute() throws instead of returning a status when the executable
        // cannot be started (e.g. java is not on PATH); treat that as "not found".
    }
    writeln("Error: Java is not found");
    return false;
}
|
|
|
|
|
|
|
|
/**
 * Verify that every non-empty path in the list exists.
 *
 * Each missing path is reported on stdout; all paths are checked even after
 * the first failure. Empty strings are skipped.
 *
 * Params:
 *   filePaths = paths to check
 * Returns: true when no non-empty path is missing.
 */
bool testFiles(string[] filePaths) {
    bool allPresent = true;
    foreach (candidate; filePaths) {
        if (candidate == "" || exists(candidate)) {
            continue;
        }
        writeln("Error: " ~ candidate ~ " does not exists.");
        allPresent = false;
    }
    return allPresent;
}
|
|
|
|
|
2024-12-08 15:08:00 +08:00
|
|
|
/**
 * Tell whether a string argument was supplied.
 *
 * Params:
 *   input = string to inspect
 * Returns: true when input has at least one character.
 */
bool testString(string input) {
    if (input.length == 0) {
        return false;
    }
    return true;
}
|
|
|
|
|
|
|
|
/**
 * Tell whether a string list argument was supplied.
 *
 * Params:
 *   input = list to inspect
 * Returns: true when input holds at least one element.
 */
bool testStringArray(string[] input) {
    return input.length != 0;
}
|
|
|
|
|
2024-09-08 02:48:54 +08:00
|
|
|
/**
 * Create a directory unless the path already exists.
 *
 * Missing parent directories are created as well, so nested output
 * locations work without the caller preparing each level.
 *
 * Params:
 *   path = directory to create
 * Throws: FileException when the directory cannot be created.
 */
void createDir(string path) {
    if (!exists(path)) {
        // mkdir() fails when the parent directory is missing; mkdirRecurse()
        // builds the whole chain.
        mkdirRecurse(path);
    }
}
|
|
|
|
|
2024-12-09 12:23:55 +08:00
|
|
|
/**
 * Rename (move) a directory and report the outcome on stdout.
 *
 * A failure is reported, not propagated: the function returns normally
 * even when the rename did not happen.
 *
 * Params:
 *   oldPath = current location
 *   newPath = desired location
 */
void moveDir (string oldPath, string newPath) {
    try {
        rename(oldPath, newPath);
    } catch (Exception failure) {
        writeln("Error renaming directory: ", failure.msg);
        return;
    }
    writeln("Directory renamed successfully.");
}
|
|
|
|
|
2024-09-09 01:19:42 +08:00
|
|
|
/**
 * Run an external command and wait for it to finish.
 *
 * The child uses the default standard streams of spawnProcess. A non-zero
 * exit status is reported on stdout and is not raised as an error.
 *
 * Params:
 *   cmd = program followed by its arguments
 */
void executeCommand(string[] cmd) {
    auto exitCode = wait(spawnProcess(cmd));
    if (exitCode == 0) {
        return;
    }
    writeln("Error executing command: ", cmd.join(" "));
}
|
|
|
|
|
2024-12-09 12:23:55 +08:00
|
|
|
/**
 * Run an external command and store its standard output in a file.
 *
 * The child's stdout is connected directly to the output file, so the
 * output is never held in memory. The file is created (or truncated)
 * before the command starts. A non-zero exit status is reported on stdout.
 *
 * Params:
 *   cmd        = program followed by its arguments
 *   outputFile = path of the file that receives the command's stdout
 */
void executeCommandToFile(string[] cmd, string outputFile) {
    auto sink = File(outputFile, "w");

    // The child inherits our stdin and writes straight into the file.
    auto pid = spawnProcess(cmd, stdin, sink);

    if (wait(pid) != 0) {
        writeln("Error executing command: ", cmd.join(" "));
    }
}
|
|
|
|
|
|
|
|
|
2024-09-08 02:48:54 +08:00
|
|
|
/**
 * Run several commands as a pipeline: each command's stdout feeds the
 * next command's stdin, like `a | b | c` in a shell.
 *
 * The first command reads this process's stdin and the last one writes to
 * this process's stdout. All children are waited for before returning,
 * and every non-zero exit status is reported on stdout.
 *
 * Params:
 *   cmds = the pipeline stages, each a program followed by its arguments;
 *          an empty list is a no-op and a single stage runs on its own
 */
void executeCommandPipe(string[][] cmds) {
    if (cmds.length == 0) {
        return;
    }

    Pid[] pids;
    scope(exit) {
        // pids[k] belongs to cmds[k]; reap every child that was started,
        // even if a later spawn failed.
        foreach (idx, pid; pids) {
            if (wait(pid) != 0) {
                writeln("Error executing command: ", cmds[idx].join(" "));
            }
        }
    }

    // Input of the stage about to be started.
    File upstream = stdin;

    // Every stage except the last writes into a fresh pipe.
    foreach (stage; cmds[0 .. $ - 1]) {
        auto link = pipe();
        pids ~= spawnProcess(stage, upstream, link.writeEnd);
        upstream = link.readEnd;
    }

    // Final stage writes to our stdout.
    pids ~= spawnProcess(cmds[$ - 1], upstream, stdout);
}
|
|
|
|
|
|
|
|
/**
 * Read a text file and return its whitespace-separated words.
 *
 * Read failures are reported on stdout and yield an empty result.
 *
 * Params:
 *   filename = file to read
 * Returns: the words of the file in order, or an empty array on error.
 */
string[] readArrFromFile(string filename) {
    try {
        return array(splitter(readText(filename)));
    } catch (FileException ioFailure) {
        writeln("Error reading file: ", ioFailure.msg);
    } catch (Exception otherFailure) {
        writeln("Exception: ", otherFailure.msg);
    }
    return null;
}
|
|
|
|
|
|
|
|
/**
 * Strip the directory part and the last extension from a path.
 *
 * Params:
 *   ARG_R = path or file name
 * Returns: the file name without directory and without its final extension.
 */
string getBaseName(string ARG_R){
    // baseName() with a suffix argument drops that suffix along with the directory.
    return baseName(ARG_R, extension(ARG_R));
}
|
|
|
|
|
|
|
|
/**
 * Derive the index base path and the matching FASTA path for a reference.
 *
 * Params:
 *   ARG_R  = reference file path
 *   DirMap = mapping directory; results live in its "index" subdirectory
 * Returns: [index base path, index base path ~ ".fasta"]
 */
string[] getRef(string ARG_R, string DirMap){
    string indexBase = buildPath(DirMap, "index", getBaseName(ARG_R));
    return [indexBase, indexBase ~ ".fasta"];
}
|
|
|
|
|
2024-09-09 00:04:36 +08:00
|
|
|
/**
 * Collect the record names of a FASTA file.
 *
 * Every line starting with '>' contributes the rest of that line.
 *
 * Params:
 *   ARG_R = FASTA file to scan
 * Returns: the header texts in file order.
 */
string[] getARG_G(string ARG_R){
    string[] headers;
    auto fasta = File(ARG_R, "r");
    foreach (row; fasta.byLine) {
        if (row.startsWith(">")) {
            // byLine reuses its buffer, so keep an owned copy.
            headers ~= row[1..$].idup;
        }
    }
    return headers;
}
|
|
|
|
|
2024-09-09 10:25:32 +08:00
|
|
|
/**
 * Look up `key = value` in a config file.
 *
 * A line matches only when the text before its first '=' equals key once
 * surrounding whitespace is removed. This keeps "diamond" from matching a
 * "sortdiamond = ..." line and ignores lines such as "# diamond = ...".
 * The value is everything after that '=', with surrounding whitespace
 * (including a trailing carriage return) removed. Lines with an empty
 * value are skipped.
 *
 * Params:
 *   file = path of the config file
 *   key  = setting name, compared literally
 * Returns: the first non-empty value for key, or an empty string when the
 *          key is not present.
 * Throws: whatever readText throws when the file cannot be read.
 */
string getValueFromConfig(string file, string key) {
    import std.string : strip;

    foreach (line; readText(file).splitter("\n")) {
        auto parts = line.findSplit("=");
        // parts[1] is the separator; it is empty when the line has no '='.
        if (parts[1].length == 0 || strip(parts[0]) != key) {
            continue;
        }
        string value = strip(parts[2]);
        if (value.length != 0) {
            return value;
        }
    }
    return null;
}
|
|
|
|
|
|
|
|
/**
 * Quality-control and trim the paired reads of every sample with fastp.
 *
 * For each list entry the reads `<name>_R1.fastq.gz` / `<name>_R2.fastq.gz`
 * are taken from DirRaw; trimmed reads plus a JSON and an HTML report are
 * written to DirQcTrim.
 *
 * Params:
 *   ARG_L     = sample list entries
 *   ARG_T     = worker thread count passed to fastp (-w)
 *   DirRaw    = directory holding the raw reads
 *   DirQcTrim = output directory (created when missing)
 *   PathFastp = fastp executable
 */
void processQcTrim(string[] ARG_L, int ARG_T, string DirRaw, string DirQcTrim, string PathFastp) {
    createDir(DirQcTrim);
    writeln("QcTrimming::Start");

    string workers = ARG_T.to!string;
    foreach (string entry; ARG_L) {
        string sample = getBaseName(entry);

        string[] cmdQcTrim = [
            PathFastp,
            "-i", buildPath(DirRaw, sample ~ "_R1.fastq.gz"),
            "-I", buildPath(DirRaw, sample ~ "_R2.fastq.gz"),
            "-o", buildPath(DirQcTrim, sample ~ "_R1.fastq.gz"),
            "-O", buildPath(DirQcTrim, sample ~ "_R2.fastq.gz"),
            "-j", buildPath(DirQcTrim, sample ~ ".json"),
            "-h", buildPath(DirQcTrim, sample ~ ".html"),
            "-w", workers
        ];
        executeCommand(cmdQcTrim);
    }

    writeln("QcTrimming::End");
}
|
|
|
|
|
2024-12-08 14:15:43 +08:00
|
|
|
/**
 * Assemble the trimmed reads of every sample with SPAdes.
 *
 * Each sample gets its own output directory `DirAssembly/<name>`.
 *
 * Params:
 *   ARG_L       = sample list entries
 *   ARG_M       = value passed to SPAdes as -m
 *   ARG_T       = value passed to SPAdes as -t
 *   DirQcTrim   = directory holding the trimmed reads
 *   DirAssembly = assembly root directory (created when missing)
 *   PathSpades  = SPAdes executable
 */
void processAssembly(string[] ARG_L, int ARG_M, int ARG_T, string DirQcTrim, string DirAssembly, string PathSpades){
    writeln("Assembly::Start");
    createDir(DirAssembly);

    string threads = ARG_T.to!string;
    string memory = ARG_M.to!string;
    foreach (string entry; ARG_L) {
        string sample = getBaseName(entry);
        string sampleDir = buildPath(DirAssembly, sample);
        createDir(sampleDir);

        string readsR1 = buildPath(DirQcTrim, sample ~ "_R1.fastq.gz");
        string readsR2 = buildPath(DirQcTrim, sample ~ "_R2.fastq.gz");

        executeCommand([
            PathSpades,
            "--pe1-1", readsR1,
            "--pe1-2", readsR2,
            "-t", threads,
            "-m", memory,
            "--careful",
            "--phred-offset", "33",
            "-o", sampleDir
        ]);
    }

    writeln("Assembly::End");
}
|
|
|
|
|
2024-10-23 15:26:22 +08:00
|
|
|
/**
 * Gather the per-sample assembly results into two flat directories.
 *
 * `DirAssembly/<name>/scaffolds.fasta` is copied to
 * `DirAssembly/scaffolds/<name>.fasta`, and `contigs.fasta` likewise to
 * `DirAssembly/contigs/<name>.fasta`. The two files are handled
 * independently: a missing scaffolds file is reported but does not stop
 * the contigs file of the same sample from being copied.
 *
 * Params:
 *   ARG_L       = sample list entries
 *   DirAssembly = assembly root directory
 */
void processAssemMv(string[] ARG_L,string DirAssembly){
    string DirAssemblySca = buildPath(DirAssembly, "scaffolds");
    string DirAssemblyCont = buildPath(DirAssembly, "contigs");

    // Copy one result file, or report that it is absent.
    void copyIfPresent(string source, string target) {
        if (exists(source)) {
            copy(source, target);
        } else {
            writeln("File not found: ", source);
        }
    }

    writeln("Assembly_Move::Start");
    createDir(DirAssemblySca);
    createDir(DirAssemblyCont);

    foreach (string file; ARG_L) {
        string sample = getBaseName(file);
        string DirAssemblyInd = buildPath(DirAssembly, sample);

        copyIfPresent(buildPath(DirAssemblyInd, "scaffolds.fasta"),
                      buildPath(DirAssemblySca, sample ~ ".fasta"));
        copyIfPresent(buildPath(DirAssemblyInd, "contigs.fasta"),
                      buildPath(DirAssemblyCont, sample ~ ".fasta"));
    }

    writeln("Assembly_Move::End");
}
|
|
|
|
|
2024-12-08 14:15:43 +08:00
|
|
|
/**
 * Map each sample's trimmed reads onto its own reference-sorted assembly.
 *
 * Steps, as issued by this function:
 *   1. copy the reference into `DirAssembly/fasta` and build a DIAMOND
 *      database from it;
 *   2. per sample: DIAMOND blastx of the sample scaffolds against that
 *      database, the sortdiamond tool on the result, bowtie2-build on the
 *      sorted FASTA, then bowtie2 piped into `samtools view` to produce
 *      `DirMap/<name>.bam`.
 *
 * The DIAMOND database is written to and read from
 * `DirAssembly/fasta/Reference.dmnd`, so the step does not depend on the
 * current working directory.
 *
 * Params:
 *   ARG_L           = sample list entries
 *   ARG_R           = reference file passed to `diamond makedb --in`
 *   ARG_T           = thread count handed to the tools
 *   DirQcTrim       = directory holding the trimmed reads
 *   DirAssembly     = assembly root directory
 *   DirMap          = mapping output directory (created when missing)
 *   PathBowtie2     = bowtie2 executable; "-build" is appended for indexing
 *   PathDiamond     = diamond executable
 *   PathSamtools    = samtools executable
 *   PathSortDiamond = sortdiamond executable
 */
void processMappingDenovo(string[] ARG_L, string ARG_R, int ARG_T, string DirQcTrim, string DirAssembly, string DirMap, string PathBowtie2, string PathDiamond, string PathSamtools, string PathSortDiamond){
    writeln("Mapping::Start");

    // Prepare directories
    createDir(DirMap);
    createDir(buildPath(DirMap, "index"));
    string DirAssemblySca = buildPath(DirAssembly, "scaffolds");
    string DirAssemblyFas = buildPath(DirAssembly, "fasta");
    createDir(DirAssemblyFas);

    // Reference copy and DIAMOND database live next to the sorted assemblies
    string ARG_R_Base = getBaseName(ARG_R);
    string ARG_R_Ref = buildPath(DirAssemblyFas, ARG_R_Base ~ ".fasta");
    copy(ARG_R, ARG_R_Ref);

    string ReferDmnd = buildPath(DirAssemblyFas, "Reference.dmnd");
    string[] cmdDmMakeDB = [PathDiamond, "makedb", "--db", ReferDmnd, "--in", ARG_R_Ref];
    executeCommand(cmdDmMakeDB);

    string PathBowtie2_build = PathBowtie2 ~ "-build";
    string threads = ARG_T.to!string;

    foreach (string file; ARG_L) {
        string sample = getBaseName(file);
        string inputM8 = buildPath(DirAssemblySca, sample ~ ".m8");
        string inputFasta = buildPath(DirAssemblySca, sample ~ ".fasta");
        string outputSort = buildPath(DirAssemblyFas, sample ~ ".fasta");
        string outputIndex = buildPath(DirAssemblyFas, sample);
        string inputFileR1 = buildPath(DirQcTrim, sample ~ "_R1.fastq.gz");
        string inputFileR2 = buildPath(DirQcTrim, sample ~ "_R2.fastq.gz");
        string outputBam = buildPath(DirMap, sample ~ ".bam");

        string[] cmdDiamond = [PathDiamond, "blastx", "-d", ReferDmnd, "-q", inputFasta, "-o", inputM8, "--outfmt", "6", "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore", "qlen", "slen", "gaps", "ppos", "qframe", "qseq"];
        string[] cmdSortDiamond = [PathSortDiamond, inputM8, outputSort];
        string[] cmdBuildDB = [PathBowtie2_build, "--threads", threads, outputSort, outputIndex];
        string[] cmdMap = [PathBowtie2, "-x", outputIndex, "-1", inputFileR1, "-2", inputFileR2, "-p", threads];
        string[] cmdSam2Bam = [PathSamtools, "view", "-bS", "-@", threads, "-o", outputBam];

        executeCommand(cmdDiamond);
        executeCommand(cmdSortDiamond);
        executeCommand(cmdBuildDB);
        // bowtie2 writes its output to stdout; samtools reads it from the pipe
        executeCommandPipe([cmdMap, cmdSam2Bam]);
    }

    writeln("Mapping::End");
}
|
|
|
|
|
2024-09-09 09:33:50 +08:00
|
|
|
/**
 * Post-process every sample's BAM with samtools.
 *
 * Per sample, `samtools fixmate`, `samtools sort` and `samtools markdup`
 * are chained as one pipeline reading `DirMap/<name>.bam` and writing
 * `DirBam/<name>.bam`, which is then indexed with `samtools index`.
 *
 * Params:
 *   ARG_L        = sample list entries
 *   ARG_T        = thread count passed as -@
 *   DirMap       = directory holding the input BAM files
 *   DirBam       = output directory (created when missing)
 *   PathSamtools = samtools executable
 */
void processPostMap(string[] ARG_L, int ARG_T, string DirMap, string DirBam, string PathSamtools) {
    createDir(DirBam);
    writeln("PostMapping::Start");

    string threads = ARG_T.to!string;
    foreach (string entry; ARG_L) {
        string sample = getBaseName(entry);
        string rawBam = buildPath(DirMap, sample ~ ".bam");
        string finalBam = buildPath(DirBam, sample ~ ".bam");

        // "-" makes each stage use the pipe for its output or input
        executeCommandPipe([
            [PathSamtools, "fixmate", "-@", threads, "-m", rawBam, "-"],
            [PathSamtools, "sort", "-@", threads, "-"],
            [PathSamtools, "markdup", "-@", threads, "-", finalBam]
        ]);

        executeCommand([PathSamtools, "index", "-@", threads, finalBam]);
    }

    writeln("PostMapping::End");
}
|
|
|
|
|
2024-09-14 13:50:09 +08:00
|
|
|
/**
 * Call variants for every sample with bcftools.
 *
 * Per sample, `bcftools mpileup`, `call`, `norm` and `filter` are chained
 * as one pipeline that reads `DirBam/<name>.bam`, uses
 * `DirAssembly/fasta/<name>.fasta` as reference and writes
 * `DirVcf/<name>.vcf.gz`. Samples are processed with a parallel foreach.
 *
 * Params:
 *   ARG_L        = sample list entries
 *   ARG_T        = thread count passed as --threads to each bcftools stage
 *   DirAssembly  = assembly root directory
 *   DirMap       = not used by this function
 *   DirBam       = directory holding the input BAM files
 *   DirVcf       = output directory (created when missing)
 *   PathBcftools = bcftools executable
 */
void processVarCallDenovo(string[] ARG_L, int ARG_T, string DirAssembly, string DirMap, string DirBam, string DirVcf, string PathBcftools) {
    writeln("VarCalling::Start");

    string DirAssemblyFas = buildPath(DirAssembly, "fasta");
    createDir(DirVcf);

    foreach (string entry; parallel(ARG_L, 1)) {
        string sample = getBaseName(entry);
        string bamIn = buildPath(DirBam, sample ~ ".bam");
        string vcfOut = buildPath(DirVcf, sample ~ ".vcf.gz");
        string reference = buildPath(DirAssemblyFas, sample ~ ".fasta");
        string threads = ARG_T.to!string;

        string[][] stages = [
            [PathBcftools, "mpileup", "-Oz", "--threads", threads, "-f", reference, bamIn],
            [PathBcftools, "call", "-mv", "-Oz", "--threads", threads],
            [PathBcftools, "norm", "--threads", threads, "-f", reference, "-Oz"],
            [PathBcftools, "filter", "--threads", threads, "--IndelGap", "5", "-Oz", "-o", vcfOut]
        ];
        executeCommandPipe(stages);
    }

    writeln("VarCalling::End");
}
|
|
|
|
|
2024-09-08 02:48:54 +08:00
|
|
|
|
2024-09-15 11:48:19 +08:00
|
|
|
/**
 * Build a consensus FASTA for every sample from its VCF.
 *
 * Per sample, `DirVcf/<name>.vcf.gz` is indexed with `bcftools index`
 * and `bcftools consensus` is run against `DirAssembly/fasta/<name>.fasta`,
 * writing `DirConsensus/taxa/<name>.fasta`.
 *
 * Params:
 *   ARG_G        = not used by this function
 *   ARG_L        = sample list entries
 *   ARG_T        = not used by this function
 *   DirAssembly  = assembly root directory
 *   DirVcf       = directory holding the VCF files
 *   DirConsensus = consensus root directory (created when missing)
 *   PathBcftools = bcftools executable
 */
void processConDenovo(string[] ARG_G, string[] ARG_L, int ARG_T, string DirAssembly, string DirVcf, string DirConsensus, string PathBcftools) {
    createDir(DirConsensus);

    string DirConTaxa = buildPath(DirConsensus, "taxa");
    string DirAssemblyFas = buildPath(DirAssembly, "fasta");
    createDir(DirConTaxa);

    writeln("Consensus::Start");

    foreach (string entry; ARG_L) {
        string sample = getBaseName(entry);
        string vcfIn = buildPath(DirVcf, sample ~ ".vcf.gz");
        string fastaOut = buildPath(DirConTaxa, sample ~ ".fasta");
        string reference = buildPath(DirAssemblyFas, sample ~ ".fasta");

        // index the vcf.gz first, then generate the consensus sequences
        executeCommand([PathBcftools, "index", vcfIn]);
        executeCommand([PathBcftools, "consensus", "-f", reference, vcfIn, "-o", fastaOut]);
    }

    writeln("Consensus::End");
}
|
|
|
|
|
|
|
|
|
2024-09-09 09:33:50 +08:00
|
|
|
/**
 * Regroup the per-sample consensus FASTA files into one FASTA per gene.
 *
 * Every sample file `DirConsensus/taxa/<name>.fasta` is scanned; records
 * whose header equals a requested gene are appended to that gene's output
 * under the header `>` followed by the sample's list entry. One file
 * `DirConsensus/gene/<gene>.fasta` is written per requested gene (empty
 * when no sample contained it).
 *
 * The sample file name is derived with getBaseName(), matching the name
 * under which the consensus step stores its output.
 *
 * Params:
 *   ARG_G        = gene names to extract
 *   ARG_L        = sample list entries
 *   DirConsensus = consensus root directory
 */
void processCombFasta(string[] ARG_G, string[] ARG_L, string DirConsensus) {
    string DirConTaxa = buildPath(DirConsensus, "taxa");
    string DirConGene = buildPath(DirConsensus, "gene");
    createDir(DirConGene);

    // gene name -> concatenated FASTA records collected so far
    string[string] geneSequences;

    // set of requested genes, so each header is checked with one lookup
    bool[string] wanted;
    foreach (gene; ARG_G) {
        wanted[gene] = true;
    }

    writeln("ConvertFasta::Start");

    // read every sample first
    foreach (file; ARG_L) {
        string inputFile = buildPath(DirConTaxa, getBaseName(file) ~ ".fasta");
        if (!exists(inputFile)) {
            writeln("File not found: ", inputFile);
            continue;
        }

        bool inSequence = false;
        string currentGene;

        foreach (line; readText(inputFile).splitter("\n")) {
            if (line.length == 0) continue;

            if (line[0] == '>') {
                string header = line[1 .. $];
                inSequence = (header in wanted) !is null;
                if (inSequence) {
                    currentGene = header;
                    geneSequences[currentGene] ~= ">" ~ file ~ "\n";
                }
            } else if (inSequence) {
                geneSequences[currentGene] ~= line ~ "\n";
            }
        }
    }

    // then write one file per gene
    foreach (gene; ARG_G) {
        File output = File(buildPath(DirConGene, gene ~ ".fasta"), "w");
        if (auto records = gene in geneSequences) {
            output.write(*records);
        }
    }

    writeln("ConvertFasta::End");
}
|
|
|
|
|
2024-12-09 13:22:03 +08:00
|
|
|
/**
 * Split a multi-record FASTA file into one file per record.
 *
 * Each record is written to `DirOut/<record name>.fasta`. Inside that
 * file the header is replaced by `>` followed by the base name of the
 * input file; the sequence lines are copied unchanged. Empty lines and
 * any lines before the first header are dropped.
 *
 * Params:
 *   inputFasta = FASTA file to split
 *   DirOut     = directory receiving the per-record files
 */
void splitFasta(string inputFasta, string DirOut) {
    import std.exception : ErrnoException;

    File infile;
    try {
        infile = File(inputFasta, "r");
    } catch (ErrnoException e) {
        // File's constructor signals an open failure with ErrnoException
        stderr.writeln("Error: Unable to open input file ", inputFasta);
        return;
    }

    string recordLabel = getBaseName(inputFasta);
    File outfile;
    bool in_sequence = false;

    foreach (lineContent; infile.byLine()) {
        if (lineContent.length == 0) continue;

        if (lineContent[0] == '>') {
            // New record: finish the previous output file first
            if (in_sequence) {
                outfile.close();
            }
            // byLine reuses its buffer, so take an owned copy of the name
            string seqName = lineContent[1 .. $].idup;
            outfile = File(buildPath(DirOut, seqName ~ ".fasta"), "w");
            outfile.writeln(">", recordLabel);
            in_sequence = true;
        } else if (in_sequence) {
            outfile.writeln(lineContent);
        }
    }

    if (in_sequence) {
        outfile.close();
    }

    if (infile.eof) {
        writeln("Sequences have been split into individual files.");
    } else {
        stderr.writeln("Error occurred while reading file.");
    }

    infile.close();
}
|
|
|
|
|
|
|
|
/**
 * Reduce every gene FASTA to the region aligned by exonerate.
 *
 * The reference is copied into DirConsensus and split into one file per
 * record there. The current `gene` directory is moved to `gene_bak`
 * (unless a `gene_bak` already exists), and for each gene exonerate is run
 * with `gene_bak/<gene>.fasta` and the gene's split reference file as its
 * two inputs; its output becomes the new `gene/<gene>.fasta`.
 *
 * The split reference file of each gene is removed whether or not the
 * gene was processed, and a gene with a missing input or reference file
 * is reported and skipped. `gene_bak` is removed at the end.
 *
 * Params:
 *   ARG_G         = gene names to process
 *   ARG_R         = reference FASTA
 *   DirConsensus  = consensus root directory
 *   PathExonerate = exonerate executable
 */
void processCodon(string[] ARG_G, string ARG_R, string DirConsensus, string PathExonerate){
    string DirConGene = buildPath(DirConsensus , "gene");
    string DirConGeneBak = DirConGene ~ "_bak";

    string ARG_R_Base = getBaseName(ARG_R);
    string ARG_R_Ref = buildPath(DirConsensus, ARG_R_Base ~ ".fasta");
    copy(ARG_R, ARG_R_Ref);
    splitFasta(ARG_R_Ref, DirConsensus);

    // An existing backup is kept and reused as the input
    if (!exists(DirConGeneBak)) {
        moveDir(DirConGene, DirConGeneBak);
    }
    createDir(DirConGene);

    writeln("GetCodon::Start");

    foreach (gene; ARG_G) {
        string inputFile = buildPath(DirConGeneBak, gene ~ ".fasta");
        string outputFile = buildPath(DirConGene, gene ~ ".fasta");
        string referFile = buildPath(DirConsensus, gene ~ ".fasta");

        // Clean up the split reference on every path out of this iteration
        scope(exit) {
            if (exists(referFile)) {
                std.file.remove(referFile);
            }
        }

        if (!exists(inputFile)) {
            writeln("File not found: ", inputFile);
            continue;
        }
        if (!exists(referFile)) {
            writeln("File not found: ", referFile);
            continue;
        }

        string[] cmdExonerate = [PathExonerate, inputFile, referFile, "--showalignment", "no", "--showvulgar", "no", "--showtargetgff", "no", "--ryo", ">%qi\n%qcs\n", "--verbose", "0"];
        executeCommandToFile(cmdExonerate, outputFile);
    }

    if (exists(DirConGeneBak)) {
        rmdirRecurse(DirConGeneBak);
    }

    writeln("GetCodon::End");
}
|
|
|
|
|
2024-09-09 00:04:36 +08:00
|
|
|
/**
 * Align every gene FASTA with MACSE.
 *
 * For each gene, `java -jar <PathMacse> -prog alignSequences` is run on
 * `DirConsensus/gene/<gene>.fasta`; the two outputs are written to
 * `DirAlign/AA/<gene>.fasta` and `DirAlign/NT/<gene>.fasta`. Genes are
 * processed with a parallel foreach.
 *
 * Params:
 *   ARG_G        = gene names to align
 *   DirConsensus = consensus root directory
 *   DirAlign     = alignment root directory (created when missing)
 *   PathMacse    = MACSE jar file
 */
void processAlign(string[] ARG_G, string DirConsensus, string DirAlign, string PathMacse){
    string geneDir = buildPath(DirConsensus, "gene");
    string aaDir = buildPath(DirAlign, "AA");
    string ntDir = buildPath(DirAlign, "NT");

    writeln("Align::Start");
    foreach (dir; [DirAlign, aaDir, ntDir]) {
        createDir(dir);
    }

    foreach (gene; parallel(ARG_G, 1)) {
        string fileName = gene ~ ".fasta";
        executeCommand([
            "java", "-jar", PathMacse,
            "-prog", "alignSequences",
            "-seq", buildPath(geneDir, fileName),
            "-out_AA", buildPath(aaDir, fileName),
            "-out_NT", buildPath(ntDir, fileName)
        ]);
    }
    writeln("Align::End");
}
|
2024-09-08 02:48:54 +08:00
|
|
|
|
2024-09-10 00:37:06 +08:00
|
|
|
/**
 * Run the delstop tool on the alignments and trim them with trimAl.
 *
 * Pass 1 (per gene, parallel): copy `DirAlign/AA/<gene>.fasta` and
 * `DirAlign/NT/<gene>.fasta` to `DirAlign/AA_out` and `DirAlign/NT_out`,
 * then run the delstop tool on the two copies with `--delete`.
 * Pass 2 (per gene, parallel): run trimAl on the AA copy with the NT copy
 * as `-backtrans`, writing `DirTrim/NT/<gene>.fasta`.
 *
 * A gene whose alignment files are missing is reported and skipped in
 * both passes instead of aborting the whole step.
 *
 * Params:
 *   ARG_G       = gene names to process
 *   DirAlign    = alignment root directory
 *   DirTrim     = trimming root directory (created when missing)
 *   PathDelstop = delstop executable
 *   PathTrimal  = trimAl executable
 */
void processTrimming(string[] ARG_G, string DirAlign, string DirTrim, string PathDelstop, string PathTrimal){
    writeln("Trimming::Start");

    string DirAA = buildPath(DirAlign, "AA");
    string DirNT = buildPath(DirAlign, "NT");
    string DirAA_out = buildPath(DirAlign, "AA_out");
    string DirNT_out = buildPath(DirAlign, "NT_out");

    createDir(DirAA_out);
    createDir(DirNT_out);

    // Pass 1: work on copies so the original alignments stay untouched
    foreach (gene; parallel(ARG_G,1)){
        string inputFastaAA = buildPath(DirAA, gene ~ ".fasta");
        string outputFastaAA = buildPath(DirAA_out, gene ~ ".fasta");
        string inputFastaNT = buildPath(DirNT, gene ~ ".fasta");
        string outputFastaNT = buildPath(DirNT_out, gene ~ ".fasta");

        if (exists(inputFastaAA) && exists(inputFastaNT)) {
            copy(inputFastaNT, outputFastaNT);
            copy(inputFastaAA, outputFastaAA);
            // del stop codon
            string[] cmdDelStop = [PathDelstop, outputFastaAA, outputFastaNT, "--delete"];
            executeCommand(cmdDelStop);
        } else {
            writeln("Skipping gene: ", gene, " as files are missing.");
        }
    }

    string DirTrimNT = buildPath(DirTrim, "NT");
    createDir(DirTrim);
    createDir(DirTrimNT);

    // Pass 2: trim
    foreach (gene; parallel(ARG_G,1)){
        string inputFastaAA = buildPath(DirAA_out, gene ~ ".fasta");
        string inputBackTransNT = buildPath(DirNT_out, gene ~ ".fasta");
        string outputFastaNT = buildPath(DirTrimNT, gene ~ ".fasta");

        if (exists(inputFastaAA) && exists(inputBackTransNT)) {
            string[] cmdTrim = [PathTrimal, "-in", inputFastaAA, "-backtrans", inputBackTransNT, "-out", outputFastaNT, "-automated1"];
            executeCommand(cmdTrim);
        } else {
            writeln("Skipping gene: ", gene, " as files are missing.");
        }
    }

    writeln("Trimming::End");
}
|
|
|
|
|
2024-09-08 02:48:54 +08:00
|
|
|
void main(string[] args) {
|
2024-09-08 23:17:57 +08:00
|
|
|
string pkgver = "0.0.3";
|
2024-09-08 02:48:54 +08:00
|
|
|
|
|
|
|
string DirHome = std.file.getcwd();
|
2024-12-09 12:23:55 +08:00
|
|
|
string DirRaw = buildPath(DirHome, "00_raw");
|
|
|
|
string DirQcTrim = buildPath(DirHome, "01_fastp");
|
2024-12-09 13:51:23 +08:00
|
|
|
string DirAssembly = buildPath(DirHome, "02_spades");
|
|
|
|
string DirMap = buildPath(DirHome, "03_bowtie2");
|
2024-12-09 12:23:55 +08:00
|
|
|
string DirBam = buildPath(DirHome, "04_bam");
|
|
|
|
string DirVcf = buildPath(DirHome, "05_vcf");
|
|
|
|
string DirConsensus = buildPath(DirHome, "06_consen");
|
2024-12-09 13:24:56 +08:00
|
|
|
string DirAlign = buildPath(DirHome, "07_macse");
|
|
|
|
string DirTrim = buildPath(DirHome, "08_trimal");
|
2024-09-08 02:48:54 +08:00
|
|
|
|
2024-09-09 09:33:50 +08:00
|
|
|
string PathFastp = "/usr/bin/fastp";
|
2024-12-08 14:15:43 +08:00
|
|
|
string PathSpades = "/usr/bin/spades.py";
|
|
|
|
string PathDiamond = "/usr/bin/diamond";
|
|
|
|
string PathSortDiamond = "/usr/bin/sortdiamond";
|
2024-09-09 09:33:50 +08:00
|
|
|
string PathBowtie2 = "/usr/bin/bowtie2";
|
|
|
|
string PathSamtools = "/usr/bin/samtools";
|
|
|
|
string PathBcftools = "/usr/bin/bcftools";
|
2024-12-09 12:23:55 +08:00
|
|
|
string PathExonerate = "/usr/bin/exonerate";
|
2024-09-08 02:48:54 +08:00
|
|
|
string PathMacse = "/usr/share/java/macse.jar";
|
2024-09-10 00:37:06 +08:00
|
|
|
string PathDelstop = "/usr/bin/delstop";
|
2024-09-09 09:33:50 +08:00
|
|
|
string PathTrimal = "/usr/bin/trimal";
|
2024-09-08 02:48:54 +08:00
|
|
|
|
|
|
|
int ARG_T = 8;
|
2024-09-09 10:25:32 +08:00
|
|
|
int ARG_M = 16;
|
2024-09-08 23:17:57 +08:00
|
|
|
string[] ARG_G;
|
2024-09-08 02:48:54 +08:00
|
|
|
string[] ARG_L;
|
2024-09-09 09:33:50 +08:00
|
|
|
string ARG_C;
|
2024-09-08 02:48:54 +08:00
|
|
|
string ARG_F;
|
|
|
|
string ARG_R;
|
2024-12-09 12:23:55 +08:00
|
|
|
bool enableCodon = false;
|
|
|
|
|
2024-09-08 02:48:54 +08:00
|
|
|
if (args.length > 1){
|
|
|
|
foreach (int i; 0 .. cast(int)args.length) {
|
|
|
|
switch (args[i]) {
|
2024-09-09 09:33:50 +08:00
|
|
|
case "-c", "--config":
|
|
|
|
i++;
|
|
|
|
ARG_C = args[i];
|
|
|
|
break;
|
2024-09-08 02:48:54 +08:00
|
|
|
case "-f", "--functions":
|
|
|
|
i++;
|
|
|
|
ARG_F = args[i];
|
|
|
|
break;
|
2024-09-08 23:17:57 +08:00
|
|
|
case "-g", "--gene":
|
|
|
|
i++;
|
|
|
|
ARG_G ~= readArrFromFile(args[i]);
|
|
|
|
break;
|
2024-09-08 02:48:54 +08:00
|
|
|
case "-h", "--help":
|
|
|
|
show_help(pkgver);
|
|
|
|
return;
|
|
|
|
case "-l", "--list":
|
|
|
|
i++;
|
2024-09-08 23:17:57 +08:00
|
|
|
ARG_L ~= readArrFromFile(args[i]);
|
2024-09-08 02:48:54 +08:00
|
|
|
break;
|
2024-12-10 16:11:18 +08:00
|
|
|
case "-m", "--memory":
|
|
|
|
i++;
|
|
|
|
ARG_M = args[i].to!int;
|
|
|
|
break;
|
2024-09-08 02:48:54 +08:00
|
|
|
case "-r", "--reference":
|
|
|
|
i++;
|
|
|
|
ARG_R = args[i];
|
|
|
|
break;
|
|
|
|
case "-t", "--threads":
|
|
|
|
i++;
|
|
|
|
ARG_T = args[i].to!int;
|
|
|
|
break;
|
2024-12-09 12:23:55 +08:00
|
|
|
case "--codon":
|
|
|
|
enableCodon = true;
|
|
|
|
break;
|
2024-09-09 09:45:55 +08:00
|
|
|
case "--fastp":
|
|
|
|
i++;
|
|
|
|
PathFastp = args[i];
|
|
|
|
break;
|
2024-12-08 14:15:43 +08:00
|
|
|
case "--spades":
|
|
|
|
i++;
|
|
|
|
PathSpades = args[i];
|
|
|
|
break;
|
|
|
|
case "--diamond":
|
|
|
|
i++;
|
|
|
|
PathDiamond = args[i];
|
|
|
|
break;
|
|
|
|
case "--sortdiamond":
|
|
|
|
i++;
|
|
|
|
PathSortDiamond = args[i];
|
|
|
|
break;
|
2024-09-09 09:45:55 +08:00
|
|
|
case "--bowtie2":
|
|
|
|
i++;
|
|
|
|
PathBowtie2 = args[i];
|
|
|
|
break;
|
|
|
|
case "--samtools":
|
|
|
|
i++;
|
|
|
|
PathSamtools = args[i];
|
|
|
|
break;
|
|
|
|
case "--bcftools":
|
|
|
|
i++;
|
|
|
|
PathBcftools = args[i];
|
|
|
|
break;
|
2024-12-09 12:23:55 +08:00
|
|
|
case "--exonerate":
|
|
|
|
i++;
|
|
|
|
PathExonerate = args[i];
|
|
|
|
break;
|
2024-09-08 02:48:54 +08:00
|
|
|
case "--macse":
|
|
|
|
i++;
|
|
|
|
PathMacse = args[i];
|
|
|
|
break;
|
2024-09-10 13:09:05 +08:00
|
|
|
case "--delstop":
|
|
|
|
i++;
|
|
|
|
PathDelstop = args[i];
|
|
|
|
break;
|
2024-12-10 16:11:18 +08:00
|
|
|
case "--trimal":
|
|
|
|
i++;
|
|
|
|
PathTrimal = args[i];
|
|
|
|
break;
|
2024-09-08 02:48:54 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
show_help(pkgver);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2024-09-09 00:04:36 +08:00
|
|
|
// get gene from ARG_R reference fasta
|
2024-12-10 12:39:56 +08:00
|
|
|
if (ARG_R.length != 0 && ARG_G.length == 0 ){
|
2024-09-09 00:04:36 +08:00
|
|
|
ARG_G = getARG_G(ARG_R);
|
|
|
|
}
|
2024-12-10 12:39:56 +08:00
|
|
|
|
|
|
|
if (ARG_F.length == 0 ){
|
|
|
|
ARG_F = "all";
|
|
|
|
}
|
|
|
|
|
2024-09-09 09:33:50 +08:00
|
|
|
// get pathXXX from config file
|
2024-12-10 12:39:56 +08:00
|
|
|
if (ARG_C.length != 0){
|
2024-09-09 00:04:36 +08:00
|
|
|
|
2024-09-09 09:33:50 +08:00
|
|
|
PathFastp = getValueFromConfig(ARG_C, "fastp");
|
2024-12-08 14:15:43 +08:00
|
|
|
PathSpades = getValueFromConfig(ARG_C, "spades");
|
|
|
|
PathDiamond = getValueFromConfig(ARG_C, "diamond");
|
|
|
|
PathSortDiamond = getValueFromConfig(ARG_C, "sortdiamond");
|
2024-09-09 09:33:50 +08:00
|
|
|
PathBowtie2 = getValueFromConfig(ARG_C, "bowtie2");
|
|
|
|
PathSamtools = getValueFromConfig(ARG_C, "samtools");
|
|
|
|
PathBcftools = getValueFromConfig(ARG_C, "bcftools");
|
2024-12-09 12:23:55 +08:00
|
|
|
PathExonerate = getValueFromConfig(ARG_C, "exonerate");
|
2024-09-09 09:33:50 +08:00
|
|
|
PathMacse = getValueFromConfig(ARG_C, "macse");
|
2024-09-11 10:13:14 +08:00
|
|
|
PathDelstop = getValueFromConfig(ARG_C, "delstop");
|
2024-09-09 09:33:50 +08:00
|
|
|
PathTrimal = getValueFromConfig(ARG_C, "trimal");
|
2024-12-09 13:51:23 +08:00
|
|
|
|
|
|
|
DirRaw = getValueFromConfig(ARG_C, "raw_dir");
|
|
|
|
DirQcTrim = getValueFromConfig(ARG_C, "fastp_dir");
|
|
|
|
DirAssembly = getValueFromConfig(ARG_C, "spades_dir");
|
|
|
|
DirMap = getValueFromConfig(ARG_C, "bowtie2_dir");
|
|
|
|
DirBam = getValueFromConfig(ARG_C, "bam_dir");
|
|
|
|
DirVcf = getValueFromConfig(ARG_C, "vcf_dir");
|
|
|
|
DirConsensus = getValueFromConfig(ARG_C, "consen_dir");
|
|
|
|
DirAlign = getValueFromConfig(ARG_C, "macse_dir");
|
|
|
|
DirTrim = getValueFromConfig(ARG_C, "trimal_dir");
|
2024-09-09 09:33:50 +08:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2024-09-08 02:48:54 +08:00
|
|
|
writeln("RGBEPP::Start");
|
|
|
|
// Perform steps based on provided function argument
|
|
|
|
if (ARG_F == "all" || ARG_F == "clean") {
|
2024-12-08 15:08:00 +08:00
|
|
|
if(testFiles([PathFastp]) && testStringArray(ARG_L)){
|
|
|
|
processQcTrim(ARG_L, ARG_T, DirRaw, DirQcTrim, PathFastp); //ARG_L
|
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
2024-09-11 10:13:14 +08:00
|
|
|
}
|
2024-09-09 10:25:32 +08:00
|
|
|
}
|
|
|
|
|
2024-10-23 15:26:22 +08:00
|
|
|
if (ARG_F == "all" || ARG_F == "assembly") {
|
2024-12-08 15:08:00 +08:00
|
|
|
if(testFiles([PathSpades]) && testStringArray(ARG_L)){
|
|
|
|
processAssembly(ARG_L, ARG_M, ARG_T, DirQcTrim, DirAssembly, PathSpades); //ARG_L
|
2024-10-23 15:26:22 +08:00
|
|
|
processAssemMv(ARG_L, DirAssembly);
|
2024-12-08 15:08:00 +08:00
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
2024-09-11 10:13:14 +08:00
|
|
|
}
|
2024-09-08 02:48:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ARG_F == "all" || ARG_F == "map") {
|
2024-12-08 15:08:00 +08:00
|
|
|
if(testFiles([PathBowtie2, PathDiamond, PathSamtools, PathSortDiamond]) && testStringArray(ARG_L) && testString(ARG_R) ){
|
|
|
|
processMappingDenovo(ARG_L, ARG_R, ARG_T, DirQcTrim, DirAssembly, DirMap, PathBowtie2, PathDiamond, PathSamtools, PathSortDiamond); //ARG_L, ARG_R
|
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
2024-09-11 10:13:14 +08:00
|
|
|
}
|
2024-09-08 02:48:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ARG_F == "all" || ARG_F == "postmap") {
|
2024-12-08 15:08:00 +08:00
|
|
|
if(testFiles([PathSamtools]) && testStringArray(ARG_L) ){
|
|
|
|
processPostMap(ARG_L, ARG_T, DirMap, DirBam, PathSamtools); //ARG_L
|
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
|
|
|
}
|
2024-09-08 02:48:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ARG_F == "all" || ARG_F == "varcall") {
|
2024-12-08 15:08:00 +08:00
|
|
|
if(testFiles([PathBcftools]) && testStringArray(ARG_L) ){
|
|
|
|
processVarCallDenovo(ARG_L, ARG_T, DirAssembly, DirMap, DirBam, DirVcf, PathBcftools); //ARG_L
|
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
2024-09-11 10:13:14 +08:00
|
|
|
}
|
2024-09-08 02:48:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ARG_F == "all" || ARG_F == "consen") {
|
2024-12-08 15:08:00 +08:00
|
|
|
if(testFiles([PathBcftools]) && testStringArray(ARG_L) && testStringArray(ARG_G) ){
|
|
|
|
processConDenovo(ARG_G, ARG_L, ARG_T, DirAssembly, DirVcf, DirConsensus, PathBcftools); //ARG_G ARG_L
|
|
|
|
processCombFasta(ARG_G, ARG_L, DirConsensus); //ARG_G ARG_L
|
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
2024-09-11 10:13:14 +08:00
|
|
|
}
|
2024-09-08 02:48:54 +08:00
|
|
|
}
|
|
|
|
|
2024-12-09 12:34:33 +08:00
|
|
|
if (ARG_F == "all" && enableCodon || ARG_F == "codon") {
|
2024-12-09 12:23:55 +08:00
|
|
|
if(testFiles([PathExonerate]) && testStringArray(ARG_G) && testString(ARG_R)){
|
|
|
|
processCodon(ARG_G, ARG_R, DirConsensus, PathExonerate); //ARG_G
|
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-08 23:17:57 +08:00
|
|
|
if (ARG_F == "all" || ARG_F == "align") {
|
2024-12-08 15:08:00 +08:00
|
|
|
if(testFiles([PathMacse]) && testJava && testStringArray(ARG_G)){
|
|
|
|
processAlign(ARG_G, DirConsensus, DirAlign, PathMacse); //ARG_G
|
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
2024-09-11 10:13:14 +08:00
|
|
|
}
|
2024-09-08 23:17:57 +08:00
|
|
|
}
|
2024-09-08 02:48:54 +08:00
|
|
|
|
2024-09-10 00:37:06 +08:00
|
|
|
if (ARG_F == "all" || ARG_F == "trim") {
|
2024-12-08 15:08:00 +08:00
|
|
|
if(testFiles([PathTrimal]) && testStringArray(ARG_G) ){
|
|
|
|
processTrimming(ARG_G, DirAlign, DirTrim, PathDelstop, PathTrimal); //ARG_G
|
|
|
|
} else {
|
|
|
|
throw new Exception("please confirm paramenters are correct");
|
2024-09-11 10:13:14 +08:00
|
|
|
}
|
2024-09-10 00:37:06 +08:00
|
|
|
}
|
2024-09-09 10:25:32 +08:00
|
|
|
|
2024-09-08 02:48:54 +08:00
|
|
|
writeln("RGBEPP::End");
|
|
|
|
}
|
|
|
|
|