RGBEPP/RGBEPP.d

666 lines
23 KiB
D
Raw Normal View History

2024-09-08 02:48:54 +08:00
#!/usr/bin/env rdmd
import std.stdio;
import std.file;
import std.process;
import std.algorithm;
import std.conv;
import std.array;
import std.path;
2024-09-09 01:19:42 +08:00
import std.parallelism;
2024-09-09 09:33:50 +08:00
import std.regex;
2024-09-08 02:48:54 +08:00
/**
 * Print the coloured banner, version and command-line usage to stdout.
 *
 * The function list and the long name of the gene option are kept in sync
 * with the option names and step names that `main` actually handles
 * (`--gene`, plus the `assembly` and `trim` steps).
 *
 * Params:
 *   pkgver = version string shown on the "Version:" line
 */
void show_help(string pkgver) {
    // The literal spans several lines on purpose: embedded newlines are part
    // of the printed text, so continuation lines start at column 0.
    writeln("\t\t\t\t\t\033[0;47;31mR\033[0m\033[0;47;92mG\033[0m\033[0;47;94mB\033[0m\033[0;47m \033[0m\033[0;47;33mE\033[0m\033[0;47;94mP\033[0m\033[0;47;33mP\033[0m
\t\t\tReference Genome based Exon Phylogeny Pipeline
Version: ", pkgver, "
License: GPL-2.0-only
Author: Guoyi Zhang
-c\t--config\tconfig file for software path (optional)
-g\t--gene\t\tgene file path (optional, if -r is specified)
-f\t--functions\tfunctions type (optional): all clean assembly map
\t \tpostmap varcall consen align trim
-h\t--help\t\tshow this information
-l\t--list\t\tlist file path
-m\t--memory\tmemory settings (optional, default 16 GB)
-r\t--reference\treference genome path
-t\t--threads\tthreads setting (optional, default 8 threads)
--fastp\t\tFastp path (optional)
--spades\t\tSpades python path (optional)
--diamond\t\tDiamond python path (optional)
--sortdiamond\t\tSortDiamond python path (optional)
--bowtie2\t\tBowtie2 path (optional)
--samtools\t\tSamtools path (optional)
--bcftools\t\tBcftools path (optional)
--macse\t\tMacse jarfile path (optional)
--delstop\t\tDelstop path (optional)
--trimal\t\tTrimal path (optional)
for example: ./RGBEPP -f all -l list -t 8 -r reference.fasta \n");
}
/**
 * Check that a `java` executable can be launched.
 *
 * Runs `java -version`. `std.process.execute` throws `ProcessException`
 * when the program cannot be started at all (e.g. not on PATH); that case
 * is treated the same as a non-zero exit status instead of aborting.
 *
 * Returns: true when `java -version` exits with status 0, false otherwise
 *          (an error line is printed in that case).
 */
bool testJava() {
    try {
        if (execute(["java", "-version"]).status == 0) {
            return true;
        }
    } catch (ProcessException ex) {
        // Could not start the process: fall through to the common error path.
    }
    writeln("Error: Java is not found");
    return false;
}
/**
 * Verify that every non-empty path in the list exists.
 *
 * Empty strings are skipped. For each missing path an error line is
 * printed; all entries are checked even after the first failure.
 *
 * Params:
 *   filePaths = paths to check
 * Returns: true when no non-empty path is missing.
 */
bool testFiles(string[] filePaths) {
    bool allPresent = true;
    foreach (candidate; filePaths) {
        if (candidate == "" || exists(candidate)) {
            continue;
        }
        writeln("Error: " ~ candidate ~ " does not exists.");
        allPresent = false;
    }
    return allPresent;
}
2024-12-08 15:08:00 +08:00
/**
 * Tell whether a string argument was supplied.
 *
 * Params:
 *   input = string to check
 * Returns: true when `input` has at least one character.
 */
bool testString(string input) {
    return input.length > 0;
}
/**
 * Tell whether a string list argument was supplied.
 *
 * Params:
 *   input = list to check
 * Returns: true when `input` has at least one element (the element itself
 *          may be an empty string).
 */
bool testStringArray(string[] input) {
    return input.length != 0;
}
2024-09-08 02:48:54 +08:00
/**
 * Create a directory if it does not exist yet.
 *
 * Missing parent directories are created as well, so a pipeline step can
 * be run on its own even when the step that normally creates the parent
 * directory was skipped. An already existing path is left untouched.
 *
 * Params:
 *   path = directory to create
 */
void createDir(string path) {
    if (!exists(path)) {
        mkdirRecurse(path);
    }
}
2024-09-09 01:19:42 +08:00
/**
 * Run an external command and wait for it to finish.
 *
 * A non-zero exit status is reported on stdout; execution continues
 * afterwards.
 *
 * Params:
 *   cmd = program followed by its arguments
 */
void executeCommand(string[] cmd) {
    immutable exitCode = spawnProcess(cmd).wait();
    if (exitCode == 0) {
        return;
    }
    writeln("Error executing command: ", cmd.join(" "));
}
/**
 * Run several commands connected like a shell pipeline
 * (`cmds[0] | cmds[1] | ... | cmds[$-1]`).
 *
 * The first command reads this process's stdin and the last one writes to
 * this process's stdout. All processes are waited for before returning, and
 * every command that exits with a non-zero status is reported on stdout in
 * the same format used by `executeCommand`.
 *
 * An empty list is a no-op, and a single command is run exactly once with
 * inherited stdin/stdout.
 *
 * Params:
 *   cmds = one argument vector per pipeline stage
 */
void executeCommandPipe(string[][] cmds) {
    if (cmds.length == 0) {
        return;
    }

    Pid[] pids;
    scope(exit) {
        // pids[idx] was spawned from cmds[idx], so indices line up.
        foreach (idx, pid; pids) {
            if (wait(pid) != 0) {
                writeln("Error executing command: ", cmds[idx].join(" "));
            }
        }
    }

    // Input of the stage about to be spawned; starts as our own stdin.
    File upstream = stdin;

    // Every stage except the last writes into a fresh pipe whose read end
    // becomes the input of the following stage.
    foreach (stage; cmds[0 .. $ - 1]) {
        auto link = pipe();
        pids ~= spawnProcess(stage, upstream, link.writeEnd);
        upstream = link.readEnd;
    }

    // Final stage writes to our stdout.
    pids ~= spawnProcess(cmds[$ - 1], upstream, stdout);
}
/**
 * Read a text file and return its whitespace-separated words.
 *
 * Read failures are reported on stdout and yield an empty result instead
 * of propagating the exception.
 *
 * Params:
 *   filename = file to read
 * Returns: the words in file order, or an empty array on error.
 */
string[] readArrFromFile(string filename) {
    try {
        return readText(filename).split();
    } catch (FileException ioErr) {
        writeln("Error reading file: ", ioErr.msg);
    } catch (Exception other) {
        writeln("Exception: ", other.msg);
    }
    return null;
}
/**
 * Strip the directory part and the last extension from a path.
 *
 * Params:
 *   ARG_R = file path or bare name
 * Returns: the file name without directory and without its final extension.
 */
string getBaseName(string ARG_R){
    // baseName() drops the directory and, given the suffix, the extension too.
    return baseName(ARG_R, extension(ARG_R));
}
/**
 * Build the index base path and the reference fasta path under
 * `DirMap/index/` for a reference file.
 *
 * Params:
 *   ARG_R  = reference file path
 *   DirMap = mapping directory
 * Returns: `[indexBase, indexBase ~ ".fasta"]` where `indexBase` is
 *          `DirMap/index/<base name of ARG_R>`.
 */
string[] getRef(string ARG_R, string DirMap){
    string indexBase = DirMap ~ "/index/" ~ getBaseName(ARG_R);
    return [indexBase, indexBase ~ ".fasta"];
}
2024-09-09 00:04:36 +08:00
/**
 * Collect the header lines of a fasta file.
 *
 * Every line starting with '>' contributes one entry: the rest of that
 * line, with the leading '>' removed.
 *
 * Params:
 *   ARG_R = path of the fasta file to scan
 * Returns: the headers in file order.
 */
string[] getARG_G(string ARG_R){
    string[] headers;
    foreach (line; File(ARG_R, "r").byLine) {
        if (line.startsWith(">")) {
            // byLine reuses its buffer, so take an immutable copy.
            headers ~= line[1 .. $].idup;
        }
    }
    return headers;
}
2024-09-09 10:25:32 +08:00
/**
 * Look up `key = value` in a config file and return the value.
 *
 * The key must be the first thing on the line (leading blanks allowed), so
 * looking up `diamond` no longer picks up a `sortdiamond = ...` line and
 * lines such as `# fastp = ...` are ignored. The key is matched literally,
 * and surrounding whitespace (including a trailing carriage return) is
 * removed from the value. The first matching line wins.
 *
 * Params:
 *   file = path of the config file
 *   key  = setting name to look for
 * Returns: the value, or an empty string when the key is absent or has no
 *          value.
 */
string getValueFromConfig(string file, string key) {
    string content = readText(file);

    // Escape regex metacharacters so the key is matched verbatim.
    string literalKey;
    foreach (ch; key.escaper) {
        literalKey ~= ch;
    }

    // ^ key = value $ ; the value starts at its first non-blank character
    // and the lazy tail leaves trailing blanks to the final \s*.
    auto pattern = regex(r"^\s*" ~ literalKey ~ r"\s*=\s*(\S.*?)\s*$");

    foreach (line; content.splitter("\n")) {
        auto hit = matchFirst(line, pattern);
        if (!hit.empty) {
            return hit[1];
        }
    }
    return null;
}
/**
 * Quality-control / trimming step: run the external `fastp` program once
 * per sample.
 *
 * For each list entry the pair `<DirRaw>/<sample>_R1.fastq.gz` and
 * `_R2.fastq.gz` is passed as input, and the same file names under
 * `DirQcTrim` (plus `<sample>.json` and `<sample>.html`) as outputs.
 *
 * Params:
 *   ARG_L     = sample list entries
 *   ARG_T     = value passed to the `-w` option
 *   DirRaw    = directory holding the raw reads
 *   DirQcTrim = output directory (created if absent)
 *   PathFastp = fastp executable
 */
void processQcTrim(string[] ARG_L, int ARG_T, string DirRaw, string DirQcTrim, string PathFastp) {
    createDir(DirQcTrim);

    writeln("QcTrimming::Start");
    foreach (entry; ARG_L) {
        string sample = getBaseName(entry);
        string rawPrefix = DirRaw ~ "/" ~ sample;
        string outPrefix = DirQcTrim ~ "/" ~ sample;

        executeCommand([
            PathFastp,
            "-i", rawPrefix ~ "_R1.fastq.gz",
            "-I", rawPrefix ~ "_R2.fastq.gz",
            "-o", outPrefix ~ "_R1.fastq.gz",
            "-O", outPrefix ~ "_R2.fastq.gz",
            "-j", outPrefix ~ ".json",
            "-h", outPrefix ~ ".html",
            "-w", ARG_T.to!string
        ]);
    }
    writeln("QcTrimming::End");
}
/**
 * Assembly step: run the SPAdes script once per sample on the trimmed
 * read pair, writing into `<DirAssembly>/<sample>`.
 *
 * Params:
 *   ARG_L       = sample list entries
 *   ARG_M       = value passed to `-m`
 *   ARG_T       = value passed to `-t`
 *   DirQcTrim   = directory holding the trimmed reads
 *   DirAssembly = assembly root directory (created if absent)
 *   PathSpades  = SPAdes executable/script
 */
void processAssembly(string[] ARG_L, int ARG_M, int ARG_T, string DirQcTrim, string DirAssembly, string PathSpades){
    writeln("Assembly::Start");
    createDir(DirAssembly);

    foreach (entry; ARG_L) {
        string sample = getBaseName(entry);
        string sampleDir = DirAssembly ~ "/" ~ sample;
        createDir(sampleDir);

        string readsPrefix = DirQcTrim ~ "/" ~ sample;
        executeCommand([
            PathSpades,
            "--pe1-1", readsPrefix ~ "_R1.fastq.gz",
            "--pe1-2", readsPrefix ~ "_R2.fastq.gz",
            "-t", ARG_T.to!string,
            "-m", ARG_M.to!string,
            "--careful",
            "--phred-offset", "33",
            "-o", sampleDir
        ]);
    }
    writeln("Assembly::End");
}
2024-10-23 15:26:22 +08:00
/**
 * Gather per-sample assembly results into two flat directories.
 *
 * For every sample, `<DirAssembly>/<sample>/scaffolds.fasta` is copied to
 * `<DirAssembly>/scaffolds/<sample>.fasta` and `contigs.fasta` to
 * `<DirAssembly>/contigs/<sample>.fasta`. The two files are handled
 * independently: a missing scaffolds file is reported but no longer
 * prevents the contigs file from being copied.
 *
 * Params:
 *   ARG_L       = sample list entries
 *   DirAssembly = assembly root directory
 */
void processAssemMv(string[] ARG_L,string DirAssembly){
    // Copy one result file, or report it when the assembler did not produce it.
    void copyIfPresent(string from, string to) {
        if (exists(from)) {
            copy(from, to);
        } else {
            writeln("File not found: ", from);
        }
    }

    string DirAssemblySca = DirAssembly ~ "/" ~ "scaffolds";
    string DirAssemblyCont = DirAssembly ~ "/" ~ "contigs";

    writeln("Assembly_Move::Start");
    createDir(DirAssemblySca);
    createDir(DirAssemblyCont);

    foreach (string file; ARG_L) {
        string sample = getBaseName(file);
        string DirAssemblyInd = DirAssembly ~ "/" ~ sample;

        copyIfPresent(DirAssemblyInd ~ "/" ~ "scaffolds.fasta",
                      DirAssemblySca ~ "/" ~ sample ~ ".fasta");
        copyIfPresent(DirAssemblyInd ~ "/" ~ "contigs.fasta",
                      DirAssemblyCont ~ "/" ~ sample ~ ".fasta");
    }
    writeln("Assembly_Move::End");
}
/**
 * Mapping step for the de-novo workflow.
 *
 * 1. Copies the reference into `<DirAssembly>/fasta/` and builds a Diamond
 *    database from it. The database now lives next to that copy
 *    (`<DirAssembly>/fasta/Reference.dmnd`) instead of in the current
 *    working directory; the original computed this path but never used it.
 * 2. For every sample: runs `diamond blastx` on its scaffolds, passes the
 *    tabular result to the sort-diamond tool, builds a Bowtie2 index from
 *    that tool's output, maps the trimmed reads against it and converts the
 *    mapper output to `<DirMap>/<sample>.bam` with `samtools view`.
 *
 * Params:
 *   ARG_L           = sample list entries
 *   ARG_R           = reference fasta path
 *   ARG_T           = thread count handed to bowtie2 and samtools
 *   DirQcTrim       = directory holding the trimmed reads
 *   DirAssembly     = assembly root directory
 *   DirMap          = output directory for BAM files (created if absent)
 *   PathBowtie2     = bowtie2 executable (`-build` is appended for indexing)
 *   PathDiamond     = diamond executable
 *   PathSamtools    = samtools executable
 *   PathSortDiamond = sort-diamond executable
 */
void processMappingDenovo(string[] ARG_L, string ARG_R, int ARG_T, string DirQcTrim, string DirAssembly, string DirMap, string PathBowtie2, string PathDiamond, string PathSamtools, string PathSortDiamond){
    writeln("Mapping::Start");

    // Prepare directories
    createDir(DirMap);
    createDir(DirMap ~ "/index");
    string DirAssemblySca = DirAssembly ~ "/" ~ "scaffolds";
    string DirAssemblyFas = DirAssembly ~ "/" ~ "fasta";
    createDir(DirAssemblyFas);

    // Keep a copy of the reference alongside the per-sample fasta files.
    string ARG_R_Ref = DirAssemblyFas ~ "/" ~ getBaseName(ARG_R) ~ ".fasta";
    copy(ARG_R, ARG_R_Ref);

    // makedb receives the database name without extension, blastx the
    // resulting .dmnd file, mirroring the original "Reference" /
    // "Reference.dmnd" pair.
    string ReferBase = DirAssemblyFas ~ "/" ~ "Reference";
    string ReferDmnd = ReferBase ~ ".dmnd";
    executeCommand([PathDiamond, "makedb", "--db", ReferBase, "--in", ARG_R_Ref]);

    string PathBowtie2_build = PathBowtie2 ~ "-build";
    string threads = ARG_T.to!string;

    foreach (string file; ARG_L) {
        string sample = getBaseName(file);
        string inputM8 = DirAssemblySca ~ "/" ~ sample ~ ".m8";
        string inputFasta = DirAssemblySca ~ "/" ~ sample ~ ".fasta";
        string outputSort = DirAssemblyFas ~ "/" ~ sample ~ ".fasta";
        string outputIndex = DirAssemblyFas ~ "/" ~ sample;
        string inputFileR1 = DirQcTrim ~ "/" ~ sample ~ "_R1.fastq.gz";
        string inputFileR2 = DirQcTrim ~ "/" ~ sample ~ "_R2.fastq.gz";
        string outputBam = DirMap ~ "/" ~ sample ~ ".bam";

        string[] cmdDiamond = [PathDiamond, "blastx", "-d", ReferDmnd, "-q", inputFasta, "-o", inputM8, "--outfmt", "6", "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore", "qlen", "slen", "gaps", "ppos", "qframe", "qseq"];
        string[] cmdSortDiamond = [PathSortDiamond, inputM8, outputSort];
        string[] cmdBuildDB = [PathBowtie2_build, "--threads", threads, outputSort, outputIndex];
        string[] cmdMap = [PathBowtie2, "-x", outputIndex, "-1", inputFileR1, "-2", inputFileR2, "-p", threads];
        // "-" names stdin explicitly so the conversion does not depend on
        // samtools guessing its input when no file operand is given.
        string[] cmdSam2Bam = [PathSamtools, "view", "-bS", "-@", threads, "-o", outputBam, "-"];

        executeCommand(cmdDiamond);
        executeCommand(cmdSortDiamond);
        executeCommand(cmdBuildDB);
        executeCommandPipe([cmdMap, cmdSam2Bam]);
    }
    writeln("Mapping::End");
}
2024-09-09 09:33:50 +08:00
/**
 * Post-mapping step: for every sample pipe
 * `samtools fixmate | samtools sort | samtools markdup` from
 * `<DirMap>/<sample>.bam` into `<DirBam>/<sample>.bam`, then index the
 * result with `samtools index`.
 *
 * Params:
 *   ARG_L        = sample list entries
 *   ARG_T        = value passed to every `-@` option
 *   DirMap       = directory holding the mapped BAM files
 *   DirBam       = output directory (created if absent)
 *   PathSamtools = samtools executable
 */
void processPostMap(string[] ARG_L, int ARG_T, string DirMap, string DirBam, string PathSamtools) {
    createDir(DirBam);
    writeln("PostMapping::Start");

    string threads = ARG_T.to!string;
    foreach (entry; ARG_L) {
        string sample = getBaseName(entry);
        string mappedBam = DirMap ~ "/" ~ sample ~ ".bam";
        string finalBam = DirBam ~ "/" ~ sample ~ ".bam";

        // fixmate -> sort -> markdup, chained through "-" (stdin/stdout)
        string[][] stages = [
            [PathSamtools, "fixmate", "-@", threads, "-m", mappedBam, "-"],
            [PathSamtools, "sort", "-@", threads, "-"],
            [PathSamtools, "markdup", "-@", threads, "-", finalBam]
        ];
        executeCommandPipe(stages);

        executeCommand([PathSamtools, "index", "-@", threads, finalBam]);
    }
    writeln("PostMapping::End");
}
2024-09-14 13:50:09 +08:00
/**
 * Variant-calling step for the de-novo workflow.
 *
 * Samples are processed with a parallel foreach (work unit size 1). Each
 * one runs `bcftools mpileup | call | norm | filter`, reading
 * `<DirBam>/<sample>.bam` with `<DirAssembly>/fasta/<sample>.fasta` as the
 * reference and writing `<DirVcf>/<sample>.vcf.gz`.
 *
 * Params:
 *   ARG_L        = sample list entries
 *   ARG_T        = value passed to every `--threads` option
 *   DirAssembly  = assembly root directory
 *   DirMap       = not used by this function
 *   DirBam       = directory holding the processed BAM files
 *   DirVcf       = output directory (created if absent)
 *   PathBcftools = bcftools executable
 */
void processVarCallDenovo(string[] ARG_L, int ARG_T, string DirAssembly, string DirMap, string DirBam, string DirVcf, string PathBcftools) {
    writeln("VarCalling::Start");

    string fastaDir = DirAssembly ~ "/" ~ "fasta";
    createDir(DirVcf);
    string threads = ARG_T.to!string;

    foreach (entry; parallel(ARG_L, 1)) {
        string sample = getBaseName(entry);
        string bam = DirBam ~ "/" ~ sample ~ ".bam";
        string vcf = DirVcf ~ "/" ~ sample ~ ".vcf.gz";
        string reference = fastaDir ~ "/" ~ sample ~ ".fasta";

        // Variant calling using bcftools, one pipeline per sample
        string[][] stages = [
            [PathBcftools, "mpileup", "-Oz", "--threads", threads, "-f", reference, bam],
            [PathBcftools, "call", "-mv", "-Oz", "--threads", threads],
            [PathBcftools, "norm", "--threads", threads, "-f", reference, "-Oz"],
            [PathBcftools, "filter", "--threads", threads, "--IndelGap", "5", "-Oz", "-o", vcf]
        ];
        executeCommandPipe(stages);
    }
    writeln("VarCalling::End");
}
2024-09-08 02:48:54 +08:00
2024-09-15 11:48:19 +08:00
/**
 * Consensus step for the de-novo workflow.
 *
 * For every sample the VCF `<DirVcf>/<sample>.vcf.gz` is indexed with
 * `bcftools index`, then `bcftools consensus` is run against
 * `<DirAssembly>/fasta/<sample>.fasta`, writing
 * `<DirConsensus>/taxa/<sample>.fasta`.
 *
 * Params:
 *   ARG_G        = not used by this function
 *   ARG_L        = sample list entries
 *   ARG_T        = not used by this function
 *   DirAssembly  = assembly root directory
 *   DirVcf       = directory holding the VCF files
 *   DirConsensus = consensus root directory (created if absent)
 *   PathBcftools = bcftools executable
 */
void processConDenovo(string[] ARG_G, string[] ARG_L, int ARG_T, string DirAssembly, string DirVcf, string DirConsensus, string PathBcftools) {
    createDir(DirConsensus);
    string taxaDir = DirConsensus ~ "/" ~ "taxa";
    string fastaDir = DirAssembly ~ "/" ~ "fasta";
    createDir(taxaDir);

    writeln("Consensus::Start");
    foreach (entry; ARG_L) {
        string sample = getBaseName(entry);
        string vcf = DirVcf ~ "/" ~ sample ~ ".vcf.gz";

        // The compressed VCF has to be indexed before consensus can read it.
        executeCommand([PathBcftools, "index", vcf]);

        executeCommand([
            PathBcftools, "consensus",
            "-f", fastaDir ~ "/" ~ sample ~ ".fasta",
            vcf,
            "-o", taxaDir ~ "/" ~ sample ~ ".fasta"
        ]);
    }
    writeln("Consensus::End");
}
2024-09-09 09:33:50 +08:00
/**
 * Regroup per-taxon consensus fasta files into one fasta file per gene.
 *
 * Reads `<DirConsensus>/taxa/<sample>.fasta` for every list entry. Each
 * record whose header (the text after '>') is one of `ARG_G` is appended to
 * that gene's output under the header `><sample>`. One file
 * `<DirConsensus>/gene/<gene>.fasta` is written per gene in `ARG_G`; it is
 * empty when no taxon had that gene.
 *
 * The sample name is derived with `getBaseName`, matching the file names
 * written by the consensus step (the original used the raw list entry, so
 * an entry carrying a directory or extension was never found).
 *
 * Params:
 *   ARG_G        = gene names to extract
 *   ARG_L        = sample list entries
 *   DirConsensus = consensus root directory
 */
void processCombFasta(string[] ARG_G, string[] ARG_L, string DirConsensus) {
    string DirConTaxa = DirConsensus ~ "/" ~ "taxa";
    string DirConGene = DirConsensus ~ "/" ~ "gene";
    createDir(DirConGene);

    // Set of requested genes for constant-time header lookup.
    bool[string] wanted;
    foreach (gene; ARG_G) {
        wanted[gene] = true;
    }

    // gene name -> accumulated fasta text
    string[string] geneSequences;

    writeln("ConvertFasta::Start");

    // First pass: read every taxon file and distribute its records.
    foreach (file; ARG_L) {
        string taxon = getBaseName(file);
        string inputFile = DirConTaxa ~ "/" ~ taxon ~ ".fasta";
        if (!exists(inputFile)) {
            writeln("File not found: ", inputFile);
            continue;
        }

        string content = readText(inputFile);
        bool inSequence = false; // true while inside a requested record
        string currentGene;

        foreach (line; content.splitter("\n")) {
            if (line.empty) continue;

            if (line[0] == '>') {
                string header = line[1 .. $];
                inSequence = (header in wanted) !is null;
                if (inSequence) {
                    currentGene = header;
                    geneSequences[currentGene] ~= ">" ~ taxon ~ "\n";
                }
            } else if (inSequence) {
                geneSequences[currentGene] ~= line ~ "\n";
            }
        }
    }

    // Second pass: write one file per requested gene.
    foreach (gene; ARG_G) {
        File output = File(DirConGene ~ "/" ~ gene ~ ".fasta", "w");
        if (auto sequences = gene in geneSequences) {
            output.write(*sequences);
        }
    }
    writeln("ConvertFasta::End");
}
/**
 * Alignment step: run the MACSE jar (`-prog alignSequences`) on every
 * per-gene fasta file.
 *
 * Genes are processed with a parallel foreach (work unit size 1). Input is
 * `<DirConsensus>/gene/<gene>.fasta`; outputs go to
 * `<DirAlign>/AA/<gene>.fasta` and `<DirAlign>/NT/<gene>.fasta`.
 *
 * Params:
 *   ARG_G        = gene names
 *   DirConsensus = consensus root directory
 *   DirAlign     = alignment root directory (created if absent)
 *   PathMacse    = MACSE jar file
 */
void processAlign(string[] ARG_G, string DirConsensus, string DirAlign, string PathMacse){
    string geneDir = DirConsensus ~ "/" ~ "gene";
    string aaDir = DirAlign ~ "/" ~ "AA";
    string ntDir = DirAlign ~ "/" ~ "NT";

    writeln("Align::Start");
    createDir(DirAlign);
    createDir(aaDir);
    createDir(ntDir);

    foreach (gene; parallel(ARG_G, 1)) {
        string geneFile = gene ~ ".fasta";
        executeCommand([
            "java", "-jar", PathMacse,
            "-prog", "alignSequences",
            "-seq", geneDir ~ "/" ~ geneFile,
            "-out_AA", aaDir ~ "/" ~ geneFile,
            "-out_NT", ntDir ~ "/" ~ geneFile
        ]);
    }
    writeln("Align::End");
}
2024-09-08 02:48:54 +08:00
2024-09-10 00:37:06 +08:00
/**
 * Trimming step.
 *
 * Phase 1: for each gene, copy the AA and NT alignments from
 * `<DirAlign>/AA` and `<DirAlign>/NT` into `<DirAlign>/AA_out` and
 * `<DirAlign>/NT_out`, then run the delstop tool on the copies with
 * `--delete`.
 * Phase 2: for each gene, run trimAl on the AA copy with `-backtrans` on
 * the NT copy and `-automated1`, writing `<DirTrim>/NT/<gene>.fasta`.
 *
 * Both phases use a parallel foreach (work unit size 1). A gene whose
 * alignment files are missing is reported and skipped in both phases; the
 * original only guarded phase 2, so a missing alignment made `copy` throw
 * inside the parallel loop.
 *
 * Params:
 *   ARG_G       = gene names
 *   DirAlign    = alignment root directory
 *   DirTrim     = trimming root directory (created if absent)
 *   PathDelstop = delstop executable
 *   PathTrimal  = trimAl executable
 */
void processTrimming(string[] ARG_G, string DirAlign, string DirTrim, string PathDelstop, string PathTrimal){
    writeln("Trimming::Start");

    string DirAA = DirAlign ~ "/" ~ "AA";
    string DirNT = DirAlign ~ "/" ~ "NT";
    string DirAA_out = DirAlign ~ "/" ~ "AA_out";
    string DirNT_out = DirAlign ~ "/" ~ "NT_out";

    createDir(DirAA_out);
    createDir(DirNT_out);

    // Phase 1: work on copies so the raw alignments stay untouched.
    foreach (gene; parallel(ARG_G, 1)) {
        string inputFastaAA = DirAA ~ "/" ~ gene ~ ".fasta";
        string outputFastaAA = DirAA_out ~ "/" ~ gene ~ ".fasta";
        string inputFastaNT = DirNT ~ "/" ~ gene ~ ".fasta";
        string outputFastaNT = DirNT_out ~ "/" ~ gene ~ ".fasta";

        if (exists(inputFastaAA) && exists(inputFastaNT)) {
            copy(inputFastaNT, outputFastaNT);
            copy(inputFastaAA, outputFastaAA);
            // del stop codon
            executeCommand([PathDelstop, outputFastaAA, outputFastaNT, "--delete"]);
        } else {
            writeln("Alignment not found for gene: ", gene);
        }
    }

    string DirTrimNT = DirTrim ~ "/" ~ "NT";
    createDir(DirTrim);
    createDir(DirTrimNT);

    // Phase 2: trim the AA alignment and back-translate to nucleotides.
    foreach (gene; parallel(ARG_G, 1)) {
        string inputFastaAA = DirAA_out ~ "/" ~ gene ~ ".fasta";
        string inputBackTransNT = DirNT_out ~ "/" ~ gene ~ ".fasta";
        string outputFastaNT = DirTrimNT ~ "/" ~ gene ~ ".fasta";

        if (exists(inputFastaAA) && exists(inputBackTransNT)) {
            executeCommand([PathTrimal, "-in", inputFastaAA, "-backtrans", inputBackTransNT, "-out", outputFastaNT, "-automated1"]);
        } else {
            writeln("Skipping gene: ", gene, " as files are missing.");
        }
    }
    writeln("Trimming::End");
}
2024-09-08 02:48:54 +08:00
/**
 * Program entry point: parse the command line, optionally read tool paths
 * from a config file, then run the pipeline step(s) selected with `-f`.
 *
 * Changes relative to the previous implementation:
 *  - option values are really consumed (the old `i++` acted on a per-iteration
 *    copy of the foreach variable), and an option given last without a
 *    value raises an Exception instead of an out-of-bounds error;
 *  - `-m/--memory` is parsed, and `--genes` is accepted as well as `--gene`;
 *  - a gene list given with `-g` is no longer overwritten by the headers of
 *    the `-r` reference; the reference is only used when no list was given;
 *  - a key missing from the config file keeps the current path instead of
 *    blanking it;
 *  - the trim step also checks that the delstop tool exists;
 *  - an unrecognised `-f` value produces a warning.
 *
 * Params:
 *   args = process arguments (args[0] is the program name)
 * Throws: Exception when an option lacks its value or when a selected
 *         step's prerequisites are not met.
 */
void main(string[] args) {
    string pkgver = "0.0.3";

    // Working directories, all relative to the current directory.
    string DirHome = std.file.getcwd();
    string DirRaw = DirHome ~ "/00_raw";
    string DirQcTrim = DirHome ~ "/01_fastp";
    string DirMap = DirHome ~ "/02_bowtie2";
    string DirAssembly = DirHome ~ "/03_spades";
    string DirBam = DirHome ~ "/04_bam";
    string DirVcf = DirHome ~ "/05_vcf";
    string DirConsensus = DirHome ~ "/06_consen";
    string DirAlign = DirHome ~ "/08_macse";
    string DirTrim = DirHome ~ "/09_trimal";

    // Default tool locations; overridable by command line and config file.
    string PathFastp = "/usr/bin/fastp";
    string PathSpades = "/usr/bin/spades.py";
    string PathDiamond = "/usr/bin/diamond";
    string PathSortDiamond = "/usr/bin/sortdiamond";
    string PathBowtie2 = "/usr/bin/bowtie2";
    string PathSamtools = "/usr/bin/samtools";
    string PathBcftools = "/usr/bin/bcftools";
    string PathMacse = "/usr/share/java/macse.jar";
    string PathDelstop = "/usr/bin/delstop";
    string PathTrimal = "/usr/bin/trimal";

    int ARG_T = 8;
    int ARG_M = 16;
    string[] ARG_G;
    string[] ARG_L;
    string ARG_C;
    string ARG_F;
    string ARG_R;

    if (args.length <= 1) {
        show_help(pkgver);
        return;
    }

    // Return the value following the option at args[i] and move i onto it,
    // so the main loop's increment steps past the value.
    string takeValue(ref size_t i) {
        if (i + 1 >= args.length) {
            throw new Exception("Missing value for option " ~ args[i]);
        }
        i++;
        return args[i];
    }

    // args[0] is the program name, so parsing starts at 1.
    for (size_t i = 1; i < args.length; i++) {
        switch (args[i]) {
            case "-c", "--config":
                ARG_C = takeValue(i);
                break;
            case "-f", "--functions":
                ARG_F = takeValue(i);
                break;
            case "-g", "--gene", "--genes":
                ARG_G ~= readArrFromFile(takeValue(i));
                break;
            case "-h", "--help":
                show_help(pkgver);
                return;
            case "-l", "--list":
                ARG_L ~= readArrFromFile(takeValue(i));
                break;
            case "-m", "--memory":
                ARG_M = takeValue(i).to!int;
                break;
            case "-r", "--reference":
                ARG_R = takeValue(i);
                break;
            case "-t", "--threads":
                ARG_T = takeValue(i).to!int;
                break;
            case "--fastp":
                PathFastp = takeValue(i);
                break;
            case "--spades":
                PathSpades = takeValue(i);
                break;
            case "--diamond":
                PathDiamond = takeValue(i);
                break;
            case "--sortdiamond":
                PathSortDiamond = takeValue(i);
                break;
            case "--bowtie2":
                PathBowtie2 = takeValue(i);
                break;
            case "--samtools":
                PathSamtools = takeValue(i);
                break;
            case "--bcftools":
                PathBcftools = takeValue(i);
                break;
            case "--macse":
                PathMacse = takeValue(i);
                break;
            case "--trimal":
                PathTrimal = takeValue(i);
                break;
            case "--delstop":
                PathDelstop = takeValue(i);
                break;
            default:
                // Unknown arguments are ignored, as before.
                break;
        }
    }

    // Fall back to the reference's fasta headers when no gene list was given.
    if (ARG_G.length == 0 && ARG_R.length != 0) {
        ARG_G = getARG_G(ARG_R);
    }

    // Tool paths from the config file; keys the file does not define leave
    // the current value alone.
    if (ARG_C != "") {
        void fromConfig(ref string target, string key) {
            string value = getValueFromConfig(ARG_C, key);
            if (value.length != 0) {
                target = value;
            }
        }
        fromConfig(PathFastp, "fastp");
        fromConfig(PathSpades, "spades");
        fromConfig(PathDiamond, "diamond");
        fromConfig(PathSortDiamond, "sortdiamond");
        fromConfig(PathBowtie2, "bowtie2");
        fromConfig(PathSamtools, "samtools");
        fromConfig(PathBcftools, "bcftools");
        fromConfig(PathMacse, "macse");
        fromConfig(PathDelstop, "delstop");
        fromConfig(PathTrimal, "trimal");
    }

    // True when the named step was requested directly or through "all".
    bool wants(string step) {
        return ARG_F == "all" || ARG_F == step;
    }

    // Abort when a step's prerequisites are not met.
    void ensure(bool ok) {
        if (!ok) {
            throw new Exception("please confirm paramenters are correct");
        }
    }

    writeln("RGBEPP::Start");

    string[] knownSteps = ["all", "clean", "assembly", "map", "postmap",
                           "varcall", "consen", "align", "trim"];
    if (!knownSteps.canFind(ARG_F)) {
        writeln("Warning: unknown function type '", ARG_F, "', nothing to do");
    }

    // Perform steps based on provided function argument
    if (wants("clean")) {
        ensure(testFiles([PathFastp]) && testStringArray(ARG_L));
        processQcTrim(ARG_L, ARG_T, DirRaw, DirQcTrim, PathFastp);
    }

    if (wants("assembly")) {
        ensure(testFiles([PathSpades]) && testStringArray(ARG_L));
        processAssembly(ARG_L, ARG_M, ARG_T, DirQcTrim, DirAssembly, PathSpades);
        processAssemMv(ARG_L, DirAssembly);
    }

    if (wants("map")) {
        ensure(testFiles([PathBowtie2, PathDiamond, PathSamtools, PathSortDiamond])
               && testStringArray(ARG_L) && testString(ARG_R));
        processMappingDenovo(ARG_L, ARG_R, ARG_T, DirQcTrim, DirAssembly, DirMap, PathBowtie2, PathDiamond, PathSamtools, PathSortDiamond);
    }

    if (wants("postmap")) {
        ensure(testFiles([PathSamtools]) && testStringArray(ARG_L));
        processPostMap(ARG_L, ARG_T, DirMap, DirBam, PathSamtools);
    }

    if (wants("varcall")) {
        ensure(testFiles([PathBcftools]) && testStringArray(ARG_L));
        processVarCallDenovo(ARG_L, ARG_T, DirAssembly, DirMap, DirBam, DirVcf, PathBcftools);
    }

    if (wants("consen")) {
        ensure(testFiles([PathBcftools]) && testStringArray(ARG_L) && testStringArray(ARG_G));
        processConDenovo(ARG_G, ARG_L, ARG_T, DirAssembly, DirVcf, DirConsensus, PathBcftools);
        processCombFasta(ARG_G, ARG_L, DirConsensus);
    }

    if (wants("align")) {
        ensure(testFiles([PathMacse]) && testJava() && testStringArray(ARG_G));
        processAlign(ARG_G, DirConsensus, DirAlign, PathMacse);
    }

    if (wants("trim")) {
        // The trim step runs both delstop and trimAl, so check both.
        ensure(testFiles([PathTrimal, PathDelstop]) && testStringArray(ARG_G));
        processTrimming(ARG_G, DirAlign, DirTrim, PathDelstop, PathTrimal);
    }

    writeln("RGBEPP::End");
}