polish: use buildPath instead of /; add: codon only
This commit is contained in:
parent
78d7aad869
commit
4d3b5c2001
3 changed files with 204 additions and 445 deletions
275
RGBEPP.d
275
RGBEPP.d
|
@ -25,13 +25,15 @@ void show_help(string pkgver) {
|
||||||
-m\t--memory\tmemory settings (optional, default 16 GB)
|
-m\t--memory\tmemory settings (optional, default 16 GB)
|
||||||
-r\t--reference\treference genome path
|
-r\t--reference\treference genome path
|
||||||
-t\t--threads\tthreads setting (optional, default 8 threads)
|
-t\t--threads\tthreads setting (optional, default 8 threads)
|
||||||
|
--codon\t\tOnly use the codon region (optional)
|
||||||
--fastp\t\tFastp path (optional)
|
--fastp\t\tFastp path (optional)
|
||||||
--spades\t\tSpades python path (optional)
|
--spades\t\tSpades python path (optional)
|
||||||
--diamond\t\tDiamond python path (optional)
|
--diamond\t\tDiamond python path (optional)
|
||||||
--sortdiamond\t\tSortDiamond python path (optional)
|
--sortdiamond\tSortDiamond python path (optional)
|
||||||
--bowtie2\t\tBowtie2 path (optional)
|
--bowtie2\t\tBowtie2 path (optional)
|
||||||
--samtools\t\tSamtools path (optional)
|
--samtools\t\tSamtools path (optional)
|
||||||
--bcftools\t\tBcftools path (optional)
|
--bcftools\t\tBcftools path (optional)
|
||||||
|
--exonerate\t\tExonerate path (optional)
|
||||||
--macse\t\tMacse jarfile path (optional)
|
--macse\t\tMacse jarfile path (optional)
|
||||||
--delstop\t\tDelstop path (optional)
|
--delstop\t\tDelstop path (optional)
|
||||||
--trimal\t\tTrimal path (optional)
|
--trimal\t\tTrimal path (optional)
|
||||||
|
@ -73,6 +75,15 @@ void createDir(string path) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void moveDir (string oldPath, string newPath) {
|
||||||
|
try {
|
||||||
|
rename(oldPath, newPath);
|
||||||
|
writeln("Directory renamed successfully.");
|
||||||
|
} catch (Exception e) {
|
||||||
|
writeln("Error renaming directory: ", e.msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void executeCommand(string[] cmd) {
|
void executeCommand(string[] cmd) {
|
||||||
auto process = spawnProcess(cmd);
|
auto process = spawnProcess(cmd);
|
||||||
|
|
||||||
|
@ -81,6 +92,27 @@ void executeCommand(string[] cmd) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void executeCommandToFile(string[] cmd, string outputFile) {
|
||||||
|
// Create a pipe for the command's output
|
||||||
|
auto pipe = pipe();
|
||||||
|
|
||||||
|
// Spawn the process
|
||||||
|
auto pid = spawnProcess(cmd, stdin, pipe.writeEnd);
|
||||||
|
|
||||||
|
// Close the write end of the pipe to signal EOF
|
||||||
|
pipe.writeEnd.close();
|
||||||
|
|
||||||
|
// Read the output from the pipe
|
||||||
|
auto output = cast(string) pipe.readEnd.byChunk(4096).joiner.array;
|
||||||
|
|
||||||
|
// Wait for the process to finish
|
||||||
|
wait(pid);
|
||||||
|
|
||||||
|
// Write the output to the specified file
|
||||||
|
std.file.write(outputFile, cast(ubyte[])output);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void executeCommandPipe(string[][] cmds) {
|
void executeCommandPipe(string[][] cmds) {
|
||||||
|
|
||||||
Pid[] pids;
|
Pid[] pids;
|
||||||
|
@ -128,7 +160,7 @@ string getBaseName(string ARG_R){
|
||||||
|
|
||||||
string[] getRef(string ARG_R, string DirMap){
|
string[] getRef(string ARG_R, string DirMap){
|
||||||
string baseNameRef = getBaseName(ARG_R);
|
string baseNameRef = getBaseName(ARG_R);
|
||||||
string ARG_R_index = DirMap ~ "/index/" ~ baseNameRef; // bt2_index_base
|
string ARG_R_index = buildPath(DirMap, "index", baseNameRef); // bt2_index_base
|
||||||
string ARG_R_refer = ARG_R_index ~ ".fasta"; //reference_in fasta file
|
string ARG_R_refer = ARG_R_index ~ ".fasta"; //reference_in fasta file
|
||||||
string[] Refs = [ARG_R_index, ARG_R_refer];
|
string[] Refs = [ARG_R_index, ARG_R_refer];
|
||||||
return Refs;
|
return Refs;
|
||||||
|
@ -168,12 +200,12 @@ void processQcTrim(string[] ARG_L, int ARG_T, string DirRaw, string DirQcTrim, s
|
||||||
writeln("QcTrimming::Start");
|
writeln("QcTrimming::Start");
|
||||||
foreach (string file; ARG_L) {
|
foreach (string file; ARG_L) {
|
||||||
string baseName = getBaseName(file);
|
string baseName = getBaseName(file);
|
||||||
string inputFileR1 = DirRaw ~ "/" ~ baseName ~ "_R1.fastq.gz";
|
string inputFileR1 = buildPath(DirRaw, baseName ~ "_R1.fastq.gz");
|
||||||
string inputFileR2 = DirRaw ~ "/" ~ baseName ~ "_R2.fastq.gz";
|
string inputFileR2 = buildPath(DirRaw, baseName ~ "_R2.fastq.gz");
|
||||||
string outputFileR1 = DirQcTrim ~ "/" ~ baseName ~ "_R1.fastq.gz";
|
string outputFileR1 = buildPath(DirQcTrim, baseName ~ "_R1.fastq.gz");
|
||||||
string outputFileR2 = DirQcTrim ~ "/" ~ baseName ~ "_R2.fastq.gz";
|
string outputFileR2 = buildPath(DirQcTrim, baseName ~ "_R2.fastq.gz");
|
||||||
string jsonFile = DirQcTrim ~ "/" ~ baseName ~ ".json";
|
string jsonFile = buildPath(DirQcTrim, baseName ~ ".json");
|
||||||
string htmlFile = DirQcTrim ~ "/" ~ baseName ~ ".html";
|
string htmlFile = buildPath(DirQcTrim, baseName ~ ".html");
|
||||||
|
|
||||||
// Perform quality control and trimming using external program `fastp`
|
// Perform quality control and trimming using external program `fastp`
|
||||||
string[] cmdQcTrim = [PathFastp, "-i", inputFileR1, "-I", inputFileR2,
|
string[] cmdQcTrim = [PathFastp, "-i", inputFileR1, "-I", inputFileR2,
|
||||||
|
@ -190,10 +222,10 @@ void processAssembly(string[] ARG_L, int ARG_M, int ARG_T, string DirQcTrim, str
|
||||||
createDir(DirAssembly);
|
createDir(DirAssembly);
|
||||||
foreach (string file; ARG_L) {
|
foreach (string file; ARG_L) {
|
||||||
string baseName = getBaseName(file);
|
string baseName = getBaseName(file);
|
||||||
string DirAss = DirAssembly ~ "/" ~ baseName;
|
string DirAss = buildPath(DirAssembly, baseName);
|
||||||
createDir(DirAss);
|
createDir(DirAss);
|
||||||
string inputFileR1 = DirQcTrim ~ "/" ~ baseName ~ "_R1.fastq.gz";
|
string inputFileR1 = buildPath(DirQcTrim, baseName ~ "_R1.fastq.gz");
|
||||||
string inputFileR2 = DirQcTrim ~ "/" ~ baseName ~ "_R2.fastq.gz";
|
string inputFileR2 = buildPath(DirQcTrim, baseName ~ "_R2.fastq.gz");
|
||||||
string[] cmdAssembly = [PathSpades, "--pe1-1", inputFileR1, "--pe1-2", inputFileR2, "-t", ARG_T.to!string, "-m", ARG_M.to!string, "--careful", "--phred-offset", "33", "-o", DirAss];
|
string[] cmdAssembly = [PathSpades, "--pe1-1", inputFileR1, "--pe1-2", inputFileR2, "-t", ARG_T.to!string, "-m", ARG_M.to!string, "--careful", "--phred-offset", "33", "-o", DirAss];
|
||||||
executeCommand(cmdAssembly);
|
executeCommand(cmdAssembly);
|
||||||
}
|
}
|
||||||
|
@ -202,18 +234,18 @@ void processAssembly(string[] ARG_L, int ARG_M, int ARG_T, string DirQcTrim, str
|
||||||
|
|
||||||
void processAssemMv(string[] ARG_L,string DirAssembly){
|
void processAssemMv(string[] ARG_L,string DirAssembly){
|
||||||
// Prepare
|
// Prepare
|
||||||
string DirAssemblySca = DirAssembly ~ "/" ~ "scaffolds";
|
string DirAssemblySca = buildPath(DirAssembly, "scaffolds");
|
||||||
string DirAssemblyCont = DirAssembly ~ "/" ~ "contigs";
|
string DirAssemblyCont = buildPath(DirAssembly, "contigs");
|
||||||
writeln("Assembly_Move::Start");
|
writeln("Assembly_Move::Start");
|
||||||
createDir(DirAssemblySca);
|
createDir(DirAssemblySca);
|
||||||
createDir(DirAssemblyCont);
|
createDir(DirAssemblyCont);
|
||||||
foreach (string file; ARG_L ){
|
foreach (string file; ARG_L ){
|
||||||
string baseName = getBaseName(file);
|
string baseName = getBaseName(file);
|
||||||
string DirAssemblyInd = DirAssembly ~ "/" ~ baseName;
|
string DirAssemblyInd = buildPath(DirAssembly, baseName);
|
||||||
string inputSca = DirAssemblyInd ~ "/" ~ "scaffolds.fasta";
|
string inputSca = buildPath(DirAssemblyInd, "scaffolds.fasta");
|
||||||
string inputCont = DirAssemblyInd ~ "/" ~ "contigs.fasta";
|
string inputCont = buildPath(DirAssemblyInd, "contigs.fasta");
|
||||||
string outputSca = DirAssemblySca ~ "/" ~ baseName ~ ".fasta";
|
string outputSca = buildPath(DirAssemblySca, baseName ~ ".fasta");
|
||||||
string outputCont = DirAssemblyCont ~ "/" ~ baseName ~ ".fasta";
|
string outputCont = buildPath(DirAssemblyCont, baseName ~ ".fasta");
|
||||||
if (!exists(inputSca)) {
|
if (!exists(inputSca)) {
|
||||||
writeln("File not found: ", inputSca);
|
writeln("File not found: ", inputSca);
|
||||||
continue;
|
continue;
|
||||||
|
@ -234,28 +266,28 @@ void processMappingDenovo(string[] ARG_L, string ARG_R, int ARG_T, string DirQcT
|
||||||
// Prepare directory
|
// Prepare directory
|
||||||
writeln("Mapping::Start");
|
writeln("Mapping::Start");
|
||||||
createDir(DirMap);
|
createDir(DirMap);
|
||||||
createDir(DirMap ~ "/index");
|
createDir(buildPath(DirMap, "index"));
|
||||||
string DirAssemblySca = DirAssembly ~ "/" ~ "scaffolds";
|
string DirAssemblySca = buildPath(DirAssembly, "scaffolds");
|
||||||
string DirAssemblyFas = DirAssembly ~ "/" ~ "fasta";
|
string DirAssemblyFas = buildPath(DirAssembly, "fasta");
|
||||||
createDir(DirAssemblyFas);
|
createDir(DirAssemblyFas);
|
||||||
|
|
||||||
string ARG_R_Base = getBaseName(ARG_R);
|
string ARG_R_Base = getBaseName(ARG_R);
|
||||||
string ARG_R_Ref = DirAssemblyFas ~ "/" ~ ARG_R_Base ~ ".fasta";
|
string ARG_R_Ref = buildPath(DirAssemblyFas, ARG_R_Base ~ ".fasta");
|
||||||
copy(ARG_R, ARG_R_Ref);
|
copy(ARG_R, ARG_R_Ref);
|
||||||
string [] cmdDmMakeDB = [ PathDiamond, "makedb", "--db", "Reference", "--in", ARG_R_Ref];
|
string [] cmdDmMakeDB = [ PathDiamond, "makedb", "--db", "Reference", "--in", ARG_R_Ref];
|
||||||
executeCommand(cmdDmMakeDB);
|
executeCommand(cmdDmMakeDB);
|
||||||
string ReferDmnd = DirAssemblyFas ~ "/" ~ "Reference.dmnd";
|
string ReferDmnd = buildPath(DirAssemblyFas, "Reference.dmnd");
|
||||||
string PathBowtie2_build = PathBowtie2 ~ "-build";
|
string PathBowtie2_build = PathBowtie2 ~ "-build";
|
||||||
|
|
||||||
foreach (string file; ARG_L) {
|
foreach (string file; ARG_L) {
|
||||||
string baseName = getBaseName(file);
|
string baseName = getBaseName(file);
|
||||||
string inputM8 = DirAssemblySca ~ "/" ~ baseName ~ ".m8";
|
string inputM8 = buildPath(DirAssemblySca, baseName ~ ".m8");
|
||||||
string inputFasta = DirAssemblySca ~ "/" ~ baseName ~ ".fasta";
|
string inputFasta = buildPath(DirAssemblySca, baseName ~ ".fasta");
|
||||||
string outputSort = DirAssemblyFas ~ "/" ~ baseName ~ ".fasta";
|
string outputSort = buildPath(DirAssemblyFas, baseName ~ ".fasta");
|
||||||
string outputIndex = DirAssemblyFas ~ "/" ~ baseName;
|
string outputIndex = buildPath(DirAssemblyFas, baseName);
|
||||||
string inputFileR1 = DirQcTrim ~ "/" ~ baseName ~ "_R1.fastq.gz";
|
string inputFileR1 = buildPath(DirQcTrim, baseName ~ "_R1.fastq.gz");
|
||||||
string inputFileR2 = DirQcTrim ~ "/" ~ baseName ~ "_R2.fastq.gz";
|
string inputFileR2 = buildPath(DirQcTrim, baseName ~ "_R2.fastq.gz");
|
||||||
string outputBam = DirMap ~ "/" ~ baseName ~ ".bam";
|
string outputBam = buildPath(DirMap, baseName ~ ".bam");
|
||||||
|
|
||||||
string[] cmdDiamond = [PathDiamond, "blastx", "-d", "Reference.dmnd", "-q", inputFasta, "-o", inputM8, "--outfmt", "6", "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore", "qlen", "slen", "gaps", "ppos", "qframe", "qseq"];
|
string[] cmdDiamond = [PathDiamond, "blastx", "-d", "Reference.dmnd", "-q", inputFasta, "-o", inputM8, "--outfmt", "6", "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore", "qlen", "slen", "gaps", "ppos", "qframe", "qseq"];
|
||||||
string[] cmdSortDiamond = [PathSortDiamond, inputM8, outputSort];
|
string[] cmdSortDiamond = [PathSortDiamond, inputM8, outputSort];
|
||||||
|
@ -278,8 +310,8 @@ void processPostMap(string[] ARG_L, int ARG_T, string DirMap, string DirBam, str
|
||||||
|
|
||||||
foreach (string file; ARG_L) {
|
foreach (string file; ARG_L) {
|
||||||
string baseName = getBaseName(file);
|
string baseName = getBaseName(file);
|
||||||
string inputBam = DirMap ~ "/" ~ baseName ~ ".bam";
|
string inputBam = buildPath(DirMap, baseName ~ ".bam");
|
||||||
string outputBam = DirBam ~ "/" ~ baseName ~ ".bam";
|
string outputBam = buildPath(DirBam, baseName ~ ".bam");
|
||||||
|
|
||||||
// Convert SAM to BAM, sort and remove duplicates using Samtools
|
// Convert SAM to BAM, sort and remove duplicates using Samtools
|
||||||
string[] cmdFixmate = [PathSamtools, "fixmate", "-@", ARG_T.to!string, "-m", inputBam, "-"];
|
string[] cmdFixmate = [PathSamtools, "fixmate", "-@", ARG_T.to!string, "-m", inputBam, "-"];
|
||||||
|
@ -297,14 +329,14 @@ void processPostMap(string[] ARG_L, int ARG_T, string DirMap, string DirBam, str
|
||||||
void processVarCallDenovo(string[] ARG_L, int ARG_T, string DirAssembly, string DirMap, string DirBam, string DirVcf, string PathBcftools) {
|
void processVarCallDenovo(string[] ARG_L, int ARG_T, string DirAssembly, string DirMap, string DirBam, string DirVcf, string PathBcftools) {
|
||||||
writeln("VarCalling::Start");
|
writeln("VarCalling::Start");
|
||||||
|
|
||||||
string DirAssemblyFas = DirAssembly ~ "/" ~ "fasta";
|
string DirAssemblyFas = buildPath(DirAssembly, "fasta");
|
||||||
createDir(DirVcf);
|
createDir(DirVcf);
|
||||||
|
|
||||||
foreach (string file; parallel(ARG_L, 1)) {
|
foreach (string file; parallel(ARG_L, 1)) {
|
||||||
string baseName = getBaseName(file);
|
string baseName = getBaseName(file);
|
||||||
string inputBam = DirBam ~ "/" ~ baseName ~ ".bam";
|
string inputBam = buildPath(DirBam, baseName ~ ".bam");
|
||||||
string outputVcf = DirVcf ~ "/" ~ baseName ~ ".vcf.gz";
|
string outputVcf = buildPath(DirVcf, baseName ~ ".vcf.gz");
|
||||||
string referFasta = DirAssemblyFas ~ "/" ~ baseName ~ ".fasta";
|
string referFasta = buildPath(DirAssemblyFas, baseName ~ ".fasta");
|
||||||
// Variant calling using bcftools
|
// Variant calling using bcftools
|
||||||
string[] cmdPileup = [PathBcftools, "mpileup", "-Oz", "--threads", ARG_T.to!string, "-f", referFasta, inputBam];
|
string[] cmdPileup = [PathBcftools, "mpileup", "-Oz", "--threads", ARG_T.to!string, "-f", referFasta, inputBam];
|
||||||
string[] cmdVarCall = [PathBcftools, "call", "-mv", "-Oz", "--threads", ARG_T.to!string];
|
string[] cmdVarCall = [PathBcftools, "call", "-mv", "-Oz", "--threads", ARG_T.to!string];
|
||||||
|
@ -321,17 +353,17 @@ void processVarCallDenovo(string[] ARG_L, int ARG_T, string DirAssembly, string
|
||||||
void processConDenovo(string[] ARG_G, string[] ARG_L, int ARG_T, string DirAssembly, string DirVcf, string DirConsensus, string PathBcftools) {
|
void processConDenovo(string[] ARG_G, string[] ARG_L, int ARG_T, string DirAssembly, string DirVcf, string DirConsensus, string PathBcftools) {
|
||||||
createDir(DirConsensus);
|
createDir(DirConsensus);
|
||||||
|
|
||||||
string DirConTaxa = DirConsensus ~ "/" ~ "taxa";
|
string DirConTaxa = buildPath(DirConsensus, "taxa");
|
||||||
string DirAssemblyFas = DirAssembly ~ "/" ~ "fasta";
|
string DirAssemblyFas = buildPath(DirAssembly, "fasta");
|
||||||
createDir(DirConTaxa);
|
createDir(DirConTaxa);
|
||||||
|
|
||||||
writeln("Consensus::Start");
|
writeln("Consensus::Start");
|
||||||
// Extract fasta from vcf file
|
// Extract fasta from vcf file
|
||||||
foreach (string file; ARG_L) {
|
foreach (string file; ARG_L) {
|
||||||
string baseName = getBaseName(file);
|
string baseName = getBaseName(file);
|
||||||
string inputVcf = DirVcf ~ "/" ~ baseName ~ ".vcf.gz";
|
string inputVcf = buildPath(DirVcf, baseName ~ ".vcf.gz");
|
||||||
string outputFasta = DirConTaxa ~ "/" ~ baseName ~ ".fasta";
|
string outputFasta = buildPath(DirConTaxa, baseName ~ ".fasta");
|
||||||
string referFasta = DirAssemblyFas ~ "/" ~ baseName ~ ".fasta";
|
string referFasta = buildPath(DirAssemblyFas, baseName ~ ".fasta");
|
||||||
// index vcf.gz
|
// index vcf.gz
|
||||||
string[] cmdIndexVcf = [PathBcftools, "index", inputVcf];
|
string[] cmdIndexVcf = [PathBcftools, "index", inputVcf];
|
||||||
executeCommand(cmdIndexVcf);
|
executeCommand(cmdIndexVcf);
|
||||||
|
@ -347,8 +379,8 @@ void processConDenovo(string[] ARG_G, string[] ARG_L, int ARG_T, string DirAssem
|
||||||
|
|
||||||
void processCombFasta(string[] ARG_G, string[] ARG_L, string DirConsensus) {
|
void processCombFasta(string[] ARG_G, string[] ARG_L, string DirConsensus) {
|
||||||
|
|
||||||
string DirConTaxa = DirConsensus ~ "/" ~ "taxa";
|
string DirConTaxa = buildPath(DirConsensus, "taxa");
|
||||||
string DirConGene = DirConsensus ~ "/" ~ "gene";
|
string DirConGene = buildPath(DirConsensus, "gene");
|
||||||
createDir(DirConGene);
|
createDir(DirConGene);
|
||||||
|
|
||||||
// create a dictory
|
// create a dictory
|
||||||
|
@ -357,7 +389,7 @@ void processCombFasta(string[] ARG_G, string[] ARG_L, string DirConsensus) {
|
||||||
writeln("ConvertFasta::Start");
|
writeln("ConvertFasta::Start");
|
||||||
// read first
|
// read first
|
||||||
foreach (file; ARG_L) {
|
foreach (file; ARG_L) {
|
||||||
string inputFile = DirConTaxa ~ "/" ~ file ~ ".fasta";
|
string inputFile = buildPath(DirConTaxa, file ~ ".fasta");
|
||||||
if (!exists(inputFile)) {
|
if (!exists(inputFile)) {
|
||||||
writeln("File not found: ", inputFile);
|
writeln("File not found: ", inputFile);
|
||||||
continue;
|
continue;
|
||||||
|
@ -385,7 +417,7 @@ void processCombFasta(string[] ARG_G, string[] ARG_L, string DirConsensus) {
|
||||||
}
|
}
|
||||||
// write different files
|
// write different files
|
||||||
foreach (gene; ARG_G) {
|
foreach (gene; ARG_G) {
|
||||||
string outputFile = DirConGene ~ "/" ~ gene ~ ".fasta";
|
string outputFile = buildPath(DirConGene, gene ~ ".fasta");
|
||||||
File output = File(outputFile, "w");
|
File output = File(outputFile, "w");
|
||||||
if (gene in geneSequences) {
|
if (gene in geneSequences) {
|
||||||
output.write(geneSequences[gene]);
|
output.write(geneSequences[gene]);
|
||||||
|
@ -394,20 +426,99 @@ void processCombFasta(string[] ARG_G, string[] ARG_L, string DirConsensus) {
|
||||||
writeln("ConvertFasta::End");
|
writeln("ConvertFasta::End");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void splitFasta(const string inputFasta) {
|
||||||
|
File infile;
|
||||||
|
try {
|
||||||
|
infile = File(inputFasta, "r");
|
||||||
|
} catch (FileException e) {
|
||||||
|
stderr.writeln("Error: Unable to open input file ", inputFasta);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
string line;
|
||||||
|
string seqName;
|
||||||
|
File outfile;
|
||||||
|
bool in_sequence = false;
|
||||||
|
|
||||||
|
foreach (lineContent; infile.byLine()) {
|
||||||
|
if (lineContent.empty) continue;
|
||||||
|
|
||||||
|
if (lineContent[0] == '>') {
|
||||||
|
// New sequence header
|
||||||
|
if (in_sequence) { // if found new sequence, close
|
||||||
|
outfile.close(); // previous output file
|
||||||
|
}
|
||||||
|
seqName = cast(string)lineContent[1 .. $]; // Remove '>'
|
||||||
|
string outputFile = seqName ~ ".fasta"; // suitable to many os
|
||||||
|
outfile = File(outputFile, "w");
|
||||||
|
outfile.writeln(">", getBaseName(inputFasta));
|
||||||
|
// will enter sequence
|
||||||
|
in_sequence = true;
|
||||||
|
} else if (in_sequence) {
|
||||||
|
// Inside sequence content
|
||||||
|
outfile.writeln(lineContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (in_sequence) {
|
||||||
|
outfile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (infile.eof) {
|
||||||
|
writeln("Sequences have been split into individual files.");
|
||||||
|
} else {
|
||||||
|
stderr.writeln("Error occurred while reading file.");
|
||||||
|
}
|
||||||
|
|
||||||
|
infile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
void processCodon(string[] ARG_G, string ARG_R, string DirConsensus, string PathExonerate){
|
||||||
|
|
||||||
|
string DirConGene = buildPath(DirConsensus , "gene");
|
||||||
|
|
||||||
|
string ARG_R_Base = getBaseName(ARG_R);
|
||||||
|
string ARG_R_Ref = buildPath(DirConsensus, ARG_R_Base ~ ".fasta");
|
||||||
|
copy(ARG_R, ARG_R_Ref);
|
||||||
|
splitFasta(ARG_R_Ref);
|
||||||
|
|
||||||
|
moveDir(DirConGene, DirConGene ~ "_bak");
|
||||||
|
|
||||||
|
writeln("GetCodon::Start");
|
||||||
|
|
||||||
|
foreach (gene; ARG_G) {
|
||||||
|
string inputFile = buildPath(DirConGene ~ "_bak", gene ~ ".fasta");
|
||||||
|
string outputFile = buildPath(DirConGene, gene ~ ".fasta");
|
||||||
|
string referFile = buildPath(DirConsensus, gene ~ ".fasta");
|
||||||
|
if (!exists(inputFile)) {
|
||||||
|
writeln("File not found: ", inputFile);
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
string[] cmdExonerate = [PathExonerate, inputFile, referFile, "--showalignment", "no", "--showvulgar", "no", "--showtargetgff", "no", "--ryo", "\">%qi\n%qcs\n\"", "--verbose", "0"];
|
||||||
|
executeCommandToFile(cmdExonerate, outputFile);
|
||||||
|
}
|
||||||
|
std.file.remove(referFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
rmdirRecurse(DirConGene ~ "_bak");
|
||||||
|
|
||||||
|
writeln("GetCodon::End");
|
||||||
|
}
|
||||||
|
|
||||||
void processAlign(string[] ARG_G, string DirConsensus, string DirAlign, string PathMacse){
|
void processAlign(string[] ARG_G, string DirConsensus, string DirAlign, string PathMacse){
|
||||||
|
|
||||||
string DirConGene = DirConsensus ~ "/" ~ "gene";
|
string DirConGene = buildPath(DirConsensus, "gene");
|
||||||
string DirAlignAA = DirAlign ~ "/" ~ "AA";
|
string DirAlignAA = buildPath(DirAlign, "AA");
|
||||||
string DirAlignNT = DirAlign ~ "/" ~ "NT";
|
string DirAlignNT = buildPath(DirAlign, "NT");
|
||||||
|
|
||||||
writeln("Align::Start");
|
writeln("Align::Start");
|
||||||
createDir(DirAlign);
|
createDir(DirAlign);
|
||||||
createDir(DirAlignAA);
|
createDir(DirAlignAA);
|
||||||
createDir(DirAlignNT);
|
createDir(DirAlignNT);
|
||||||
foreach (gene; parallel(ARG_G, 1)) {
|
foreach (gene; parallel(ARG_G, 1)) {
|
||||||
string inputFasta = DirConGene ~ "/" ~ gene ~ ".fasta";
|
string inputFasta = buildPath(DirConGene, gene ~ ".fasta");
|
||||||
string outAA = DirAlignAA ~ "/" ~ gene ~ ".fasta";
|
string outAA = buildPath(DirAlignAA, gene ~ ".fasta");
|
||||||
string outNT = DirAlignNT ~ "/" ~ gene ~ ".fasta";
|
string outNT = buildPath(DirAlignNT, gene ~ ".fasta");
|
||||||
string[] cmdAlign = ["java", "-jar", PathMacse, "-prog", "alignSequences", "-seq" , inputFasta, "-out_AA", outAA, "-out_NT", outNT ];
|
string[] cmdAlign = ["java", "-jar", PathMacse, "-prog", "alignSequences", "-seq" , inputFasta, "-out_AA", outAA, "-out_NT", outNT ];
|
||||||
executeCommand(cmdAlign);
|
executeCommand(cmdAlign);
|
||||||
}
|
}
|
||||||
|
@ -418,20 +529,20 @@ void processAlign(string[] ARG_G, string DirConsensus, string DirAlign, string P
|
||||||
void processTrimming(string[] ARG_G, string DirAlign, string DirTrim, string PathDelstop, string PathTrimal){
|
void processTrimming(string[] ARG_G, string DirAlign, string DirTrim, string PathDelstop, string PathTrimal){
|
||||||
writeln("Trimming::Start");
|
writeln("Trimming::Start");
|
||||||
|
|
||||||
string DirAA = DirAlign ~ "/" ~ "AA";
|
string DirAA = buildPath(DirAlign, "AA");
|
||||||
string DirNT = DirAlign ~ "/" ~ "NT";
|
string DirNT = buildPath(DirAlign, "NT");
|
||||||
string DirAA_out = DirAlign ~ "/" ~ "AA_out";
|
string DirAA_out = buildPath(DirAlign, "AA_out");
|
||||||
string DirNT_out = DirAlign ~ "/" ~ "NT_out";
|
string DirNT_out = buildPath(DirAlign, "NT_out");
|
||||||
|
|
||||||
createDir(DirAA_out);
|
createDir(DirAA_out);
|
||||||
createDir(DirNT_out);
|
createDir(DirNT_out);
|
||||||
|
|
||||||
// copy file firstly
|
// copy file firstly
|
||||||
foreach (gene; parallel(ARG_G,1)){
|
foreach (gene; parallel(ARG_G,1)){
|
||||||
string inputFastaAA = DirAA ~ "/" ~ gene ~ ".fasta";
|
string inputFastaAA = buildPath(DirAA, gene ~ ".fasta");
|
||||||
string outputFastaAA = DirAA_out ~ "/" ~ gene ~ ".fasta";
|
string outputFastaAA = buildPath(DirAA_out, gene ~ ".fasta");
|
||||||
string inputFastaNT = DirNT ~ "/" ~ gene ~ ".fasta";
|
string inputFastaNT = buildPath(DirNT, gene ~ ".fasta");
|
||||||
string outputFastaNT = DirNT_out ~ "/" ~ gene ~ ".fasta";
|
string outputFastaNT = buildPath(DirNT_out, gene ~ ".fasta");
|
||||||
|
|
||||||
copy(inputFastaNT, outputFastaNT);
|
copy(inputFastaNT, outputFastaNT);
|
||||||
copy(inputFastaAA, outputFastaAA);
|
copy(inputFastaAA, outputFastaAA);
|
||||||
|
@ -440,13 +551,13 @@ void processTrimming(string[] ARG_G, string DirAlign, string DirTrim, string Pat
|
||||||
executeCommand(cmdDelStop);
|
executeCommand(cmdDelStop);
|
||||||
}
|
}
|
||||||
|
|
||||||
string DirTrimNT = DirTrim ~ "/" ~ "NT";
|
string DirTrimNT = buildPath(DirTrim, "NT");
|
||||||
createDir(DirTrim);
|
createDir(DirTrim);
|
||||||
createDir(DirTrimNT);
|
createDir(DirTrimNT);
|
||||||
foreach (gene; parallel(ARG_G,1)){
|
foreach (gene; parallel(ARG_G,1)){
|
||||||
string inputFastaAA = DirAA_out ~ "/" ~ gene ~ ".fasta";
|
string inputFastaAA = buildPath(DirAA_out, gene ~ ".fasta");
|
||||||
string inputBackTransNT = DirNT_out ~ "/" ~ gene ~ ".fasta";
|
string inputBackTransNT = buildPath(DirNT_out, gene ~ ".fasta");
|
||||||
string outputFastaNT = DirTrimNT ~ "/" ~ gene ~ ".fasta";
|
string outputFastaNT = buildPath(DirTrimNT, gene ~ ".fasta");
|
||||||
if (exists(inputFastaAA) && exists(inputBackTransNT)) {
|
if (exists(inputFastaAA) && exists(inputBackTransNT)) {
|
||||||
string[] cmdTrim = [PathTrimal, "-in", inputFastaAA, "-backtrans", inputBackTransNT, "-out", outputFastaNT, "-automated1"];
|
string[] cmdTrim = [PathTrimal, "-in", inputFastaAA, "-backtrans", inputBackTransNT, "-out", outputFastaNT, "-automated1"];
|
||||||
executeCommand(cmdTrim);
|
executeCommand(cmdTrim);
|
||||||
|
@ -462,16 +573,16 @@ void main(string[] args) {
|
||||||
string pkgver = "0.0.3";
|
string pkgver = "0.0.3";
|
||||||
|
|
||||||
string DirHome = std.file.getcwd();
|
string DirHome = std.file.getcwd();
|
||||||
string DirRaw = DirHome ~ "/00_raw";
|
string DirRaw = buildPath(DirHome, "00_raw");
|
||||||
string DirQcTrim = DirHome ~ "/01_fastp";
|
string DirQcTrim = buildPath(DirHome, "01_fastp");
|
||||||
string DirMap = DirHome ~ "/02_bowtie2";
|
string DirMap = buildPath(DirHome, "02_bowtie2");
|
||||||
string DirAssembly = DirHome ~ "/03_spades";
|
string DirAssembly = buildPath(DirHome, "03_spades");
|
||||||
string DirBam = DirHome ~ "/04_bam";
|
string DirBam = buildPath(DirHome, "04_bam");
|
||||||
string DirVcf = DirHome ~ "/05_vcf";
|
string DirVcf = buildPath(DirHome, "05_vcf");
|
||||||
string DirConsensus = DirHome ~ "/06_consen";
|
string DirConsensus = buildPath(DirHome, "06_consen");
|
||||||
string DirConsensus1 = DirHome ~ "/07_consen1";
|
string DirConsensus1 = buildPath(DirHome, "07_consen1");
|
||||||
string DirAlign = DirHome ~ "/08_macse";
|
string DirAlign = buildPath(DirHome, "08_macse");
|
||||||
string DirTrim = DirHome ~ "/09_trimal";
|
string DirTrim = buildPath(DirHome, "09_trimal");
|
||||||
|
|
||||||
string PathFastp = "/usr/bin/fastp";
|
string PathFastp = "/usr/bin/fastp";
|
||||||
string PathSpades = "/usr/bin/spades.py";
|
string PathSpades = "/usr/bin/spades.py";
|
||||||
|
@ -480,6 +591,7 @@ void main(string[] args) {
|
||||||
string PathBowtie2 = "/usr/bin/bowtie2";
|
string PathBowtie2 = "/usr/bin/bowtie2";
|
||||||
string PathSamtools = "/usr/bin/samtools";
|
string PathSamtools = "/usr/bin/samtools";
|
||||||
string PathBcftools = "/usr/bin/bcftools";
|
string PathBcftools = "/usr/bin/bcftools";
|
||||||
|
string PathExonerate = "/usr/bin/exonerate";
|
||||||
string PathMacse = "/usr/share/java/macse.jar";
|
string PathMacse = "/usr/share/java/macse.jar";
|
||||||
string PathDelstop = "/usr/bin/delstop";
|
string PathDelstop = "/usr/bin/delstop";
|
||||||
string PathTrimal = "/usr/bin/trimal";
|
string PathTrimal = "/usr/bin/trimal";
|
||||||
|
@ -491,6 +603,7 @@ void main(string[] args) {
|
||||||
string ARG_C;
|
string ARG_C;
|
||||||
string ARG_F;
|
string ARG_F;
|
||||||
string ARG_R;
|
string ARG_R;
|
||||||
|
bool enableCodon = false;
|
||||||
|
|
||||||
if (args.length > 1){
|
if (args.length > 1){
|
||||||
foreach (int i; 0 .. cast(int)args.length) {
|
foreach (int i; 0 .. cast(int)args.length) {
|
||||||
|
@ -522,6 +635,9 @@ void main(string[] args) {
|
||||||
i++;
|
i++;
|
||||||
ARG_T = args[i].to!int;
|
ARG_T = args[i].to!int;
|
||||||
break;
|
break;
|
||||||
|
case "--codon":
|
||||||
|
enableCodon = true;
|
||||||
|
break;
|
||||||
case "--fastp":
|
case "--fastp":
|
||||||
i++;
|
i++;
|
||||||
PathFastp = args[i];
|
PathFastp = args[i];
|
||||||
|
@ -550,6 +666,10 @@ void main(string[] args) {
|
||||||
i++;
|
i++;
|
||||||
PathBcftools = args[i];
|
PathBcftools = args[i];
|
||||||
break;
|
break;
|
||||||
|
case "--exonerate":
|
||||||
|
i++;
|
||||||
|
PathExonerate = args[i];
|
||||||
|
break;
|
||||||
case "--macse":
|
case "--macse":
|
||||||
i++;
|
i++;
|
||||||
PathMacse = args[i];
|
PathMacse = args[i];
|
||||||
|
@ -586,6 +706,7 @@ void main(string[] args) {
|
||||||
PathBowtie2 = getValueFromConfig(ARG_C, "bowtie2");
|
PathBowtie2 = getValueFromConfig(ARG_C, "bowtie2");
|
||||||
PathSamtools = getValueFromConfig(ARG_C, "samtools");
|
PathSamtools = getValueFromConfig(ARG_C, "samtools");
|
||||||
PathBcftools = getValueFromConfig(ARG_C, "bcftools");
|
PathBcftools = getValueFromConfig(ARG_C, "bcftools");
|
||||||
|
PathExonerate = getValueFromConfig(ARG_C, "exonerate");
|
||||||
PathMacse = getValueFromConfig(ARG_C, "macse");
|
PathMacse = getValueFromConfig(ARG_C, "macse");
|
||||||
PathDelstop = getValueFromConfig(ARG_C, "delstop");
|
PathDelstop = getValueFromConfig(ARG_C, "delstop");
|
||||||
PathTrimal = getValueFromConfig(ARG_C, "trimal");
|
PathTrimal = getValueFromConfig(ARG_C, "trimal");
|
||||||
|
@ -644,6 +765,14 @@ void main(string[] args) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ARG_F == "all" && enableCodon) {
|
||||||
|
if(testFiles([PathExonerate]) && testStringArray(ARG_G) && testString(ARG_R)){
|
||||||
|
processCodon(ARG_G, ARG_R, DirConsensus, PathExonerate); //ARG_G
|
||||||
|
} else {
|
||||||
|
throw new Exception("please confirm paramenters are correct");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ARG_F == "all" || ARG_F == "align") {
|
if (ARG_F == "all" || ARG_F == "align") {
|
||||||
if(testFiles([PathMacse]) && testJava && testStringArray(ARG_G)){
|
if(testFiles([PathMacse]) && testJava && testStringArray(ARG_G)){
|
||||||
processAlign(ARG_G, DirConsensus, DirAlign, PathMacse); //ARG_G
|
processAlign(ARG_G, DirConsensus, DirAlign, PathMacse); //ARG_G
|
||||||
|
|
371
RGBEPP.sh
371
RGBEPP.sh
|
@ -1,371 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
### Environment Setting
|
|
||||||
|
|
||||||
pkgver=0.0.2
|
|
||||||
|
|
||||||
DirHome=$(pwd)
|
|
||||||
DirRaw=$DirHome/00_raw
|
|
||||||
DirQcTrim=$DirHome/01_fastp
|
|
||||||
DirAssembly=$DirHome/02_spades
|
|
||||||
DirFasta=$DirHome/03_assemblied
|
|
||||||
DirMap=$DirHome/04_diamond
|
|
||||||
DirPre=$DirHome/05_pre
|
|
||||||
DirSplit=$DirHome/06_split
|
|
||||||
DirMerge=$DirHome/07_merge
|
|
||||||
DirAlign=$DirHome/08_macse
|
|
||||||
|
|
||||||
PathSplitfsata=/usr/bin/splitfasta-cpp
|
|
||||||
PathMacse=/usr/share/java/macse.jar
|
|
||||||
PathSortdiamond=/usr/bin/sortdiamond
|
|
||||||
|
|
||||||
ARG_C='scaffolds'
|
|
||||||
ARG_M=16
|
|
||||||
ARG_T=8
|
|
||||||
|
|
||||||
### Get some arrays
|
|
||||||
|
|
||||||
show_help(){
|
|
||||||
# echo -e "\t\t\t\t\t\t\t\033[0;31mR\033[0m\033[0;92mG\033[0m\033[0;94mB\033[0m \033[0;33mE\033[0m\033[0;94mP\033[0m\033[0;33mP\033[0m\n\t\t\t\t\tReference Genome based Exon Phylogeny Pipeline\n \
|
|
||||||
echo -e "\t\t\t\t\t\t\t\033[0;47;31mR\033[0m\033[0;47;92mG\033[0m\033[0;47;94mB\033[0m\033[0;47m \033[0m\033[0;47;33mE\033[0m\033[0;47;94mP\033[0m\033[0;47;33mP\033[0m\n\t\t\t\t\tReference Genome based Exon Phylogeny Pipeline\n \
|
|
||||||
Version: $pkgver\n \
|
|
||||||
License: GPL-2.0-only\n \
|
|
||||||
Author: Guoyi Zhang\n \
|
|
||||||
-c\t--contigs\tcontings type: scaffolds or contigs\n \
|
|
||||||
-g\t--genes\t\tgene file path\n \
|
|
||||||
-f\t--functions\tfunctions type (optional): all clean \n \
|
|
||||||
\t \t\tassembly fasta map pre split merge align\n \
|
|
||||||
-h\t--help\t\tshow this information\n \
|
|
||||||
-l\t--list\t\tlist file path\n \
|
|
||||||
-m\t--memory\tmemory settings (optional, default 16 GB)\n \
|
|
||||||
-r\t--reference\treference genome path\n \
|
|
||||||
-t\t--threads\tthreads setting (optional, default 8 threads)\n \
|
|
||||||
\t--macse\t\tMacse jarfile path\n \
|
|
||||||
\t--sortdiamond\tsortdiamond file path\n \
|
|
||||||
\t--splitfasta\tsplitfasta file path\n \
|
|
||||||
for example: bash $0 -c scaffolds -f all -l list -g genes \ \n \
|
|
||||||
\t -r reference.aa.fas \n"
|
|
||||||
}
|
|
||||||
|
|
||||||
if [ $# -eq 0 ]; then
|
|
||||||
show_help
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
|
|
||||||
ARGS=$(getopt -o c:,f:,g:,h,l:,m:,r:,t: --long contigs:,genes:,functions:,help,list:,memory:,reference:,threads:,macse:,sortdiamond:,splitfasta: -n 'RGBEPP.sh' -- "$@")
|
|
||||||
if [ $? != 0 ]; then
|
|
||||||
echo "Failed to parse options." >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
eval set -- "$ARGS"
|
|
||||||
|
|
||||||
while true; do
|
|
||||||
case "$1" in
|
|
||||||
-c|--contigs)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) ARG_C=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
-g|--genes)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) ARG_G=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
-f|--functions)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) ARG_F=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
-h|--help)
|
|
||||||
show_help
|
|
||||||
shift ;;
|
|
||||||
-l|--list)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) ARG_L=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
-m|--memory)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) ARG_M=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
-r|--reference)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) ARG_R=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
-t|--threads)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) ARG_T=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
--macse)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) PathMacse=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
--sortdiamond)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) PathSortdiamond=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
--splitfasta)
|
|
||||||
case "$2" in
|
|
||||||
"") shift 2 ;;
|
|
||||||
*) PathSplitfsata=$2; shift 2 ;;
|
|
||||||
esac ;;
|
|
||||||
--)
|
|
||||||
shift; break ;;
|
|
||||||
*) echo "Unknown option: $1"
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
### Get and check some arguments
|
|
||||||
|
|
||||||
check_var() {
|
|
||||||
local var_name="$1"
|
|
||||||
local var_value="${!var_name}" # get value
|
|
||||||
|
|
||||||
if [ -z "$var_value" ]; then
|
|
||||||
echo "Error: $var_name is not set or is empty"
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
echo "$var_name is set to: $var_value"
|
|
||||||
|
|
||||||
case "$var_name" in
|
|
||||||
"ARG_G")
|
|
||||||
readarray -t genes < "$var_value"
|
|
||||||
length_gn=${#genes[@]}
|
|
||||||
;;
|
|
||||||
"ARG_L")
|
|
||||||
readarray -t full_names < "$var_value"
|
|
||||||
length_fn=${#full_names[@]}
|
|
||||||
;;
|
|
||||||
"ARG_F")
|
|
||||||
check_command "cp"
|
|
||||||
check_command "cd"
|
|
||||||
check_command "mv"
|
|
||||||
check_command "find"
|
|
||||||
check_command "mkdir"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
check_path(){
|
|
||||||
local path_name="$1"
|
|
||||||
local path_value="${!path_name}" # get value
|
|
||||||
|
|
||||||
# expand ~
|
|
||||||
path_value=$(eval echo "$path_value")
|
|
||||||
|
|
||||||
if [ -e "$path_value" ]; then
|
|
||||||
echo "$path_name exists at: $path_value"
|
|
||||||
else
|
|
||||||
echo "Error: $path_name does not exist at: $path_value"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
check_command() {
|
|
||||||
local cmd_name="$1"
|
|
||||||
|
|
||||||
if command -v "$cmd_name" >/dev/null 2>&1; then
|
|
||||||
echo "$cmd_name command exists."
|
|
||||||
else
|
|
||||||
echo "Error: $cmd_name command not found."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
### Quality control && Trimming
|
|
||||||
|
|
||||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "clean" ]; then
|
|
||||||
|
|
||||||
## Prepare
|
|
||||||
mkdir -p $DirQcTrim
|
|
||||||
|
|
||||||
check_var "ARG_L"
|
|
||||||
check_command "fastp"
|
|
||||||
|
|
||||||
readarray -t full_names < "$ARG_L"
|
|
||||||
length_fn=${#full_names[@]}
|
|
||||||
|
|
||||||
readarray -t genes < "$ARG_G"
|
|
||||||
length_gn=${#genes[@]}
|
|
||||||
|
|
||||||
## Quality control and trimming using fastp
|
|
||||||
for (( i=0; i<$length_fn; i++ )); do
|
|
||||||
fastp -i $DirRaw/${full_names[$i]}_R1.fastq.gz -I $DirRaw/${full_names[$i]}_R2.fastq.gz -j $DirQcTrim/${full_names[$i]}.json -h $DirQcTrim/${full_names[$i]}.html -o $DirQcTrim/${full_names[$i]}_R1.fastq.gz -O $DirQcTrim/${full_names[$i]}_R2.fastq.gz -w $ARG_T
|
|
||||||
done
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
### De novo assembly
|
|
||||||
|
|
||||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "assembly" ]; then
|
|
||||||
|
|
||||||
## Prepare
|
|
||||||
mkdir -p $DirAssembly
|
|
||||||
|
|
||||||
check_var "ARG_L"
|
|
||||||
check_command "spades.py"
|
|
||||||
|
|
||||||
## De novo assembly using spades
|
|
||||||
for (( i=0; i<$length_fn; i++ )); do
|
|
||||||
mkdir -p $DirAssembly/${full_names[$i]}
|
|
||||||
spades.py --pe1-1 $DirQcTrim/${full_names[$i]}_R1.fastq.gz --pe1-2 $DirQcTrim/${full_names[$i]}_R2.fastq.gz -t $ARG_T -m $ARG_M --careful --phred-offset 33 -o $DirAssembly/${full_names[$i]}
|
|
||||||
# -k 96,107,117,127 \
|
|
||||||
done
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
### Moving scaffords or Contigs out
|
|
||||||
|
|
||||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "fasta" ]; then
|
|
||||||
|
|
||||||
## Check if the contigs type is specified
|
|
||||||
check_var "ARG_C"
|
|
||||||
check_var "ARG_L"
|
|
||||||
|
|
||||||
## Prepare
|
|
||||||
mkdir -p $DirFasta
|
|
||||||
|
|
||||||
## Move the assemblied fasta file to the folder
|
|
||||||
if [ "$ARG_C" = "scaffolds" ] || [ "$ARG_C" = "contigs" ] ; then
|
|
||||||
for (( i=0; i<$length_fn; i++ )); do
|
|
||||||
cp $DirAssembly/${full_names[$i]}/$ARG_C.fasta $DirFasta/$ARG_C/${full_names[$i]}.fasta
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
### Mapping
|
|
||||||
|
|
||||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "map" ]; then
|
|
||||||
|
|
||||||
## Check if the reference or contigs type is specified
|
|
||||||
check_var "ARG_C"
|
|
||||||
check_var "ARG_R"
|
|
||||||
check_command "diamond"
|
|
||||||
|
|
||||||
## Prepare
|
|
||||||
mkdir -p $DirMap
|
|
||||||
|
|
||||||
## Index reference database
|
|
||||||
cd $DirFasta/$ARG_C
|
|
||||||
diamond makedb --db Reference --in $ARG_R
|
|
||||||
cd -
|
|
||||||
|
|
||||||
## Blastx for mapping DNA sequences to protein reference sequence
|
|
||||||
cd $DirFasta/$ARG_C
|
|
||||||
for (( i=0; i<$length_fn; i++ )); do
|
|
||||||
diamond blastx -d Reference.dmnd -q ${full_names[$i]}.fasta -o ${full_names[$i]}.m8 \
|
|
||||||
--outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen gaps ppos qframe qseq
|
|
||||||
# subject: reference; query: align-aimed
|
|
||||||
#1: qseqid: Query Seq-id
|
|
||||||
#2: sseqid: Subject Seq - id
|
|
||||||
#3: pident: Percentage of identical matches
|
|
||||||
#4: length: Alignment length
|
|
||||||
#5: mismatch: Number of mismatches
|
|
||||||
#6: gapopen: Number of gap openings
|
|
||||||
#7: qstart: Start of alignment in query
|
|
||||||
#8: qend: End of alignment in query
|
|
||||||
#9: sstart: Start of alignment in subject
|
|
||||||
#10: send: End of alignment in subject
|
|
||||||
#11: evalue: Expect value
|
|
||||||
#12: bitscore: Bit score
|
|
||||||
#13: qlen: Query sequence length
|
|
||||||
#14: slen: Subject sequence length
|
|
||||||
#15: gaps: Total number of gaps
|
|
||||||
#16: ppos: Percentage of positive - scoring matches
|
|
||||||
#17: qframe: Query frame (frames in blast?)
|
|
||||||
#18: qseq: Aligned part of query sequence
|
|
||||||
|
|
||||||
done
|
|
||||||
cd -
|
|
||||||
|
|
||||||
mv $DirFasta/$ARG_C/*.m8 $DirMap
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "pre" ]; then
|
|
||||||
|
|
||||||
mkdir -p $DirPre
|
|
||||||
|
|
||||||
check_var "ARG_L"
|
|
||||||
check_path "PathSortdiamond"
|
|
||||||
|
|
||||||
# extract fasta file from diamond balst style output
|
|
||||||
for (( i=0; i<$length_fn; i++ )); do
|
|
||||||
$PathSortdiamond $DirMap/${full_names[$i]}.m8 $DirPre/${full_names[$i]}.fasta
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "split" ]; then
|
|
||||||
mkdir -p $DirSplit
|
|
||||||
cd $DirPre
|
|
||||||
|
|
||||||
check_var "ARG_L"
|
|
||||||
check_path "PathSplitfasta"
|
|
||||||
|
|
||||||
# split fasta into different folders based on sequence names
|
|
||||||
for (( i=0; i<$length_fn; i++ )); do
|
|
||||||
$PathSplitfsata ${full_names[$i]}.fasta
|
|
||||||
done
|
|
||||||
|
|
||||||
# mv to destdir
|
|
||||||
find . -mindepth 1 -maxdepth 1 -type d -exec mv {} ../$DirSplit \;
|
|
||||||
cd -
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "merge" ]; then
|
|
||||||
|
|
||||||
## Check if the genes is specified
|
|
||||||
check_var "ARG_G"
|
|
||||||
|
|
||||||
mkdir -p $DirMerge
|
|
||||||
cd $DirSplit
|
|
||||||
|
|
||||||
# merge different taxa sequences in same gene to one fasta
|
|
||||||
for (( i=0; i<$length_gn; i++ ))
|
|
||||||
do
|
|
||||||
cd ${genes[$i]}
|
|
||||||
cat * > ../${genes[$i]}.fasta
|
|
||||||
cd ..
|
|
||||||
done
|
|
||||||
|
|
||||||
# mv to destdir
|
|
||||||
mv *.fasta ../$DirMerge
|
|
||||||
|
|
||||||
cd -
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "align" ]; then
|
|
||||||
|
|
||||||
## Check if the genes is specified
|
|
||||||
check_var "ARG_G"
|
|
||||||
check_command "java"
|
|
||||||
check_command "parallel"
|
|
||||||
check_path "PathMacse"
|
|
||||||
|
|
||||||
# current_thread=0
|
|
||||||
mkdir -p $DirAlign
|
|
||||||
mkdir -p $DirAlign/AA && mkdir -p $DirAlign/NT
|
|
||||||
cd $DirMerge
|
|
||||||
|
|
||||||
# align the sequence based on codon
|
|
||||||
parallel -j $ARG_T java -jar $PathMacse -prog alignSequences -seq {}.fasta -out_AA ../$DirAlign/AA/{}.fasta -out_NT ../$DirAlign/NT/{}.fasta ::: "${genes[@]}"
|
|
||||||
|
|
||||||
cd -
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
|
@ -5,6 +5,7 @@ sortdiamond = /usr/bin/sortdiamond
|
||||||
bowtie2 = /usr/bin/bowtie2
|
bowtie2 = /usr/bin/bowtie2
|
||||||
samtools = /usr/bin/samtools
|
samtools = /usr/bin/samtools
|
||||||
bcftools = /usr/bin/bcftools
|
bcftools = /usr/bin/bcftools
|
||||||
|
exonerate = /usr/bin/exonerate
|
||||||
macse = /usr/share/java/macse.jar
|
macse = /usr/share/java/macse.jar
|
||||||
delstop = /usr/bin/delstop
|
delstop = /usr/bin/delstop
|
||||||
trimal = /usr/bin/trimal
|
trimal = /usr/bin/trimal
|
||||||
|
|
Loading…
Reference in a new issue