polish: add check function
This commit is contained in:
parent
e8e40e4327
commit
01729e0726
1 changed files with 141 additions and 67 deletions
208
RGBEPP.sh
208
RGBEPP.sh
|
@ -19,9 +19,33 @@ PathSortdiamond=/home/guoyi/Downloads/PhD/wes/sortdiamond
|
|||
|
||||
HELP=false
|
||||
|
||||
ARG_C='scaffolds'
|
||||
#ARG_F='all'
|
||||
ARG_M=16
|
||||
ARG_T=8
|
||||
|
||||
### Get some arrays
|
||||
|
||||
ARGS=$(getopt -o c:,f:,g:,h,l:,m:,r:,t: --long contigs:,genes:,functions:,help,list:,memory:,reference:,threads: -n 'RGBEPP.sh' -- "$@")
|
||||
show_help(){
|
||||
# echo -e "\t\t\t\t\t\t\t\033[0;31mR\033[0m\033[0;92mG\033[0m\033[0;94mB\033[0m \033[0;33mE\033[0m\033[0;94mP\033[0m\033[0;33mP\033[0m\n\t\t\t\t\tReference Genome based Exon Phylogeny Pipeline\n \
|
||||
echo -e "\t\t\t\t\t\t\t\033[0;47;31mR\033[0m\033[0;47;92mG\033[0m\033[0;47;94mB\033[0m\033[0;47m \033[0m\033[0;47;33mE\033[0m\033[0;47;94mP\033[0m\033[0;47;33mP\033[0m\n\t\t\t\t\tReference Genome based Exon Phylogeny Pipeline\n \
|
||||
Version: $pkgver\n \
|
||||
License: GPL-3.0-only\n \
|
||||
Author: Guoyi Zhang\n \
|
||||
-c\t--contigs\tcontings type: scaffolds or contigs\n \
|
||||
-g\t--genes\t\tgene file path\n \
|
||||
-f\t--functions\tfunctions type (optional): all clean \n \
|
||||
\t \t\tassembly fasta map pre split merge align\n \
|
||||
-h\t--help\t\tshow this information\n \
|
||||
-l\t--list\t\tlist file path\n \
|
||||
-m\t--memory\tmemory settings (optional, default 16 GB)\n \
|
||||
-r\t--reference\treference genome path\n \
|
||||
-t\t--threads\tthreads setting (optional, default 8 threads)\n \
|
||||
for example: bash $0 -c scaffolds -f all -l list -g genes \ \n \
|
||||
\t -r Reference.exons.aa.fas \n"
|
||||
}
|
||||
|
||||
ARGS=$(getopt -o c:,f:,g:,h,l:,m:,r:,t: --long contigs:,genes:,functions:,help,list:,memory:,reference:,threads:,macse:,sortdiamond:,splitfasta: -n 'RGBEPP.sh' -- "$@")
|
||||
if [ $? != 0 ]; then
|
||||
echo "Failed to parse options." >&2
|
||||
exit 1
|
||||
|
@ -32,7 +56,7 @@ while true; do
|
|||
case "$1" in
|
||||
-c|--contigs)
|
||||
case "$2" in
|
||||
"") ARG_C='scaffolds'; shift 2 ;;
|
||||
"") shift 2 ;;
|
||||
*) ARG_C=$2; shift 2 ;;
|
||||
esac ;;
|
||||
-g|--genes)
|
||||
|
@ -42,26 +66,12 @@ while true; do
|
|||
esac ;;
|
||||
-f|--functions)
|
||||
case "$2" in
|
||||
"") ARG_F='all'; shift 2 ;;
|
||||
"") shift 2 ;;
|
||||
*) ARG_F=$2; shift 2 ;;
|
||||
esac ;;
|
||||
-h|--help)
|
||||
# echo -e "\t\t\t\t\t\t\t\033[0;31mR\033[0m\033[0;92mG\033[0m\033[0;94mB\033[0m \033[0;33mE\033[0m\033[0;94mP\033[0m\033[0;33mP\033[0m\n\t\t\t\t\tReference Genome based Exon Phylogeny Pipeline\n \
|
||||
echo -e "\t\t\t\t\t\t\t\033[0;47;31mR\033[0m\033[0;47;92mG\033[0m\033[0;47;94mB\033[0m\033[0;47m \033[0m\033[0;47;33mE\033[0m\033[0;47;94mP\033[0m\033[0;47;33mP\033[0m\n\t\t\t\t\tReference Genome based Exon Phylogeny Pipeline\n \
|
||||
Version: $pkgver\n \
|
||||
License: GPL-3.0-only\n \
|
||||
Author: Guoyi Zhang\n \
|
||||
-c\t--contigs\tcontings type: scaffolds or contigs\n \
|
||||
-g\t--genes\t\tgene file path\n \
|
||||
-f\t--functions\tfunctions type (optional): all clean \n \
|
||||
\t \t\tassembly fasta map pre split merge align\n \
|
||||
-h\t--help\t\thelp: show this information\n \
|
||||
-l\t--list\t\tlist file path\n \
|
||||
-m\t--memory\tmemory settings (optional, default 16 GB)\n \
|
||||
-r\t--reference\treference genome path\n \
|
||||
-t\t--threads\tthreads setting (optional, default 8 threads)\n \
|
||||
for example: bash $0 -c scaffolds -f all -l list -g genes -r Reference.exons.aa.fas \n"
|
||||
HELP=true
|
||||
show_help
|
||||
HELP=true;
|
||||
shift ;;
|
||||
-l|--list)
|
||||
case "$2" in
|
||||
|
@ -70,7 +80,7 @@ while true; do
|
|||
esac ;;
|
||||
-m|--memory)
|
||||
case "$2" in
|
||||
"") ARG_M=16; shift 2 ;;
|
||||
"") shift 2 ;;
|
||||
*) ARG_M=$2; shift 2 ;;
|
||||
esac ;;
|
||||
-r|--reference)
|
||||
|
@ -80,26 +90,92 @@ while true; do
|
|||
esac ;;
|
||||
-t|--threads)
|
||||
case "$2" in
|
||||
"") ARG_T=8; shift 2 ;;
|
||||
"") shift 2 ;;
|
||||
*) ARG_T=$2; shift 2 ;;
|
||||
esac ;;
|
||||
--) shift; break ;;
|
||||
*) echo "Internal error!"; exit 1 ;;
|
||||
--macse)
|
||||
case "$2" in
|
||||
"") shift 2 ;;
|
||||
*) PathMacse=$2; shift 2 ;;
|
||||
esac ;;
|
||||
--sortdiamond)
|
||||
case "$2" in
|
||||
"") shift 2 ;;
|
||||
*) PathSortdiamond=$2; shift 2 ;;
|
||||
esac ;;
|
||||
--splitfasta)
|
||||
case "$2" in
|
||||
"") shift 2 ;;
|
||||
*) PathSplitfsata=$2; shift 2 ;;
|
||||
esac ;;
|
||||
--)
|
||||
show_help
|
||||
HELP=true
|
||||
shift; break ;;
|
||||
*) echo "Unknown option: $1"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
### Get and check some arguments
|
||||
|
||||
if [ "$HELP" = false ]; then
|
||||
if [ -z "$ARG_L" ]; then
|
||||
echo "List argument can't be empty"
|
||||
exit 1
|
||||
fi
|
||||
check_var() {
|
||||
local var_name="$1"
|
||||
local var_value="${!var_name}" # get value
|
||||
|
||||
readarray -t full_names < "$ARG_L"
|
||||
readarray -t genes < "$ARG_G"
|
||||
length_fn=${#full_names[@]}
|
||||
length_gn=${#genes[@]}
|
||||
if [ -z "$var_value" ]; then
|
||||
echo "Error: $var_name is not set or is empty"
|
||||
exit 1
|
||||
else
|
||||
echo "$var_name is set to: $var_value"
|
||||
|
||||
case "$var_name" in
|
||||
"ARG_G")
|
||||
readarray -t genes < "$var_value"
|
||||
length_gn=${#genes[@]}
|
||||
;;
|
||||
"ARG_L")
|
||||
readarray -t full_names < "$var_value"
|
||||
length_fn=${#full_names[@]}
|
||||
;;
|
||||
esac
|
||||
|
||||
fi
|
||||
}
|
||||
|
||||
check_path(){
|
||||
local path_name="$1"
|
||||
local path_value="${!path_name}" # get value
|
||||
|
||||
# expand ~
|
||||
path_value=$(eval echo "$path_value")
|
||||
|
||||
if [ -e "$path_value" ]; then
|
||||
echo "$path_name exists at: $path_value"
|
||||
else
|
||||
echo "Error: $path_name does not exist at: $path_value"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_command() {
|
||||
local cmd_name="$1"
|
||||
|
||||
if command -v "$cmd_name" >/dev/null 2>&1; then
|
||||
echo "$cmd_name command exists."
|
||||
else
|
||||
echo "Error: $cmd_name command not found."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
if [ "$HELP" = false ]; then
|
||||
check_command "cp"
|
||||
check_command "cd"
|
||||
check_command "mv"
|
||||
check_command "find"
|
||||
check_command "mkdir"
|
||||
fi
|
||||
|
||||
### Quality control && Trimming
|
||||
|
@ -109,6 +185,15 @@ if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "clean" ]; then
|
|||
## Prepare
|
||||
mkdir -p $DirQcTrim
|
||||
|
||||
check_var "ARG_L"
|
||||
check_command "fastp"
|
||||
|
||||
readarray -t full_names < "$ARG_L"
|
||||
length_fn=${#full_names[@]}
|
||||
|
||||
readarray -t genes < "$ARG_G"
|
||||
length_gn=${#genes[@]}
|
||||
|
||||
## Quality control and trimming using fastp
|
||||
for (( i=0; i<$length_fn; i++ )); do
|
||||
fastp -i $DirRaw/${full_names[$i]}_R1.fastq.gz -I $DirRaw/${full_names[$i]}_R2.fastq.gz -j $DirQcTrim/${full_names[$i]}.json -h $DirQcTrim/${full_names[$i]}.html -o $DirQcTrim/${full_names[$i]}_R1.fastq.gz -O $DirQcTrim/${full_names[$i]}_R2.fastq.gz -w $ARG_T
|
||||
|
@ -122,7 +207,10 @@ if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "assembly" ]; then
|
|||
|
||||
## Prepare
|
||||
mkdir -p $DirAssembly
|
||||
|
||||
|
||||
check_var "ARG_L"
|
||||
check_command "spades.py"
|
||||
|
||||
## De novo assembly using spades
|
||||
for (( i=0; i<$length_fn; i++ )); do
|
||||
mkdir -p $DirAssembly/${full_names[$i]}
|
||||
|
@ -137,10 +225,8 @@ fi
|
|||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "fasta" ]; then
|
||||
|
||||
## Check if the contigs type is specified
|
||||
if [ -z "$ARG_C" ] ; then
|
||||
echo "Argument of contig type missing."
|
||||
exit 1
|
||||
fi
|
||||
check_var "ARG_C"
|
||||
check_var "ARG_L"
|
||||
|
||||
## Prepare
|
||||
mkdir -p $DirFasta
|
||||
|
@ -159,10 +245,9 @@ fi
|
|||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "map" ]; then
|
||||
|
||||
## Check if the reference or contigs type is specified
|
||||
if [ -z "$ARG_R" ] || [ -z "$ARG_C" ] ; then
|
||||
echo "Argument of reference or contig type missing."
|
||||
exit 1
|
||||
fi
|
||||
check_var "ARG_C"
|
||||
check_var "ARG_R"
|
||||
check_command "diamond"
|
||||
|
||||
## Prepare
|
||||
mkdir -p $DirMap
|
||||
|
@ -205,7 +290,12 @@ if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "map" ]; then
|
|||
fi
|
||||
|
||||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "pre" ]; then
|
||||
|
||||
mkdir -p $DirPre
|
||||
|
||||
check_var "ARG_L"
|
||||
checkPath "PathSortdiamond"
|
||||
|
||||
for (( i=0; i<$length_fn; i++ )); do
|
||||
$PathSortdiamond $DirMap/${full_names[$i]}.m8 $DirPre/${full_names[$i]}.fasta
|
||||
done
|
||||
|
@ -215,6 +305,10 @@ fi
|
|||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "split" ]; then
|
||||
mkdir -p $DirSplit
|
||||
cd $DirPre
|
||||
|
||||
check_var "ARG_L"
|
||||
checkPath "PathSplitfasta"
|
||||
|
||||
for (( i=0; i<$length_fn; i++ )); do
|
||||
$PathSplitfsata ${full_names[$i]}.fasta
|
||||
done
|
||||
|
@ -225,10 +319,7 @@ fi
|
|||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "merge" ]; then
|
||||
|
||||
## Check if the genes is specified
|
||||
if [ -z "$ARG_G" ] ; then
|
||||
echo "Argument of genes list missing."
|
||||
exit 1
|
||||
fi
|
||||
check_var "ARG_G"
|
||||
|
||||
mkdir -p $DirMerge
|
||||
cd $DirSplit
|
||||
|
@ -246,34 +337,17 @@ fi
|
|||
if [ "$ARG_F" = "all" ] || [ "$ARG_F" = "align" ]; then
|
||||
|
||||
## Check if the genes is specified
|
||||
if [ -z "$ARG_G" ] ; then
|
||||
echo "Argument of genes list missing."
|
||||
exit 1
|
||||
fi
|
||||
check_var "ARG_G"
|
||||
check_command "java"
|
||||
check_command "parallel"
|
||||
checkPath "PathMacse"
|
||||
|
||||
# current_thread=0
|
||||
mkdir -p $DirAlign
|
||||
mkdir -p $DirAlign/AA && mkdir -p $DirAlign/NT
|
||||
cd $DirMerge
|
||||
|
||||
align_by_macse() {
|
||||
java -jar $PathMacse -prog alignSequences -seq $1.fasta -out_AA ../$DirAlign/AA/$1.fasta -out_NT ../$DirAlign/NT/$1.fasta &
|
||||
}
|
||||
|
||||
export -f align_by_macse
|
||||
|
||||
parallel -j $ARG_T align_by_macse ::: "${genes[@]}"
|
||||
|
||||
# for (( i=0; i<$length_gn; i++ ))
|
||||
# do
|
||||
# java -jar $PathMacse -prog alignSequences -seq ${genes[$i]}.fasta -out_AA ../$DirAlign/AA/${genes[$i]}.fasta -out_NT ../$DirAlign/NT/${genes[$i]}.fasta &
|
||||
# ((current_thread++))
|
||||
# if [ $current_thread -eq $ARG_T ]; then
|
||||
# wait
|
||||
# current_thread=0
|
||||
# fi
|
||||
# done
|
||||
|
||||
parallel -j $ARG_T java -jar $PathMacse -prog alignSequences -seq {}.fasta -out_AA ../$DirAlign/AA/{}.fasta -out_NT ../$DirAlign/NT/{}.fasta ::: "${genes[@]}"
|
||||
|
||||
cd -
|
||||
|
||||
|
|
Loading…
Reference in a new issue