Last active
August 2, 2016 17:26
-
-
Save cfriedline/556e5cbc114e6741acd697ecfe8a2159 to your computer and use it in GitHub Desktop.
RefMapOpt.sh changes for SE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/RefMapOpt.sh.1 b/RefMapOpt.sh | |
index 47f8f02..ce0e168 100644 | |
--- a/RefMapOpt.sh.1 | |
+++ b/RefMapOpt.sh | |
@@ -11,9 +11,9 @@ if find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null| grep -q 'tr | |
echo "The dependency trimmomatic is not installed or is not in your" '$PATH'"." | |
NUMDEP=$((NUMDEP + 1)) | |
fi | |
- | |
-if find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | grep -q 'Tru' ; then | |
- ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1) | |
+ | |
+if find ${PATH//:/ } -maxdepth 1 -name TruSeq2-SE.fa 2> /dev/null | grep -q 'Tru' ; then | |
+ ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-SE.fa 2> /dev/null | head -1) | |
else | |
echo "The file listing adapters (included with trimmomatic) is not installed or is not in your" '$PATH'"." | |
NUMDEP=$((NUMDEP + 1)) | |
@@ -54,7 +54,7 @@ if [ ${NAMES[@]:(-1)}.F.fq.gz -nt ${NAMES[@]:(-1)}.uniq.seqs ];then | |
for i in "${NAMES[@]}"; | |
do | |
zcat $i.R.fq.gz | head -2 | tail -1 >> lengths.txt | |
- done | |
+ done | |
MaxLen=$(mawk '{ print length() | "sort -rn" }' lengths.txt| head -1) | |
LENGTH=$(( $MaxLen / 3)) | |
for i in "${NAMES[@]}" | |
@@ -63,8 +63,8 @@ if [ ${NAMES[@]:(-1)}.F.fq.gz -nt ${NAMES[@]:(-1)}.uniq.seqs ];then | |
done | |
cat namelist | parallel --no-notice -j $NUMProc "mawk '$AWK1' {}.assembled.fastq | mawk '$AWK2' | perl -e '$PERLT' > {}.uniq.seqs" | |
fi | |
- | |
- | |
+ | |
+ | |
fi | |
#Create a data file with the number of unique sequences and the number of occurrences | |
@@ -118,7 +118,7 @@ if [ "$ATYPE" == "PE" ]; then | |
mv ref.RC.fq ref.R.fq | |
LENGTH=$(mawk '!/>/' rainbow.fasta | mawk '(NR==1||length<shortest){shortest=length} END {print shortest}') | |
LENGTH=$(( $LENGTH * 5 / 4)) | |
- | |
+ | |
pearRM -f ref.F.fq -r ref.R.fq -o overlap -p 0.001 -j $NUMProc -n $LENGTH &>kopt.log | |
rm ref.F.fq ref.R.fq | |
@@ -186,8 +186,8 @@ do | |
rm lengths.txt &> /dev/null | |
for k in "${RANDNAMES[@]}"; | |
do | |
- zcat $k.R.fq.gz | head -2 | tail -1 >> lengths.txt | |
- done | |
+ zcat $k.F.fq.gz | head -2 | tail -1 >> lengths.txt | |
+ done | |
MaxLen=$(mawk '{ print length() | "sort -rn" }' lengths.txt| head -1) | |
INSERT=$(($MaxLen * 2 )) | |
INSERTH=$(($INSERT + 100 )) | |
@@ -198,10 +198,11 @@ do | |
rm $i.$j.results 2>/dev/null | |
for k in "${RANDNAMES[@]}" | |
do | |
- bwa mem reference.fasta $k.R1.fq.gz $k.R2.fq.gz -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t 32 -a -M -T 10 -A 1 -B 4 -O 6 -R "@RG\tID:$k\tSM:$k\tPL:Illumina" 2> bwa.$i.log | mawk '!/\t[2-9].[SH].*/' | mawk '!/[2-9].[SH]\t/' | samtools view -@32 -q 1 -SbT reference.fasta - > $k.bam | |
- samtools sort -@32 $k.bam $k | |
+ #bwa mem reference.fasta $k.R1.fq.gz $k.R2.fq.gz -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t 32 -a -M -T 10 -A 1 -B 4 -O 6 -R "@RG\tID:$k\tSM:$k\tPL:Illumina" 2> bwa.$i.log | mawk '!/\t[2-9].[SH].*/' | mawk '!/[2-9].[SH]\t/' | samtools view -@32 -q 1 -SbT reference.fasta - > $k.bam | |
+ bwa mem reference.fasta $k.R1.fq.gz -L 20,5 -t 32 -a -M -T 10 -A 1 -B 4 -O 6 -R "@RG\tID:$k\tSM:$k\tPL:Illumina" 2> bwa.$i.log | mawk '!/\t[2-9].[SH].*/' | mawk '!/[2-9].[SH]\t/' | samtools view -@32 -q 1 -SbT reference.fasta - > $k.bam | |
+ samtools sort -@32 $k.bam $k | |
samtools index $k.bam | |
- MM=$(samtools flagstat $k.bam | grep -E 'mapped \(|properly' | cut -f1 -d '+' | tr -d '\n') | |
+ MM=$(samtools flagstat $k.bam | grep -E 'mapped \(' | cut -f1 -d '+' | tr -d '\n') | |
CM=$(samtools idxstats $k.bam | mawk '$3 > 0' | wc -l) | |
CC=$(samtools idxstats $k.bam | mawk '{ sum += $3; n++ } END { if (n > 0) print sum / n; }') | |
DD=$(samtools idxstats $k.bam | mawk '$3 >0' | mawk '{ sum += $3; n++ } END { if (n > 0) print sum / n; }') | |
@@ -220,5 +221,5 @@ do | |
echo -e "$CCC\t$DDD\t$CON\t$AC\t$i\t$j\t$SUMM\t$SUMPM\t$AVEM\t$AVEP\t$BBM" >> mapping.results | |
fi | |
done | |
- | |
+ | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment