myillu_01.trimpe.sh
Script <myIllu_trimPE.sh>
#!/bin/bash
#
# Takes two paired files, runs Trimmomatic (PE mode), and makes one interleaved file
# Trimmomatic v0.32 manual:
# http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf
#
# Latest version of ngopt (a5_miseq): 20150522
# Tool locations. The Trimmomatic jar and the adapter file both live in the
# a5_miseq 20140604 installation; interleave-reads.py lives in the khmer
# environment.
A5_MISEQ_DIR=/usr/local/apps/a5_miseq_linux_20140604
KHMER_BIN_DIR=/usr/local/apps/khmer/khmerEnv/bin
TRIMMOMATIC="${A5_MISEQ_DIR}/bin/trimmomatic.jar"
ADAPTER="${A5_MISEQ_DIR}/adapter.fasta"
INTERLEAVE="${KHMER_BIN_DIR}/interleave-reads.py"

# Run parameters.
THREADS=24   # value given to Trimmomatic's -threads option
MINLEN=75    # value given to the MINLEN step (default here is 75; NGOPT uses 36 bp)
# Exactly two positional arguments (the two input files) are required.
# Any other count prints a hint and ends the script with status 1.
case $# in
  2) ;;
  *)
    echo "Please specifiy two file names!"
    exit 1
    ;;
esac
# Check the files are found (idea taken from the SGA example script).
#
# Each path is tested as a single quoted word. Joining the names into one
# string and looping over it unquoted would split a name such as
# "my sample_1.fq" into several words and test the wrong paths.
#
# Arguments: the file paths to check
# Outputs:   one error line on stdout for the first path that is not a
#            regular file
# Returns:   0 if every path is a regular file, 1 otherwise
check_input_files() {
  local input
  for input in "$@"; do
    if [[ ! -f "$input" ]]; then
      echo "Error input file $input not found"
      return 1
    fi
  done
}
# The argument-count check above guarantees "$@" is exactly the two inputs.
check_input_files "$@" || exit 1
# Derive the output prefix from the first input file. `basename` removes any
# leading directory part, so the prefix can name output files in the
# "current" directory.
#   ${name%_*}   deletes from the last '_' to the end (shortest match; used here)
#   ${name%%_*}  would delete from the first '_' to the end (longest match)
#   ${name/_*/}  substitution form, also longest match
stem=$(basename "$1")
stem=${stem%_*}
echo "File name base: $stem"
BASE="${stem}-trim"
echo "File name base will be changed into ${BASE}"
# Run Trimmomatic in paired-end mode on the two input files. With
# -baseout ${BASE}.fq.gz, the outputs used later in this script are
# ${BASE}_1P.fq.gz / ${BASE}_2P.fq.gz (interleaved afterwards) and
# ${BASE}_1U.fq.gz / ${BASE}_2U.fq.gz (removed afterwards).
# Every expansion is quoted so a path containing spaces stays one argument,
# and a failed run stops the script instead of carrying on with missing or
# partial output files.
if ! java -jar "$TRIMMOMATIC" PE -threads "${THREADS}" -phred33 \
    -baseout "${BASE}.fq.gz" "$1" "$2" \
    "ILLUMINACLIP:${ADAPTER}:2:30:10" LEADING:3 TRAILING:3 \
    SLIDINGWINDOW:4:20 "MINLEN:${MINLEN}"; then
  echo "Error: trimmomatic failed for $1 $2" >&2
  exit 1
fi
# compare to a5-miseq pipeline (my $trim_cmd):
# ILLUMINACLIP:$adapter:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
# Merge the two paired outputs into one interleaved file, then remove the
# per-mate Trimmomatic outputs.
echo "Writing interleaved file ${BASE}.pe.fq from ${BASE}_1P.fq.gz ${BASE}_2P.fq.gz..."
# Only clean up after a successful interleave: if it fails, the paired files
# are the only copy of the trimmed reads and must be kept.
if ! "$INTERLEAVE" -o "${BASE}.pe.fq" "${BASE}_1P.fq.gz" "${BASE}_2P.fq.gz"; then
  echo "Error: interleaving failed; intermediate files were kept" >&2
  exit 1
fi
echo "Deleting intermediate files (including orphan files)..."
# '--' keeps a prefix that starts with '-' from being parsed as rm options.
rm -- "${BASE}_1U.fq.gz" "${BASE}_2U.fq.gz" "${BASE}_1P.fq.gz" "${BASE}_2P.fq.gz"
myillu_01.trimpe.sh.txt · Last modified: by 127.0.0.1
