User Tools

Site Tools


myillu_01.trimpe.sh

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
myillu_01.trimpe.sh [2016/12/06 09:56] hyjeong | myillu_01.trimpe.sh [2021/03/17 13:09] (current) – external edit 127.0.0.1
Line 1: Line 1:
 +====== Script <myIllu_trimPE.sh> ======
  
 +  #!/bin/bash
 +  #
 +  # takes two paired files, run trimmomatic (PE mode), and makes one interleaved file
 +  # Trimmomatic v0.32 manual: 
 +  #   http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf
 +  #
 +  # Latest version of ngopt (a5_miseq): 20150522
 +  
 +  TRIMMOMATIC=/usr/local/apps/a5_miseq_linux_20140604/bin/trimmomatic.jar
 +  ADAPTER=/usr/local/apps/a5_miseq_linux_20140604/adapter.fasta
 +  INTERLEAVE=/usr/local/apps/khmer/khmerEnv/bin/interleave-reads.py
 +  THREADS=24
 +  MINLEN=75
 +  # default MINLENG is 75 (for NGOPT, 36 bp)
 +  
 +  if [ $# -ne 2 ]
 +      then
 +          echo "Please specifiy two file names!"
 +          exit 1
 +  fi
 +  
 +  # Check the files are found (code from SGA example script)
 +  file_list="$1 $2"
 +  for input in $file_list; do
 +      if [ ! -f $input ]; then
 +          echo "Error input file $input not found"; exit 1;
 +      fi
 +  done
 +  
 +  # Using `basename` command is a good idea. Because the stripped BASE
 +  # can be used as a output file prefix in "current" directory.
 +  BASE=$(basename "$1" # delete any leading path
 +  BASE=${BASE%_*}           # delete string from '_' to the end (shortest match)
 +  # BASE=${1%%_*} makes difference. What is it? (longest match)
 +  # BASE=${1/_*/} (longest match)
 +  echo "File name base: $BASE"
 +  BASE=${BASE}-trim
 +  echo "File name base will be changed into ${BASE}"
 +  
 +  java -jar $TRIMMOMATIC PE -threads ${THREADS} -phred33 -baseout ${BASE}.fq.gz $1 $2 ILLUMINACLIP:$ADAPTER:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:${MINLEN}
 +  
 +  # compare to a5-miseq pipeline (my $trim_cmd): 
 +  #   ILLUMINACLIP:$adapter:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
 +  
 +  echo "Writing interleaved file ${BASE}.pe.fq from ${BASE}_1P.fq.gz ${BASE}_2P.fq.gz..."
 +  $INTERLEAVE -o ${BASE}.pe.fq ${BASE}_1P.fq.gz ${BASE}_2P.fq.gz
 +  
 +  echo "Deleting intermediate files (including orphan files)..."
 +  rm ${BASE}_1U.fq.gz ${BASE}_2U.fq.gz ${BASE}_1P.fq.gz ${BASE}_2P.fq.gz