1.reseq
This is an old revision of the document!
#########################
# 2017 C&K Workshop #
# Resequencing Code #
# 2017. 11 #
#########################
########Resequencing ###########
# Work from the practice directory; all relative file names below resolve here.
cd /home/workshop/1.WGS_practice/Genome_analysis
### FASTQC
# Quality-control report for both mates of the raw paired-end reads.
fastqc --nogroup \
    paired_end_sep1.fq \
    paired_end_sep2.fq
### TRIMMOMATIC
# Paired-end trimming: clip adapters, trim the 3' end (TRAILING:20) and drop
# reads left shorter than 75 bases (MINLEN:75).
# Outputs: *_1P/*_2P = pairs that survived together, *_1U/*_2U = orphaned mates.
TRIMMOMATIC_JAR=/home/Program/Trimmomatic-0.33/trimmomatic-0.33.jar
ADAPTER_FA=/home/Program/Trimmomatic-0.33/adapters/total-TruSeq-PE.fa
java -jar "$TRIMMOMATIC_JAR" PE \
    paired_end_sep1.fq paired_end_sep2.fq \
    paired_end_1P.fastq.gz paired_end_1U.fastq.gz \
    paired_end_2P.fastq.gz paired_end_2U.fastq.gz \
    "ILLUMINACLIP:${ADAPTER_FA}:2:30:10" \
    TRAILING:20 \
    MINLEN:75
### REFERENCE INDEXING
# Build the Bowtie2 index (file prefix "Reference") from the reference FASTA.
BOWTIE2_BUILD=/home/Program/bowtie2-2.2.5/bowtie2-build
"$BOWTIE2_BUILD" Reference.fa Reference
### READ MAPPING
# Align the properly paired, trimmed reads against the "Reference" index.
#   -x  index prefix produced by bowtie2-build
#   -1 / -2  mate 1 / mate 2 FASTQ files
#   -S  SAM output file
# The options must be written without a space after the dash ("-x", not "- x");
# otherwise bowtie2 sees a bare "-" plus stray positional arguments.
# bowtie2 prints its alignment summary on stderr, which is saved to the
# .bowtie2stat file.
/home/Program/bowtie2-2.2.5/bowtie2 \
    -x Reference \
    -1 paired_end_1P.fastq.gz -2 paired_end_2P.fastq.gz \
    -S paired_end.sam \
    2> /home/workshop/1.WGS_practice/Genome_analysis/paired_end.bowtie2stat
### ADD_RG
# Attach read-group tags to the alignments and write them as a
# coordinate-sorted BAM.
PICARD_JAR=/home/Program/picard-tools-1.138/picard.jar
java -jar "$PICARD_JAR" AddOrReplaceReadGroups \
    INPUT=paired_end.sam \
    OUTPUT=paired_end_RG.bam \
    SORT_ORDER=coordinate \
    RGID=paired_end \
    RGLB=paired_end \
    RGPL=illumina \
    RGPU=non \
    RGSM=paired_end \
    VALIDATION_STRINGENCY=LENIENT
### REMOVE_DU
# Detect duplicate reads and drop them from the output
# (REMOVE_DUPLICATES=true); duplication statistics go to the .metrics file.
# The input is declared as already sorted (ASSUME_SORTED=true).
PICARD_JAR=/home/Program/picard-tools-1.138/picard.jar
java -jar "$PICARD_JAR" MarkDuplicates \
    INPUT=paired_end_RG.bam \
    OUTPUT=paired_end_RG_DU.bam \
    METRICS_FILE=paired_end_RG_DU.metrics \
    REMOVE_DUPLICATES=true \
    ASSUME_SORTED=true \
    VALIDATION_STRINGENCY=LENIENT
### FIXMATE
# Re-synchronise mate-pair information between paired reads and emit a
# coordinate-sorted BAM.
PICARD_JAR=/home/Program/picard-tools-1.138/picard.jar
java -jar "$PICARD_JAR" FixMateInformation \
    INPUT=paired_end_RG_DU.bam \
    OUTPUT=paired_end_RG_DU_FIX.bam \
    SORT_ORDER=coordinate \
    VALIDATION_STRINGENCY=LENIENT
### INDEXING
# Index the reference FASTA (faidx) and the cleaned BAM (index) so that
# downstream tools can access them by position.
SAMTOOLS=/home/Program/samtools-1.2/samtools
"$SAMTOOLS" faidx Reference.fa
"$SAMTOOLS" index paired_end_RG_DU_FIX.bam
### REALIGN
# Local realignment around indels (GATK 3.x), in two passes:
#   1. RealignerTargetCreator finds the intervals that need realignment.
#   2. IndelRealigner realigns the reads inside those intervals.
# Options are written without a space after the dash ("-R", not "- R");
# with the space GATK receives a bare "-" and cannot parse the command line.
# NOTE(review): GATK 3 normally also expects a sequence dictionary
# (Reference.dict) next to Reference.fa; this script does not create one --
# confirm it already exists in the working directory.
GATK_JAR=/home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar

java -jar "$GATK_JAR" -T RealignerTargetCreator \
    -R Reference.fa \
    -I paired_end_RG_DU_FIX.bam \
    -o paired_end_RG_DU_FIX_Realign.intervals

java -jar "$GATK_JAR" -T IndelRealigner \
    -R Reference.fa \
    -I paired_end_RG_DU_FIX.bam \
    -o paired_end_RG_DU_FIX_Realign.bam \
    -targetIntervals paired_end_RG_DU_FIX_Realign.intervals
### BQ_RECAL
# Base quality score recalibration (GATK 3.x), in two passes:
#   1. BaseRecalibrator builds the recalibration table (paired_end_RECAL.grp)
#      from the listed covariates, using Reference.vcf as the known sites.
#   2. PrintReads applies that table (-BQSR) and writes the recalibrated BAM.
# Options are written without a space after the dash ("-cov", not "- cov").
GATK_JAR=/home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar

java -jar "$GATK_JAR" -T BaseRecalibrator \
    -R Reference.fa \
    -I paired_end_RG_DU_FIX_Realign.bam \
    -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate \
    -knownSites Reference.vcf \
    -o paired_end_RECAL.grp

java -jar "$GATK_JAR" -T PrintReads \
    -R Reference.fa \
    -I paired_end_RG_DU_FIX_Realign.bam \
    -BQSR paired_end_RECAL.grp \
    -o paired_end_RECAL.bam
### INDEXING
# Index the recalibrated BAM.
SAMTOOLS=/home/Program/samtools-1.2/samtools
"$SAMTOOLS" index paired_end_RECAL.bam
### IGV
# Start the IGV browser with a 1500 MB Java heap.
IGV_JAR=/home/Program/IGV_2.3.80/igv.jar
java -Xmx1500m -jar "$IGV_JAR"
############ VARIANT CALLING #########
# 1. UnifiedGenotyper calls SNPs and indels together (-glm BOTH) into
#    sample01.raw.vcf.
# 2. SelectVariants splits the raw calls into a SNP-only and an INDEL-only VCF.
# Options are written without a space after the dash ("-R", "--variant",
# "-selectType"); the split forms ("- R", "- -variant") are not valid
# GATK arguments.
GATK_JAR=/home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar

java -jar "$GATK_JAR" -T UnifiedGenotyper \
    -R Reference.fa \
    -I paired_end_RECAL.bam \
    -o sample01.raw.vcf \
    -glm BOTH

java -jar "$GATK_JAR" \
    -R Reference.fa \
    -T SelectVariants \
    --variant sample01.raw.vcf \
    -o snp_sample01.raw.vcf \
    -selectType SNP

java -jar "$GATK_JAR" \
    -R Reference.fa \
    -T SelectVariants \
    --variant sample01.raw.vcf \
    -o indel_sample01.raw.vcf \
    -selectType INDEL
### VARIANT FILTERING
# Hard-filter the raw calls with GATK 3.x VariantFiltration. Each
# --filterExpression is a JEXL condition; records that match it are tagged
# with the paired --filterName in the FILTER column.
#
# Repairs relative to the wiki-rendered text:
#   * options use plain ASCII "--" / "-" with no space after the dash;
#   * expressions and names use straight double quotes (curly quotes are not
#     shell quoting characters, so the expression would be split on spaces);
#   * the HARD_TO_VALIDATE expression is restored to
#     "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" -- the "((...))" part had been
#     consumed as a wiki footnote, leaving unbalanced parentheses.
GATK_JAR=/home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar

# Indels are filtered first: the result is reused below as the SNP mask.
java -jar "$GATK_JAR" \
    -R Reference.fa \
    -T VariantFiltration \
    --variant indel_sample01.raw.vcf \
    -o filtered_indel_sample01.vcf \
    --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" \
    --filterExpression "QUAL < 30" --filterName "QualFilter" \
    --filterExpression "QD < 5.0" --filterName "QD5" \
    --filterExpression "FS > 200.0" --filterName "FS200"

# SNPs: additionally flag clusters (3 SNPs within a 10 bp window) and SNPs
# that overlap the filtered indel calls (mask name "InDel").
java -jar "$GATK_JAR" \
    -R Reference.fa \
    -T VariantFiltration \
    --variant snp_sample01.raw.vcf \
    -o filtered_snp_sample01.vcf \
    --clusterSize 3 --clusterWindowSize 10 \
    --mask filtered_indel_sample01.vcf --maskName "InDel" \
    --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" \
    --filterExpression "QUAL < 30" --filterName "QualFilter" \
    --filterExpression "FS > 200.0" --filterName "FS200"
### VARIANT ANNOTATION
# Annotate the filtered SNP and indel VCFs with snpEff against the "Human3"
# database. The annotated VCF is written to stdout (redirected to *.ann.vcf);
# -s names the HTML summary report.
# Optional helpers, left disabled: list available databases / download one.
#java -jar /home/Program/snpEff/snpEff.jar databases | more
#java -jar /home/Program/snpEff/snpEff.jar -download GRCh38.82
SNPEFF_JAR=/home/Program/snpEff/snpEff.jar
java -jar "$SNPEFF_JAR" Human3 filtered_snp_sample01.vcf \
    -s filtered_snp_sample01.ann.html \
    > filtered_snp_sample01.ann.vcf
java -jar "$SNPEFF_JAR" Human3 filtered_indel_sample01.vcf \
    -s filtered_indel_sample01.ann.html \
    > filtered_indel_sample01.ann.vcf
### IGV
# Start the IGV browser with a 1500 MB Java heap.
IGV_JAR=/home/Program/IGV_2.3.80/igv.jar
java -Xmx1500m -jar "$IGV_JAR"
### END ###
1.reseq.1513319611.txt.gz · Last modified: (external edit)
