Slide 12
Slide 12 text
w ύΠϓϥΠϯʹଟ͘ͷιϑτΣΞΛΈ߹Θͤͯੜσʔ
λΛॲཧɾղੳ͢Δ
w ιϑτΣΞ͕มΘΔͱ݁Ռ͕มΘΔ
w ࢀরσʔλ͕มΘΔͱ݁Ռ͕มΘΔ
12
# ڥߏங
$ conda install -c bioconda fastp
$ conda install -c bioconda bowtie2
$ mkdir ~/bowtie2_index # Pre-built indexΛೖΕΔͨΊͷσΟϨΫτϦΛ࡞͢Δ
$ cd ~/bowtie2_index # # Pre-built indexΛೖΕΔͨΊͷσΟϨΫτϦʹҠಈ͢Δ
$ wget ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/mm10.zip
$ unzip mm10.zip
$ cd ~/bowtie2_index
$ wget ftp://ftp.ncbi.nlm.nih.gov/genomes/archive/old_genbank/Eukaryotes/vertebrates_mammals/
Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/
GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.bowtie_index.tar.gz
$ tar xvzf bowtie2_index/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.bowtie_index.tar.gz
$ mkdir ~/tools
$ cd ~/tools
$ python3 -m venv MACS2/
$ source MACS2/bin/activate
$ pip install --upgrade pip
$ pip install numpy
$ pip install cython
$ git clone https://github.com/taoliu/MACS.git
$ cd MACS
$ git checkout remotes/origin/macs2python3
$ python setup_w_cython.py install
$ macs2 --help # ϔϧϓϝοηʔδ͕ग़ྗ͞ΕΕOK
$ deactivate
$ source ~/tools/MACS2/bin/activate
$ brew install samtools
$ conda install -c bioconda homer
$ perl /anaconda3/share/homer-*/configureHomer.pl -install hg38
$ perl /anaconda3/share/homer-*/configureHomer.pl -install mm10
$ conda install -c bioconda deeptools
$ deeptools --version
$ brew install r
$ brew cask install rstudio
$ brew install igv
$ brew install bedtools
$ open -a RStudio
> if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
> BiocManager::install("ChIPpeakAnno")
$ mkdir ~/gencode
$ cd ~/gencode
$ wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M20/
gencode.vM20.annotation.gtf.gz # $ gzip -d gencode.vM20.annotation.gtf.gz
$ ls gencode.vM20.annotation.gtfɹ# gencode.vM20.annotation.gtf͕Ͱ͖ͨ͜ͱΛ֬ೝ͢Δɻ
# σʔλऔಘ
$ mkdir ~/chipseq # ChIP-seqղੳ༻ͷσΟϨΫτϦΛ࡞͢Δ
$ cd ~/chipseq # ChIP-seqղੳ༻ͷσΟϨΫτϦҠಈ͢Δ
$ mkdir fastq # FASTQϑΝΠϧΛೖΕΔσΟϨΫτϦΛ࡞͢Δ
$ cd ~/chipseq/fastq
$ wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR520/004/SRR5208824/SRR5208824.fastq.gz
$ cd ~/chipseq/fastq
$ wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR520/008/SRR5208828/SRR5208828.fastq.gz
$ cd ~/chipseq/fastq
$ wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR520/008/SRR5208838/SRR5208838.fastq.gz
$ cd ~/chipseq/fastq
$ cp SRR5208824.fastq.gz BRD4_ChIP_IFNy.R1.fastq.gz
$ cp SRR5208828.fastq.gz IRF1_ChIP_IFNy.R1.fastq.gz
$ cp SRR5208838.fastq.gz Input_DNA.R1.fastq.gz
$ cd ~/chipseq
# ղੳ
$ mkdir fastqc
$ cd ~/chipseq
$ fastqc -o fastqc fastq/BRD4_ChIP_IFNy.R1.fastq.gz
$ cd ~/chipseq
$ fastqc -o fastqc fastq/IRF1_ChIP_IFNy.R1.fastq.gz
$ fastqc -o fastqc fastq/Input_DNA.R1.fastq.gz
$ cd ~/chipseq
$ mkdir fastp # fastpͱ͍͏໊લͷσΟϨΫτϦΛ࡞Δ
$ cd ~/chipseq
$ fastp -i fastq/BRD4_ChIP_IFNy.R1.fastq.gz -o fastp/BRD4_ChIP_IFNy.R1.trim.fastq.gz --html fastp/
BRD4_ChIP_IFNy.fastp.html
ɹಉ༷ʹIRF1_ChIP_IFNy.R1.fastq.gzɺInput_DNA.R1.fastq.gzʹରͯ͠fastpΛ࣮ߦ͢Δɻ
$ cd ~/chipseq
$ fastp -i fastq/IRF1_ChIP_IFNy.R1.fastq.gz -o fastp/IRF1_ChIP_IFNy.R1.trim.fastq.gz --html fastp/
IRF1_ChIP_IFNy.fastp.html
$ fastp -i fastq/Input_DNA.R1.fastq.gz -o fastp/Input_DNA.R1.trim.fastq.gz --html fastp/
Input_DNA.fastp.html
ɹfastpσΟϨΫτϦʹҎԼͷϑΝΠϧ͕Ͱ͖ͯΔ͜ͱΛ֬ೝ͢Δɻ
$ cd ~/chipseq
$ open fastp/BRD4_ChIP_IFNy.fastp.html
$ system_profiler SPHardwareDataType | grep Cores
$ cd ~/chipseq
$ mkdir bowtie2
$ cd ~/chipseq
$ bowtie2 -p 2 -x data/external/bowtie2_index/mm10 \
-U fastp/BRD4_ChIP_IFNy.R1.trim.fastq.gz > bowtie2/BRD4_ChIP_IFNy.trim.sam
$ cd ~/chipseq
$ bowtie2 -p 2 -x data/external/bowtie2_index/mm10 \
-U fastp/IRF1_ChIP_IFNy.R1.trim.fastq.gz > bowtie2/IRF1_ChIP_IFNy.trim.sam
$ bowtie2 -p 2 -x data/external/bowtie2_index/mm10 \
-U fastp/Input_DNA.R1.trim.fastq.gz > bowtie2/Input_DNA.trim.sam
$ cd ~/chipseq
$ samtools view -bhS -F 0x4 -q 42 bowtie2/BRD4_ChIP_IFNy.trim.sam | samtools sort -T bowtie2/
BRD4_ChIP_IFNy.trim - > bowtie2/BRD4_ChIP_IFNy.trim.uniq.bam
$ cd ~/chipseq
$ samtools view -bhS -F 0x4 -q 42 bowtie2/IRF1_ChIP_IFNy.trim.sam | samtools sort -T bowtie2/
IRF1_ChIP_IFNy.trim - > bowtie2/IRF1_ChIP_IFNy.trim.uniq.bam
$ samtools view -bhS -F 0x4 -q 42 bowtie2/Input_DNA.trim.sam | samtools sort -T bowtie2/Input_DNA.trim
- > bowtie2/Input_DNA.trim.uniq.bam
$ cd ~/chipseq
$ samtools index bowtie2/BRD4_ChIP_IFNy.trim.uniq.bam
$ samtools index bowtie2/IRF1_ChIP_IFNy.trim.uniq.bam
$ samtools index bowtie2/Input_DNA.trim.uniq.bam
$ cd ~/chipseq
$ mkdir macs2 # MACS2ͷग़ྗ݁ՌΛอଘ͢ΔσΟϨΫτϦΛ࡞͢ΔʢඞਢͰͳ͍ʣ
$ cd ~/chipseq
$ source ~/tools/MACS2/bin/activate # MACS2͕Πϯετʔϧ͞ΕͨڥΓସ͑Δ
$ macs2 callpeak -t bowtie2/BRD4_ChIP_IFNy.trim.uniq.bam \
-c bowtie2/Input_DNA.trim.uniq.bam -f BAM -g mm -n BRD4_ChIP_IFNy --outdir macs2 -B -q 0.01
$ macs2 callpeak -t bowtie2/IRF1_ChIP_IFNy.trim.uniq.bam \
-c bowtie2/Input_DNA.trim.uniq.bam -f BAM -g mm -n IRF1_ChIP_IFNy --outdir macs2 -B -q 0.01
$ deactivate # ݩͷڥΓସ͑Δɻ
$ head macs2/BRD4_ChIP_IFNy_peaks.narrowPeak
$ head macs2/BRD4_ChIP_IFNy_summits.bed
$ cd ~/chipseq
$ wc -l macs2/*_peaks.narrowPeak
$ cd ~/chipseq
$ bedtools intersect -u -a macs2/BRD4_ChIP_IFNy_peaks.narrowPeak -b macs2/
IRF1_ChIP_IFNy_peaks.narrowPeak > macs2/
BRD4_ChIP_IFNy_peaks.overlapped_with_IRF1_ChIP_IFNy_peaks.narrowPeak
$ cd ~/chipseq
$ bedtools intersect -u -a macs2/IRF1_ChIP_IFNy_peaks.narrowPeak -b macs2/
BRD4_ChIP_IFNy_peaks.narrowPeak > macs2/
IRF1_ChIP_IFNy_peaks.overlapped_with_BRD4_ChIP_IFNy_peaks.narrowPeak
$ cd ~/chipseq
$ bedtools intersect -v -a macs2/BRD4_ChIP_IFNy_peaks.narrowPeak -b macs2/
IRF1_ChIP_IFNy_peaks.narrowPeak > macs2/
BRD4_ChIP_IFNy_peaks.not_overlapped_with_IRF1_ChIP_IFNy_peaks.narrowPeak
$ bedtools intersect -v -a macs2/IRF1_ChIP_IFNy_peaks.narrowPeak -b macs2/
BRD4_ChIP_IFNy_peaks.narrowPeak > macs2/
IRF1_ChIP_IFNy_peaks.not_overlapped_with_BRD4_ChIP_IFNy_peaks.narrowPeak
$ cd ~/chipseq
$ wc -l macs2/*overlapped*.narrowPeak
$ cd ~/chipseq
$ mkdir deeptools # deepToolsͷग़ྗ݁ՌΛอଘ͢ΔσΟϨΫτϦΛ࡞͢Δ
$ cd ~/chipseq
$ bamCoverage -b bowtie2/BRD4_ChIP_IFNy.trim.uniq.bam -o deeptools/BRD4_ChIP_IFNy.trim.uniq.bw -of
bigwig --normalizeUsing CPM
$ cd ~/chipseq
$ bamCoverage -b bowtie2/IRF1_ChIP_IFNy.trim.uniq.bam -o deeptools/IRF1_ChIP_IFNy.trim.uniq.bw -of
bigwig --normalizeUsing CPM
$ bamCoverage -b bowtie2/Input_DNA.trim.uniq.bam -o deeptools/Input_DNA.trim.uniq.bw -of bigwig --
normalizeUsing CPM
$ igv
$ cd ~/chipseq
$ mkdir homer
$ cd ~/chipseq
$ mkdir homer/BRD4_ChIP_IFNy
$ findMotifsGenome.pl macs2/BRD4_ChIP_IFNy_summits.bed mm10 homer/BRD4_ChIP_IFNy -size 200 -mask
$ cd ~/chipseq
$ mkdir homer/IRF1_ChIP_IFNy
$ findMotifsGenome.pl macs2/IRF1_ChIP_IFNy_summits.bed mm10 homer/IRF1_ChIP_IFNy -size 200 -mask
$ cd ~/chipseq
$ computeMatrix scale-regions \
--regionsFileName ~/gencode/gencode.vM20.annotation.gtf \
--scoreFileName deeptools/BRD4_ChIP_IFNy.trim.uniq.bw \
--outFileName deeptools/BRD4_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \
--upstream 1000 --downstream 1000 \
--skipZeros
$ cd ~/chipseq
$ plotProfile -m deeptools/BRD4_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \
-out deeptools/metagene_BRD4_ChIP_IFNy_gencode_vM20_gene.pdf \
--plotTitle "GENCODE vM20 genes"
$ cd ~/chipseq
$ plotHeatmap -m deeptools/BRD4_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \
-out deeptools/heatmap_BRD4_ChIP_IFNy_gencode_vM20_gene.pdf \
--plotTitle "GENCODE vM20 genes"
$ cd ~/chipseq
$ computeMatrix scale-regions \
--regionsFileName ~/gencode/gencode.vM20.annotation.gtf \
--scoreFileName deeptools/IRF1_ChIP_IFNy.trim.uniq.bw \
--outFileName deeptools/IRF1_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \
--upstream 1000 --downstream 1000 \
--skipZeros
$ plotProfile -m deeptools/IRF1_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \
-out deeptools/metagene_IRF1_ChIP_IFNy_gencode_vM20_gene.pdf \
--plotTitle "GENCODE vM20 genes"
$ plotHeatmap -m deeptools/IRF1_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \
-out deeptools/heatmap_IRF1_ChIP_IFNy_gencode_vM20_gene.pdf \
--plotTitle "GENCODE vM20 genes"
$ cd ~/
$ computeMatrix reference-point \
--regionsFileName macs2/IRF1_ChIP_IFNy_summits.bed \
--scoreFileName deeptools/BRD4_ChIP_IFNy.trim.uniq.bw \
--referencePoint center \
--upstream 1000 \
--downstream 1000 \
--outFileName deeptools/BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.matrix.txt.gz \
--skipZeros
$ plotProfile -m deeptools/BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.matrix.txt.gz \
-out deeptools/aggregation_BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.pdf \
--regionsLabel "IRF1_ChIP_IFNy Peaks"
$ plotHeatmap -m deeptools/BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.matrix.txt.gz \
-out deeptools/heatmap_BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.pdf \
--samplesLabel "BRD4_ChIP_IFNy" \
--regionsLabel "IRF1_ChIP_IFNy Peaks"
$ computeMatrix scale-regions \
--regionsFileName ../data/gencode/gencode.vM20.annotation.gtf \
--scoreFileName deeptools/BRD4_ChIP_IFNy.trim.uniq.bw \
deeptools/IRF1_ChIP_IFNy.trim.uniq.bw \
--outFileName deeptools/chipseq_matrix_gencode_vM20_gene.txt.gz \
--upstream 1000 --downstream 1000 \
--skipZeros
$ plotHeatmap -m deeptools/chipseq_matrix_gencode_vM20_gene.txt.gz \
-out deeptools/heatmap_BRD4_ChIP_IFNy_gencode_vM20_gene.k3.pdf \
--kmeans 3 \
--plotTitle "GENCODE vM20 genes"
$ cd ~/chipseq
$ cut -f 1,2,3,4,5,6 macs2/BRD4_ChIP_IFNy_peaks.narrowPeak > macs2/BRD4_ChIP_IFNy_peaks.narrowPeak.bed
$ cut -f 1,2,3,4,5,6 macs2/IRF1_ChIP_IFNy_peaks.narrowPeak > macs2/IRF1_ChIP_IFNy_peaks.narrowPeak.bed
$ cd ~/chipseq
$ head macs2/*.narrowPeak.bed
$ cd ~/chipseq
$ open -a RStudio
> library(ChIPpeakAnno)
> gr1 <- toGRanges("macs2/BRD4_ChIP_IFNy_peaks.narrowPeak", format="narrowPeak", header=FALSE)
> gr2 <- toGRanges("macs2/IRF1_ChIP_IFNy_peaks.narrowPeak", format="narrowPeak", header=FALSE)
> ol <- findOverlapsOfPeaks(gr1, gr2) # ϐʔΫಉ࢜ͷॏͳΓΛௐࠪ͢Δ
> makeVennDiagram(ol, NameOfPeaks=c(“BRD4”, “IRF1”)) # ॏͳΓΛϕϯਤͱͯ͠ՄࢹԽ͢Δ
> BiocManager::install("TxDb.Mmusculus.UCSC.mm10.ensGene") # ϚεήϊϜmm10ͷҨࢠϞσϧͷύοέʔδΛμ
ϯϩʔυ͢Δ
> library(TxDb.Mmusculus.UCSC.mm10.ensGene) # ϚεήϊϜmm10ͷҨࢠϞσϧͷύοέʔδΛϩʔυ͢Δ
> annoData <- toGRanges(TxDb.Mmusculus.UCSC.mm10.ensGene)
> seqlevelsStyle(gr1) <- seqlevelsStyle(annoData) # છ৭ମ໊ͷελΠϧΛἧ͑Δ
> anno1 <- annotatePeakInBatch(gr1, AnnotationData=annoData) # ϐʔΫΛ࠷͍ۙసࣸ։࢝ʢTSSʣʹׂΓͯΔ
> pie1(table(anno1$insideFeature), main="BRD4") # ϐʔΫ͕Ҩࢠ͔ΒΈͯͲͷྖҬʹஔ͍ͨͷ͔Λɺԁάϥϑͱͯ͠
දࣔ͢Δ
> seqlevelsStyle(gr2) <- seqlevelsStyle(annoData) # છ৭ମ໊ͷελΠϧΛἧ͑Δ
> anno2 <- annotatePeakInBatch(gr2, AnnotationData=annoData) # ϐʔΫΛ࠷͍ۙసࣸ։࢝ʢTSSʣʹׂΓͯΔ
> pie1(table(anno2$insideFeature), main="IRF2") # ϐʔΫ͕Ҩࢠ͔ΒΈͯͲͷྖҬʹஔ͍ͨͷ͔Λɺԁάϥϑͱͯ͠
දࣔ͢Δ
> overlaps <- ol$peaklist[["gr1///gr2"]]
> aCR <- assignChromosomeRegion(overlaps, nucleotideLevel=FALSE,
precedence=c("Promoters", "immediateDownstream",
"fiveUTRs", "threeUTRs",
"Exons", "Introns"),
TxDb=TxDb.Mmusculus.UCSC.mm10.ensGene)
> pie1(aCR$percentage, main="BRD4 & IRF1")
> BiocManager::install("EnsDb.Mmusculus.v79")
> library(EnsDb.Mmusculus.v79)
> anno1$feature[is.na(anno1$feature)] <- "." # ΤϥʔΛආ͚ΔͨΊʹ NA ΛϐϦΦυʹม͑Δ
> anno1$geneName <- mapIds(EnsDb.Mmusculus.v79, keys=anno1$feature, column = "GENENAME",
keytype="GENEID")
> anno1[1:2]
if(!dir.exists("ChIPpeakAnno")) dir.create("ChIPpeakAnno")
df_anno1 <- as.data.frame(anno1)
write.table(df_anno1, "ChIPpeakAnno/BRD4_ChIP_IFNy_peaks.annot.txt", sep="\t", quote=F)
ChIP-seqղੳͷྫ
>200ߦͷίϚϯυ
https://github.com/yuifu/ngsdat2_epigenome_chipseq/blob/master/chipseq.md