seq2science scATAC postprocessing
Seq2science SnapATAC
Install the SnapATAC library from the SnapATAC github Change the location of the seq2science_output_dir to the output dir of the seq2Science workflow. Also generate a output figure directory to export the figures genereated by R to.
#library(devtools)
#install_github("r3fang/SnapATAC")
library('SnapATAC')
## Loading required package: Matrix
## Loading required package: rhdf5
seq2science_output_dir <- "/home/jsmits/seq2science_scATAC_example"
output_figure_dir <- paste(seq2science_output_dir, '/output_figures', sep ="")
dir.create(output_figure_dir, showWarnings = FALSE)
Load all the snap objects generated by the seq2Science workflow into one large snap object
snap_dir <- paste(seq2science_output_dir, '/snap', sep = "")
file.list = list.files(path = snap_dir, pattern = c("binned.snap"), all.files = FALSE,full.names = TRUE, include.dirs = TRUE, no.. = FALSE)
sample.list =list.files(path = snap_dir, pattern = c("binned.snap"), all.files = FALSE,full.names = FALSE, include.dirs = FALSE, no.. = FALSE)
sample.list = gsub(".binned.snap","",sample.list)
x.sp = createSnap(file=file.list, sample=sample.list)
## Epoch: reading the barcode session ...
## Warning: 'rBind' is deprecated.
## Since R version 3.2.0, base's rbind() should work fine with S4 objects
The barcodes of the snapobject correlate to the single cell fastq file name
x.sp@barcode[c(1:10)]
## [1] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M502"
## [2] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M503"
## [3] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M505"
## [4] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M507"
## [5] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M508"
## [6] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M510"
## [7] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M511"
## [8] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M513"
## [9] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M515"
## [10] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M516"
If you processed mulitple cell types and want to add this as metadata to the snapobject you need to generate a aditional samples.tsv file (samples_metadata.tsv) containing the sample filenames and the additional metadata. If you dont want to add aditional metadata you can skiphe next step. Here is an example of how such a metadata file should look like:
samples_metadata_subset <- read.csv2(paste(seq2science_output_dir, '/samples_metadata_subset.tsv', sep = ""), sep = '\t')
samples_metadata_subset
## sample_name cell_type
## 1 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate6-17128m701m502 KCs
## 2 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate6-17128m701m503 KCs
## 3 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate6-17128m701m505 KCs
## 4 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate6-17128m701m506 KCs
## 5 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m516 LSCs
## 6 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m517 LSCs
## 7 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m518 LSCs
## 8 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m520 LSCs
## 9 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m521 LSCs
## 10 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m522 LSCs
## 11 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m502 LSCs
## 12 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m503 LSCs
## 13 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m505 LSCs
## 14 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m506 LSCs
## 15 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m507 LSCs
## 16 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m508 LSCs
## plate_384_well
## 1 A1
## 2 A2
## 3 A3
## 4 A4
## 5 E1
## 6 E2
## 7 E3
## 8 E4
## 9 E5
## 10 E6
## 11 E7
## 12 E8
## 13 E9
## 14 E10
## 15 E11
## 16 E12
(Optional) Merge the metadata from your metadata file to the snap object.
samples_metadata <- read.csv2(paste(seq2science_output_dir, '/samples_metadata.tsv', sep = ""), sep = '\t')
samples_metadata$sample_name <- lapply(samples_metadata$sample_name, toupper)
samples_metadata <- samples_metadata[samples_metadata$sample_name %in% x.sp@barcode,]
samples_metadata <- as.data.frame(samples_metadata[order(match(samples_metadata$sample_name,x.sp@barcode)),])
x.sp@metaData$cell_type <- samples_metadata$cell_type
Now you succesfully have generated a snap object, and you can generate single cell QC statistics
pdf(paste(output_figure_dir,'/barcodeQC.pdf',sep=""),width=6,height=6,paper='special')
for (name in unique(x.sp@sample)){
plotBarcode(
obj=x.sp[x.sp@sample==name,],
pdf.file.name=NULL,
pdf.width=7,
pdf.height=7,
col="grey",
border="grey",
breaks=50)
mtext(name, outer=TRUE, cex=1, line=-1)
}
dev.off()
## png
## 2
plotBarcode(
obj=x.sp[x.sp@sample==x.sp@sample[1],],
pdf.file.name=NULL,
pdf.width=7,
pdf.height=7,
col="grey",
border="grey",
breaks=50)
mtext(name, outer=TRUE, cex=1, line=-1)
add cell-by-bin matrix (Generated by SNAPtools in the celseq pipeline)
#bmat is added:
x.sp = addBmatToSnap(x.sp, bin.size=5000, num.cores=1)
## Epoch: reading cell-bin count matrix session ...
x.sp = makeBinary(x.sp, mat="bmat")
From this point you can continue with a workflow tutorial from SnapATAC:https://github.com/r3fang/SnapATAC