seq2science scATAC postprocessing

Seq2science SnapATAC

Install the SnapATAC library from the SnapATAC github Change the location of the seq2science_output_dir to the output dir of the seq2Science workflow. Also generate a output figure directory to export the figures genereated by R to.

#library(devtools)
#install_github("r3fang/SnapATAC")
library('SnapATAC')
## Loading required package: Matrix
## Loading required package: rhdf5
seq2science_output_dir <- "/home/jsmits/seq2science_scATAC_example"
output_figure_dir <- paste(seq2science_output_dir, '/output_figures', sep ="")
dir.create(output_figure_dir, showWarnings = FALSE)

Load all the snap objects generated by the seq2Science workflow into one large snap object

snap_dir <- paste(seq2science_output_dir, '/snap', sep = "")

file.list = list.files(path = snap_dir, pattern = c("binned.snap"), all.files = FALSE,full.names = TRUE, include.dirs = TRUE, no.. = FALSE)
sample.list =list.files(path = snap_dir, pattern = c("binned.snap"), all.files = FALSE,full.names = FALSE, include.dirs = FALSE, no.. = FALSE)
sample.list = gsub(".binned.snap","",sample.list)

x.sp = createSnap(file=file.list, sample=sample.list)
## Epoch: reading the barcode session ...
## Warning: 'rBind' is deprecated.
##  Since R version 3.2.0, base's rbind() should work fine with S4 objects

The barcodes of the snapobject correlate to the single cell fastq file name

x.sp@barcode[c(1:10)]
##  [1] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M502"
##  [2] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M503"
##  [3] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M505"
##  [4] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M507"
##  [5] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M508"
##  [6] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M510"
##  [7] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M511"
##  [8] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M513"
##  [9] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M515"
## [10] "IPSC-2-KC-DIFF-IPSC-293-KC-PKC19-A0004-PLATE2-16718M701M516"

If you processed mulitple cell types and want to add this as metadata to the snapobject you need to generate a aditional samples.tsv file (samples_metadata.tsv) containing the sample filenames and the additional metadata. If you dont want to add aditional metadata you can skiphe next step. Here is an example of how such a metadata file should look like:

samples_metadata_subset <- read.csv2(paste(seq2science_output_dir, '/samples_metadata_subset.tsv', sep = ""), sep = '\t')
samples_metadata_subset
##                                                    sample_name cell_type
## 1  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate6-17128m701m502       KCs
## 2  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate6-17128m701m503       KCs
## 3  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate6-17128m701m505       KCs
## 4  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate6-17128m701m506       KCs
## 5  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m516      LSCs
## 6  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m517      LSCs
## 7  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m518      LSCs
## 8  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m520      LSCs
## 9  iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m521      LSCs
## 10 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m703m522      LSCs
## 11 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m502      LSCs
## 12 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m503      LSCs
## 13 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m505      LSCs
## 14 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m506      LSCs
## 15 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m507      LSCs
## 16 iPSC-2-KC-diff-eLSC-and-KC-PKC19-A0005-plate7-17129m704m508      LSCs
##    plate_384_well
## 1              A1
## 2              A2
## 3              A3
## 4              A4
## 5              E1
## 6              E2
## 7              E3
## 8              E4
## 9              E5
## 10             E6
## 11             E7
## 12             E8
## 13             E9
## 14            E10
## 15            E11
## 16            E12

(Optional) Merge the metadata from your metadata file to the snap object.

samples_metadata <- read.csv2(paste(seq2science_output_dir, '/samples_metadata.tsv', sep = ""), sep = '\t')
samples_metadata$sample_name <- lapply(samples_metadata$sample_name, toupper)

samples_metadata <- samples_metadata[samples_metadata$sample_name %in% x.sp@barcode,]
samples_metadata <- as.data.frame(samples_metadata[order(match(samples_metadata$sample_name,x.sp@barcode)),])
x.sp@metaData$cell_type <- samples_metadata$cell_type

Now you succesfully have generated a snap object, and you can generate single cell QC statistics

pdf(paste(output_figure_dir,'/barcodeQC.pdf',sep=""),width=6,height=6,paper='special') 
for (name in unique(x.sp@sample)){
  plotBarcode(
    obj=x.sp[x.sp@sample==name,], 
    pdf.file.name=NULL, 
    pdf.width=7, 
    pdf.height=7, 
    col="grey",
    border="grey",
    breaks=50)
  mtext(name, outer=TRUE,  cex=1, line=-1)
}
dev.off()
## png 
##   2
plotBarcode(
    obj=x.sp[x.sp@sample==x.sp@sample[1],], 
    pdf.file.name=NULL, 
    pdf.width=7, 
    pdf.height=7, 
    col="grey",
    border="grey",
    breaks=50)
  mtext(name, outer=TRUE,  cex=1, line=-1)

add cell-by-bin matrix (Generated by SNAPtools in the celseq pipeline)

#bmat is added:
x.sp = addBmatToSnap(x.sp, bin.size=5000, num.cores=1)
## Epoch: reading cell-bin count matrix session ...
x.sp = makeBinary(x.sp, mat="bmat")

From this point you can continue with a workflow tutorial from SnapATAC:https://github.com/r3fang/SnapATAC