library(gread)
library(Matrix)
library(devtools)
library(Seurat)
options(Seurat.object.assay.version = 'v5')
library(tidyverse)
library(alevinQC)
library(fishpond)
library(miQC)
library(SeuratWrappers)
library(flexmix)
options(knitr.duplicate.label = "allow")1 scRNA: Load and Compile Seurat Object
1.1 Environment Setup
1.2 Sample QC
# Generate QC report in a separate html file
alevin_out = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin/"
alevin_q = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant/"
alevin_crl = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/"
alevinFryQCReport(paste(alevin_out,"Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
paste(alevin_q, "Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
paste(alevin_crl, "Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
outputFormat = "html_document",
outputDir = paste(alevin_crl,"Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
outputFile = "Pool142-2_1_S1_ME_L001_R1_001.html",
sampleId = "Pool142-2_1_S1_ME_L001_R1_001",forceOverwrite=TRUE
)Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_1_S1_ME_L001_R1_001/Pool142-2_1_S1_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
paste(alevin_q, "Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
paste(alevin_crl, "Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
outputFormat = "html_document",
outputDir = paste(alevin_crl,"Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
outputFile = "Pool142-2_2_S2_ME_L001_R1_001.html",
sampleId = "Pool142-2_2_S2_ME_L001_R1_001",forceOverwrite=TRUE
)Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_2_S2_ME_L001_R1_001/Pool142-2_2_S2_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
paste(alevin_q, "Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
paste(alevin_crl, "Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
outputFormat = "html_document",
outputDir = paste(alevin_crl,"Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
outputFile = "Pool142-2_3_S3_ME_L001_R1_001.html",
sampleId = "Pool142-2_3_S3_ME_L001_R1_001",forceOverwrite=TRUE
)Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_3_S3_ME_L001_R1_001/Pool142-2_3_S3_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
paste(alevin_q, "Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
paste(alevin_crl, "Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
outputFormat = "html_document",
outputDir = paste(alevin_crl,"Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
outputFile = "Pool142-2_4_S4_ME_L001_R1_001.html",
sampleId = "Pool142-2_4_S4_ME_L001_R1_001",forceOverwrite=TRUE
)Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_4_S4_ME_L001_R1_001/Pool142-2_4_S4_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
paste(alevin_q, "Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
paste(alevin_crl, "Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
outputFormat = "html_document",
outputDir = paste(alevin_crl,"Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
outputFile = "Pool142-2_5_S5_ME_L001_R1_001.html",
sampleId = "Pool142-2_5_S5_ME_L001_R1_001",forceOverwrite=TRUE
)Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_5_S5_ME_L001_R1_001/Pool142-2_5_S5_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
paste(alevin_q, "Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
paste(alevin_crl, "Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
outputFormat = "html_document",
outputDir = paste(alevin_crl,"Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
outputFile = "Pool142-2_6_S6_ME_L001_R1_001.html",
sampleId = "Pool142-2_6_S6_ME_L001_R1_001",forceOverwrite=TRUE
)Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_6_S6_ME_L001_R1_001/Pool142-2_6_S6_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
paste(alevin_q, "Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
paste(alevin_crl, "Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
outputFormat = "html_document",
outputDir = paste(alevin_crl,"Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
outputFile = "Pool142-2_7_S7_ME_L001_R1_001.html",
sampleId = "Pool142-2_7_S7_ME_L001_R1_001",forceOverwrite=TRUE
)Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_7_S7_ME_L001_R1_001/Pool142-2_7_S7_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
1.3 Alevin Fry Outputs Loading
#| label: Sample Loading
pool142_alevin_crl = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/"
pool144_alevin_crl = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool144_alevin_fry_output/alevin_quant_crlikeem/"
Pool142_1 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
outputFormat = "snRNA") #responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 11817 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_2 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
outputFormat = "snRNA") #non-responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 12085 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_3 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
outputFormat = "snRNA") #non-responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 14437 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_4 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
outputFormat = "snRNA") #non-responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 11961 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_5 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
outputFormat = "snRNA") #responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 25476 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_6 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
outputFormat = "snRNA") #responder locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 39430 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_7 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
outputFormat = "snRNA") #non-responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 10771 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool144_1 <- loadFry(paste(pool144_alevin_crl, "Pool144_1_S1_L004_R1_001", sep = ""),
outputFormat = "snRNA") #responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 13005 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool144_2 <- loadFry(paste(pool144_alevin_crl, "Pool144_2_S2_L004_R1_001", sep = ""),
outputFormat = "snRNA") #non-responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 10799 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool144_3 <- loadFry(paste(pool144_alevin_crl, "Pool144_3_S3_L004_R1_001", sep = ""),
outputFormat = "snRNA") #non-responderlocating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 1571 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
1.4 Transcript Level Counts to Gene Level Counts
#| label: Transcript Level Counts to Gene Level Counts
tx2gene <- read.table("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/ref/gencode.v45.annotation_splici_fl85/gene_id_to_name.tsv",
header=F,
sep="\t",
col.names=c("tx","gene"))
# Use matrix multiplication to group and sum counts by gene symbol
exp.txId <- rownames(counts(Pool142_1))
exp.geneId <- as.vector(tx2gene$gene[match(exp.txId, tx2gene$tx)])
exp.tx.grp <- t(sparse.model.matrix(~0 + exp.geneId))
Pool142_1.summarized <- exp.tx.grp %*% counts(Pool142_1)
rownames(Pool142_1.summarized) <- rownames(Pool142_1.summarized) %>% str_replace_all(".+.geneId","")
Pool142_2.summarized <- exp.tx.grp %*% counts(Pool142_2)
rownames(Pool142_2.summarized) <- rownames(Pool142_2.summarized) %>% str_replace_all(".+.geneId","")
Pool142_3.summarized <- exp.tx.grp %*% counts(Pool142_3)
rownames(Pool142_3.summarized) <- rownames(Pool142_3.summarized) %>% str_replace_all(".+.geneId","")
Pool142_4.summarized <- exp.tx.grp %*% counts(Pool142_4)
rownames(Pool142_4.summarized) <- rownames(Pool142_4.summarized) %>% str_replace_all(".+.geneId","")
Pool142_5.summarized <- exp.tx.grp %*% counts(Pool142_5)
rownames(Pool142_5.summarized) <- rownames(Pool142_5.summarized) %>% str_replace_all(".+.geneId","")
Pool142_6.summarized <- exp.tx.grp %*% counts(Pool142_6)
rownames(Pool142_6.summarized) <- rownames(Pool142_6.summarized) %>% str_replace_all(".+.geneId","")
Pool142_7.summarized <- exp.tx.grp %*% counts(Pool142_7)
rownames(Pool142_7.summarized) <- rownames(Pool142_7.summarized) %>% str_replace_all(".+.geneId","")
Pool144_1.summarized <- exp.tx.grp %*% counts(Pool144_1)
rownames(Pool144_1.summarized) <- rownames(Pool144_1.summarized) %>% str_replace_all(".+.geneId","")
Pool144_2.summarized <- exp.tx.grp %*% counts(Pool144_2)
rownames(Pool144_2.summarized) <- rownames(Pool144_2.summarized) %>% str_replace_all(".+.geneId","")
Pool144_3.summarized <- exp.tx.grp %*% counts(Pool144_3)
rownames(Pool144_3.summarized) <- rownames(Pool144_3.summarized) %>% str_replace_all(".+.geneId","")1.5 Seurat Object Generation
Pool142_1.seurat <- CreateSeuratObject(Pool142_1.summarized,project = "Pool142_Patient1_Responder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_2.seurat <- CreateSeuratObject(Pool142_2.summarized,project = "Pool142_Patient2_NonResponder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_3.seurat <- CreateSeuratObject(Pool142_3.summarized,project = "Pool142_Patient3_NonResponder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_4.seurat <- CreateSeuratObject(Pool142_4.summarized,project = "Pool142_Patient4_NonResponder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_5.seurat <- CreateSeuratObject(Pool142_5.summarized,project = "Pool142_Patient5_Responder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_6.seurat <- CreateSeuratObject(Pool142_6.summarized,project = "Pool142_Patient6_Responder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_7.seurat <- CreateSeuratObject(Pool142_7.summarized,project = "Pool142_Patient7_NonResponder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool144_1.seurat <- CreateSeuratObject(Pool144_1.summarized,project = "Pool144_Patient1_Responder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool144_2.seurat <- CreateSeuratObject(Pool144_2.summarized,project = "Pool144_Patient2_NonResponder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool144_3.seurat <- CreateSeuratObject(Pool144_3.summarized,project = "Pool144_Patient3_NonResponder")Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
1.6 Rename Cell Ids
Pool142_1.seurat <- RenameCells(Pool142_1.seurat,add.cell.id = "Pool142_1_R")
Pool142_2.seurat <- RenameCells(Pool142_2.seurat,add.cell.id = "Pool142_2_NR")
Pool142_3.seurat <- RenameCells(Pool142_3.seurat,add.cell.id = "Pool142_3_NR")
Pool142_4.seurat <- RenameCells(Pool142_4.seurat,add.cell.id = "Pool142_4_NR")
Pool142_5.seurat <- RenameCells(Pool142_5.seurat,add.cell.id = "Pool142_5_R")
Pool142_6.seurat <- RenameCells(Pool142_6.seurat,add.cell.id = "Pool142_6_R")
Pool142_7.seurat <- RenameCells(Pool142_7.seurat,add.cell.id = "Pool142_7_NR")
Pool144_1.seurat <- RenameCells(Pool144_1.seurat,add.cell.id = "Pool144_1_R")
Pool144_2.seurat <- RenameCells(Pool144_2.seurat,add.cell.id = "Pool144_2_NR")
Pool144_3.seurat <- RenameCells(Pool144_3.seurat,add.cell.id = "Pool144_3_NR")1.7 Merge Samples
merged <- merge(x = Pool142_1.seurat,y = c(Pool142_2.seurat,Pool142_3.seurat,Pool142_4.seurat, Pool142_5.seurat, Pool142_6.seurat, Pool142_7.seurat, Pool144_1.seurat, Pool144_2.seurat, Pool144_3.seurat))
merged[["percent.mt"]] <- PercentageFeatureSet(merged, pattern = "^MT-")1.8 Subset by Mitochondrial Content
merged <- subset(merged, nCount_RNA > 1000 & nFeature_RNA > 500)
merged <- RunMiQC(merged,
percent.mt = "percent.mt",
nFeature_RNA = "nFeature_RNA",
posterior.cutoff = 0.7,
model.slot = "flexmix_model")Warning: Adding a command log without an assay associated with it
merged <- subset(merged, miQC.keep == "keep")
dim(merged)[1] 61578 120739
1.9 QC Feature Plotting
# QC after subsetting
VlnPlot(merged, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)Warning: Default search for "data" layer in "RNA" assay yielded no results;
utilizing "counts" layer instead.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
1.10 Adding Meta Data Labels
condition <- str_split_i(colnames(merged),"_",3)
Batch <- str_split_i(colnames(merged),"_",1)
split_colnames <- str_split(colnames(merged),"_")
Sample <- sapply(split_colnames, function(x) {
paste(x[1:3], collapse = "_")
})
merged <- AddMetaData(merged,condition,col.name="Condition")
merged <- AddMetaData(merged,Batch,col.name="Batch")
merged <- AddMetaData(merged,Sample,col.name="Sample")
merged[['RNA']] <- JoinLayers(merged[['RNA']])
merged[["RNA"]] <- split(merged[["RNA"]], f = merged$Sample)
saveRDS(merged,"/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/analysis_031224/merged.rds")