1  scRNA: Load and Compile Seurat Object

1.1 Environment Setup

library(gread)
library(Matrix)
library(devtools)
library(Seurat)
options(Seurat.object.assay.version = 'v5')
library(tidyverse)
library(alevinQC)
library(fishpond)
library(miQC)
library(SeuratWrappers)
library(flexmix)
options(knitr.duplicate.label = "allow")

1.2 Sample QC

# Generate QC report in a separate html file
alevin_out = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin/"
alevin_q = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant/"
alevin_crl = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/"
alevinFryQCReport(paste(alevin_out,"Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
        paste(alevin_q, "Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
        paste(alevin_crl, "Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
        outputFormat = "html_document",
        outputDir = paste(alevin_crl,"Pool142-2_1_S1_ME_L001_R1_001", sep = ""),
        outputFile = "Pool142-2_1_S1_ME_L001_R1_001.html",
        sampleId = "Pool142-2_1_S1_ME_L001_R1_001",forceOverwrite=TRUE
)
Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_1_S1_ME_L001_R1_001/Pool142-2_1_S1_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
        paste(alevin_q, "Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
        paste(alevin_crl, "Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
        outputFormat = "html_document",
        outputDir = paste(alevin_crl,"Pool142-2_2_S2_ME_L001_R1_001", sep = ""),
        outputFile = "Pool142-2_2_S2_ME_L001_R1_001.html",
        sampleId = "Pool142-2_2_S2_ME_L001_R1_001",forceOverwrite=TRUE
)
Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_2_S2_ME_L001_R1_001/Pool142-2_2_S2_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
        paste(alevin_q, "Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
        paste(alevin_crl, "Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
        outputFormat = "html_document",
        outputDir = paste(alevin_crl,"Pool142-2_3_S3_ME_L001_R1_001", sep = ""),
        outputFile = "Pool142-2_3_S3_ME_L001_R1_001.html",
        sampleId = "Pool142-2_3_S3_ME_L001_R1_001",forceOverwrite=TRUE
)
Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_3_S3_ME_L001_R1_001/Pool142-2_3_S3_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
        paste(alevin_q, "Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
        paste(alevin_crl, "Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
        outputFormat = "html_document",
        outputDir = paste(alevin_crl,"Pool142-2_4_S4_ME_L001_R1_001", sep = ""),
        outputFile = "Pool142-2_4_S4_ME_L001_R1_001.html",
        sampleId = "Pool142-2_4_S4_ME_L001_R1_001",forceOverwrite=TRUE
)
Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_4_S4_ME_L001_R1_001/Pool142-2_4_S4_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
        paste(alevin_q, "Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
        paste(alevin_crl, "Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
        outputFormat = "html_document",
        outputDir = paste(alevin_crl,"Pool142-2_5_S5_ME_L001_R1_001", sep = ""),
        outputFile = "Pool142-2_5_S5_ME_L001_R1_001.html",
        sampleId = "Pool142-2_5_S5_ME_L001_R1_001",forceOverwrite=TRUE
)
Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_5_S5_ME_L001_R1_001/Pool142-2_5_S5_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
        paste(alevin_q, "Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
        paste(alevin_crl, "Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
        outputFormat = "html_document",
        outputDir = paste(alevin_crl,"Pool142-2_6_S6_ME_L001_R1_001", sep = ""),
        outputFile = "Pool142-2_6_S6_ME_L001_R1_001.html",
        sampleId = "Pool142-2_6_S6_ME_L001_R1_001",forceOverwrite=TRUE
)
Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_6_S6_ME_L001_R1_001/Pool142-2_6_S6_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.
alevinFryQCReport(paste(alevin_out,"Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
        paste(alevin_q, "Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
        paste(alevin_crl, "Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
        outputFormat = "html_document",
        outputDir = paste(alevin_crl,"Pool142-2_7_S7_ME_L001_R1_001", sep = ""),
        outputFile = "Pool142-2_7_S7_ME_L001_R1_001.html",
        sampleId = "Pool142-2_7_S7_ME_L001_R1_001",forceOverwrite=TRUE
)
Warning: The file
/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/Pool142-2_7_S7_ME_L001_R1_001/Pool142-2_7_S7_ME_L001_R1_001.html
already exists and will be overwritten, since forceOverwrite = TRUE.

1.3 Alevin Fry Outputs Loading

#| label: Sample Loading

pool142_alevin_crl = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_alevin_fry_output/alevin_quant_crlikeem/"
pool144_alevin_crl = "/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool144_alevin_fry_output/alevin_quant_crlikeem/"

Pool142_1 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_1_S1_ME_L001_R1_001", sep = ""),  
    outputFormat = "snRNA")   #responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 11817 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_2 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_2_S2_ME_L001_R1_001", sep = ""), 
    outputFormat = "snRNA")  #non-responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 12085 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_3 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_3_S3_ME_L001_R1_001", sep = ""), 
    outputFormat = "snRNA")   #non-responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 14437 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_4 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_4_S4_ME_L001_R1_001", sep = ""), 
    outputFormat = "snRNA")    #non-responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 11961 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_5 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_5_S5_ME_L001_R1_001", sep = ""), 
    outputFormat = "snRNA")     #responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 25476 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_6 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_6_S6_ME_L001_R1_001", sep = ""), 
    outputFormat = "snRNA")    #responder 
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 39430 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool142_7 <- loadFry(paste(pool142_alevin_crl, "Pool142-2_7_S7_ME_L001_R1_001", sep = ""), 
    outputFormat = "snRNA")   #non-responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 10771 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool144_1 <- loadFry(paste(pool144_alevin_crl, "Pool144_1_S1_L004_R1_001", sep = ""), 
    outputFormat = "snRNA")   #responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 13005 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool144_2 <- loadFry(paste(pool144_alevin_crl, "Pool144_2_S2_L004_R1_001", sep = ""), 
    outputFormat = "snRNA")   #non-responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 10799 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
Pool144_3 <- loadFry(paste(pool144_alevin_crl, "Pool144_3_S3_L004_R1_001", sep = ""), 
    outputFormat = "snRNA")   #non-responder
locating quant file
Reading meta data
USA mode: TRUE
Processing 63187 genes and 1571 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done

1.4 Transcript Level Counts to Gene Level Counts

#| label: Transcript Level Counts to Gene Level Counts

tx2gene <- read.table("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/ref/gencode.v45.annotation_splici_fl85/gene_id_to_name.tsv",
                      header=F,
                      sep="\t",
                      col.names=c("tx","gene"))

# Use matrix multiplication to group and sum counts by gene symbol
exp.txId <- rownames(counts(Pool142_1))
exp.geneId <- as.vector(tx2gene$gene[match(exp.txId, tx2gene$tx)])
exp.tx.grp <- t(sparse.model.matrix(~0 + exp.geneId))

Pool142_1.summarized <- exp.tx.grp %*% counts(Pool142_1)
rownames(Pool142_1.summarized) <- rownames(Pool142_1.summarized) %>% str_replace_all(".+.geneId","")
Pool142_2.summarized <- exp.tx.grp %*% counts(Pool142_2)
rownames(Pool142_2.summarized) <- rownames(Pool142_2.summarized) %>% str_replace_all(".+.geneId","")
Pool142_3.summarized <- exp.tx.grp %*% counts(Pool142_3)
rownames(Pool142_3.summarized) <- rownames(Pool142_3.summarized) %>% str_replace_all(".+.geneId","")
Pool142_4.summarized <- exp.tx.grp %*% counts(Pool142_4)
rownames(Pool142_4.summarized) <- rownames(Pool142_4.summarized) %>% str_replace_all(".+.geneId","")
Pool142_5.summarized <- exp.tx.grp %*% counts(Pool142_5)
rownames(Pool142_5.summarized) <- rownames(Pool142_5.summarized) %>% str_replace_all(".+.geneId","")
Pool142_6.summarized <- exp.tx.grp %*% counts(Pool142_6)
rownames(Pool142_6.summarized) <- rownames(Pool142_6.summarized) %>% str_replace_all(".+.geneId","")
Pool142_7.summarized <- exp.tx.grp %*% counts(Pool142_7)
rownames(Pool142_7.summarized) <- rownames(Pool142_7.summarized) %>% str_replace_all(".+.geneId","")

Pool144_1.summarized <- exp.tx.grp %*% counts(Pool144_1)
rownames(Pool144_1.summarized) <- rownames(Pool144_1.summarized) %>% str_replace_all(".+.geneId","")
Pool144_2.summarized <- exp.tx.grp %*% counts(Pool144_2)
rownames(Pool144_2.summarized) <- rownames(Pool144_2.summarized) %>% str_replace_all(".+.geneId","")
Pool144_3.summarized <- exp.tx.grp %*% counts(Pool144_3)
rownames(Pool144_3.summarized) <- rownames(Pool144_3.summarized) %>% str_replace_all(".+.geneId","")

1.5 Seurat Object Generation

Pool142_1.seurat <- CreateSeuratObject(Pool142_1.summarized,project = "Pool142_Patient1_Responder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_2.seurat <- CreateSeuratObject(Pool142_2.summarized,project = "Pool142_Patient2_NonResponder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_3.seurat <- CreateSeuratObject(Pool142_3.summarized,project = "Pool142_Patient3_NonResponder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_4.seurat <- CreateSeuratObject(Pool142_4.summarized,project = "Pool142_Patient4_NonResponder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_5.seurat <- CreateSeuratObject(Pool142_5.summarized,project = "Pool142_Patient5_Responder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_6.seurat <- CreateSeuratObject(Pool142_6.summarized,project = "Pool142_Patient6_Responder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool142_7.seurat <- CreateSeuratObject(Pool142_7.summarized,project = "Pool142_Patient7_NonResponder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool144_1.seurat <- CreateSeuratObject(Pool144_1.summarized,project = "Pool144_Patient1_Responder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool144_2.seurat <- CreateSeuratObject(Pool144_2.summarized,project = "Pool144_Patient2_NonResponder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
Pool144_3.seurat <- CreateSeuratObject(Pool144_3.summarized,project = "Pool144_Patient3_NonResponder")
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')

1.6 Rename Cell Ids

Pool142_1.seurat <- RenameCells(Pool142_1.seurat,add.cell.id = "Pool142_1_R")
Pool142_2.seurat <- RenameCells(Pool142_2.seurat,add.cell.id = "Pool142_2_NR")
Pool142_3.seurat <- RenameCells(Pool142_3.seurat,add.cell.id = "Pool142_3_NR")
Pool142_4.seurat <- RenameCells(Pool142_4.seurat,add.cell.id = "Pool142_4_NR")
Pool142_5.seurat <- RenameCells(Pool142_5.seurat,add.cell.id = "Pool142_5_R")
Pool142_6.seurat <- RenameCells(Pool142_6.seurat,add.cell.id = "Pool142_6_R")
Pool142_7.seurat <- RenameCells(Pool142_7.seurat,add.cell.id = "Pool142_7_NR")

Pool144_1.seurat <- RenameCells(Pool144_1.seurat,add.cell.id = "Pool144_1_R")
Pool144_2.seurat <- RenameCells(Pool144_2.seurat,add.cell.id = "Pool144_2_NR")
Pool144_3.seurat <- RenameCells(Pool144_3.seurat,add.cell.id = "Pool144_3_NR")

1.7 Merge Samples

merged <- merge(x = Pool142_1.seurat,y = c(Pool142_2.seurat,Pool142_3.seurat,Pool142_4.seurat, Pool142_5.seurat, Pool142_6.seurat, Pool142_7.seurat, Pool144_1.seurat, Pool144_2.seurat, Pool144_3.seurat))

merged[["percent.mt"]] <- PercentageFeatureSet(merged, pattern = "^MT-")

1.8 Subset by Mitochondrial Content

merged <- subset(merged, nCount_RNA > 1000 & nFeature_RNA > 500)

merged <- RunMiQC(merged, 
                    percent.mt = "percent.mt", 
                    nFeature_RNA = "nFeature_RNA",
                    posterior.cutoff = 0.7, 
                    model.slot = "flexmix_model")
Warning: Adding a command log without an assay associated with it
merged <- subset(merged, miQC.keep == "keep")
dim(merged)
[1]  61578 120739

1.9 QC Feature Plotting

# QC after subsetting
VlnPlot(merged, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
Warning: Default search for "data" layer in "RNA" assay yielded no results;
utilizing "counts" layer instead.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

1.10 Adding Meta Data Labels

condition <- str_split_i(colnames(merged),"_",3)
Batch <- str_split_i(colnames(merged),"_",1)
split_colnames <- str_split(colnames(merged),"_")
Sample <- sapply(split_colnames, function(x) {
  paste(x[1:3], collapse = "_")
})
merged <- AddMetaData(merged,condition,col.name="Condition")
merged <- AddMetaData(merged,Batch,col.name="Batch")
merged <- AddMetaData(merged,Sample,col.name="Sample")
merged[['RNA']] <- JoinLayers(merged[['RNA']])
merged[["RNA"]] <- split(merged[["RNA"]], f = merged$Sample)
saveRDS(merged,"/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/analysis_031224/merged.rds")