library(igraph, lib.loc = "/homes8/bohoon/R/x86_64-pc-linux-gnu-library/4.3")
library(scRepertoire)
library(Seurat)
library(dplyr)
library(tidyverse)
library(ggplot2)5 scTCR Analysis
5.1 Environment Setup
5.2 Reading in Integrated Seurat Objects
merged_integrated <- readRDS("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/analysis_052124/gated_altered_threshold_1_integrated.rds")5.3 Reading in TCR Clones from Cellranger Output
Pool142_1_R <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt1_R/outs/filtered_contig_annotations.csv")
Pool142_2_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt2_NR/outs/filtered_contig_annotations.csv")
Pool142_3_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt3_NR/outs/filtered_contig_annotations.csv")
Pool142_4_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt4_NR/outs/filtered_contig_annotations.csv")
Pool142_5_R <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt5_R/outs/filtered_contig_annotations.csv")
Pool142_6_R <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt6_R/outs/filtered_contig_annotations.csv")
Pool142_7_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt7_NR/outs/filtered_contig_annotations.csv")
Pool144_1_R <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool144_Cellranger/Pool144_Pt1_R/outs/filtered_contig_annotations.csv")
Pool144_2_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool144_Cellranger/Pool144_Pt2_NR/outs/filtered_contig_annotations.csv")
Pool144_3_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool144_Cellranger/Pool144_Pt3_NR/outs/filtered_contig_annotations.csv")5.4 Redefining Clone Definition (Only allow TCR clones with 1 alpha and 1 beta, 2 alpha 1 beta, 2 beta 1 alpha)
contig_list <- list(Pool142_1_R, Pool142_2_NR, Pool142_3_NR,Pool142_4_NR,Pool142_5_R, Pool142_6_R,Pool142_7_NR, Pool144_1_R, Pool144_2_NR, Pool144_3_NR)
sample_list <- c("Pool142_1_R", "Pool142_2_NR", "Pool142_3_NR", "Pool142_4_NR", "Pool142_5_R", "Pool142_6_R", "Pool142_7_NR", "Pool144_1_R", "Pool144_2_NR", "Pool144_3_NR")
# Turn all entries of c_gene into ""
to_empty <- function(df){
df$c_gene <- ""
return(df)
}
contig_list <- lapply(contig_list, to_empty)
combined_TCR <- combineTCR(contig_list,
samples = sample_list,
removeNA = FALSE,
removeMulti = FALSE,
filterMulti = FALSE)
# Count number of TRA and TRB per barcode
count_TRA_TRB <- function(df){
df <- df %>%
mutate(TCR1_count = str_count(TCR1, fixed(";")) + 1,
TCR2_count = str_count(TCR2, fixed(";")) + 1,
TCR1_count = replace_na(TCR1_count, 0),
TCR2_count = replace_na(TCR2_count, 0))
}
# Remove the barcode if it has 2A+2B or 3A/XB or XA/3B
remove_multi_chains <- function(df){
df <- df %>%
# 2A+2B
filter(!(TCR1_count == 2 & TCR2_count == 2),
!(TCR1_count >= 3),
!(TCR2_count >= 3))
}
# Apply counts and filter
combined_TCR <- lapply(combined_TCR, count_TRA_TRB)
combined_TCR <- lapply(combined_TCR, remove_multi_chains)
# Remove NA if it was supposed to be the C gene in TRA
rm_na_TCR1 <- function(df){
df <- df %>%
mutate(TCR1 = str_replace_all(TCR1, "(\\.NA(?=;))|(\\.NA$)", ""))
return(df)
}
# Remove NA if it was supposed to be the C or D gene in TRB (D gene is not seen in bulkTCR data anyway)
rm_na_TCR2 <- function(df){
df <- df %>%
mutate(TCR2 = str_replace_all(TCR2, "(\\.NA(?=;))|(\\.NA(?=.))|(\\.NA$)", ""))
return(df)
}
# Apply removals
combined_TCR <- lapply(combined_TCR, rm_na_TCR1)
combined_TCR <- lapply(combined_TCR, rm_na_TCR2)
# Create new columns: CTVJaa, CTbeta, CTalpha
create_columns <- function(df){
df <- df %>%
mutate(vjaa = paste0(TCR1, ";", cdr3_aa1, "_", TCR2, ";", cdr3_aa2),
alpha = paste0(TCR1, ";", cdr3_aa1),
beta = paste0(TCR2, ";", cdr3_aa2))
}
# Apply new columns
combined_TCR <- lapply(combined_TCR, create_columns)5.5 Add Patient and Responder Labels to scTCR objects
combined_TCR <- addVariable(combined_TCR,
variable.name = "Patient",
variables = str_replace_all(sample_list, "_[^_]+$", ""))
combined_TCR <- addVariable(combined_TCR,
variable.name = "Responder",
variables = str_replace_all(sample_list,".+_",""))5.6 Merging scTCR objects with scRNA object
colnames(merged_integrated) <- paste0(colnames(merged_integrated), "-1")
merged.TCR <- combineExpression(combined_TCR,
merged_integrated,
cloneCall="vjaa",
group.by = "sample",
cloneSize=c(Single=1, Small=5, Medium=20, Large=50, Hyperexpanded=200),
proportion = FALSE)Warning in .convertClonecall(x): A custom variable vjaa will be used to call
clones
5.7 Clonality on a UMAP
levels(merged.TCR$RNA_snn_res.0.3) <- as.character(1:6)
merged.TCR@meta.data$cloneSize <- as.character(merged.TCR@meta.data$cloneSize)
merged.TCR@meta.data$cloneSize[is.na(merged.TCR@meta.data$cloneSize)] <- "Undetected"
# scTCR clone size
merged.TCR@meta.data$cloneSize <- factor(merged.TCR@meta.data$cloneSize, levels=c("Hyperexpanded (50 < X <= 277)", "Large (20 < X <= 50)", "Medium (5 < X <= 20)", "Small (1 < X <= 5)", "Single (0 < X <= 1)", "Undetected"))
# Match colors to your current scheme
clone_colors <- c(
"Single (0 < X <= 1)" = "#E377C2", # pink
"Small (1 < X <= 5)" = "#17BECF", # cyan
"Medium (5 < X <= 20)" = "#33A02C", # green
"Large (20 < X <= 50)" = "#BCBD22", # olive
"Hyperexpanded (50 < X <= 277)" = "#FF9896", # salmon
"Undetected" = "#7F7F7F" # gray
)
clone_size <- DimPlot(
merged.TCR,
group.by = "cloneSize",
reduction = "umap.harmony",
cols = clone_colors
) + guides(color = guide_legend(override.aes = list(size = 8))) +
labs(title = '', x = "UMAP1", y = "UMAP2", color = "Clonality")
clone_size5.8 Clonal Proportions of each Clone Size Category
Clonal_Proportions <- clonalOccupy(
merged.TCR,
proportion = TRUE,
x.axis = "RNA_snn_res.0.3",
label = FALSE
) + labs(fill = "Clonality")
Clonal_Proportions5.9 TCR Diversity Visualization
# Extract the metadata
metadata <- merged.TCR@meta.data
# Modify the metadata using dplyr
metadata <- metadata %>%
mutate(cluster_sample = paste(Sample, RNA_snn_res.0.3, sep = "_"))
metadata <- metadata %>%
mutate(cluster_response = paste(Condition, RNA_snn_res.0.3, sep = "_"))
# Update the Seurat object with the modified metadata
merged.TCR@meta.data <- metadata
# Diversity Calculation
Cluster_by_Sample <- clonalDiversity(merged.TCR,
cloneCall = "vjaa",
group.by="cluster_sample",
x.axis="RNA_snn_res.0.3",
metrics = c("shannon","norm.entropy"), palette="Set3")Warning in .convertClonecall(x): A custom variable vjaa will be used to call
clones
Warning in .convertClonecall(x): A custom variable vjaa will be used to call
clones
# Normalized Entropy
Cluster_by_Sample <- Cluster_by_Sample$data[Cluster_by_Sample$data$variable=="norm.entropy",]
Cluster_by_Sample$patient_label <- gsub("_\\d+$", "",Cluster_by_Sample$cluster_sample)
Cluster_by_Sample$condition <- sub(".*_", "", Cluster_by_Sample$patient_label)
Cluster_by_Sample$RNA_snn_res.0.3 <- Cluster_by_Sample$RNA_snn_res.0.3
p1 <- ggplot(Cluster_by_Sample, aes(x = condition, y = value, fill = condition)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, size = 2, alpha = 0.7, color = "black") +
facet_wrap(~RNA_snn_res.0.3, scales = "free_x", nrow=1) +
labs(
x = "Cluster",
y = "Normalized Shannon Entropy",
title = "T cell diversity across clusters"
) +
theme_classic() +
ylim(0.85, 1) +
theme(
plot.title = element_text(size=24),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text = element_text(size = 24),
axis.title = element_text(size = 24),
legend.text = element_text(size = 24),
legend.title = element_text(size = 24),
panel.grid.major.x = element_blank(),
strip.text = element_text(size = 24, face = "bold")
)
p1Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_point()`).
sessionInfo()R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so; LAPACK version 3.9.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: America/New_York
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 purrr_1.0.2
[5] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 tidyverse_2.0.0
[9] dplyr_1.1.4 Seurat_5.1.0 SeuratObject_5.0.2 sp_2.2-0
[13] scRepertoire_2.0.6 ggplot2_3.5.1 igraph_2.1.1
loaded via a namespace (and not attached):
[1] cubature_2.1.1 RcppAnnoy_0.0.22
[3] splines_4.3.2 later_1.4.1
[5] bitops_1.0-9 R.oo_1.27.0
[7] polyclip_1.10-7 fastDummies_1.7.4
[9] lifecycle_1.0.4 globals_0.16.3
[11] lattice_0.22-7 MASS_7.3-60.0.1
[13] magrittr_2.0.3 plotly_4.10.4
[15] rmarkdown_2.29 remotes_2.5.0
[17] httpuv_1.6.15 sctransform_0.4.1
[19] spam_2.11-1 spatstat.sparse_3.1-0
[21] reticulate_1.31 cowplot_1.1.3.9000
[23] pbapply_1.7-2 RColorBrewer_1.1-3
[25] abind_1.4-8 zlibbioc_1.48.2
[27] Rtsne_0.17 GenomicRanges_1.54.1
[29] R.utils_2.13.0 ggraph_2.2.1
[31] BiocGenerics_0.48.1 RCurl_1.98-1.17
[33] hash_2.2.6.3 tweenr_2.0.3
[35] evmix_2.12 GenomeInfoDbData_1.2.11
[37] IRanges_2.36.0 S4Vectors_0.40.2
[39] ggrepel_0.9.6 irlba_2.3.5.1
[41] listenv_0.9.1 spatstat.utils_3.1-1
[43] SeuratWrappers_0.3.19 iNEXT_3.0.1
[45] MatrixModels_0.5-4 goftest_1.2-3
[47] RSpectra_0.16-2 spatstat.random_3.3-2
[49] fitdistrplus_1.2-2 parallelly_1.43.0
[51] leiden_0.4.3.1 codetools_0.2-20
[53] DelayedArray_0.28.0 ggforce_0.4.2
[55] tidyselect_1.2.1 farver_2.1.2
[57] viridis_0.6.5 matrixStats_1.5.0
[59] stats4_4.3.2 spatstat.explore_3.3-3
[61] jsonlite_2.0.0 tidygraph_1.3.1
[63] progressr_0.14.0 ggridges_0.5.6
[65] ggalluvial_0.12.5 survival_3.8-3
[67] tools_4.3.2 stringdist_0.9.12
[69] ica_1.0-3 Rcpp_1.1.0
[71] glue_1.8.0 gridExtra_2.3
[73] SparseArray_1.4.8 xfun_0.52
[75] MatrixGenerics_1.14.0 GenomeInfoDb_1.38.8
[77] withr_3.0.2 BiocManager_1.30.25
[79] fastmap_1.2.0 SparseM_1.84-2
[81] rsvd_1.0.5 digest_0.6.37
[83] timechange_0.3.0 R6_2.6.1
[85] mime_0.13 colorspace_2.1-1
[87] scattermore_1.2 tensor_1.5
[89] dichromat_2.0-0.1 spatstat.data_3.1-2
[91] R.methodsS3_1.8.2 generics_0.1.3
[93] data.table_1.17.0 graphlayouts_1.2.2
[95] httr_1.4.7 htmlwidgets_1.6.4
[97] S4Arrays_1.4.1 uwot_0.2.3
[99] pkgconfig_2.0.3 gtable_0.3.5
[101] lmtest_0.9-40 SingleCellExperiment_1.24.0
[103] XVector_0.42.0 htmltools_0.5.8.1
[105] dotCall64_1.2 scales_1.4.0
[107] Biobase_2.62.0 png_0.1-8
[109] spatstat.univar_3.0-1 ggdendro_0.2.0
[111] knitr_1.50 rstudioapi_0.17.1
[113] tzdb_0.5.0 reshape2_1.4.4
[115] rjson_0.2.23 nlme_3.1-168
[117] cachem_1.0.8 zoo_1.8-13
[119] KernSmooth_2.23-26 parallel_4.3.2
[121] miniUI_0.1.1.1 pillar_1.10.1
[123] grid_4.3.2 vctrs_0.6.5
[125] RANN_2.6.2 VGAM_1.1-13
[127] promises_1.3.0 xtable_1.8-4
[129] cluster_2.1.8.1 evaluate_1.0.3
[131] truncdist_1.0-2 cli_3.6.4
[133] compiler_4.3.2 rlang_1.1.5
[135] crayon_1.5.3 future.apply_1.11.3
[137] labeling_0.4.3 plyr_1.8.9
[139] stringi_1.8.7 viridisLite_0.4.2
[141] deldir_2.0-4 assertthat_0.2.1
[143] gsl_2.1-8 lazyeval_0.2.2
[145] spatstat.geom_3.3-2 quantreg_5.98
[147] Matrix_1.6-5 RcppHNSW_0.6.0
[149] hms_1.1.3 patchwork_1.2.0
[151] future_1.34.0 shiny_1.9.1
[153] SummarizedExperiment_1.32.0 evd_2.3-7.1
[155] ROCR_1.0-11 memoise_2.0.1