5  scTCR Analysis

5.1 Environment Setup

library(igraph, lib.loc = "/homes8/bohoon/R/x86_64-pc-linux-gnu-library/4.3")
library(scRepertoire)
library(Seurat)
library(dplyr)
library(tidyverse)
library(ggplot2)

5.2 Reading in Integrated Seurat Objects

merged_integrated <- readRDS("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/analysis_052124/gated_altered_threshold_1_integrated.rds")

5.3 Reading in TCR Clones from Cellranger Output

Pool142_1_R <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt1_R/outs/filtered_contig_annotations.csv")
Pool142_2_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt2_NR/outs/filtered_contig_annotations.csv")
Pool142_3_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt3_NR/outs/filtered_contig_annotations.csv")
Pool142_4_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt4_NR/outs/filtered_contig_annotations.csv")

Pool142_5_R <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt5_R/outs/filtered_contig_annotations.csv")
Pool142_6_R <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt6_R/outs/filtered_contig_annotations.csv")
Pool142_7_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool142_Cellranger/Pool142_Pt7_NR/outs/filtered_contig_annotations.csv")

Pool144_1_R <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool144_Cellranger/Pool144_Pt1_R/outs/filtered_contig_annotations.csv")
Pool144_2_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool144_Cellranger/Pool144_Pt2_NR/outs/filtered_contig_annotations.csv")
Pool144_3_NR <- read.csv("/jsimonlab/users/bshim/Shuntaro_scRNA_TCR/Pool144_Cellranger/Pool144_Pt3_NR/outs/filtered_contig_annotations.csv")

5.4 Redefining Clone Definition (Only allow TCR clones with 1 alpha and 1 beta, 2 alpha 1 beta, 2 beta 1 alpha)

contig_list <- list(Pool142_1_R, Pool142_2_NR, Pool142_3_NR,Pool142_4_NR,Pool142_5_R, Pool142_6_R,Pool142_7_NR, Pool144_1_R, Pool144_2_NR, Pool144_3_NR)
sample_list <- c("Pool142_1_R", "Pool142_2_NR", "Pool142_3_NR", "Pool142_4_NR", "Pool142_5_R", "Pool142_6_R", "Pool142_7_NR", "Pool144_1_R", "Pool144_2_NR", "Pool144_3_NR")

# Turn all entries of c_gene into ""
to_empty <- function(df){
  df$c_gene <- ""
  return(df)
}
contig_list <- lapply(contig_list, to_empty)

combined_TCR <- combineTCR(contig_list, 
                           samples = sample_list,
                           removeNA = FALSE, 
                           removeMulti = FALSE, 
                           filterMulti = FALSE)

# Count number of TRA and TRB per barcode
count_TRA_TRB <- function(df){
  df <- df %>%
    mutate(TCR1_count = str_count(TCR1, fixed(";")) + 1,
           TCR2_count = str_count(TCR2, fixed(";")) + 1,
           TCR1_count = replace_na(TCR1_count, 0),
           TCR2_count = replace_na(TCR2_count, 0))
}

# Remove the barcode if it has 2A+2B or 3A/XB or XA/3B
remove_multi_chains <- function(df){
  df <- df %>%
    # 2A+2B
    filter(!(TCR1_count == 2 & TCR2_count == 2),
           !(TCR1_count >= 3),
           !(TCR2_count >= 3))
}

# Apply counts and filter
combined_TCR <- lapply(combined_TCR, count_TRA_TRB)
combined_TCR <- lapply(combined_TCR, remove_multi_chains)

# Remove NA if it was supposed to be the C gene in TRA
rm_na_TCR1 <- function(df){
  df <- df %>%
    mutate(TCR1 = str_replace_all(TCR1, "(\\.NA(?=;))|(\\.NA$)", ""))
  return(df)
}

# Remove NA if it was supposed to be the C or D gene in TRB (D gene is not seen in bulkTCR data anyway)
rm_na_TCR2 <- function(df){
  df <- df %>%
    mutate(TCR2 = str_replace_all(TCR2, "(\\.NA(?=;))|(\\.NA(?=.))|(\\.NA$)", ""))
  return(df)
}

# Apply removals
combined_TCR <- lapply(combined_TCR, rm_na_TCR1)
combined_TCR <- lapply(combined_TCR, rm_na_TCR2)

# Create new columns: CTVJaa, CTbeta, CTalpha
create_columns <- function(df){
  df <- df %>%
    mutate(vjaa = paste0(TCR1, ";", cdr3_aa1, "_", TCR2, ";", cdr3_aa2),
           alpha = paste0(TCR1, ";", cdr3_aa1),
           beta = paste0(TCR2, ";", cdr3_aa2))
}

# Apply new columns
combined_TCR <- lapply(combined_TCR, create_columns)

5.5 Add Patient and Responder Labels to scTCR objects

combined_TCR <- addVariable(combined_TCR, 
                            variable.name = "Patient", 
                            variables = str_replace_all(sample_list, "_[^_]+$", ""))

combined_TCR <- addVariable(combined_TCR, 
                            variable.name = "Responder", 
                            variables = str_replace_all(sample_list,".+_",""))

5.6 Merging scTCR objects with scRNA object

colnames(merged_integrated) <- paste0(colnames(merged_integrated), "-1")
merged.TCR <- combineExpression(combined_TCR, 
                         merged_integrated, 
                         cloneCall="vjaa", 
                         group.by = "sample", 
                         cloneSize=c(Single=1, Small=5, Medium=20, Large=50, Hyperexpanded=200),
                         proportion = FALSE)
Warning in .convertClonecall(x): A custom variable vjaa will be used to call
clones

5.7 Clonality on a UMAP

levels(merged.TCR$RNA_snn_res.0.3) <- as.character(1:6)
merged.TCR@meta.data$cloneSize <- as.character(merged.TCR@meta.data$cloneSize)
merged.TCR@meta.data$cloneSize[is.na(merged.TCR@meta.data$cloneSize)] <- "Undetected"

# scTCR clone size
merged.TCR@meta.data$cloneSize <- factor(merged.TCR@meta.data$cloneSize, levels=c("Hyperexpanded (50 < X <= 277)", "Large (20 < X <= 50)", "Medium (5 < X <= 20)", "Small (1 < X <= 5)", "Single (0 < X <= 1)", "Undetected"))


# Match colors to your current scheme
clone_colors <- c(
  "Single (0 < X <= 1)"      = "#E377C2",  # pink
  "Small (1 < X <= 5)"       = "#17BECF",  # cyan
  "Medium (5 < X <= 20)"     = "#33A02C",  # green
  "Large (20 < X <= 50)"     = "#BCBD22",  # olive
  "Hyperexpanded (50 < X <= 277)" = "#FF9896",  # salmon
  "Undetected"               = "#7F7F7F"   # gray
)


clone_size <- DimPlot(
  merged.TCR,
  group.by = "cloneSize",
  reduction = "umap.harmony",
  cols = clone_colors
) + guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(title = '', x = "UMAP1", y = "UMAP2", color = "Clonality")

clone_size

5.8 Clonal Proportions of each Clone Size Category

Clonal_Proportions <- clonalOccupy(
  merged.TCR,
  proportion = TRUE,
  x.axis = "RNA_snn_res.0.3",
  label = FALSE
) + labs(fill = "Clonality")

Clonal_Proportions

5.9 TCR Diversity Visualization

# Extract the metadata
metadata <- merged.TCR@meta.data

# Modify the metadata using dplyr
metadata <- metadata %>%
  mutate(cluster_sample = paste(Sample, RNA_snn_res.0.3, sep = "_"))

metadata <- metadata %>%
  mutate(cluster_response = paste(Condition, RNA_snn_res.0.3, sep = "_"))

# Update the Seurat object with the modified metadata
merged.TCR@meta.data <- metadata

# Diversity Calculation
Cluster_by_Sample <- clonalDiversity(merged.TCR, 
                cloneCall = "vjaa",
                group.by="cluster_sample",
                x.axis="RNA_snn_res.0.3",
                metrics = c("shannon","norm.entropy"), palette="Set3")
Warning in .convertClonecall(x): A custom variable vjaa will be used to call
clones
Warning in .convertClonecall(x): A custom variable vjaa will be used to call
clones
# Normalized Entropy 
Cluster_by_Sample <- Cluster_by_Sample$data[Cluster_by_Sample$data$variable=="norm.entropy",]
Cluster_by_Sample$patient_label <- gsub("_\\d+$", "",Cluster_by_Sample$cluster_sample)
Cluster_by_Sample$condition <- sub(".*_", "", Cluster_by_Sample$patient_label)
Cluster_by_Sample$RNA_snn_res.0.3 <- Cluster_by_Sample$RNA_snn_res.0.3


p1 <- ggplot(Cluster_by_Sample, aes(x = condition, y = value, fill = condition)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, size = 2, alpha = 0.7, color = "black") +
  facet_wrap(~RNA_snn_res.0.3, scales = "free_x", nrow=1) +
  labs(
    x = "Cluster",
    y = "Normalized Shannon Entropy",
    title = "T cell diversity across clusters"
  ) +
  theme_classic() +
  ylim(0.85, 1) + 
  theme(
    plot.title = element_text(size=24),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    panel.grid.major.x = element_blank(),
    strip.text = element_text(size = 24, face = "bold")
  )

p1
Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_point()`).

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] lubridate_1.9.4    forcats_1.0.0      stringr_1.5.1      purrr_1.0.2       
 [5] readr_2.1.5        tidyr_1.3.1        tibble_3.2.1       tidyverse_2.0.0   
 [9] dplyr_1.1.4        Seurat_5.1.0       SeuratObject_5.0.2 sp_2.2-0          
[13] scRepertoire_2.0.6 ggplot2_3.5.1      igraph_2.1.1      

loaded via a namespace (and not attached):
  [1] cubature_2.1.1              RcppAnnoy_0.0.22           
  [3] splines_4.3.2               later_1.4.1                
  [5] bitops_1.0-9                R.oo_1.27.0                
  [7] polyclip_1.10-7             fastDummies_1.7.4          
  [9] lifecycle_1.0.4             globals_0.16.3             
 [11] lattice_0.22-7              MASS_7.3-60.0.1            
 [13] magrittr_2.0.3              plotly_4.10.4              
 [15] rmarkdown_2.29              remotes_2.5.0              
 [17] httpuv_1.6.15               sctransform_0.4.1          
 [19] spam_2.11-1                 spatstat.sparse_3.1-0      
 [21] reticulate_1.31             cowplot_1.1.3.9000         
 [23] pbapply_1.7-2               RColorBrewer_1.1-3         
 [25] abind_1.4-8                 zlibbioc_1.48.2            
 [27] Rtsne_0.17                  GenomicRanges_1.54.1       
 [29] R.utils_2.13.0              ggraph_2.2.1               
 [31] BiocGenerics_0.48.1         RCurl_1.98-1.17            
 [33] hash_2.2.6.3                tweenr_2.0.3               
 [35] evmix_2.12                  GenomeInfoDbData_1.2.11    
 [37] IRanges_2.36.0              S4Vectors_0.40.2           
 [39] ggrepel_0.9.6               irlba_2.3.5.1              
 [41] listenv_0.9.1               spatstat.utils_3.1-1       
 [43] SeuratWrappers_0.3.19       iNEXT_3.0.1                
 [45] MatrixModels_0.5-4          goftest_1.2-3              
 [47] RSpectra_0.16-2             spatstat.random_3.3-2      
 [49] fitdistrplus_1.2-2          parallelly_1.43.0          
 [51] leiden_0.4.3.1              codetools_0.2-20           
 [53] DelayedArray_0.28.0         ggforce_0.4.2              
 [55] tidyselect_1.2.1            farver_2.1.2               
 [57] viridis_0.6.5               matrixStats_1.5.0          
 [59] stats4_4.3.2                spatstat.explore_3.3-3     
 [61] jsonlite_2.0.0              tidygraph_1.3.1            
 [63] progressr_0.14.0            ggridges_0.5.6             
 [65] ggalluvial_0.12.5           survival_3.8-3             
 [67] tools_4.3.2                 stringdist_0.9.12          
 [69] ica_1.0-3                   Rcpp_1.1.0                 
 [71] glue_1.8.0                  gridExtra_2.3              
 [73] SparseArray_1.4.8           xfun_0.52                  
 [75] MatrixGenerics_1.14.0       GenomeInfoDb_1.38.8        
 [77] withr_3.0.2                 BiocManager_1.30.25        
 [79] fastmap_1.2.0               SparseM_1.84-2             
 [81] rsvd_1.0.5                  digest_0.6.37              
 [83] timechange_0.3.0            R6_2.6.1                   
 [85] mime_0.13                   colorspace_2.1-1           
 [87] scattermore_1.2             tensor_1.5                 
 [89] dichromat_2.0-0.1           spatstat.data_3.1-2        
 [91] R.methodsS3_1.8.2           generics_0.1.3             
 [93] data.table_1.17.0           graphlayouts_1.2.2         
 [95] httr_1.4.7                  htmlwidgets_1.6.4          
 [97] S4Arrays_1.4.1              uwot_0.2.3                 
 [99] pkgconfig_2.0.3             gtable_0.3.5               
[101] lmtest_0.9-40               SingleCellExperiment_1.24.0
[103] XVector_0.42.0              htmltools_0.5.8.1          
[105] dotCall64_1.2               scales_1.4.0               
[107] Biobase_2.62.0              png_0.1-8                  
[109] spatstat.univar_3.0-1       ggdendro_0.2.0             
[111] knitr_1.50                  rstudioapi_0.17.1          
[113] tzdb_0.5.0                  reshape2_1.4.4             
[115] rjson_0.2.23                nlme_3.1-168               
[117] cachem_1.0.8                zoo_1.8-13                 
[119] KernSmooth_2.23-26          parallel_4.3.2             
[121] miniUI_0.1.1.1              pillar_1.10.1              
[123] grid_4.3.2                  vctrs_0.6.5                
[125] RANN_2.6.2                  VGAM_1.1-13                
[127] promises_1.3.0              xtable_1.8-4               
[129] cluster_2.1.8.1             evaluate_1.0.3             
[131] truncdist_1.0-2             cli_3.6.4                  
[133] compiler_4.3.2              rlang_1.1.5                
[135] crayon_1.5.3                future.apply_1.11.3        
[137] labeling_0.4.3              plyr_1.8.9                 
[139] stringi_1.8.7               viridisLite_0.4.2          
[141] deldir_2.0-4                assertthat_0.2.1           
[143] gsl_2.1-8                   lazyeval_0.2.2             
[145] spatstat.geom_3.3-2         quantreg_5.98              
[147] Matrix_1.6-5                RcppHNSW_0.6.0             
[149] hms_1.1.3                   patchwork_1.2.0            
[151] future_1.34.0               shiny_1.9.1                
[153] SummarizedExperiment_1.32.0 evd_2.3-7.1                
[155] ROCR_1.0-11                 memoise_2.0.1