Time-course workflow of flowSpy
Introduction
To illustrate the use of flowSpy for differentiation trajectory reconstruction from time-course FCS data, we used a flow cytometry dataset covering a ten-day hematopoietic differentiation of the hESC line HUES9, generated with a protocol modified from previous work [1]. By adding different cytokine combinations on different days, HUES9 cells (CD90+CD49f+ on Day 0, D0) were directionally differentiated, in succession, into mesodermal cells (FLK1+, D4), hemogenic endothelium (CD34+CD31+CD43-, D6) and hematopoietic stem/progenitor cells (HSPCs, CD34+CD43+CD38-CD45RA-CD90+, D8) (Fig. 4a and Additional file 1: Figure S4). Ten cell surface markers (CD90, CD49f, FLK1, CD34, CD31, CD73, CD43, CD45, CD45RA, and CD38) were used in the flow cytometry analysis to monitor the generation of these cells. In particular, the initial expression of CD31 at D6 and of CD43 at D8 reflected the emergence of endothelial cells and the endothelial-to-hematopoietic transition (EHT), respectively (Fig. 4a and Additional file 1: Figure S4). The aim of this use case was to reconstruct the cellular differentiation trajectory of HUES9 cells and to identify the cell-of-origin of HSPCs using flowSpy.
This tutorial covers the key steps of the flowSpy time-course workflow, including how to calculate pseudotime, how to define cell subsets, and how to rebuild an FSPY object using flowSpy. This use case also provides a framework for time-course cytometric data analysis and may support research on stem cell reprogramming.
Preprocessing
# Loading packages
suppressMessages({
library(ggplot2)
library(flowCore)
library(pheatmap)
library(flowSpy)
library(stringr)
})
###########################################
# Read Flow Cytometry Data
###########################################
# The dataset can be downloaded via
#   `git clone https://github.com/ytdai/flowSpy-dataset.git`
# fcs.path must be modified to point at the directory downloaded from GitHub
fcs.path <- "FCS/time_course/"
fcs.files <- paste0(fcs.path, "D", c(0,2,4,6,8,10), ".fcs")

# Fail early, with a readable message, when fcs.path has not been adjusted
missing.files <- fcs.files[!file.exists(fcs.files)]
if (length(missing.files) > 0) {
  stop("FCS file(s) not found: ", paste(missing.files, collapse = ", "),
       ". Please check fcs.path.", call. = FALSE)
}

###########################################
# Get the expression matrix from FCS file
###########################################
# The seed is fixed so that the merge is reproducible (presumably because
# fixedNum cells are sampled at random from each file - see ?runExprsMerge)
set.seed(1)
fcs.data <- runExprsMerge(fcs.files, comp = FALSE, transformMethod = "none", fixedNum = 2000)

# Refine colnames of fcs data (use case 2):
# map each "<channel><marker>" column name to the plain marker name
recol <- c(`FITC-A<CD43>` = "CD43", `APC-A<CD34>` = "CD34",
           `BV421-A<CD90>` = "CD90", `BV510-A<CD45RA>` = "CD45RA",
           `BV605-A<CD31>` = "CD31", `BV650-A<CD49f>` = "CD49f",
           `BV 735-A<CD73>` = "CD73", `BV786-A<CD45>` = "CD45",
           `PE-A<FLK1>` = "FLK1", `PE-Cy7-A<CD38>` = "CD38")

# A missing channel would otherwise surface as an obscure
# "NAs are not allowed in subscripted assignments" error
missing.channels <- setdiff(names(recol), colnames(fcs.data))
if (length(missing.channels) > 0) {
  stop("Channel(s) not found in the FCS data: ",
       paste(missing.channels, collapse = ", "), call. = FALSE)
}
colnames(fcs.data)[match(names(recol), colnames(fcs.data))] <- recol
# Keep only the renamed marker columns, in the order given by recol
fcs.data <- fcs.data[, recol]

# Build an FSPY object
# If you don't want to see the running log information, set verbose = FALSE
day.list <- c("D0", "D2", "D4", "D6", "D8", "D10")
# The stage of each cell is the part of its row name before the first "_"
meta.data <- data.frame(cell = rownames(fcs.data),
                        stage = str_replace(rownames(fcs.data), regex("_.+"), "") )
# Order the stages chronologically rather than alphabetically
meta.data$stage <- factor(as.character(meta.data$stage), levels = day.list)

markers <- c("CD43", "CD34", "CD90", "CD45RA", "CD31", "CD49f", "CD73", "CD45", "FLK1", "CD38")

fspy <- createFSPY(raw.data = fcs.data, markers = markers,
                   meta.data = meta.data,
                   normalization.method = "log")
Trajectory
# Cluster cells with the self-organizing map (SOM) algorithm.
# xdim = 6 and ydim = 6 set the SOM grid dimensions.
# A random seed is set before each seeded step to make results reproducible;
# the order of the statements below therefore matters and should be kept.
set.seed(80)
fspy <- runCluster(fspy, cluster.method = "som", xdim = 6, ydim = 6)
## Mapping data to SOM
# Do not perform downsampling (downsampling.size = 1)
set.seed(2)
fspy <- processingCluster(fspy, downsampling.size = 1)
# Run Principal Component Analysis (PCA)
fspy <- runFastPCA(fspy)
# Run t-Distributed Stochastic Neighbor Embedding (tSNE)
set.seed(1)
fspy <- runTSNE(fspy)
# Run Diffusion map
fspy <- runDiffusionMap(fspy)
# Run Uniform Manifold Approximation and Projection (UMAP)
fspy <- runUMAP(fspy)
# Build the minimum spanning tree from the first two UMAP dimensions
fspy <- buildTree(fspy, dim.type = "umap", dim.use = 1:2)
###########################################
# This is visualization module
###########################################
# One colour per stage, handed to scale_color_manual() in this order.
# Defined once so that the four stage-coloured plots below stay consistent.
stage.colors <- c("#00599F","#009900","#FF9933",
                  "#FF99FF","#7A06A0","#FF3222")

# Plot marker density
plotMarkerDensity(fspy)

# Plot 2D PCA. And cells are colored by stage
# (the title previously read "tSNE", which mislabelled the PCA plot)
plot2D(fspy, item.use = c("PC_1", "PC_2"), color.by = "stage",
       alpha = 1, main = "PCA", category = "categorical") +
  scale_color_manual(values = stage.colors)

# Plot 2D tSNE. And cells are colored by stage
plot2D(fspy, item.use = c("tSNE_1", "tSNE_2"), color.by = "stage",
       alpha = 1, main = "tSNE", category = "categorical") +
  scale_color_manual(values = stage.colors)

# Plot 2D diffusion maps. And cells are colored by stage
# (the title previously read "tSNE", which mislabelled the diffusion map plot)
plot2D(fspy, item.use = c("DC_1", "DC_2"), color.by = "stage",
       alpha = 1, main = "Diffusion Maps", category = "categorical") +
  scale_color_manual(values = stage.colors)

# Plot 2D UMAP. And cells are colored by stage
# (the title previously read "tSNE", which mislabelled the UMAP plot)
plot2D(fspy, item.use = c("UMAP_1", "UMAP_2"), color.by = "stage",
       alpha = 1, main = "UMAP", category = "categorical") +
  scale_color_manual(values = stage.colors)
# Blue - grey - red gradient used for the marker expression plots below
marker.gradient <- c("#00599F","#EEEEEE","#FF3222")

# Plot 2D tSNE. And cells are colored by cluster id
# NOTE: `show.cluser.id` is kept exactly as spelled in the original call
# (presumably the argument name defined by the package - see ?plot2D)
plot2D(fspy, item.use = c("tSNE_1", "tSNE_2"), color.by = "cluster.id",
       alpha = 1, main = "tSNE", category = "categorical", show.cluser.id = TRUE)

# Plot 2D UMAP. And cells are colored by cluster id
plot2D(fspy, item.use = c("UMAP_1", "UMAP_2"), color.by = "cluster.id",
       alpha = 1, main = "UMAP", category = "categorical", show.cluser.id = TRUE)

# Plot 2D tSNE. And cells are colored by CD43 marker expression
plot2D(fspy, item.use = c("tSNE_1", "tSNE_2"), color.by = "CD43",
       main = "tSNE CD43", category = "numeric") +
  scale_colour_gradientn(colors = marker.gradient)

# Plot 2D UMAP. And cells are colored by CD43 marker expression
plot2D(fspy, item.use = c("UMAP_1", "UMAP_2"), color.by = "CD43",
       main = "UMAP CD43", category = "numeric") +
  scale_colour_gradientn(colors = marker.gradient)

# Plot 2D UMAP. And cells are colored by stage
plot2D(fspy, item.use = c("UMAP_1", "UMAP_2"), color.by = "stage",
       alpha = 1, main = "UMAP", category = "categorical") +
  scale_color_manual(values = c("#00599F","#009900","#FF9933",
                                "#FF99FF","#7A06A0","#FF3222"))
# Blue - grey - red gradient for numeric colouring (marker expression,
# per-stage percentage)
value.gradient <- c("#00599F", "#EEEEEE", "#FF3222")
# Fill colours for the pie plots. The order differs from the stage palette
# used for the 2D plots: red comes second here. Presumably the pie slices are
# ordered alphabetically (D0, D10, D2, D4, D6, D8), so that each stage keeps
# the same colour as in the 2D plots - TODO confirm against the legend.
pie.colors <- c("#00599F","#FF3222","#009900",
                "#FF9933","#FF99FF","#7A06A0")

# Tree plot, nodes colored by CD49f expression
plotTree(fspy, color.by = "CD49f", show.node.name = TRUE, cex.size = 1) +
  scale_colour_gradientn(colors = value.gradient)

# Tree plot, nodes colored by "D0.percent"
plotTree(fspy, color.by = "D0.percent", show.node.name = TRUE, cex.size = 1) +
  scale_colour_gradientn(colors = value.gradient)

# Plot clusters on the PCA and tSNE coordinates, colored by CD45RA expression
plotCluster(fspy, item.use = c("PC_1", "PC_2"), category = "numeric",
            size = 10, color.by = "CD45RA") +
  scale_colour_gradientn(colors = value.gradient)

plotCluster(fspy, item.use = c("tSNE_1", "tSNE_2"), category = "numeric",
            size = 100, color.by = "CD45RA") +
  scale_colour_gradientn(colors = value.gradient)

# Plot pie tree
plotPieTree(fspy, cex.size = 3, size.by.cell.number = TRUE) +
  scale_fill_manual(values = pie.colors)

# Same plot drawn as a tree (as.tree = TRUE) rooted at cluster 15
plotPieTree(fspy, cex.size = 5, size.by.cell.number = TRUE, as.tree = TRUE, root.id = 15) +
  scale_fill_manual(values = pie.colors)

# Plot pie cluster on the tSNE and PCA coordinates
plotPieCluster(fspy, item.use = c("tSNE_1", "tSNE_2"), cex.size = 50) +
  scale_fill_manual(values = pie.colors)

plotPieCluster(fspy, item.use = c("PC_1", "PC_2"), cex.size = 0.5) +
  scale_fill_manual(values = pie.colors)
Pseudotime
###########################################
# Pseudotime
###########################################
# Yellow - red - purple gradient used for every pseudotime-coloured plot below
pseudotime.gradient <- c("#F4D31D", "#FF3222","#7A06A0")

# Define cluster 15 as the root and calculate pseudotime with dim.type = "raw"
fspy <- defRootCells(fspy, root.cells = 15)
fspy <- runPseudotime(fspy, verbose = TRUE, dim.type = "raw")
## 2020-07-09 21:45:25 [INFO] Calculating Pseudotime.
## 2020-07-09 21:45:25 [INFO] Pseudotime exists in meta.data, it will be replaced.
## 2020-07-09 21:45:25 [INFO] The log data will be used to calculate pseudotime
## 2020-07-09 21:45:45 [INFO] Calculating Pseudotime completed.

# tSNE plot colored by pseudotime
plot2D(fspy, item.use = c("tSNE_1", "tSNE_2"), category = "numeric",
       size = 1, color.by = "pseudotime") +
  scale_colour_gradientn(colors = pseudotime.gradient)

# UMAP plot colored by pseudotime
plot2D(fspy, item.use = c("UMAP_1", "UMAP_2"), category = "numeric",
       size = 1, color.by = "pseudotime") +
  scale_colour_gradientn(colors = pseudotime.gradient)

# Tree plot
plotTree(fspy, color.by = "pseudotime", cex.size = 1.5) +
  scale_colour_gradientn(colors = pseudotime.gradient)

# Density plot of pseudotime by stage
plotPseudotimeDensity(fspy, adjust = 1) +
  scale_color_manual(values = c("#00599F","#009900","#FF9933",
                                "#FF99FF","#7A06A0","#FF3222"))

# Trajectory value
plotPseudotimeTraj(fspy, var.cols = TRUE) +
  scale_colour_gradientn(colors = pseudotime.gradient)
## `geom_smooth()` using formula 'y ~ x'

# Same plot with cutoff = 0.05
plotPseudotimeTraj(fspy, cutoff = 0.05, var.cols = TRUE) +
  scale_colour_gradientn(colors = pseudotime.gradient)
## `geom_smooth()` using formula 'y ~ x'
Intermediate state analysis
###########################################
# Subset FSPY
###########################################
# Fetch the cells that belong to the selected clusters
cell.inter <- fetchCell(fspy, cluster.id = c(26,25,36,19,4,8,31,20,29,6,16))
# Keep only cells from D6, D8 and D10. Cell names have the form
# "<stage>_<suffix>" (the stage column was derived from that prefix), so the
# pattern is anchored to the prefix instead of matching anywhere in the name.
cell.inter <- cell.inter[grepl("^(D6|D8|D10)_", cell.inter)]
sub.fspy <- subsetFSPY(fspy, cells = cell.inter)

# Re-cluster the subset with the SOM algorithm (xdim = 4, ydim = 4).
# Seeds are set before each seeded step to make results reproducible.
set.seed(1)
sub.fspy <- runCluster(sub.fspy, cluster.method = "som", xdim = 4, ydim = 4)

## Mapping data to SOM
# Do not perform downsampling (downsampling.size = 1)
set.seed(1)
sub.fspy <- processingCluster(sub.fspy, perplexity = 2, downsampling.size = 1)

# Run Diffusion map
set.seed(1)
sub.fspy <- runDiffusionMap(sub.fspy)

# Define cluster 13 of the subset as the root and recalculate pseudotime
sub.fspy <- defRootCells(sub.fspy, root.cells = 13)
sub.fspy <- runPseudotime(sub.fspy, dim.type = "raw", dim.use = 1:2)
## 2020-07-09 21:46:13 [INFO] Pseudotime exists in meta.data, it will be replaced.

# 3D plot for FSPY: first three diffusion components, colored by stage
# (three colours, one per remaining stage: D6, D8, D10)
plot3D(sub.fspy, item.use = c("DC_2","DC_1","DC_3"), color.by = "stage",
       size = 0.5, angle = 60, color.theme = c("#FF99FF","#7A06A0","#FF3222"))

# Same 3D view, colored by CD49f expression
plot3D(sub.fspy, item.use = c("DC_2","DC_1","DC_3"),
       size = 0.5, color.by = "CD49f", angle = 60, category = "numeric",
       color.theme = c("#00599F","#00599F","#EEEEEE","#FF3222","#FF3222"))
Bug Reports
If you encounter any error when installing or loading the flowSpy package, please contact us via e-mail: forlynna@sjtu.edu.cn
Link to the quick start tutorial
The quick start tutorial provides a quick-reading version of the flowSpy workflow. To see the quick start tutorial of flowSpy, please visit Quick start of flowSpy.
Link to the basic tutorial
The basic tutorial provides a more detailed version of the flowSpy workflow. To see the basic tutorial of flowSpy, please visit Tutorial of flowSpy.
Note
Dear flowSpy users:
To make this package easier to identify and to avoid awkward duplication of names in some situations, we decided to change the name of flowSpy to CytoTree. The package name CytoTree better fits the functional orientation of this software. The usage and updates of flowSpy and CytoTree will be kept consistent until the end of Bioc 3.11. In the 3.12 devel, flowSpy will be deprecated.
The package CytoTree has been added to Bioconductor (https://bioconductor.org/packages/CytoTree/). We recommend that users download this package and replace flowSpy as soon as possible.
We apologize for the inconvenience.
flowSpy team
2020-07-09
Session information
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] stringr_1.4.0 flowSpy_1.2.3 igraph_1.2.5 pheatmap_1.0.12
## [5] flowCore_2.0.1 ggplot2_3.3.1
##
## loaded via a namespace (and not attached):
## [1] reticulate_1.16 RUnit_0.4.32
## [3] tidyselect_1.1.0 RSQLite_2.2.0
## [5] AnnotationDbi_1.50.0 grid_4.0.0
## [7] ranger_0.12.1 BiocParallel_1.22.0
## [9] Rtsne_0.15 scatterpie_0.1.4
## [11] munsell_0.5.0 destiny_3.2.0
## [13] codetools_0.2-16 umap_0.2.5.0
## [15] withr_2.2.0 colorspace_1.4-1
## [17] Biobase_2.48.0 knitr_1.28
## [19] stats4_4.0.0 SingleCellExperiment_1.10.1
## [21] robustbase_0.93-6 vcd_1.4-7
## [23] VIM_6.0.0 TTR_0.23-6
## [25] labeling_0.3 GenomeInfoDbData_1.2.3
## [27] polyclip_1.10-0 bit64_0.9-7
## [29] farver_2.0.3 flowWorkspace_4.0.6
## [31] vctrs_0.3.1 generics_0.0.2
## [33] xfun_0.14 ggthemes_4.2.0
## [35] R6_2.4.1 GenomeInfoDb_1.24.0
## [37] RcppEigen_0.3.3.7.0 rmdformats_0.3.7
## [39] locfit_1.5-9.4 bitops_1.0-6
## [41] DelayedArray_0.14.0 scales_1.1.1
## [43] nnet_7.3-14 gtable_0.3.0
## [45] sva_3.36.0 RProtoBufLib_2.0.0
## [47] rlang_0.4.6 genefilter_1.70.0
## [49] scatterplot3d_0.3-41 flowUtils_1.52.0
## [51] splines_4.0.0 hexbin_1.28.1
## [53] BiocManager_1.30.10 yaml_2.2.1
## [55] abind_1.4-5 RBGL_1.64.0
## [57] tools_4.0.0 bookdown_0.19
## [59] ellipsis_0.3.1 RColorBrewer_1.1-2
## [61] proxy_0.4-24 BiocGenerics_0.34.0
## [63] Rcpp_1.0.4.6 plyr_1.8.6
## [65] base64enc_0.1-3 zlibbioc_1.34.0
## [67] purrr_0.3.4 RCurl_1.98-1.2
## [69] FlowSOM_1.20.0 openssl_1.4.1
## [71] S4Vectors_0.26.1 zoo_1.8-8
## [73] SummarizedExperiment_1.18.1 haven_2.3.1
## [75] cluster_2.1.0 magrittr_1.5
## [77] ncdfFlow_2.34.0 data.table_1.12.8
## [79] RSpectra_0.16-0 openxlsx_4.1.5
## [81] gmodels_2.18.1 lmtest_0.9-37
## [83] RANN_2.6.1 pcaMethods_1.80.0
## [85] matrixStats_0.56.0 hms_0.5.3
## [87] evaluate_0.14 xtable_1.8-4
## [89] smoother_1.1 XML_3.99-0.3
## [91] rio_0.5.16 jpeg_0.1-8.1
## [93] mclust_5.4.6 readxl_1.3.1
## [95] IRanges_2.22.2 gridExtra_2.3
## [97] ggcyto_1.16.0 compiler_4.0.0
## [99] tibble_3.0.1 crayon_1.3.4
## [101] htmltools_0.4.0 mgcv_1.8-31
## [103] corpcor_1.6.9 tidyr_1.1.0
## [105] RcppParallel_5.0.1 DBI_1.1.0
## [107] tweenr_1.0.1 MASS_7.3-51.6
## [109] boot_1.3-25 Matrix_1.2-18
## [111] car_3.0-8 gdata_2.18.0
## [113] parallel_4.0.0 GenomicRanges_1.40.0
## [115] forcats_0.5.0 pkgconfig_2.0.3
## [117] rvcheck_0.1.8 prettydoc_0.3.1
## [119] foreign_0.8-80 laeken_0.5.1
## [121] sp_1.4-2 xml2_1.3.2
## [123] annotate_1.66.0 XVector_0.28.0
## [125] digest_0.6.25 tsne_0.1-3
## [127] ConsensusClusterPlus_1.52.0 graph_1.66.0
## [129] rmarkdown_2.2 cellranger_1.1.0
## [131] edgeR_3.30.3 curl_4.3
## [133] gtools_3.8.2 ggplot.multistats_1.0.0
## [135] nlme_3.1-148 lifecycle_0.2.0
## [137] jsonlite_1.6.1 carData_3.0-4
## [139] BiocNeighbors_1.6.0 askpass_1.1
## [141] limma_3.44.3 pillar_1.4.4
## [143] lattice_0.20-41 DEoptimR_1.0-8
## [145] survival_3.2-3 glue_1.4.1
## [147] xts_0.12-0 zip_2.0.4
## [149] png_0.1-7 bit_1.1-15.2
## [151] Rgraphviz_2.32.0 ggforce_0.3.1
## [153] class_7.3-17 stringi_1.4.6
## [155] blob_1.2.1 RcppHNSW_0.2.0
## [157] CytoML_2.0.5 latticeExtra_0.6-29
## [159] memoise_1.1.0 dplyr_1.0.0
## [161] cytolib_2.0.3 knn.covertree_1.0
## [163] irlba_2.3.3 e1071_1.7-3
Reference
[1] Wang C, Tang X, Sun X, Miao Z, Lv Y, Yang Y, Zhang H, Zhang P, Liu Y, Du L, et al: TGFbeta inhibition enhances the generation of hematopoietic progenitors from human ES cell-derived hemogenic endothelial cells using a stepwise strategy. Cell Res 2012, 22:194-207.