├── .Rbuildignore
├── .gitignore
├── CELESTA.Rproj
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── R
└── CELESTA_functions.R
├── README.Rmd
├── README.md
├── data
├── high_marker_threshold_anchor.rda
├── high_marker_threshold_iteration.rda
├── imaging_data.csv
├── imaging_data.rda
├── low_marker_threshold_anchor.rda
├── low_marker_threshold_iteration.rda
├── prior_marker_info.csv
└── prior_marker_info.rda
├── images
├── CD31_threshold.png
├── Cytokeratin_threshold.png
├── aSMA_threshold.png
├── demo_image.png
├── high_threshold_example.png
├── low_threshold_example.png
├── plot_cell_assignment.png
├── prior_matrix_example.png
└── segmented_file_example.png
├── man
├── AssignCellTypes.Rd
├── AssignCells.Rd
├── BuildSigmoidFunction.Rd
├── CalcMarkerActivationProbability.Rd
├── CalculateBeta.Rd
├── CalculateIndexCellProb.Rd
├── CalculateProbabilityDifference.Rd
├── CalculateScores.Rd
├── Celesta-class.Rd
├── CountCellType.Rd
├── CreateCelestaObject.Rd
├── FilterArtifactCells.Rd
├── FilterCells.Rd
├── FindCellsToCheck.Rd
├── FindCellsWithId.Rd
├── FitGmmModel.Rd
├── GetCoords.Rd
├── GetDistFromNearestAssignedCells.Rd
├── GetFinalInferredCellTypes.Rd
├── GetInitialPriorMatrix.Rd
├── GetMarkerExpMatrix.Rd
├── GetNeighborInfo.Rd
├── GetPriorInfo.Rd
├── GetScore.Rd
├── InitializeCellAndScoringMatrices.Rd
├── MarkQuestionableCells.Rd
├── NeighborCellType.Rd
├── PlotCellsAnyCombination.Rd
├── PlotExpProb.Rd
├── PlotSingleExpProb.Rd
├── UpdatePriorMatrix.Rd
└── figures
│ └── README-pressure-1.png
└── tests
├── CELESTA_functions_orig.R
├── testthat.R
└── testthat
├── project_title_anchor_cell_assignment.csv
├── project_title_final_cell_type_assignment.csv
└── test-CELESTA_functions.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^CELESTA\.Rproj$
2 | ^\.Rproj\.user$
3 | ^LICENSE\.md$
4 | ^README\.Rmd$
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 |
--------------------------------------------------------------------------------
/CELESTA.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | Encoding: UTF-8
9 |
10 | AutoAppendNewline: Yes
11 | StripTrailingWhitespace: Yes
12 | LineEndingConversion: Posix
13 |
14 | BuildType: Package
15 | PackageUseDevtools: Yes
16 | PackageInstallArgs: --no-multiarch --with-keep.source
17 | PackageRoxygenize: rd,collate,namespace
18 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: CELESTA
2 | Title: Cell type identification with spatial information
3 | Version: 0.0.0.9000
4 | Authors@R:
5 | person(given = "Weiruo",
6 | family = "Zhang",
7 | role = c("aut", "cre"),
8 | email = "weiruo16@stanford.edu")
9 | Description: Automate machine learning cell type identification using both protein expressions and cell neighborhood information for multiplexed in situ imaging data.
10 | License: Apache License (>= 2)
11 | Encoding: UTF-8
12 | LazyData: true
13 | Roxygen: list(markdown = TRUE)
14 | RoxygenNote: 7.1.2
15 | Imports:
16 | Rmixmod,
17 | spdep,
18 | ggplot2,
19 | reshape2,
20 | zeallot
21 | Depends:
22 | R (>= 2.10)
23 | Suggests:
24 | testthat (>= 3.0.0)
25 | Config/testthat/edition: 3
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Apache License
2 | ==============
3 |
4 | _Version 2.0, January 2004_
5 | _<http://www.apache.org/licenses/>_
6 |
7 | ### Terms and Conditions for use, reproduction, and distribution
8 |
9 | #### 1. Definitions
10 |
11 | “License” shall mean the terms and conditions for use, reproduction, and
12 | distribution as defined by Sections 1 through 9 of this document.
13 |
14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright
15 | owner that is granting the License.
16 |
17 | “Legal Entity” shall mean the union of the acting entity and all other entities
18 | that control, are controlled by, or are under common control with that entity.
19 | For the purposes of this definition, “control” means **(i)** the power, direct or
20 | indirect, to cause the direction or management of such entity, whether by
21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the
22 | outstanding shares, or **(iii)** beneficial ownership of such entity.
23 |
24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising
25 | permissions granted by this License.
26 |
27 | “Source” form shall mean the preferred form for making modifications, including
28 | but not limited to software source code, documentation source, and configuration
29 | files.
30 |
31 | “Object” form shall mean any form resulting from mechanical transformation or
32 | translation of a Source form, including but not limited to compiled object code,
33 | generated documentation, and conversions to other media types.
34 |
35 | “Work” shall mean the work of authorship, whether in Source or Object form, made
36 | available under the License, as indicated by a copyright notice that is included
37 | in or attached to the work (an example is provided in the Appendix below).
38 |
39 | “Derivative Works” shall mean any work, whether in Source or Object form, that
40 | is based on (or derived from) the Work and for which the editorial revisions,
41 | annotations, elaborations, or other modifications represent, as a whole, an
42 | original work of authorship. For the purposes of this License, Derivative Works
43 | shall not include works that remain separable from, or merely link (or bind by
44 | name) to the interfaces of, the Work and Derivative Works thereof.
45 |
46 | “Contribution” shall mean any work of authorship, including the original version
47 | of the Work and any modifications or additions to that Work or Derivative Works
48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work
49 | by the copyright owner or by an individual or Legal Entity authorized to submit
50 | on behalf of the copyright owner. For the purposes of this definition,
51 | “submitted” means any form of electronic, verbal, or written communication sent
52 | to the Licensor or its representatives, including but not limited to
53 | communication on electronic mailing lists, source code control systems, and
54 | issue tracking systems that are managed by, or on behalf of, the Licensor for
55 | the purpose of discussing and improving the Work, but excluding communication
56 | that is conspicuously marked or otherwise designated in writing by the copyright
57 | owner as “Not a Contribution.”
58 |
59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf
60 | of whom a Contribution has been received by Licensor and subsequently
61 | incorporated within the Work.
62 |
63 | #### 2. Grant of Copyright License
64 |
65 | Subject to the terms and conditions of this License, each Contributor hereby
66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
67 | irrevocable copyright license to reproduce, prepare Derivative Works of,
68 | publicly display, publicly perform, sublicense, and distribute the Work and such
69 | Derivative Works in Source or Object form.
70 |
71 | #### 3. Grant of Patent License
72 |
73 | Subject to the terms and conditions of this License, each Contributor hereby
74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
75 | irrevocable (except as stated in this section) patent license to make, have
76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where
77 | such license applies only to those patent claims licensable by such Contributor
78 | that are necessarily infringed by their Contribution(s) alone or by combination
79 | of their Contribution(s) with the Work to which such Contribution(s) was
80 | submitted. If You institute patent litigation against any entity (including a
81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a
82 | Contribution incorporated within the Work constitutes direct or contributory
83 | patent infringement, then any patent licenses granted to You under this License
84 | for that Work shall terminate as of the date such litigation is filed.
85 |
86 | #### 4. Redistribution
87 |
88 | You may reproduce and distribute copies of the Work or Derivative Works thereof
89 | in any medium, with or without modifications, and in Source or Object form,
90 | provided that You meet the following conditions:
91 |
92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of
93 | this License; and
94 | * **(b)** You must cause any modified files to carry prominent notices stating that You
95 | changed the files; and
96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute,
97 | all copyright, patent, trademark, and attribution notices from the Source form
98 | of the Work, excluding those notices that do not pertain to any part of the
99 | Derivative Works; and
100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any
101 | Derivative Works that You distribute must include a readable copy of the
102 | attribution notices contained within such NOTICE file, excluding those notices
103 | that do not pertain to any part of the Derivative Works, in at least one of the
104 | following places: within a NOTICE text file distributed as part of the
105 | Derivative Works; within the Source form or documentation, if provided along
106 | with the Derivative Works; or, within a display generated by the Derivative
107 | Works, if and wherever such third-party notices normally appear. The contents of
108 | the NOTICE file are for informational purposes only and do not modify the
109 | License. You may add Your own attribution notices within Derivative Works that
110 | You distribute, alongside or as an addendum to the NOTICE text from the Work,
111 | provided that such additional attribution notices cannot be construed as
112 | modifying the License.
113 |
114 | You may add Your own copyright statement to Your modifications and may provide
115 | additional or different license terms and conditions for use, reproduction, or
116 | distribution of Your modifications, or for any such Derivative Works as a whole,
117 | provided Your use, reproduction, and distribution of the Work otherwise complies
118 | with the conditions stated in this License.
119 |
120 | #### 5. Submission of Contributions
121 |
122 | Unless You explicitly state otherwise, any Contribution intentionally submitted
123 | for inclusion in the Work by You to the Licensor shall be under the terms and
124 | conditions of this License, without any additional terms or conditions.
125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of
126 | any separate license agreement you may have executed with Licensor regarding
127 | such Contributions.
128 |
129 | #### 6. Trademarks
130 |
131 | This License does not grant permission to use the trade names, trademarks,
132 | service marks, or product names of the Licensor, except as required for
133 | reasonable and customary use in describing the origin of the Work and
134 | reproducing the content of the NOTICE file.
135 |
136 | #### 7. Disclaimer of Warranty
137 |
138 | Unless required by applicable law or agreed to in writing, Licensor provides the
139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS,
140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
141 | including, without limitation, any warranties or conditions of TITLE,
142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
143 | solely responsible for determining the appropriateness of using or
144 | redistributing the Work and assume any risks associated with Your exercise of
145 | permissions under this License.
146 |
147 | #### 8. Limitation of Liability
148 |
149 | In no event and under no legal theory, whether in tort (including negligence),
150 | contract, or otherwise, unless required by applicable law (such as deliberate
151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be
152 | liable to You for damages, including any direct, indirect, special, incidental,
153 | or consequential damages of any character arising as a result of this License or
154 | out of the use or inability to use the Work (including but not limited to
155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or
156 | any and all other commercial damages or losses), even if such Contributor has
157 | been advised of the possibility of such damages.
158 |
159 | #### 9. Accepting Warranty or Additional Liability
160 |
161 | While redistributing the Work or Derivative Works thereof, You may choose to
162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or
163 | other liability obligations and/or rights consistent with this License. However,
164 | in accepting such obligations, You may act only on Your own behalf and on Your
165 | sole responsibility, not on behalf of any other Contributor, and only if You
166 | agree to indemnify, defend, and hold each Contributor harmless for any liability
167 | incurred by, or claims asserted against, such Contributor by reason of your
168 | accepting any such warranty or additional liability.
169 |
170 | _END OF TERMS AND CONDITIONS_
171 |
172 | ### APPENDIX: How to apply the Apache License to your work
173 |
174 | To apply the Apache License to your work, attach the following boilerplate
175 | notice, with the fields enclosed by brackets `[]` replaced with your own
176 | identifying information. (Don't include the brackets!) The text should be
177 | enclosed in the appropriate comment syntax for the file format. We also
178 | recommend that a file or class name and description of purpose be included on
179 | the same “printed page” as the copyright notice for easier identification within
180 | third-party archives.
181 |
182 | Copyright [yyyy] [name of copyright owner]
183 |
184 | Licensed under the Apache License, Version 2.0 (the "License");
185 | you may not use this file except in compliance with the License.
186 | You may obtain a copy of the License at
187 |
188 | http://www.apache.org/licenses/LICENSE-2.0
189 |
190 | Unless required by applicable law or agreed to in writing, software
191 | distributed under the License is distributed on an "AS IS" BASIS,
192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
193 | See the License for the specific language governing permissions and
194 | limitations under the License.
195 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(AssignCellTypes)
4 | export(AssignCells)
5 | export(BuildSigmoidFunction)
6 | export(CalcMarkerActivationProbability)
7 | export(CalculateBeta)
8 | export(CalculateIndexCellProb)
9 | export(CalculateProbabilityDifference)
10 | export(CalculateScores)
11 | export(CountCellType)
12 | export(CreateCelestaObject)
13 | export(FilterArtifactCells)
14 | export(FilterCells)
15 | export(FindCellsToCheck)
16 | export(FindCellsWithId)
17 | export(FitGmmModel)
18 | export(GetCoords)
19 | export(GetDistFromNearestAssignedCells)
20 | export(GetFinalInferredCellTypes)
21 | export(GetInitialPriorMatrix)
22 | export(GetMarkerExpMatrix)
23 | export(GetNeighborInfo)
24 | export(GetPriorInfo)
25 | export(GetScore)
26 | export(InitializeCellAndScoringMatrices)
27 | export(MarkQuestionableCells)
28 | export(NeighborCellType)
29 | export(PlotCellsAnyCombination)
30 | export(PlotExpProb)
31 | export(PlotSingleExpProb)
32 | export(UpdatePriorMatrix)
33 | exportClasses(Celesta)
34 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, include = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "man/figures/README-",
12 | out.width = "100%"
13 | )
14 | options(tibble.print_min = 5, tibble.print_max = 5)
15 | ```
16 |
17 | # CELESTA
18 |
19 |
20 |
21 | ## Overview
22 | CELESTA (CELl typE identification with SpaTiAl information) is an algorithm that performs automated cell type identification for multiplexed in situ imaging data. CELESTA makes use of both protein expressions and cell spatial neighborhood information from segmented imaging data for the cell type identification.
23 |
24 | The pre-saved imaging data is taken from reg009 of the published CODEX data (Schurch et al., Cell, 2020) for illustration purposes.
25 |
26 | * `CreateCelestaObject()` Creates an object for running CELESTA. It requires a title for the project, a segmented imaging data file, and a user-defined prior knowledge file for the cell-type signature matrix.
27 | * `FilterCells()` This step filters out questionable cells caused by imaging artifacts, segmentation errors, etc.
28 | * `PlotExpProb()` This function plots the calculated expression probabilities for each marker included in the user-defined prior cell-type signature matrix. It can be used to visualize and help with setting the thresholds for whether a marker is expressed or not.
29 | * `AssignCells()` This is the main function to assign cell types with an iterative EM algorithm.
30 | * `PlotCellsAnyCombination()` This function can be used to plot the cells with identified cell types with the XY coordinates from segmentation.
31 |
32 | ## Installation
33 |
34 | You can install the development version of CELESTA from GitHub:
35 |
36 | ``` {r, eval = FALSE}
37 | # install.packages("devtools")
38 | devtools::install_github("plevritis/CELESTA")
39 | ```
40 | ## Dependency
41 | CELESTA depends on the following R packages:
42 | - [Rmixmod](https://cran.r-project.org/web/packages/Rmixmod/index.html): for performing Gaussian Mixture Modeling
43 | - [spdep](https://cran.r-project.org/web/packages/spdep/index.html): for obtaining spatial neighborhood information
44 | - [zeallot](https://cran.r-project.org/web/packages/zeallot/index.html): for R code styling. Provides a %<-% operator to perform multiple, unpacking, and destructuring assignment in R.
45 | - [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) and
46 |   [reshape2](https://cran.r-project.org/web/packages/reshape2/index.html): for plotting
47 |
48 | ## Usage
49 |
50 | ```{r,results='hide',message=FALSE, eval = FALSE}
51 | library(CELESTA)
52 | library(Rmixmod)
53 | library(spdep)
54 | library(ggplot2)
55 | library(reshape2)
56 | library(zeallot)
57 |
58 | ### The pre-saved imaging data is taken from reg009 of the published CODEX data Schurch et al. Cell,2020
59 | ### Create CELESTA object. It requires a title for the project.
60 | ### It also requires the segmented input file and the user-defined cell-type signature matrix.
61 | ### Please refer to the Inputs section below.
62 | CelestaObj <- CreateCelestaObject(project_title = "project_title",prior_marker_info,imaging_data)
63 |
64 | ### Filter out questionable cells.
65 | ### A cell in which every marker has an expression probability higher than 0.9 is filtered out.
66 | ### A cell in which every marker has an expression probability lower than 0.4 is also filtered out.
67 | ### User can define the thresholds based on inspecting their data.
68 | ### This step is optional.
69 | CelestaObj <- FilterCells(CelestaObj,high_marker_threshold=0.9, low_marker_threshold=0.4)
70 |
71 | ### Assign cell types.
72 | ### max_iteration is used to define the maximum iterations allowed in the EM algorithm per round.
73 | ### cell_change_threshold is a user-defined ending condition for the EM algorithm.
74 | ### For example, 0.01 means that when fewer than 1% of the total number of cells change identity, the algorithm will stop.
75 | CelestaObj <- AssignCells(CelestaObj,max_iteration=10,cell_change_threshold=0.01,
76 | high_expression_threshold_anchor=high_marker_threshold_anchor,
77 | low_expression_threshold_anchor=low_marker_threshold_anchor,
78 | high_expression_threshold_index=high_marker_threshold_iteration,
79 | low_expression_threshold_index=low_marker_threshold_iteration)
80 |
81 | ### Plot cells with CELESTA assigned cell types.
82 | ### The cell_number_to_use corresponds to the defined numbers in the prior cell-type signature matrix.
83 | ### For example, 1 corresponds to endothelial cell, 2 corresponds to tumor cell.
84 | ### The program will plot the corresponding cell types given in the "cell_number_to_use" parameter.
85 | ### To plot the "unknown" cells that are left unassigned by CELESTA, include 0 in the list.
86 | ### The default color for unknown cells is gray.
87 | ### It is suggested not to plot more than 7 cell types on the same image for better visualization.
88 | PlotCellsAnyCombination(cell_type_assignment_to_plot=CelestaObj@final_cell_type_assignment[,5],
89 | coords = CelestaObj@coords,
90 | prior_info = prior_marker_info,
91 | cell_number_to_use=c(1,2,3),cell_type_colors=c("yellow","red","blue"))
92 |
93 | ### To include unknown cells
94 | PlotCellsAnyCombination(cell_type_assignment_to_plot=CelestaObj@final_cell_type_assignment[,5],
95 | coords = CelestaObj@coords,
96 | prior_info = prior_marker_info,
97 | cell_number_to_use=c(0,1,2,3),cell_type_colors=c("yellow","red","blue"))
98 |
99 | ### plot expression probability
100 | PlotExpProb(coords=CelestaObj@coords,
101 | marker_exp_prob=CelestaObj@marker_exp_prob,
102 | prior_marker_info = prior_marker_info,
103 | save_plot = TRUE)
104 | ```
105 |
106 | ## Inputs
107 | CELESTA requires two inputs:
108 | `1. Segmented imaging data`:
109 | a dataframe with rows as the cells, and needs to have (1) two columns named X and Y to define the XY coordinates of the cells and (2) other columns having the protein marker expressions for each cell
110 |
111 | Below is an example of the segmented imaging file header
112 |
113 | 
114 |
115 | `2. User-defined cell-type signature matrix`:
116 | (1) The first column has to contain the cell types to be inferred
117 | (2) The second column has the lineage information for each cell type. The lineage information has three numbers connected by “_” (underscore). The first number indicates the round. Cell types with the same lineage level are inferred in the same round. An increasing number indicates increasing cell-type resolution. For example, immune cells -> CD3+ T cells -> CD4+ T cells. The third number is a number assigned to the cell type, i.e., the cell type number. The middle number gives the cell type number of the previous lineage cell type for the current cell type. For example, the middle number for CD3+ T cells is 5, because it is a subtype of immune cells, which have cell type number 5.
118 | (3) Starting from column three, each column is a protein marker. If the protein marker is known to be expressed for that cell type, then it is denoted by “1”. If the protein marker is known to not express for a cell type, then it is denoted by “0”. If the protein marker is irrelevant or uncertain to express for a cell type, then it is denoted by “NA”.
119 | (4) More examples of the user-defined cell-type signature matrix are provided in the data folder.
120 |
121 | Below is an example of cell-type signature matrix based on imaging panel used in Schurch et al. Cell, 2020.
122 |
123 | 
124 |
125 | ## Outputs
126 | CELESTA outputs:
127 | 1. After running `AssignCells()` function, CELESTA will output a .csv file with the cell type assignment to each cell for each round and the final combined cell types.
128 | 2. In addition, users can access the results in the CELESTA object under the slot "final_cell_type_assignment". The anchor cells defined for each round can be found under the slot "anchor_cell_type_assignment".
129 |
130 | CELESTA can also plot the assigned cells by using the `PlotCellsAnyCombination()` function. An example output image is shown below:
131 | 
132 | Users can compare the output with the original images. An example is shown below:
133 | `Please note:` CODEX images preprocessed with Akoya Biosciences software have their image tiles stitched in a flipped way. So in some cases, the image needs to be flipped for the comparison.
134 | 
135 |
136 | ## How to define thresholds
137 | In the `AssignCells()` function, four vectors are required to define the high and low thresholds for each cell type. The length of each vector equals the total number of cell types defined in the cell-type signature matrix. Examples of the thresholds are provided in the data folder.
138 | We suggest starting with the default thresholds and modifying them by comparing the results with the original staining, as demonstrated below.
139 | Two vectors are required for defining the "high_expression_threshold", one for anchor cells and one for index cells. The thresholds define how high the marker expression probability must be for the marker to be considered expressed. An example for defining high_expression_threshold is shown below:
140 | 
141 |
142 | To find the proper threshold, the `PlotExpProb()` function can be applied. Because the segmented data may have some compensation in the values which are the inputs to CELESTA, the expression probabilities are calculated based on the segmented data. It's useful to compare the expression probabilities with the CODEX staining for each marker.
143 | For example, for endothelial cells, if we plot the expression probabilities of CD31 (left) and compare with the CD31 staining, approximately 0.9 and 0.8 would be the right threshold for defining how much the cell should express CD31. `Please note:` It is suggested that for anchor cells, use a slightly higher threshold than index cells.
144 | 
145 | Another example, for tumor cells, if we plot the expression probabilities of Cytokeratin (left) and compare with the Cytokeratin staining, approximately 0.9 and 0.8 would be the right threshold for defining how much the cell should express Cytokeratin. `Please note:` It is suggested that for anchor cells, use a slightly higher threshold than index cells.
146 | 
147 |
148 | Two vectors are required for defining the "low_marker_threshold", one for anchor cells and one for index cells. The thresholds define what the marker expression probability must be for the marker to be considered not expressed. Normally 1 is assigned to this value unless there are a lot of doublets or co-staining in the data. **The Low expression threshold default values in general are robust, and thus we recommend testing the High expression threshold values.**
149 |
150 | An example for defining low_marker_threshold is shown below:
151 |
152 | 
153 |
154 | ## Getting help
155 | If you encounter a clear bug, please file an issue with a minimal reproducible example on [GitHub](https://github.com/plevritis/CELESTA/issues). For questions and other discussion, please use [community.rstudio.com](https://community.rstudio.com/).
156 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # CELESTA
5 |
6 |
7 |
8 |
9 | ## Overview
10 |
11 | CELESTA (CELl typE identification with SpaTiAl information) is an
12 | algorithm that aims to perform automated cell type identification for
13 | multiplexed in situ imaging data. CELESTA makes use of both protein
14 | expressions and cell spatial neighborhood information from segmented
15 | imaging data for the cell type identification.
16 |
17 | The pre-saved imaging data is taken from reg009 of the published CODEX
18 | data Schurch et al. Cell,2020 for illustration purpose.
19 |
20 | - `CreateCelestaObject()` Creates an object running CELESTA. It
21 | requires a title to create the project, segmented imaging data file
22 | and prior knowledge file for cell-type signature matrix
23 | (user-defined).
24 | - `FilterCells()` This step intends to filter out questionable cells due
25 | to imaging artifacts, segmentation error etc.
26 | - `PlotExpProb()` This function plots the calculated expression
27 | probabilities for each marker included in the user-defined prior
28 | cell-type signature matrix. It can be used to visualize and help
29 | with setting the thresholds for whether a marker is expressed or
30 | not.
31 | - `AssignCells()` This is the main function to assign cell types with
32 | an iterative EM algorithm.
33 | - `PlotCellsAnyCombination()` This function can be used to plot the
34 | cells with identified cell types with the XY coordinates from
35 | segmentation.
36 |
37 | ## Installation
38 |
39 | You can install the development version of CELESTA
40 |
41 | ``` r
42 | # install.packages("devtools")
43 | devtools::install_github("plevritis/CELESTA")
44 | ```
45 |
46 | ## Dependency
47 |
48 | CELESTA requires dependency on the following R packages:
49 |
50 | - [Rmixmod](https://cran.r-project.org/web/packages/Rmixmod/index.html):
51 | for performing Gaussian Mixture Modeling
52 | - [spdep](https://cran.r-project.org/web/packages/spdep/index.html): for
53 | obtaining spatial neighborhood information
54 | - [zeallot](https://cran.r-project.org/web/packages/zeallot/index.html):
55 | for R code styling. Provides a %<-% operator to perform multiple,
56 | unpacking, and destructuring assignment in R.
57 | - [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html): for plotting
58 | - [reshape2](https://cran.r-project.org/web/packages/reshape2/index.html): for plotting
59 |
60 | ## Usage
61 |
62 | ``` r
63 | library(CELESTA)
64 | library(Rmixmod)
65 | library(spdep)
66 | library(ggplot2)
67 | library(reshape2)
68 | library(zeallot)
69 |
70 | ### The pre-saved imaging data is taken from reg009 of the published CODEX data Schurch et al. Cell,2020
71 | ### Create CELESTA object. It requires a title for the project.
72 | ### It also requires the segmented input file and user-defined cell-type signature matrix.
73 | ### Please refer to the Inputs section below.
74 | CelestaObj <- CreateCelestaObject(project_title = "project_title",prior_marker_info,imaging_data)
75 |
76 | ### Filter out questionable cells.
77 | ### Cells in which every marker has an expression probability higher than 0.9 are filtered out.
78 | ### Cells in which every marker has an expression probability lower than 0.4 are also filtered out.
79 | ### User can define the thresholds based on inspecting their data.
80 | ### **This step is optional.** We suggest starting without running this step to see whether there are many doublets/triplets.
81 | CelestaObj <- FilterCells(CelestaObj,high_marker_threshold=0.9, low_marker_threshold=0.4)
82 |
83 | ### Assign cell types.
84 | ### max_iteration is used to define the maximum iterations allowed in the EM algorithm per round.
85 | ### cell_change_threshold is a user-defined ending condition for the EM algorithm.
86 | ### For example, 0.01 means that the algorithm will stop when the number of additionally assigned cells is smaller than 1% of the unassigned cells.
87 | CelestaObj <- AssignCells(CelestaObj,max_iteration=10,cell_change_threshold=0.01,
88 | high_expression_threshold_anchor=high_marker_threshold_anchor,
89 | low_expression_threshold_anchor=low_marker_threshold_anchor,
90 | high_expression_threshold_index=high_marker_threshold_iteration,
91 | low_expression_threshold_index=low_marker_threshold_iteration)
92 |
93 | ### After the AssignCells() function, the CELESTA assigned cell types will be stored in the CelestaObj
94 | ### in the field called final_cell_type_assignment with each row corresponding to a cell.
95 | ### The final_cell_type_assignment has assignment for each round stored in each column, the final
96 | ### cell types and the corresponding cell type number corresponding to the cell type specified in
97 | ### the cell-type signature matrix (please see Input section below).
98 |
99 | ### Plot cells with CELESTA assigned cell types.
100 | ### The cell_number_to_use corresponds to the defined numbers in the prior cell-type signature matrix.
101 | ### For example, 1 corresponds to endothelial cell, 2 corresponds to tumor cell.
102 | ### The program will plot the corresponding cell types given in the "cell_number_to_use" parameter.
103 | ### To plot the "unknown" cells that are left unassigned by CELESTA, include 0 in the list.
104 | ### The default color for unknown cells is gray.
105 | ### The size of the cells plotted can be modified by changing the parameter test_size.
106 | PlotCellsAnyCombination(cell_type_assignment_to_plot=CelestaObj@final_cell_type_assignment[,(CelestaObj@total_rounds+1)],
107 | coords = CelestaObj@coords,
108 | prior_info = prior_marker_info,
109 | cell_number_to_use=c(1,2,3),
110 | cell_type_colors=c("yellow","red","blue"),
111 | test_size=1)
112 |
113 | ### To include unknown cells
114 | PlotCellsAnyCombination(cell_type_assignment_to_plot=CelestaObj@final_cell_type_assignment[,(CelestaObj@total_rounds+1)],
115 | coords = CelestaObj@coords,
116 | prior_info = prior_marker_info,
117 | cell_number_to_use=c(0,1,2,3),cell_type_colors=c("yellow","red","blue"))
118 |
119 | ### plot expression probability
120 | PlotExpProb(coords=CelestaObj@coords,
121 | marker_exp_prob=CelestaObj@marker_exp_prob,
122 | prior_marker_info = prior_marker_info,
123 | save_plot = TRUE)
124 | ```
125 |
126 | ## Inputs
127 |
128 | CELESTA requires two inputs:
`1. Segmented imaging data`:
a
129 | dataframe with rows as the cells, and needs to have (1) two columns
130 | named X and Y to define the XY coordinates of the cells and (2) other
131 | columns having the protein marker expressions for each cell
132 |
133 | Below is an example of the segmented imaging file header
134 |
135 | 
137 |
138 | `2. User-defined cell-type signature matrix`:
(1) The first column
139 | has to contain the cell types to be inferred
(2) The second column
140 | has the lineage information for each cell type. The lineage information
141 | has three numbers connected by “\_” (underscore). The first number
142 | indicates the round. Cell types with the same lineage level are inferred in
143 | the same round. An increasing number indicates increasing cell-type
144 | resolution. For example, immune cells -> CD3+ T cells -> CD4+ T
145 | cells. The third number is a number assigned to the cell type, i.e., cell
146 | type number. The middle number tells the previous lineage cell type
147 | number for the current cell type. For example, the middle number for
148 | CD3+ T cells is 5, because it is a subtype of immune cells which have
149 | cell type number assigned to 5.
(3) Starting from column three,
150 | each column is a protein marker. If the protein marker is known to be
151 | expressed for that cell type, then it is denoted by “1”. If the protein
152 | marker is known to not express for a cell type, then it is denoted by
153 | “0”. If the protein marker is irrelevant or uncertain to express for a
154 | cell type, then it is denoted by “NA”.
(4) More examples of the
155 | user-defined cell-type signature matrix are provided in the
156 | data folder.
157 |
158 | Below is an example of cell-type signature matrix based on imaging panel
159 | used in Schurch et al. Cell, 2020.
160 |
161 | 
163 |
164 | ## Outputs
165 |
166 | CELESTA outputs: 1. After running `AssignCells()` function, CELESTA will
167 | output a .csv file with the cell type assignment to each cell for each
168 | round and the final combined cell types.
2. In addition, users can
169 | access the results in the CELESTA object under the slot
170 | “final\_cell\_type\_assignment”. The anchor cells defined for each round
171 | can be found under the slot “anchor\_cell\_type\_assignment”.
172 |
173 | CELESTA can also plot the assigned cells by using the
174 | `PlotCellsAnyCombination()` function. An example output image is shown
175 | below:  Users can compare the output
177 | with the original images. An example is shown below:
`Please note:`
178 | CODEX images preprocessed with Akoya Biosciences software have their
179 | image tiles stitched in a flipped way. So in some cases, the image needs
180 | to be flipped for the comparison.

182 |
183 | ## How to define thresholds
184 |
185 | In the `AssignCells()` function, it requires four vectors to define the
186 | high and low thresholds for each cell type. The length of the vector
187 | equals to the total number of cell types defined in the cell-type
188 | signature matrix. Examples of the thresholds are provided in the
189 | data folder.
We suggest starting with the default thresholds and
190 | modifying them by comparing the results with the original staining
191 | demonstrated below.
Two vectors are required for defining the
192 | “high\_expression\_threshold”, one for anchor cells and one for index
193 | cells (non-anchor cells). The thresholds define how high the marker
194 | expression probability must be for the marker to be considered expressed. An
195 | example for defining high\_expression\_threshold is shown below: 
197 |
You can also specify the thresholds using:
198 |
199 | ``` r
200 | CelestaObj <- AssignCells(CelestaObj,max_iteration=10,cell_change_threshold=0.01,
201 | high_expression_threshold_anchor=c(0.7,0.7,0.7,0.7,0.7,0.8,0.9,0.9),
202 | low_expression_threshold_anchor=c(1,1,1,1,1,1,1,1),
203 | high_expression_threshold_index=c(0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5),
204 | low_expression_threshold_index=c(1,1,1,1,1,1,1,1))
205 | ```
206 |
207 | The length of each threshold vector corresponds to the number of
208 | cell types. The order of the thresholds corresponds to the order of the cell types in
209 | the defined cell-type signature matrix.
210 |
211 | To find the proper threshold, the `PlotExpProb()` function can be
212 | applied. Because the segmented data may have some compensation in the
213 | values which are the inputs to CELESTA, the expression probabilities are
214 | calculated based on the segmented data. It’s useful to compare the
215 | expression probabilities with the CODEX staining for each marker.
216 | For example, for endothelial cells, if we plot the expression
217 | probabilities of CD31 (left) and compare with the CD31 staining,
218 | approximately 0.9 and 0.8 would be the right threshold for defining how
219 | much the cell should express CD31. `Please note:` It is suggested that
220 | for anchor cells, use a slightly higher threshold than index cells.
221 |  Another example, for
222 | tumor cells, if we plot the expression probabilities of Cytokeratin
223 | (left) and compare with the Cytokeratin staining, approximately 0.9 and
224 | 0.8 would be the right threshold for defining how much the cell should
225 | express Cytokeratin. `Please note:` It is suggested that for anchor
226 | cells, use a slightly higher threshold than index cells. 
228 |
229 | Two vectors are required for defining the “low\_marker\_threshold”,
230 | one for anchor cells and one for index cells. The thresholds define what
231 | the marker expression probability must be for the marker to be considered
232 | not expressed. Normally 1 is assigned to this value unless there are a
233 | lot of doublets or co-staining in the data. **The Low expression
234 | threshold default values in general are robust, and thus we recommend
235 | testing the High expression threshold values.**
236 |
237 | An example for defining low\_marker\_threshold is shown below:
238 |
239 | 
240 |
241 | ## Getting help
242 |
243 | If you encounter a clear bug, please file an issue with a minimal
244 | reproducible example on
245 | [GitHub](https://github.com/plevritis/CELESTA/issues). For questions and
246 | other discussion, please use
247 | [community.rstudio.com](https://community.rstudio.com/).
248 |
--------------------------------------------------------------------------------
/data/high_marker_threshold_anchor.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/high_marker_threshold_anchor.rda
--------------------------------------------------------------------------------
/data/high_marker_threshold_iteration.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/high_marker_threshold_iteration.rda
--------------------------------------------------------------------------------
/data/imaging_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/imaging_data.rda
--------------------------------------------------------------------------------
/data/low_marker_threshold_anchor.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/low_marker_threshold_anchor.rda
--------------------------------------------------------------------------------
/data/low_marker_threshold_iteration.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/low_marker_threshold_iteration.rda
--------------------------------------------------------------------------------
/data/prior_marker_info.csv:
--------------------------------------------------------------------------------
1 | ,Lineage_level,CD31 - vasculature:Cyc_19_ch_3,CD34 - vasculature:Cyc_20_ch_3,Cytokeratin - epithelia:Cyc_10_ch_2,aSMA - smooth muscle:Cyc_11_ch_2,Podoplanin - lymphatics:Cyc_19_ch_4,CD45 - hematopoietic cells:Cyc_4_ch_2,CD15 - granulocytes:Cyc_14_ch_2,CD3 - T cells:Cyc_16_ch_4,CD20 - B cells:Cyc_8_ch_3,CD11c - DCs:Cyc_12_ch_3,CD163 - macrophages:Cyc_17_ch_3,CD68 - macrophages:Cyc_18_ch_4,CD38 - multifunctional:Cyc_20_ch_4,CD56 - NK cells:Cyc_10_ch_4,CD8 - cytotoxic T cells:Cyc_3_ch_2,CD4 - T helper cells:Cyc_6_ch_3,CD45RO - memory cells:Cyc_18_ch_3,FOXP3 - regulatory T cells:Cyc_2_ch_3
2 | vasculature,1_0_1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3 | tumor cells,1_0_2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4 | aSMA+ stroma,1_0_3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5 | lymphatics,1_0_4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
6 | immune cells,1_0_5,0,0,0,0,0,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
7 | CD3+ T cells,2_5_6,0,0,0,0,0,NA,0,1,0,0,0,0,0,0,NA,NA,NA,NA
8 | CD15+ granulocytes,2_5_7,0,0,0,0,0,NA,1,0,0,0,0,NA,0,0,0,0,NA,0
9 | B cells,2_5_8,0,0,0,0,0,NA,0,0,1,0,0,0,0,0,0,0,0,0
10 | CD11c+ DCs,2_5_9,0,0,0,0,0,NA,0,0,0,1,0,0,0,0,0,0,0,0
11 | CD68+CD163+ macrophages,2_5_10,0,0,0,0,0,NA,0,0,0,0,1,1,0,0,0,0,NA,0
12 | plasma cells,2_5_11,0,0,0,0,0,NA,0,0,0,0,0,0,1,0,0,0,0,0
13 | NK cells,2_5_12,0,0,0,0,0,NA,0,NA,0,0,0,0,0,1,0,0,0,0
14 | CD8+ T cells,3_6_13,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,1,0,NA,0
15 | CD4+ T cells,3_6_14,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,1,NA,NA
16 | CD4+ T cells CD45RO+,4_14_15,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,NA,1,0
17 | Tregs,4_14_16,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,NA,0,1
--------------------------------------------------------------------------------
/data/prior_marker_info.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/prior_marker_info.rda
--------------------------------------------------------------------------------
/images/CD31_threshold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/CD31_threshold.png
--------------------------------------------------------------------------------
/images/Cytokeratin_threshold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/Cytokeratin_threshold.png
--------------------------------------------------------------------------------
/images/aSMA_threshold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/aSMA_threshold.png
--------------------------------------------------------------------------------
/images/demo_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/demo_image.png
--------------------------------------------------------------------------------
/images/high_threshold_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/high_threshold_example.png
--------------------------------------------------------------------------------
/images/low_threshold_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/low_threshold_example.png
--------------------------------------------------------------------------------
/images/plot_cell_assignment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/plot_cell_assignment.png
--------------------------------------------------------------------------------
/images/prior_matrix_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/prior_matrix_example.png
--------------------------------------------------------------------------------
/images/segmented_file_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/segmented_file_example.png
--------------------------------------------------------------------------------
/man/AssignCellTypes.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{AssignCellTypes}
4 | \alias{AssignCellTypes}
5 | \title{AssignCellTypes}
6 | \usage{
7 | AssignCellTypes(
8 | initial_pri_matrix,
9 | current_cell_prob,
10 | current_cell_type_assignment,
11 | marker_exp_prob,
12 | cell_type_num,
13 | unassigned_cells,
14 | round,
15 | high_marker_threshold,
16 | low_marker_threshold,
17 | min_difference = 0,
18 | min_prob = 0
19 | )
20 | }
21 | \arguments{
22 | \item{initial_pri_matrix}{user defined cell-type marker matrix for a
23 | specific round}
24 |
25 | \item{current_cell_prob}{the current cell probability
26 | (number_cells x number_cell_type)}
27 |
28 | \item{current_cell_type_assignment}{the current cell type assignments
29 | (number_cells x total_rounds)}
30 |
31 | \item{marker_exp_prob}{the marker expression probability for each cell}
32 |
33 | \item{cell_type_num}{the cell types associated with the current round}
34 |
35 | \item{unassigned_cells}{cells not assigned a cell type for each round and
36 | iteration}
37 |
38 | \item{round}{the current round}
39 |
40 | \item{high_marker_threshold}{the upper threshold for each cell type}
41 |
42 | \item{low_marker_threshold}{the lower threshold for each cell type}
43 |
44 | \item{min_difference}{lower bound used to determine cells that meet the
45 | threshold}
46 |
47 | \item{min_prob}{lower bound used to determine cells that meet the threshold}
48 | }
49 | \value{
50 | an updated current cell type assignment (number_cells x total_rounds)
51 | with more cells assigned for the current round
52 | }
53 | \description{
54 | Find the cell types based on the scores (anchor cell) or
55 | probabilities (index cell)
56 | }
57 |
--------------------------------------------------------------------------------
/man/AssignCells.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{AssignCells}
4 | \alias{AssignCells}
5 | \title{AssignCells}
6 | \usage{
7 | AssignCells(
8 | celesta_obj,
9 | max_iteration = 10,
10 | cell_change_threshold = 0.01,
11 | min_diff = 0,
12 | min_probability = 0,
13 | high_expression_threshold_anchor = rep(0.7, length = 50),
14 | low_expression_threshold_anchor = rep(0.9, length = 50),
15 | high_expression_threshold_index = rep(0.5, length = 50),
16 | low_expression_threshold_index = rep(1, length = 50),
17 | progress = NULL,
18 | save_result = T
19 | )
20 | }
21 | \arguments{
22 | \item{celesta_obj}{an initialized and filtered Celesta object (provided by
23 | \code{FilterCells})}
24 |
25 | \item{max_iteration}{the maximum number of iterations}
26 |
27 | \item{cell_change_threshold}{user defined threshold on when the iterative
28 | cell-type assignment stops. The default value is 0.01, which means that if
29 | the percentage of additional assigned cells is smaller than 1\% of the
30 | unassigned cells, then cell-type assignment will stop. The recommended range
31 | is 0.01 - 0.05. Note that the higher the cell change threshold, the more
32 | cells are left unassigned.}
33 |
34 | \item{min_diff}{user defined threshold on how much the largest cell-type
35 | probability needs to be higher than the second largest cell-type probability.
36 | The default value is 0. It is recommended to not change this value.}
37 |
38 | \item{min_probability}{user defined threshold on the maximum probability
39 | (i.e. a cell-type probability needs to be higher than this threshold for a
40 | cell to be assigned to that cell type). The default value is 0. It is
41 | recommended to not set this value higher than 0.5.}
42 |
43 | \item{high_expression_threshold_anchor}{the upper threshold for each cell type}
44 |
45 | \item{low_expression_threshold_anchor}{the lower threshold for each cell type}
46 |
47 | \item{high_expression_threshold_index}{user defined marker expression
48 | probability threshold for high expression for non-anchor cells}
49 |
50 | \item{low_expression_threshold_index}{user defined marker expression
51 | probability threshold for low expression for non-anchor cells}
52 |
53 | \item{progress}{progress object used for the Shiny app. Do not specify
54 | manually.}
55 | }
56 | \value{
57 | a fully initialized Celesta object
58 | }
59 | \description{
60 | Iteratively assigns cells based on spatial and protein
61 | expression information.
62 | }
63 |
--------------------------------------------------------------------------------
/man/BuildSigmoidFunction.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{BuildSigmoidFunction}
4 | \alias{BuildSigmoidFunction}
5 | \title{BuildSigmoidFunction}
6 | \usage{
7 | BuildSigmoidFunction(marker_exp_matrix, figure = FALSE)
8 | }
9 | \arguments{
10 | \item{marker_exp_matrix}{transformed protein marker expression (or original
11 | segmentation protein marker expression if transformation is not specified)}
12 |
13 | \item{figure}{whether a figure should be generated or not}
14 | }
15 | \value{
16 | the sigmoid function parameter, containing the \eqn{x_root} and slope
17 | }
18 | \description{
19 | Builds the sigmoid function for the calculation of the
20 | expression probability
21 | }
22 |
--------------------------------------------------------------------------------
/man/CalcMarkerActivationProbability.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{CalcMarkerActivationProbability}
4 | \alias{CalcMarkerActivationProbability}
5 | \title{CalcMarkerActivationProbability}
6 | \usage{
7 | CalcMarkerActivationProbability(marker_exp_matrix, figure = FALSE)
8 | }
9 | \arguments{
10 | \item{marker_exp_matrix}{transformed protein marker expression (or original
11 | segmentation
12 | protein marker expression if transformation is not specified)}
13 | }
14 | \value{
15 | the protein marker activation probability
16 | }
17 | \description{
18 | Calculates the activation probability for each marker in the
19 | prior matrix
20 | }
21 |
--------------------------------------------------------------------------------
/man/CalculateBeta.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{CalculateBeta}
4 | \alias{CalculateBeta}
5 | \title{CalculateBeta}
6 | \usage{
7 | CalculateBeta(
8 | dist_from_nearest_assigned_cell,
9 | scale_factor = 5,
10 | bandwidth = 100
11 | )
12 | }
13 | \arguments{
14 | \item{dist_from_nearest_assigned_cell}{the distance from the nearest assigned
15 | cell}
16 |
17 | \item{scale_factor}{the scale factor}
18 |
19 | \item{bandwidth}{the bandwidth}
20 | }
21 | \value{
22 | the beta value
23 | }
24 | \description{
25 | Calculates beta
26 | }
27 |
--------------------------------------------------------------------------------
/man/CalculateIndexCellProb.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{CalculateIndexCellProb}
4 | \alias{CalculateIndexCellProb}
5 | \title{CalculateIndexCellProb}
6 | \usage{
7 | CalculateIndexCellProb(
8 | current_cell_prob,
9 | current_cell_type_assignment,
10 | current_beta,
11 | nb_cell_type,
12 | current_scoring_matrix,
13 | cell_type_num,
14 | unassigned_cells,
15 | round
16 | )
17 | }
18 | \arguments{
19 | \item{current_cell_prob}{the current cell probability
20 | (number_cells x number_cell_type)}
21 |
22 | \item{current_cell_type_assignment}{the current cell type assignments
23 | (number_cells x total_rounds)}
24 |
25 | \item{current_beta}{the current beta values}
26 |
27 | \item{nb_cell_type}{cell types of the neighboring cells for index cells}
28 |
29 | \item{current_scoring_matrix}{the current scoring matrix
30 | (number_cells x number_cell_type)}
31 |
32 | \item{cell_type_num}{the cell types associated with the current round}
33 |
34 | \item{unassigned_cells}{cells not assigned a cell type for each round and
35 | iteration}
36 |
37 | \item{round}{the current round}
38 | }
39 | \value{
40 | the probability for each cell type for unassigned cells
41 | }
42 | \description{
43 | Calculates the probability for index cells
44 | }
45 |
--------------------------------------------------------------------------------
/man/CalculateProbabilityDifference.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{CalculateProbabilityDifference}
4 | \alias{CalculateProbabilityDifference}
5 | \title{CalculateProbabilityDifference}
6 | \usage{
7 | CalculateProbabilityDifference(
8 | max.prob,
9 | max.prob_index,
10 | cell_prob_list,
11 | unassigned_cells
12 | )
13 | }
14 | \arguments{
15 | \item{max.prob}{the maximum marker probability for each cell}
16 |
17 | \item{max.prob_index}{the index of the maximum marker probability for each
18 | cell}
19 |
20 | \item{cell_prob_list}{the probabilities of the cells that are not assigned a
21 | cell type}
22 |
23 | \item{unassigned_cells}{cells not assigned a cell type for each round and
24 | iteration}
25 | }
26 | \value{
27 | the minimum of the difference in probability between the maximum
28 | marker probability and other marker probabilities
29 | }
30 | \description{
31 | Calculate the probability differences
32 | }
33 |
--------------------------------------------------------------------------------
/man/CalculateScores.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{CalculateScores}
4 | \alias{CalculateScores}
5 | \title{CalculateScores}
6 | \usage{
7 | CalculateScores(
8 | marker_exp_prob,
9 | current_pri_matrix,
10 | current_scoring_matrix,
11 | round,
12 | unassigned_cells,
13 | cell_type_num
14 | )
15 | }
16 | \arguments{
17 | \item{marker_exp_prob}{the marker expression probability for each cell}
18 |
19 | \item{current_pri_matrix}{the updated cell-type marker matrix}
20 |
21 | \item{current_scoring_matrix}{the current scoring matrix
22 | (number_cells x number_cell_type)}
23 |
24 | \item{round}{the current round}
25 |
26 | \item{unassigned_cells}{cells not assigned a cell type for each round and
27 | iteration}
28 |
29 | \item{cell_type_num}{the cell types associated with the current round}
30 | }
31 | \value{
32 | the current scoring matrix containing the scores for each cell type
33 | associated with the current round for each unassigned cell
34 | }
35 | \description{
36 | Calculate the scores based on the scoring function
37 | }
38 |
--------------------------------------------------------------------------------
/man/Celesta-class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \docType{class}
4 | \name{Celesta-class}
5 | \alias{Celesta-class}
6 | \alias{Celesta}
7 | \title{Celesta}
8 | \description{
9 | Celesta object definition
10 | }
11 | \section{Slots}{
12 |
13 | \describe{
14 | \item{\code{project_name}}{name of the project (used in file names)}
15 |
16 | \item{\code{prior_info}}{user-defined cell-type signature matrix.
17 |
18 | The data should contain two columns (named X and Y) for the x, y coordinates
19 | and a column for each protein marker. Each row represents the data for a
20 | single cell, including its x, y coordinates and expression for each protein
21 | marker.}
22 |
23 | \item{\code{marker_exp_matrix}}{transformed protein marker expression (or original
24 | segmentation protein marker expression if transformation is not specified)}
25 |
26 | \item{\code{original_exp}}{original protein marker expression (containing only the
27 | protein markers specified in \code{prior_info})}
28 |
29 | \item{\code{cell_ID}}{the IDs of the cells (from 1 to the total number of cells)}
30 |
31 | \item{\code{lineage_info}}{the lineage information from \code{prior_info} parsed into
32 | round, previous cell type, and cell type number columns}
33 |
34 | \item{\code{coords}}{the x, y coordinates of each cell}
35 |
36 | \item{\code{cell_prob}}{cell type probability for each cell}
37 |
38 | \item{\code{final_cell_type_assignment}}{the final cell type assignments}
39 |
40 | \item{\code{nb_list}}{the list of N-nearest neighbors}
41 |
42 | \item{\code{total_rounds}}{the maximum round value}
43 |
44 | \item{\code{cell_nb_in_bandwidth}}{the cells located within a bandwidth to cell \emph{c}}
45 |
46 | \item{\code{cell_nb_dist}}{the distance of each cell to cell \emph{c} within a bandwidth}
47 |
48 | \item{\code{initial_pri_matrix}}{user defined cell-type marker matrix for a specific
49 | round}
50 |
51 | \item{\code{anchor_cell_type_assignment}}{the anchor cell type assignments}
52 |
53 | \item{\code{dist_from_nearest_assigned_cell}}{the distance from the nearest assigned
54 | cell}
55 |
56 | \item{\code{nb_cell_type}}{cell types of the neighboring cells for index cells}
57 |
58 | \item{\code{marker_exp_prob}}{the marker expression probability for each cell}
59 |
60 | \item{\code{current_scoring_matrix}}{the current scoring matrix
61 | (number_cells x number_cell_type)}
62 |
63 | \item{\code{current_pri_matrix}}{the updated cell-type marker matrix}
64 |
65 | \item{\code{current_cell_prob}}{the current cell probability
66 | (number_cells x number_cell_type)}
67 |
68 | \item{\code{current_cell_type_assignment}}{the current cell type assignments
69 | (number_cells x total_rounds)}
70 |
71 | \item{\code{starting_cell_type_assignment}}{the initial cell type assignments
72 | (number_cells x total_rounds)}
73 |
74 | \item{\code{current_beta}}{the current beta values}
75 |
76 | \item{\code{unassigned_cells}}{cells not assigned a cell type for each round and
77 | iteration}
78 |
79 | \item{\code{assigned_cells}}{cells with an assigned cell type}
80 | }}
81 |
82 |
--------------------------------------------------------------------------------
/man/CountCellType.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{CountCellType}
4 | \alias{CountCellType}
5 | \title{CountCellType}
6 | \usage{
7 | CountCellType(prior_info, current_cell_type_assignment, cell_type_num, round)
8 | }
9 | \arguments{
10 | \item{prior_info}{user-defined cell-type signature matrix.
11 |
12 | The data should contain two columns (named X and Y) for the x, y coordinates
13 | and a column for each protein marker. Each row represents the data for a
14 | single cell, including its x, y coordinates and expression for each protein
15 | marker.}
16 |
17 | \item{current_cell_type_assignment}{the current cell type assignments
18 | (number_cells x total_rounds)}
19 |
20 | \item{cell_type_num}{the cell types associated with the current round}
21 |
22 | \item{round}{the current round}
23 | }
24 | \value{
25 | the count and proportion for each cell type based on the current cell
26 | type assignments
27 | }
28 | \description{
29 | Counts the cell type
30 | }
31 |
--------------------------------------------------------------------------------
/man/CreateCelestaObject.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{CreateCelestaObject}
4 | \alias{CreateCelestaObject}
5 | \title{CreateCelestaObject}
6 | \usage{
7 | CreateCelestaObject(
8 | project_title,
9 | prior_marker_info,
10 | imaging_data_file,
11 | cofactor = 10,
12 | transform_type = 1,
13 | number_of_neighbors = 5,
14 | bandwidth = 100,
15 | progress = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{project_title}{\emph{required} name of the project (used in file names)}
20 |
21 | \item{prior_marker_info}{\emph{required} user-defined cell-type signature matrix.
22 |
23 | The data should contain two columns (named X and Y) for the x, y coordinates
24 | and a column for each protein marker. Each row represents the data for a
25 | single cell, including its x, y coordinates and expression for each protein
26 | marker.}
27 |
28 | \item{imaging_data_file}{\emph{required} segmented imaging data.
29 | The first column must contain the cell types to be inferred. The second
30 | column must contain the lineage information with the following format
31 | (without spaces): # _ # _ #.
32 | \itemize{
33 | \item The first number indicates round. Cell types with the same lineage level
34 | are inferred at the same round. A higher number indicates higher cell-type
35 | resolution. For example, immune cells -> CD3+ T cells -> CD4+ T cells.
36 | \item The second number indicates the previous lineage cell type number for the
37 | current cell type. For example, the second number for CD3+ T cell is 5
38 | because it is a subtype of immune cells which have cell type number 5.
39 | \item The third number is a number assigned to the cell type
40 | (i.e. cell type number).
41 | }
42 |
43 | The third column and beyond are columns for protein markers.
44 | \itemize{
45 | \item If a protein marker is known to be expressed for that cell type, then it
46 | is denoted by a "1".
47 | \item If a protein marker is known to not express for a cell type, then it is
48 | denoted by a "0".
49 | \item If the protein marker is irrelevant or uncertain to express for a cell
50 | type, then it is denoted by "NA".
51 | }}
52 |
53 | \item{cofactor}{value used to calculate the arcsinh transform on the protein
54 | marker expressions}
55 |
56 | \item{transform_type}{indicates a transform type for the protein marker
57 | expressions (0 = no transform, 1 = arcsinh transform)}
58 |
59 | \item{number_of_neighbors}{the number of cells in a single neighborhood}
60 |
61 | \item{bandwidth}{the upper distance bound used when calculating
62 | neighborhoods by distance}
63 |
64 | \item{progress}{progress object used for the Shiny app. Do not specify
65 | manually.}
66 | }
67 | \value{
68 | an initialized Celesta object
69 | }
70 | \description{
71 | Initializes the following fields of the Celesta object:
72 | \itemize{
73 | \item \code{cell_ID}
74 | \item \code{original_exp}
75 | \item \code{marker_exp_matrix}
76 | \item \code{prior_info}
77 | \item \code{lineage_info}
78 | \item \code{total_rounds}
79 | \item \code{coords}
80 | \item \code{marker_exp_prob}
81 | \item \code{nb_list}
82 | \item \code{cell_nb_in_bandwidth}
83 | \item \code{cell_nb_dist}
84 | \item \code{current_cell_type_assignment}
85 | \item \code{anchor_cell_type_assignment}
86 | \item \code{starting_cell_type_assignment}
87 | \item \code{current_scoring_matrix}
88 | \item \code{current_cell_prob}
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/man/FilterArtifactCells.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{FilterArtifactCells}
4 | \alias{FilterArtifactCells}
5 | \title{FilterArtifactCells}
6 | \usage{
7 | FilterArtifactCells(
8 | total_rounds,
9 | marker_exp_matrix,
10 | marker_exp_prob,
11 | current_cell_type_assignment,
12 | high_marker_threshold = 0.9,
13 | low_marker_threshold = 0.4
14 | )
15 | }
16 | \arguments{
17 | \item{total_rounds}{the maximum round value}
18 |
19 | \item{marker_exp_matrix}{transformed protein marker expression (or original
20 | segmentation protein marker expression if transformation is not specified)}
21 |
22 | \item{marker_exp_prob}{the marker expression probability for each cell}
23 |
24 | \item{current_cell_type_assignment}{the cell type assignments for each round
25 | for each cell}
26 |
27 | \item{high_marker_threshold}{upper bound used to filter out questionable
28 | cells}
29 |
30 | \item{low_marker_threshold}{lower bound used to filter out questionable
31 | cells}
32 | }
33 | \value{
34 | current cell type assignment, where questionable cells are marked
35 | with a row of NAs.
36 | }
37 | \description{
38 | Filter out cells that could potentially be artifacts
39 | }
40 |
--------------------------------------------------------------------------------
/man/FilterCells.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{FilterCells}
4 | \alias{FilterCells}
5 | \title{FilterCells}
6 | \usage{
7 | FilterCells(
8 | celesta_obj,
9 | high_marker_threshold = 0.9,
10 | low_marker_threshold = 0.4
11 | )
12 | }
13 | \arguments{
14 | \item{celesta_obj}{an initialized Celesta object (provided by
15 | \code{CreateCelestaObject})}
16 |
17 | \item{high_marker_threshold}{upper bound used to filter out questionable
18 | cells}
19 |
20 | \item{low_marker_threshold}{lower bound used to filter out questionable
21 | cells}
22 | }
23 | \value{
24 | a Celesta object with questionable cells marked with NA
25 | }
26 | \description{
27 | Filters out artifact cells from the cell type assignments
28 | }
29 |
--------------------------------------------------------------------------------
/man/FindCellsToCheck.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{FindCellsToCheck}
4 | \alias{FindCellsToCheck}
5 | \title{FindCellsToCheck}
6 | \usage{
7 | FindCellsToCheck(current_cell_type_assignment, lineage_info, cell_ID, round)
8 | }
9 | \arguments{
10 | \item{current_cell_type_assignment}{the current cell type assignments
11 | (number_cells x total_rounds)}
12 |
13 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into
14 | round, previous cell type, and cell type number columns}
15 |
16 | \item{cell_ID}{the IDs of the cells (from 1 to the total number of cells)}
17 | }
18 | \value{
19 | the IDs of unassigned cells
20 | }
21 | \description{
22 | Find unassigned cells
23 | }
24 |
--------------------------------------------------------------------------------
/man/FindCellsWithId.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{FindCellsWithId}
4 | \alias{FindCellsWithId}
5 | \title{FindCellsWithId}
6 | \usage{
7 | FindCellsWithId(current_cell_type_assignment, lineage_info, cell_ID, round)
8 | }
9 | \arguments{
10 | \item{current_cell_type_assignment}{the current cell type assignments
11 | (number_cells x total_rounds)}
12 |
13 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into
14 | round, previous cell type, and cell type number columns}
15 |
16 | \item{cell_ID}{the IDs of the cells (from 1 to the total number of cells)}
17 |
18 | \item{round}{the current round}
19 | }
20 | \value{
21 | cells that have been assigned a cell type
22 | }
23 | \description{
24 | Find cells assigned with a cell type
25 | }
26 |
--------------------------------------------------------------------------------
/man/FitGmmModel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{FitGmmModel}
4 | \alias{FitGmmModel}
5 | \title{FitGmmModel}
6 | \usage{
7 | FitGmmModel(marker_exp, marker_name, figure = FALSE)
8 | }
9 | \arguments{
10 | \item{marker_exp}{the expression of the marker for each cell}
11 |
12 | \item{marker_name}{the name of the marker}
13 |
14 | \item{figure}{whether a figure should be generated or not}
15 | }
16 | \value{
17 | the Gaussian mixture model parameters for the marker
18 | }
19 | \description{
20 | Fits a Gaussian mixture model for each marker
21 | }
22 |
--------------------------------------------------------------------------------
/man/GetCoords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{GetCoords}
4 | \alias{GetCoords}
5 | \title{GetCoords}
6 | \usage{
7 | GetCoords(imaging_data_file)
8 | }
9 | \arguments{
10 | \item{imaging_data_file}{segmented imaging data.
11 | The first column must contain the cell types to be inferred. The second
12 | column must contain the lineage information with the following format
13 | (without spaces): # _ # _ #.
14 | \itemize{
15 | \item The first number indicates round. Cell types with the same lineage level
16 | are inferred at the same round. A higher number indicates higher cell-type
17 | resolution. For example, immune cells -> CD3+ T cells -> CD4+ T cells.
18 | \item The second number indicates the previous lineage cell type number for the
19 | current cell type. For example, the second number for CD3+ T cell is 5
20 | because it is a subtype of immune cells which have cell type number 5.
21 | \item The third number is a number assigned to the cell type
22 | (i.e. cell type number).
23 | }
24 |
25 | The third column and beyond are columns for protein markers.
26 | \itemize{
27 | \item If a protein marker is known to be expressed for that cell type, then it
28 | is denoted by a "1".
29 | \item If a protein marker is known to not express for a cell type, then it is
30 | denoted by a "0".
31 | \item If the protein marker is irrelevant or uncertain to express for a cell
32 | type, then it is denoted by "NA".
33 | }}
34 | }
35 | \value{
36 | the x, y coordinates of each cell
37 | }
38 | \description{
39 | Gets the x, y coordinates of each cell
40 | }
41 |
--------------------------------------------------------------------------------
/man/GetDistFromNearestAssignedCells.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{GetDistFromNearestAssignedCells}
4 | \alias{GetDistFromNearestAssignedCells}
5 | \title{GetDistFromNearestAssignedCells}
6 | \usage{
7 | GetDistFromNearestAssignedCells(
8 | cell_nb_in_bandwidth,
9 | cell_nb_dist,
10 | current_cell_type_assignment,
11 | cell_type_num,
12 | unassigned_cells,
13 | assigned_cells,
14 | round
15 | )
16 | }
17 | \arguments{
18 | \item{cell_nb_in_bandwidth}{the cells located within a bandwidth to cell \emph{c}}
19 |
20 | \item{cell_nb_dist}{the distance of each cell to cell \emph{c} within a bandwidth}
21 |
22 | \item{current_cell_type_assignment}{the current cell type assignments
23 | (number_cells x total_rounds)}
24 |
25 | \item{cell_type_num}{the cell types associated with the current round}
26 |
27 | \item{unassigned_cells}{cells not assigned a cell type for each round and
28 | iteration}
29 |
30 | \item{assigned_cells}{cells with an assigned cell type}
31 |
32 | \item{round}{the current round}
33 | }
34 | \value{
35 | the distance to the nearest assigned cells
36 | }
37 | \description{
38 | Get distance from nearest assigned cells
39 | }
40 |
--------------------------------------------------------------------------------
/man/GetFinalInferredCellTypes.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{GetFinalInferredCellTypes}
4 | \alias{GetFinalInferredCellTypes}
5 | \title{GetFinalInferredCellTypes}
6 | \usage{
7 | GetFinalInferredCellTypes(
8 | project_name,
9 | total_rounds,
10 | current_cell_type_assignment,
11 | anchor_cell_type_assignment,
12 | prior_info,
13 | lineage_info,
14 | coords,
15 | original_exp,
16 | save_result = T
17 | )
18 | }
19 | \arguments{
20 | \item{total_rounds}{the maximum round}
21 |
22 | \item{current_cell_type_assignment}{the current cell type assignments
23 | (number_cells x total_rounds)}
24 |
25 | \item{anchor_cell_type_assignment}{the anchor cell type assignments}
26 |
27 | \item{prior_info}{user-defined cell-type signature matrix.
28 |
29 | The data should contain two columns (named X and Y) for the x, y coordinates
30 | and a column for each protein marker. Each row represents the data for a
31 | single cell, including its x, y coordinates and expression for each protein
32 | marker.}
33 |
34 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into
35 | round, previous cell type, and cell type number columns}
36 |
37 | \item{coords}{the x, y coordinates of each cell}
38 |
39 | \item{original_exp}{original protein marker expression (containing only the
40 | protein markers specified in \code{prior_info})}
41 |
42 | \item{save_result}{whether or not to save the final cell type assignment
43 | and anchor cell assignment results}
44 | }
45 | \value{
46 | the final cell type assignments
47 | }
48 | \description{
49 | Gets the final cell types and writes two files: the final cell type
50 | assignments and the anchor cell type assignments.
51 | }
52 |
--------------------------------------------------------------------------------
/man/GetInitialPriorMatrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{GetInitialPriorMatrix}
4 | \alias{GetInitialPriorMatrix}
5 | \title{GetInitialPriorMatrix}
6 | \usage{
7 | GetInitialPriorMatrix(lineage_info, prior_marker_info, round)
8 | }
9 | \arguments{
10 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into
11 | round, previous cell type, and cell type number columns}
12 |
13 | \item{round}{the current round}
14 | }
15 | \value{
16 | the prior knowledge of the cell types with the specified round.
17 | }
18 | \description{
19 | Gets the prior knowledge of the cell types with the specified
20 | round.
21 | }
22 |
--------------------------------------------------------------------------------
/man/GetMarkerExpMatrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{GetMarkerExpMatrix}
4 | \alias{GetMarkerExpMatrix}
5 | \title{GetMarkerExpMatrix}
6 | \usage{
7 | GetMarkerExpMatrix(
8 | prior_marker_info,
9 | imaging_data_file,
10 | cofactor,
11 | transform_type
12 | )
13 | }
14 | \arguments{
15 | \item{prior_marker_info}{user-defined cell-type signature matrix.
16 |
17 | The data should contain two columns (named X and Y) for the x, y coordinates
18 | and a column for each protein marker. Each row represents the data for a
19 | single cell, including its x, y coordinates and expression for each protein
20 | marker.}
21 |
22 | \item{imaging_data_file}{segmented imaging data.
23 | The first column must contain the cell types to be inferred. The second
24 | column must contain the lineage information with the following format
25 | (without spaces): # _ # _ #.
26 | \itemize{
27 | \item The first number indicates round. Cell types with the same lineage level
28 | are inferred at the same round. A higher number indicates higher cell-type
29 | resolution. For example, immune cells -> CD3+ T cells -> CD4+ T cells.
30 | \item The second number indicates the previous lineage cell type number for the
31 | current cell type. For example, the second number for CD3+ T cell is 5
32 | because it is a subtype of immune cells which have cell type number 5.
33 | \item The third number is a number assigned to the cell type
34 | (i.e. cell type number).
35 | }
36 |
37 | The third column and beyond are columns for protein markers.
38 | \itemize{
39 | \item If a protein marker is known to be expressed for that cell type, then it
40 | is denoted by a "1".
41 | \item If a protein marker is known to not express for a cell type, then it is
42 | denoted by a "0".
43 | \item If the protein marker is irrelevant or uncertain to express for a cell
44 | type, then it is denoted by "NA".
45 | }}
46 |
47 | \item{cofactor}{used to calculate the arcsinh transform on the protein marker
48 | expressions}
49 |
50 | \item{transform_type}{indicates a transform type for the protein marker
51 | expressions (0 = no transform, 1 = arcsinh transform)}
52 | }
53 | \value{
54 | a list with the following information:
55 | \describe{
56 | \item{\code{cell_ids}}{the IDs of the cells}
57 | \item{\code{original_exp}}{the original expression matrix (containing only the
58 | protein markers specified by \code{prior_marker_info})}
59 | \item{\code{marker_exp_matrix} or \code{original_exp}}{the transformed expression
60 | matrix (or original expression matrix if a transform is not specified)}
61 | }
62 | }
63 | \description{
64 | Gets the protein marker expressions and assigns each cell a
65 | cell ID.
66 |
67 | Only protein markers specified in \code{prior_marker_info} are extracted from the
68 | \code{imaging_data_file}. Cells are assigned IDs from 1 to the total number of
69 | cells. If \code{transform_type = 1}, then an arcsinh transform is applied to the
70 | protein marker expressions.
71 | }
72 |
--------------------------------------------------------------------------------
/man/GetNeighborInfo.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{GetNeighborInfo}
4 | \alias{GetNeighborInfo}
5 | \title{GetNeighborInfo}
6 | \usage{
7 | GetNeighborInfo(coords, number_of_neighbors = 5, bandwidth = 100)
8 | }
9 | \arguments{
10 | \item{coords}{the x, y coordinates of each cell}
11 |
12 | \item{number_of_neighbors}{the number of cells in a single neighborhood}
13 |
14 | \item{bandwidth}{the upper distance bound used when calculating neighborhoods
15 | by distance}
16 | }
17 | \value{
18 | a list of the following information
19 | \describe{
20 | \item{\code{nb_list}}{the list of N-nearest neighbors}
21 | \item{\code{all_cell_nb_in_bandwidth}}{the cells located within a bandwidth to
22 | cell \emph{c}}
23 | \item{\code{cell_nb_dist}}{the distance of each cell to cell \emph{c} within a
24 | bandwidth}
25 | }
26 | }
27 | \description{
28 | Gets the neighborhood information, including neighborhoods by
29 | number and distance.
30 | }
31 |
--------------------------------------------------------------------------------
/man/GetPriorInfo.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{GetPriorInfo}
4 | \alias{GetPriorInfo}
5 | \title{GetPriorInfo}
6 | \usage{
7 | GetPriorInfo(prior_marker_info)
8 | }
9 | \arguments{
10 | \item{prior_marker_info}{user-defined cell-type signature matrix.
11 |
12 | The data should contain two columns (named X and Y) for the x, y coordinates
13 | and a column for each protein marker. Each row represents the data for a
14 | single cell, including its x, y coordinates and expression for each protein
15 | marker.}
16 | }
17 | \value{
18 | a list with the following information:
19 | \describe{
20 | \item{\code{lineage_info}}{the lineage information parsed into round, previous
21 | cell type, and cell type number columns}
22 | \item{\code{total_rounds}}{the maximum round value}
23 | }
24 | }
25 | \description{
26 | Extracts the lineage information from the \code{prior_marker_info}
27 | and determines the total rounds
28 | }
29 |
--------------------------------------------------------------------------------
/man/GetScore.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{GetScore}
4 | \alias{GetScore}
5 | \title{GetScore}
6 | \usage{
7 | GetScore(activation_prob_to_use, prior_info, non_NA_index)
8 | }
9 | \arguments{
10 | \item{activation_prob_to_use}{the marker expression probabilities of the
11 | unassigned cells}
12 |
13 | \item{prior_info}{user-defined cell-type signature matrix.
14 |
15 | The data should contain two columns (named X and Y) for the x, y coordinates
16 | and a column for each protein marker. Each row represents the data for a
17 | single cell, including its x, y coordinates and expression for each protein
18 | marker.}
19 |
20 | \item{non_NA_index}{the index of the columns in \code{current_pri_matrix} that do
21 | not contain NA for a particular cell}
22 | }
23 | \value{
24 | the score of the cell
25 | }
26 | \description{
27 | Calculate scores using MSE
28 | }
29 |
--------------------------------------------------------------------------------
/man/InitializeCellAndScoringMatrices.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{InitializeCellAndScoringMatrices}
4 | \alias{InitializeCellAndScoringMatrices}
5 | \title{InitializeCellAndScoringMatrices}
6 | \usage{
7 | InitializeCellAndScoringMatrices(lineage_info, marker_exp_matrix, prior_info)
8 | }
9 | \arguments{
10 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into
11 | round, previous cell type, and cell type number columns}
12 |
13 | \item{marker_exp_matrix}{transformed protein marker expression (or original
14 | segmentation protein marker expression if transformation is not specified)}
15 |
16 | \item{prior_info}{user-defined cell-type signature matrix.
17 |
18 | The data should contain two columns (name X and Y) for the x, y coordinates
19 | and a column for each protein marker. Each row represents the data for a
20 | single cell, including its x, y coordinates and expression for each protein
21 | marker.}
22 | }
23 | \value{
24 | a list with the following information
25 | \describe{
26 | \item{\code{current_cell_type_assignment}}{a zero matrix with dimension
27 | (number_cells x total_rounds)}
28 | \item{\code{current_scoring_matrix}}{a NA matrix with dimension
29 | (number_cells x number_cell_type)}
30 | \item{\code{current_cell_prob}}{a NA matrix with dimension
31 | (number_cells x number_cell_type)}
32 | }
33 | }
34 | \description{
35 | Initialize the cell type assignments, cell probabilities, and
36 | scoring matrices
37 | }
38 |
--------------------------------------------------------------------------------
/man/MarkQuestionableCells.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{MarkQuestionableCells}
4 | \alias{MarkQuestionableCells}
5 | \title{MarkQuestionableCells}
6 | \usage{
7 | MarkQuestionableCells(
8 | cell_activation_prob,
9 | high_marker_threshold,
10 | low_marker_threshold
11 | )
12 | }
13 | \arguments{
14 | \item{cell_activation_prob}{the protein marker expressions for a single cell}
15 |
16 | \item{high_marker_threshold}{upper bound used to filter out questionable
17 | cells}
18 |
19 | \item{low_marker_threshold}{lower bound used to filter out questionable
20 | cells}
21 | }
22 | \value{
23 | whether a cell is questionable or not
24 | }
25 | \description{
26 | Determine if a cell is questionable.
27 |
28 | A cell is questionable if \emph{all} of its protein marker expressions are below
29 | the \code{low_marker_threshold} or above the \code{high_marker_threshold}.
30 | }
31 |
--------------------------------------------------------------------------------
/man/NeighborCellType.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{NeighborCellType}
4 | \alias{NeighborCellType}
5 | \title{NeighborCellType}
6 | \usage{
7 | NeighborCellType(
8 | nb_list,
9 | current_cell_type_assignment,
10 | cell_type_num,
11 | round,
12 | unassigned_cells
13 | )
14 | }
15 | \arguments{
16 | \item{nb_list}{the list of N-nearest neighbors}
17 |
18 | \item{current_cell_type_assignment}{the current cell type assignments
19 | (number_cells x total_rounds)}
20 |
21 | \item{cell_type_num}{the cell types associated with the current round}
22 |
23 | \item{round}{the current round}
24 |
25 | \item{unassigned_cells}{cells not assigned a cell type for each round and
26 | iteration}
27 | }
28 | \value{
29 | the cell types of the neighbors of unassigned cells
30 | }
31 | \description{
32 | Find the cell types of the neighbors of unassigned cells
33 | }
34 |
--------------------------------------------------------------------------------
/man/PlotCellsAnyCombination.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{PlotCellsAnyCombination}
4 | \alias{PlotCellsAnyCombination}
5 | \title{PlotCellsAnyCombination}
6 | \usage{
7 | PlotCellsAnyCombination(
8 | cell_type_assignment_to_plot,
9 | coords,
10 | prior_info,
11 | cell_number_to_use,
12 | cell_type_colors = c(palette()[2:7], "white"),
13 | test_size = 1,
14 | save_plot = TRUE,
15 | output_dir = "."
16 | )
17 | }
18 | \arguments{
19 | \item{cell_type_assignment_to_plot}{the final cell type assignment for each
20 | cell}
21 |
22 | \item{coords}{the x, y coordinates of each cell}
23 |
24 | \item{prior_info}{user-defined cell-type signature matrix.
25 |
26 | The data should contain two columns (name X and Y) for the x, y coordinates
27 | and a column for each protein marker. Each row represents the data for a
28 | single cell, including its x, y coordinates and expression for each protein
29 | marker.}
30 |
31 | \item{cell_number_to_use}{the row number of the cell types to plot from
32 | \code{prior_info}. To plot unknown cells, include 0 in the list.}
33 |
34 | \item{cell_type_colors}{the colors for the cell types. If unknown cells
35 | are plotted, the color of the unknown cells will be the last color listed.}
36 |
37 | \item{test_size}{the size of the points in the plot}
38 |
39 | \item{save_plot}{whether to save the plot}
40 |
41 | \item{output_dir}{the path to the directory where the plot will be
42 | written. This defaults to the current working directory (\code{"."}).
43 | Note that the directory must exist.}
44 | }
45 | \value{
46 | writes the final cell type assignment plot
47 | }
48 | \description{
49 | Plots the cells using x, y coordinates with their assigned cell
50 | types
51 | }
52 |
--------------------------------------------------------------------------------
/man/PlotExpProb.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{PlotExpProb}
4 | \alias{PlotExpProb}
5 | \title{PlotExpProb}
6 | \usage{
7 | PlotExpProb(
8 | coords,
9 | marker_exp_prob,
10 | prior_marker_info,
11 | size_to_use = 1,
12 | width_to_use = 5,
13 | height_to_use = 4,
14 | save_plot = TRUE,
15 | output_dir = "."
16 | )
17 | }
18 | \arguments{
19 | \item{coords}{the x, y coordinates of each cell}
20 |
21 | \item{marker_exp_prob}{the marker expression probability for each cell}
22 |
23 | \item{prior_marker_info}{user-defined cell-type signature matrix.
24 |
25 | The data should contain two columns (name X and Y) for the x, y coordinates
26 | and a column for each protein marker. Each row represents the data for a
27 | single cell, including its x, y coordinates and expression for each protein
28 | marker.}
29 |
30 | \item{size_to_use}{the size of the points in the plot}
31 |
32 | \item{width_to_use}{the width of the plot}
33 |
34 | \item{height_to_use}{the height of the plot}
35 |
36 | \item{save_plot}{whether to save the plot}
37 |
38 | \item{output_dir}{the path to the directory where the plot will be
39 | written. This defaults to the current working directory (\code{"."}).
40 | Note that the directory must exist.}
41 | }
42 | \value{
43 | writes a plot of the expression probabilities for each marker
44 | }
45 | \description{
46 | Plots the expression probabilities of cells in the tissue
47 | }
48 |
--------------------------------------------------------------------------------
/man/PlotSingleExpProb.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{PlotSingleExpProb}
4 | \alias{PlotSingleExpProb}
5 | \title{PlotSingleExpProb}
6 | \usage{
7 | PlotSingleExpProb(
8 | coords,
9 | marker_exp_prob,
10 | cols = NULL,
11 | marker_to_use,
12 | size_to_use = 1,
13 | width_to_use = 5,
14 | height_to_use = 4,
15 | save_plot = TRUE,
16 | output_dir = "."
17 | )
18 | }
19 | \arguments{
20 | \item{coords}{the x, y coordinates of each cell}
21 |
22 | \item{marker_exp_prob}{the marker expression probability for each cell}
23 |
24 | \item{cols}{the color palette for the plot}
25 |
26 | \item{marker_to_use}{marker to plot}
27 |
28 | \item{size_to_use}{the size of the points in the plot}
29 |
30 | \item{width_to_use}{the width of the plot}
31 |
32 | \item{height_to_use}{the height of the plot}
33 |
34 | \item{save_plot}{whether to save the plot}
35 |
36 | \item{output_dir}{the path to the directory where the plot will be
37 | written. This defaults to the current working directory (\code{"."}).
38 | Note that the directory must exist.}
39 | }
40 | \value{
41 | generates a plot of the expression probabilities for a specified
42 | marker
43 | }
44 | \description{
45 | Plots the expression probabilities of cells in the tissue. This
46 | is used solely for the Shiny app.
47 | }
48 |
--------------------------------------------------------------------------------
/man/UpdatePriorMatrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CELESTA_functions.R
3 | \name{UpdatePriorMatrix}
4 | \alias{UpdatePriorMatrix}
5 | \title{UpdatePriorMatrix}
6 | \usage{
7 | UpdatePriorMatrix(
8 | current_pri_matrix,
9 | initial_pri_matrix,
10 | current_cell_type_assignment,
11 | marker_exp_prob,
12 | round,
13 | cell_type_num
14 | )
15 | }
16 | \arguments{
17 | \item{current_pri_matrix}{the updated cell-type marker matrix}
18 |
19 | \item{initial_pri_matrix}{user defined cell-type marker matrix for a specific
20 | round}
21 |
22 | \item{current_cell_type_assignment}{the current cell type assignments
23 | (number_cells x total_rounds)}
24 |
25 | \item{marker_exp_prob}{the marker expression probability for each cell}
26 |
27 | \item{round}{the current round}
28 |
29 | \item{cell_type_num}{the cell types associated with the current round}
30 | }
31 | \value{
32 | updates the prior knowledge matrix with information from cells
33 | assigned to each cell type
34 | }
35 | \description{
36 | Updates prior knowledge matrix of the cell type signatures
37 | }
38 |
--------------------------------------------------------------------------------
/man/figures/README-pressure-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/man/figures/README-pressure-1.png
--------------------------------------------------------------------------------
/tests/CELESTA_functions_orig.R:
--------------------------------------------------------------------------------
1 | #############################################################################################
2 | #' S4 class "Celesta": holds the inputs and the per-round working state of a CELESTA run
3 | #' @export
4 | Celesta <- setClass("Celesta",
5 | slots = c(marker_exp_matrix = "matrix", # marker expression per cell (arcsinh-transformed when requested)
6 | original_exp ="matrix", # untransformed marker expression from segmentation
7 | prior_info = "data.frame", # user-supplied cell-type prior knowledge table
8 | cell_ID = "numeric", # sequential cell identifiers (1..number of cells)
9 | lineage_info = "data.frame", # columns Round, Previous_cell_type, Cell_type_number
10 | coords = "matrix", # X, Y coordinates of each cell
11 | marker_exp_prob = "matrix", # marker expression probability
12 | cell_prob = "matrix", # cell type probability for each cell
13 | final_cell_type_assignment = "matrix",
14 | project_name = "character", # project title given at construction
15 | nb_list = "matrix", # N-nearest neighbor list
16 | total_rounds = "numeric", # largest Round value in lineage_info
17 | cell_nb_in_bandwidth = "ANY", # cells located within a bandwidth of cell i
18 | cell_nb_dist = "ANY", # distance from cell i to each cell within the bandwidth
19 | current_scoring_matrix = "matrix", # scoring-function values (cells x cell types)
20 | initial_pri_matrix = "matrix", # user-defined cell-type marker matrix
21 | current_pri_matrix = "matrix", # updated cell-type marker matrix
22 | current_cell_prob = "matrix", # cell probability for updates
23 | current_cell_type_assignment = "matrix", # working assignment (cells x rounds)
24 | anchor_cell_type_assignment = "matrix",
25 | starting_cell_type_assignment = "matrix",
26 | current_beta = "matrix",
27 | dist_from_nearest_assigned_cell = "matrix",
28 | nb_cell_type = "ANY", # cell types of the neighboring cells for index cells
29 | all_cell_nb_cell_type = "ANY", # cell types of the neighboring cells for all cells
30 | unassigned_cells = "numeric", # cells to check in each round and each iteration
31 | assigned_cells = "numeric")) # cells already assigned a cell type
32 |
33 | #############################################################################################
34 | #############################################################################################
35 | #' Store the marker expression matrix (optionally arcsinh-transformed), the raw expression and cell IDs
36 | #' @export
37 | GetMarkerExpMatrix <- function(CelestaObj,prior_marker_info,imaging_data_file,
38 | cofactor,transform_type){
39 | markers_to_use <- colnames(prior_marker_info)[3:dim(prior_marker_info)[2]] # marker names start at column 3 of the prior table
40 | matching_markers <- match(markers_to_use,colnames(imaging_data_file)) # column positions in the imaging data; NA when a marker is missing
41 | if(length(which(is.na(matching_markers)==TRUE))>0){ # NOTE(review): this branch only prints, so the function does not return CelestaObj here
42 | print("Please double check the protein markers in the cell-type marker matrix and
43 | imaging input file")
44 | }else{
45 | if(transform_type==0){# no transform: use the expression values as they are
46 | marker_exp_matrix <- data.matrix(imaging_data_file[,matching_markers])
47 | CelestaObj@marker_exp_matrix <- marker_exp_matrix
48 | }else if(transform_type==1){# arcsinh transform of expression / cofactor
49 | marker_exp_matrix <- data.matrix(imaging_data_file[,matching_markers])
50 | marker_exp_transformed <- asinh(marker_exp_matrix/cofactor)
51 | CelestaObj@marker_exp_matrix <- marker_exp_transformed
52 | }
53 | CelestaObj@original_exp <- data.matrix(imaging_data_file[,matching_markers]) # always keep the untransformed values
54 | cellIDs <- seq(1,dim(marker_exp_matrix)[1],by=1) # NOTE(review): marker_exp_matrix is only assigned when transform_type is 0 or 1
55 | CelestaObj@cell_ID <- cellIDs
56 | return (CelestaObj)
57 | }
58 | }
59 | ############################################################################################
60 | #' Store the prior table and parse its lineage column into Round, Previous_cell_type and Cell_type_number
61 | #' @export
62 | GetPirorInfo <- function(CelestaObj,prior_marker_info){
63 | CelestaObj@prior_info <- prior_marker_info
64 | lineage_column <- prior_marker_info[,2] # column 2 holds the lineage strings
65 | if(grepl("_", lineage_column[1], fixed = TRUE)){ # only the first entry is checked for the "_" separator
66 | round <- integer()
67 | previous_cell_type <- integer()
68 | cell_type_number <- integer()
69 | for(i in 1:dim(prior_marker_info)[1]){
70 | info <- strtoi(unlist(strsplit(prior_marker_info[i,2],"_"))) # split on "_" and convert the pieces to integers
71 | round[i] <- info[1]
72 | previous_cell_type[i] <- info[2]
73 | cell_type_number[i] <- info[3]
74 | }
75 | CelestaObj@lineage_info <- data.frame(Round=round,
76 | Previous_cell_type=previous_cell_type,
77 | Cell_type_number=cell_type_number)
78 | total_rounds <- max(CelestaObj@lineage_info$Round)
79 | CelestaObj@total_rounds <- total_rounds
80 | }else{ # lineage_info and total_rounds are left unset on this path
81 | print("Warning:the lineage information column has formatting errors")
82 | }
83 | return(CelestaObj)
84 | }
85 | #############################################################################################
86 | #' Store the X and Y columns of the imaging data as a two-column coordinate matrix
87 | #' @export
88 | GetCoords <- function(CelestaObj,imaging_data_file){
89 | Coords <- cbind(imaging_data_file$X, # one row per row of the imaging data
90 | imaging_data_file$Y)
91 | colnames(Coords) <- c("X","Y")
92 | CelestaObj@coords <- Coords
93 | return(CelestaObj) # the same object with its coords slot filled
94 | }
95 | ############################################################################################
96 | #' Fit a two-component Gaussian mixture model to one marker's expression values
97 | #' @export
98 | GMM_fitting <- function(marker_exp,marker_name,figure=FALSE){
99 | print("Marker:")
100 | print(marker_name)
101 | GMM_marker_param <- matrix(nrow=3,ncol=2) # rows: proportions, means, variances; one column per mixture component
102 | set.seed(1) # fixed seed, presumably so the mixmodCluster fit is reproducible
103 | zero_indices <- which(marker_exp==0)
104 | zero_percentage <- length(zero_indices)/length(marker_exp) # fraction (0 to 1) of values that are exactly zero
105 | if(zero_percentage > 0.1 & zero_percentage<0.2){ # drops floor(n * zero fraction) zeros, i.e. roughly every zero
106 | print("Warning: The marker expression potentially has too many zeros for fitting.
107 | GMM fitting will use input expression data with reduced sparsity")
108 | num_of_indices_to_remove <- floor(length(marker_exp)*(zero_percentage))
109 | marker_exp <- marker_exp[-zero_indices[1:num_of_indices_to_remove]]
110 | xxx <- mixmodCluster(marker_exp,2,
111 | models=mixmodGaussianModel(family="general",
112 | listModels = "Gaussian_p_Lk_Ck",
113 | free.proportions = FALSE,equal.proportions = TRUE))
114 | ### Check the models information for the Gaussian models, which shows which parameters are constrained.
115 | ### Want equal proportions of the two Gaussians
116 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions
117 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1]
118 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1]
119 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1]
120 | }else if(zero_percentage >= 0.2 & zero_percentage<0.5){ # drops zeros until about 5% of the original count remains as zeros
121 | print("Warning: The marker expression potentially has too many zeros for fitting.
122 | GMM fitting will use input expression data with reduced sparsity")
123 | num_of_indices_to_remove <- floor(length(marker_exp)*(zero_percentage - 0.05))
124 | marker_exp <- marker_exp[-zero_indices[1:num_of_indices_to_remove]]
125 | xxx <- mixmodCluster(marker_exp,2,
126 | models=mixmodGaussianModel(family="general",
127 | listModels = "Gaussian_p_Lk_Ck",
128 | free.proportions = FALSE,equal.proportions = TRUE))
129 | ### Check the models information for the Gaussian models, which shows which parameters are constrained.
130 | ### Want equal proportions of the two Gaussians
131 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions
132 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1]
133 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1]
134 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1]
135 | }else if(zero_percentage>=0.5 & zero_percentage<=0.9){ # drops zeros until about 2% of the original count remains as zeros
136 | print("Warning: The marker expression potentially has too many zeros for fitting.
137 | GMM fitting will use input expression data with reduced sparsity")
138 | num_of_indices_to_remove <- ceiling(length(marker_exp)*(zero_percentage-0.02))
139 | marker_exp <- marker_exp[-zero_indices[1:num_of_indices_to_remove]]
140 | xxx <- mixmodCluster(marker_exp,2,
141 | models=mixmodGaussianModel(family="general",
142 | listModels = "Gaussian_p_Lk_Ck",
143 | free.proportions = FALSE,equal.proportions = TRUE))
144 | ### Check the models information for the Gaussian models, which shows which parameters are constrained.
145 | ### Want equal proportions of the two Gaussians
146 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions
147 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1]
148 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1]
149 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1]
150 | }else if(zero_percentage>=0.9){ # exactly 0.9 is already caught by the branch above; here every zero is dropped
151 | print("Warning: The marker expression potentially has too many zeros for fitting.
152 | GMM fitting will use input expression data with reduced sparsity")
153 | marker_exp <- marker_exp[-zero_indices]
154 | xxx <- mixmodCluster(marker_exp,2,
155 | models=mixmodGaussianModel(family="general",
156 | listModels = "Gaussian_p_Lk_Ck",
157 | free.proportions = FALSE,equal.proportions = TRUE))
158 | ### Check the models information for the Gaussian models, which shows which parameters are constrained.
159 | ### Want equal proportions of the two Gaussians
160 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions
161 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1]
162 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1]
163 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1]
164 | }else{ # zero fraction of at most 0.1: fit on the data unchanged
165 | xxx <- mixmodCluster(marker_exp,2,
166 | models=mixmodGaussianModel(family="general",
167 | listModels = "Gaussian_p_Lk_Ck",
168 | free.proportions = FALSE,equal.proportions = TRUE))
169 | ### Check the models information for the Gaussian models, which shows which parameters are constrained.
170 | ### Want equal proportions of the two Gaussians
171 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions
172 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1]
173 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1]
174 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1]
175 | }
176 | # print(GMM_marker_param)
177 | if(figure == TRUE){ # optional diagnostic plot: histogram with both fitted normal curves
178 | bin_size <- 20
179 | filename <- paste0(marker_name,"_GMM.png") # relative path, so the file lands in the working directory
180 | png(filename,width = 5.5, height = 6.5,units = 'in',res = 300)
181 | h<-hist(marker_exp,breaks=bin_size,xlab="Marker expression",main=paste0("Histogram for ",marker_name))
182 | highestCount <- max(h$counts) # NOTE(review): highestCount is not used afterwards
183 | multiplier <- h$counts/h$density # converts a density to the histogram's count scale
184 | xfit <- seq(min(marker_exp),max(marker_exp),length=length(h$breaks))
185 | yfit1 <- dnorm(xfit,mean=GMM_marker_param[2,1],sd=sqrt(GMM_marker_param[3,1]))*multiplier[1]
186 | lines(xfit, yfit1, col="blue", lwd=2)
187 | yfit2 <- dnorm(xfit,mean=GMM_marker_param[2,2],sd=sqrt(GMM_marker_param[3,2]))*multiplier[1]
188 | lines(xfit, yfit2, col="red", lwd=2)
189 | dev.off()
190 | }
191 | return(GMM_marker_param) # 3 x 2 matrix: proportions, means, variances
192 | }
193 | #############################################################################################
194 | #' Derive, for every marker, the sigmoid midpoint (xroot) and slope from a two-component GMM fit
195 | #' @export
196 | build_sigmoid_function <- function(marker_exp_matrix,figure=FALSE){
197 | sigmoid_function_parameter <- matrix(nrow=2,ncol=dim(marker_exp_matrix)[2]) # row 1: xroot, row 2: slope; one column per marker
198 | ### For each marker, fit GMM
199 | for(i in 1:dim(marker_exp_matrix)[2]){
200 | marker_exp <- marker_exp_matrix[,i]
201 | marker_name <- colnames(marker_exp_matrix)[i]
202 | if(typeof(marker_name) != "character"){ # e.g. no column names; this marker's column of the result then stays NA
203 | print("Protein marker name in the marker expression matrix has potential problem.")
204 | }else{
205 | marker_GMM_model <- GMM_fitting(marker_exp,marker_name,figure)
206 | weight <- marker_GMM_model[1,]
207 | mus <- marker_GMM_model[2,]
208 | sigmas <- marker_GMM_model[3,] # row 3 of the GMM result holds variances, not standard deviations
209 |
210 | if(mus[1] > mus[2]){ # first Gaussian model is for marker expressed, second is for marker not expressed
211 | a <- (-0.5 / sigmas[2] + 0.5 /sigmas[1]) # a, b, c: coefficients of the quadratic a*x^2 + b*x + c solved below
212 | b <- mus[2] / sigmas[2] - mus[1] / sigmas[1]
213 | c <- 0.5 * (-mus[2]^2 / sigmas[2] + mus[1]^2 / sigmas[1]) + log(weight[2] / weight[1]) + 0.5 * log(sigmas[1] / sigmas[2])
214 | xroot <- (-b - sqrt(b^2 - 4.0 * a * c) ) / (2.0 * a) # NOTE(review): a is 0 when both variances are equal, so this divides by zero
215 | #slope <- 0.5 * (xroot - mus[2]) / sigmas[2] - 0.5 * (xroot - mus[1]) / sigmas[1]
216 | slope <- 1 # slope is fixed at 1; the data-driven formula above is disabled
217 | }else{# second Gaussian model is for marker expressed, first is for marker not expressed
218 | a <- (-0.5 / sigmas[1] + 0.5 /sigmas[2]) # same quadratic with the roles of the two components swapped
219 | b <- mus[1] / sigmas[1] - mus[2] / sigmas[2]
220 | c <- 0.5 * (-mus[1]^2 / sigmas[1] + mus[2]^2 / sigmas[2]) + log(weight[1] / weight[2]) + 0.5 * log(sigmas[2] / sigmas[1])
221 | xroot <- (-b - sqrt(b^2 - 4.0 * a * c) ) / (2.0 * a)
222 | #slope <- 0.5 * (xroot - mus[1]) / sigmas[1] - 0.5 * (xroot - mus[2]) / sigmas[2]
223 | slope <- 1
224 | }
225 | if(figure==TRUE){
226 | filename <- paste0(marker_name,"_sigmoid.png") # relative path, so the file lands in the working directory
227 | ### plot sigmoid function
228 | exp_term <- exp(slope*(marker_exp-xroot))
229 | yyy <- exp_term/(1+exp_term)
230 | yyy <- (yyy-min(yyy))/(max(yyy)-min(yyy)) # min-max rescale to [0, 1]
231 | png(filename,width = 4.5, height = 4.5,units = 'in',res = 300)
232 | plot(marker_exp, yyy, col = "darkblue",
233 | xlab = "", ylab = "", main = paste0(marker_name," sigmoid function"))
234 | grid()
235 | dev.off()
236 | }
237 | sigmoid_function_parameter[1,i] <- xroot
238 | sigmoid_function_parameter[2,i] <- slope
239 | }
240 | }
241 | return(sigmoid_function_parameter) # 2 x number-of-markers matrix
242 | }
243 | #############################################################################################
244 | #' Convert each marker's expression into a [0, 1] expression probability via its fitted sigmoid
245 | #' @export
246 | marker_exp_probability <- function(CelestaObj,figure=FALSE){
247 | ### Fit GMM model and get parameters for the activation probabilities
248 | marker_exp_matrix <- CelestaObj@marker_exp_matrix
249 | sigmoid_function_parameter <- build_sigmoid_function(marker_exp_matrix,figure) # row 1: midpoint, row 2: slope
250 | ### Marker activation probability matrix
251 | marker_exp_prob <- matrix(nrow=dim(marker_exp_matrix)[1],ncol=dim(marker_exp_matrix)[2]) # same shape as the expression matrix
252 | colnames(marker_exp_prob) <- colnames(marker_exp_matrix)
253 |
254 | for(i in 1:dim(marker_exp_matrix)[2]){
255 | exp_term <- exp(sigmoid_function_parameter[2,i]*(marker_exp_matrix[,i]-sigmoid_function_parameter[1,i])) # exp(slope * (x - midpoint))
256 | y = exp_term/(1+exp_term) # logistic value for every cell
257 | marker_exp_prob[,i] <- (y-min(y))/(max(y)-min(y)) # min-max rescale per marker; NOTE(review): yields NaN if all y are equal
258 | }
259 | CelestaObj@marker_exp_prob <- marker_exp_prob
260 | return(CelestaObj)
261 | }
262 | #############################################################################################
263 | #' Get neighborhood information: k nearest neighbors plus all cells (and their distances) within a bandwidth
264 | #' @export
265 | GetNeighborInfo <- function(CelestaObj,number_of_neighbors=5,bandwidth=100){
266 | coords <- CelestaObj@coords
267 | print("Get nearest neighbors.")
268 | xxx <- knearneigh(coords,k=number_of_neighbors) # presumably spdep::knearneigh; verify against the package imports
269 | nb_list <- xxx$nn # one row per cell, one column per neighbor
270 | colnames(nb_list) <- paste0("neighbor",seq(1,number_of_neighbors,by=1))
271 | ### Identify N-nearest neighbors for each cell
272 | CelestaObj@nb_list <- nb_list
273 | ### Identify cells within a circle bandwidth
274 | print("Identify neighboring cells within a defined bandwidth.")
275 | all_cell_nb_in_bandwidth <- dnearneigh(coords, 0, bandwidth, longlat = NULL) # neighbors at distance between 0 and bandwidth
276 | CelestaObj@cell_nb_in_bandwidth <- all_cell_nb_in_bandwidth
277 | ### Identify distances for all the cells within the circle
278 | CelestaObj@cell_nb_dist <- nbdists(all_cell_nb_in_bandwidth, coords)
279 | return(CelestaObj)
280 | }
281 | ##############################################################################
282 | #' Allocate the assignment, scoring and probability matrices of a Celesta object
283 | #' @export
284 | initialize_object <- function(CelestaObj){
285 | total_rounds <- max(CelestaObj@lineage_info$Round)
286 | current_cell_type_assignment <- matrix(0L,nrow =dim(CelestaObj@marker_exp_matrix)[1], # all-zero matrix, cells x rounds
287 | ncol=total_rounds)
288 | CelestaObj@current_cell_type_assignment <- current_cell_type_assignment # the same zero matrix seeds all three assignment slots
289 | CelestaObj@anchor_cell_type_assignment <- current_cell_type_assignment
290 | CelestaObj@starting_cell_type_assignment <- current_cell_type_assignment
291 |
292 | current_scoring_matrix <- matrix(nrow=dim(CelestaObj@marker_exp_matrix)[1], # NA matrix, cells x rows of prior_info
293 | ncol = dim(CelestaObj@prior_info)[1])
294 | colnames(current_scoring_matrix) <- CelestaObj@prior_info[,1] # column names come from the first column of prior_info
295 | CelestaObj@current_scoring_matrix <- current_scoring_matrix
296 |
297 | current_cell_prob <- matrix(nrow=dim(CelestaObj@marker_exp_matrix)[1], # NA matrix with the same shape as the scoring matrix
298 | ncol = dim(CelestaObj@prior_info)[1])
299 | colnames(current_cell_prob) <- CelestaObj@prior_info[,1]
300 | CelestaObj@current_cell_prob <- current_cell_prob
301 | return(CelestaObj)
302 | }
303 | #############################################################################################
304 | #' Create a CELESTA object: load expression, prior/lineage info, coordinates, probabilities and neighbors
305 | #' @export
306 | CreateCELESTAobj <- function(project_title="Project",prior_marker_info,imaging_data_file,
307 | cofactor=10,transform_type=1,
308 | number_of_neighbors=5,bandwidth=100){
309 | CelestaObj <- Celesta(project_name = project_title)
310 | ### Get protein marker expressions and cell IDs (forward the caller's cofactor rather than a literal 10)
311 | CelestaObj <- GetMarkerExpMatrix(CelestaObj,prior_marker_info,imaging_data_file,cofactor=cofactor,
312 | transform_type = transform_type)
313 | ### Get user-defined prior knowledge matrix and cell lineage information
314 | CelestaObj <- GetPirorInfo(CelestaObj,prior_marker_info)
315 | ### Get coordinates
316 | CelestaObj <- GetCoords(CelestaObj,imaging_data_file)
317 | ### Convert marker expressions to marker activation probability
318 | CelestaObj <- marker_exp_probability(CelestaObj)
319 | ### Get neighboring cell information
320 | ### (pass number_of_neighbors and bandwidth through so that non-default values take effect)
321 | CelestaObj <- GetNeighborInfo(CelestaObj,number_of_neighbors=number_of_neighbors,bandwidth=bandwidth)
322 | #Initialize the matrices for scoring function and prob matrix
323 | CelestaObj <- initialize_object(CelestaObj)
324 | return(CelestaObj)
325 | }
326 | #############################################################################################
327 | #############################################################################################
328 | #' Flag cells whose marker probabilities are all low or all high by setting their assignments to NA
329 | #' @export
330 | cell_filtering <- function(high_marker_threshold=0.9, low_marker_threshold=0.4,
331 | CelestaObj){
332 | ### Filter out cells that have marker expressions all high or all low
333 | total_rounds <- CelestaObj@total_rounds
334 | number_of_marker <- dim(CelestaObj@initial_pri_matrix)[2] # NOTE(review): number_of_marker is not used below
335 | for(i in 1:dim(CelestaObj@marker_exp_matrix)[1]){ # one pass per cell
336 | cell_activation_prob <- CelestaObj@marker_exp_prob[i,] # marker probabilities of cell i
337 | if(MarkQuestionableCells(cell_activation_prob,high_marker_threshold,low_marker_threshold)){
338 | CelestaObj@current_cell_type_assignment[i,1:total_rounds] <- rep(NA,total_rounds) # NA in every round marks the cell as filtered
339 | }else{ # nothing to do for cells that pass the filter
340 | }
341 | }
342 | CelestaObj@starting_cell_type_assignment <- CelestaObj@current_cell_type_assignment # snapshot of the assignments after filtering
343 | return(CelestaObj)
344 | }
345 | ################################################################################################
346 | #' Return TRUE when all marker probabilities of a cell are below the low or above the high threshold
347 | #' @export
348 | MarkQuestionableCells <- function(cell_activation_prob,high_marker_threshold,low_marker_threshold){
349 | number_of_marker <- length(cell_activation_prob)
350 | number_of_low_markers <- length(which(cell_activation_prob < low_marker_threshold))
351 | number_of_high_markers <- length(which(cell_activation_prob > high_marker_threshold))
352 | if(number_of_low_markers==number_of_marker || number_of_high_markers==number_of_marker){
353 | return(TRUE)
354 | }else{
355 | return(FALSE)
356 | }
357 | }
358 | #############################################################################################
359 | #' For each round, load the prior-knowledge marker rows of that round's cell types into the object
360 | #' @export
361 | get_initial_prior_matrix <- function(CelestaObj,round){
362 | lineage_info <- CelestaObj@lineage_info
363 | prior_marker_info <- CelestaObj@prior_info # read the prior from the object instead of relying on a global variable
364 | initial_pri_matrix <- data.matrix(prior_marker_info[which(lineage_info$Round==round),
365 | 3:dim(prior_marker_info)[2]])
366 | CelestaObj@initial_pri_matrix <- initial_pri_matrix
367 | CelestaObj@current_pri_matrix <- initial_pri_matrix
368 | return(CelestaObj)
369 | }
370 | ################################################################################################
371 | ################################################################################################
372 | #' Find the IDs of cells that still have no cell type (0) in the given round
373 | #' @export
374 | find_unassigned_cells <- function(CelestaObj,round){
375 | current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
376 | lineage_info <- CelestaObj@lineage_info
377 | cell_ID <- CelestaObj@cell_ID
378 | if(round == 1){
379 | unassigned_cells <- cell_ID[which(current_cell_type_assignment[,round] == 0)] # 0 means not assigned yet; NA entries are excluded by which()
380 | }else{
381 | previous_level_type <- unique(lineage_info$Previous_cell_type[which(lineage_info$Round==round)]) # parent cell type of this round
382 | previous_level_round <- lineage_info$Round[which(lineage_info$Cell_type_number==previous_level_type)] # round in which that parent type is assigned
383 | unassigned_cells <- cell_ID[which(current_cell_type_assignment[,round] == 0 & # unassigned in this round and of the parent type
384 | (current_cell_type_assignment[,previous_level_round]==previous_level_type))]
385 | }
386 | return(unassigned_cells)
387 | }
388 | ################################################################################################
389 | #' Find the IDs of cells that already have a non-zero, non-NA cell type in the given round
390 | #' @export
391 | find_assigned_cells <- function(CelestaObj,round){
392 | current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
393 | lineage_info <- CelestaObj@lineage_info
394 | cell_ID <- CelestaObj@cell_ID
395 | if(round == 1){
396 | assigned_cells <- cell_ID[which(current_cell_type_assignment[,round] != 0 & # assigned means neither 0 nor NA
397 | is.na(current_cell_type_assignment[,round])==FALSE)]
398 | }else{
399 | previous_level_type <- unique(lineage_info$Previous_cell_type[which(lineage_info$Round==round)]) # parent cell type of this round
400 | assigned_cells <- cell_ID[which(current_cell_type_assignment[,round] != 0 &
401 | is.na(current_cell_type_assignment[,round])==FALSE &
402 | (current_cell_type_assignment[,(round-1)]==previous_level_type))] # NOTE(review): uses column round-1, whereas find_unassigned_cells looks up the parent type's round; confirm which is intended
403 | }
404 | return(assigned_cells)
405 | }
406 | ################################################################################################
#' Calculate scores using MSE
#'
#' For every cell (row) the score is one minus the mean squared difference
#' between the cell's marker probabilities in the columns `non_NA_index` and
#' the prior values `prior_info`.
#'
#' @param activation_prob_to_use Matrix with one row per cell and one column
#'   per marker. A plain vector is treated as a single cell.
#' @param prior_info Prior values for the markers selected by `non_NA_index`,
#'   in the same order.
#' @param non_NA_index Column indices of the markers to compare.
#' @return Numeric vector with one score per cell.
#' @export
get_score <- function(activation_prob_to_use, prior_info, non_NA_index) {
  if (is.null(dim(activation_prob_to_use))) {
    # A single cell arrives as a vector when the caller's subsetting dropped
    # the row dimension; restore it so the column subset below is valid.
    activation_prob_to_use <- matrix(
      activation_prob_to_use,
      nrow = 1,
      dimnames = list(NULL, names(activation_prob_to_use))
    )
  }
  # drop = FALSE keeps a matrix for a single cell or a single marker, which
  # apply() requires.
  selected <- activation_prob_to_use[, non_NA_index, drop = FALSE]
  apply(selected, 1, function(x) 1 - sum((x - prior_info)^2) / length(x))
}
413 | #############################################################################################
#' Calculate the scoring function for the unassigned cells
#'
#' For every cell type of the round, the cells in `unassigned_cells` are
#' scored with `get_score()` against the markers whose entry in the current
#' prior matrix is not `NA`. The scores of each cell are then divided by their
#' sum over `cell_type_num`, and the result is stored in the
#' `current_scoring_matrix` slot. Rows of other cells are left untouched.
#'
#' @param CelestaObj Object providing the slots `marker_exp_prob`,
#'   `current_pri_matrix` and `current_scoring_matrix`.
#' @param round Current round (not used by the computation).
#' @param unassigned_cells Row indices of the cells to score.
#' @param cell_type_num Cell type numbers of the round; entry `i` is taken to
#'   correspond to row `i` of the prior matrix and is used as the column index
#'   of the scoring matrix.
#' @return `CelestaObj` with an updated `current_scoring_matrix` slot.
#' @export
scoring_function <- function(CelestaObj, round, unassigned_cells, cell_type_num) {
  marker_exp_prob <- CelestaObj@marker_exp_prob
  current_pri_matrix <- CelestaObj@current_pri_matrix
  current_scoring_matrix <- CelestaObj@current_scoring_matrix
  print("Start calculating the scoring function.")
  if (length(unassigned_cells) == 0) {
    # Nothing to score; leave the scoring matrix as it is.
    return(CelestaObj)
  }

  # drop = FALSE keeps a matrix even when only one cell is left.
  activation_prob_to_use <- marker_exp_prob[unassigned_cells, , drop = FALSE]
  for (i in seq_along(cell_type_num)) {
    non_NA_index <- which(!is.na(current_pri_matrix[i, ]))
    prior_info <- current_pri_matrix[i, non_NA_index]
    current_scoring_matrix[unassigned_cells, cell_type_num[i]] <-
      get_score(activation_prob_to_use, prior_info, non_NA_index)
  }

  # Normalize each cell's scores so they sum to one over the round's types.
  raw_scores <- current_scoring_matrix[unassigned_cells, cell_type_num, drop = FALSE]
  current_scoring_matrix[unassigned_cells, cell_type_num] <-
    t(apply(raw_scores, 1, function(x) x / sum(x)))

  CelestaObj@current_scoring_matrix <- current_scoring_matrix
  return(CelestaObj)
}
432 | ################################################################################################
433 | ################################################################################################
#' Calculate probability differences
#'
#' For every cell in `unassigned_cells`, returns the smallest difference
#' between the cell's maximum cell-type probability and the probabilities of
#' all other cell types.
#'
#' @param max.prob Maximum probability per unassigned cell (same order as
#'   `unassigned_cells`).
#' @param max.prob_index Column index of that maximum within `cell_prob_list`,
#'   per unassigned cell.
#' @param cell_prob_list Probability matrix covering all cells (rows) and the
#'   cell types of the round (columns).
#' @param unassigned_cells Row indices into `cell_prob_list`.
#' @return Numeric vector with one value per unassigned cell; `numeric(0)` when
#'   `unassigned_cells` is empty.
#' @export
find_min_prob_diff <- function(max.prob, max.prob_index, cell_prob_list, unassigned_cells) {
  ### max.prob, max.prob_index are calculated only on unassigned_cells
  ### but cell_prob_list has all the cells
  min_prob_diff <- numeric(length = length(unassigned_cells))
  # seq_along() makes the loop a no-op for an empty cell set.
  for (i in seq_along(unassigned_cells)) {
    other_probs <- cell_prob_list[unassigned_cells[i], -max.prob_index[i]]
    min_prob_diff[i] <- min(max.prob[i] - other_probs)
  }
  return(min_prob_diff)
}
445 | ################################################################################################
#' Find the cell types based on the scores (anchor cell) or probabilities (index cell)
#'
#' For every cell in `unassigned_cells` the cell type with the highest
#' probability is determined. The cell is assigned that type when
#' (1) the maximum probability exceeds `min_prob`, (2) its margin over every
#' other type exceeds `min_difference`, (3) all markers with prior value 1 for
#' that type have expression probability >= the type's high threshold, and
#' (4) all markers with prior value 0 have expression probability <= the
#' type's low threshold. Cells failing any condition keep their current
#' assignment.
#'
#' @param CelestaObj Object providing the slots `current_cell_prob`,
#'   `initial_pri_matrix`, `current_cell_type_assignment` and
#'   `marker_exp_prob`.
#' @param cell_type_num Cell type numbers of the round; used as column indices
#'   of the probability matrix and as indices into the threshold vectors.
#' @param unassigned_cells Row indices of the cells to evaluate.
#' @param round Column of the assignment matrix to update.
#' @param min_difference Minimum margin between the best and any other type.
#' @param min_prob Minimum probability of the best type.
#' @param high_marker_threshold,low_marker_threshold Threshold vectors indexed
#'   by cell type number.
#' @return `CelestaObj` with column `round` of `current_cell_type_assignment`
#'   updated.
#' @export
cell_type <- function(CelestaObj, cell_type_num, unassigned_cells, round,
                      min_difference = 0, min_prob = 0,
                      high_marker_threshold, low_marker_threshold) {
  if (length(unassigned_cells) == 0) {
    # No candidates: nothing can change.
    return(CelestaObj)
  }
  initial_pri_matrix <- CelestaObj@initial_pri_matrix
  marker_exp_prob <- CelestaObj@marker_exp_prob
  cell_type_assignment <- CelestaObj@current_cell_type_assignment[, round]
  # drop = FALSE keeps matrices for a single cell type or a single cell.
  cell_prob_list <- CelestaObj@current_cell_prob[, cell_type_num, drop = FALSE]
  candidate_prob <- cell_prob_list[unassigned_cells, , drop = FALSE]

  max.prob_index <- apply(candidate_prob, 1, which.max)
  max.prob <- apply(candidate_prob, 1, max)
  min_prob_diff <- find_min_prob_diff(max.prob, max.prob_index, cell_prob_list, unassigned_cells)

  ### Find cells with cell type max probability > threshold and cell type
  ### probability difference > threshold (indexing on unassigned_cells)
  passing <- which(min_prob_diff > min_difference & max.prob > min_prob)
  threshold_cells <- unassigned_cells[passing]
  max.prob_index_thresholded <- max.prob_index[passing]

  # seq_along() skips the marker check entirely when no cell passed.
  for (i in seq_along(threshold_cells)) {
    cell_ID_to_check <- threshold_cells[i]
    best_row <- max.prob_index_thresholded[i]
    candidate_type <- cell_type_num[best_row]
    high_marker_index <- which(initial_pri_matrix[best_row, ] == 1)
    low_marker_index <- which(initial_pri_matrix[best_row, ] == 0)

    # isTRUE(all(.)) is TRUE for an empty marker set and FALSE whenever a
    # comparison is FALSE or NA.
    high_ok <- isTRUE(all(
      marker_exp_prob[cell_ID_to_check, high_marker_index] >=
        high_marker_threshold[candidate_type]
    ))
    low_ok <- isTRUE(all(
      marker_exp_prob[cell_ID_to_check, low_marker_index] <=
        low_marker_threshold[candidate_type]
    ))
    if (high_ok && low_ok) {
      cell_type_assignment[cell_ID_to_check] <- candidate_type
    }
  }
  CelestaObj@current_cell_type_assignment[, round] <- cell_type_assignment
  return(CelestaObj)
}
480 | ################################################################################################
#' Cell type count
#'
#' Counts, for every cell type in `cell_type_num`, how many cells are assigned
#' to it in `round` and which proportion of all cells (rows of the assignment
#' matrix) that represents. A notice is printed for every type with no cells.
#'
#' @param CelestaObj Object providing the slots `prior_info` (first column
#'   holds the cell type names) and `current_cell_type_assignment`.
#' @param cell_type_num Cell type numbers to count; also used as row indices
#'   into `prior_info`.
#' @param round Column of the assignment matrix to count in.
#' @return Matrix with one row per cell type (row names are the type names)
#'   and the columns `cell_type_number`, `count` and `proportion`.
#' @export
count_cell_type <- function(CelestaObj, cell_type_num, round) {
  prior_marker_info <- CelestaObj@prior_info
  current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
  total_cell_number <- dim(current_cell_type_assignment)[1]

  cell_type_count <- matrix(nrow = length(cell_type_num), ncol = 3)
  colnames(cell_type_count) <- c("cell_type_number", "count", "proportion")
  row.names(cell_type_count) <- prior_marker_info[cell_type_num, 1]
  cell_type_count[, 1] <- cell_type_num

  for (i in seq_along(cell_type_num)) {
    cell_type_count[i, 2] <-
      length(which(current_cell_type_assignment[, round] == cell_type_num[i]))
    if (cell_type_count[i, 2] < 1) {
      cell_type_name <- prior_marker_info[cell_type_num[i], 1]
      print(paste0("Too few cells identified for: ", cell_type_name))
      print("Please consider relax threshold.")
    }
  }
  cell_type_count[, 3] <- cell_type_count[, 2] / total_cell_number
  return(cell_type_count)
}
502 | ################################################################################################
503 | # plot_cells_iteration <- function(CelestaObj,cell_number_to_use,round,
504 | # cell_type_colors,point_size=0.1,iteration,figure = FALSE){
505 | # if(figure==TRUE){
506 | # coords <- CelestaObj@coords
507 | # current_cell_type_assignment <- CelestaObj@current_cell_type_assignment[,round]
508 | # project_name <- CelestaObj@project_name
509 | # prior_marker_info <- CelestaObj@prior_info
510 | # cell_types <- prior_marker_info[,1]
511 | # x_min <- min(coords[,1])
512 | # x_max <- max(coords[,1])
513 | # y_min <- min(coords[,2])
514 | # y_max <- max(coords[,2])
515 | # range <- c(min(x_min,y_min),max(x_max,y_max))
516 | #
517 | # filename <- paste0(project_name,paste0(paste0("Round_",round),
518 | # paste0("_Iteration_",paste0(iteration,".png"))))
519 | # cell_index <- integer()
520 | # cell_anno <- character()
521 | # count <- 0
522 | # for(i in 1:length(cell_number_to_use)){
523 | # unassigned_cells <- which(current_cell_type_assignment == cell_number_to_use[i])
524 | # cell_index[(count+1):(count+length(unassigned_cells))] <- unassigned_cells
525 | # cell_anno[(count+1):(count+length(unassigned_cells))] <- cell_types[cell_number_to_use[i]]
526 | # count <- count + length(unassigned_cells)
527 | # }
528 | # df_plot <- data.frame(x=coords[cell_index,1],
529 | # y=coords[cell_index,2],
530 | # cell_anno=cell_anno)
531 | # df_plot$cell_anno <- factor(df_plot$cell_anno,levels = c(cell_types[cell_number_to_use]))
532 | # color_plot <- cell_type_colors[cell_number_to_use]
533 | #
534 | # g<- ggplot(df_plot,aes(x=x,y=y,group=cell_anno))+geom_point(aes(color=cell_anno),size=point_size)+
535 | # scale_color_manual(values=color_plot)+
536 | # xlim(range[1],range[2])+ylim(range[1],range[2])+
537 | # labs(main="")+theme(aspect.ratio = 1,panel.grid.major = element_blank(),
538 | # panel.grid.minor = element_blank(),
539 | # legend.title = element_blank(),
540 | # legend.text=element_text(size=12,face = "bold"),
541 | # panel.background = element_rect(fill = 'black'),
542 | # axis.line = element_line(colour = "black"),
543 | # axis.title.x=element_blank(),
544 | # axis.title.y=element_blank())+
545 | # guides(colour = guide_legend(override.aes = list(size=10)))
546 | # ggsave(filename,plot=g,width = 16.5, height = 16,units = 'in',dpi = 300)
547 | # }
548 | # }
549 | ################################################################################################
#' Find the cell types of the neighbors for unassigned_cells
#'
#' Builds a list-matrix with one row per cell in `unassigned_cells` and one
#' column per entry of `cell_type_num`. Each element holds those neighbors of
#' the cell (taken from the cell's row of `nb_list`) whose assignment in
#' `round` equals the column's cell type. The result only has information for
#' the cells to check and is stored in the `nb_cell_type` slot.
#'
#' @param CelestaObj Object providing the slots `nb_list` and
#'   `current_cell_type_assignment`.
#' @param cell_type_num Cell type numbers of the round.
#' @param round Column of the assignment matrix to read.
#' @param unassigned_cells Row indices into `nb_list` of the cells to check.
#' @return `CelestaObj` with an updated `nb_cell_type` slot.
#' @export
neighbor_cell_type <- function(CelestaObj, cell_type_num, round, unassigned_cells) {
  nb_list <- CelestaObj@nb_list
  cell_type_assignment <- CelestaObj@current_cell_type_assignment[, round]

  n_cells <- length(unassigned_cells)
  n_types <- length(cell_type_num)
  # List-matrix whose elements start out as NULL.
  same_type_nb <- matrix(vector("list", n_cells * n_types),
    nrow = n_cells, ncol = n_types
  )
  row.names(same_type_nb) <- unassigned_cells
  colnames(same_type_nb) <- cell_type_num

  for (j in seq_along(unassigned_cells)) {
    neighbors <- nb_list[unassigned_cells[j], ]
    neighbor_types <- cell_type_assignment[neighbors]
    for (i in seq_along(cell_type_num)) {
      # Wrap in list() so that an empty neighbor set is stored as integer(0)
      # in the single element [j, i].
      same_type_nb[j, i] <- list(neighbors[which(neighbor_types == cell_type_num[i])])
    }
  }
  CelestaObj@nb_cell_type <- same_type_nb
  return(CelestaObj)
}
571 | ################################################################################################
572 | ################################################################################################
#' Get distance from nearest assigned cells
#'
#' For every cell in `unassigned_cells` and every cell type in
#' `cell_type_num`, records the smallest distance to a neighbor (from the
#' cell's entry in `cell_nb_in_bandwidth`) that is contained in
#' `assigned_cells` and has that type in `round`. Entries stay `NA` when no
#' such neighbor exists. The matrix is stored in the
#' `dist_from_nearest_assigned_cell` slot.
#'
#' @param CelestaObj Object providing the slots `cell_nb_in_bandwidth`,
#'   `cell_nb_dist` (two lists indexed by cell, with neighbors and their
#'   distances in matching order) and `current_cell_type_assignment`.
#' @param cell_type_num Cell type numbers of the round (matrix columns).
#' @param unassigned_cells Cells to compute distances for (matrix rows).
#' @param assigned_cells Cells that already have a type.
#' @param round Column of the assignment matrix to read.
#' @return `CelestaObj` with an updated `dist_from_nearest_assigned_cell` slot.
#' @export
get_dist_from_nearest_assigned_cells <- function(CelestaObj, cell_type_num, unassigned_cells,
                                                 assigned_cells, round) {
  print("Get distance from nearest assigned cells.")
  all_cell_nb_in_circle <- CelestaObj@cell_nb_in_bandwidth
  all_cell_nb_circle_dist <- CelestaObj@cell_nb_dist
  current_cell_type_assignment <- CelestaObj@current_cell_type_assignment

  dist_nearest_assigned_cell <- matrix(
    nrow = length(unassigned_cells),
    ncol = length(cell_type_num)
  )
  colnames(dist_nearest_assigned_cell) <- cell_type_num

  for (i in seq_along(unassigned_cells)) {
    cell_to_check <- unassigned_cells[i]
    matching <- match(all_cell_nb_in_circle[[cell_to_check]], assigned_cells)
    is_assigned <- which(!is.na(matching))
    if (length(is_assigned) == 0) {
      # No assigned neighbor within reach: the row stays NA.
      next
    }
    nb_cell_with_ID <- assigned_cells[matching[is_assigned]]
    nb_cell_type <- current_cell_type_assignment[nb_cell_with_ID, round]
    nb_cell_dist <- all_cell_nb_circle_dist[[cell_to_check]][is_assigned]
    for (type_j in unique(nb_cell_type)) {
      # Types outside cell_type_num select no column and are ignored.
      dist_nearest_assigned_cell[i, which(cell_type_num == type_j)] <-
        min(nb_cell_dist[which(nb_cell_type == type_j)])
    }
  }
  CelestaObj@dist_from_nearest_assigned_cell <- dist_nearest_assigned_cell
  return(CelestaObj)
}
604 | #############################################################################################
#' Calculate beta from the distance to the nearest assigned cells
#'
#' Computes `scale_factor * (1 - distance / bandwidth)` for every entry of the
#' `dist_from_nearest_assigned_cell` slot, replaces missing results by 0 and
#' stores the matrix in the `current_beta` slot.
#'
#' @param CelestaObj Object providing the slot
#'   `dist_from_nearest_assigned_cell`.
#' @param scale_factor Multiplier applied to the relative closeness.
#' @param bandwidth Distance by which the stored distances are divided.
#' @return `CelestaObj` with an updated `current_beta` slot.
#' @export
calculate_beta <- function(CelestaObj, scale_factor = 5, bandwidth = 100) {
  nearest_dist <- CelestaObj@dist_from_nearest_assigned_cell
  relative_closeness <- 1 - nearest_dist / bandwidth
  weights <- scale_factor * relative_closeness
  # Missing distances yield a zero weight.
  weights[is.na(weights)] <- 0
  CelestaObj@current_beta <- weights
  CelestaObj
}
614 | ################################################################################################
#' Calculate cell-type probabilities for the unassigned (index) cells
#'
#' For every cell in `unassigned_cells` and every cell type `j` of the round,
#' an unnormalized weight
#' `exp(score) * exp(beta * sum(probability of type j over same-type neighbors))`
#' is computed; the weights of a cell are then divided by their sum and
#' written to the cell's row of the probability matrix (mean field
#' estimation, per the original author's note). All neighbor probabilities
#' are read from the matrix as it was on entry, so updates made for one cell
#' do not influence another cell in the same call.
#'
#' @param CelestaObj Object providing the slots `current_cell_prob` and
#'   `current_scoring_matrix` (all cells x all cell types) as well as
#'   `current_beta` and `nb_cell_type` (one row per unassigned cell, one
#'   column per cell type of the round).
#' @param cell_type_num Cell type numbers of the round; used as column indices.
#' @param unassigned_cells Row indices of the cells to update, in the same
#'   order as the rows of `current_beta` and `nb_cell_type`.
#' @param round Current round (not used by the computation).
#' @return `CelestaObj` with an updated `current_cell_prob` slot.
#' @export
cell_prob <- function(CelestaObj, cell_type_num, unassigned_cells, round) {
  # drop = FALSE keeps a matrix when the round has a single cell type.
  current_cell_prob_list <- CelestaObj@current_cell_prob[, cell_type_num, drop = FALSE]
  u <- current_cell_prob_list
  current_beta <- CelestaObj@current_beta
  nb_cell_type <- CelestaObj@nb_cell_type
  current_scoring_matrix <- CelestaObj@current_scoring_matrix

  for (i in seq_along(unassigned_cells)) {
    cell_ID_to_check <- unassigned_cells[i]
    u_i <- numeric(length = length(cell_type_num))
    for (j in seq_len(ncol(nb_cell_type))) {
      current_same_type_nb <- unlist(nb_cell_type[i, j][[1]])
      u_i[j] <- exp(current_scoring_matrix[cell_ID_to_check, cell_type_num[j]]) *
        exp(current_beta[i, j] *
          sum(current_cell_prob_list[current_same_type_nb, j]))
    }
    u[cell_ID_to_check, ] <- u_i / sum(u_i)
  }
  print("Cell probability updating done.")
  CelestaObj@current_cell_prob[, cell_type_num] <- u
  return(CelestaObj)
}
642 | ################################################################################################
#' Update the prior knowledge matrix of the cell type signatures
#'
#' For every cell type of the round and every marker whose initial prior is
#' not `NA`, the current prior is replaced by the average of (a) the mean
#' expression probability of that marker over the cells currently assigned to
#' the type and (b) the initial prior value. Markers with an `NA` initial
#' prior are left unchanged. A cell type without any assigned cell keeps its
#' current prior row, because the mean over zero cells would otherwise write
#' `NaN` into the prior.
#'
#' @param CelestaObj Object providing the slots `current_pri_matrix`,
#'   `initial_pri_matrix`, `current_cell_type_assignment` and
#'   `marker_exp_prob`.
#' @param round Column of the assignment matrix to read.
#' @param cell_type_num Cell type numbers of the round; entry `i` is taken to
#'   correspond to row `i` of the prior matrices.
#' @return `CelestaObj` with an updated `current_pri_matrix` slot.
#' @export
update_prior_matrix <- function(CelestaObj, round, cell_type_num) {
  updated_prior_matrix <- CelestaObj@current_pri_matrix
  initial_pri_matrix_data <- CelestaObj@initial_pri_matrix
  current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
  all_marker_pro_matrix <- CelestaObj@marker_exp_prob

  for (i in seq_along(cell_type_num)) { ### for each cell type
    # The member set does not depend on the marker, so compute it once.
    cells_of_current_cell_type <-
      which(current_cell_type_assignment[, round] == cell_type_num[i])
    if (length(cells_of_current_cell_type) == 0) {
      next
    }
    for (j in which(!is.na(initial_pri_matrix_data[i, ]))) {
      updated_prior_matrix[i, j] <-
        (mean(all_marker_pro_matrix[cells_of_current_cell_type, j]) +
          initial_pri_matrix_data[i, j]) / 2
    }
  }
  CelestaObj@current_pri_matrix <- updated_prior_matrix
  return(CelestaObj)
}
665 | ##############################################################################
666 | ### For different rounds
667 | # plot_marker_exp <- function(CelestaObj,cell_type_colors=c(palette()[2:7],"white"),
668 | # cell_type_num,round){
669 | # sample_name <- CelestaObj@project_name
670 | # current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
671 | # marker_exp_matrix <- CelestaObj@marker_exp_matrix
672 | # plot_matrix <- matrix(0L,nrow=length(cell_type_num),ncol=dim(marker_exp_matrix)[2])
673 | # row.names(plot_matrix) <- as.character(CelestaObj@prior_info[cell_type_num,1])
674 | # colnames(plot_matrix) <- colnames(marker_exp_matrix)
675 | # for(i in 1:length(cell_type_num)){
676 | # plot_matrix[i,] <- colMeans(marker_exp_matrix[which(current_cell_type_assignment[,round] == cell_type_num[i]),])
677 | # }
678 | # df <- as.data.frame(cbind(row.names(plot_matrix),plot_matrix))
679 | # colnames(df) <- c("cell_types",colnames(marker_exp_matrix))
680 | # df.m <- melt(df, id.var = "cell_types")
681 | #
682 | # df.m$value <- as.numeric(df.m$value)
683 | # df.m$cell_types <- factor(df.m$cell_types,levels = row.names(plot_matrix))
684 | #
685 | # filename <- paste0(sample_name,"_")
686 | # filename1 <- paste0(filename,round)
687 | # filename2 <- paste0(filename1,"_ave_marker_exp.png")
688 | #
689 | # g<-ggplot(df.m,aes(x=variable,y=value,group=cell_types,color=cell_types)) + geom_point() + geom_line() +
690 | # scale_color_manual(values=cell_type_colors[cell_type_num])+xlab("Marker")+
691 | # ylab("Expression")+theme_bw()+
692 | # theme(legend.title = element_blank())+
693 | # theme(axis.text.x = element_text(angle = 80, hjust = 1,size=12,face="bold"),
694 | # legend.text=element_text(size=12,face = "bold"),
695 | # panel.grid.major = element_blank(), panel.grid.minor = element_blank())
696 | # ggsave(filename2,plot=g,width=13.5,height=9,units = 'in',dpi = 300)
697 | # }
698 | ################################################################################################
699 | ################################################################################################
#' Get final results
#'
#' Translates the per-round assignments (current and anchor) into cell type
#' names, combines the rounds into one final cell type per cell, stores the
#' result in the `final_cell_type_assignment` slot and writes two CSV files
#' into the working directory:
#' `<project_name>_final_cell_type_assignment.csv` (final result next to
#' `imaging_data`) and `<project_name>_anchor_cell_assignment.csv`.
#'
#' The final type starts as the round-1 assignment. In each later round, cells
#' whose final type equals the round's parent type and that received a
#' non-zero assignment in that round are refined to the new assignment.
#' Assignment 0 is reported as "Unknown".
#'
#' @param total_rounds Number of rounds (columns of the assignment matrices)
#'   to process.
#' @param CelestaObj Object providing the slots `current_cell_type_assignment`,
#'   `anchor_cell_type_assignment`, `prior_info` (first column holds the cell
#'   type names, rows aligned with `lineage_info`), `lineage_info` and
#'   `project_name`.
#' @param imaging_data Table with one row per cell that is written next to
#'   the final result.
#' @return `CelestaObj` with an updated `final_cell_type_assignment` slot.
#' @export
get_final_inferred_cell_types <- function(total_rounds, CelestaObj, imaging_data) {
  current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
  anchor_cell_assignment <- CelestaObj@anchor_cell_type_assignment
  prior_marker_info <- CelestaObj@prior_info
  lineage_info <- CelestaObj@lineage_info
  # dim() has two entries; only the row count is a valid matrix extent.
  n_cells <- dim(current_cell_type_assignment)[1]
  # as.character() keeps the names intact when the column is a factor.
  cell_type_names <- as.character(prior_marker_info[, 1])

  # Map cell type numbers to names; 0 means no type was assigned.
  type_to_name <- function(type_numbers) {
    type_names <- cell_type_names[match(type_numbers, lineage_info$Cell_type_number)]
    type_names[which(type_numbers == 0)] <- "Unknown"
    type_names
  }

  cell_type_name_assigned <- matrix(nrow = n_cells, ncol = total_rounds)
  anchor_cell_type_name_assigned <- matrix(nrow = n_cells, ncol = total_rounds)
  final_cell_type_assignment <- rep(0, length = n_cells)

  for (i in seq_len(total_rounds)) {
    cell_type_name_assigned[, i] <- type_to_name(current_cell_type_assignment[, i])
    anchor_cell_type_name_assigned[, i] <- type_to_name(anchor_cell_assignment[, i])
    if (i == 1) {
      final_cell_type_assignment <- current_cell_type_assignment[, i]
    } else {
      previous_level_type <- unique(
        lineage_info$Previous_cell_type[which(lineage_info$Round == i)]
      )
      refined <- which(final_cell_type_assignment == previous_level_type &
        current_cell_type_assignment[, i] != 0)
      final_cell_type_assignment[refined] <- current_cell_type_assignment[refined, i]
    }
  }

  final_cell_names <- type_to_name(final_cell_type_assignment)
  final_result <- cbind(cell_type_name_assigned, final_cell_type_assignment, final_cell_names)
  round_name <- paste("Round", seq(1, total_rounds, by = 1))
  colnames(final_result) <- c(round_name, "Cell type number", "Final cell type")

  filename <- paste0(CelestaObj@project_name, "_final_cell_type_assignment.csv")
  write.csv(cbind(final_result, imaging_data), file = filename, row.names = FALSE)
  filename <- paste0(CelestaObj@project_name, "_anchor_cell_assignment.csv")
  write.csv(anchor_cell_type_name_assigned, file = filename)

  CelestaObj@final_cell_type_assignment <- final_result
  return(CelestaObj)
}
741 | #############################################################################################
742 | #############################################################################################
#' Plot the cells using XY coordinates
#'
#' Draws the cells whose entry in `cell_type_assignment_to_plot` equals one of
#' `cell_number_to_use`, colored by cell type, on a black square panel and
#' saves the figure as "plot_cell_assignment.png" in the working directory.
#' Cell types without any cell are skipped instead of raising an error.
#'
#' With the default colors no more than 7 cell types can be plotted.
#'
#' @param cell_type_assignment_to_plot Vector of cell type numbers, one per
#'   cell (same order as the rows of the `coords` slot).
#' @param CelestaObj Object providing the slots `coords` (x in column 1, y in
#'   column 2) and `prior_info` (first column holds the cell type names).
#' @param cell_number_to_use Cell type numbers to plot; also used as row
#'   indices into `prior_info`.
#' @param cell_type_colors Colors; the first `length(cell_number_to_use)`
#'   entries are used, in the order of `cell_number_to_use`.
#' @param test_size Point size.
#' @return The value of `ggsave()`.
#' @export
plot_cells_any_combination <- function(cell_type_assignment_to_plot, CelestaObj,
                                       cell_number_to_use,
                                       cell_type_colors = c(palette()[2:7], "white"),
                                       test_size = 1) {
  coords <- CelestaObj@coords
  cell_types <- as.character(CelestaObj@prior_info[cell_number_to_use, 1])
  # Use one common range for both axes so the panel stays square.
  axis_range <- c(
    min(coords[, 1], coords[, 2]),
    max(coords[, 1], coords[, 2])
  )

  # Collect the cells of each requested type; an empty type contributes
  # nothing (a manual index range would count backwards for it).
  cells_by_type <- lapply(cell_number_to_use, function(type_number) {
    which(cell_type_assignment_to_plot == type_number)
  })
  cell_index <- as.integer(unlist(cells_by_type))
  cell_anno <- rep(cell_types, times = lengths(cells_by_type))

  df_plot <- data.frame(
    x = coords[cell_index, 1],
    y = coords[cell_index, 2],
    cell_anno = cell_anno
  )
  df_plot$cell_anno <- factor(df_plot$cell_anno, levels = cell_types)
  color_plot <- cell_type_colors[seq_along(cell_number_to_use)]

  g <- ggplot(df_plot, aes(x = x, y = y, group = cell_anno)) +
    geom_point(aes(color = cell_anno), size = test_size) +
    scale_color_manual(values = color_plot) +
    xlim(axis_range[1], axis_range[2]) +
    ylim(axis_range[1], axis_range[2]) +
    labs(main = "") +
    theme(
      aspect.ratio = 1,
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.title = element_blank(),
      panel.background = element_rect(fill = 'black'),
      axis.line = element_line(colour = "black"),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.text = element_text(size = 12, face = "bold")
    ) +
    guides(colour = guide_legend(override.aes = list(size = 5)))
  ggsave(filename = "plot_cell_assignment.png", plot = g, width = 12, height = 12, units = 'in', dpi = 300)
}
788 | #############################################################################################
#' Plot the expression probabilities of cells in the tissue
#'
#' For every marker column of the prior table (all columns after the first
#' two), plots the cells at their coordinates, colored by the bin of their
#' expression probability (<=0.5, >0.5, >0.7, >0.8, >0.9), and saves the
#' figure as "<marker>_exp_prob.png" in the working directory.
#'
#' @param CelestaObj Object providing the slots `coords` (x in column 1, y in
#'   column 2), `marker_exp_prob` (columns named by marker) and `prior_info`.
#' @param size_to_use Point size.
#' @param width_to_use,height_to_use Size of the saved figure in inches.
#' @return `NULL`, invisibly; called for its side effect of writing files.
#' @export
plot_exp_prob <- function(CelestaObj, size_to_use = 1, width_to_use = 5, height_to_use = 4) {
  coords <- CelestaObj@coords
  marker_exp_prob <- CelestaObj@marker_exp_prob
  prior_marker_info <- CelestaObj@prior_info
  cols <- colorRampPalette(colors = c("white", "blue4"))(6)

  # The axis range is the same for every marker, so compute it once.
  axis_range <- c(
    min(coords[, 1], coords[, 2]),
    max(coords[, 1], coords[, 2])
  )

  # Negative indexing yields an empty set (instead of 3:2) when the prior
  # table has no marker columns.
  markers_to_check <- as.character(colnames(prior_marker_info)[-(1:2)])
  for (marker_to_use in markers_to_check) {
    marker_exp_prob_to_use <-
      marker_exp_prob[, which(colnames(marker_exp_prob) == marker_to_use)]
    # Right-closed bins: (-Inf,0.5], (0.5,0.7], (0.7,0.8], (0.8,0.9], (0.9,Inf)
    prob_bin <- cut(marker_exp_prob_to_use,
      breaks = c(-Inf, 0.5, 0.7, 0.8, 0.9, Inf),
      labels = c("<=0.5", ">0.5", ">0.7", ">0.8", ">0.9")
    )

    mca <- data.frame(
      X = round(coords[, 1], digits = 2),
      Y = round(coords[, 2], digits = 2),
      Expression = round(marker_exp_prob_to_use, digits = 2),
      Color_anno = prob_bin
    )
    row.names(mca) <- NULL

    filename <- paste0(marker_to_use, "_exp_prob.png")
    g <- ggplot(mca, aes(x = X, y = Y, color = Color_anno)) +
      xlim(axis_range[1], axis_range[2]) +
      ylim(axis_range[1], axis_range[2]) +
      geom_point(shape = 20, size = size_to_use) +
      ggtitle(marker_to_use) +
      theme_bw() +
      # The fifth ramp color is skipped so the top bin stands out.
      scale_colour_manual(values = cols[c(1, 2, 3, 4, 6)]) +
      theme(
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 15, face = "bold")
      ) +
      guides(colour = guide_legend(override.aes = list(size = 10)))
    ggsave(filename, plot = g, width = width_to_use, height = height_to_use, units = 'in', dpi = 300)
  }
}
840 | #############################################################################################
#' Assign cell types through iterations
#'
#' Main driver of the cell type assignment. For every round it
#' (1) resets the round's assignment to the starting assignment,
#' (2) scores the unassigned cells and assigns anchor cells using the anchor
#' thresholds, and (3) iteratively updates cell-type probabilities from the
#' neighborhood, re-assigns cells using the iteration thresholds, and updates
#' the prior matrix and the scores. Iteration stops after `max_iteration`
#' iterations, when the fraction of changed cells drops below
#' `cell_change_threshold`, or when no unassigned cell is left. Finally the
#' per-round results are combined by `get_final_inferred_cell_types()`.
#'
#' If no anchor cell is found for any cell type of a round, the function
#' returns immediately without computing the final result.
#'
#' NOTE(review): `imaging_data` is not a parameter; it is looked up as a free
#' variable when the final result is written, so an object of that name must
#' be visible from this function's environment.
#'
#' @param CelestaObj Object providing the slots used by the helper functions,
#'   among them `total_rounds`, `lineage_info`, `starting_cell_type_assignment`,
#'   `current_cell_type_assignment` and `anchor_cell_type_assignment`.
#' @param max_iteration Maximum number of iterations per round.
#' @param cell_change_threshold Iteration stops once the fraction of cells
#'   whose assignment changed falls below this value.
#' @param min_diff Minimum margin between the best and any other cell type.
#' @param min_probability Minimum probability of the best cell type.
#' @param high_marker_threshold_anchor,low_marker_threshold_anchor Marker
#'   thresholds, indexed by cell type number, used for anchor cells.
#' @param high_marker_threshold_iteration,low_marker_threshold_iteration Marker
#'   thresholds, indexed by cell type number, used during the iterations.
#' @return The updated `CelestaObj`.
#' @export
assign_cell_main <- function(CelestaObj, max_iteration = 10, cell_change_threshold = 0.01,
                             min_diff = 0, min_probability = 0,
                             high_marker_threshold_anchor = rep(0.7, length.out = 50),
                             low_marker_threshold_anchor = rep(0.9, length.out = 50),
                             high_marker_threshold_iteration = rep(0.5, length.out = 50),
                             low_marker_threshold_iteration = rep(1, length.out = 50)) {
  total_rounds <- CelestaObj@total_rounds
  ### This loop is the main part for cell type assignment
  for (round in seq_len(total_rounds)) {
    CelestaObj@current_cell_type_assignment[, round] <-
      CelestaObj@starting_cell_type_assignment[, round]

    current_number_of_cells_changed <- numeric()
    lineage_info <- CelestaObj@lineage_info
    cell_type_num <- lineage_info$Cell_type_number[which(lineage_info$Round == round)]
    CelestaObj <- get_initial_prior_matrix(CelestaObj, round)
    unassigned_cells <- find_unassigned_cells(CelestaObj, round)
    number_of_cells_to_find_identity <- length(unassigned_cells)
    print(number_of_cells_to_find_identity)

    ### Get scoring function
    CelestaObj <- scoring_function(CelestaObj, round, unassigned_cells, cell_type_num)
    ### Initialize the cell probability with initial scores
    CelestaObj@current_cell_prob <- CelestaObj@current_scoring_matrix
    ### Assign anchor cells
    CelestaObj <- cell_type(CelestaObj, cell_type_num, unassigned_cells, round,
      min_difference = min_diff,
      min_prob = min_probability,
      high_marker_threshold = high_marker_threshold_anchor,
      low_marker_threshold = low_marker_threshold_anchor
    )

    iteration <- 1
    CelestaObj@anchor_cell_type_assignment[, round] <-
      CelestaObj@current_cell_type_assignment[, round]
    cell_type_count <- count_cell_type(CelestaObj, cell_type_num, round)
    print(cell_type_count)
    # Give up when not a single anchor cell was found for any cell type.
    if (length(which(cell_type_count[, 2] < 1)) == length(cell_type_num)) {
      print("Too few cells identified for certain cell type, please consider relaxing threshold.")
      return(CelestaObj)
    }
    # Sentinel above any threshold so the first iteration always runs.
    current_number_of_cells_changed[iteration] <- 1

    ### Find cells to check
    unassigned_cells <- find_unassigned_cells(CelestaObj, round)
    assigned_cells <- find_assigned_cells(CelestaObj, round)

    ### Calculate beta
    CelestaObj <- neighbor_cell_type(CelestaObj, cell_type_num, round, unassigned_cells)
    CelestaObj <- get_dist_from_nearest_assigned_cells(
      CelestaObj, cell_type_num,
      unassigned_cells, assigned_cells, round
    )
    CelestaObj <- calculate_beta(CelestaObj, scale_factor = 5, bandwidth = 100)

    ### Iterative cell type assignment
    while (iteration < max_iteration &&
      current_number_of_cells_changed[iteration] > cell_change_threshold) {
      iteration <- iteration + 1
      ### Calculate cell type probabilities
      CelestaObj <- cell_prob(CelestaObj, cell_type_num, unassigned_cells, round)
      old_cell_assignment <- CelestaObj@current_cell_type_assignment[, round]
      ### Update cell type assignment
      CelestaObj <- cell_type(CelestaObj, cell_type_num, unassigned_cells, round,
        min_difference = min_diff,
        min_prob = min_probability,
        high_marker_threshold = high_marker_threshold_iteration,
        low_marker_threshold = low_marker_threshold_iteration
      )
      cell_type_count <- count_cell_type(CelestaObj, cell_type_num, round)
      print(cell_type_count)
      # Fraction of this round's candidate cells whose assignment changed.
      current_number_of_cells_changed[iteration] <-
        length(which((old_cell_assignment -
          CelestaObj@current_cell_type_assignment[, round]) != 0)) /
          number_of_cells_to_find_identity
      print(current_number_of_cells_changed[iteration])
      if (current_number_of_cells_changed[iteration] < cell_change_threshold) {
        break
      }

      ### Find cells to check
      unassigned_cells <- find_unassigned_cells(CelestaObj, round)
      assigned_cells <- find_assigned_cells(CelestaObj, round)
      if (length(unassigned_cells) == 0) {
        break
      }

      ### Calculate beta
      CelestaObj <- neighbor_cell_type(CelestaObj, cell_type_num, round, unassigned_cells)
      CelestaObj <- get_dist_from_nearest_assigned_cells(
        CelestaObj, cell_type_num,
        unassigned_cells, assigned_cells, round
      )
      CelestaObj <- calculate_beta(CelestaObj, scale_factor = 5, bandwidth = 100)

      ### Update prior cell-type marker matrix
      CelestaObj <- update_prior_matrix(CelestaObj, round, cell_type_num)
      ### Update scoring function
      CelestaObj <- scoring_function(CelestaObj, round, unassigned_cells, cell_type_num)
    }
  }
  # `imaging_data` is resolved from the enclosing environment (see NOTE above).
  CelestaObj <- get_final_inferred_cell_types(total_rounds, CelestaObj, imaging_data)
  return(CelestaObj)
}
937 | #############################################################################################
938 | #############################################################################################
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
# Test runner script: attaches testthat and the CELESTA package, then runs
# the package's test suite through test_check().
library(testthat)
library(CELESTA)

test_check("CELESTA")
--------------------------------------------------------------------------------
/tests/testthat/test-CELESTA_functions.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(dplyr)
3 | library(devtools)
4 | library(Rmixmod)
5 | library(spdep)
6 | library(ggplot2)
7 | library(reshape2)
8 | library(zeallot)
9 |
# Load the original CELESTA functions. They serve as the reference
# implementation against which the package functions are compared below.
# Note that some of the slot names were changed in the original to match
# the new version for comparison.
# The path is relative to the working directory of the test run.
source("../CELESTA_functions_orig.R")

# Load the inputs shared by all tests: the prior marker table and the
# imaging data (paths are again relative to the working directory).
load(file = "../../data/prior_marker_info.rda")
load(file = "../../data/imaging_data.rda")
18 |
# Compare two CELESTA objects slot by slot.
#
# Every slot that exists in both objects is compared: slots of type double
# with expect_equal() (numeric tolerance), all others with expect_identical().
# Slots present in only one object (e.g. a slot deleted from the new class)
# are skipped, i.e. treated as vacuously equal.
#
# The expectations are deliberately NOT wrapped in tryCatch(error = ...):
# testthat failures are conditions that inherit from "error", so such a
# handler would swallow every mismatch and the tests could never fail.
#
# actual:   object produced by the package under test
# expected: object produced by the reference implementation
# Returns NULL invisibly; called for its expectations.
compareCelesta <- function(actual, expected) {
  shared_slots <- intersect(slotNames(actual), slotNames(expected))
  for (slot_name in shared_slots) {
    a <- slot(actual, slot_name)
    e <- slot(expected, slot_name)

    if (typeof(a) == "double") {
      expect_equal(a, e,
        label = paste0("actual@", slot_name),
        expected.label = paste0("expected@", slot_name)
      )
    } else {
      expect_identical(a, e,
        label = paste0("actual@", slot_name),
        expected.label = paste0("expected@", slot_name)
      )
    }
  }
  invisible(NULL)
}
40 |
# The package constructor must produce the same object as the reference
# constructor for identical inputs.
test_that("CreateCelestaObject", {
  from_package <- CELESTA::CreateCelestaObject(
    project_title = "project_title", prior_marker_info, imaging_data
  )
  from_reference <- CreateCELESTAobj(
    project_title = "project_title", prior_marker_info, imaging_data
  )
  compareCelesta(from_package, from_reference)
})
54 |
# Filtering with the same thresholds must give the same object in the
# package and in the reference implementation.
test_that("FilterCells", {
  package_obj <- CELESTA::CreateCelestaObject(
    project_title = "project_title", prior_marker_info, imaging_data
  )
  reference_obj <- CreateCELESTAobj(
    project_title = "project_title", prior_marker_info, imaging_data
  )

  package_filtered <- CELESTA::FilterCells(
    package_obj,
    high_marker_threshold = 0.9, low_marker_threshold = 0.5
  )
  reference_filtered <- cell_filtering(
    high_marker_threshold = 0.9, low_marker_threshold = 0.5, reference_obj
  )
  compareCelesta(package_filtered, reference_filtered)
})
77 |
# Full pipeline (create, filter, assign) must give the same object in the
# package and in the reference implementation.
test_that("AssignCells", {
  package_obj <- CELESTA::CreateCelestaObject(
    project_title = "project_title", prior_marker_info, imaging_data
  )
  reference_obj <- CreateCELESTAobj(
    project_title = "project_title", prior_marker_info, imaging_data
  )

  package_obj <- CELESTA::FilterCells(
    package_obj,
    high_marker_threshold = 0.9, low_marker_threshold = 0.5
  )
  reference_obj <- cell_filtering(
    high_marker_threshold = 0.9, low_marker_threshold = 0.5, reference_obj
  )

  package_result <- CELESTA::AssignCells(
    package_obj,
    max_iteration = 10,
    cell_change_threshold = 0.01,
    high_expression_threshold_anchor = high_marker_threshold_anchor,
    low_expression_threshold_anchor = low_marker_threshold_anchor,
    high_expression_threshold_index = high_marker_threshold_iteration,
    low_expression_threshold_index = low_marker_threshold_iteration
  )
  reference_result <- assign_cell_main(
    reference_obj,
    max_iteration = 10,
    cell_change_threshold = 0.01,
    high_marker_threshold_anchor = high_marker_threshold_anchor,
    low_marker_threshold_anchor = low_marker_threshold_anchor,
    high_marker_threshold_iteration = high_marker_threshold_iteration,
    low_marker_threshold_iteration = low_marker_threshold_iteration
  )
  compareCelesta(package_result, reference_result)
})

115 |
--------------------------------------------------------------------------------