├── .Rbuildignore ├── .gitignore ├── CELESTA.Rproj ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── R └── CELESTA_functions.R ├── README.Rmd ├── README.md ├── data ├── high_marker_threshold_anchor.rda ├── high_marker_threshold_iteration.rda ├── imaging_data.csv ├── imaging_data.rda ├── low_marker_threshold_anchor.rda ├── low_marker_threshold_iteration.rda ├── prior_marker_info.csv └── prior_marker_info.rda ├── images ├── CD31_threshold.png ├── Cytokeratin_threshold.png ├── aSMA_threshold.png ├── demo_image.png ├── high_threshold_example.png ├── low_threshold_example.png ├── plot_cell_assignment.png ├── prior_matrix_example.png └── segmented_file_example.png ├── man ├── AssignCellTypes.Rd ├── AssignCells.Rd ├── BuildSigmoidFunction.Rd ├── CalcMarkerActivationProbability.Rd ├── CalculateBeta.Rd ├── CalculateIndexCellProb.Rd ├── CalculateProbabilityDifference.Rd ├── CalculateScores.Rd ├── Celesta-class.Rd ├── CountCellType.Rd ├── CreateCelestaObject.Rd ├── FilterArtifactCells.Rd ├── FilterCells.Rd ├── FindCellsToCheck.Rd ├── FindCellsWithId.Rd ├── FitGmmModel.Rd ├── GetCoords.Rd ├── GetDistFromNearestAssignedCells.Rd ├── GetFinalInferredCellTypes.Rd ├── GetInitialPriorMatrix.Rd ├── GetMarkerExpMatrix.Rd ├── GetNeighborInfo.Rd ├── GetPriorInfo.Rd ├── GetScore.Rd ├── InitializeCellAndScoringMatrices.Rd ├── MarkQuestionableCells.Rd ├── NeighborCellType.Rd ├── PlotCellsAnyCombination.Rd ├── PlotExpProb.Rd ├── PlotSingleExpProb.Rd ├── UpdatePriorMatrix.Rd └── figures │ └── README-pressure-1.png └── tests ├── CELESTA_functions_orig.R ├── testthat.R └── testthat ├── project_title_anchor_cell_assignment.csv ├── project_title_final_cell_type_assignment.csv └── test-CELESTA_functions.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^CELESTA\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .Rproj.user 2 | -------------------------------------------------------------------------------- /CELESTA.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | Encoding: UTF-8 9 | 10 | AutoAppendNewline: Yes 11 | StripTrailingWhitespace: Yes 12 | LineEndingConversion: Posix 13 | 14 | BuildType: Package 15 | PackageUseDevtools: Yes 16 | PackageInstallArgs: --no-multiarch --with-keep.source 17 | PackageRoxygenize: rd,collate,namespace 18 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: CELESTA 2 | Title: Cell type identification with spatial information 3 | Version: 0.0.0.9000 4 | Authors@R: 5 | person(given = "Weiruo", 6 | family = "Zhang", 7 | role = c("aut", "cre"), 8 | email = "weiruo16@stanford.edu") 9 | Description: Automate machine learning cell type identification using both protein expressions and cell neighborhood information for multiplexed in situ imaging data. 10 | License: Apache License (>= 2) 11 | Encoding: UTF-8 12 | LazyData: true 13 | Roxygen: list(markdown = TRUE) 14 | RoxygenNote: 7.1.2 15 | Imports: 16 | Rmixmod, 17 | spdep, 18 | ggplot2, 19 | reshape2, 20 | zeallot 21 | Depends: 22 | R (>= 2.10) 23 | Suggests: 24 | testthat (>= 3.0.0) 25 | Config/testthat/edition: 3 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | ============== 3 | 4 | _Version 2.0, January 2004_ 5 | _<>_ 6 | 7 | ### Terms and Conditions for use, reproduction, and distribution 8 | 9 | #### 1. Definitions 10 | 11 | “License” shall mean the terms and conditions for use, reproduction, and 12 | distribution as defined by Sections 1 through 9 of this document. 13 | 14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright 15 | owner that is granting the License. 16 | 17 | “Legal Entity” shall mean the union of the acting entity and all other entities 18 | that control, are controlled by, or are under common control with that entity. 19 | For the purposes of this definition, “control” means **(i)** the power, direct or 20 | indirect, to cause the direction or management of such entity, whether by 21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the 22 | outstanding shares, or **(iii)** beneficial ownership of such entity. 
23 | 24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising 25 | permissions granted by this License. 26 | 27 | “Source” form shall mean the preferred form for making modifications, including 28 | but not limited to software source code, documentation source, and configuration 29 | files. 30 | 31 | “Object” form shall mean any form resulting from mechanical transformation or 32 | translation of a Source form, including but not limited to compiled object code, 33 | generated documentation, and conversions to other media types. 34 | 35 | “Work” shall mean the work of authorship, whether in Source or Object form, made 36 | available under the License, as indicated by a copyright notice that is included 37 | in or attached to the work (an example is provided in the Appendix below). 38 | 39 | “Derivative Works” shall mean any work, whether in Source or Object form, that 40 | is based on (or derived from) the Work and for which the editorial revisions, 41 | annotations, elaborations, or other modifications represent, as a whole, an 42 | original work of authorship. For the purposes of this License, Derivative Works 43 | shall not include works that remain separable from, or merely link (or bind by 44 | name) to the interfaces of, the Work and Derivative Works thereof. 45 | 46 | “Contribution” shall mean any work of authorship, including the original version 47 | of the Work and any modifications or additions to that Work or Derivative Works 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 49 | by the copyright owner or by an individual or Legal Entity authorized to submit 50 | on behalf of the copyright owner. 
For the purposes of this definition, 51 | “submitted” means any form of electronic, verbal, or written communication sent 52 | to the Licensor or its representatives, including but not limited to 53 | communication on electronic mailing lists, source code control systems, and 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for 55 | the purpose of discussing and improving the Work, but excluding communication 56 | that is conspicuously marked or otherwise designated in writing by the copyright 57 | owner as “Not a Contribution.” 58 | 59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf 60 | of whom a Contribution has been received by Licensor and subsequently 61 | incorporated within the Work. 62 | 63 | #### 2. Grant of Copyright License 64 | 65 | Subject to the terms and conditions of this License, each Contributor hereby 66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 67 | irrevocable copyright license to reproduce, prepare Derivative Works of, 68 | publicly display, publicly perform, sublicense, and distribute the Work and such 69 | Derivative Works in Source or Object form. 70 | 71 | #### 3. Grant of Patent License 72 | 73 | Subject to the terms and conditions of this License, each Contributor hereby 74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 75 | irrevocable (except as stated in this section) patent license to make, have 76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 77 | such license applies only to those patent claims licensable by such Contributor 78 | that are necessarily infringed by their Contribution(s) alone or by combination 79 | of their Contribution(s) with the Work to which such Contribution(s) was 80 | submitted. 
If You institute patent litigation against any entity (including a 81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 82 | Contribution incorporated within the Work constitutes direct or contributory 83 | patent infringement, then any patent licenses granted to You under this License 84 | for that Work shall terminate as of the date such litigation is filed. 85 | 86 | #### 4. Redistribution 87 | 88 | You may reproduce and distribute copies of the Work or Derivative Works thereof 89 | in any medium, with or without modifications, and in Source or Object form, 90 | provided that You meet the following conditions: 91 | 92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of 93 | this License; and 94 | * **(b)** You must cause any modified files to carry prominent notices stating that You 95 | changed the files; and 96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute, 97 | all copyright, patent, trademark, and attribution notices from the Source form 98 | of the Work, excluding those notices that do not pertain to any part of the 99 | Derivative Works; and 100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any 101 | Derivative Works that You distribute must include a readable copy of the 102 | attribution notices contained within such NOTICE file, excluding those notices 103 | that do not pertain to any part of the Derivative Works, in at least one of the 104 | following places: within a NOTICE text file distributed as part of the 105 | Derivative Works; within the Source form or documentation, if provided along 106 | with the Derivative Works; or, within a display generated by the Derivative 107 | Works, if and wherever such third-party notices normally appear. The contents of 108 | the NOTICE file are for informational purposes only and do not modify the 109 | License. 
You may add Your own attribution notices within Derivative Works that 110 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 111 | provided that such additional attribution notices cannot be construed as 112 | modifying the License. 113 | 114 | You may add Your own copyright statement to Your modifications and may provide 115 | additional or different license terms and conditions for use, reproduction, or 116 | distribution of Your modifications, or for any such Derivative Works as a whole, 117 | provided Your use, reproduction, and distribution of the Work otherwise complies 118 | with the conditions stated in this License. 119 | 120 | #### 5. Submission of Contributions 121 | 122 | Unless You explicitly state otherwise, any Contribution intentionally submitted 123 | for inclusion in the Work by You to the Licensor shall be under the terms and 124 | conditions of this License, without any additional terms or conditions. 125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 126 | any separate license agreement you may have executed with Licensor regarding 127 | such Contributions. 128 | 129 | #### 6. Trademarks 130 | 131 | This License does not grant permission to use the trade names, trademarks, 132 | service marks, or product names of the Licensor, except as required for 133 | reasonable and customary use in describing the origin of the Work and 134 | reproducing the content of the NOTICE file. 135 | 136 | #### 7. Disclaimer of Warranty 137 | 138 | Unless required by applicable law or agreed to in writing, Licensor provides the 139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, 140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 141 | including, without limitation, any warranties or conditions of TITLE, 142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are 143 | solely responsible for determining the appropriateness of using or 144 | redistributing the Work and assume any risks associated with Your exercise of 145 | permissions under this License. 146 | 147 | #### 8. Limitation of Liability 148 | 149 | In no event and under no legal theory, whether in tort (including negligence), 150 | contract, or otherwise, unless required by applicable law (such as deliberate 151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 152 | liable to You for damages, including any direct, indirect, special, incidental, 153 | or consequential damages of any character arising as a result of this License or 154 | out of the use or inability to use the Work (including but not limited to 155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 156 | any and all other commercial damages or losses), even if such Contributor has 157 | been advised of the possibility of such damages. 158 | 159 | #### 9. Accepting Warranty or Additional Liability 160 | 161 | While redistributing the Work or Derivative Works thereof, You may choose to 162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 163 | other liability obligations and/or rights consistent with this License. However, 164 | in accepting such obligations, You may act only on Your own behalf and on Your 165 | sole responsibility, not on behalf of any other Contributor, and only if You 166 | agree to indemnify, defend, and hold each Contributor harmless for any liability 167 | incurred by, or claims asserted against, such Contributor by reason of your 168 | accepting any such warranty or additional liability. 
169 | 170 | _END OF TERMS AND CONDITIONS_ 171 | 172 | ### APPENDIX: How to apply the Apache License to your work 173 | 174 | To apply the Apache License to your work, attach the following boilerplate 175 | notice, with the fields enclosed by brackets `[]` replaced with your own 176 | identifying information. (Don't include the brackets!) The text should be 177 | enclosed in the appropriate comment syntax for the file format. We also 178 | recommend that a file or class name and description of purpose be included on 179 | the same “printed page” as the copyright notice for easier identification within 180 | third-party archives. 181 | 182 | Copyright [yyyy] [name of copyright owner] 183 | 184 | Licensed under the Apache License, Version 2.0 (the "License"); 185 | you may not use this file except in compliance with the License. 186 | You may obtain a copy of the License at 187 | 188 | http://www.apache.org/licenses/LICENSE-2.0 189 | 190 | Unless required by applicable law or agreed to in writing, software 191 | distributed under the License is distributed on an "AS IS" BASIS, 192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 193 | See the License for the specific language governing permissions and 194 | limitations under the License. 
195 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(AssignCellTypes) 4 | export(AssignCells) 5 | export(BuildSigmoidFunction) 6 | export(CalcMarkerActivationProbability) 7 | export(CalculateBeta) 8 | export(CalculateIndexCellProb) 9 | export(CalculateProbabilityDifference) 10 | export(CalculateScores) 11 | export(CountCellType) 12 | export(CreateCelestaObject) 13 | export(FilterArtifactCells) 14 | export(FilterCells) 15 | export(FindCellsToCheck) 16 | export(FindCellsWithId) 17 | export(FitGmmModel) 18 | export(GetCoords) 19 | export(GetDistFromNearestAssignedCells) 20 | export(GetFinalInferredCellTypes) 21 | export(GetInitialPriorMatrix) 22 | export(GetMarkerExpMatrix) 23 | export(GetNeighborInfo) 24 | export(GetPriorInfo) 25 | export(GetScore) 26 | export(InitializeCellAndScoringMatrices) 27 | export(MarkQuestionableCells) 28 | export(NeighborCellType) 29 | export(PlotCellsAnyCombination) 30 | export(PlotExpProb) 31 | export(PlotSingleExpProb) 32 | export(UpdatePriorMatrix) 33 | exportClasses(Celesta) 34 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | options(tibble.print_min = 5, tibble.print_max = 5) 15 | ``` 16 | 17 | # CELESTA 18 | 19 | 20 | 21 | ## Overview 22 | CELESTA (CELl typE identification with SpaTiAl information) is an algorithm aiming to perform automated cell type identification for multiplexed in situ imaging data. 
CELESTA makes use of both protein expressions and cell spatial neighborhood information from segmented imaging data for the cell type identification. 23 | 24 | The pre-saved imaging data is taken from reg009 of the published CODEX data Schurch et al. Cell,2020 for illustration purposes. 25 | 26 | * `CreateCelestaObject()` Creates an object running CELESTA. It requires a title to create the project, segmented imaging data file and prior knowledge file for cell-type signature matrix (user-defined). 27 | * `FilterCells()` This step intends to filter out questionable cells due to imaging artifacts, segmentation errors, etc. 28 | * `PlotExpProb()` This function plots the calculated expression probabilities for each marker included in the user-defined prior cell-type signature matrix. It can be used to visualize and help with setting the thresholds for whether a marker is expressed or not. 29 | * `AssignCells()` This is the main function to assign cell types with an iterative EM algorithm. 30 | * `PlotCellsAnyCombination()` This function can be used to plot the cells with identified cell types with the XY coordinates from segmentation. 31 | 32 | ## Installation 33 | 34 | You can install the development version of CELESTA 35 | 36 | ``` {r, eval = FALSE} 37 | # install.packages("devtools") 38 | devtools::install_github("plevritis/CELESTA") 39 | ``` 40 | ## Dependency 41 | CELESTA depends on the following R packages: 42 | - [Rmixmod](https://cran.r-project.org/web/packages/Rmixmod/index.html): for performing Gaussian Mixture Modeling 43 | - [spdep](https://cran.r-project.org/web/packages/spdep/index.html): for obtaining spatial neighborhood information 44 | - [zeallot](https://cran.r-project.org/web/packages/zeallot/index.html): for R code styling. Provides a %<-% operator to perform multiple, unpacking, and destructuring assignment in R. 
45 | - [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) 46 | [reshape2](https://cran.r-project.org/web/packages/reshape2/index.html): for plotting 47 | 48 | ## Usage 49 | 50 | ```{r,results='hide',message=FALSE, eval = FALSE} 51 | library(CELESTA) 52 | library(Rmixmod) 53 | library(spdep) 54 | library(ggplot2) 55 | library(reshape2) 56 | library(zeallot) 57 | 58 | ### The pre-saved imaging data is taken from reg009 of the published CODEX data Schurch et al. Cell,2020 59 | ### Create CELESTA object. It requires a title for the project. 60 | ### It also requires the segmented input file and user-defined cell-type signature matrix. 61 | ### Please refer to the Inputs section below. 62 | CelestaObj <- CreateCelestaObject(project_title = "project_title",prior_marker_info,imaging_data) 63 | 64 | ### Filter out questionable cells. 65 | ### Cells with every marker having expression probability higher than 0.9 are filtered out. 66 | ### Cells with every marker having expression probability lower than 0.4 are also filtered out. 67 | ### User can define the thresholds based on inspecting their data. 68 | ### This step is optional. 69 | CelestaObj <- FilterCells(CelestaObj,high_marker_threshold=0.9, low_marker_threshold=0.4) 70 | 71 | ### Assign cell types. 72 | ### max_iteration is used to define the maximum iterations allowed in the EM algorithm per round. 73 | ### cell_change_threshold is a user-defined ending condition for the EM algorithm. 74 | ### For example, 0.01 means that when fewer than 1% of the total number of cells change identity, the algorithm will stop. 
75 | CelestaObj <- AssignCells(CelestaObj,max_iteration=10,cell_change_threshold=0.01, 76 | high_expression_threshold_anchor=high_marker_threshold_anchor, 77 | low_expression_threshold_anchor=low_marker_threshold_anchor, 78 | high_expression_threshold_index=high_marker_threshold_iteration, 79 | low_expression_threshold_index=low_marker_threshold_iteration) 80 | 81 | ### Plot cells with CELESTA assigned cell types. 82 | ### The cell_number_to_use corresponds to the defined numbers in the prior cell-type signature matrix. 83 | ### For example, 1 corresponds to endothelial cell, 2 corresponds to tumor cell. 84 | ### The program will plot the corresponding cell types given in the "cell_number_to_use" parameter. 85 | ### To plot the "unknown" cells that are left unassigned by CELESTA, include 0 in the list. 86 | ### The default color for unknown cells is gray. 87 | ### It is suggested that do not plot over 7 cell types on the same image for better visualization. 88 | PlotCellsAnyCombination(cell_type_assignment_to_plot=CelestaObj@final_cell_type_assignment[,5], 89 | coords = CelestaObj@coords, 90 | prior_info = prior_marker_info, 91 | cell_number_to_use=c(1,2,3),cell_type_colors=c("yellow","red","blue")) 92 | 93 | ### To include unknown cells 94 | PlotCellsAnyCombination(cell_type_assignment_to_plot=CelestaObj@final_cell_type_assignment[,5], 95 | coords = CelestaObj@coords, 96 | prior_info = prior_marker_info, 97 | cell_number_to_use=c(0,1,2,3),cell_type_colors=c("yellow","red","blue")) 98 | 99 | ### plot expression probability 100 | PlotExpProb(coords=CelestaObj@coords, 101 | marker_exp_prob=CelestaObj@marker_exp_prob, 102 | prior_marker_info = prior_marker_info, 103 | save_plot = TRUE) 104 | ``` 105 | 106 | ## Inputs 107 | CELESTA requires two inputs:
108 | `1. Segmented imaging data`:
109 | a dataframe with rows as the cells, and needs to have (1) two columns named X and Y to define the XY coordinates of the cells and (2) other columns having the protein marker expressions for each cell
110 | 111 | Below is an example of the segmented imaging file header 112 | 113 | ![An example of the segmented imaging file example](images/segmented_file_example.png) 114 | 115 | `2. User-defined cell-type signature matrix`:
116 | (1) The first column has to contain the cell types to be inferred
117 | (2) The second column has the lineage information for each cell type. The lineage information has three numbers connected by “_” (underscore). The first number indicates the round. Cell types with the same lineage level are inferred in the same round. An increasing number indicates increasing cell-type resolution. For example, immune cells -> CD3+ T cells -> CD4+ T cells. The third number is a number assigned to the cell type, i.e., the cell type number. The middle number gives the cell type number of the previous (parent) lineage of the current cell type. For example, the middle number for CD3+ T cells is 5, because they are a subtype of immune cells, which have cell type number 5.
118 | (3) Starting from column three, each column is a protein marker. If the protein marker is known to be expressed by a cell type, then it is denoted by “1”. If the protein marker is known not to be expressed by a cell type, then it is denoted by “0”. If the protein marker is irrelevant for a cell type, or its expression is uncertain, then it is denoted by “NA”.
119 | (4) More examples of the user-defined cell-type signature matrix are provided under the folder: data.
120 | 121 | Below is an example of cell-type signature matrix based on imaging panel used in Schurch et al. Cell, 2020. 122 | 123 | ![An example of cell-type signature matrix based on imaging panel used in Schurch et al. Cell, 2020](images/prior_matrix_example.png) 124 | 125 | ## Outputs 126 | CELESTA outputs: 127 | 1. After running `AssignCells()` function, CELESTA will output a .csv file with the cell type assignment to each cell for each round and the final combined cell types.
128 | 2. In addition, users can access the results in the CELESTA object under the slot "final_cell_type_assignment". The anchor cells defined for each round can be found under the slot "anchor_cell_type_assignment".
129 | 130 | CELESTA can also plot the assigned cells by using the `PlotCellsAnyCombination()` function. An example output image is shown below: 131 | ![An example plot of assigned cell types](images/plot_cell_assignment.png) 132 | Users can compare the output with the original images. An example is shown below:
133 | `Please note:` CODEX images preprocessed with Akoya Biosciences software have their image tiles stitched in a flipped way. So in some cases, the image needs to be flipped for the comparison.
134 | ![An example of comparison](images/demo_image.png) 135 | 136 | ## How to define thresholds 137 | In the `AssignCells()` function, it requires four vectors to define the high and low thresholds for each cell type. The length of the vector equals to the total number of cell types defined in the cell-type signature matrix.Examples of the thresholds are provided under the folder:data.
138 | We suggest starting with the default thresholds and modifying them by comparing the results with the original staining, as demonstrated below.
139 | Two vectors are required for defining the "high_expression_threshold", one for anchor cells and one for index cells. These thresholds define how high the marker expression probability must be for the marker to be considered expressed. An example for defining high_expression_threshold is shown below: 140 | ![An example of high marker threshold](images/high_threshold_example.png) 141 |
142 | To find the proper threshold, the `PlotExpProb()` function can be applied. Because the segmented data may have some compensation in the values which are the inputs to CELESTA, the expression probabilities are calculated based on the segmented data. It's useful to compare the expression probabilities with the CODEX staining for each marker.
143 | For example, for endothelial cells, if we plot the expression probabilities of CD31 (left) and compare with the CD31 staining, approximately 0.9 and 0.8 would be the right threshold for defining how much the cell should express CD31. `Please note:` It is suggested that for anchor cells, use a slightly higher threshold than index cells.
144 | ![An example of CD31](images/CD31_threshold.png) 145 | Another example, for tumor cells, if we plot the expression probabilities of Cytokeratin (left) and compare with the Cytokeratin staining, approximately 0.9 and 0.8 would be the right threshold for defining how much the cell should express Cytokeratin. `Please note:` It is suggested that for anchor cells, use a slightly higher threshold than index cells. 146 | ![An example of CK](images/Cytokeratin_threshold.png) 147 | 148 | Two vectors are also required for defining the "low_marker_threshold", one for anchor cells and one for index cells. These thresholds define how low the marker expression probability must be for the marker to be considered not expressed. Normally 1 is assigned to this value unless there are a lot of doublets or co-staining in the data. **The Low expression threshold default values in general are robust, and thus we recommend testing the High expression threshold values.**
149 | 150 | An example for defining low_marker_threshold is shown below:
151 | 152 | ![An example of low marker threshold](images/low_threshold_example.png) 153 | 154 | ## Getting help 155 | If you encounter a clear bug, please file an issue with a minimal reproducible example on [GitHub](https://github.com/plevritis/CELESTA/issues). For questions and other discussion, please use [community.rstudio.com](https://community.rstudio.com/). 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # CELESTA 5 | 6 | 7 | 8 | 9 | ## Overview 10 | 11 | CELESTA (CELl typE identification with SpaTiAl information) is an 12 | algorithm aiming to perform automate cell type identification for 13 | multiplexed in situ imaging data. CELESTA makes use of both protein 14 | expressions and cell spatial neighborhood information from segmented 15 | imaging data for the cell type identification. 16 | 17 | The pre-saved imaging data is taken from reg009 of the published CODEX 18 | data Schurch et al. Cell,2020 for illustration purpose. 19 | 20 | - `CreateCelestaObject()` Creates an object running CELESTA. It 21 | requires a title to create the project, segmented imaging data file 22 | and prior knowledge file for cell-type signature matrix 23 | (user-defined). 24 | - `FilterCells()` This step intends to fill out questionable cells due 25 | to imaging artifacts, segmentation error etc. 26 | - `PlotExpProb()` This function plots the calculated expression 27 | probabilities for each marker included in the user-defined prior 28 | cell-type signature matrix. It can be used to visualize and help 29 | with setting the thresholds for whether a marker is expressed or 30 | not. 31 | - `AssignCells()` This is the main function to assign cell types with 32 | an iterative EM algorithm. 
33 | - `PlotCellsAnyCombination()` This function can be used to plot the 34 | cells with identified cell types with the XY coordinates from 35 | segmentation. 36 | 37 | ## Installation 38 | 39 | You can install the development version of CELESTA 40 | 41 | ``` r 42 | # install.packages("devtools") 43 | devtools::install_github("plevritis/CELESTA") 44 | ``` 45 | 46 | ## Dependency 47 | 48 | CELESTA requires dependency on the following R packages: - 49 | [Rmixmod](https://cran.r-project.org/web/packages/Rmixmod/index.html): 50 | for performing Gaussian Mixture Modeling - 51 | [spdep](https://cran.r-project.org/web/packages/spdep/index.html): for 52 | obtaining spatial neighborhood information - 53 | [zeallot](https://cran.r-project.org/web/packages/zeallot/index.html): 54 | for R code styling. Provides a %<-% operator to perform multiple, 55 | unpacking, and destructuring assignment in R. - 56 | [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) 57 | [reshape2](https://cran.r-project.org/web/packages/reshape2/index.html): 58 | for plotting 59 | 60 | ## Usage 61 | 62 | ``` r 63 | library(CELESTA) 64 | library(Rmixmod) 65 | library(spdep) 66 | library(ggplot2) 67 | library(reshape2) 68 | library(zeallot) 69 | 70 | ### The pre-saved imaging data is taken from reg009 of the published CODEX data Schurch et al. Cell,2020 71 | ### Create CELESTA object. It requires a title for the project. 72 | ### It also required the segmented input file and user-defined cell-type signature matrix. 73 | ### Please refer to the Inputs session below. 74 | CelestaObj <- CreateCelestaObject(project_title = "project_title",prior_marker_info,imaging_data) 75 | 76 | ### Filter out questionable cells. 77 | ### A cell with every marker having expression probability higher than 0.9 are filtered out. 78 | ### And A cell with every marker having expression probability lower than 0.4 are filtered out. 79 | ### User can define the thresholds based on inspecting their data. 
80 | ### **This step is optional.** We suggest starting without running this step to see whether there are many doublets/triplets. 81 | CelestaObj <- FilterCells(CelestaObj,high_marker_threshold=0.9, low_marker_threshold=0.4) 82 | 83 | ### Assign cell types. 84 | ### max_iteration is used to define the maximum iterations allowed in the EM algorithm per round. 85 | ### cell_change_threshold is a user-defined ending condition for the EM algorithm. 86 | ### For example, 0.01 means that when fewer than 1% of the total number of cells do not change identity, the algorithm will stop. 87 | CelestaObj <- AssignCells(CelestaObj,max_iteration=10,cell_change_threshold=0.01, 88 | high_expression_threshold_anchor=high_marker_threshold_anchor, 89 | low_expression_threshold_anchor=low_marker_threshold_anchor, 90 | high_expression_threshold_index=high_marker_threshold_iteration, 91 | low_expression_threshold_index=low_marker_threshold_iteration) 92 | 93 | ### After the AssignCells() function, the CELESTA assigned cell types will be stored in the CelestaObj 94 | ### in the field called final_cell_type_assignment with each row corresponding to a cell. 95 | ### The final_cell_type_assignment has assignment for each round stored in each column, the final 96 | ### cell types and the corresponding cell type number corresponding to the cell type specified in 97 | ### the cell-type signature matrix (please see Input section below). 98 | 99 | ### Plot cells with CELESTA assigned cell types. 100 | ### The cell_number_to_use corresponds to the defined numbers in the prior cell-type signature matrix. 101 | ### For example, 1 corresponds to endothelial cell, 2 corresponds to tumor cell. 102 | ### The program will plot the corresponding cell types given in the "cell_number_to_use" parameter. 103 | ### To plot the "unknown" cells that are left unassigned by CELESTA, include 0 in the list. 104 | ### The default color for unknown cells is gray. 
105 | ### The size of the cells plotted can be modified by changing the parameter test_size. 106 | PlotCellsAnyCombination(cell_type_assignment_to_plot=CelestaObj@final_cell_type_assignment[,(CelestaObj@total_rounds+1)], 107 | coords = CelestaObj@coords, 108 | prior_info = prior_marker_info, 109 | cell_number_to_use=c(1,2,3), 110 | cell_type_colors=c("yellow","red","blue"), 111 | test_size=1) 112 | 113 | ### To include unknown cells 114 | PlotCellsAnyCombination(cell_type_assignment_to_plot=CelestaObj@final_cell_type_assignment[,(CelestaObj@total_rounds+1)], 115 | coords = CelestaObj@coords, 116 | prior_info = prior_marker_info, 117 | cell_number_to_use=c(0,1,2,3),cell_type_colors=c("yellow","red","blue")) 118 | 119 | ### plot expression probability 120 | PlotExpProb(coords=CelestaObj@coords, 121 | marker_exp_prob=CelestaObj@marker_exp_prob, 122 | prior_marker_info = prior_marker_info, 123 | save_plot = TRUE) 124 | ``` 125 | 126 | ## Inputs 127 | 128 | CELESTA requires two inputs:
`1. Segmented imaging data`:
a 129 | dataframe with rows as the cells, and needs to have (1) two columns 130 | named X and Y to define the XY coordinates of the cells and (2) other 131 | columns having the protein marker expressions for each cell
132 | 133 | Below is an example of the segmented imaging file header 134 | 135 | ![An example of the segmented imaging file 136 | example](images/segmented_file_example.png) 137 | 138 | `2. User-defined cell-type signature matrix`:
(1) The first column 139 | has to contain the cell types to be inferred
(2) The second column 140 | has the lineage information for each cell type. The lineage information 141 | has three numbers connected by “\_” (underscore). The first number 142 | indicates the round. Cell types with the same lineage level are inferred in 143 | the same round. An increasing number indicates increasing cell-type 144 | resolution. For example, immune cells -> CD3+ T cells -> CD4+ T 145 | cells. The third number is a number assigned to the cell type, i.e., the cell 146 | type number. The middle number gives the cell type number of the previous (parent) lineage 147 | of the current cell type. For example, the middle number for 148 | CD3+ T cells is 5, because they are a subtype of immune cells, which have 149 | cell type number 5.
(3) Starting from column three, 150 | each column is a protein marker. If the protein marker is known to be 151 | expressed for that cell type, then it is denoted by “1”. If the protein 152 | marker is known to not express for a cell type, then it is denoted by 153 | “0”. If the protein marker is irrelevant or uncertain to express for a 154 | cell type, then it is denoted by “NA”.
(4) More examples of the 155 | user-defined cell-type signature matrix are provided under the 156 | folder: data.
157 | 158 | Below is an example of cell-type signature matrix based on imaging panel 159 | used in Schurch et al. Cell, 2020. 160 | 161 | ![An example of cell-type signature matrix based on imaging panel used 162 | in Schurch et al. Cell, 2020](images/prior_matrix_example.png) 163 | 164 | ## Outputs 165 | 166 | CELESTA outputs: 1. After running `AssignCells()` function, CELESTA will 167 | output a .csv file with the cell type assignment to each cell for each 168 | round and the final combined cell types.
2. In addition, users can 169 | access the results in the CELESTA object under the slot 170 | “final\_cell\_type\_assignment”. The anchor cells defined for each round 171 | can be found under the slot “anchor\_cell\_type\_assignment”.
172 | 173 | CELESTA can also plot the assigned cells by using the 174 | `PlotCellsAnyCombination()` function. An example output image is shown 175 | below: ![An example plot of assigned cell 176 | types](images/plot_cell_assignment.png) Users can compare the output 177 | with the original images. An example is shown below:
`Please note:` 178 | CODEX images preprocessed with Akoya Biosciences software have their 179 | image tiles stitched in a flipped way. So in some cases, the 180 | image needs to be flipped for the comparison.
![An example of 181 | comparison](images/demo_image.png) 182 | 183 | ## How to define thresholds 184 | 185 | In the `AssignCells()` function, it requires four vectors to define the 186 | high and low thresholds for each cell type. The length of the vector 187 | equals to the total number of cell types defined in the cell-type 188 | signature matrix.Examples of the thresholds are provided under the 189 | folder:data.
We suggest starting with the default thresholds and 190 | modifying them by comparing the results with the original staining, as 191 | demonstrated below.
The two vectors are required for defining the 192 | “high\_expression\_threshold”, one for anchor cells and one for index 193 | cells(non-anchor cells). The thresholds defined how much the marker 194 | expression probability is in order to be considered as expressed. An 195 | example for defining high\_expression\_threshold is shown below: ![An 196 | example of high marker threshold](images/high_threshold_example.png) 197 |
You can also specify the thresholds using: 198 | 199 | ``` r 200 | CelestaObj <- AssignCells(CelestaObj,max_iteration=10,cell_change_threshold=0.01, 201 | high_expression_threshold_anchor=c(0.7,0.7,0.7,0.7,0.7,0.8,0.9,0.9), 202 | low_expression_threshold_anchor=c(1,1,1,1,1,1,1,1), 203 | high_expression_threshold_index=c(0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5), 204 | low_expression_threshold_index=c(1,1,1,1,1,1,1,1)) 205 | ``` 206 | 207 | The length of each threshold vector corresponds to the number of 208 | cell types. The order of the thresholds corresponds to the order of the cell types in 209 | the defined cell-type signature matrix. 210 | 211 | To find the proper threshold, the `PlotExpProb()` function can be 212 | applied. Because the segmented data may have some compensation in the 213 | values which are the inputs to CELESTA, the expression probabilities are 214 | calculated based on the segmented data. It’s useful to compare the 215 | expression probabilities with the CODEX staining for each marker.
216 | For example, for endothelial cells, if we plot the expression 217 | probabilities of CD31 (left) and compare with the CD31 staining, 218 | approximately 0.9 and 0.8 would be the right threshold for defining how 219 | much the cell should express CD31. `Please note:` It is suggested that 220 | for anchor cells, use a slightly higher threshold than index cells.
221 | ![An example of CD31](images/CD31_threshold.png) Another example, for 222 | tumor cells, if we plot the expression probabilities of Cytokeratin 223 | (left) and compare with the Cytokeratin staining, approximately 0.9 and 224 | 0.8 would be the right threshold for defining how much the cell should 225 | express Cytokeratin. `Please note:` It is suggested that for anchor 226 | cells, use a slightly higher threshold than index cells. ![An example of 227 | CK](images/Cytokeratin_threshold.png) 228 | 229 | Two vectors are also required for defining the “low\_marker\_threshold”, 230 | one for anchor cells and one for index cells. These thresholds define how 231 | low the marker expression probability must be for the marker to be considered 232 | not expressed. Normally 1 is assigned to this value unless there are a 233 | lot of doublets or co-staining in the data. **The Low expression 234 | threshold default values in general are robust, and thus we recommend 235 | testing the High expression threshold values.**
236 | 237 | An example for defining low\_marker\_threshold is shown below:
238 | 239 | ![An example of low marker threshold](images/low_threshold_example.png) 240 | 241 | ## Getting help 242 | 243 | If you encounter a clear bug, please file an issue with a minimal 244 | reproducible example on 245 | [GitHub](https://github.com/plevritis/CELESTA/issues). For questions and 246 | other discussion, please use 247 | [community.rstudio.com](https://community.rstudio.com/). 248 | -------------------------------------------------------------------------------- /data/high_marker_threshold_anchor.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/high_marker_threshold_anchor.rda -------------------------------------------------------------------------------- /data/high_marker_threshold_iteration.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/high_marker_threshold_iteration.rda -------------------------------------------------------------------------------- /data/imaging_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/imaging_data.rda -------------------------------------------------------------------------------- /data/low_marker_threshold_anchor.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/low_marker_threshold_anchor.rda -------------------------------------------------------------------------------- /data/low_marker_threshold_iteration.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/low_marker_threshold_iteration.rda -------------------------------------------------------------------------------- /data/prior_marker_info.csv: -------------------------------------------------------------------------------- 1 | ,Lineage_level,CD31 - vasculature:Cyc_19_ch_3,CD34 - vasculature:Cyc_20_ch_3,Cytokeratin - epithelia:Cyc_10_ch_2,aSMA - smooth muscle:Cyc_11_ch_2,Podoplanin - lymphatics:Cyc_19_ch_4,CD45 - hematopoietic cells:Cyc_4_ch_2,CD15 - granulocytes:Cyc_14_ch_2,CD3 - T cells:Cyc_16_ch_4,CD20 - B cells:Cyc_8_ch_3,CD11c - DCs:Cyc_12_ch_3,CD163 - macrophages:Cyc_17_ch_3,CD68 - macrophages:Cyc_18_ch_4,CD38 - multifunctional:Cyc_20_ch_4,CD56 - NK cells:Cyc_10_ch_4,CD8 - cytotoxic T cells:Cyc_3_ch_2,CD4 - T helper cells:Cyc_6_ch_3,CD45RO - memory cells:Cyc_18_ch_3,FOXP3 - regulatory T cells:Cyc_2_ch_3 2 | vasculature,1_0_1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 3 | tumor cells,1_0_2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 4 | aSMA+ stroma,1_0_3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 5 | lymphatics,1_0_4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 6 | immune cells,1_0_5,0,0,0,0,0,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 7 | CD3+ T cells,2_5_6,0,0,0,0,0,NA,0,1,0,0,0,0,0,0,NA,NA,NA,NA 8 | CD15+ granulocytes,2_5_7,0,0,0,0,0,NA,1,0,0,0,0,NA,0,0,0,0,NA,0 9 | B cells,2_5_8,0,0,0,0,0,NA,0,0,1,0,0,0,0,0,0,0,0,0 10 | CD11c+ DCs,2_5_9,0,0,0,0,0,NA,0,0,0,1,0,0,0,0,0,0,0,0 11 | CD68+CD163+ macrophages,2_5_10,0,0,0,0,0,NA,0,0,0,0,1,1,0,0,0,0,NA,0 12 | plasma cells,2_5_11,0,0,0,0,0,NA,0,0,0,0,0,0,1,0,0,0,0,0 13 | NK cells,2_5_12,0,0,0,0,0,NA,0,NA,0,0,0,0,0,1,0,0,0,0 14 | CD8+ T cells,3_6_13,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,1,0,NA,0 15 | CD4+ T cells,3_6_14,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,1,NA,NA 16 | CD4+ T cells CD45RO+,4_14_15,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,NA,1,0 17 | Tregs,4_14_16,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,NA,0,1 
-------------------------------------------------------------------------------- /data/prior_marker_info.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/data/prior_marker_info.rda -------------------------------------------------------------------------------- /images/CD31_threshold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/CD31_threshold.png -------------------------------------------------------------------------------- /images/Cytokeratin_threshold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/Cytokeratin_threshold.png -------------------------------------------------------------------------------- /images/aSMA_threshold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/aSMA_threshold.png -------------------------------------------------------------------------------- /images/demo_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/demo_image.png -------------------------------------------------------------------------------- /images/high_threshold_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/high_threshold_example.png -------------------------------------------------------------------------------- 
/images/low_threshold_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/low_threshold_example.png -------------------------------------------------------------------------------- /images/plot_cell_assignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/plot_cell_assignment.png -------------------------------------------------------------------------------- /images/prior_matrix_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/prior_matrix_example.png -------------------------------------------------------------------------------- /images/segmented_file_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/images/segmented_file_example.png -------------------------------------------------------------------------------- /man/AssignCellTypes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{AssignCellTypes} 4 | \alias{AssignCellTypes} 5 | \title{AssignCellTypes} 6 | \usage{ 7 | AssignCellTypes( 8 | initial_pri_matrix, 9 | current_cell_prob, 10 | current_cell_type_assignment, 11 | marker_exp_prob, 12 | cell_type_num, 13 | unassigned_cells, 14 | round, 15 | high_marker_threshold, 16 | low_marker_threshold, 17 | min_difference = 0, 18 | min_prob = 0 19 | ) 20 | } 21 | \arguments{ 22 | \item{initial_pri_matrix}{user defined cell-type marker matrix for a 23 
| specific round} 24 | 25 | \item{current_cell_prob}{the current cell probability 26 | (number_cells x number_cell_type)} 27 | 28 | \item{current_cell_type_assignment}{the current cell type assignments 29 | (number_cells x total_rounds)} 30 | 31 | \item{marker_exp_prob}{the marker expression probability for each cell} 32 | 33 | \item{cell_type_num}{the cell types associated with the current round} 34 | 35 | \item{unassigned_cells}{cells not assigned a cell type for each round and 36 | iteration} 37 | 38 | \item{round}{the current round} 39 | 40 | \item{high_marker_threshold}{the upper threshold for each cell type} 41 | 42 | \item{low_marker_threshold}{the lower threshold for each cell type} 43 | 44 | \item{min_difference}{lower bound used to determine cells that meet the 45 | threshold} 46 | 47 | \item{min_prob}{lower bound used to determine cells that meet the threshold} 48 | } 49 | \value{ 50 | an updated current cell type assignment (number_cells x total_rounds) 51 | with more cells assigned for the current round 52 | } 53 | \description{ 54 | Find the cell types based on the scores (anchor cell) or 55 | probabilities (index cell) 56 | } 57 | -------------------------------------------------------------------------------- /man/AssignCells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{AssignCells} 4 | \alias{AssignCells} 5 | \title{AssignCells} 6 | \usage{ 7 | AssignCells( 8 | celesta_obj, 9 | max_iteration = 10, 10 | cell_change_threshold = 0.01, 11 | min_diff = 0, 12 | min_probability = 0, 13 | high_expression_threshold_anchor = rep(0.7, length = 50), 14 | low_expression_threshold_anchor = rep(0.9, length = 50), 15 | high_expression_threshold_index = rep(0.5, length = 50), 16 | low_expression_threshold_index = rep(1, length = 50), 17 | progress = NULL, 18 | save_result = T 19 | ) 20 | } 21 | \arguments{ 22 | 
\item{celesta_obj}{an initialized and filtered Celesta object (provided by 23 | \code{FilterCells})} 24 | 25 | \item{max_iteration}{the maximum number of iterations} 26 | 27 | \item{cell_change_threshold}{user defined threshold on when the iterative 28 | cell-type assignment stops. The default value is 0.01, which means that if 29 | the percentage of additional assigned cells is smaller than 1\% of the 30 | unassigned cells, then cell-type assignment will stop. The recommended range 31 | is 0.01 - 0.05. Note that the higher the cell change threshold, the more 32 | cells are left unassigned.} 33 | 34 | \item{min_diff}{user defined threshold on how much the largest cell-type 35 | probability needs to be higher than the second largest cell-type probability. 36 | The default value is 0. It is recommended to not change this value.} 37 | 38 | \item{min_probability}{user defined threshold on the maximum probability 39 | (i.e. a cell-type probability needs to be higher than this threshold for a 40 | cell to be assigned to that cell type). The default value is 0. It is 41 | recommended to not set this value higher than 0.5.} 42 | 43 | \item{high_expression_threshold_anchor}{the upper threshold for each cell type} 44 | 45 | \item{low_expression_threshold_anchor}{the lower threshold for each cell type} 46 | 47 | \item{high_expression_threshold_index}{user defined marker expression 48 | probability threshold for high expression for non-anchor cells} 49 | 50 | \item{low_expression_threshold_index}{user defined marker expression 51 | probability threshold for low expression for non-anchor cells} 52 | 53 | \item{progress}{progress object used for the Shiny app. Do not specify 54 | manually.} 55 | } 56 | \value{ 57 | a fully initialized Celesta object 58 | } 59 | \description{ 60 | Iteratively assigns cells based on spatial and protein 61 | expression information. 
62 | } 63 | -------------------------------------------------------------------------------- /man/BuildSigmoidFunction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{BuildSigmoidFunction} 4 | \alias{BuildSigmoidFunction} 5 | \title{BuildSigmoidFunction} 6 | \usage{ 7 | BuildSigmoidFunction(marker_exp_matrix, figure = FALSE) 8 | } 9 | \arguments{ 10 | \item{marker_exp_matrix}{transformed protein marker expression (or original 11 | segmentation protein marker expression if transformation is not specified)} 12 | 13 | \item{figure}{whether a figure should be generated or not} 14 | } 15 | \value{ 16 | the sigmoid function parameter, containing the \eqn{x_root} and slope 17 | } 18 | \description{ 19 | Builds the sigmoid function for the calculation of the 20 | expression probability 21 | } 22 | -------------------------------------------------------------------------------- /man/CalcMarkerActivationProbability.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{CalcMarkerActivationProbability} 4 | \alias{CalcMarkerActivationProbability} 5 | \title{CalcMarkerActivationProbability} 6 | \usage{ 7 | CalcMarkerActivationProbability(marker_exp_matrix, figure = FALSE) 8 | } 9 | \arguments{ 10 | \item{marker_exp_matrix}{transformed protein marker expression (or original 11 | segmentation 12 | protein marker expression if transformation is not specified)} 13 | } 14 | \value{ 15 | the protein marker activation probability 16 | } 17 | \description{ 18 | Calculates the activation probability for each marker in the 19 | prior matrix 20 | } 21 | -------------------------------------------------------------------------------- /man/CalculateBeta.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{CalculateBeta} 4 | \alias{CalculateBeta} 5 | \title{CalculateBeta} 6 | \usage{ 7 | CalculateBeta( 8 | dist_from_nearest_assigned_cell, 9 | scale_factor = 5, 10 | bandwidth = 100 11 | ) 12 | } 13 | \arguments{ 14 | \item{dist_from_nearest_assigned_cell}{the distance from the nearest assigned 15 | cell} 16 | 17 | \item{scale_factor}{the scale factor} 18 | 19 | \item{bandwidth}{the bandwidth} 20 | } 21 | \value{ 22 | the beta value 23 | } 24 | \description{ 25 | Calculates beta 26 | } 27 | -------------------------------------------------------------------------------- /man/CalculateIndexCellProb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{CalculateIndexCellProb} 4 | \alias{CalculateIndexCellProb} 5 | \title{CalculateIndexCellProb} 6 | \usage{ 7 | CalculateIndexCellProb( 8 | current_cell_prob, 9 | current_cell_type_assignment, 10 | current_beta, 11 | nb_cell_type, 12 | current_scoring_matrix, 13 | cell_type_num, 14 | unassigned_cells, 15 | round 16 | ) 17 | } 18 | \arguments{ 19 | \item{current_cell_prob}{the current cell probability 20 | (number_cells x number_cell_type)} 21 | 22 | \item{current_cell_type_assignment}{the current cell type assignments 23 | (number_cells x total_rounds)} 24 | 25 | \item{current_beta}{the current beta values} 26 | 27 | \item{nb_cell_type}{cell types of the neighboring cells for index cells} 28 | 29 | \item{current_scoring_matrix}{the current scoring matrix 30 | (number_cells x number_cell_type)} 31 | 32 | \item{cell_type_num}{the cell types associated with the current round} 33 | 34 | \item{unassigned_cells}{cells not assigned a cell type for each round and 35 | iteration} 36 | 37 | 
\item{round}{the current round} 38 | } 39 | \value{ 40 | calculates the probability for each cell type for unassigned cells 41 | } 42 | \description{ 43 | Calculates the probability for index cells 44 | } 45 | -------------------------------------------------------------------------------- /man/CalculateProbabilityDifference.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{CalculateProbabilityDifference} 4 | \alias{CalculateProbabilityDifference} 5 | \title{CalculateProbabilityDifference} 6 | \usage{ 7 | CalculateProbabilityDifference( 8 | max.prob, 9 | max.prob_index, 10 | cell_prob_list, 11 | unassigned_cells 12 | ) 13 | } 14 | \arguments{ 15 | \item{max.prob}{the maximum marker probability for each cell} 16 | 17 | \item{max.prob_index}{the index of the maximum marker probability for each 18 | cell} 19 | 20 | \item{cell_prob_list}{the probabilities of the cells that are not assigned a 21 | cell type} 22 | 23 | \item{unassigned_cells}{cells not assigned a cell type for each round and 24 | iteration} 25 | } 26 | \value{ 27 | the minimum of the difference in probability between the maximum 28 | marker probability and other marker probabilities 29 | } 30 | \description{ 31 | Calculate the probability differences 32 | } 33 | -------------------------------------------------------------------------------- /man/CalculateScores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{CalculateScores} 4 | \alias{CalculateScores} 5 | \title{CalculateScores} 6 | \usage{ 7 | CalculateScores( 8 | marker_exp_prob, 9 | current_pri_matrix, 10 | current_scoring_matrix, 11 | round, 12 | unassigned_cells, 13 | cell_type_num 14 | ) 15 | } 16 | \arguments{ 17 | \item{marker_exp_prob}{the marker 
expression probability for each cell} 18 | 19 | \item{current_pri_matrix}{the updated cell-type marker matrix} 20 | 21 | \item{current_scoring_matrix}{the current scoring matrix 22 | (number_cells x number_cell_type)} 23 | 24 | \item{round}{the current round} 25 | 26 | \item{unassigned_cells}{cells not assigned a cell type for each round and 27 | iteration} 28 | 29 | \item{cell_type_num}{the cell types associated with the current round} 30 | } 31 | \value{ 32 | the current scoring matrix containing the scores for each cell type 33 | associated with the current round for each unassigned cell 34 | } 35 | \description{ 36 | Calculate the scores based on the scoring function 37 | } 38 | -------------------------------------------------------------------------------- /man/Celesta-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \docType{class} 4 | \name{Celesta-class} 5 | \alias{Celesta-class} 6 | \alias{Celesta} 7 | \title{Celesta} 8 | \description{ 9 | Celesta object definition 10 | } 11 | \section{Slots}{ 12 | 13 | \describe{ 14 | \item{\code{project_name}}{name of the project (used in file names)} 15 | 16 | \item{\code{prior_info}}{user-defined cell-type signature matrix. 17 | 18 | The data should contain two columns (name X and Y) for the x, y coordinates 19 | and a column for each protein marker. 
Each row represents the data for a 20 | single cell, including its x, y coordinates and expression for each protein 21 | marker.} 22 | 23 | \item{\code{marker_exp_matrix}}{transformed protein marker expression (or original 24 | segmentation protein marker expression if transformation is not specified)} 25 | 26 | \item{\code{original_exp}}{original protein marker expression (containing only the 27 | protein markers specified in \code{prior_info})} 28 | 29 | \item{\code{cell_ID}}{the IDs of the cells (from 1 to the total number of cells)} 30 | 31 | \item{\code{lineage_info}}{the lineage information from \code{prior_info} parsed into 32 | round, previous cell type, and cell type number columns} 33 | 34 | \item{\code{coords}}{the x, y coordinates of each cell} 35 | 36 | \item{\code{cell_prob}}{cell type probability for each cell} 37 | 38 | \item{\code{final_cell_type_assignment}}{the final cell type assignments} 39 | 40 | \item{\code{nb_list}}{the list of N-nearest neighbors} 41 | 42 | \item{\code{total_rounds}}{the maximum round value} 43 | 44 | \item{\code{cell_nb_in_bandwidth}}{the cells located within a bandwidth to cell \emph{c}} 45 | 46 | \item{\code{cell_nb_dist}}{the distance of each cell to cell \emph{c} within a bandwidth} 47 | 48 | \item{\code{initial_pri_matrix}}{user defined cell-type marker matrix for a specific 49 | round} 50 | 51 | \item{\code{anchor_cell_type_assignment}}{the anchor cell type assignments} 52 | 53 | \item{\code{dist_from_nearest_assigned_cell}}{the distance from the nearest assigned 54 | cell} 55 | 56 | \item{\code{nb_cell_type}}{cell types of the neighboring cells for index cells} 57 | 58 | \item{\code{marker_exp_prob}}{the marker expression probability for each cell} 59 | 60 | \item{\code{current_scoring_matrix}}{the current scoring matrix 61 | (number_cells x number_cell_type)} 62 | 63 | \item{\code{current_pri_matrix}}{the updated cell-type marker matrix} 64 | 65 | \item{\code{current_cell_prob}}{the current cell probability 66 | 
(number_cells x number_cell_type)} 67 | 68 | \item{\code{current_cell_type_assignment}}{the current cell type assignments 69 | (number_cells x total_rounds)} 70 | 71 | \item{\code{starting_cell_type_assignment}}{the initial cell type assignments 72 | (number_cells x total_rounds)} 73 | 74 | \item{\code{current_beta}}{the current beta values} 75 | 76 | \item{\code{unassigned_cells}}{cells not assigned a cell type for each round and 77 | iteration} 78 | 79 | \item{\code{assigned_cells}}{cells with an assigned cell type} 80 | }} 81 | 82 | -------------------------------------------------------------------------------- /man/CountCellType.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{CountCellType} 4 | \alias{CountCellType} 5 | \title{CountCellType} 6 | \usage{ 7 | CountCellType(prior_info, current_cell_type_assignment, cell_type_num, round) 8 | } 9 | \arguments{ 10 | \item{prior_info}{user-defined cell-type signature matrix. 11 | 12 | The data should contain two columns (name X and Y) for the x, y coordinates 13 | and a column for each protein marker. 
Each row represents the data for a 14 | single cell, including its x, y coordinates and expression for each protein 15 | marker.} 16 | 17 | \item{current_cell_type_assignment}{the current cell type assignments 18 | (number_cells x total_rounds)} 19 | 20 | \item{cell_type_num}{the cell types associated with the current round} 21 | 22 | \item{round}{the current round} 23 | } 24 | \value{ 25 | the count and proportion for each cell type based on the current cell 26 | type assignments 27 | } 28 | \description{ 29 | Counts the cell type 30 | } 31 | -------------------------------------------------------------------------------- /man/CreateCelestaObject.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{CreateCelestaObject} 4 | \alias{CreateCelestaObject} 5 | \title{CreateCelestaObject} 6 | \usage{ 7 | CreateCelestaObject( 8 | project_title, 9 | prior_marker_info, 10 | imaging_data_file, 11 | cofactor = 10, 12 | transform_type = 1, 13 | number_of_neighbors = 5, 14 | bandwidth = 100, 15 | progress = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{project_title}{\emph{required} name of the project (used in file names)} 20 | 21 | \item{prior_marker_info}{\emph{required} user-defined cell-type signature matrix. 22 | 23 | The data should contain two columns (name X and Y) for the x, y coordinates 24 | and a column for each protein marker. Each row represents the data for a 25 | single cell, including its x, y coordinates and expression for each protein 26 | marker.} 27 | 28 | \item{imaging_data_file}{\emph{required} segmented imaging data. 29 | The first column must contain the cell types to be inferred. The second 30 | column must contain the lineage information with the following format 31 | (without spaces): # _ # _ #. 32 | \itemize{ 33 | \item The first number indicates round. 
Cell types with the same lineage level 34 | are inferred at the same round. A higher number indicates higher cell-type 35 | resolution. For example, immune cells -> CD3+ T cells -> CD4+ T cells. 36 | \item The second number indicates the previous lineage cell type number for the 37 | current cell type. For example, the second number for CD3+ T cell is 5 38 | because it is a subtype of immune cells which have cell type number 5. 39 | \item The third number is a number assigned to the cell type 40 | (i.e. cell type number). 41 | } 42 | 43 | The third column and beyond are columns for protein markers. 44 | \itemize{ 45 | \item If a protein marker is known to be expressed for that cell type, then it 46 | is denoted by a "1". 47 | \item If a protein marker is known to not express for a cell type, then it is 48 | denoted by a "0". 49 | \item If the protein marker is irrelevant or uncertain to express for a cell 50 | type, then it is denoted by "NA". 51 | }} 52 | 53 | \item{cofactor}{value used to calculate the arcsinh transform on the protein 54 | marker expressions} 55 | 56 | \item{transform_type}{indicates a transform type for the protein marker 57 | expressions (0 = no transform, 1 = arcsinh transform)} 58 | 59 | \item{number_of_neighbors}{the number of cells in a single neighborhood} 60 | 61 | \item{bandwidth}{the upper distance bound used when calculating 62 | neighborhoods by distance} 63 | 64 | \item{progress}{progress object used for the Shiny app. 
Do not specify 65 | manually.} 66 | } 67 | \value{ 68 | an initialized Celesta object 69 | } 70 | \description{ 71 | Initializes the following fields of the Celesta object: 72 | \itemize{ 73 | \item \code{cell_ID} 74 | \item \code{original_exp} 75 | \item \code{marker_exp_matrix} 76 | \item \code{prior_info} 77 | \item \code{lineage_info} 78 | \item \code{total_rounds} 79 | \item \code{coords} 80 | \item \code{marker_exp_prob} 81 | \item \code{nb_list} 82 | \item \code{cell_nb_in_bandwidth} 83 | \item \code{cell_nb_dist} 84 | \item \code{current_cell_type_assignment} 85 | \item \code{anchor_cell_type_assignment} 86 | \item \code{starting_cell_type_assignment} 87 | \item \code{current_scoring_matrix} 88 | \item \code{current_cell_prob} 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /man/FilterArtifactCells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{FilterArtifactCells} 4 | \alias{FilterArtifactCells} 5 | \title{FilterArtifactCells} 6 | \usage{ 7 | FilterArtifactCells( 8 | total_rounds, 9 | marker_exp_matrix, 10 | marker_exp_prob, 11 | current_cell_type_assignment, 12 | high_marker_threshold = 0.9, 13 | low_marker_threshold = 0.4 14 | ) 15 | } 16 | \arguments{ 17 | \item{total_rounds}{the maximum round value} 18 | 19 | \item{marker_exp_matrix}{transformed protein marker expression (or original 20 | segmentation protein marker expression if transformation is not specified)} 21 | 22 | \item{marker_exp_prob}{the marker expression probability for each cell} 23 | 24 | \item{current_cell_type_assignment}{the cell type assignments for each round 25 | for each cell} 26 | 27 | \item{high_marker_threshold}{upper bound used to filter out questionable 28 | cells} 29 | 30 | \item{low_marker_threshold}{lower bound used to filter out questionable 31 | cells} 32 | } 33 | 
\value{ 34 | current cell type assignment, where questionable cells are marked 35 | with a row of NAs. 36 | } 37 | \description{ 38 | Filter out cells that could potentially be artifacts 39 | } 40 | -------------------------------------------------------------------------------- /man/FilterCells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{FilterCells} 4 | \alias{FilterCells} 5 | \title{FilterCells} 6 | \usage{ 7 | FilterCells( 8 | celesta_obj, 9 | high_marker_threshold = 0.9, 10 | low_marker_threshold = 0.4 11 | ) 12 | } 13 | \arguments{ 14 | \item{celesta_obj}{an initialized Celesta object (provided by 15 | \code{CreateCelestaObject})} 16 | 17 | \item{high_marker_threshold}{upper bound used to filter out questionable 18 | cells} 19 | 20 | \item{low_marker_threshold}{lower bound used to filter out questionable 21 | cells} 22 | } 23 | \value{ 24 | a Celesta object with questionable cells marked with NA 25 | } 26 | \description{ 27 | Filters out artifact cells from the cell type assignments 28 | } 29 | -------------------------------------------------------------------------------- /man/FindCellsToCheck.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{FindCellsToCheck} 4 | \alias{FindCellsToCheck} 5 | \title{FindCellsToCheck} 6 | \usage{ 7 | FindCellsToCheck(current_cell_type_assignment, lineage_info, cell_ID, round) 8 | } 9 | \arguments{ 10 | \item{current_cell_type_assignment}{the current cell type assignments 11 | (number_cells x total_rounds)} 12 | 13 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into 14 | round, previous cell type, and cell type number columns} 15 | 16 | \item{cell_ID}{the IDs of the cells (from 1 to the total 
number of cells)} 17 | } 18 | \value{ 19 | the IDs of unassigned cells 20 | } 21 | \description{ 22 | Find unassigned cells 23 | } 24 | -------------------------------------------------------------------------------- /man/FindCellsWithId.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{FindCellsWithId} 4 | \alias{FindCellsWithId} 5 | \title{FindCellsWithId} 6 | \usage{ 7 | FindCellsWithId(current_cell_type_assignment, lineage_info, cell_ID, round) 8 | } 9 | \arguments{ 10 | \item{current_cell_type_assignment}{the current cell type assignments 11 | (number_cells x total_rounds)} 12 | 13 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into 14 | round, previous cell type, and cell type number columns} 15 | 16 | \item{cell_ID}{the IDs of the cells (from 1 to the total number of cells)} 17 | 18 | \item{round}{the current round} 19 | } 20 | \value{ 21 | cells that have been assigned a cell type 22 | } 23 | \description{ 24 | Find cells assigned with a cell type 25 | } 26 | -------------------------------------------------------------------------------- /man/FitGmmModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{FitGmmModel} 4 | \alias{FitGmmModel} 5 | \title{FitGmmModel} 6 | \usage{ 7 | FitGmmModel(marker_exp, marker_name, figure = FALSE) 8 | } 9 | \arguments{ 10 | \item{marker_exp}{the expression of the marker for each cell} 11 | 12 | \item{marker_name}{the name of the marker} 13 | 14 | \item{figure}{whether a figure should be generated or not} 15 | } 16 | \value{ 17 | the Gaussian mixture model parameters for the marker 18 | } 19 | \description{ 20 | Fits a Gaussian mixture model for each marker 21 | } 22 | 
-------------------------------------------------------------------------------- /man/GetCoords.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{GetCoords} 4 | \alias{GetCoords} 5 | \title{GetCoords} 6 | \usage{ 7 | GetCoords(imaging_data_file) 8 | } 9 | \arguments{ 10 | \item{imaging_data_file}{segmented imaging data. 11 | The first column must contain the cell types to be inferred. The second 12 | column must contain the lineage information with the following format 13 | (without spaces): # _ # _ #. 14 | \itemize{ 15 | \item The first number indicates round. Cell types with the same lineage level 16 | are inferred at the same round. A higher number indicates higher cell-type 17 | resolution. For example, immune cells -> CD3+ T cells -> CD4+ T cells. 18 | \item The second number indicates the previous lineage cell type number for the 19 | current cell type. For example, the second number for CD3+ T cell is 5 20 | because it is a subtype of immune cells which have cell type number 5. 21 | \item The third number is a number assigned to the cell type 22 | (i.e. cell type number). 23 | } 24 | 25 | The third column and beyond are columns for protein markers. 26 | \itemize{ 27 | \item If a protein marker is known to be expressed for that cell type, then it 28 | is denoted by a "1". 29 | \item If a protein marker is known to not express for a cell type, then it is 30 | denoted by a "0". 31 | \item If the protein marker is irrelevant or uncertain to express for a cell 32 | type, then it is denoted by "NA". 
33 | }} 34 | } 35 | \value{ 36 | the x, y coordinates of each cell 37 | } 38 | \description{ 39 | Gets the x, y coordinates of each cell 40 | } 41 | -------------------------------------------------------------------------------- /man/GetDistFromNearestAssignedCells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{GetDistFromNearestAssignedCells} 4 | \alias{GetDistFromNearestAssignedCells} 5 | \title{GetDistFromNearestAssignedCells} 6 | \usage{ 7 | GetDistFromNearestAssignedCells( 8 | cell_nb_in_bandwidth, 9 | cell_nb_dist, 10 | current_cell_type_assignment, 11 | cell_type_num, 12 | unassigned_cells, 13 | assigned_cells, 14 | round 15 | ) 16 | } 17 | \arguments{ 18 | \item{cell_nb_in_bandwidth}{the cells located within a bandwidth to cell \emph{c}} 19 | 20 | \item{cell_nb_dist}{the distance of each cell to cell \emph{c} within a bandwidth} 21 | 22 | \item{current_cell_type_assignment}{the current cell type assignments 23 | (number_cells x total_rounds)} 24 | 25 | \item{cell_type_num}{the cell types associated with the current round} 26 | 27 | \item{unassigned_cells}{cells not assigned a cell type for each round and 28 | iteration} 29 | 30 | \item{assigned_cells}{cells with an assigned cell type} 31 | 32 | \item{round}{the current round} 33 | } 34 | \value{ 35 | the distance to the nearest assigned cells 36 | } 37 | \description{ 38 | Get distance from nearest assigned cells 39 | } 40 | -------------------------------------------------------------------------------- /man/GetFinalInferredCellTypes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{GetFinalInferredCellTypes} 4 | \alias{GetFinalInferredCellTypes} 5 | \title{GetFinalInferredCellTypes} 6 | \usage{ 7 | 
GetFinalInferredCellTypes( 8 | project_name, 9 | total_rounds, 10 | current_cell_type_assignment, 11 | anchor_cell_type_assignment, 12 | prior_info, 13 | lineage_info, 14 | coords, 15 | original_exp, 16 | save_result = T 17 | ) 18 | } 19 | \arguments{ 20 | \item{total_rounds}{the maximum round} 21 | 22 | \item{current_cell_type_assignment}{the current cell type assignments 23 | (number_cells x total_rounds)} 24 | 25 | \item{anchor_cell_type_assignment}{the anchor cell type assignments} 26 | 27 | \item{prior_info}{user-defined cell-type signature matrix. 28 | 29 | The data should contain two columns (name X and Y) for the x, y coordinates 30 | and a column for each protein marker. Each row represents the data for a 31 | single cell, including its x, y coordinates and expression for each protein 32 | marker.} 33 | 34 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into 35 | round, previous cell type, and cell type number columns} 36 | 37 | \item{coords}{the x, y coordinates of each cell} 38 | 39 | \item{original_exp}{original protein marker expression (containing only the 40 | protein markers specified in \code{prior_info})} 41 | 42 | \item{save_result}{whether or not to save the final cell type assignment 43 | and anchor cell assignment results} 44 | } 45 | \value{ 46 | the final cell type assignments 47 | } 48 | \description{ 49 | Gets final cell types and writes two files: the final cell type 50 | assignments and the anchor cell type assignments. 
51 | } 52 | -------------------------------------------------------------------------------- /man/GetInitialPriorMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{GetInitialPriorMatrix} 4 | \alias{GetInitialPriorMatrix} 5 | \title{GetInitialPriorMatrix} 6 | \usage{ 7 | GetInitialPriorMatrix(lineage_info, prior_marker_info, round) 8 | } 9 | \arguments{ 10 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into 11 | round, previous cell type, and cell type number columns} 12 | 13 | \item{round}{the current round} 14 | } 15 | \value{ 16 | the prior knowledge of the cell types with the specified round. 17 | } 18 | \description{ 19 | Gets the prior knowledge of the cell types with the specified 20 | round. 21 | } 22 | -------------------------------------------------------------------------------- /man/GetMarkerExpMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{GetMarkerExpMatrix} 4 | \alias{GetMarkerExpMatrix} 5 | \title{GetMarkerExpMatrix} 6 | \usage{ 7 | GetMarkerExpMatrix( 8 | prior_marker_info, 9 | imaging_data_file, 10 | cofactor, 11 | transform_type 12 | ) 13 | } 14 | \arguments{ 15 | \item{prior_marker_info}{user-defined cell-type signature matrix. 16 | 17 | The data should contain two columns (name X and Y) for the x, y coordinates 18 | and a column for each protein marker. Each row represents the data for a 19 | single cell, including its x, y coordinates and expression for each protein 20 | marker.} 21 | 22 | \item{imaging_data_file}{segmented imaging data. 23 | The first column must contain the cell types to be inferred. 
The second 24 | column must contain the lineage information with the following format 25 | (without spaces): # _ # _ #. 26 | \itemize{ 27 | \item The first number indicates round. Cell types with the same lineage level 28 | are inferred at the same round. A higher number indicates higher cell-type 29 | resolution. For example, immune cells -> CD3+ T cells -> CD4+ T cells. 30 | \item The second number indicates the previous lineage cell type number for the 31 | current cell type. For example, the second number for CD3+ T cell is 5 32 | because it is a subtype of immune cells which have cell type number 5. 33 | \item The third number is a number assigned to the cell type 34 | (i.e. cell type number). 35 | } 36 | 37 | The third column and beyond are columns for protein markers. 38 | \itemize{ 39 | \item If a protein marker is known to be expressed for that cell type, then it 40 | is denoted by a "1". 41 | \item If a protein marker is known to not express for a cell type, then it is 42 | denoted by a "0". 43 | \item If the protein marker is irrelevant or uncertain to express for a cell 44 | type, then it is denoted by "NA". 45 | }} 46 | 47 | \item{cofactor}{used to calculate the arcsinh transform on the protein marker 48 | expressions} 49 | 50 | \item{transform_type}{indicates a transform type for the protein marker 51 | expressions (0 = no transform, 1 = arcsinh transform)} 52 | } 53 | \value{ 54 | a list with the following information: 55 | \describe{ 56 | \item{\code{cell_ids}}{the IDs of the cells} 57 | \item{\code{original_exp}}{the original expression matrix (containing only the 58 | protein markers specified by \code{prior_marker_info})} 59 | \item{\code{marker_exp_matrix} or \code{original_exp}}{the transformed expression 60 | matrix (or original expression matrix if a transform is not specified)} 61 | } 62 | } 63 | \description{ 64 | Gets the protein marker expressions and assigns each cell a 65 | cell ID. 
66 | 67 | Only protein markers specified in \code{prior_marker_info} are extracted from the 68 | \code{imaging_data_file}. Cells are assigned IDs from 1 to the total number of 69 | cells. If \code{transform_type = 1}, then an arcsinh transform is applied to the 70 | protein marker expressions. 71 | } 72 | -------------------------------------------------------------------------------- /man/GetNeighborInfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{GetNeighborInfo} 4 | \alias{GetNeighborInfo} 5 | \title{GetNeighborInfo} 6 | \usage{ 7 | GetNeighborInfo(coords, number_of_neighbors = 5, bandwidth = 100) 8 | } 9 | \arguments{ 10 | \item{coords}{the x, y coordinates of each cell} 11 | 12 | \item{number_of_neighbors}{the number of cells in a single neighborhood} 13 | 14 | \item{bandwidth}{the upper distance bound used when calculating neighborhoods 15 | by distance} 16 | } 17 | \value{ 18 | a list of the following information 19 | \describe{ 20 | \item{\code{nb_list}}{the list of N-nearest neighbors} 21 | \item{\code{all_cell_nb_in_bandwidth}}{the cells located within a bandwidth to 22 | cell \emph{c}} 23 | \item{\code{cell_nb_dist}}{the distance of each cell to cell \emph{c} within a 24 | bandwidth} 25 | } 26 | } 27 | \description{ 28 | Gets the neighborhood information, including neighborhoods by 29 | number and distance. 
30 | } 31 | -------------------------------------------------------------------------------- /man/GetPriorInfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{GetPriorInfo} 4 | \alias{GetPriorInfo} 5 | \title{GetPriorInfo} 6 | \usage{ 7 | GetPriorInfo(prior_marker_info) 8 | } 9 | \arguments{ 10 | \item{prior_marker_info}{user-defined cell-type signature matrix. 11 | 12 | The data should contain two columns (name X and Y) for the x, y coordinates 13 | and a column for each protein marker. Each row represents the data for a 14 | single cell, including its x, y coordinates and expression for each protein 15 | marker.} 16 | } 17 | \value{ 18 | a list with the following information: 19 | \describe{ 20 | \item{\code{lineage_info}}{the lineage information parsed into round, previous 21 | cell type, and cell type number columns} 22 | \item{\code{total_rounds}}{the maximum round value} 23 | } 24 | } 25 | \description{ 26 | Extracts the lineage information from the \code{prior_marker_info} 27 | and determines the total rounds 28 | } 29 | -------------------------------------------------------------------------------- /man/GetScore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{GetScore} 4 | \alias{GetScore} 5 | \title{GetScore} 6 | \usage{ 7 | GetScore(activation_prob_to_use, prior_info, non_NA_index) 8 | } 9 | \arguments{ 10 | \item{activation_prob_to_use}{the marker expression probabilities of the 11 | unassigned cells} 12 | 13 | \item{prior_info}{user-defined cell-type signature matrix. 14 | 15 | The data should contain two columns (name X and Y) for the x, y coordinates 16 | and a column for each protein marker. 
Each row represents the data for a 17 | single cell, including its x, y coordinates and expression for each protein 18 | marker.} 19 | 20 | \item{non_NA_index}{the index of the columns in \code{current_pri_matrix} that do 21 | not contain NA for a particular cell} 22 | } 23 | \value{ 24 | the score of the cell 25 | } 26 | \description{ 27 | Calculate scores using MSE 28 | } 29 | -------------------------------------------------------------------------------- /man/InitializeCellAndScoringMatrices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{InitializeCellAndScoringMatrices} 4 | \alias{InitializeCellAndScoringMatrices} 5 | \title{InitializeCellAndScoringMatrices} 6 | \usage{ 7 | InitializeCellAndScoringMatrices(lineage_info, marker_exp_matrix, prior_info) 8 | } 9 | \arguments{ 10 | \item{lineage_info}{the lineage information from \code{prior_info} parsed into 11 | round, previous cell type, and cell type number columns} 12 | 13 | \item{marker_exp_matrix}{transformed protein marker expression (or original 14 | segmentation protein marker expression if transformation is not specified)} 15 | 16 | \item{prior_info}{user-defined cell-type signature matrix. 17 | 18 | The data should contain two columns (name X and Y) for the x, y coordinates 19 | and a column for each protein marker. 
Each row represents the data for a 20 | single cell, including its x, y coordinates and expression for each protein 21 | marker.} 22 | } 23 | \value{ 24 | a list with the following information 25 | \describe{ 26 | \item{\code{current_cell_type_assignment}}{a zero matrix with dimension 27 | (number_cells x total_rounds)} 28 | \item{\code{current_scoring_matrix}}{a NA matrix with dimension 29 | (number_cells x number_cell_type)} 30 | \item{\code{current_cell_prob}}{a NA matrix with dimension 31 | (number_cells x number_cell_type)} 32 | } 33 | } 34 | \description{ 35 | Initialize the cell type assignments, cell probabilities, and 36 | scoring matrices 37 | } 38 | -------------------------------------------------------------------------------- /man/MarkQuestionableCells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{MarkQuestionableCells} 4 | \alias{MarkQuestionableCells} 5 | \title{MarkQuestionableCells} 6 | \usage{ 7 | MarkQuestionableCells( 8 | cell_activation_prob, 9 | high_marker_threshold, 10 | low_marker_threshold 11 | ) 12 | } 13 | \arguments{ 14 | \item{cell_activation_prob}{the protein marker expressions for a single cell} 15 | 16 | \item{high_marker_threshold}{upper bound used to filter out questionable 17 | cells} 18 | 19 | \item{low_marker_threshold}{lower bound used to filter out questionable 20 | cells} 21 | } 22 | \value{ 23 | whether a cell is questionable or not 24 | } 25 | \description{ 26 | Determine if a cell is questionable. 27 | 28 | A cell is questionable if \emph{all} of its protein marker expressions are below 29 | the \code{low_marker_threshold} or above the \code{high_marker_threshold}. 
30 | } 31 | -------------------------------------------------------------------------------- /man/NeighborCellType.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{NeighborCellType} 4 | \alias{NeighborCellType} 5 | \title{NeighborCellType} 6 | \usage{ 7 | NeighborCellType( 8 | nb_list, 9 | current_cell_type_assignment, 10 | cell_type_num, 11 | round, 12 | unassigned_cells 13 | ) 14 | } 15 | \arguments{ 16 | \item{nb_list}{the list of N-nearest neighbors} 17 | 18 | \item{current_cell_type_assignment}{the current cell type assignments 19 | (number_cells x total_rounds)} 20 | 21 | \item{cell_type_num}{the cell types associated with the current round} 22 | 23 | \item{round}{the current round} 24 | 25 | \item{unassigned_cells}{cells not assigned a cell type for each round and 26 | iteration} 27 | } 28 | \value{ 29 | the cell types of the neighbors of unassigned cells 30 | } 31 | \description{ 32 | Find the cell types of the neighbors of unassigned cells 33 | } 34 | -------------------------------------------------------------------------------- /man/PlotCellsAnyCombination.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{PlotCellsAnyCombination} 4 | \alias{PlotCellsAnyCombination} 5 | \title{PlotCellsAnyCombination} 6 | \usage{ 7 | PlotCellsAnyCombination( 8 | cell_type_assignment_to_plot, 9 | coords, 10 | prior_info, 11 | cell_number_to_use, 12 | cell_type_colors = c(palette()[2:7], "white"), 13 | test_size = 1, 14 | save_plot = TRUE, 15 | output_dir = "." 
16 | ) 17 | } 18 | \arguments{ 19 | \item{cell_type_assignment_to_plot}{the final cell type assignment for each 20 | cell} 21 | 22 | \item{coords}{the x, y coordinates of each cell} 23 | 24 | \item{prior_info}{user-defined cell-type signature matrix. 25 | 26 | The data should contain two columns (name X and Y) for the x, y coordinates 27 | and a column for each protein marker. Each row represents the data for a 28 | single cell, including its x, y coordinates and expression for each protein 29 | marker.} 30 | 31 | \item{cell_number_to_use}{the row number of the cell types to plot from 32 | \code{prior_info}. To plot unknown cells, include 0 in the list.} 33 | 34 | \item{cell_type_colors}{the colors for the cell types. If unknown cells 35 | are plotted, the color of the unknown cells will be the last color listed.} 36 | 37 | \item{test_size}{the size of the points in the plot} 38 | 39 | \item{save_plot}{whether to save the plot} 40 | 41 | \item{output_dir}{the path to the directory to where the plot will be 42 | outputted. This defaults to the directory containing CELESTA_functions.R. 43 | Note that the directory must exist.} 44 | } 45 | \value{ 46 | writes the final cell type assignment plot 47 | } 48 | \description{ 49 | Plots the cells using x, y coordinates with their assigned cell 50 | types 51 | } 52 | -------------------------------------------------------------------------------- /man/PlotExpProb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{PlotExpProb} 4 | \alias{PlotExpProb} 5 | \title{PlotExpProb} 6 | \usage{ 7 | PlotExpProb( 8 | coords, 9 | marker_exp_prob, 10 | prior_marker_info, 11 | size_to_use = 1, 12 | width_to_use = 5, 13 | height_to_use = 4, 14 | save_plot = TRUE, 15 | output_dir = "." 
16 | ) 17 | } 18 | \arguments{ 19 | \item{coords}{the x, y coordinates of each cell} 20 | 21 | \item{marker_exp_prob}{the marker expression probability for each cell} 22 | 23 | \item{prior_marker_info}{user-defined cell-type signature matrix. 24 | 25 | The data should contain two columns (name X and Y) for the x, y coordinates 26 | and a column for each protein marker. Each row represents the data for a 27 | single cell, including its x, y coordinates and expression for each protein 28 | marker.} 29 | 30 | \item{size_to_use}{the size of the points in the plot} 31 | 32 | \item{width_to_use}{the width of the plot} 33 | 34 | \item{height_to_use}{the height of the plot} 35 | 36 | \item{save_plot}{whether to save the plot} 37 | 38 | \item{output_dir}{the path to the directory to where the plot will be 39 | outputted. This defaults to the directory containing CELESTA_functions.R. 40 | Note that the directory must exist.} 41 | } 42 | \value{ 43 | writes a plot of the expression probabilities for each marker 44 | } 45 | \description{ 46 | Plots the expression probabilities of cells in the tissue 47 | } 48 | -------------------------------------------------------------------------------- /man/PlotSingleExpProb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{PlotSingleExpProb} 4 | \alias{PlotSingleExpProb} 5 | \title{PlotSingleExpProb} 6 | \usage{ 7 | PlotSingleExpProb( 8 | coords, 9 | marker_exp_prob, 10 | cols = NULL, 11 | marker_to_use, 12 | size_to_use = 1, 13 | width_to_use = 5, 14 | height_to_use = 4, 15 | save_plot = TRUE, 16 | output_dir = "." 
17 | ) 18 | } 19 | \arguments{ 20 | \item{coords}{the x, y coordinates of each cell} 21 | 22 | \item{marker_exp_prob}{the marker expression probability for each cell} 23 | 24 | \item{cols}{the color palette for the plot} 25 | 26 | \item{marker_to_use}{marker to plot} 27 | 28 | \item{size_to_use}{the size of the points in the plot} 29 | 30 | \item{width_to_use}{the width of the plot} 31 | 32 | \item{height_to_use}{the height of the plot} 33 | 34 | \item{save_plot}{whether to save the plot} 35 | 36 | \item{output_dir}{the path to the directory to where the plot will be 37 | outputted. This defaults to the directory containing CELESTA_functions.R. 38 | Note that the directory must exist.} 39 | } 40 | \value{ 41 | generates a plot of the expression probabilities for a specified 42 | marker 43 | } 44 | \description{ 45 | Plots the expression probabilities of cells in the tissue. This 46 | is used solely for the Shiny app. 47 | } 48 | -------------------------------------------------------------------------------- /man/UpdatePriorMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CELESTA_functions.R 3 | \name{UpdatePriorMatrix} 4 | \alias{UpdatePriorMatrix} 5 | \title{UpdatePriorMatrix} 6 | \usage{ 7 | UpdatePriorMatrix( 8 | current_pri_matrix, 9 | initial_pri_matrix, 10 | current_cell_type_assignment, 11 | marker_exp_prob, 12 | round, 13 | cell_type_num 14 | ) 15 | } 16 | \arguments{ 17 | \item{current_pri_matrix}{the updated cell-type marker matrix} 18 | 19 | \item{initial_pri_matrix}{user defined cell-type marker matrix for a specific 20 | round} 21 | 22 | \item{current_cell_type_assignment}{the current cell type assignments 23 | (number_cells x total_rounds)} 24 | 25 | \item{marker_exp_prob}{the marker expression probability for each cell} 26 | 27 | \item{round}{the current round} 28 | 29 | \item{cell_type_num}{the cell types associated with 
the current round} 30 | } 31 | \value{ 32 | updates the prior knowledge matrix with information from cells 33 | assigned to each cell type 34 | } 35 | \description{ 36 | Updates prior knowledge matrix of the cell type signatures 37 | } 38 | -------------------------------------------------------------------------------- /man/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plevritis-lab/CELESTA/e5e92ffcef5ccada8053c2e042f34c446b6b3565/man/figures/README-pressure-1.png -------------------------------------------------------------------------------- /tests/CELESTA_functions_orig.R: -------------------------------------------------------------------------------- 1 | ############################################################################################# 2 | #' Create CELESTA object 3 | #' @export 4 | Celesta <- setClass("Celesta", 5 | slots = c(marker_exp_matrix = "matrix", # normalized expression from segmentation 6 | original_exp ="matrix", # original expression from segmentation 7 | prior_info = "data.frame", # store information from cell-type prior knowledge 8 | cell_ID = "numeric", 9 | lineage_info = "data.frame", 10 | coords = "matrix", #coordinates 11 | marker_exp_prob = "matrix", # marker expression probability 12 | cell_prob = "matrix", # cell type probability for each cell 13 | final_cell_type_assignment = "matrix", 14 | project_name = "character", 15 | nb_list = "matrix", # N-nearest neighbor list 16 | total_rounds = "numeric", 17 | cell_nb_in_bandwidth = "ANY", #Cells locates within a bandwidth to cell i 18 | cell_nb_dist = "ANY", #The distance of each cell to cell i within a bandwidth 19 | current_scoring_matrix = "matrix", #scoring function 20 | initial_pri_matrix = "matrix", #user defined cell-type marker matrix 21 | current_pri_matrix = "matrix", #updated cell-type marker matrix 22 | current_cell_prob = "matrix", #cell probability for updates 23 | 
current_cell_type_assignment = "matrix", 24 | anchor_cell_type_assignment = "matrix", 25 | starting_cell_type_assignment = "matrix", 26 | current_beta = "matrix", 27 | dist_from_nearest_assigned_cell = "matrix", 28 | nb_cell_type = "ANY", #cell types of the neighboring cells for index cells 29 | all_cell_nb_cell_type = "ANY", # cell types of the neighboring cells for all cells 30 | unassigned_cells = "numeric", # store cells to check in each round and each iteration 31 | assigned_cells = "numeric")) # cells already assigned cell type 32 | 33 | ############################################################################################# 34 | ############################################################################################# 35 | #' Get protein marker expression 36 | #' @export 37 | GetMarkerExpMatrix <- function(CelestaObj,prior_marker_info,imaging_data_file, 38 | cofactor,transform_type){ 39 | markers_to_use <- colnames(prior_marker_info)[3:dim(prior_marker_info)[2]] 40 | matching_markers <- match(markers_to_use,colnames(imaging_data_file)) 41 | if(length(which(is.na(matching_markers)==TRUE))>0){ 42 | print("Please double check the protein markers in the cell-type marker matrix and 43 | imaging input file") 44 | }else{ 45 | if(transform_type==0){#no transform 46 | marker_exp_matrix <- data.matrix(imaging_data_file[,matching_markers]) 47 | CelestaObj@marker_exp_matrix <- marker_exp_matrix 48 | }else if(transform_type==1){#arcsinh 49 | marker_exp_matrix <- data.matrix(imaging_data_file[,matching_markers]) 50 | marker_exp_transformed <- asinh(marker_exp_matrix/cofactor) 51 | CelestaObj@marker_exp_matrix <- marker_exp_transformed 52 | } 53 | CelestaObj@original_exp <- data.matrix(imaging_data_file[,matching_markers]) 54 | cellIDs <- seq(1,dim(marker_exp_matrix)[1],by=1) 55 | CelestaObj@cell_ID <- cellIDs 56 | return (CelestaObj) 57 | } 58 | } 59 | ############################################################################################ 60 | #' Get prior 
knowledge on cell types 61 | #' @export 62 | GetPirorInfo <- function(CelestaObj,prior_marker_info){ 63 | CelestaObj@prior_info <- prior_marker_info 64 | lineage_column <- prior_marker_info[,2] 65 | if(grepl("_", lineage_column[1], fixed = TRUE)){ 66 | round <- integer() 67 | previous_cell_type <- integer() 68 | cell_type_number <- integer() 69 | for(i in 1:dim(prior_marker_info)[1]){ 70 | info <- strtoi(unlist(strsplit(prior_marker_info[i,2],"_"))) 71 | round[i] <- info[1] 72 | previous_cell_type[i] <- info[2] 73 | cell_type_number[i] <- info[3] 74 | } 75 | CelestaObj@lineage_info <- data.frame(Round=round, 76 | Previous_cell_type=previous_cell_type, 77 | Cell_type_number=cell_type_number) 78 | total_rounds <- max(CelestaObj@lineage_info$Round) 79 | CelestaObj@total_rounds <- total_rounds 80 | }else{ 81 | print("Warning:the lineage information column has formatting errors") 82 | } 83 | return(CelestaObj) 84 | } 85 | ############################################################################################# 86 | #' Get coordinates 87 | #' @export 88 | GetCoords <- function(CelestaObj,imaging_data_file){ 89 | Coords <- cbind(imaging_data_file$X, 90 | imaging_data_file$Y) 91 | colnames(Coords) <- c("X","Y") 92 | CelestaObj@coords <- Coords 93 | return(CelestaObj) 94 | } 95 | ############################################################################################ 96 | #' Gaussian mixture model for each marker 97 | #' @export 98 | GMM_fitting <- function(marker_exp,marker_name,figure=FALSE){ 99 | print("Marker:") 100 | print(marker_name) 101 | GMM_marker_param <- matrix(nrow=3,ncol=2) 102 | set.seed(1) 103 | zero_indices <- which(marker_exp==0) 104 | zero_percentage <- length(zero_indices)/length(marker_exp) 105 | if(zero_percentage > 0.1 & zero_percentage<0.2){ 106 | print("Warning: The marker expression potentially has too many zeros for fitting. 
107 | GMM fitting will use input expression data with reduced sparsity") 108 | num_of_indices_to_remove <- floor(length(marker_exp)*(zero_percentage)) 109 | marker_exp <- marker_exp[-zero_indices[1:num_of_indices_to_remove]] 110 | xxx <- mixmodCluster(marker_exp,2, 111 | models=mixmodGaussianModel(family="general", 112 | listModels = "Gaussian_p_Lk_Ck", 113 | free.proportions = FALSE,equal.proportions = TRUE)) 114 | ### Check the models information for the Gaussian models, which shows which parameters are constrained. 115 | ### Want equal proportions of the two Gaussians 116 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions 117 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1] 118 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1] 119 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1] 120 | }else if(zero_percentage >= 0.2 & zero_percentage<0.5){ 121 | print("Warning: The marker expression potentially has too many zeros for fitting. 122 | GMM fitting will use input expression data with reduced sparsity") 123 | num_of_indices_to_remove <- floor(length(marker_exp)*(zero_percentage - 0.05)) 124 | marker_exp <- marker_exp[-zero_indices[1:num_of_indices_to_remove]] 125 | xxx <- mixmodCluster(marker_exp,2, 126 | models=mixmodGaussianModel(family="general", 127 | listModels = "Gaussian_p_Lk_Ck", 128 | free.proportions = FALSE,equal.proportions = TRUE)) 129 | ### Check the models information for the Gaussian models, which shows which parameters are constrained. 
130 | ### Want equal proportions of the two Gaussians 131 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions 132 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1] 133 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1] 134 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1] 135 | }else if(zero_percentage>=0.5 & zero_percentage<=0.9){ 136 | print("Warning: The marker expression potentially has too many zeros for fitting. 137 | GMM fitting will use input expression data with reduced sparsity") 138 | num_of_indices_to_remove <- ceiling(length(marker_exp)*(zero_percentage-0.02)) 139 | marker_exp <- marker_exp[-zero_indices[1:num_of_indices_to_remove]] 140 | xxx <- mixmodCluster(marker_exp,2, 141 | models=mixmodGaussianModel(family="general", 142 | listModels = "Gaussian_p_Lk_Ck", 143 | free.proportions = FALSE,equal.proportions = TRUE)) 144 | ### Check the models information for the Gaussian models, which shows which parameters are constrained. 145 | ### Want equal proportions of the two Gaussians 146 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions 147 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1] 148 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1] 149 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1] 150 | }else if(zero_percentage>=0.9){ 151 | print("Warning: The marker expression potentially has too many zeros for fitting. 152 | GMM fitting will use input expression data with reduced sparsity") 153 | marker_exp <- marker_exp[-zero_indices] 154 | xxx <- mixmodCluster(marker_exp,2, 155 | models=mixmodGaussianModel(family="general", 156 | listModels = "Gaussian_p_Lk_Ck", 157 | free.proportions = FALSE,equal.proportions = TRUE)) 158 | ### Check the models information for the Gaussian models, which shows which parameters are constrained. 
159 | ### Want equal proportions of the two Gaussians 160 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions 161 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1] 162 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1] 163 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1] 164 | }else{ 165 | xxx <- mixmodCluster(marker_exp,2, 166 | models=mixmodGaussianModel(family="general", 167 | listModels = "Gaussian_p_Lk_Ck", 168 | free.proportions = FALSE,equal.proportions = TRUE)) 169 | ### Check the models information for the Gaussian models, which shows which parameters are constrained. 170 | ### Want equal proportions of the two Gaussians 171 | GMM_marker_param[1,] <- xxx@results[[1]]@parameters@proportions 172 | GMM_marker_param[2,] <- xxx@results[[1]]@parameters@mean[,1] 173 | GMM_marker_param[3,1] <- xxx@results[[1]]@parameters@variance[[1]][,1] 174 | GMM_marker_param[3,2] <- xxx@results[[1]]@parameters@variance[[2]][,1] 175 | } 176 | #print(GMM_marker_param) 177 | if(figure == TRUE){ 178 | bin_size <- 20 179 | filename <- paste0(marker_name,"_GMM.png") 180 | png(filename,width = 5.5, height = 6.5,units = 'in',res = 300) 181 | h<-hist(marker_exp,breaks=bin_size,xlab="Marker expression",main=paste0("Histogram for ",marker_name)) 182 | highestCount <- max(h$counts) 183 | multiplier <- h$counts/h$density 184 | xfit <- seq(min(marker_exp),max(marker_exp),length=length(h$breaks)) 185 | yfit1 <- dnorm(xfit,mean=GMM_marker_param[2,1],sd=sqrt(GMM_marker_param[3,1]))*multiplier[1] 186 | lines(xfit, yfit1, col="blue", lwd=2) 187 | yfit2 <- dnorm(xfit,mean=GMM_marker_param[2,2],sd=sqrt(GMM_marker_param[3,2]))*multiplier[1] 188 | lines(xfit, yfit2, col="red", lwd=2) 189 | dev.off() 190 | } 191 | return(GMM_marker_param) 192 | } 193 | ############################################################################################# 194 | #' Build sigmoid function for calculation of expression probability 195 | #' 
@export 196 | build_sigmoid_function <- function(marker_exp_matrix,figure=FALSE){ 197 | sigmoid_function_parameter <- matrix(nrow=2,ncol=dim(marker_exp_matrix)[2]) 198 | ### For each marker, fit GMM 199 | for(i in 1:dim(marker_exp_matrix)[2]){ 200 | marker_exp <- marker_exp_matrix[,i] 201 | marker_name <- colnames(marker_exp_matrix)[i] 202 | if(typeof(marker_name) != "character"){ 203 | print("Protein marker name in the marker expression matrix has potential problem.") 204 | }else{ 205 | marker_GMM_model <- GMM_fitting(marker_exp,marker_name,figure) 206 | weight <- marker_GMM_model[1,] 207 | mus <- marker_GMM_model[2,] 208 | sigmas <- marker_GMM_model[3,] 209 | 210 | if(mus[1] > mus[2]){ # first Gaussian model is for marker expressed, second is for marker not expressed 211 | a <- (-0.5 / sigmas[2] + 0.5 /sigmas[1]) 212 | b <- mus[2] / sigmas[2] - mus[1] / sigmas[1] 213 | c <- 0.5 * (-mus[2]^2 / sigmas[2] + mus[1]^2 / sigmas[1]) + log(weight[2] / weight[1]) + 0.5 * log(sigmas[1] / sigmas[2]) 214 | xroot <- (-b - sqrt(b^2 - 4.0 * a * c) ) / (2.0 * a) 215 | #slope <- 0.5 * (xroot - mus[2]) / sigmas[2] - 0.5 * (xroot - mus[1]) / sigmas[1] 216 | slope <- 1 217 | }else{# second Gaussian model is for marker expressed, first is for marker not expressed 218 | a <- (-0.5 / sigmas[1] + 0.5 /sigmas[2]) 219 | b <- mus[1] / sigmas[1] - mus[2] / sigmas[2] 220 | c <- 0.5 * (-mus[1]^2 / sigmas[1] + mus[2]^2 / sigmas[2]) + log(weight[1] / weight[2]) + 0.5 * log(sigmas[2] / sigmas[1]) 221 | xroot <- (-b - sqrt(b^2 - 4.0 * a * c) ) / (2.0 * a) 222 | #slope <- 0.5 * (xroot - mus[1]) / sigmas[1] - 0.5 * (xroot - mus[2]) / sigmas[2] 223 | slope <- 1 224 | } 225 | if(figure==TRUE){ 226 | filename <- paste0(marker_name,"_sigmoid.png") 227 | ### plot sigmoid function 228 | exp_term <- exp(slope*(marker_exp-xroot)) 229 | yyy <- exp_term/(1+exp_term) 230 | yyy <- (yyy-min(yyy))/(max(yyy)-min(yyy)) 231 | png(filename,width = 4.5, height = 4.5,units = 'in',res = 300) 232 | plot(marker_exp, yyy, 
col = "darkblue", 233 | xlab = "", ylab = "", main = paste0(marker_name," sigmoid function")) 234 | grid() 235 | dev.off() 236 | } 237 | sigmoid_function_parameter[1,i] <- xroot 238 | sigmoid_function_parameter[2,i] <- slope 239 | } 240 | } 241 | return(sigmoid_function_parameter) 242 | } 243 | ############################################################################################# 244 | #' Calculate expression probability for each marker in the prior matrix 245 | #' @export 246 | marker_exp_probability <- function(CelestaObj,figure=FALSE){ 247 | ### Fit GMM model and get parameters for the activation probabilities 248 | marker_exp_matrix <- CelestaObj@marker_exp_matrix 249 | sigmoid_function_parameter <- build_sigmoid_function(marker_exp_matrix,figure) 250 | ### Marker activation probability matrix 251 | marker_exp_prob <- matrix(nrow=dim(marker_exp_matrix)[1],ncol=dim(marker_exp_matrix)[2]) 252 | colnames(marker_exp_prob) <- colnames(marker_exp_matrix) 253 | 254 | for(i in 1:dim(marker_exp_matrix)[2]){ 255 | exp_term <- exp(sigmoid_function_parameter[2,i]*(marker_exp_matrix[,i]-sigmoid_function_parameter[1,i])) 256 | y = exp_term/(1+exp_term) 257 | marker_exp_prob[,i] <- (y-min(y))/(max(y)-min(y)) 258 | } 259 | CelestaObj@marker_exp_prob <- marker_exp_prob 260 | return(CelestaObj) 261 | } 262 | ############################################################################################# 263 | #' Get neighborhood information 264 | #' @export 265 | GetNeighborInfo <- function(CelestaObj,number_of_neighbors=5,bandwidth=100){ 266 | coords <- CelestaObj@coords 267 | print("Get nearest neighbors.") 268 | xxx <- knearneigh(coords,k=number_of_neighbors) 269 | nb_list <- xxx$nn 270 | colnames(nb_list) <- paste0("neighbor",seq(1,number_of_neighbors,by=1)) 271 | ### Identify N-nearest neighbors for each cell 272 | CelestaObj@nb_list <- nb_list 273 | ### Identify cells within a circle bandwidth 274 | print("Identify neighboring cells within a defined bandwidth.") 275 | 
all_cell_nb_in_bandwidth <- dnearneigh(coords, 0, bandwidth, longlat = NULL) 276 | CelestaObj@cell_nb_in_bandwidth <- all_cell_nb_in_bandwidth 277 | ### Identify distances for all the cells within the circle 278 | CelestaObj@cell_nb_dist <- nbdists(all_cell_nb_in_bandwidth, coords) 279 | return(CelestaObj) 280 | } 281 | ############################################################################## 282 | #' Initialize the celesta object 283 | #' @export 284 | initialize_object <- function(CelestaObj){ 285 | total_rounds <- max(CelestaObj@lineage_info$Round) 286 | current_cell_type_assignment <- matrix(0L,nrow =dim(CelestaObj@marker_exp_matrix)[1], 287 | ncol=total_rounds) 288 | CelestaObj@current_cell_type_assignment <- current_cell_type_assignment 289 | CelestaObj@anchor_cell_type_assignment <- current_cell_type_assignment 290 | CelestaObj@starting_cell_type_assignment <- current_cell_type_assignment 291 | 292 | current_scoring_matrix <- matrix(nrow=dim(CelestaObj@marker_exp_matrix)[1], 293 | ncol = dim(CelestaObj@prior_info)[1]) 294 | colnames(current_scoring_matrix) <- CelestaObj@prior_info[,1] 295 | CelestaObj@current_scoring_matrix <- current_scoring_matrix 296 | 297 | current_cell_prob <- matrix(nrow=dim(CelestaObj@marker_exp_matrix)[1], 298 | ncol = dim(CelestaObj@prior_info)[1]) 299 | colnames(current_cell_prob) <- CelestaObj@prior_info[,1] 300 | CelestaObj@current_cell_prob <- current_cell_prob 301 | return(CelestaObj) 302 | } 303 | ############################################################################################# 304 | #' Create CELESTA object 305 | #' @export 306 | CreateCELESTAobj <- function(project_title="Project",prior_marker_info,imaging_data_file, 307 | cofactor=10,transform_type=1, 308 | number_of_neighbors=5,bandwidth=100){ 309 | CelestaObj <- Celesta(project_name = project_title) 310 | ### Get protein marker expressions and cell IDs 311 | CelestaObj <- GetMarkerExpMatrix(CelestaObj,prior_marker_info,imaging_data_file,cofactor=10, 312 | 
transform_type = transform_type) 313 | ### Get user-defined prior knowledge matrix and cell lineage information 314 | CelestaObj <- GetPirorInfo(CelestaObj,prior_marker_info) 315 | ### Get coordinates 316 | CelestaObj <- GetCoords(CelestaObj,imaging_data_file) 317 | ### Convert marker expressions to marker activation probability 318 | CelestaObj <- marker_exp_probability(CelestaObj) 319 | ### Get neighboring cell information 320 | #CelestaObj <- GetNeighborInfo(CelestaObj,number_of_neighbors=5,bandwidth=100) 321 | CelestaObj <- GetNeighborInfo(CelestaObj) 322 | #Initialize the matrices for scoring function and prob matrix 323 | CelestaObj <- initialize_object(CelestaObj) 324 | return(CelestaObj) 325 | } 326 | ############################################################################################# 327 | ############################################################################################# 328 | #' Filter cells that could potentially be artifacts 329 | #' @export 330 | cell_filtering <- function(high_marker_threshold=0.9, low_marker_threshold=0.4, 331 | CelestaObj){ 332 | ### Filter out cells that have marker expressions all high or all low 333 | total_rounds <- CelestaObj@total_rounds 334 | number_of_marker <- dim(CelestaObj@initial_pri_matrix)[2] 335 | for(i in 1:dim(CelestaObj@marker_exp_matrix)[1]){ 336 | cell_activation_prob <- CelestaObj@marker_exp_prob[i,] 337 | if(MarkQuestionableCells(cell_activation_prob,high_marker_threshold,low_marker_threshold)){ 338 | CelestaObj@current_cell_type_assignment[i,1:total_rounds] <- rep(NA,total_rounds) 339 | }else{ 340 | } 341 | } 342 | CelestaObj@starting_cell_type_assignment <- CelestaObj@current_cell_type_assignment 343 | return(CelestaObj) 344 | } 345 | ################################################################################################ 346 | #' Mark questionable cells 347 | #' @export 348 | MarkQuestionableCells <- function(cell_activation_prob,high_marker_threshold,low_marker_threshold){ 349 | 
number_of_marker <- length(cell_activation_prob) 350 | number_of_low_markers <- length(which(cell_activation_prob < low_marker_threshold)) 351 | number_of_high_markers <- length(which(cell_activation_prob > high_marker_threshold)) 352 | if(number_of_low_markers==number_of_marker | number_of_high_markers==number_of_marker){ 353 | return(TRUE) 354 | }else{ 355 | return(FALSE) 356 | } 357 | } 358 | ############################################################################################# 359 | #' For each round, need to get the prior knowledge on the cell types 360 | #' @export 361 | get_initial_prior_matrix <- function(CelestaObj,round){ 362 | lineage_info <- CelestaObj@lineage_info 363 | cell_type_num <- lineage_info$Cell_type_number[which(lineage_info$Round==round)] 364 | initial_pri_matrix <- data.matrix(prior_marker_info[which(lineage_info$Round==round), 365 | 3:dim(prior_marker_info)[2]]) 366 | CelestaObj@initial_pri_matrix <- initial_pri_matrix 367 | CelestaObj@current_pri_matrix <- initial_pri_matrix 368 | return(CelestaObj) 369 | } 370 | ################################################################################################ 371 | ################################################################################################ 372 | #' Find cells to check 373 | #' @export 374 | find_unassigned_cells <- function(CelestaObj,round){ 375 | current_cell_type_assignment <- CelestaObj@current_cell_type_assignment 376 | lineage_info <- CelestaObj@lineage_info 377 | cell_ID <- CelestaObj@cell_ID 378 | if(round == 1){ 379 | unassigned_cells <- cell_ID[which(current_cell_type_assignment[,round] == 0)] 380 | }else{ 381 | previous_level_type <- unique(lineage_info$Previous_cell_type[which(lineage_info$Round==round)]) 382 | previous_level_round <- lineage_info$Round[which(lineage_info$Cell_type_number==previous_level_type)] 383 | unassigned_cells <- cell_ID[which(current_cell_type_assignment[,round] == 0 & 384 | (current_cell_type_assignment[,previous_level_round]==previous_level_type))] 385 | } 386 | return(unassigned_cells) 387 | } 388 | 
################################################################################################
#' Find cells with ID assigned
#'
#' Returns the IDs of cells whose assignment for `round` is neither 0 nor NA.
#' For rounds after the first, the cell must also carry the parent cell type
#' of this round in the column of the previous round.
#'
#' @param CelestaObj Object with `current_cell_type_assignment`,
#'   `lineage_info` and `cell_ID` slots.
#' @param round Round number.
#' @return Vector of cell IDs that already have a cell type in this round.
#' @export
find_assigned_cells <- function(CelestaObj, round) {
  assignment <- CelestaObj@current_cell_type_assignment
  lineage_info <- CelestaObj@lineage_info
  cell_ID <- CelestaObj@cell_ID
  this_round <- assignment[, round]
  has_type <- this_round != 0 & !is.na(this_round)
  if (round != 1) {
    previous_level_type <- unique(
      lineage_info$Previous_cell_type[which(lineage_info$Round == round)]
    )
    has_type <- has_type & (assignment[, (round - 1)] == previous_level_type)
  }
  assigned_cells <- cell_ID[which(has_type)]
  return(assigned_cells)
}
################################################################################################
#' Calculate scores using MSE
#'
#' For every row (cell) the score is one minus the mean squared difference
#' between the selected marker probabilities and the prior values.
#'
#' @param activation_prob_to_use Matrix of marker probabilities, cells in rows.
#' @param prior_info Prior values for the markers selected by `non_NA_index`.
#' @param non_NA_index Column indices of the markers to compare.
#' @return Numeric vector with one score per row.
#' @export
get_score <- function(activation_prob_to_use, prior_info, non_NA_index) {
  # drop = FALSE keeps a matrix even when only one marker is selected,
  # otherwise apply() fails on the resulting plain vector.
  selected <- activation_prob_to_use[, non_NA_index, drop = FALSE]
  score <- apply(selected, 1, function(x) (1 - sum((x - prior_info)^2) / length(x)))
  return(score)
}
#############################################################################################
#' Function for calculating scoring function
#'
#' Scores every unassigned cell against every cell type of the round with
#' `get_score()` and then normalises each cell's scores to sum to one.
#'
#' @param CelestaObj Object with `marker_exp_prob`, `current_pri_matrix` and
#'   `current_scoring_matrix` slots.
#' @param round Round number (not used in the computation itself).
#' @param unassigned_cells Row indices of the cells to score.
#' @param cell_type_num Cell type numbers of this round; row `i` of the prior
#'   matrix corresponds to `cell_type_num[i]`.
#' @return `CelestaObj` with an updated `current_scoring_matrix`.
#' @export
scoring_function <- function(CelestaObj, round, unassigned_cells, cell_type_num) {
  marker_exp_prob <- CelestaObj@marker_exp_prob
  current_pri_matrix <- CelestaObj@current_pri_matrix
  current_scoring_matrix <- CelestaObj@current_scoring_matrix
  print("Start calculating the scoring function.")
  # Keep matrix shape even when a single cell is left.
  activation_prob_to_use <- marker_exp_prob[unassigned_cells, , drop = FALSE]
  for (i in seq_along(cell_type_num)) {
    non_NA_index <- which(!is.na(current_pri_matrix[i, ]))
    prior_info <- current_pri_matrix[i, non_NA_index]
    current_scoring_matrix[unassigned_cells, cell_type_num[i]] <-
      get_score(activation_prob_to_use, prior_info, non_NA_index)
  }
  raw_scores <- current_scoring_matrix[unassigned_cells, cell_type_num, drop = FALSE]
  # Normalise each cell's scores across the cell types of this round.
  current_scoring_matrix[unassigned_cells, cell_type_num] <-
    t(apply(raw_scores, 1, function(x) x / sum(x)))
  CelestaObj@current_scoring_matrix <- current_scoring_matrix
  return(CelestaObj)
}
################################################################################################
################################################################################################
#' Calculate probability differences
#'
#' For each unassigned cell, returns the smallest gap between its maximum
#' probability and the probabilities of all other cell types.
#'
#' @param max.prob Maximum probability per unassigned cell.
#' @param max.prob_index Column index of that maximum per unassigned cell.
#' @param cell_prob_list Probability matrix covering all cells (rows).
#' @param unassigned_cells Row indices into `cell_prob_list`; `max.prob` and
#'   `max.prob_index` are aligned with this vector.
#' @return Numeric vector aligned with `unassigned_cells` (empty for empty input).
#' @export
find_min_prob_diff <- function(max.prob, max.prob_index, cell_prob_list, unassigned_cells) {
  ### max.prob, max.prob_index are calculated only on unassigned_cells
  ### but cell_prob_list has all the cells
  min_prob_diff <- vapply(
    seq_along(unassigned_cells),
    function(i) {
      min(max.prob[i] - cell_prob_list[unassigned_cells[i], -max.prob_index[i]])
    },
    numeric(1)
  )
  return(min_prob_diff)
}
################################################################################################
#' Find the cell types based on the scores (anchor cell) or probabilities (index cell)
#'
#' A cell receives the cell type with the highest probability when (1) that
#' probability exceeds `min_prob`, (2) it leads every other type by more than
#' `min_difference`, (3) all markers that the initial prior marks as 1 have an
#' expression probability of at least the type's high threshold, and (4) all
#' markers marked as 0 are at or below the type's low threshold.
#'
#' @param CelestaObj Object with `current_cell_prob`, `initial_pri_matrix`,
#'   `current_cell_type_assignment` and `marker_exp_prob` slots.
#' @param cell_type_num Cell type numbers of this round.
#' @param unassigned_cells Row indices of the cells to evaluate.
#' @param round Round number (column of the assignment matrix to update).
#' @param min_difference Minimum lead over the other cell types.
#' @param min_prob Minimum value of the winning probability.
#' @param high_marker_threshold,low_marker_threshold Thresholds indexed by
#'   cell type number.
#' @return `CelestaObj` with the assignment column of `round` updated.
#' @export
cell_type <- function(CelestaObj, cell_type_num, unassigned_cells, round,
                      min_difference = 0, min_prob = 0,
                      high_marker_threshold, low_marker_threshold) {
  # Nothing to evaluate: leave the object untouched.
  if (length(unassigned_cells) == 0) {
    return(CelestaObj)
  }
  all_cell_prob <- CelestaObj@current_cell_prob
  initial_pri_matrix <- CelestaObj@initial_pri_matrix
  cell_prob_list <- all_cell_prob[, cell_type_num, drop = FALSE]
  cell_type_assignment <- CelestaObj@current_cell_type_assignment[, round]
  marker_exp_prob <- CelestaObj@marker_exp_prob
  candidate_prob <- cell_prob_list[unassigned_cells, , drop = FALSE]
  max.prob_index <- apply(candidate_prob, 1, which.max)
  max.prob <- apply(candidate_prob, 1, max)
  min_prob_diff <- find_min_prob_diff(max.prob, max.prob_index, cell_prob_list, unassigned_cells)
  ### Find cells with cell type max probability > threshold and cell type probability difference > threshold
  ########################################
  ### Indexing on unassigned_cells!!!!!!!!
  passes <- which(min_prob_diff > min_difference & max.prob > min_prob)
  threshold_cells <- unassigned_cells[passes]
  max.prob_index_thresholded <- max.prob_index[passes]
  ########################################
  for (i in seq_along(threshold_cells)) {
    cell_ID_to_check <- threshold_cells[i]
    best_row <- max.prob_index_thresholded[i]
    high_marker_index <- which(initial_pri_matrix[best_row, ] == 1)
    low_marker_index <- which(initial_pri_matrix[best_row, ] == 0)
    threshold_index <- cell_type_num[best_row]
    # isTRUE(all(.)) treats an NA comparison as "not satisfied", matching a
    # count of TRUE values compared against the number of markers.
    high_ok <- isTRUE(all(
      marker_exp_prob[cell_ID_to_check, high_marker_index] >=
        high_marker_threshold[threshold_index]
    ))
    low_ok <- isTRUE(all(
      marker_exp_prob[cell_ID_to_check, low_marker_index] <=
        low_marker_threshold[threshold_index]
    ))
    if (high_ok && low_ok) {
      cell_type_assignment[cell_ID_to_check] <- threshold_index
    }
  }
  CelestaObj@current_cell_type_assignment[, round] <- cell_type_assignment
  return(CelestaObj)
}
################################################################################################
#' Cell type count
#'
#' Counts, for each cell type of the round, how many cells currently carry it
#' and what share of all cells that is. Prints a notice for every cell type
#' without any cell.
#'
#' @param CelestaObj Object with `prior_info` and
#'   `current_cell_type_assignment` slots.
#' @param cell_type_num Cell type numbers to count.
#' @param round Round number (column of the assignment matrix).
#' @return Matrix with columns `cell_type_number`, `count` and `proportion`,
#'   one row per cell type, row names taken from column 1 of `prior_info`.
#' @export
count_cell_type <- function(CelestaObj, cell_type_num, round) {
  prior_marker_info <- CelestaObj@prior_info
  current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
  cell_type_count <- matrix(nrow = length(cell_type_num), ncol = 3)
  colnames(cell_type_count) <- c("cell_type_number", "count", "proportion")
  row.names(cell_type_count) <- prior_marker_info[cell_type_num, 1]
  cell_type_count[, 1] <- cell_type_num
  total_cell_number <- nrow(current_cell_type_assignment)
  for (i in seq_along(cell_type_num)) {
    cell_type_count[i, 2] <-
      length(which(current_cell_type_assignment[, round] == cell_type_num[i]))
    if (cell_type_count[i, 2] < 1) {
      type_name <- prior_marker_info[cell_type_num[i], 1]
      print(paste0("Too few cells identified for: ", type_name))
      print("Please consider relax threshold.")
    }
  }
  cell_type_count[, 3] <- cell_type_count[, 2] / total_cell_number
  return(cell_type_count)
}
################################################################################################
# plot_cells_iteration <- function(CelestaObj,cell_number_to_use,round,
#                                  cell_type_colors,point_size=0.1,iteration,figure = FALSE){
#   if(figure==TRUE){
#     coords <- CelestaObj@coords
#     current_cell_type_assignment <- CelestaObj@current_cell_type_assignment[,round]
#     project_name <- CelestaObj@project_name
#     prior_marker_info <- CelestaObj@prior_info
#     cell_types <- prior_marker_info[,1]
#     x_min <- min(coords[,1])
#     x_max <- max(coords[,1])
#     y_min <- min(coords[,2])
#     y_max <- max(coords[,2])
#     range <- c(min(x_min,y_min),max(x_max,y_max))
#
#     filename <- paste0(project_name,paste0(paste0("Round_",round),
#                                            paste0("_Iteration_",paste0(iteration,".png"))))
#     cell_index <- integer()
#     cell_anno <- character()
#     count <- 0
#     for(i in 1:length(cell_number_to_use)){
#       unassigned_cells <- which(current_cell_type_assignment == cell_number_to_use[i])
#       cell_index[(count+1):(count+length(unassigned_cells))] <- unassigned_cells
#       cell_anno[(count+1):(count+length(unassigned_cells))] <- cell_types[cell_number_to_use[i]]
#       count <- count + length(unassigned_cells)
#     }
#     df_plot <- data.frame(x=coords[cell_index,1],
#                           y=coords[cell_index,2],
#                           cell_anno=cell_anno)
#     df_plot$cell_anno <- factor(df_plot$cell_anno,levels = c(cell_types[cell_number_to_use]))
#     color_plot <- cell_type_colors[cell_number_to_use]
#
#     g<- ggplot(df_plot,aes(x=x,y=y,group=cell_anno))+geom_point(aes(color=cell_anno),size=point_size)+
#       scale_color_manual(values=color_plot)+
#       xlim(range[1],range[2])+ylim(range[1],range[2])+
#       labs(main="")+theme(aspect.ratio = 1,panel.grid.major = element_blank(),
#                           panel.grid.minor = element_blank(),
#                           legend.title = element_blank(),
#                           legend.text=element_text(size=12,face = "bold"),
#                           panel.background = element_rect(fill = 'black'),
#                           axis.line = element_line(colour = "black"),
#                           axis.title.x=element_blank(),
#                           axis.title.y=element_blank())+
#       guides(colour = guide_legend(override.aes = list(size=10)))
#     ggsave(filename,plot=g,width = 16.5, height = 16,units = 'in',dpi = 300)
#   }
# }
################################################################################################
#' Find the cell types of the neighbors for unassigned_cells
#'
#' For every unassigned cell and every cell type of the round, stores which of
#' the cell's neighbors (row of `nb_list`) currently carry that cell type.
#'
#' @param CelestaObj Object with `nb_list` and `current_cell_type_assignment`
#'   slots.
#' @param cell_type_num Cell type numbers of this round.
#' @param round Round number (column of the assignment matrix).
#' @param unassigned_cells Row indices of the cells to inspect.
#' @return `CelestaObj` whose `nb_cell_type` slot is a list-matrix with one row
#'   per unassigned cell and one column per cell type.
#' @export
neighbor_cell_type <- function(CelestaObj, cell_type_num, round, unassigned_cells) {
  ### Only has information for cells to check
  nb_list <- CelestaObj@nb_list
  cell_type_assignment <- CelestaObj@current_cell_type_assignment[, round]
  same_type_nb <- matrix(list(),
                         nrow = length(unassigned_cells),
                         ncol = length(cell_type_num))
  row.names(same_type_nb) <- unassigned_cells
  colnames(same_type_nb) <- cell_type_num
  for (j in seq_along(unassigned_cells)) {
    neighbors <- nb_list[unassigned_cells[j], ]
    neighbor_types <- cell_type_assignment[neighbors]
    for (i in seq_along(cell_type_num)) {
      same_type_nb[j, i] <- list(neighbors[which(neighbor_types == cell_type_num[i])])
    }
  }
  CelestaObj@nb_cell_type <- same_type_nb
  return(CelestaObj)
}
################################################################################################
################################################################################################
#' Get distance from nearest assigned cells
#'
#' For every unassigned cell, looks at its neighbors within the bandwidth
#' that already have a cell type and records, per cell type, the distance to
#' the closest such neighbor. Entries stay NA when no assigned neighbor of
#' that type exists.
#'
#' @param CelestaObj Object with `cell_nb_in_bandwidth`, `cell_nb_dist` and
#'   `current_cell_type_assignment` slots.
#' @param cell_type_num Cell type numbers of this round (columns of the result).
#' @param unassigned_cells Indices of the cells to inspect (rows of the result).
#' @param assigned_cells IDs of the cells that already have a cell type.
#' @param round Round number (column of the assignment matrix).
#' @return `CelestaObj` with `dist_from_nearest_assigned_cell` set.
#' @export
get_dist_from_nearest_assigned_cells <- function(CelestaObj, cell_type_num, unassigned_cells,
                                                 assigned_cells, round) {
  print("Get distance from nearest assigned cells.")
  all_cell_nb_in_circle <- CelestaObj@cell_nb_in_bandwidth
  all_cell_nb_circle_dist <- CelestaObj@cell_nb_dist
  current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
  dist_nearest_assigned_cell <- matrix(nrow = length(unassigned_cells),
                                       ncol = length(cell_type_num))
  colnames(dist_nearest_assigned_cell) <- cell_type_num
  for (i in seq_len(nrow(dist_nearest_assigned_cell))) {
    cell_to_check <- unassigned_cells[i]
    matching <- match(all_cell_nb_in_circle[[cell_to_check]], assigned_cells)
    is_assigned_nb <- !is.na(matching)
    if (!any(is_assigned_nb)) {
      next
    }
    nb_cell_with_ID <- assigned_cells[matching[is_assigned_nb]]
    nb_cell_type <- current_cell_type_assignment[nb_cell_with_ID, round]
    nb_cell_dist <- all_cell_nb_circle_dist[[cell_to_check]][is_assigned_nb]
    for (type_j in unique(nb_cell_type)) {
      dist_nearest_assigned_cell[i, which(cell_type_num == type_j)] <-
        min(nb_cell_dist[which(nb_cell_type == type_j)])
    }
  }
  CelestaObj@dist_from_nearest_assigned_cell <- dist_nearest_assigned_cell
  return(CelestaObj)
}
#############################################################################################
#' Function to calculate beta
#'
#' Computes `scale_factor * (1 - distance / bandwidth)` from the
#' `dist_from_nearest_assigned_cell` slot and replaces NA entries with 0.
#'
#' @param CelestaObj Object with a `dist_from_nearest_assigned_cell` slot.
#' @param scale_factor Multiplier applied to the distance term.
#' @param bandwidth Distance by which the stored distances are divided.
#' @return `CelestaObj` with `current_beta` set.
#' @export
calculate_beta <- function(CelestaObj, scale_factor = 5, bandwidth = 100) {
  # beta = scale_factor * (1 - distance / bandwidth); entries without a
  # recorded distance (NA) get a beta of 0.
  nearest_dist <- CelestaObj@dist_from_nearest_assigned_cell
  beta <- scale_factor * (1 - nearest_dist / bandwidth)
  beta[is.na(beta)] <- 0
  CelestaObj@current_beta <- beta
  return(CelestaObj)
}
################################################################################################
#' Function to calculate probability for index cells
#'
#' For each unassigned cell and each cell type of the round, combines the
#' cell's score with the summed probabilities of its same-type neighbors:
#' `exp(score) * exp(beta * sum(neighbor probabilities))`, normalised over the
#' cell types. Neighbor probabilities are read from the values held at entry,
#' so updates made within one call do not influence each other.
#'
#' @param CelestaObj Object with `current_cell_prob`, `current_beta`,
#'   `nb_cell_type` and `current_scoring_matrix` slots.
#' @param cell_type_num Cell type numbers of this round.
#' @param unassigned_cells Row indices of the cells to update; row `i` of
#'   `current_beta` and `nb_cell_type` belongs to `unassigned_cells[i]`.
#' @param round Round number (kept for interface compatibility; not used).
#' @return `CelestaObj` with `current_cell_prob` updated for these cells.
#' @export
cell_prob <- function(CelestaObj, cell_type_num, unassigned_cells, round) {
  # This function uses mean field estimation to calculate probability
  # For each cell, a probability is calculated for each cell type to check
  # drop = FALSE keeps a matrix when the round has a single cell type.
  current_cell_prob_list <- CelestaObj@current_cell_prob[, cell_type_num, drop = FALSE]
  u <- current_cell_prob_list # all cells
  current_beta <- CelestaObj@current_beta # cells to check
  nb_cell_type <- CelestaObj@nb_cell_type # cells to check
  current_scoring_matrix <- CelestaObj@current_scoring_matrix # all cells*all cell types
  for (i in seq_along(unassigned_cells)) {
    cell_ID_to_check <- unassigned_cells[i]
    u_i <- numeric(length = length(cell_type_num))
    for (j in seq_along(cell_type_num)) {
      current_same_type_nb <- unlist(nb_cell_type[i, j][[1]])
      u_i[j] <- exp(current_scoring_matrix[cell_ID_to_check, cell_type_num[j]]) *
        exp(current_beta[i, j] *
              sum(current_cell_prob_list[current_same_type_nb, j]))
    }
    u[cell_ID_to_check, ] <- u_i / sum(u_i)
  }
  print("Cell probability updating done.")
  CelestaObj@current_cell_prob[, cell_type_num] <- u
  return(CelestaObj)
}
################################################################################################
#' Function to update prior knowledge
#' matrix of the cell type signatures
#'
#' For every cell type of the round and every marker that has a value in the
#' initial prior matrix, the updated prior is the average of (a) the mean
#' expression probability of that marker over the cells currently assigned to
#' the cell type and (b) the initial prior value. Markers that are NA in the
#' initial prior are left as they are.
#'
#' @param CelestaObj Object with `current_pri_matrix`, `initial_pri_matrix`,
#'   `current_cell_type_assignment` and `marker_exp_prob` slots.
#' @param round Round number (column of the assignment matrix).
#' @param cell_type_num Cell type numbers of this round; row `i` of the prior
#'   matrices corresponds to `cell_type_num[i]`.
#' @return `CelestaObj` with `current_pri_matrix` updated.
#' @export
update_prior_matrix <- function(CelestaObj, round, cell_type_num) {
  updated_prior_matrix <- CelestaObj@current_pri_matrix
  initial_pri_matrix_data <- CelestaObj@initial_pri_matrix
  assignment_this_round <- CelestaObj@current_cell_type_assignment[, round]
  all_marker_pro_matrix <- CelestaObj@marker_exp_prob
  for (i in seq_along(cell_type_num)) { ### for each cell type
    # The set of cells does not depend on the marker, so look it up once.
    cells_of_current_cell_type <- which(assignment_this_round == cell_type_num[i])
    for (j in seq_len(ncol(updated_prior_matrix))) {
      if (!is.na(initial_pri_matrix_data[i, j])) {
        updated_prior_matrix[i, j] <-
          (mean(all_marker_pro_matrix[cells_of_current_cell_type, j]) +
             initial_pri_matrix_data[i, j]) / 2
      }
    }
  }
  CelestaObj@current_pri_matrix <- updated_prior_matrix
  return(CelestaObj)
}
##############################################################################
### For different rounds
# plot_marker_exp <- function(CelestaObj,cell_type_colors=c(palette()[2:7],"white"),
#                             cell_type_num,round){
#   sample_name <- CelestaObj@project_name
#   current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
#   marker_exp_matrix <- CelestaObj@marker_exp_matrix
#   plot_matrix <- matrix(0L,nrow=length(cell_type_num),ncol=dim(marker_exp_matrix)[2])
#   row.names(plot_matrix) <- as.character(CelestaObj@prior_info[cell_type_num,1])
#   colnames(plot_matrix) <- colnames(marker_exp_matrix)
#   for(i in 1:length(cell_type_num)){
#     plot_matrix[i,] <- colMeans(marker_exp_matrix[which(current_cell_type_assignment[,round] == cell_type_num[i]),])
#   }
#   df <- as.data.frame(cbind(row.names(plot_matrix),plot_matrix))
#   colnames(df) <- c("cell_types",colnames(marker_exp_matrix))
#   df.m <- melt(df, id.var = "cell_types")
#
#   df.m$value <- as.numeric(df.m$value)
#   df.m$cell_types <- factor(df.m$cell_types,levels = row.names(plot_matrix))
#
#   filename <- paste0(sample_name,"_")
#   filename1 <- paste0(filename,round)
#   filename2 <- paste0(filename1,"_ave_marker_exp.png")
#
#   g<-ggplot(df.m,aes(x=variable,y=value,group=cell_types,color=cell_types)) + geom_point() + geom_line() +
#     scale_color_manual(values=cell_type_colors[cell_type_num])+xlab("Marker")+
#     ylab("Expression")+theme_bw()+
#     theme(legend.title = element_blank())+
#     theme(axis.text.x = element_text(angle = 80, hjust = 1,size=12,face="bold"),
#           legend.text=element_text(size=12,face = "bold"),
#           panel.grid.major = element_blank(), panel.grid.minor = element_blank())
#   ggsave(filename2,plot=g,width=13.5,height=9,units = 'in',dpi = 300)
# }
################################################################################################
################################################################################################
#' Get final results
#'
#' Translates the per-round assignments into cell type names, merges the
#' rounds into one final assignment (a later round overrides the final type
#' of a cell when that cell currently carries the round's parent type and the
#' round assigned it a non-zero type), and writes two CSV files named
#' `<project_name>_final_cell_type_assignment.csv` and
#' `<project_name>_anchor_cell_assignment.csv`.
#'
#' @param total_rounds Number of rounds to merge.
#' @param CelestaObj Object with `current_cell_type_assignment`,
#'   `anchor_cell_type_assignment`, `prior_info`, `lineage_info` and
#'   `project_name` slots.
#' @param imaging_data Data bound column-wise to the final result in the
#'   first CSV file.
#' @return `CelestaObj` with `final_cell_type_assignment` set to a matrix with
#'   one column per round plus "Cell type number" and "Final cell type".
#' @export
get_final_inferred_cell_types <- function(total_rounds, CelestaObj, imaging_data) {
  current_cell_type_assignment <- CelestaObj@current_cell_type_assignment
  anchor_cell_assignment <- CelestaObj@anchor_cell_type_assignment
  prior_marker_info <- CelestaObj@prior_info
  lineage_info <- CelestaObj@lineage_info
  # Use the row count explicitly instead of handing the whole dim() vector to
  # matrix(), which would rely on only its first element being used.
  number_of_cells <- nrow(current_cell_type_assignment)
  cell_type_name_assigned <- matrix(nrow = number_of_cells, ncol = total_rounds)
  anchor_cell_type_name_assigned <- matrix(nrow = number_of_cells, ncol = total_rounds)
  final_cell_type_assignment <- rep(0, length = number_of_cells)
  for (i in seq_len(total_rounds)) {
    cell_type_name_assigned[, i] <- prior_marker_info[
      match(current_cell_type_assignment[, i], lineage_info$Cell_type_number), 1
    ]
    cell_type_name_assigned[which(current_cell_type_assignment[, i] == 0), i] <- "Unknown"
    anchor_cell_type_name_assigned[, i] <- prior_marker_info[
      match(anchor_cell_assignment[, i], lineage_info$Cell_type_number), 1
    ]
    anchor_cell_type_name_assigned[which(anchor_cell_assignment[, i] == 0), i] <- "Unknown"
    if (i == 1) {
      final_cell_type_assignment <- current_cell_type_assignment[, i]
    } else {
      previous_level_type <- unique(
        lineage_info$Previous_cell_type[which(lineage_info$Round == i)]
      )
      # Cells that sit on the parent type and were refined in this round.
      refined <- which(final_cell_type_assignment == previous_level_type &
                         current_cell_type_assignment[, i] != 0)
      final_cell_type_assignment[refined] <- current_cell_type_assignment[refined, i]
    }
  }
  final_cell_names <- prior_marker_info[
    match(final_cell_type_assignment, lineage_info$Cell_type_number), 1
  ]
  final_cell_names[which(final_cell_type_assignment == 0)] <- "Unknown"
  final_result <- cbind(cell_type_name_assigned, final_cell_type_assignment, final_cell_names)
  round_name <- paste("Round", seq(1, total_rounds, by = 1))
  colnames(final_result) <- c(round_name, "Cell type number", "Final cell type")
  filename <- paste0(CelestaObj@project_name, "_final_cell_type_assignment.csv")
  write.csv(cbind(final_result, imaging_data), file = filename, row.names = FALSE)
  filename <- paste0(CelestaObj@project_name, "_anchor_cell_assignment.csv")
  write.csv(anchor_cell_type_name_assigned, file = filename)
  CelestaObj@final_cell_type_assignment <- final_result
  return(CelestaObj)
}
#############################################################################################
#############################################################################################
#' Plot the cells using XY coordinates
#'
#' Draws the cells of the requested cell types at their coordinates, one
#' colour per cell type, and saves the figure as `plot_cell_assignment.png`
#' in the working directory.
#'
#' @param cell_type_assignment_to_plot Vector of cell type numbers, one per cell.
#' @param CelestaObj Object with `coords` and `prior_info` slots.
#' @param cell_number_to_use Cell type numbers to draw, in drawing order.
#' @param cell_type_colors Colours; the first `length(cell_number_to_use)`
#'   entries are used.
#' @param test_size Point size.
#' @return The value of `ggsave()`.
#' @export
plot_cells_any_combination <- function(cell_type_assignment_to_plot, CelestaObj,
                                       cell_number_to_use,
                                       cell_type_colors = c(palette()[2:7], "white"),
                                       test_size = 1) {
  ### Cannot plot more than 7 cell types
  coords <- CelestaObj@coords
  cell_types <- CelestaObj@prior_info[cell_number_to_use, 1]
  # Same limits on both axes so the tissue is not distorted.
  axis_limits <- c(min(coords[, 1], coords[, 2]), max(coords[, 1], coords[, 2]))

  # Collect the cells per requested type. Building the index this way also
  # works when a requested type has no cells at all, where filling a range
  # such as (count + 1):count would run backwards and fail.
  cells_by_type <- lapply(cell_number_to_use, function(type_number) {
    which(cell_type_assignment_to_plot == type_number)
  })
  cell_index <- unlist(cells_by_type)
  cell_anno <- rep(as.character(cell_types), times = lengths(cells_by_type))

  df_plot <- data.frame(x = coords[cell_index, 1],
                        y = coords[cell_index, 2],
                        cell_anno = cell_anno)
  df_plot$cell_anno <- factor(df_plot$cell_anno, levels = c(cell_types))
  color_plot <- cell_type_colors[seq_along(cell_number_to_use)]

  g <- ggplot(df_plot, aes(x = x, y = y, group = cell_anno)) +
    geom_point(aes(color = cell_anno), size = test_size) +
    scale_color_manual(values = color_plot) +
    xlim(axis_limits[1], axis_limits[2]) + ylim(axis_limits[1], axis_limits[2]) +
    labs(main = "") +
    theme(aspect.ratio = 1, panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          legend.title = element_blank(),
          panel.background = element_rect(fill = 'black'),
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          legend.text = element_text(size = 12, face = "bold")) +
    guides(colour = guide_legend(override.aes = list(size = 5)))
  ggsave(filename = "plot_cell_assignment.png", plot = g, width = 12, height = 12,
         units = 'in', dpi = 300)
}
#############################################################################################
#' Plot the expression probabilities of cells in the tissue
#'
#' For every marker column of the prior table (column 3 onwards), plots all
#' cells at their coordinates coloured by binned expression probability
#' (`<=0.5`, `>0.5`, `>0.7`, `>0.8`, `>0.9`) and saves the figure as
#' `<marker>_exp_prob.png` in the working directory.
#'
#' @param CelestaObj Object with `coords`, `marker_exp_prob` and `prior_info`
#'   slots.
#' @param size_to_use Point size.
#' @param width_to_use,height_to_use Figure size in inches.
#' @return `NULL`, invisibly; called for the files it writes.
#' @export
plot_exp_prob <- function(CelestaObj, size_to_use = 1, width_to_use = 5, height_to_use = 4) {
  coords <- CelestaObj@coords
  marker_exp_prob <- CelestaObj@marker_exp_prob
  prior_marker_info <- CelestaObj@prior_info
  color_ramp <- colorRampPalette(colors = c("white", "blue4"))
  cols <- color_ramp(6)

  # Axis limits and bin definitions are the same for every marker.
  axis_limits <- c(min(coords[, 1], coords[, 2]), max(coords[, 1], coords[, 2]))
  bin_breaks <- c(-Inf, 0.5, 0.7, 0.8, 0.9, Inf)
  bin_labels <- c("<=0.5", ">0.5", ">0.7", ">0.8", ">0.9")

  markers_to_check <- as.character(colnames(prior_marker_info)[3:dim(prior_marker_info)[2]])
  for (marker_to_use in markers_to_check) {
    marker_exp_prob_to_use <-
      marker_exp_prob[, which(colnames(marker_exp_prob) == marker_to_use)]

    # cut() uses right-closed intervals, i.e. (0.5, 0.7] is labelled ">0.5".
    mca <- data.frame(
      X = round(coords[, 1], digits = 2),
      Y = round(coords[, 2], digits = 2),
      Expression = round(marker_exp_prob_to_use, digits = 2),
      Color_anno = cut(marker_exp_prob_to_use, breaks = bin_breaks, labels = bin_labels)
    )
    row.names(mca) <- NULL

    filename <- paste0(marker_to_use, "_exp_prob.png")
    g <- ggplot(mca, aes(x = X, y = Y, color = Color_anno)) +
      xlim(axis_limits[1], axis_limits[2]) + ylim(axis_limits[1], axis_limits[2]) +
      geom_point(shape = 20, size = size_to_use) +
      ggtitle(marker_to_use) + theme_bw() +
      scale_colour_manual(values = cols[c(1, 2, 3, 4, 6)]) +
      theme(legend.title = element_blank(),
            legend.text = element_text(size = 14),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            plot.title = element_text(hjust = 0.5, size = 15, face = "bold")) +
      guides(colour = guide_legend(override.aes = list(size = 10)))
    ggsave(filename, plot = g, width = width_to_use, height = height_to_use,
           units = 'in', dpi = 300)
  }
}
#############################################################################################
#' Function to assign cell types through iterations
#'
#' For every round: scores the unassigned cells, assigns anchor cells with the
#' anchor thresholds, then repeatedly updates cell type probabilities from the
#' neighborhood and re-assigns cells with the iteration thresholds until the
#' share of changed cells drops to `cell_change_threshold` or below,
#' `max_iteration` is reached, or no unassigned cell is left. Finally the
#' per-round results are merged and written out by
#' `get_final_inferred_cell_types()`.
#'
#' If the anchor step of a round finds no cell for any cell type of that
#' round, a notice is printed and the object is returned immediately, without
#' the final merging step.
#'
#' @param CelestaObj Object to work on.
#' @param max_iteration Upper limit for the iteration counter of a round.
#' @param cell_change_threshold Minimum share of changed cells needed to keep
#'   iterating.
#' @param min_diff,min_probability Passed to `cell_type()` as `min_difference`
#'   and `min_prob`.
#' @param high_marker_threshold_anchor,low_marker_threshold_anchor Thresholds
#'   for the anchor step, indexed by cell type number.
#' @param high_marker_threshold_iteration,low_marker_threshold_iteration
#'   Thresholds for the iterative step, indexed by cell type number.
#' @return The updated `CelestaObj`.
#' @export
assign_cell_main <- function(CelestaObj, max_iteration = 10, cell_change_threshold = 0.01,
                             min_diff = 0, min_probability = 0,
                             high_marker_threshold_anchor = rep(0.7, length.out = 50),
                             low_marker_threshold_anchor = rep(0.9, length.out = 50),
                             high_marker_threshold_iteration = rep(0.5, length.out = 50),
                             low_marker_threshold_iteration = rep(1, length.out = 50)) {
  total_rounds <- CelestaObj@total_rounds
  lineage_info <- CelestaObj@lineage_info

  # Recompute neighbor cell types, nearest-assigned distances and beta.
  refresh_spatial_terms <- function(obj, cell_type_num, round,
                                    unassigned_cells, assigned_cells) {
    obj <- neighbor_cell_type(obj, cell_type_num, round, unassigned_cells)
    obj <- get_dist_from_nearest_assigned_cells(obj, cell_type_num,
                                                unassigned_cells, assigned_cells, round)
    calculate_beta(obj, scale_factor = 5, bandwidth = 100)
  }

  ### This loop is the main part for cell type assignment
  ### Cell type assignment function (normally should finish within 10min for ~100k cells)
  ### It runs pretty fast for below 50k cells
  for (round in seq_len(total_rounds)) {
    CelestaObj@current_cell_type_assignment[, round] <-
      CelestaObj@starting_cell_type_assignment[, round]

    current_number_of_cells_changed <- numeric()
    cell_type_num <- lineage_info$Cell_type_number[which(lineage_info$Round == round)]
    CelestaObj <- get_initial_prior_matrix(CelestaObj, round)
    unassigned_cells <- find_unassigned_cells(CelestaObj, round)
    number_of_cells_to_find_identity <- length(unassigned_cells)
    print(number_of_cells_to_find_identity)
    ### Get scoring function
    CelestaObj <- scoring_function(CelestaObj, round, unassigned_cells, cell_type_num)
    ### Initialize the cell probability with initial scores
    CelestaObj@current_cell_prob <- CelestaObj@current_scoring_matrix
    ### Assign anchor cells
    CelestaObj <- cell_type(CelestaObj, cell_type_num, unassigned_cells, round,
                            min_difference = min_diff,
                            min_prob = min_probability,
                            high_marker_threshold = high_marker_threshold_anchor,
                            low_marker_threshold = low_marker_threshold_anchor)
    iteration <- 1
    CelestaObj@anchor_cell_type_assignment[, round] <-
      CelestaObj@current_cell_type_assignment[, round]
    cell_type_count <- count_cell_type(CelestaObj, cell_type_num, round)
    print(cell_type_count)
    if (all(cell_type_count[, 2] < 1)) {
      print("Too few cells identified for certain cell type, please consider relaxing threshold.")
      return(CelestaObj)
    }
    # Seed value so that the first pass of the while loop always runs
    # (as long as it exceeds cell_change_threshold).
    current_number_of_cells_changed[iteration] <- 1
    #############
    ### Find cells to check
    unassigned_cells <- find_unassigned_cells(CelestaObj, round)
    assigned_cells <- find_assigned_cells(CelestaObj, round)
    #############
    ### Calculate beta
    CelestaObj <- refresh_spatial_terms(CelestaObj, cell_type_num, round,
                                        unassigned_cells, assigned_cells)
    ### Iterative cell type assignment
    while (iteration < max_iteration &&
           current_number_of_cells_changed[iteration] > cell_change_threshold) {
      iteration <- iteration + 1
      ### Calculate cell type probabilities
      CelestaObj <- cell_prob(CelestaObj, cell_type_num, unassigned_cells, round)
      old_cell_assignment <- CelestaObj@current_cell_type_assignment[, round]
      ### Update cell type assignment
      CelestaObj <- cell_type(CelestaObj, cell_type_num, unassigned_cells, round,
                              min_difference = min_diff,
                              min_prob = min_probability,
                              high_marker_threshold = high_marker_threshold_iteration,
                              low_marker_threshold = low_marker_threshold_iteration)
      cell_type_count <- count_cell_type(CelestaObj, cell_type_num, round)
      print(cell_type_count)
      new_cell_assignment <- CelestaObj@current_cell_type_assignment[, round]
      current_number_of_cells_changed[iteration] <-
        length(which((old_cell_assignment - new_cell_assignment) != 0)) /
        number_of_cells_to_find_identity
      print(current_number_of_cells_changed[iteration])
      if (current_number_of_cells_changed[iteration] < cell_change_threshold) {
        break
      }
      #############
      ### Find cells to check
      unassigned_cells <- find_unassigned_cells(CelestaObj, round)
      assigned_cells <- find_assigned_cells(CelestaObj, round)
      if (length(unassigned_cells) == 0) {
        break
      }
      #############
      ### Calculate beta
      CelestaObj <- refresh_spatial_terms(CelestaObj, cell_type_num, round,
                                          unassigned_cells, assigned_cells)
      ############
      ### Update prior cell-type marker matrix
      CelestaObj <- update_prior_matrix(CelestaObj, round, cell_type_num)
      ### Update scoring function
      CelestaObj <- scoring_function(CelestaObj, round, unassigned_cells, cell_type_num)
    }
  }
  # NOTE(review): `imaging_data` is not an argument of this function and is
  # not defined in it, so it is looked up in the calling environment chain
  # (in practice the global environment). Confirm this is intended.
  CelestaObj <- get_final_inferred_cell_types(total_rounds, CelestaObj, imaging_data)
  return(CelestaObj)
}
#############################################################################################
#############################################################################################
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
library(testthat)
library(CELESTA)

test_check("CELESTA")
--------------------------------------------------------------------------------
/tests/testthat/test-CELESTA_functions.R:
--------------------------------------------------------------------------------
library(testthat)
library(dplyr)
library(devtools)
library(Rmixmod)
library(spdep)
library(ggplot2)
library(reshape2)
library(zeallot)

# Load the original CELESTA functions
# Note that some of the slot names were changed in the original to match
# the new version for comparison.
source("../CELESTA_functions_orig.R")

# Load data
load(file = "../../data/prior_marker_info.rda")
load(file = "../../data/imaging_data.rda")

# Compare two Celesta objects slot by slot.
#
# Only slots that exist in both objects are compared; a slot that was removed
# from one of the versions is skipped. Slots of type double are compared with
# expect_equal() (numeric tolerance), everything else with expect_identical().
#
# The expectations must not be wrapped in tryCatch(error = ...): testthat
# signals a failed expectation as an error condition, so such a handler would
# swallow every mismatch and the tests would pass no matter what.
compareCelesta <- function(actual, expected) {
  shared_slots <- intersect(slotNames(actual), slotNames(expected))
  for (slot_name in shared_slots) {
    a <- slot(actual, slot_name)
    e <- slot(expected, slot_name)

    if (typeof(a) == "double") {
      expect_equal(e, a,
        label = paste0("expected@", slot_name),
        expected.label = paste0("actual@", slot_name)
      )
    } else {
      expect_identical(e, a,
        label = paste0("expected@", slot_name),
        expected.label = paste0("actual@", slot_name)
      )
    }
  }
  invisible(TRUE)
}

test_that("CreateCelestaObject", {
  actual <- CELESTA::CreateCelestaObject(
    project_title = "project_title",
    prior_marker_info,
    imaging_data
  )
  expected <- CreateCELESTAobj(
    project_title = "project_title",
    prior_marker_info,
    imaging_data
  )
  compareCelesta(actual, expected)
})

test_that("FilterCells", {
  celesta_obj <- CELESTA::CreateCelestaObject(
    project_title = "project_title",
    prior_marker_info,
    imaging_data
  )
  CelestaObj <- CreateCELESTAobj(
    project_title = "project_title",
    prior_marker_info,
    imaging_data
  )

  actual <- CELESTA::FilterCells(celesta_obj,
    high_marker_threshold = 0.9,
    low_marker_threshold = 0.5
  )
  expected <- cell_filtering(
    high_marker_threshold = 0.9, low_marker_threshold = 0.5,
    CelestaObj
  )
  compareCelesta(actual, expected)
})

test_that("AssignCells", {
  celesta_obj <- CELESTA::CreateCelestaObject(
    project_title = "project_title",
    prior_marker_info,
    imaging_data
  )
  CelestaObj <- CreateCELESTAobj(
    project_title = "project_title",
    prior_marker_info,
    imaging_data
  )

  celesta_obj <- CELESTA::FilterCells(celesta_obj,
    high_marker_threshold = 0.9,
    low_marker_threshold = 0.5
  )
  CelestaObj <- cell_filtering(
    high_marker_threshold = 0.9, low_marker_threshold = 0.5,
    CelestaObj
  )

  actual <- CELESTA::AssignCells(celesta_obj,
    max_iteration = 10, cell_change_threshold = 0.01,
    high_expression_threshold_anchor = high_marker_threshold_anchor,
    low_expression_threshold_anchor = low_marker_threshold_anchor,
    high_expression_threshold_index = high_marker_threshold_iteration,
    low_expression_threshold_index = low_marker_threshold_iteration
  )
  expected <- assign_cell_main(CelestaObj,
    max_iteration = 10, cell_change_threshold = 0.01,
    high_marker_threshold_anchor = high_marker_threshold_anchor,
    low_marker_threshold_anchor = low_marker_threshold_anchor,
    high_marker_threshold_iteration = high_marker_threshold_iteration,
    low_marker_threshold_iteration = low_marker_threshold_iteration
  )
  compareCelesta(actual, expected)
})
--------------------------------------------------------------------------------