├── .github
└── workflows
│ └── python-publish.yml
├── CHANGELOG
├── LICENSE
├── README.rst
├── c
└── utils.cpp
├── doc
├── Makefile
├── conf.py
├── images
│ ├── de_novo.png
│ ├── de_novo_celltype.png
│ ├── diagplot_centroid_2.png
│ ├── diagplot_centroid_30.png
│ ├── diagplot_centroid_5.png
│ ├── diagplot_centroid_8.png
│ ├── domain_composition.png
│ ├── domain_composition_all.png
│ ├── domains.png
│ ├── domains_individual.png
│ ├── final.png
│ ├── guided.png
│ ├── kernel_bw.png
│ ├── local_max_threshold_gene.png
│ ├── local_max_threshold_knn.png
│ ├── local_max_threshold_knn2.png
│ ├── local_max_threshold_knn3.png
│ ├── local_max_threshold_total.png
│ ├── mask.png
│ ├── maxima.png
│ ├── segmented_celltype_map.png
│ ├── tsne.png
│ ├── tsne_final.png
│ ├── tsne_merged.png
│ └── tsne_removed.png
├── index.rst
├── ssam.rst
├── userguide.rst
└── userguide
│ ├── 01-tldr.rst
│ ├── 02-installation.rst
│ ├── 03-data.rst
│ ├── 04-kde.rst
│ ├── 05-kernel_shape.rst
│ ├── 06-kernel_bandwidth.rst
│ ├── 07-input_mask.rst
│ ├── 08-guided.rst
│ ├── 09-celltype_map_thresh_g.rst
│ ├── 10-de_novo.rst
│ ├── 11-max_filtering.rst
│ ├── 12-clustering.rst
│ ├── 13-diagnostic.rst
│ ├── 14-cluster_annotation.rst
│ ├── 15-celltype_map_thresh_d.rst
│ ├── 16-visualisation.rst
│ ├── 17-domain.rst
│ ├── 18-composition.rst
│ ├── 19-experimental.rst
│ ├── 20-aaec.rst
│ └── 21-segment_celltype_map.rst
├── requirements.txt
├── setup.py
└── ssam
└── __init__.py
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | release:
8 | types: [created]
9 | workflow_dispatch:
10 |
11 | jobs:
12 | deploy:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: '3.x'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine numpy
25 | - name: Build and publish
26 | env:
27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 | run: |
30 | python setup.py sdist
31 | twine upload dist/*
32 |
--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
1 | 2019-10-13
2 | v 1.0.0b - Initial release
3 | 2019-10-19
4 | v 1.0.1 - Added documentation, corrected the default parameters of the methods
5 | 2021-04-16
6 | v 1.0.2 - Added more documentation (read the docs), minor bug fixes
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 |     <one line to give the program's name and a brief idea of what it does.>
633 |     Copyright (C) <year>  <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published
637 | by the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | Notice
2 | ======
3 |
4 | This repository is no longer maintained. Further development of SSAM will be continued in the https://github.com/pnucolab/ssam repository.
5 |
6 | SSAM (Spot-based Spatial cell-type Analysis by Multidimensional mRNA density estimation)
7 | ========================================================================================
8 |
9 | Author: Jeongbin Park (jeongbin.park@charite.de)\ :sup:`1,2` and Wonyl Choi (wonyl@bu.edu)\ :sup:`3`
10 |
11 | :sup:`1`\ Digital Health Center, Berlin Institute of Health (BIH) and Charité – Universitätsmedizin, Berlin, Germany; :sup:`2`\ Faculty of Biosciences, Heidelberg University, Heidelberg, Germany; :sup:`3`\ Department of Computer Science, Boston University, Boston, the United States of America
12 |
13 | (Not referring to this :laughing:: https://en.wikipedia.org/wiki/Ssam)
14 |
15 | This project was done under supervision of Dr. Naveed Ishaque (naveed.ishaque@charite.de) and Prof. Roland Eils (roland.eils@charite.de), and in collaboration with the SpaceTx consortium and the Human Cell Atlas project.
16 |
17 | Please also check our example Jupyter notebooks here: https://github.com/eilslabs/ssam_example
18 |
19 | Prerequisites
20 | =============
21 |
22 | Currently, SSAM has only been tested with Python 3 in a Linux environment. In addition to this package, SSAM requires a local R installation with the pre-installed packages ``feather`` and ``sctransform``. For details, please follow the instructions here: https://ssam.readthedocs.io/en/release/userguide/01-tldr.html#installation
23 |
24 | Install
25 | =======
26 |
27 | https://ssam.readthedocs.io/en/release/userguide/01-tldr.html#installation
28 |
29 | Documentation
30 | =============
31 |
32 | https://ssam.readthedocs.io/
33 |
34 | Citations
35 | =========
36 |
37 | Jeongbin Park, Wonyl Choi, Sebastian Tiesmeyer, Brian Long, Lars E. Borm, Emma Garren, Thuc Nghi Nguyen, Bosiljka Tasic, Simone Codeluppi, Tobias Graf, Matthias Schlesner, Oliver Stegle, Roland Eils & Naveed Ishaque. "`Cell segmentation-free inference of cell types from in situ transcriptomics data <https://www.nature.com/articles/s41467-021-23807-4>`_" *Nature Communications* **12**, 3545 (2021).
38 |
39 | License
40 | =======
41 |
42 | Copyright (C) 2018 Jeongbin Park and Wonyl Choi
43 |
44 | This program is free software: you can redistribute it and/or modify
45 | it under the terms of the GNU Affero General Public License as published
46 | by the Free Software Foundation, either version 3 of the License, or
47 | (at your option) any later version.
48 |
49 | This program is distributed in the hope that it will be useful,
50 | but WITHOUT ANY WARRANTY; without even the implied warranty of
51 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
52 | GNU Affero General Public License for more details.
53 |
54 | You should have received a copy of the GNU Affero General Public License
55 | along with this program. If not, see https://www.gnu.org/licenses/.
56 |
--------------------------------------------------------------------------------
/c/utils.cpp:
--------------------------------------------------------------------------------
1 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #if defined(_OPENMP)
8 | #include
9 | #else
10 | typedef int omp_int_t;
11 | inline omp_int_t omp_get_thread_num() { return 0;}
12 | inline omp_int_t omp_get_max_threads() { return 1;}
13 | #endif
14 |
15 | #include
16 | #include "numpy/npy_math.h"
17 | #include "numpy/arrayobject.h"
18 |
19 | #define I2D(X, Y, YL) ((X) * (YL) + (Y))
20 | #define I3D(X, Y, Z, YL, ZL) (((X) * (YL) * (ZL)) + ((Y) * (ZL)) + (Z))
21 |
// 2D grid coordinate (row x, column y); element type of the BFS queue
// used by flood_fill for 2D vector fields.
struct pos2d {
    long x;
    long y;
};
26 |
// 3D grid coordinate; element type of the BFS queue used by flood_fill
// for 3D vector fields.
struct pos3d {
    long x;
    long y;
    long z;
};
32 |
// Isotropic Gaussian kernel, exp(-||(x, y, z)||^2 / 2).
// Deliberately unnormalized: the 1/(2*pi)^(3/2) factor is omitted, so the
// caller is responsible for any rescaling of the KDE result.
static double gauss_kernel(double x, double y, double z) {
    double sq_norm = x * x + y * y + z * z;
    return exp(sq_norm * -0.5);
}
36 |
37 | static void kde(double bandwidth, double *x, double *y, double *z, double* query_x, double* query_y, double *query_z, double *rtn, unsigned int num_points, unsigned int num_querys, double (*kernel)(double, double, double), double maxdist, int ncores) {
38 | unsigned int i, j;
39 | double d;
40 | #pragma omp parallel for num_threads(ncores) private(i, j, d)
41 | for (i=0; i(kwlist), &h, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &kernel, &ncores)) return NULL;
111 | if ((arr1 = (PyArrayObject*)PyArray_FROM_OTF(arg1, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) return NULL;
112 | if ((arr2 = (PyArrayObject*)PyArray_FROM_OTF(arg2, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) goto fail;
113 | if ((arr3 = (PyArrayObject*)PyArray_FROM_OTF(arg3, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) goto fail;
114 | if ((arr4 = (PyArrayObject*)PyArray_FROM_OTF(arg4, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) goto fail;
115 | if ((arr5 = (PyArrayObject*)PyArray_FROM_OTF(arg5, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) goto fail;
116 | if ((arr6 = (PyArrayObject*)PyArray_FROM_OTF(arg6, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) goto fail;
117 |
118 | if (PyArray_NDIM(arr1) != 1 || PyArray_NDIM(arr2) != 1 || PyArray_NDIM(arr3) != 1 ||
119 | PyArray_NDIM(arr4) != 1 || PyArray_NDIM(arr5) != 1 || PyArray_NDIM(arr6) != 1)
120 | {
121 | goto fail;
122 | }
123 |
124 | npts = PyArray_DIMS(arr1)[0];
125 | nqrys = PyArray_DIMS(arr4)[0];
126 | nqrys_npy = nqrys;
127 |
128 | oarr = (PyArrayObject*)PyArray_ZEROS(1, &nqrys_npy, NPY_DOUBLE, NPY_CORDER);
129 |
130 | x = (double *)PyArray_DATA(arr1);
131 | y = (double *)PyArray_DATA(arr2);
132 | z = (double *)PyArray_DATA(arr3);
133 | qx = (double *)PyArray_DATA(arr4);
134 | qy = (double *)PyArray_DATA(arr5);
135 | qz = (double *)PyArray_DATA(arr6);
136 | rtn = (double *)PyArray_DATA(oarr);
137 |
138 | maxdist_gauss = sqrt(2) * h * log((double)(1000000 * npts));
139 | kde(h, x, y, z, qx, qy, qz, rtn, npts, nqrys, gauss_kernel, maxdist_gauss, ncores);
140 |
141 | Py_DECREF(arr1);
142 | Py_DECREF(arr2);
143 | Py_DECREF(arr3);
144 | Py_DECREF(arr4);
145 | Py_DECREF(arr5);
146 | Py_DECREF(arr6);
147 |
148 | return (PyObject *) oarr;
149 |
150 | fail:
151 | Py_XDECREF(arr1);
152 | Py_XDECREF(arr2);
153 | Py_XDECREF(arr3);
154 | Py_XDECREF(arr4);
155 | Py_XDECREF(arr5);
156 | Py_XDECREF(arr6);
157 | return NULL;
158 | }
159 |
160 | static PyObject *flood_fill(PyObject *self, PyObject *args, PyObject *kwargs) {
161 | PyObject *arg1 = NULL;
162 | PyObject *arg2 = NULL;
163 | PyObject* filled_poslist = NULL;
164 | PyArrayObject *arr1 = NULL;
165 | PyArrayObject *arr2 = NULL;
166 | long nvec, nd, ngene = 0;
167 | long *pos, x, y, z, cnt;
168 | double r = 0.6, *vf;
169 | npy_intp *dimsp;
170 | int min_pixels = 10, max_pixels=2000;
171 | int i;
172 | bool *mask;
173 |
174 | static const char *kwlist[] = { "pos", "vf", "r", "min_pixels", "max_pixels", NULL };
175 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|dii", const_cast(kwlist), &arg1, &arg2, &r, &min_pixels, &max_pixels)) return NULL;
176 | if ((arr1 = (PyArrayObject*)PyArray_FROM_OTF(arg1, NPY_LONG, NPY_ARRAY_IN_ARRAY)) == NULL) return NULL;
177 | if ((arr2 = (PyArrayObject*)PyArray_FROM_OTF(arg2, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) goto fail;
178 | if (PyArray_NDIM(arr1) != 1) goto fail;
179 | nd = PyArray_NDIM(arr2);
180 | dimsp = PyArray_DIMS(arr2);
181 | nvec = 1;
182 | for (i=0; i queue2d;
193 | queue2d.push(pos2d());
194 | queue2d.back().x = pos[0];
195 | queue2d.back().y = pos[1];
196 | while (queue2d.size() > 0) {
197 | x = queue2d.front().x;
198 | y = queue2d.front().y;
199 | PyObject *t = PyTuple_New(2);
200 | PyTuple_SetItem(t, 0, PyLong_FromLong(x));
201 | PyTuple_SetItem(t, 1, PyLong_FromLong(y));
202 | cnt += 1;
203 | if (cnt > max_pixels)
204 | break;
205 | PyList_Append(filled_poslist, t);
206 | queue2d.pop();
207 | if (x < dimsp[0] - 1 && mask[I2D(x + 1, y, dimsp[1])] == false &&
208 | __corr__(vf + (I2D(pos[0], pos[1], dimsp[1]) * ngene),
209 | vf + (I2D(x + 1, y, dimsp[1]) * ngene), ngene) > r) {
210 | mask[I2D(x + 1, y, dimsp[1])] = true;
211 | queue2d.push(pos2d());
212 | queue2d.back().x = x + 1;
213 | queue2d.back().y = y;
214 | }
215 | if (x > 1 && mask[I2D(x - 1, y, dimsp[1])] == false &&
216 | __corr__(vf + (I2D(pos[0], pos[1], dimsp[1]) * ngene),
217 | vf + (I2D(x - 1, y, dimsp[1]) * ngene), ngene) > r) {
218 | mask[I2D(x - 1, y, dimsp[1])] = true;
219 | queue2d.push(pos2d());
220 | queue2d.back().x = x - 1;
221 | queue2d.back().y = y;
222 | }
223 | if (y < dimsp[1] - 1 && mask[I2D(x, y + 1, dimsp[1])] == false &&
224 | __corr__(vf + (I2D(pos[0], pos[1], dimsp[1]) * ngene),
225 | vf + (I2D(x, y + 1, dimsp[1]) * ngene), ngene) > r) {
226 | mask[I2D(x, y + 1, dimsp[1])] = true;
227 | queue2d.push(pos2d());
228 | queue2d.back().x = x;
229 | queue2d.back().y = y + 1;
230 | }
231 | if (y > 1 && mask[I2D(x, y - 1, dimsp[1])] == false &&
232 | __corr__(vf + (I2D(pos[0], pos[1], dimsp[1]) * ngene),
233 | vf + (I2D(x, y - 1, dimsp[1]) * ngene), ngene) > r) {
234 | mask[I2D(x, y - 1, dimsp[1])] = true;
235 | queue2d.push(pos2d());
236 | queue2d.back().x = x;
237 | queue2d.back().y = y - 1;
238 | }
239 | }
240 | } else if (nd == 4) {
241 | // 3D
242 | std::queue queue3d;
243 | queue3d.push(pos3d());
244 | queue3d.back().x = pos[0];
245 | queue3d.back().y = pos[1];
246 | queue3d.back().z = pos[2];
247 | while (queue3d.size() > 0) {
248 | x = queue3d.front().x;
249 | y = queue3d.front().y;
250 | z = queue3d.front().z;
251 | PyObject *t = PyTuple_New(3);
252 | PyTuple_SetItem(t, 0, PyLong_FromLong(x));
253 | PyTuple_SetItem(t, 1, PyLong_FromLong(y));
254 | PyTuple_SetItem(t, 2, PyLong_FromLong(z));
255 | PyList_Append(filled_poslist, t);
256 | cnt += 1;
257 | if (cnt > max_pixels)
258 | break;
259 | queue3d.pop();
260 | if (x < dimsp[0] - 1 && mask[I3D(x + 1, y, z, dimsp[1], dimsp[2])] == false &&
261 | __corr__(vf + I3D(pos[0], pos[1], pos[2], dimsp[1], dimsp[2]) * ngene,
262 | vf + I3D(x + 1, y, z, dimsp[1], dimsp[2]) * ngene, ngene) > r) {
263 | mask[I3D(x + 1, y, z, dimsp[1], dimsp[2])] = true;
264 | queue3d.push(pos3d());
265 | queue3d.back().x = x + 1;
266 | queue3d.back().y = y;
267 | queue3d.back().z = z;
268 | }
269 | if (x > 1 && mask[I3D(x - 1, y, z, dimsp[1], dimsp[2])] == false &&
270 | __corr__(vf + I3D(pos[0], pos[1], pos[2], dimsp[1], dimsp[2]) * ngene,
271 | vf + I3D(x - 1, y, z, dimsp[1], dimsp[2]) * ngene, ngene) > r) {
272 | mask[I3D(x - 1, y, z, dimsp[1], dimsp[2])] = true;
273 | queue3d.push(pos3d());
274 | queue3d.back().x = x - 1;
275 | queue3d.back().y = y;
276 | queue3d.back().z = z;
277 | }
278 | if (y < dimsp[1] - 1 && mask[I3D(x, y + 1, z, dimsp[1], dimsp[2])] == false &&
279 | __corr__(vf + I3D(pos[0], pos[1], pos[2], dimsp[1], dimsp[2]) * ngene,
280 | vf + I3D(x, y + 1, z, dimsp[1], dimsp[2]) * ngene, ngene) > r) {
281 | mask[I3D(x, y + 1, z, dimsp[1], dimsp[2])] = true;
282 | queue3d.push(pos3d());
283 | queue3d.back().x = x;
284 | queue3d.back().y = y + 1;
285 | queue3d.back().z = z;
286 | }
287 | if (y > 1 && mask[I3D(x, y - 1, z, dimsp[1], dimsp[2])] == false &&
288 | __corr__(vf + I3D(pos[0], pos[1], pos[2], dimsp[1], dimsp[2]) * ngene,
289 | vf + I3D(x, y - 1, z, dimsp[1], dimsp[2]) * ngene, ngene) > r) {
290 | mask[I3D(x, y - 1, z, dimsp[1], dimsp[2])] = true;
291 | queue3d.push(pos3d());
292 | queue3d.back().x = x;
293 | queue3d.back().y = y - 1;
294 | queue3d.back().z = z;
295 | }
296 | if (z < dimsp[2] - 1 && mask[I3D(x, y, z + 1, dimsp[1], dimsp[2])] == false &&
297 | __corr__(vf + I3D(pos[0], pos[1], pos[2], dimsp[1], dimsp[2]) * ngene,
298 | vf + I3D(x, y, z + 1, dimsp[1], dimsp[2]) * ngene, ngene) > r) {
299 | mask[I3D(x, y, z, dimsp[1], dimsp[2])] = true;
300 | queue3d.push(pos3d());
301 | queue3d.back().x = x;
302 | queue3d.back().y = y;
303 | queue3d.back().z = z + 1;
304 | }
305 | if (z > 1 && mask[I3D(x, y, z - 1, dimsp[1], dimsp[2])] == false &&
306 | __corr__(vf + I3D(pos[0], pos[1], pos[2], dimsp[1], dimsp[2]) * ngene,
307 | vf + I3D(x, y, z - 1, dimsp[1], dimsp[2]) * ngene, ngene) > r) {
308 | mask[I3D(x, y, z - 1, dimsp[1], dimsp[2])] = true;
309 | queue3d.push(pos3d());
310 | queue3d.back().x = x;
311 | queue3d.back().y = y;
312 | queue3d.back().z = z - 1;
313 | }
314 | }
315 | }
316 | free((void*)mask);
317 | Py_DECREF(arr1);
318 | Py_DECREF(arr2);
319 | if (cnt > max_pixels || cnt < min_pixels)
320 | PyList_SetSlice(filled_poslist, 0, PyList_Size(filled_poslist), NULL);
321 | return (PyObject *) filled_poslist;
322 |
323 | fail:
324 | Py_XDECREF(arr1);
325 | Py_XDECREF(arr2);
326 | return NULL;
327 | }
328 |
329 | static PyObject *calc_corrmap(PyObject *self, PyObject *args, PyObject *kwargs) {
330 | PyObject *arg1 = NULL;
331 | PyArrayObject *arr1 = NULL;
332 | PyArrayObject *oarr = NULL;
333 | long i, x, y, z, dx, dy, dz;
334 | long nvec, nd, ngene = 0;
335 | double *vecs, *corrmap;
336 | npy_intp *dimsp;
337 | int ncores = omp_get_max_threads();
338 | int csize = 1;
339 | double *tmpvec;
340 |
341 | static const char *kwlist[] = { "vf", "ncores", "size", NULL };
342 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ii", const_cast(kwlist), &arg1, &ncores, &csize)) return NULL;
343 | if ((arr1 = (PyArrayObject*)PyArray_FROM_OTF(arg1, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) return NULL;
344 | nd = PyArray_NDIM(arr1);
345 | if (nd != 3 && nd != 4) goto fail; // only 2D or 3D array is expected
346 | dimsp = PyArray_DIMS(arr1);
347 | oarr = (PyArrayObject*)PyArray_ZEROS(nd - 1, dimsp, NPY_DOUBLE, NPY_CORDER);
348 | ngene = dimsp[nd-1];
349 | corrmap = (double *)PyArray_DATA(oarr);
350 | vecs = (double *)PyArray_DATA(arr1);
351 | nvec = 1;
352 | for (i=0; i(kwlist), &arg1, &ncores, &csize)) return NULL;
435 | if ((arr1 = (PyArrayObject*)PyArray_FROM_OTF(arg1, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) return NULL;
436 | nd = PyArray_NDIM(arr1);
437 | if (nd != 3 && nd != 4) goto fail; // only 2D or 3D array is expected
438 | dimsp = PyArray_DIMS(arr1);
439 | for (i=0; i(kwlist), &arg1, &arg2, &ncores)) return NULL;
513 | if ((arr1 = (PyArrayObject*)PyArray_FROM_OTF(arg1, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) return NULL;
514 | if ((arr2 = (PyArrayObject*)PyArray_FROM_OTF(arg2, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY)) == NULL) goto fail;
515 | if (PyArray_NDIM(arr1) != 1) goto fail;
516 | nd = PyArray_NDIM(arr2);
517 | if((ngene = *PyArray_DIMS(arr1)) != PyArray_DIMS(arr2)[nd-1]) goto fail;
518 |
519 | dimsp = PyArray_DIMS(arr2);
520 | oarr = (PyArrayObject*)PyArray_ZEROS(nd - 1, dimsp, NPY_DOUBLE, NPY_CORDER);
521 |
522 | nvec = 1;
523 | for (i=0; i= 3
588 | static struct PyModuleDef moduledef = {
589 | PyModuleDef_HEAD_INIT,
590 | "analysis_utils",
591 | NULL,
592 | -1,
593 | module_methods
594 | };
595 | #endif
596 |
/* Extension entry point. On Python 3 creates the module from `moduledef`;
 * on Python 2 registers `module_methods` under the name "utils".
 * import_array() initializes the NumPy C API and must run before any
 * PyArray_* call elsewhere in this file. */
PyMODINIT_FUNC
PyInit_utils(void)
{
#if PY_MAJOR_VERSION >= 3
PyObject *module = PyModule_Create(&moduledef);
#else
Py_InitModule("utils", module_methods);
#endif
import_array();
#if PY_MAJOR_VERSION >= 3
return module;
#endif
}
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#
# NOTE: recipe lines below must begin with a literal TAB character.

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = SSAM
SOURCEDIR     = .
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# SSAM documentation build configuration file, created by
# sphinx-quickstart on Thu Nov 22 11:41:04 2018.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('..'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.mathjax',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'SSAM'
copyright = '2018, Jeongbin Park'
author = 'Jeongbin Park'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '1.0.1'
# The full version, including alpha/beta/rc tags.
release = '1.0.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# NOTE(review): newer Sphinx versions warn on language=None and fall back to
# 'en'; consider setting language = 'en' explicitly.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ['build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme_path = ["_themes", ]

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
#html_static_path = ['static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
#html_sidebars = {
#    '**': [
#        'relations.html',  # needs 'show_related': True theme option to display
#        'searchbox.html',
#    ]
#}


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'SSAMdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'SSAM.tex', 'SSAM Documentation',
     'Jeongbin Park', 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'ssam', 'SSAM Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'SSAM', 'SSAM Documentation',
     author, 'SSAM', 'One line description of project.',
     'Miscellaneous'),
]




# Example configuration for intersphinx: refer to the Python standard library.
# Fixed: the legacy "non-named" form {'<url>': None} is deprecated (and
# rejected by modern Sphinx); intersphinx now expects named entries mapping
# an identifier to a (target URL, inventory) pair.
intersphinx_mapping = {'python': ('https://docs.python.org/', None)}

# Mock the compiled C extension so autodoc can import ssam without building it.
autodoc_mock_imports = ["ssam.utils"]
--------------------------------------------------------------------------------
/doc/images/de_novo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/de_novo.png
--------------------------------------------------------------------------------
/doc/images/de_novo_celltype.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/de_novo_celltype.png
--------------------------------------------------------------------------------
/doc/images/diagplot_centroid_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/diagplot_centroid_2.png
--------------------------------------------------------------------------------
/doc/images/diagplot_centroid_30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/diagplot_centroid_30.png
--------------------------------------------------------------------------------
/doc/images/diagplot_centroid_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/diagplot_centroid_5.png
--------------------------------------------------------------------------------
/doc/images/diagplot_centroid_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/diagplot_centroid_8.png
--------------------------------------------------------------------------------
/doc/images/domain_composition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/domain_composition.png
--------------------------------------------------------------------------------
/doc/images/domain_composition_all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/domain_composition_all.png
--------------------------------------------------------------------------------
/doc/images/domains.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/domains.png
--------------------------------------------------------------------------------
/doc/images/domains_individual.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/domains_individual.png
--------------------------------------------------------------------------------
/doc/images/final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/final.png
--------------------------------------------------------------------------------
/doc/images/guided.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/guided.png
--------------------------------------------------------------------------------
/doc/images/kernel_bw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/kernel_bw.png
--------------------------------------------------------------------------------
/doc/images/local_max_threshold_gene.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/local_max_threshold_gene.png
--------------------------------------------------------------------------------
/doc/images/local_max_threshold_knn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/local_max_threshold_knn.png
--------------------------------------------------------------------------------
/doc/images/local_max_threshold_knn2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/local_max_threshold_knn2.png
--------------------------------------------------------------------------------
/doc/images/local_max_threshold_knn3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/local_max_threshold_knn3.png
--------------------------------------------------------------------------------
/doc/images/local_max_threshold_total.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/local_max_threshold_total.png
--------------------------------------------------------------------------------
/doc/images/mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/mask.png
--------------------------------------------------------------------------------
/doc/images/maxima.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/maxima.png
--------------------------------------------------------------------------------
/doc/images/segmented_celltype_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/segmented_celltype_map.png
--------------------------------------------------------------------------------
/doc/images/tsne.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/tsne.png
--------------------------------------------------------------------------------
/doc/images/tsne_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/tsne_final.png
--------------------------------------------------------------------------------
/doc/images/tsne_merged.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/tsne_merged.png
--------------------------------------------------------------------------------
/doc/images/tsne_removed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HiDiHlabs/ssam/76615319930635210dbfd9eba1f20c1632c0a673/doc/images/tsne_removed.png
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 |
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 | :caption: Contents
7 |
8 | userguide
9 | ssam
10 |
11 |
12 | Indices and tables
13 | ==================
14 |
15 | * :ref:`genindex`
16 |
--------------------------------------------------------------------------------
/doc/ssam.rst:
--------------------------------------------------------------------------------
1 | Module contents
2 | ---------------
3 |
4 | .. automodule:: ssam
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/doc/userguide.rst:
--------------------------------------------------------------------------------
1 | Spatial gene expression analysis with SSAM
2 | ------------------------------------------
3 |
4 | .. toctree::
5 | :glob:
6 |
7 | userguide/*
8 |
--------------------------------------------------------------------------------
/doc/userguide/01-tldr.rst:
--------------------------------------------------------------------------------
1 | quick start / tldr page
2 | =======================
3 |
4 | This tl;dr guide is for you if you already know what happens in a SSAM
5 | analysis or if you don’t care.
6 |
7 | For everyone else we recommend using the full
8 | `userguide <../userguide.html>`__.
9 |
10 | Installation
11 | ------------
12 |
13 | Setup a ``conda`` environment:
14 |
15 | ::
16 |
17 | conda create -n ssam python=3.6
18 | conda activate ssam
19 | conda install gxx_linux-64 numpy pip R=3.6 pyarrow=0.15.1
20 |
21 | Do this in ``R``:
22 |
23 | ::
24 |
25 | install.packages("sctransform")
26 | install.packages("feather")
27 |
28 | Install SSAM via ``pip``:
29 |
30 | ::
31 |
32 | pip install ssam
33 |
34 | Data download
35 | -------------
36 |
37 | ::
38 |
39 | curl "https://zenodo.org/record/3478502/files/supplemental_data_ssam_2019.zip?download=1" -o zenodo.zip
40 | unzip zenodo.zip
41 |
42 | Data preparation
43 | ----------------
44 |
45 | All following steps in ``python``:
46 |
47 | ::
48 |
49 | import numpy as np
50 | import pandas as pd
51 | import matplotlib.pyplot as plt
52 | import ssam
53 |
54 | df = pd.read_csv(
55 | "zenodo/multiplexed_smFISH/raw_data/smFISH_MCT_CZI_Panel_0_spot_table.csv",
56 | usecols=['x', 'y', 'z', 'target'])
57 |
58 | um_per_pixel = 0.1
59 |
60 | df.x = (df.x - df.x.min()) * um_per_pixel + 10
61 | df.y = (df.y - df.y.min()) * um_per_pixel + 10
62 | df.z = (df.z - df.z.min()) * um_per_pixel + 10
63 | width = df.x.max() - df.x.min() + 10
64 | height = df.y.max() - df.y.min() + 10
65 |
66 | grouped = df.groupby('target').agg(list)
67 | genes = list(grouped.index)
68 | coord_list = []
69 | for target, coords in grouped.iterrows():
70 | coord_list.append(np.array(list(zip(*coords))))
71 |
72 | Create SSAM dataset and vector field
73 | ------------------------------------
74 |
75 | ::
76 |
77 | ds = ssam.SSAMDataset(genes, coord_list, width, height)
78 | analysis = ssam.SSAMAnalysis(
79 | ds,
80 | ncores=10, # used for kde step
81 | save_dir="kde/",
82 | verbose=True)
83 |
84 | analysis.run_kde(bandwidth=2.5, use_mmap=False)
85 |
86 | analysis.find_localmax(
87 | search_size=3,
88 | min_norm=0.2,
89 | min_expression=0.027
90 | )
91 |
92 | analysis.normalize_vectors_sctransform()
93 |
94 | Creating the *de novo* cell map
95 | -------------------------------
96 |
97 | ::
98 |
99 | analysis.cluster_vectors(
100 | min_cluster_size=0,
101 | pca_dims=22,
102 | resolution=0.15,
103 | metric='correlation')
104 |
105 | # post-filtering parameter for cell-type map
106 | filter_method = "local"
107 | filter_params = {
108 | "block_size": 151,
109 | "method": "mean",
110 | "mode": "constant",
111 | "offset": 0.2
112 | }
113 |
114 | analysis.map_celltypes()
115 |     analysis.filter_celltypemaps(min_norm=filter_method, filter_params=filter_params, min_r=0.6, fill_blobs=True, min_blob_area=50)
116 |
117 | .. figure:: ../images/de_novo.png
118 | :alt: Visualisation of cell type map.
119 |
120 | Visualisation of cell type map.
121 |
122 | Creating the tissue domain map
123 | ------------------------------
124 |
125 | ::
126 |
127 | analysis.bin_celltypemaps(step=10, radius=100)
128 | analysis.find_domains(n_clusters=20, merge_remote=True, merge_thres=0.7, norm_thres=1500)
129 |
130 | plt.figure(figsize=[5, 5])
131 |     ds.plot_domains(rotate=1)
132 |
133 | .. figure:: ../images/domains.png
134 |    :alt: Visualisation of final domain map exhibiting clearly separated
135 | domains.
136 |
137 |    Visualisation of final domain map exhibiting clearly separated
138 | domains.
139 |
--------------------------------------------------------------------------------
/doc/userguide/02-installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | A step-by-step guide
5 | --------------------
6 |
7 | The easiest way to prepare a python environment for SSAM is using
8 | `conda `__.
9 | Keeping python projects in isolated environments prevents dependency
10 | version conflicts or conflicts with your OS installation of python which
11 | usually depends on older versions incompatible with current scientific
12 | packages.
13 |
14 | Create your environment:
15 |
16 | ::
17 |
18 | conda create -n ssam python=3.6
19 |
20 | Remember to activate before using it:
21 |
22 | ::
23 |
24 | conda activate ssam
25 |
26 | Now we use conda to install some dependencies into our ssam environment:
27 |
28 | ::
29 |
30 | conda install gxx_linux-64=7.3.0 numpy=1.19.2 pip R=3.6 pyarrow=0.15.1
31 |
32 | Now we can install the R packages ``sctransform`` and ``feather``. Open
33 | R and type:
34 |
35 | ::
36 |
37 | install.packages("sctransform")
38 | install.packages("feather")
39 |
40 | Finally we switch to pip:
41 |
42 | .. raw:: html
43 |
44 |
49 |
50 | ::
51 |
52 | pip install git+https://github.com/HiDiHlabs/ssam.git
53 |
54 | Next we can download and prepare our `data `__.
55 |
56 | SSAM’s source code
57 | ------------------
58 |
59 | In case you want to work with `SSAM’s source
60 | code `__, it is also hosted on github.
61 |
--------------------------------------------------------------------------------
/doc/userguide/03-data.rst:
--------------------------------------------------------------------------------
1 | Data Preparation
2 | ================
3 |
4 | Download VISp data
5 | ------------------
6 |
7 | In this tutorial we work with data of the murine primary visual cortex
8 | (VISp) profiled using multiplexed smFISH. Further details are available
9 | in the SSAM publication (Park et al., 2019).
10 |
11 | First, download the data and unpack it:
12 |
13 | ::
14 |
15 | curl "https://zenodo.org/record/3478502/files/supplemental_data_ssam_2019.zip?download=1" -o zenodo.zip
16 | unzip zenodo.zip
17 |
18 | Load data into python
19 | ---------------------
20 |
21 | Let’s start with loading our python packages:
22 |
23 | ::
24 |
25 | import numpy as np
26 | import pandas as pd
27 | import matplotlib.pyplot as plt
28 | import ssam
29 |
30 | Now we can load the mRNA spot table. Each row describes one mRNA spot
31 | and the columns contain its coordinates and target gene. We load the
32 | required columns into a dataframe:
33 |
34 | ::
35 |
36 | df = pd.read_csv(
37 | "zenodo/multiplexed_smFISH/raw_data/smFISH_MCT_CZI_Panel_0_spot_table.csv",
38 | usecols=['x', 'y', 'z', 'target'])
39 |
40 | If your dataset is organized differently, you will have to reshape it
41 | before continuing with the next steps.
42 |
43 | Transform Data
44 | --------------
45 |
43 | Because SSAM analysis is rooted in a cellular scale we transform the
44 | coordinates from a laboratory system into micrometers. Also we make them
45 | a bit tidier:
46 |
47 | ::
48 |
49 | um_per_pixel = 0.1
50 |
51 | df.x = (df.x - df.x.min()) * um_per_pixel + 10
52 | df.y = (df.y - df.y.min()) * um_per_pixel + 10
53 | df.z = (df.z - df.z.min()) * um_per_pixel + 10
54 |
55 | Prepare data for SSAM
56 | ---------------------
57 |
58 | To create a ``SSAMDataset`` object we need to provide four arguments: -
59 | a list of gene names profiled in the experiment: ``genes`` - a list of
60 | lists that contains the coordinates of each gene: ``coord_list`` - the
61 | ``width`` of the image - the ``height`` of the image
62 |
63 | The width and height are straightforward to infer from the dimensions of
64 | the image:
65 |
66 | ::
67 |
68 | width = df.x.max() - df.x.min() + 10
69 | height = df.y.max() - df.y.min() + 10
70 |
71 | We group the dataframe by gene and create the list of gene names:
72 |
73 | ::
74 |
75 | grouped = df.groupby('target').agg(list)
76 | genes = list(grouped.index)
77 |
78 | And finally the coordinate list:
79 |
80 | ::
81 |
82 | coord_list = []
83 | for target, coords in grouped.iterrows():
84 | coord_list.append(np.array(list(zip(*coords))))
85 |
86 | Create the ``SSAMDataset`` object
87 | ---------------------------------
88 |
89 | With everything in place we can now instantiate the ``SSAMDataset``
90 | object:
91 |
92 | ::
93 |
94 | ds = ssam.SSAMDataset(genes, coord_list, width, height)
95 |
96 | Now we can start the analysis with the `kernel density
97 | estimation `__ step.
98 |
--------------------------------------------------------------------------------
/doc/userguide/04-kde.rst:
--------------------------------------------------------------------------------
1 | Creating the vector field
2 | =========================
3 |
4 | After the data has been loaded, SSAM converts the discrete mRNA
5 | locations into mRNA density (that can be thought of as continuous “gene
6 | expression clouds” over the tissue) through application of `Kernel
7 | Density Estimation `__.
8 |
9 | KDE
10 | ---
11 |
12 | With our ``SSAMDataset`` object ``ds`` we can now initialize a
13 | ``SSAMAnalysis`` object ``analysis``.
14 |
15 | ::
16 |
17 | analysis = ssam.SSAMAnalysis(
18 | ds,
19 | ncores=10, # used for kde step
20 | save_dir="kde/",
21 | verbose=True)
22 |
23 | And calculate a mRNA density estimate with the ``run_kde`` method.
24 | Important considerations here are the `kernel
25 | function `__ and the `kernel
26 | bandwidth `__. As default, we recommend using a
27 | Gaussian kernel with a bandwidth of 2.5:
28 |
29 | ::
30 |
31 | analysis.run_kde(bandwidth=2.5, use_mmap=False)
32 |
33 | Masking
34 | -------
35 |
36 | If you want to perform the analysis on `only a part of your sample you
37 | can use a mask `__. This can restrict what parts of the image
38 | are used for local maxima sampling (the ``input_mask``), or restrict the
39 | cell-type map generation of SSAM to certain regions (the
40 | ``output_mask``). While this is not required for analysis (in fact the
41 | SSAM paper did not apply masks to the osmFISH or MERFISH dataset), here
42 | we define a simple polygon as both the ``input_mask`` and
43 | ``output_mask`` for the VISp region.
44 |
45 | ::
46 |
47 | from matplotlib.path import Path
48 | # manual area annotation
49 | xy = np.array([[1535, 90],
50 | [ 795, 335],
51 | [ 135, 940],
52 | [ 835, 1995],
53 | [1465, 1695],
54 | [2010, 1215]])
55 |
56 | # Extract coordinates from SSAMDataset
57 | x, y = np.meshgrid(np.arange(ds.vf.shape[0]), np.arange(ds.vf.shape[1]))
58 | x, y = x.flatten(), y.flatten()
59 | points = np.vstack((x,y)).T
60 |
61 | path = Path(xy)
62 | input_mask = path.contains_points(points)
63 | input_mask = input_mask.reshape((ds.vf.shape[1], ds.vf.shape[0], 1)).swapaxes(0, 1)
64 | output_mask = input_mask
65 |
66 | We recommend a visual inspection of the mask to make sure it aligns
67 | with the data as you expect it to:
68 |
69 | ::
70 |
71 | from matplotlib.patches import Polygon
72 | from matplotlib.collections import PatchCollection
73 |
74 | patch = Polygon(xy, True)
75 | p = PatchCollection([patch], alpha=0.4)
76 |
77 | plt.figure(figsize=[5, 5])
78 | ds.plot_l1norm(rotate=1, cmap="Greys")
79 | plt.gca().add_collection(p)
80 | plt.axis('off')
81 | plt.savefig('images/mask.png')
82 |
83 | .. figure:: ../images/mask.png
84 | :alt: plot of the mRNA density superimposed with the mask
85 |
86 | plot of the mRNA density superimposed with the mask
87 |
88 | Local maxima search and normalization
89 | -------------------------------------
90 |
91 | In order to reduce the computational burden, we recommend downsampling
92 | the image. While random sampling can be performed, we strongly encourage
93 | downsampling via local maxima selection, followed by `filtering based on
94 | individual and total gene expression `__.
95 |
96 | The local maxima are used to (i) determine the variance stabilisation
97 | parameters for the image, and (ii) be used to determine
98 | `clusters `__ in `de novo analysis `__. In
99 | this section, we will use the local maxima for variance stabilisation.
100 |
101 | Here we apply the ``find_localmax`` function to find the local maxima of
102 | the mRNA density, using a per gene expression threshold of ``0.027`` and
103 | a total gene expression threshold of ``0.2``:
104 |
105 | ::
106 |
107 | analysis.find_localmax(
108 | search_size=3,
109 | min_norm=0.2, # the total gene expression threshold
110 | min_expression=0.027, # the per gene expression threshold
111 | mask=input_mask
112 | )
113 |
114 | Visualization
115 | -------------
116 |
117 | After the local maxima have been identified, they can be visualised. In
118 | cases when many local maxima originate from outside the tissue area a
119 | `k-NN density threshold can be used to filter “stray” local
120 | maxima `__,
121 | however in this example we use an input mask so it is not a problem.
122 |
123 | ::
124 |
125 | plt.figure(figsize=[5, 5])
126 | ds.plot_l1norm(cmap="Greys", rotate=1)
127 | ds.plot_localmax(c="Blue", rotate=1, s=0.1)
128 |
129 | patch = Polygon(xy, facecolor="black", edgecolor="red", linewidth=10, ls="-")
130 | p = PatchCollection([patch], alpha=0.4)
131 | plt.gca().add_collection(p)
132 |
133 | scalebar = ScaleBar(1, 'um') # 1 pixel = 1um
134 | plt.gca().add_artist(scalebar)
135 | plt.tight_layout()
136 | plt.axis('off')
137 | plt.show()
138 |
139 | .. figure:: ../images/maxima.png
140 | :alt: plot found maxima superimposed with the mask
141 |
142 | plot found maxima superimposed with the mask
143 |
144 | Normalization
145 | -------------
146 |
147 | Once the local maxima have been identified, we can use them for
148 | calculating the variance stabilisation parameters using ``sctransform``.
149 | If you receive an error here, make sure that you have installed the R
150 | packages in the `installation `__ step
151 |
152 | This part of the analysis ends with the normalization of the mRNA
153 | density and the local-maximum vectors.
154 |
155 | ::
156 |
157 | analysis.normalize_vectors_sctransform()
158 |
159 | Now we are ready to continue with mapping the cell types in
160 | `guided `__ or `de novo mode `__.
161 |
--------------------------------------------------------------------------------
/doc/userguide/05-kernel_shape.rst:
--------------------------------------------------------------------------------
1 | The shape of the kernel
2 | =======================
3 |
4 | The shape of the kernel is defined by the `kernel
5 | function `__. The
6 | shape of the kernel determines how the mRNA signal is smoothed.
7 |
8 | We adopt the use of the Gaussian kernel due to its popular use in
9 | signal processing, however other kernel functions can be used: - we have
10 | had success in using semi-circle kernels when applied to `ISS data of
11 | the human pancreas `__ -
12 | the `Epanechnikov kernel `__
13 | minimizes AMISE and has therefore been described as optimal
14 |
15 | The following example shows how you can apply a semicircular kernel
16 | instead of a Gaussian.
17 |
18 | ::
19 |
20 | # code to change the shape of the kernel (@sebastiantiesmeyer)
21 |
--------------------------------------------------------------------------------
/doc/userguide/06-kernel_bandwidth.rst:
--------------------------------------------------------------------------------
1 | Kernel bandwidth
2 | ================
3 |
4 | The bandwidth of the kernel controls the amount of smoothing applied.
5 | With a low bandwidth, the smoothing is spread less. With a high bandwidth,
6 | the smoothing is spread more.
7 |
8 | The bandwidth should be set according to 2 factors: - the maximum size
9 | of the bandwidth should not smooth the signals outside of cells. by
10 | default we choose a bandwidth of 2.5 um, as this has a FWTM of ~10um,
11 | which is the average size of cells in the mouse SSp. This worked well
12 | for all examples in the SSAM paper. - the minimum size of the bandwidth
13 | should at least smooth signal to adjacent mRNA. From experience, this is
14 | not an issue for most ISH based techniques, but sequencing based
15 | techniques such as ISS can produce very sparse data and may require
16 | higher bandwidths to smooth signal sufficiently.
17 |
18 | Here is a close-up of the osmFISH mouse SSp dataset which investigates
19 | the effect of adjusting the kernel bandwidth. You can see that with a
20 | bandwidth of 1um the smoothing is sufficient, and with a bandwidth of
21 | 5um it is a little too much. The bandwidth of 2.5um appears to be a good
22 | balance of smoothing adjacent signal, while not smoothing into the
23 | adjacent area or losing sparse cell types.
24 |
25 | |image0|
26 |
27 | .. |image0| image:: ../images/kernel_bw.png
28 |
29 |
--------------------------------------------------------------------------------
/doc/userguide/07-input_mask.rst:
--------------------------------------------------------------------------------
1 | Input masks
2 | ===========
3 |
4 | For some tissue images you may want to restrict analysis to certain
5 | parts of the image. For example, the image may have degradation towards
6 | the edges, you may wish to exclude non tissue areas, or even perhaps
7 | restricting SSAM analysis to previously segmented areas.
8 |
9 | SSAM accepts input masks that are defined as polygons.
10 |
11 | Example for the VISp smFISH dataset:
12 |
13 | ::
14 |
15 | from matplotlib.patches import Polygon
16 | from matplotlib.collections import PatchCollection
17 |
18 | plt.figure(figsize=[5, 5])
19 | ds.plot_l1norm(cmap="Greys", rotate=1)
20 | ds.plot_localmax(c="Blue", rotate=1, s=0.1)
21 |
22 | patch = Polygon(xy, facecolor="black", edgecolor="red", linewidth=10, ls="-")
23 | p = PatchCollection([patch], alpha=0.4)
24 |     plt.gca().add_collection(p)
25 | plt.show()
26 |
27 | |image0|
28 |
29 | After the desired region is selected, a ``mask`` can be created. In this
30 | case we define an ``input_mask`` and ``output_mask`` which restricts all
31 | data processing and reported output to the selected region.
32 |
33 | ::
34 |
35 | from matplotlib.path import Path
36 |
37 | x, y = np.meshgrid(np.arange(ds.vf.shape[0]), np.arange(ds.vf.shape[1]))
38 | x, y = x.flatten(), y.flatten()
39 | points = np.vstack((x,y)).T
40 |
41 | path = Path(xy)
42 | input_mask = path.contains_points(points)
43 | output_mask = input_mask = input_mask.reshape((ds.vf.shape[1], ds.vf.shape[0], 1)).swapaxes(0, 1)
44 |
45 | .. |image0| image:: ../images/mask.png
46 |
47 |
--------------------------------------------------------------------------------
/doc/userguide/08-guided.rst:
--------------------------------------------------------------------------------
1 | SSAM *guided* analysis
2 | ======================
3 |
4 | The main visual output of SSAM is the creation of the cell-type map,
5 | which is created by classifying pixels in the tissue image based on
6 | either predefined or calculated gene expression signatures. When the
7 | gene expression signatures are already known, one can use SSAM in
8 | *guided* mode. When cell type signatures are known beforehand, we
9 | highly recommend running *guided* mode analysis as a quality check.
10 |
11 | Single cell RNA sequencing data
12 | -------------------------------
13 |
14 | We will use scRNA-seq data from `Tasic et al.
15 | 2018 `__ for the guided
16 | analysis. In the paper they identified “shared and distinct
17 | transcriptomic cell types across neocortical areas” in the mouse brain,
18 | also including the mouse VISp (which is our exmaple).
19 |
20 | First we need to load the data:
21 |
22 | ::
23 |
24 | scrna_cl = pd.read_feather("zenodo/multiplexed_smFISH/raw_data/scrna_data_tasic_2018/cl.feather")
25 | scrna_cl_df = pd.read_feather("zenodo/multiplexed_smFISH/raw_data/scrna_data_tasic_2018/cl_df.feather")
26 | scrna_genes = pd.read_feather("zenodo/multiplexed_smFISH/raw_data/scrna_data_tasic_2018/genes.feather")
27 | scrna_counts = pd.read_feather("zenodo/multiplexed_smFISH/raw_data/scrna_data_tasic_2018/counts.feather")
28 |
29 | scrna_clusters = scrna_cl['cluster_id']
30 |
31 | scrna_cl_dic = dict(zip(scrna_cl['cell_id'], scrna_cl['cluster_id']))
32 | scrna_cl_metadata_dic = dict(zip(
33 | scrna_cl_df['cluster_id'],
34 | zip(scrna_cl_df['cluster_label'],
35 | scrna_cl_df['cluster_color'], )
36 | ))
37 |
38 | qc_gene_indices = np.sum(scrna_counts > 0, axis=1) > 5
39 | scrna_genes_qc = np.array(scrna_genes)[qc_gene_indices]
40 |
41 | scrna_counts_qc = np.array(scrna_counts).T[:, qc_gene_indices]
42 |
43 | Normalisation
44 | -------------
45 |
46 | Once the data is loaded, we will normalise it using ``run_sctransform``:
47 |
48 | ::
49 |
50 | scrna_data_normalized = np.array(ssam.run_sctransform(scrna_counts_qc)[0])
51 |
52 | Cell-type gene expression signatures
53 | ------------------------------------
54 |
55 | Once the data is normalised, we can calculate the average gene
56 | expression per cell type (the ``centroids``), which can then be used for
57 | classifying pixels in the image
58 |
59 | ::
60 |
61 | selected_genes_idx = [list(scrna_genes_qc).index(g) for g in ds.genes]
62 | scrna_uniq_clusters = np.unique(scrna_clusters)
63 | scrna_centroids = []
64 | for cl in scrna_uniq_clusters:
65 | scrna_centroids.append(np.mean(scrna_data_normalized[:, selected_genes_idx][scrna_clusters == cl], axis=0))
66 |
67 | Generate a *guided* cell-type map
68 | ---------------------------------
69 |
70 | We can now continue to classify pixels in the tissue image using the
71 | cell-type gene expression signatures from the sc-RNAseq data.
72 |
73 | We map the local maxima vectors to the most similar clusters in the
74 | scRNA-seq data using a `correlation threshold of classifying
75 | pixels of ``0.6`` `__
76 |
77 | ::
78 |
79 | analysis.map_celltypes(scrna_centroids) # map the scRNAseq cell type signatures to the tissue image
80 | analysis.filter_celltypemaps(min_norm=filter_method, filter_params=filter_params, min_r=0.3, output_mask=output_mask) # post-filter cell-type map to remove spurious pixels
81 |
82 | plt.figure(figsize=[5, 5]) # initiate the plotting area
83 | ds.plot_celltypes_map(rotate=1, colors=scrna_colors, set_alpha=False) # SSAM plotting function
84 |
85 | |image0|
86 |
87 | Despite the guided mode producing passable results, we highly recommend
88 | using the `de novo mode for more accurate analysis `__.
89 |
90 | .. |image0| image:: ../images/guided.png
91 |
92 |
--------------------------------------------------------------------------------
/doc/userguide/09-celltype_map_thresh_g.rst:
--------------------------------------------------------------------------------
1 | Thresholding the guided cell-type map
2 | =====================================
3 |
4 | After cell-type signatures are provided, the tissue image can be
5 | classified. The classification of each pixel is based on the Pearson
6 | correlation metric (although an `experimental adversarial autoencoder
7 | based classification method `__ can be applied).
8 |
9 | We found that a minimum correlation threshold (``min_r``) of 0.3 worked
10 | well for guided mode based on single cell RNAseq cell-type signatures,
11 | and 0.6 worked well for *de novo* mode.
12 |
13 | Below we show how the cell-type map changes using correlation thresholds
14 | of ``0.15,0.3,0.45`` using the scRNAseq signatures
15 |
16 | ::
17 |
18 | scrna_uniq_labels = [scrna_cl_metadata_dic[i][0] for i in scrna_uniq_clusters]
19 | scrna_colors = [scrna_cl_metadata_dic[i][1] for i in scrna_uniq_clusters]
20 |
21 | analysis.map_celltypes(scrna_centroids)
22 |
23 | analysis.filter_celltypemaps(min_norm=filter_method, filter_params=filter_params, min_r=0.15, output_mask=output_mask) # post-filter cell-
24 | plt.figure(figsize=[5, 5])
25 | ds.plot_celltypes_map(rotate=1, colors=scrna_colors, set_alpha=False)
26 |
27 | analysis.filter_celltypemaps(min_norm=filter_method, filter_params=filter_params, min_r=0.3, output_mask=output_mask) # post-filter cell-
28 | plt.figure(figsize=[5, 5])
29 | ds.plot_celltypes_map(rotate=1, colors=scrna_colors, set_alpha=False)
30 |
31 | analysis.filter_celltypemaps(min_norm=filter_method, filter_params=filter_params, min_r=0.45, output_mask=output_mask) # post-filter cell-
32 | plt.figure(figsize=[5, 5])
33 | ds.plot_celltypes_map(rotate=1, colors=scrna_colors, set_alpha=False)
34 |
--------------------------------------------------------------------------------
/doc/userguide/10-de_novo.rst:
--------------------------------------------------------------------------------
1 | SSAM *de novo* analysis
2 | =======================
3 |
 4 | While we believe that the `guided mode of SSAM `__ is able
 5 | to generate good cell-type maps rapidly, the *de novo* mode provides much
 6 | more accurate results.
7 |
8 | The steps of the *de novo* analysis are briefly discussed below, with
9 | links to more detailed discussion:
10 |
11 | - `setting cell-type map correlation
12 | threshold `__
13 | - `visualisation of cell-type signatures: heatmap, tSNE,
14 | UMAP `__
15 |
16 | Clustering of expression vectors
17 | --------------------------------
18 |
19 | Once the local maxima have been selected and
20 | `filtered `__, we can perform `clustering
21 | analysis `__. SSAM supports `a number of clustering
22 | methods `__. Here we use the Louvain algorithm with 22
23 | principal components and a resolution of 0.15.
24 |
25 | ::
26 |
27 | analysis.cluster_vectors(
28 | min_cluster_size=0,
29 | pca_dims=22,
30 | resolution=0.15,
31 | metric='correlation')
32 |
33 | Cluster annotation and diagnostics
34 | ----------------------------------
35 |
36 | SSAM provides `diagnostic plots `__ which can be used to
37 | evaluate the quality of clusters, and `facilitates the annotation of
38 | clusters `__.
39 |
40 | Visualising the clusters
41 | ------------------------
42 |
43 | SSAM supports `cluster visualisation via heatmaps, and 2D embedding
44 | (t-SNE and UMAP) `__. Here we give an example of the
45 | t-SNE plot:
46 |
47 | ::
48 |
49 | plt.figure(figsize=[5, 5])
50 | ds.plot_tsne(pca_dims=22, metric="correlation", s=5, run_tsne=True)
51 | plt.savefig('images/tsne.png')
52 |
53 | .. figure:: ../images/tsne.png
54 | :alt: plot of t-SNE embedding of cell types
55 |
56 | plot of t-SNE embedding of cell types
57 |
58 | Cell type map
59 | -------------
60 |
61 | Once the clusters have been evaluated for quality, we can generate the
62 | *de novo* cell-type map. This involves `classifying all the pixels in
63 | the tissue image based on a correlation
64 | threshold `__. For the *de novo* application
65 | ``0.6`` was found to perform well:
66 |
67 | ::
68 |
69 | analysis.map_celltypes()
70 |
71 | filter_params = {
72 | "block_size": 151,
73 | "method": "mean",
74 | "mode": "constant",
75 | "offset": 0.2
76 | }
77 |
78 | analysis.filter_celltypemaps(min_norm="local", filter_params=filter_params, min_r=0.6, fill_blobs=True, min_blob_area=50, output_mask=output_mask)
79 |
80 | ::
81 |
82 | plt.figure(figsize=[5, 5])
83 | ds.plot_celltypes_map(rotate=1, set_alpha=False)
84 | plt.axis('off')
85 | plt.savefig('images/de_novo.png')
86 |
87 | .. figure:: ../images/de_novo.png
88 | :alt: plot of the de novo generated celltype map
89 |
90 | plot of the de novo generated celltype map
91 |
92 | We can now use our celltype map to infer a map of `tissue
93 | domains `__.
94 |
--------------------------------------------------------------------------------
/doc/userguide/11-max_filtering.rst:
--------------------------------------------------------------------------------
1 | Filtering local maxima
2 | ======================
3 |
4 | As demonstrated in the `SSAM
5 | paper `__, local L1
6 | maxima selection is an effective way of downsampling the entire vector
7 | field for faster computation, and they better represent known gene
8 | expression profiles compared to random downsampling.
9 |
10 | However, local maxima in the vector field can arise from undesirable
11 | locations, e.g. singleton mRNAs. Such less informative local maxima
12 | should therefore be filtered out.
13 |
14 | We recommend applying threshold for individual genes, and for the total
15 | gene expression.
16 |
17 | Per gene expression threshold
18 | -----------------------------
19 |
20 | The per gene threshold should be at least the height of a single
21 | Gaussian curve over an mRNA. This can easily be empirically determined
22 | by visual analysis. In this multiplexed smFISH example, the per gene
23 | expression threshold, ``exp_thres`` is set to 0.027
24 |
25 | ::
26 |
27 | exp_thres = 0.027
28 | viewport = 0.1
29 | gindices = np.arange(len(ds.genes))
30 | np.random.shuffle(gindices)
31 | plt.figure(figsize=[5, 7])
32 | for i, gidx in enumerate(gindices[:6], start=1):
33 | ax = plt.subplot(5, 2, i)
34 | n, bins, patches = ax.hist(ds.vf[..., gidx][np.logical_and(ds.vf[..., gidx] > 0, ds.vf[..., gidx] < viewport)], bins=100, log=True, histtype=u'step')
35 | ax.set_xlim([0, viewport])
36 | ax.set_ylim([n[0], n[-1]])
37 | ax.axvline(exp_thres, c='red', ls='--')
38 | ax.set_title(ds.genes[gidx])
39 | ax.set_xlabel("Expression")
40 | ax.set_ylabel("Count")
41 | plt.tight_layout()
42 | pass
43 |
44 | |image0|
45 |
46 | Total gene expression threshold
47 | -------------------------------
48 |
49 | The total gene threshold should be empirically determined by examining the
50 | curve of total gene expression of local maxima. This isn’t always easy,
51 | and we highly encourage investigating this thoroughly.
52 |
53 | ::
54 |
55 | norm_thres = 0.2
56 | gidx = 0
57 | plt.figure(figsize=[5, 2])
58 | #plt.hist(ds.vf[..., gidx][ds.vf[..., gidx] > 0], bins=100, log=True)
59 | n, _, _ = plt.hist(ds.vf_norm[np.logical_and(ds.vf_norm > 0, ds.vf_norm < 0.3)], bins=100, log=True, histtype='step')
60 | ax = plt.gca()
61 | ax.axvline(norm_thres, c='red', ls='--')
62 | ax.set_xlabel("L1-norm")
63 | ax.set_ylabel("Count")
64 |
65 | plt.xlim([0, 0.3])
66 | plt.ylim([np.min(n), np.max(n) + 100000])
67 | pass
68 |
69 | |image1|
70 |
71 | Filtering “stray” local maxima using k-nearest neighbour density
72 | ================================================================
73 |
74 | If there is mRNA signal originating from outside the tissue area (due to
75 | background noise), it would improve downstream analysis to remove such
76 | vectors. We observed this in the osMFISH data. These “stray” local
77 | maxima tend to be less dense than local maxima from the tissue area:
78 |
79 | |image2|
80 |
81 | Because of this, they can be effectively filtered using their k-nearest
82 | neighbor density, in this example setting the ``threshold`` to 0.002.
83 |
84 | ::
85 |
86 | from sklearn.neighbors import KDTree
87 | X = np.array([ds.local_maxs[0], ds.local_maxs[1]]).T
88 | kdt = KDTree(X, leaf_size=30, metric='euclidean')
89 | rho = 100 / (np.pi * kdt.query(X, k=100)[0][:, 99] ** 2)
90 |
91 | threshold = 0.002
92 |
93 | plt.figure(figsize=[5, 2.5])
94 | plt.hist(rho, bins=100, histtype='step')
95 | plt.axvline(x=threshold, color='r', linestyle='--')
96 |
97 | ax = plt.gca()
98 | ax.set_xlabel("Local KNN density")
99 | ax.set_ylabel("Count")
100 | pass
101 |
102 | |image3|
103 |
104 | … and a quick look at the before and after in the osmFISH dataset
105 |
106 | |image4|
107 |
108 | .. |image0| image:: ../images/local_max_threshold_gene.png
109 | .. |image1| image:: ../images/local_max_threshold_total.png
110 | .. |image2| image:: ../images/local_max_threshold_knn.png
111 | .. |image3| image:: ../images/local_max_threshold_knn2.png
112 | .. |image4| image:: ../images/local_max_threshold_knn3.png
113 |
114 |
--------------------------------------------------------------------------------
/doc/userguide/12-clustering.rst:
--------------------------------------------------------------------------------
1 | Clustering Local L-1 Maxima
2 | ===========================
3 |
4 | In the *de novo* mode analysis, after the local maxima have been
5 | identified from the tissue image, they are clustered.
6 |
7 | The default clustering algorithm is based on `Louvain community
8 | detection `__.
9 | SSAM also supports clustering using ``hdbscan`` and ``optics``.
10 |
11 | It can be initiated by:
12 |
13 | ::
14 |
15 | analysis.cluster_vectors(method="louvain",
16 | pca_dims=-1,
17 | min_cluster_size=2,
18 | max_correlation=1.0,
19 | metric="correlation",
20 | outlier_detection_method='medoid-correlation',
21 | outlier_detection_kwargs={},
22 | random_state=0,
23 | **kwargs)
24 |
25 | … where - ``method`` can be ``louvain``, ``hdbscan``, ``optics``. -
26 | ``pca_dims`` are the number of principal components used for clustering.
27 | - ``min_cluster_size`` is the minimum cluster size. - ``resolution`` is
28 | the resolution for Louvain community detection. - ``prune`` is the
29 | threshold for Jaccard index (weight of SNN network). If it is smaller
30 | than prune, it is set to zero. - ``snn_neighbors`` is the number of
31 | neighbors for SNN network. - ``max_correlation`` is the threshold for
32 | which clusters with higher correlation to this value will be merged. -
33 | ``metric`` is the metric for calculation of distance between vectors in
34 | gene expression space. - ``subclustering`` if set to True, each cluster
35 | will be clustered once again with DBSCAN algorithm to find more
36 | subclusters. - ``dbscan_eps`` is the ``eps`` value for DBSCAN
37 | subclustering. Not used when ‘subclustering’ is set False. -
38 | ``centroid_correction_threshold`` is the threshold for which centroid
39 | will be recalculated with the vectors which have the correlation to the
40 | cluster medoid equal or higher than this value. - ``random_state`` is
41 | the random seed or scikit-learn’s random state object to replicate the
42 | same result
43 |
44 | Removing outliers
45 | -----------------
46 |
47 | The cell type signature is determined as the centroid of the cluster.
48 | This can be affected by outliers, so SSAM supports a number of outlier
49 | removal methods:
50 |
51 | ::
52 |
53 | analysis.remove_outliers(outlier_detection_method='medoid-correlation', outlier_detection_kwargs={}, normalize=True)
54 |
55 | .. where - ``outlier_detection_method`` can be ``medoid-correlation``,
56 | ``robust-covariance``, ``one-class-svm``, ``isolation-forest``,
57 | ``local-outlier-factor`` - ``outlier_detection_kwargs`` are arguments
58 | passed to the outlier detection method
59 |
--------------------------------------------------------------------------------
/doc/userguide/13-diagnostic.rst:
--------------------------------------------------------------------------------
1 | Diagnostic plots
2 | ================
3 |
4 | After unsupervised clustering of gene expression vectors, some clusters
5 | may need to be manually merged or discarded. SSAM supports merging of
6 | clusters based on correlation of gene expression profile, however in
7 | many cases manual inspection is needed to rule out any non-trivial
8 | issues.
9 |
10 | To guide this process, SSAM generates a cluster-wise ‘diagnostic plot’,
11 | which consists of four panels: 1) location of the clustered vectors on
12 | the tissue image, 2) the pixels classified to belong to the cluster
13 | signature (the cluster centroid), 3) the mean expression profile of the
14 | clustered vectors, and 4) the t-SNE or UMAP embedding.
15 |
16 | In the three datasets analyzed the clusters to be merged or removed
17 | often showed a discordance between the location of sampled vectors used
18 | to determine the cluster (panel 1) and the pixels classified to belong
19 | to that cluster (panel 2). In case of overclustering, i.e. when a
20 | cell-type signature is split over 2 clusters, the map typically does not
21 | classify the full shape of the cells but instead only fragments (panel
22 | 2), and having almost the same marker gene expression of another cluster
23 | (panel 3). Such clusters can be merged.
24 |
25 | For dubious clusters that should be removed, we observed that vectors
26 | usually originate from outside the tissue region or from image artifacts
27 | (panel 1), or that the gene expression does not show any clear
28 | expression of marker genes or similarity to expected gene expression
29 | profiles (panel 3).
30 |
31 | The remaining clusters are then annotated by comparing cluster marker
32 | genes to known cell-type markers. Note that in many cases, the identity
33 | of clusters can be easily assigned by comparing the centroids of the
34 | clusters to the known cell-type signatures, e.g., from single cell RNA
35 | sequencing.
36 |
37 | To support rapid annotation of cell types to clusters, SSAM additionally
38 | shows the highest correlating known cell-type signature should this data
39 | be available in panel 3.
40 |
41 | Example 1: a large cluster that can be easily annotated
42 | -------------------------------------------------------
43 |
44 | Local maxima (panel 1), correspond to the same area (panel 2), and
45 | matches known gene expression patterns of *Vip Arhgap36 Hmcn1* cell
46 | types from scRNAseq experiments with high correlation (panel 3)
47 |
48 | |image0|
49 |
50 | Example 2: a large cluster that cannot be easily annotated
51 | ----------------------------------------------------------
52 |
53 | Local maxima (panel 1), correspond to the same area (panel 2). The gene
54 | expression profile has a good correlation to *L2/3 IT VISp Adamts2* cell
55 | types, but are lacking the very high expression of *Pde1a*. In this
56 | particular case, one would need to check other clusters matching this
57 | cell type and perhaps merge them, or perhaps this indicates low
58 | efficiency of the *Pde1a* probe in the experiment.
59 |
60 | |image1|
61 |
62 | Example 3: a small cluster that is good
63 | ---------------------------------------
64 |
65 | Despite only 2 local maxima (panel 1), the classified pixels correspond
66 | to the same area (panel 2), and matches known gene expression patterns
67 | (panel 3). This represents a very rare Sst Chodl cell type.
68 |
69 | |image2|
70 |
71 | Example 4: a small cluster that is questionable
72 | -----------------------------------------------
73 |
74 | Sampled local maxima (panel 1) do not correspond to the classified pixels
75 | (panel 2), and do not clearly match known gene expression patterns
76 | (panel 3).
77 |
78 | |image3|
79 |
80 | .. |image0| image:: ../images/diagplot_centroid_2.png
81 | .. |image1| image:: ../images/diagplot_centroid_5.png
82 | .. |image2| image:: ../images/diagplot_centroid_30.png
83 | .. |image3| image:: ../images/diagplot_centroid_8.png
84 |
85 |
--------------------------------------------------------------------------------
/doc/userguide/14-cluster_annotation.rst:
--------------------------------------------------------------------------------
1 | Cluster annotation
2 | ==================
3 |
4 | In a typical single cell RNAseq experiment, the process of annotating
5 | cell types manually can be laborious and as such, `a number of automated
6 | methods have emerged `__.
7 |
 8 | In a typical *in situ* transcriptomics experiment, the annotation of
 9 | cell types is usually much easier as these assays usually profile
10 | established cell-type markers. Clusters can be annotated easily based on
11 | marker gene expression.
12 |
13 | The `diagnostic plots `__ can be used to compare existing
14 | signatures against those identified *de novo*
15 |
16 | ::
17 |
18 | from scipy.stats import pearsonr, spearmanr
19 |
20 | for idx in range(len(ds.centroids)):
21 | plt.figure(figsize=[50, 15])
22 | ds.plot_diagnostic_plot(idx, known_signatures=[
23 | ("scRNA-seq", scrna_uniq_labels, scrna_centroids, scrna_colors),
24 | ], correlation_methods=[
25 | ("r", pearsonr),
26 | ("rho", spearmanr)
27 | ])
28 | plt.tight_layout()
29 | plt.savefig('diagplots_multiplexed_smFISH/diagplot_centroid_%d.png'%idx)
30 | plt.close()
31 |
32 | This will generate a diagnostic plot for each cluster, which can be used
33 | to assign cluster labels. E.g. the following cluster matches known gene
34 | expression patterns of Vip Arhgap36 Hmcn1 cell types from scRNAseq
35 | experiments with high correlation (panel 3):
36 |
37 | |image0|
38 |
39 | While this is a good example of a cluster that can be easily annotated,
40 | some clusters may represent noise and would need to be removed, and
41 | when overclustering occurs then clusters may have to be merged. The
42 | `diagnostic plots documentation `__ assists the decision-
43 | making process.
44 |
45 | Once each cluster is reviewed, a cell type can be assigned, removed, or
46 | merged. In the following code snippet, we show an elegant way to
47 | annotate, remove, and merge clusters.
48 |
49 | 1) Determine that (i) clusters with a name will be annotated, (ii)
50 | clusters with a “N/A” will be removed, (iii) clusters with the same
51 | name will be merged
52 |
53 | ::
54 |
55 | denovo_labels = [
56 | "N/A",
57 | "VLMC",
58 | "Vip Arhgap36 Hmcn1 / Vip Igfbp4 Map21l1",
59 | "L2/3 IT Rrad",
60 | "N/A",
61 | "L2/3 IT Adamts2",
62 | "Sst Nts / Sst Rxfp1 Eya1",
63 | "Lamp5 Lsp1",
64 | "N/A",
65 | "Sst Crhr2 Efemp1 / Sst Esm1",
66 |
67 | "Pvalb Calb1 Sst / Pvalb Reln Tac1",
68 | "Astro Aqp4",
69 | "L6 IT Penk Fst",
70 | "L4 IT Superficial",
71 | "L5 IT Col27a1",
72 | "L2/3 IT Adamts2",
73 | "OPC",
74 | "Oligo",
75 | "L4 IT Rspo1",
76 | "L5 NP Trhr Met",
77 |
78 | "L5 IT Hsd11b1 Endou",
79 | "Pvalb Th Sst / Pvalb Reln Tac1",
80 | "L6 CT Ctxn3 Brinp3 / L6 CT Gpr139",
81 | "L5 PT Chrna6",
82 | "L5 IT Batf3",
83 | "L5 PT C1ql2 Cdh13",
84 | "L5 PT Krt80",
85 | "L6 IT Penk Col27a1",
86 | "L6 IT Penk Col27a1",
87 | "L6b Crh",
88 |
89 | "Sst Chodl",
90 | ]
91 |
92 | 2) make objects for storing the index of clusters to be annotated,
93 | removed and merged
94 |
95 | ::
96 |
97 | denovo_labels_final = []
98 | exclude_indices = []
99 | merge_indices = []
100 |
101 | 3) iterate over the ``denovo_labels`` object and populate the
102 | ``denovo_labels_final``, ``exclude_indices``, ``merge_indices``
103 | objects
104 |
105 | ::
106 |
107 | for idx, cl in enumerate(denovo_labels):
108 | if cl == 'N/A':
109 | exclude_indices.append(idx)
110 | continue
111 | if cl in denovo_labels_final:
112 | continue
113 | denovo_labels_final.append(cl)
114 |
115 | for cl in np.unique(denovo_labels):
116 | if cl == 'N/A':
117 | continue
118 | mask = [cl == e for e in denovo_labels]
119 | if np.sum(mask) > 1:
120 | merge_indices.append(np.where(mask)[0])
121 |
122 | 4) plot the removed clusters in t-SNE embedding
123 |
124 | ::
125 |
126 | cmap = plt.get_cmap('jet')
127 | jet_colors = cmap(np.array(list(range(len(ds.centroids)))) / (len(ds.centroids) - 1))
128 | tsne_colors = np.zeros_like(jet_colors)
129 | tsne_colors[..., :] = [0.8, 0.8, 0.8, 1]
130 | tsne_colors[exclude_indices] = [0, 0, 0, 1] #jet_colors[exclude_indices]
131 | import matplotlib.patheffects as PathEffects
132 | plt.figure(figsize=[5, 5])
133 | ds.plot_tsne(pca_dims=33, metric="correlation", s=5, run_tsne=False, colors=tsne_colors)
134 | plt.axis('off')
135 |
136 | |image1|
137 |
138 | 5) plot the merged clusters in t-SNE embedding
139 |
140 | ::
141 |
142 | cmap = plt.get_cmap('rainbow')
143 | jet_colors = cmap(np.array(list(range(len(merge_indices)))) / (len(merge_indices) - 1))
144 | plt.figure(figsize=[5, 5])
145 | tsne_colors = np.zeros([len(ds.centroids), 4])
146 | tsne_colors[..., :] = [0.8, 0.8, 0.8, 1]
147 | for idx, mi in enumerate(merge_indices):
148 | tsne_colors[mi] = jet_colors[idx]
149 | ds.plot_tsne(pca_dims=33, metric="correlation", s=5, run_tsne=False, colors=tsne_colors)
150 | plt.axis('off')
151 |
152 | |image2|
153 |
154 | 6) update the ``analysis`` object with the clusters to remove and merge
155 |
156 | ::
157 |
158 | analysis.exclude_and_merge_clusters(exclude_indices, merge_indices, centroid_correction_threshold=0.6)
159 |
160 | .. |image0| image:: ../images/diagplot_centroid_2.png
161 | .. |image1| image:: ../images/tsne_removed.png
162 | .. |image2| image:: ../images/tsne_merged.png
163 |
164 |
--------------------------------------------------------------------------------
/doc/userguide/15-celltype_map_thresh_d.rst:
--------------------------------------------------------------------------------
1 | Thresholding the de-novo cell-type map
2 | ======================================
3 |
4 | After cell-type signatures are calculated, the tissue image can be
5 | classified. The classification of each pixel is based on the Pearson
6 | correlation metric (although an `experimental adversarial autoencoder
7 | based classification method `__ can be applied).
8 |
9 | We found that a minimum correlation threshold (``min_r``) of 0.3 worked
10 | well for guided mode based on single cell RNAseq cell-type signatures,
11 | and 0.6 worked well for *de novo* mode.
12 |
13 | Below we show how the cell-type map changes using correlation thresholds
14 | of ``0.4,0.6,0.8`` for the *de novo* cell-type map.
15 |
16 | ::
17 |
18 | analysis.map_celltypes()
19 |
20 | analysis.filter_celltypemaps(min_norm=filter_method, filter_params=filter_params, min_r=0.4, fill_blobs=True, min_blob_area=50, output_mask=output_mask)
21 | plt.figure(figsize=[5, 5])
22 | ds.plot_celltypes_map(colors=denovo_celltype_colors, rotate=1, set_alpha=False)
23 |
24 | analysis.filter_celltypemaps(min_norm=filter_method, filter_params=filter_params, min_r=0.6, fill_blobs=True, min_blob_area=50, output_mask=output_mask)
25 | plt.figure(figsize=[5, 5])
26 | ds.plot_celltypes_map(colors=denovo_celltype_colors, rotate=1, set_alpha=False)
27 |
28 | analysis.filter_celltypemaps(min_norm=filter_method, filter_params=filter_params, min_r=0.8, fill_blobs=True, min_blob_area=50, output_mask=output_mask)
29 | plt.figure(figsize=[5, 5])
30 | ds.plot_celltypes_map(colors=denovo_celltype_colors, rotate=1, set_alpha=False)
31 |
--------------------------------------------------------------------------------
/doc/userguide/16-visualisation.rst:
--------------------------------------------------------------------------------
1 | Visualisation of 2D gene expression embeddings (t-SNE and UMAP)
2 | ===============================================================
3 |
4 | An important part of presenting the summary of the clustering analysis
5 | is 2D visualisation via embedding.
6 |
7 | `UMAP `__ and
8 | `t-SNE `__,
9 | are 2 common dimensionality reduction methods that can be useful for
10 | displaying clustering results.
11 |
12 | Running t-SNE
13 | -------------
14 |
15 | To run the t-SNE on the ``ds`` object:
16 | ``ds.run_tsne(pca_dims=-1,n_iter=5000, perplexity=70, early_exaggeration=10, metric="correlation", exclude_bad_clusters=True, random_state=0, tsne_kwargs={})``
17 |
18 | .. where:
19 |
20 | - ``pca_dims``: Number of PCA dimensions used for the tSNE embedding.
21 | - ``n_iter``: Maximum number of iterations for the tSNE.
22 | - ``perplexity``: The perplexity value of the tSNE (please refer to the
23 | section `How should I set the perplexity in
24 | t-SNE? `__ ).
25 | - ``early_exaggeration``: Early exaggeration parameter for tSNE.
26 | Controls the tightness of the resulting tSNE plot.
27 | - ``metric``: Metric for calculation of distance between vectors in
28 | gene expression space.
29 | - ``exclude_bad_clusters``: If true, the vectors that are excluded by
30 | the clustering algorithm will not be considered for tSNE computation.
31 | - ``random_state``: Random seed or scikit-learn’s random state object
32 | to replicate the same result
33 | - ``tsne_kwargs``: Other keyword parameters for tSNE.
34 |
35 | Running UMAP
36 | ------------
37 |
38 | To run the UMAP on the ``ds`` object:
39 | ``ds.run_umap(pca_dims=-1, metric="correlation", min_dist=0.8, exclude_bad_clusters=True, random_state=0, umap_kwargs={})``
40 |
41 | .. where:
42 |
43 | - ``pca_dims``: Number of PCA dimensions used for the UMAP embedding.
44 | - ``metric``: Metric for calculation of distance between vectors in
45 | gene expression space.
46 | - ``min_dist``: ‘min_dist’ parameter for UMAP.
47 | - ``exclude_bad_clusters``: If true, the vectors that are excluded by
48 | the clustering algorithm will not be considered for UMAP computation.
49 | - ``random_state``: Random seed or scikit-learn’s random state object
50 | to replicate the same result
51 | - ``umap_kwargs``: Other keyword parameters for UMAP.
52 |
53 | Plotting embeddings
54 | -------------------
55 |
56 | Plotting of the t-SNE and UMAP embeddings can be performed by:
57 |
58 | ::
59 |
60 | ds.plot_embedding(method='umap')
61 | ds.plot_embedding(method='tSNE')
62 |
63 | |image0|
64 |
65 | .. |image0| image:: ../images/tsne_final.png
66 |
67 |
--------------------------------------------------------------------------------
/doc/userguide/17-domain.rst:
--------------------------------------------------------------------------------
1 | Identifying tissue domains
2 | ==========================
3 |
4 | Cells are organised into tissues and organs. Spatial gene expression not
5 | only allows the identification of cell types *in situ*, but also allows
6 | investigation of how these cells are organised.
7 |
8 | SSAM facilitates the identification of “tissue domains”, which are
9 | regions in the tissue exhibiting similar local cell type composition.
10 | This is based on circular window sampling with a defined ``radius`` and
11 | ``step``, which is then followed by `agglomerative
12 | clustering `__.
13 |
14 | Perform circular window sampling
15 | --------------------------------
16 |
17 | The first step is to sample cell-type composition in circular sweeping
18 | windows. For this, the size of circular window (``radius``) and the step
19 | between each sampling (``step``) has to be defined. The units here are
20 | in um, which is also equivalent to pixels in this example. The following
21 | performs this sampling using a circular window of 100um, with 10um
22 | steps:
23 |
24 | ::
25 |
26 | analysis.bin_celltypemaps(step=10, radius=100)
27 |
28 | Clustering domain signatures
29 | ----------------------------
30 |
31 | After performing the sampling, we continue with identifying domain
32 | signatures through clustering. This is based on agglomerative clustering
33 | to identify the initial clusters (``n_clusters``) of windows which
34 | include a minimum number of classified pixels (``norm_thres``), followed
35 | cluster merging when the correlation between clusters exceeds a
36 | threshold (``merge_thres``). The merging of clusters can be restricted
37 | to adjacent clusters (``merge_remote=False``), or not restricted to
38 | spatial proximity (``merge_remote=True``).
39 |
40 | ::
41 |
42 | analysis.find_domains(n_clusters=20, merge_remote=True, merge_thres=0.7, norm_thres=1500)
43 |
44 | Visualizing identified domains
45 | ------------------------------
46 |
47 | Once the domains have been identified, they have to be visualised for
48 | evaluation.
49 |
50 | ::
51 |
52 | from matplotlib.colors import ListedColormap
53 | cmap_jet = plt.get_cmap('jet')
54 | num_domains = np.max(ds.inferred_domains_cells) + 1
55 |
56 | fig, axs = plt.subplots(1, num_domains, figsize=(4*num_domains, 4))
57 | for domain_idx in range(num_domains):
58 | ax = axs[domain_idx]
59 | plt.sca(ax)
60 | plt.axis('off')
61 | cmap = ListedColormap([cmap_jet(lbl_idx / num_domains) if domain_idx == lbl_idx else "#cccccc" for lbl_idx in range(num_domains)])
62 | ds.plot_domains(rotate=1, cmap=cmap)
63 | plt.tight_layout()
64 | plt.savefig(f'plots/domains_individual')
65 |
66 | .. figure:: ../images/domains_individual.png
67 | :alt: side by side plot of all tissue domains
68 |
69 | side by side plot of all tissue domains
70 |
71 | Post-processing the identified domains
72 | --------------------------------------
73 |
74 | In certain cases, one may wish to **exclude certain domains**
75 | (``excluded_domain_indices``) as they may originate from tissue
76 | artifacts or contain no information. In our case the third domain (0
77 | based index 2) seems to be an artifact and the fourth one contains no
78 | useful information. The first two domains are obviously part of the same
79 | layer and can therefore be merged.
80 |
81 | Due to possible imaging artifacts such as tiling, some domains might be
82 | split. While it is still possible to tune the ``merge_thres`` in the
83 | clustering step, one can simply perform this as manual post processing.
84 | In the case above, there do not appear to be any domains that require
85 | merging.
86 |
87 | Once the domains to be excluded or merged have been determined, they can
88 | be excluded and merged:
89 |
90 | ::
91 |
92 | excluded_domain_indices = [2,3,7,10]
93 | merged_domain_indices = [[0,1],[9,11]]
94 | analysis.exclude_and_merge_domains(excluded_domain_indices, merged_domain_indices)
95 |
96 | The final plot
97 | --------------
98 |
99 | The individual domains represent the established neocortex layering
100 | patterns found in the mouse brain. We can continue with assigning domain
101 | colours, names, and plotting all of the domains together.
102 |
103 | ::
104 |
105 | plt.figure(figsize=[5, 5])
106 | ds.plot_domains(rotate=1)
107 |
108 | |image0|
109 |
110 | .. |image0| image:: ../images/final.png
111 |
112 |
--------------------------------------------------------------------------------
/doc/userguide/18-composition.rst:
--------------------------------------------------------------------------------
1 | Cell-type composition analysis in tissue domains
2 | ================================================
3 |
4 | After identifying `tissue domains `__ that exhibit specific
5 | cell-type composition properties, it may be desirable to report the
6 | cell-type composition properties of the identified domains.
7 |
8 | In the `SSAM
9 | manuscript `__ we used
10 | this functionality to identify that astrocyte cell-type representation
11 | in neocortex layers was previously under-reported, and identified the
12 | cell-type composition of novel layering patterns in the primary visual
13 | cortex (VISp).
14 |
15 | Performing the cell-type composition analysis
16 | ---------------------------------------------
17 |
18 | The analysis is initiated on the ``analysis`` object:
19 |
20 | ::
21 |
22 | analysis.calc_cell_type_compositions()
23 |
24 | Plotting the composition of each domain
25 | ---------------------------------------
26 |
27 | Once this has completed, you can plot the cell-type composition of the
28 | different layers using the plot function. In the following example, we
29 | plot the 7 identified layers (``domain_index = 0-6``) in the order that
30 | they would appear in the neocortex:
31 |
32 | ::
33 |
34 | # note - this could be wrapped up into a function
35 | for domain_idx in [1, 0, 2, 3, 4, 5, 6]:
36 | plt.figure(figsize=[5, 5])
37 | ds.plot_celltype_composition(domain_idx,
38 | cell_type_colors=denovo_celltype_colors,
39 | cell_type_orders=heatmap_clusters_index[::-1],
40 | label_cutoff=0.03)
41 | plt.title(domain_labels[domain_idx])
42 |
43 | |image0|
44 |
45 | Plotting the composition of the entire tissue
46 | ---------------------------------------------
47 |
48 | It would be worthwhile to compare the cell-type composition within each
49 | domain, and compare this to what is observed over the entire tissue. The
50 | cell-type composition over the entire tissue is stored as the last
51 | domain, in this case the 8th element (``domain_index = 7``):
52 |
53 | ::
54 |
55 | # note - this can be wrapped up into a function
56 | plt.figure(figsize=[5, 5])
57 | ds.plot_celltype_composition(domain_index=7,
58 | cell_type_colors=denovo_celltype_colors,
59 | cell_type_orders=heatmap_clusters_index[::-1],
60 | label_cutoff=0.03)
61 | plt.title('All')
62 |
63 | |image1|
64 |
65 | .. |image0| image:: ../images/domain_composition.png
66 | .. |image1| image:: ../images/domain_composition_all.png
67 |
68 |
--------------------------------------------------------------------------------
/doc/userguide/19-experimental.rst:
--------------------------------------------------------------------------------
1 | Experimental features
2 | =====================
3 |
 4 | We will endeavour to improve the functionality of SSAM by implementing
 5 | novel features. So far, these experimental features only work with the ``develop`` branch of SSAM.
6 |
7 | The current novel features supported by SSAM include:
8 |
9 | - `Adversarial Auto Encoder based classification `__
10 |
11 | - `Segmenting the cell-type map `__
12 |
--------------------------------------------------------------------------------
/doc/userguide/20-aaec.rst:
--------------------------------------------------------------------------------
1 | Cell-type classification using Adversarial Autoencoders
2 | =======================================================
3 |
4 | The default classification algorithm is based on Pearson correlation as
5 | this has been `shown to be effective for automatic classification of
6 | cell types `__ for single
7 | cell RNAseq experiments. This proved to be both highly performant and
8 | accurate also for spatial gene expression data. However, it may be
9 | desirable to explore other classification methods.
10 |
11 | One recent and exciting Deep Learning framework that achieves competitive
12 | results in generative modeling and semi-supervised classification tasks
13 | is the `adversarial autoencoder `__.
14 |
15 | SSAM implements a modified version of adversarial autoencoder classifier
16 | based on the `original
17 | implementation `__
18 | by `Shahar Azulay `__.
19 |
20 | Mapping cell types using an adversarial autoencoder
21 | ---------------------------------------------------
22 |
23 | In order to use the AAEC classification of pixels instead of the Pearson
24 | correlation based method, simply replace ``analysis.map_celltypes()``
25 | with:
26 |
27 | ::
28 |
29 | analysis.map_celltypes_aaec(epochs=1000, seed=0, batch_size=1000, chunk_size=100000, z_dim=10, noise=0)
30 |
31 |
--------------------------------------------------------------------------------
/doc/userguide/21-segment_celltype_map.rst:
--------------------------------------------------------------------------------
1 | Segmenting the SSAM cell type map
2 | =================================
3 |
4 | While we demonstrate the accuracy of SSAM in reconstructing celltype
5 | maps, we understand that many applications in biology require cell
6 | segmentation. As such, the development branch of SSAM supports
7 | segmentation of the celltype map using the ``watershed`` algorithm.
8 |
9 | **This is an experimental feature!**
10 |
11 | The segmentation of the cell type map can be performed by:
12 |
13 | .. code-block:: python
14 |
15 | # Load DAPI image
16 | with open('zenodo/osmFISH/raw_data/im_nuc_small.pickle', 'rb') as f:
17 | dapi = pickle.load(f)
18 | dapi_small = np.hstack([dapi.T[:1640], np.zeros([1640, 12])]).reshape(ds.vf_norm.shape)
19 |
20 | # Threshold DAPI image to create markers
21 | dapi_threshold = filters.threshold_local(dapi_small[..., 0], 35, offset=-0.0002)
22 | dapi_thresh_im = dapi_small[..., 0] > dapi_threshold
23 | dapi_thresh_im = dapi_thresh_im.reshape(ds.vf_norm.shape).astype(np.uint8) * 255
24 |
25 | # Run watershed segmentation of cell-type maps with DAPI as markers
26 | # After running below, the segmentation data will be available as:
27 | # - Segmentations: ds.watershed_segmentations
28 | # - Cell-type map: ds.watershed_celltype_map
29 | analysis.run_watershed(dapi_thresh_im)
30 |
31 | Below we demonstrate the application of the segmentation on the *de
32 | novo* celltype map generated for the mouse SSp osmFISH data.
33 |
34 | |image0|
35 |
36 | .. |image0| image:: ../images/segmented_celltype_map.png
37 |
38 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | pandas
4 | matplotlib
5 | seaborn
6 | scikit-learn
7 | umap-learn
8 | python-louvain
9 | sparse
10 | scikit-image
11 | pyarrow
12 | packaging
13 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import io
import sys
import setuptools
try:
    import numpy as np
except ImportError:
    print("Please install Numpy first. e.g. pip install numpy")
    sys.exit(1)  # sys.exit instead of the interactive-only exit() builtin

# C extension with OpenMP-parallelized kernels (KDE, correlation maps).
module_utils = setuptools.extension.Extension(
    'ssam.utils',
    sources=["c/utils.cpp"],
    extra_compile_args=["-fopenmp"],
    extra_link_args=["-fopenmp"],
    include_dirs=[np.get_include()],
)

with io.open("README.rst", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="ssam",
    version="1.0.2",
    author="Jeongbin Park",
    author_email="j.park@dkfz-heidelberg.de",
    description="SSAM",
    long_description=long_description,
    # README.rst is reStructuredText, not Markdown; "text/markdown" caused
    # PyPI to render the long description incorrectly.
    long_description_content_type="text/x-rst",
    url="https://github.com/HiDiHlabs/ssam",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
        "Operating System :: POSIX",
    ],
    ext_modules=[module_utils],
    install_requires=[
        "numpy",
        "scipy",
        "pandas",
        "matplotlib",
        "seaborn",
        "scikit-learn",
        "umap-learn",
        "python-louvain",
        "sparse",
        "scikit-image",
        "pyarrow",
        "packaging",
    ]
)
46 |
--------------------------------------------------------------------------------
/ssam/__init__.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgba
import seaborn as sns
import multiprocessing
import os
import sys
sns.set()
sns.set_style("whitegrid", {'axes.grid' : False})
from functools import reduce
from sklearn.neighbors import KernelDensity
from sklearn import preprocessing
import scipy
from scipy import ndimage
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP
from multiprocessing import Pool
from contextlib import closing
from tempfile import mkdtemp, TemporaryDirectory
from sklearn.neighbors import kneighbors_graph
from sklearn.cluster import KMeans
import community
import networkx as nx
from sklearn.cluster import DBSCAN
import sparse
from skimage import filters
from skimage.morphology import disk
from skimage import measure
from matplotlib.colors import ListedColormap
import pickle
import subprocess
from scipy.spatial.distance import cdist
from sklearn.cluster import AgglomerativeClustering
from PIL import Image
from scipy.ndimage import zoom
import pyarrow
import time
from packaging import version

from .utils import corr, calc_ctmap, calc_corrmap, flood_fill, calc_kde
42 |
43 | def _fast_gaussian_kde(args):
44 | # TODO: 1) support sampling distance
45 | # 2) support other kernels
46 | (bandwidth, save_dir, gene_name, shape, locations, sampling_distance) = args
47 |
48 | print('Processing gene %s...'%gene_name)
49 |
50 | maxdist = int(bandwidth * 4)
51 | span = np.linspace(-maxdist,maxdist,maxdist*2+1)
52 | X, Y, Z = np.meshgrid(span,span,span)
53 |
54 | def create_kernel(x, y, z):
55 | X_=(-x+X)/bandwidth
56 | Y_=(-y+Y)/bandwidth
57 | Z_=(-z+Z)/bandwidth
58 | return np.exp(-0.5*(X_**2+Y_**2+Z_**2))
59 |
60 | pd = np.zeros(shape)
61 | for loc in locations:
62 | int_loc = [int(i) for i in loc]
63 | rem_loc = [i%1 for i in loc]
64 |
65 | kernel = create_kernel(*rem_loc)
66 |
67 | pos_start = [i - maxdist for i in int_loc]
68 | pos_end = [i + maxdist + 1 for i in int_loc]
69 |
70 | kernel_pos_start = [abs(i) if i < 0 else 0 for i in pos_start]
71 | kernel_pos_end = [maxdist*2+1 - (i-j) if i > j else maxdist*2+1 for i, j in zip(pos_end, shape)]
72 |
73 | pos_start = [0 if i < 0 else i for i in pos_start]
74 | pos_end = [j if i >= j else i for i, j in zip(pos_end, shape)]
75 |
76 | slices = tuple([slice(i, j) for i, j in zip(pos_start, pos_end)])
77 | kernel_slices = tuple([slice(i, j) for i, j in zip(kernel_pos_start, kernel_pos_end)])
78 | pd[slices] += kernel.swapaxes(0, 1)[kernel_slices]
79 |
80 | pd /= pd.sum()
81 | pd *= len(locations)
82 |
83 | return pd
84 |
def run_sctransform(data, clip_range=None, verbose=True, debug_path=None, plot_model_pars=False, **kwargs):
    """
    Run 'sctransform' R package and returns the normalized matrix and the model parameters.
    Package 'feather' is used for the data exchange between R and Python.

    :param data: N x D ndarray to normalize (N is number of samples, D is number of dimensions).
    :type data: numpy.ndarray
    :param clip_range: Clipping range (lower, upper) applied to the residuals.
        Defaults to (-sqrt(N/30), sqrt(N/30)).
    :type clip_range: tuple(float, float)
    :param verbose: If True, print progress and stream the Rscript output.
    :type verbose: bool
    :param debug_path: If given, use this directory for the intermediate files
        instead of a fresh temporary directory (files are kept, for debugging).
    :type debug_path: str
    :param plot_model_pars: If True, plot the model parameters fitted by vst.
    :type plot_model_pars: bool
    :param kwargs: Any keyword arguments passed to R function `vst`.
    :returns: A 2-tuple, which contains two pandas.dataframe:
        (1) normalized N x D matrix.
        (2) determined model parameters.
    """
    def _log(m):
        if verbose:
            print(m)

    vst_options = ['%s = "%s"'%(k, v) if type(v) is str else '%s = %s'%(k, v) for k, v in kwargs.items()]
    if len(vst_options) == 0:
        vst_opt_str = ''
    else:
        vst_opt_str = ', ' + ', '.join(vst_options)
    with TemporaryDirectory() as tmpdirname:
        if debug_path:
            tmpdirname = debug_path
        ifn, ofn, pfn, rfn = [os.path.join(tmpdirname, e) for e in ["in.feather", "out.feather", "fit_params.feather", "script.R"]]
        _log("Writing temporary files...")
        if isinstance(data, pd.DataFrame):
            df = data
        else:
            df = pd.DataFrame(data, columns=[str(e) for e in range(data.shape[1])])
        # feather v2 files are not readable by the R 'feather' package, so
        # force v1 when a newer pyarrow is installed.
        if version.parse(pyarrow.__version__) >= version.parse("1.0.0"):
            df.to_feather(ifn, version=1)
        else:
            df.to_feather(ifn)
        rcmd = 'library(feather); library(sctransform); mat <- t(as.matrix(read_feather("{0}"))); colnames(mat) <- 1:ncol(mat); res <- vst(mat{1}, return_gene_attr=TRUE, return_cell_attr=TRUE); write_feather(as.data.frame(t(res$y)), "{2}"); write_feather(as.data.frame(res$model_pars_fit), "{3}");'.format(ifn, vst_opt_str, ofn, pfn)
        if plot_model_pars:
            plot_path = os.path.join(tmpdirname, 'model_pars.png')
            rcmd += 'png(file="%s", width=3600, height=1200, res=300); plot_model_pars(res, show_var=TRUE); dev.off();'%plot_path
        # Windows paths: escape backslashes for the R string literals.
        rcmd = rcmd.replace('\\', '\\\\')
        with open(rfn, "w") as f:
            f.write(rcmd)
        _log("Running scTransform via Rscript...")
        proc = subprocess.Popen(["Rscript", rfn], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # Pump the child's combined output until EOF. The previous condition
        # `while not proc.poll()` was unreliable: poll() returns None (falsy)
        # while the child runs, but a clean exit code 0 is falsy too.
        while True:
            c = proc.stdout.read(1)
            if not c:
                break
            if verbose:
                try:
                    sys.stdout.write(c.decode("utf-8"))
                except UnicodeDecodeError:
                    # Ignore bytes that do not form a valid UTF-8 sequence.
                    pass
            time.sleep(0.0001)
        proc.wait()  # reap the child process
        _log("Reading output files...")
        o, p = pd.read_feather(ofn), pd.read_feather(pfn)
        if plot_model_pars:
            try:
                from matplotlib.image import imread
                import matplotlib.pyplot as plt
                img = imread(plot_path)
                dpi = 80
                fig = plt.figure(figsize=(img.shape[1]/dpi, img.shape[0]/dpi), dpi=dpi)
                plt.imshow(img, interpolation='nearest')
                plt.gca().set_axis_off()
                plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
                plt.margins(0, 0)
                plt.gca().xaxis.set_major_locator(plt.NullLocator())
                plt.gca().yaxis.set_major_locator(plt.NullLocator())
                plt.show()
            except Exception:
                print("Warning: plotting failed, perhaps matplotlib is not available?")
    _log("Clipping residuals...")
    if clip_range is None:
        r = np.sqrt(data.shape[0]/30.0)
        clip_range = (-r, r)
    # DataFrame.clip is not in-place; the original discarded its return value,
    # so the residuals were never actually clipped.
    o = o.clip(*clip_range)
    return o, p
161 |
162 |
163 | class SSAMDataset(object):
164 | """
165 | A class to store intial values and results of SSAM analysis.
166 |
167 | :param genes: The genes that will be used for the analysis.
168 | :type genes: list(str)
169 | :param locations: Location of the mRNAs in um, given as a list of
170 | N x D ndarrays (N is number of mRNAs, D is number of dimensions).
171 | :type locations: list(numpy.ndarray)
172 | :param width: Width of the image in um.
173 | :type width: float
174 | :param height: Height of the image in um.
175 | :type height: float
176 | :param depth: Depth of the image in um. Depth == 1 means 2D image.
177 | :type depth: float
178 | """
179 |
180 | def __init__(self, genes, locations, width, height, depth=1):
181 | if depth < 1 or width < 1 or height < 1:
182 | raise ValueError("Invalid image dimension")
183 | self.shape = (width, height, depth)
184 | self.ndim = 2 if depth == 1 else 3
185 | self.genes = list(genes)
186 | self.locations = []
187 | for l in list(locations):
188 | if l.shape[-1] == 3:
189 | self.locations.append(l)
190 | elif l.shape[-1] == 2:
191 | self.locations.append(np.concatenate((l, np.zeros([l.shape[0], 1])), axis=1))
192 | else:
193 | raise ValueError("Invalid mRNA locations")
194 | self.__vf = None
195 | self.__vf_norm = None
196 | self.normalized_vectors = None
197 | self.expanded_vectors = None
198 | self.cluster_labels = None
199 | #self.corr_map = None
200 | self.tsne = None
201 | self.umap = None
202 | self.normalized_vf = None
203 | self.excluded_clusters = None
204 | self.celltype_binned_counts = None
205 |
    @property
    def vf(self):
        """
        Vector field as a numpy.ndarray.

        Backed by the private ``__vf`` attribute; assigning through the setter
        also invalidates the cached L1-norm.
        """
        return self.__vf
212 |
    @vf.setter
    def vf(self, vf):
        # Replacing the vector field invalidates the cached L1-norm so that it
        # is recomputed on the next access of `vf_norm`.
        self.__vf = vf
        self.__vf_norm = None
217 |
218 | @property
219 | def vf_norm(self):
220 | """
221 | `L1-norm `_ of the vector field as a numpy.ndarray.
222 | """
223 |
224 | if self.vf is None:
225 | return None
226 | if self.__vf_norm is None:
227 | self.__vf_norm = np.sum(self.vf, axis=len(self.vf.shape) - 1)
228 | return self.__vf_norm
229 |
230 | def plot_l1norm(self, cmap="viridis", rotate=0, z=None):
231 | """
232 | Plot the `L1-norm `_ of the vector field.
233 |
234 | :param cmap: Colormap used for the plot.
235 | :type cmap: str or matplotlib.colors.Colormap
236 | :param rotate: Rotate the plot. Possible values are 0, 1, 2, and 3.
237 | :type rotate: int
238 | :param z: Z index to slice 3D vector field.
239 | If not given, the slice at the middle will be plotted.
240 | :type z: int
241 | """
242 | if z is None:
243 | z = int(self.vf_norm.shape[2] / 2)
244 | if rotate < 0 or rotate > 3:
245 | raise ValueError("rotate can only be 0, 1, 2, 3")
246 | im = np.array(self.vf_norm, copy=True)
247 | if rotate == 1 or rotate == 3:
248 | im = im.swapaxes(0, 1)
249 | plt.imshow(im[..., z], cmap=cmap)
250 | if rotate == 1:
251 | plt.gca().invert_xaxis()
252 | elif rotate == 2:
253 | plt.gca().invert_xaxis()
254 | plt.gca().invert_yaxis()
255 | elif rotate == 3:
256 | plt.gca().invert_yaxis()
257 |
258 | def plot_localmax(self, c=None, cmap=None, s=1, rotate=0):
259 | """
260 | Scatter plot the local maxima.
261 |
262 | :param c: Color of the scatter dots. Overrides `cmap` parameter.
263 | :type c: str or list(str), or list(float) or list(list(float))
264 | :param cmap: Colormap of the scatter dots.
265 | :type cmap: str or matplotlib.colors.Colormap
266 | :param s: Size of the scatter dots.
267 | :param rotate: Rotate the plot. Possible values are 0, 1, 2, and 3.
268 | :type rotate: int
269 | """
270 | if rotate < 0 or rotate > 3:
271 | raise ValueError("rotate can only be 0, 1, 2, 3")
272 | if rotate == 0 or rotate == 2:
273 | dim0, dim1 = 1, 0
274 | elif rotate == 1 or rotate == 3:
275 | dim0, dim1 = 0, 1
276 | plt.scatter(self.local_maxs[dim0], self.local_maxs[dim1], s=s, c=c, cmap=cmap)
277 | plt.xlim([0, self.vf_norm.shape[dim0]])
278 | plt.ylim([self.vf_norm.shape[dim1], 0])
279 | if rotate == 1:
280 | plt.gca().invert_xaxis()
281 | elif rotate == 2:
282 | plt.gca().invert_xaxis()
283 | plt.gca().invert_yaxis()
284 | elif rotate == 3:
285 | plt.gca().invert_yaxis()
286 |
287 | def __run_pca(self, exclude_bad_clusters, pca_dims, random_state):
288 | if exclude_bad_clusters:
289 | good_vecs = self.normalized_vectors[self.filtered_cluster_labels != -1, :]
290 | else:
291 | good_vecs = self.normalized_vectors
292 | return PCA(n_components=pca_dims, random_state=random_state).fit_transform(good_vecs)
293 |
    def plot_tsne(self, run_tsne=False, pca_dims=10, n_iter=5000, perplexity=70, early_exaggeration=10,
                  metric="correlation", exclude_bad_clusters=True, s=None, random_state=0, colors=[], excluded_color="#00000033", cmap="jet", tsne_kwargs={}):
        """
        Scatter plot the tSNE embedding.

        :param run_tsne: If false, this method tries to load precomputed tSNE result before running tSNE.
        :type run_tsne: bool
        :param pca_dims: Number of PCA dimensions used for the tSNE embedding.
        :type pca_dims: int
        :param n_iter: Maximum number of iterations for the tSNE.
        :type n_iter: int
        :param perplexity: The perplexity value of the tSNE (please refer to the section `How should I set the perplexity in t-SNE?` in this `link `_).
        :type perplexity: float
        :param early_exaggeration: Early exaggeration parameter for tSNE. Controls the tightness of the resulting tSNE plot.
        :type early_exaggeration: float
        :param metric: Metric for calculation of distance between vectors in gene expression space.
        :type metric: str
        :param exclude_bad_clusters: If true, the vectors that are excluded by the clustering algorithm will not be considered for tSNE computation.
        :type exclude_bad_clusters: bool
        :param s: Size of the scatter dots.
        :type s: float
        :param random_state: Random seed or scikit-learn's random state object to replicate the same result
        :type random_state: int or random state object
        :param colors: Color of each clusters.
        :type colors: list(str), list(list(float))
        :param excluded_color: Color of the vectors excluded by the clustering algorithm.
        :type excluded_color: str of list(float)
        :param cmap: Colormap for the clusters.
        :type cmap: str or matplotlib.colors.Colormap
        :param tsne_kwargs: Other keyword parameters for tSNE.
        :type tsne_kwargs: dict
        """
        # NOTE(review): `colors` and `tsne_kwargs` are mutable default
        # arguments; harmless here since they are only read, never mutated.
        if self.filtered_cluster_labels is None:
            exclude_bad_clusters = False
        if run_tsne or self.tsne is None:
            # (Re)compute the embedding; PCA is applied first, then tSNE on the
            # leading `pca_dims` components. The result is cached on `self.tsne`.
            pcs = self.__run_pca(exclude_bad_clusters, pca_dims, random_state)
            self.tsne = TSNE(n_iter=n_iter, perplexity=perplexity, early_exaggeration=early_exaggeration, metric=metric, random_state=random_state, **tsne_kwargs).fit_transform(pcs[:, :pca_dims])
        if self.filtered_cluster_labels is not None:
            cols = self.filtered_cluster_labels[self.filtered_cluster_labels != -1]
        else:
            cols = None
        if len(colors) > 0:
            cmap = ListedColormap(colors)
        if not exclude_bad_clusters and self.filtered_cluster_labels is not None:
            # Draw excluded (-1) vectors first so clustered points render on top.
            plt.scatter(self.tsne[:, 0][self.filtered_cluster_labels == -1], self.tsne[:, 1][self.filtered_cluster_labels == -1], s=s, c=excluded_color)
            plt.scatter(self.tsne[:, 0][self.filtered_cluster_labels != -1], self.tsne[:, 1][self.filtered_cluster_labels != -1], s=s, c=cols, cmap=cmap)
        else:
            plt.scatter(self.tsne[:, 0], self.tsne[:, 1], s=s, c=cols, cmap=cmap)
        return
343 |
    def plot_umap(self, run_umap=False, pca_dims=10, metric="correlation", exclude_bad_clusters=True, s=None, random_state=0, colors=[], excluded_color="#00000033", cmap="jet", umap_kwargs={}):
        """
        Scatter plot the UMAP embedding.

        :param run_umap: If false, this method tries to load precomputed UMAP result before running UMAP.
        :type run_umap: bool
        :param pca_dims: Number of PCA dimensions used for the UMAP embedding.
        :type pca_dims: int
        :param metric: Metric for calculation of distance between vectors in gene expression space.
        :type metric: str
        :param exclude_bad_clusters: If true, the vectors that are excluded by the clustering algorithm will not be considered for UMAP computation.
        :type exclude_bad_clusters: bool
        :param s: Size of the scatter dots.
        :type s: float
        :param random_state: Random seed or scikit-learn's random state object to replicate the same result
        :type random_state: int or random state object
        :param colors: Color of each clusters.
        :type colors: list(str), list(list(float))
        :param excluded_color: Color of the vectors excluded by the clustering algorithm.
        :type excluded_color: str of list(float)
        :param cmap: Colormap for the clusters.
        :type cmap: str or matplotlib.colors.Colormap
        :param umap_kwargs: Other keyword parameters for UMAP.
        :type umap_kwargs: dict
        """
        # NOTE(review): `colors` and `umap_kwargs` are mutable default
        # arguments; harmless here since they are only read, never mutated.
        if self.filtered_cluster_labels is None:
            exclude_bad_clusters = False
        if run_umap or self.umap is None:
            # (Re)compute the embedding; PCA first, then UMAP on the leading
            # `pca_dims` components. The result is cached on `self.umap`.
            pcs = self.__run_pca(exclude_bad_clusters, pca_dims, random_state)
            self.umap = UMAP(metric=metric, random_state=random_state, **umap_kwargs).fit_transform(pcs[:, :pca_dims])
        if self.filtered_cluster_labels is not None:
            cols = self.filtered_cluster_labels[self.filtered_cluster_labels != -1]
        else:
            cols = None
        if len(colors) > 0:
            cmap = ListedColormap(colors)
        if not exclude_bad_clusters and self.filtered_cluster_labels is not None:
            # Draw excluded (-1) vectors first so clustered points render on top.
            plt.scatter(self.umap[:, 0][self.filtered_cluster_labels == -1], self.umap[:, 1][self.filtered_cluster_labels == -1], s=s, c=excluded_color)
            plt.scatter(self.umap[:, 0][self.filtered_cluster_labels != -1], self.umap[:, 1][self.filtered_cluster_labels != -1], s=s, c=cols, cmap=cmap)
        else:
            plt.scatter(self.umap[:, 0], self.umap[:, 1], s=s, c=cols, cmap=cmap)
        return
386 |
    def plot_expanded_mask(self, cmap='Greys'): # TODO
        """
        Plot the expanded area of the vectors (Not fully implemented yet).

        :param cmap: Colormap for the mask.
        """
        # Binary mask display; `self.expanded_mask` is assumed to be computed
        # by an earlier analysis step (not visible in this class's __init__).
        plt.imshow(self.expanded_mask, vmin=0, vmax=1, cmap=cmap)
        return
395 |
    def plot_correlation_map(self, cmap='hot'): # TODO
        """
        Plot the correlations near the vectors in the vector field (Not fully implemented yet).

        :param cmap: Colormap for the image.
        """
        # Narrow value window (0.995-1.0) to emphasize near-perfect correlations.
        plt.imshow(self.corr_map, vmin=0.995, vmax=1.0, cmap=cmap)
        plt.colorbar()
        return
405 |
    def plot_celltypes_map(self, background="black", centroid_indices=[], colors=None, cmap='jet', rotate=0, min_r=0.6, set_alpha=False, z=None):
        """
        Plot the merged cell-type map.

        :param background: Set background color of the cell-type map.
        :type background: str or list(float)
        :param centroid_indices: The centroids which will be in the cell type map. If not given, the cell-type map is drawn with all centroids.
        :type centroid_indices: list(int)
        :param colors: Color of the clusters. Overrides `cmap` parameter.
        :type colors: list(str), list(list(float))
        :param cmap: Colormap for the clusters.
        :type cmap: str or matplotlib.colors.Colormap
        :param rotate: Rotate the plot. Possible values are 0, 1, 2, and 3.
        :type rotate: int
        :param min_r: Minimum correlation threshold for the cell-type map.
            This value is only for the plotting, does not affect to the cell-type maps generated by `filter_celltypemaps`.
        :type min_r: float
        :param set_alpha: Set alpha of each pixel based on the correlation.
            Not properly implemented yet, doesn't work properly with the background other than black.
        :type set_alpha: bool
        :param z: Z index to slice 3D cell-type map.
            If not given, the slice at the middle will be used.
        :type z: int
        """
        if z is None:
            z = int(self.shape[2] / 2)
        num_ctmaps = np.max(self.filtered_celltype_maps) + 1

        if len(centroid_indices) == 0:
            centroid_indices = list(range(num_ctmaps))

        if colors is None:
            cmap_internal = plt.get_cmap(cmap)
            colors = cmap_internal([float(i) / (num_ctmaps - 1) for i in range(num_ctmaps)])

        # Cell types not listed in `centroid_indices` are painted with the
        # background color so only the selected ones stand out.
        all_colors = [background if not j in centroid_indices else colors[i] for i, j in enumerate(range(num_ctmaps))]
        cmap_internal = ListedColormap(all_colors)

        celltype_maps_internal = np.array(self.filtered_celltype_maps[..., z], copy=True)
        # -1 marks pixels with no assigned cell type; remap them to index 0 for
        # the colormap lookup, then force them fully transparent afterwards.
        empty_mask = celltype_maps_internal == -1
        celltype_maps_internal[empty_mask] = 0
        sctmap = cmap_internal(celltype_maps_internal)
        sctmap[empty_mask] = (0, 0, 0, 0)

        if set_alpha:
            # Scale per-pixel alpha from min_r up to 1.0 by correlation strength.
            alpha = np.array(self.max_correlations[..., z], copy=True)
            alpha[alpha < 0] = 0 # drop negative correlations
            alpha = min_r + alpha / (np.max(alpha) / (1.0 - min_r))
            sctmap[..., 3] = alpha

        if rotate == 1 or rotate == 3:
            sctmap = sctmap.swapaxes(0, 1)

        plt.gca().set_facecolor(background)
        plt.imshow(sctmap)

        if rotate == 1:
            plt.gca().invert_xaxis()
        elif rotate == 2:
            plt.gca().invert_xaxis()
            plt.gca().invert_yaxis()
        elif rotate == 3:
            plt.gca().invert_yaxis()

        return
471 |
472 | def plot_domains(self, background='white', colors=None, cmap='jet', rotate=0, domain_background=False, background_alpha=0.3, z=None):
473 | """
474 | Plot tissue domains.
475 |
476 | :param background: Background color of the plot.
477 | :type background: str or list(float)
478 | :param colors: Color of the domains. Overrides `cmap` parameter.
479 | :type colors: list(str), list(list(float))
480 | :param cmap: Colormap for the domains.
481 | :type cmap: str or matplotlib.colors.Colormap
482 | :param rotate: Rotate the plot. Possible values are 0, 1, 2, and 3.
483 | :type rotate: int
484 | :param domain_background: Show the area of the inferred domains behind the domain map.
485 | :type domain_background: bool
486 | :param background_alpha: The alpha value of the area of the inferred domains.
487 | :type background_alpha: float
488 | :param z: Z index to slice 3D domain map.
489 | If not given, the slice at the middle will be used.
490 | :type z: int
491 | """
492 | if z is None:
493 | z = int(self.shape[2] / 2)
494 |
495 | inferred_domains = self.inferred_domains[..., z]
496 | inferred_domains_cells = self.inferred_domains_cells[..., z]
497 |
498 | if rotate == 1 or rotate == 3:
499 | inferred_domains = inferred_domains.swapaxes(0, 1)
500 | inferred_domains_cells = inferred_domains_cells.swapaxes(0, 1)
501 |
502 | if colors is None:
503 | cmap_internal = plt.get_cmap(cmap)
504 | colors_domains = cmap_internal(np.linspace(0, 1, np.max(inferred_domains) + 1))
505 | colors_cells = cmap_internal(np.linspace(0, 1, np.max(inferred_domains_cells) + 1))
506 |
507 | colors_domains[:, 3] = background_alpha
508 | if -1 in inferred_domains:
509 | colors_domains = [[0, 0, 0, 0]] + list(colors_domains)
510 | if -1 in inferred_domains_cells:
511 | colors_cells = [[0, 0, 0, 0]] + list(colors_cells)
512 |
513 | plt.gca().set_facecolor(background)
514 | if domain_background:
515 | plt.imshow(inferred_domains, cmap=ListedColormap(colors_domains))
516 | plt.imshow(inferred_domains_cells, cmap=ListedColormap(colors_cells))
517 |
518 | if rotate == 1:
519 | plt.gca().invert_xaxis()
520 | elif rotate == 2:
521 | plt.gca().invert_xaxis()
522 | plt.gca().invert_yaxis()
523 | elif rotate == 3:
524 | plt.gca().invert_yaxis()
525 |
526 | return
527 |
    def plot_diagnostic_plot(self, centroid_index, cluster_name=None, cluster_color=None, cmap=None, rotate=0, z=None, use_embedding="tsne", known_signatures=[], correlation_methods=[]):
        """
        Plot the diagnostic plot. This method requires `plot_tsne` or `plot_umap` was run at least once before.

        :param centroid_index: Index of the centroid for the diagnostic plot.
        :type centroid_index: int
        :param cluster_name: The name of the cluster.
        :type cluster_name: str
        :param cluster_color: The color of the cluster. Overrides `cmap` parameter.
        :type cluster_color: str or list(float)
        :param cmap: The colormap for the clusters. The cluster color is determined using the `centroid_index` th color of the given colormap.
        :type cmap: str or matplotlib.colors.Colormap
        :param rotate: Rotate the plot. Possible values are 0, 1, 2, and 3.
        :type rotate: int
        :param z: Z index to slice 3D vector norm and cell-type map plots.
            If not given, the slice at the middle will be used.
        :type z: int
        :param use_embedding: The type of the embedding for the last panel. Possible values are "tsne" or "umap".
        :type use_embedding: str
        :param known_signatures: The list of known signatures, which will be displayed in the 3rd panel. Each signature can be 3-tuple or 4-tuple,
            containing 1) the name of signature, 2) gene labels of the signature, 3) gene expression values of the signature, 4) optionally the color of the signature.
        :type known_signatures: list(tuple)
        :param correlation_methods: The correlation method used to determine max correlation of the centroid to the `known_signatures`. Each method should be 2-tuple,
            containing 1) the name of the correlation, 2) the correlation function (compatible with the correlation methods available in `scipy.stats `_)
        :type correlation_methods: list(tuple)
        """
        if z is None:
            z = int(self.vf_norm.shape[2] / 2)
        # Centroid mean expression (p) and its standard deviation (e).
        p, e = self.centroids[centroid_index], self.centroids_stdev[centroid_index]
        if cluster_name is None:
            cluster_name = "Cluster #%d"%centroid_index

        if cluster_color is None:
            if cmap is None:
                cmap = plt.get_cmap("jet")
            cluster_color = cmap(centroid_index / (len(self.centroids) - 1))

        if len(correlation_methods) == 0:
            # Default to the fast C implementation of Pearson correlation.
            correlation_methods = [("r", corr), ]
        # One row for the centroid itself plus one per (method, signature) pair.
        total_signatures = len(correlation_methods) * len(known_signatures) + 1

        # Panel 1: local maxima of this cluster over the L1-norm image.
        ax = plt.subplot(1, 4, 1)
        mask = self.filtered_cluster_labels == centroid_index
        plt.scatter(self.local_maxs[0][mask], self.local_maxs[1][mask], c=[cluster_color])
        self.plot_l1norm(rotate=rotate, cmap="Greys", z=z)

        # Panel 2: cell-type map with this cluster highlighted in color and all
        # other assigned pixels in light gray.
        ax = plt.subplot(1, 4, 2)
        ctmap = np.zeros([self.filtered_celltype_maps.shape[0], self.filtered_celltype_maps.shape[1], 4])
        ctmap[self.filtered_celltype_maps[..., z] == centroid_index] = to_rgba(cluster_color)
        # NOTE(review): the second condition indexes slice 0 rather than z —
        # looks like it should be [..., z]; confirm before changing.
        ctmap[np.logical_and(self.filtered_celltype_maps[..., z] != centroid_index, self.filtered_celltype_maps[..., 0] > -1)] = [0.9, 0.9, 0.9, 1]
        if rotate == 1 or rotate == 3:
            ctmap = ctmap.swapaxes(0, 1)
        ax.imshow(ctmap)
        if rotate == 1:
            ax.invert_xaxis()
        elif rotate == 2:
            ax.invert_xaxis()
            ax.invert_yaxis()
        elif rotate == 3:
            ax.invert_yaxis()

        # Panel 3 (top row): the centroid's gene expression with error bars.
        ax = plt.subplot(total_signatures, 4, 3)
        ax.bar(self.genes, p, yerr=e)
        ax.set_title(cluster_name)
        plt.xlim([-1, len(self.genes)])
        plt.xticks(rotation=90)

        # Panel 3 (following rows): best-matching known signature for each
        # correlation method, stacked below the centroid bar plot.
        subplot_idx = 0
        for signature in known_signatures:
            sig_title, sig_labels, sig_values = signature[:3]
            sig_colors_defined = False
            if len(signature) == 4:
                sig_colors = signature[3]
                sig_colors_defined = True
            for corr_label, corr_func in correlation_methods:
                corr_results = [corr_func(p, sig_value) for sig_value in sig_values]
                # scipy.stats functions return (statistic, pvalue); keep the statistic.
                corr_results = [e[0] if hasattr(e, "__getitem__") else e for e in corr_results]
                max_corr_idx = np.argmax(corr_results)
                ax = plt.subplot(total_signatures, 4, 7+subplot_idx*4)
                lbl = sig_labels[max_corr_idx]
                if sig_colors_defined:
                    col = sig_colors[max_corr_idx]
                else:
                    col = cluster_color
                ax.bar(self.genes, sig_values[max_corr_idx], color=col)
                ax.set_title("%s in %s (max %s, %.3f)"%(lbl, sig_title, corr_label, corr_results[max_corr_idx]))
                plt.xlim([-1, len(self.genes)])
                plt.xticks(rotation=90)
                subplot_idx += 1

        # Panel 4: embedding scatter with this cluster highlighted.
        if use_embedding == 'tsne':
            embedding = self.tsne
            fig_title = "t-SNE, %d vectors"%sum(self.filtered_cluster_labels == centroid_index)
        elif use_embedding == 'umap':
            embedding = self.umap
            fig_title = "UMAP, %d vectors"%sum(self.filtered_cluster_labels == centroid_index)
        good_vectors = self.filtered_cluster_labels[self.filtered_cluster_labels != -1]
        ax = plt.subplot(1, 4, 4)
        ax.scatter(embedding[:, 0][good_vectors != centroid_index], embedding[:, 1][good_vectors != centroid_index], c=[[0.8, 0.8, 0.8, 1],], s=80)
        ax.scatter(embedding[:, 0][good_vectors == centroid_index], embedding[:, 1][good_vectors == centroid_index], c=[cluster_color], s=80)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_title(fig_title)
632 | def plot_celltype_composition(self, domain_index, cell_type_colors=None, cell_type_cmap='jet', cell_type_orders=None, label_cutoff=0.03, pctdistance=1.15, **kwargs):
633 | """
634 | Plot composition of cell types in each domain.
635 |
636 | :param domain_index: Index of the domain.
637 | :type domain_index: int
638 | :param cell_type_colors: The colors of the cell types. Overrides `cell_type_cmap` parameter.
639 | :type cell_type_colors: str or list(float)
640 | :param cell_type_cmap: The colormap for the cell types.
641 | :type cell_type_cmap: str or matplotlib.colors.Colormap
642 | :param label_cutoff: The minimum cutoff of the labeling of the percentage. From 0 to 1.
643 | :type label_cutoff: float
644 | :param pctdistance: The distance from center of the pie to the labels.
645 | :type pctdistance: float
646 | :param kwargs: More kewward arguments for the matplotlib.pyplot.pie.
647 | """
648 | if cell_type_colors is None:
649 | cmap = plt.get_cmap(cell_type_cmap)
650 | cell_type_colors = cmap(np.arange(0, len(self.centroids)) / (len(self.centroids) - 1))
651 |
652 | if cell_type_orders is not None:
653 | ctcs = np.array(cell_type_colors)[cell_type_orders]
654 | p = self.inferred_domains_compositions[domain_index][cell_type_orders]
655 | else:
656 | ctcs = cell_type_colors
657 | p = self.inferred_domains_compositions[domain_index]
658 | plt.pie(p,
659 | colors=ctcs,
660 | autopct=lambda e: '%.1f %%'%e if e > 3 else '',
661 | pctdistance=pctdistance, **kwargs)
662 |
663 | def plot_spatial_relationships(self, cluster_labels, *args, **kwargs):
664 | """
665 | Plot spatial relationship between cell types, presented as a heatmap.
666 |
667 | :param cluster_labels: x- and y-axis label of the heatmap.
668 | :type cluster_labels: list(str)
669 | :param args: More arguments for the seaborn.heatmap.
670 | :param kwargs: More keyword arguments for the seaborn.heatmap.
671 | """
672 | sns.heatmap(self.spatial_relationships, *args, xticklabels=cluster_labels, yticklabels=cluster_labels, **kwargs)
673 |
674 | def get_celltype_correlation(self, idx):
675 | """
676 | Get correlation values of a cell type map between the given cluster's centroid to the vector field.
677 |
678 | :param idx: Index of a cluster
679 | :type idx: int
680 | :return: Correlation values of a cell type map of the specified cluster's centroid
681 | :rtype: numpy.ndarray
682 | """
683 | rtn = np.zeros_like(self.max_correlations) - 1
684 | rtn[self.celltype_maps == idx] = self.max_correlations[self.celltype_maps == idx]
685 | return rtn
686 |
687 |
688 | class SSAMAnalysis(object):
689 | """
690 | A class to run SSAM analysis.
691 |
692 | :param dataset: A SSAMDataset object.
693 | :type dataset: SSAMDataset
694 | :param ncores: Number of cores for parallel computation. If a negative value is given,
695 | ((# of all available cores on system) - abs(ncores)) cores will be used.
696 | :type ncores: int
697 | :param save_dir: Directory to store intermediate data (e.g. density / vector field).
698 | Any data which already exists will be loaded and reused.
699 | :type save_dir: str
700 | :param verbose: If True, then it prints out messages during the analysis.
701 | :type verbose: bool
702 | """
703 | def __init__(self, dataset, ncores=-1, save_dir="", verbose=False):
704 |
705 | self.dataset = dataset
706 | if not ncores > 0:
707 | ncores += multiprocessing.cpu_count()
708 | if ncores > multiprocessing.cpu_count():
709 | ncores = multiprocessing.cpu_count()
710 | if not ncores > 0:
711 | raise ValueError("Invalid number of cores.")
712 | self.ncores = ncores
713 | self.use_savedir = True
714 | if len(save_dir) == 0:
715 | save_dir = mkdtemp()
716 | self.use_savedir = False
717 | if not os.path.exists(save_dir):
718 | os.makedirs(save_dir)
719 | self.save_dir = save_dir
720 | self.verbose = verbose
721 |
722 | def __m__(self, message):
723 | if self.verbose:
724 | print(message)
725 |
    def run_kde(self, kernel="gaussian", bandwidth=2.5, sampling_distance=1.0, use_mmap=False):
        """
        Run KDE to estimate density of mRNA.

        The resulting vector field (one density map per gene, scaled by the
        number of mRNA spots of that gene) is stored in `self.dataset.vf`.
        Per-gene densities and the final field are cached in `self.save_dir`
        and reused on subsequent calls with the same parameters.

        :param kernel: Kernel for density estimation.
        :type kernel: str
        :param bandwidth: Parameter to adjust width of kernel.
            Set it 2.5 to make FWTM of Gaussian kernel to be ~10um (assume that avg. cell diameter is ~10um).
        :type bandwidth: float
        :param sampling_distance: Grid spacing in um.
        :type sampling_distance: float
        :param use_mmap: Use MMAP to reduce memory usage during analysis.
            Turning on this option can reduce the amount of memory used by SSAM analysis, but also lower the analysis speed.
        :type use_mmap: bool
        """
        # Small helpers for (de)serializing the vector field to the save dir.
        def save_pickle(fn, o):
            with open(fn, "wb") as f:
                return pickle.dump(o, f, protocol=4)
        def load_pickle(fn):
            with open(fn, "rb") as f:
                return pickle.load(f)

        # Grid dimensions of the vector field: one cell per sampling_distance.
        steps = [int(np.ceil(e / sampling_distance)) for e in self.dataset.shape]
        total_steps = np.prod(steps)
        vf_shape = tuple(steps + [len(self.dataset.genes), ])
        # Cache file name encodes sampling distance and bandwidth so runs with
        # different parameters do not clobber each other's results.
        vf_filename = os.path.join(self.save_dir, 'vf_sd%s_bw%s'%(
            ('%f' % sampling_distance).rstrip('0').rstrip('.'),
            ('%f' % bandwidth).rstrip('0').rstrip('.')
        ))
        if (use_mmap and not os.path.exists(vf_filename + '.dat')) or \
                (not use_mmap and not os.path.exists(vf_filename + '.pkl') and not os.path.exists(vf_filename + '.dat')):
            # If VF file doesn't exist, then run KDE
            if use_mmap:
                # Write into a .tmp file first; it is renamed only after the
                # whole field is computed, so partial runs are never reused.
                vf = np.memmap(vf_filename + '.dat.tmp', dtype='double', mode='w+', shape=vf_shape)
            else:
                vf = np.zeros(vf_shape)
            chunksize = min(int(np.ceil(total_steps / self.ncores)), 100000)
            def yield_chunk():
                # Yield (chunksize, 3) arrays of grid coordinates covering the
                # whole field; the last chunk is shrunk to the remainder.
                chunk = np.zeros(shape=[chunksize, len(steps)], dtype=int)
                cnt = 0
                remaining_cnt = total_steps
                for x in range(steps[0]):
                    for y in range(steps[1]):
                        for z in range(steps[2]):
                            chunk[cnt, :] = [x, y, z]
                            cnt += 1
                            if cnt == chunksize:
                                yield chunk
                                remaining_cnt -= cnt
                                cnt = 0
                                chunk = np.zeros(shape=[min(chunksize, remaining_cnt), len(steps)], dtype=int)
                if cnt > 0:
                    yield chunk

            def yield_chunks():
                # Group chunks into batches of `ncores` so each batch can be
                # dispatched to the worker pool in one go.
                chunks = []
                for chunk in yield_chunk():
                    chunks.append(chunk)
                    if len(chunks) == self.ncores:
                        yield chunks
                        chunks = []
                if len(chunks) > 0:
                    yield chunks

            # Pool is created lazily: only the non-Gaussian (sklearn) path
            # needs it; the Gaussian path parallelizes inside calc_kde.
            pool = None
            for gidx, gene_name in enumerate(self.dataset.genes):
                pdf_filename = os.path.join(self.save_dir, 'pdf_sd%s_bw%s_%s.npy'%(
                    ('%f' % sampling_distance).rstrip('0').rstrip('.'),
                    ('%f' % bandwidth).rstrip('0').rstrip('.'),
                    gene_name)
                )
                if os.path.exists(pdf_filename):
                    # Reuse a previously computed per-gene density.
                    self.__m__("Loading %s..."%gene_name)
                    pdf = np.load(pdf_filename)
                else:
                    self.__m__("Running KDE for %s..."%gene_name)
                    pdf = np.zeros(shape=vf_shape[:-1])
                    if kernel != "gaussian":
                        # Generic kernels go through sklearn's KernelDensity,
                        # evaluated in parallel over coordinate chunks.
                        kde = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(self.dataset.locations[gidx])
                        if pool is None:
                            pool = multiprocessing.Pool(self.ncores)
                    else:
                        X, Y, Z = [self.dataset.locations[gidx][:, i] for i in range(3)]
                    for chunks in yield_chunks():
                        if kernel == "gaussian":
                            pdf_chunks = [calc_kde(bandwidth, X, Y, Z, chunk[:, 0], chunk[:, 1], chunk[:, 2], 0, self.ncores) for chunk in chunks]
                        else:
                            pdf_chunks = pool.map(kde.score_samples, [chunk * sampling_distance for chunk in chunks])
                        for pdf_chunk, pos_chunk in zip(pdf_chunks, chunks):
                            if kernel == "gaussian":
                                pdf[pos_chunk[:, 0], pos_chunk[:, 1], pos_chunk[:, 2]] = pdf_chunk
                            else:
                                # score_samples returns log-density; exponentiate.
                                pdf[pos_chunk[:, 0], pos_chunk[:, 1], pos_chunk[:, 2]] = np.exp(pdf_chunk)
                    # Normalize to a probability density, then cache it.
                    pdf /= np.sum(pdf)
                    np.save(pdf_filename, pdf)
                # Scale density by the gene's mRNA count to get expression.
                vf[..., gidx] = pdf * len(self.dataset.locations[gidx])
            if use_mmap:
                vf.flush()
                # Atomically publish the finished field, then reopen read-only.
                os.rename(vf_filename + '.dat.tmp', vf_filename + '.dat')
                vf = np.memmap(vf_filename + '.dat', dtype='double', mode='r', shape=vf_shape)
            elif self.use_savedir:
                save_pickle(vf_filename + '.pkl', vf)
        elif not use_mmap:
            # Cached field exists: prefer the pickle; fall back to the mmap
            # file (copied into memory, and re-pickled for next time).
            if os.path.exists(vf_filename + '.pkl'):
                vf = load_pickle(vf_filename + '.pkl')
            else: # == os.path.exists(vf_filename + '.dat'):
                vf_tmp = np.memmap(vf_filename + '.dat', dtype='double', mode='r', shape=vf_shape)
                vf = np.array(vf_tmp, copy=True)
                if self.use_savedir:
                    save_pickle(vf_filename + '.pkl', vf)
        elif use_mmap:
            vf = np.memmap(vf_filename + '.dat', dtype='double', mode='r', shape=vf_shape)
        self.dataset.vf = vf
        return
840 |
841 | def run_fast_kde(self, kernel='gaussian', bandwidth=2.5, sampling_distance=1.0, re_run=False, use_mmap=False):
842 | """
843 | Run KDE faster than `run_kde` method. This method uses precomputed kernels to estimate density of mRNA.
844 |
845 | :param kernel: Kernel for density estimation. Currently only Gaussian kernel is supported.
846 | :type kernel: str
847 | :param bandwidth: Parameter to adjust width of kernel.
848 | Set it 2.5 to make FWTM of Gaussian kernel to be ~10um (assume that avg. cell diameter is ~10um).
849 | :type bandwidth: float
850 | :param sampling_distance: Grid spacing in um. Currently only 1 um is supported.
851 | :type sampling_distance: float
852 | :param re_run: Recomputes KDE, ignoring all existing precomputed densities in the data directory.
853 | :type re_run: bool
854 | :param use_mmap: Use MMAP to reduce memory usage during analysis. Currently not implemented, this option should be always disabled.
855 | :type use_mmap: bool
856 | """
857 | if kernel != 'gaussian':
858 | raise NotImplementedError('Only Gaussian kernel is supported.')
859 | if sampling_distance != 1.0:
860 | raise NotImplementedError('Sampling distance should be 1.')
861 | if use_mmap:
862 | raise NotImplementedError('MMAP is not supported yet.')
863 |
864 | def save_pickle(fn, o):
865 | with open(fn, "wb") as f:
866 | return pickle.dump(o, f, protocol=4)
867 | def load_pickle(fn):
868 | with open(fn, "rb") as f:
869 | return pickle.load(f)
870 |
871 | vf_filename = os.path.join(self.save_dir, 'vf_sd%s_bw%s.pkl'%(
872 | ('%f' % sampling_distance).rstrip('0').rstrip('.'),
873 | ('%f' % bandwidth).rstrip('0').rstrip('.')
874 | ))
875 |
876 | if os.path.exists(vf_filename) and not re_run:
877 | self.dataset.vf = load_pickle(vf_filename)
878 | return
879 |
880 | self.dataset.vf = np.zeros(self.dataset.shape+(len(self.dataset.genes),))
881 | idcs = np.argsort([len(i) for i in self.dataset.locations])[::-1]
882 | pdf_filenames = [os.path.join(self.save_dir, 'pdf_sd%s_bw%s_%s.npy'%(
883 | ('%f' % sampling_distance).rstrip('0').rstrip('.'),
884 | ('%f' % bandwidth).rstrip('0').rstrip('.'),
885 | self.dataset.genes[gidx])
886 | ) for gidx in idcs]
887 |
888 | if not re_run:
889 | idcs = np.where([not os.path.exists(fn) for fn in pdf_filenames])[0]
890 | for gidx in np.where([os.path.exists(fn) for fn in pdf_filenames])[0]:
891 | print("Loading gene %s..."%self.dataset.genes[gidx])
892 | self.dataset.vf[..., gidx] = np.load(pdf_filenames[gidx])
893 |
894 | if len(idcs) > 0:
895 | with closing(Pool(self.ncores, maxtasksperchild=1)) as p:
896 | res = p.imap(_fast_gaussian_kde,[(bandwidth,
897 | self.save_dir,
898 | self.dataset.genes[gidx],
899 | self.dataset.shape,
900 | self.dataset.locations[gidx],
901 | sampling_distance) for gidx in idcs])
902 | for gidx, pd in zip(idcs, res): # imap returns result in the same order as the input array
903 | self.dataset.vf[..., gidx] = pd
904 | np.save(pdf_filenames[gidx], pd)
905 | p.close()
906 | p.join()
907 | save_pickle(vf_filename, self.dataset.vf)
908 |
909 | def calc_correlation_map(self, corr_size=3):
910 | """
911 | Calculate local correlation map of the vector field.
912 |
913 | :param corr_size: Size of square (or cube) that is used to compute the local correlation values.
914 | This value should be an odd number.
915 | :type corr_size: int
916 | """
917 |
918 | corr_map = calc_corrmap(self.dataset.vf, ncores=self.ncores, size=int(corr_size/2))
919 | self.dataset.corr_map = np.array(corr_map, copy=True)
920 | return
921 |
922 | def find_localmax(self, search_size=3, min_norm=0, min_expression=0, mask=None):
923 | """
924 | Find local maxima vectors in the norm of the vector field.
925 |
926 | :param search_size: Size of square (or cube in 3D) that is used to search for the local maxima.
927 | This value should be an odd number.
928 | :type search_size: int
929 | :param min_norm: Minimum value of norm at the local maxima.
930 | :type min_norm: float
931 | :param min_expression: Minimum value of gene expression in a unit pixel at the local maxima.
932 | mask: numpy.ndarray, optional
933 | If given, find vectors in the masked region, instead of the whole image.
934 | :type min_expression: float
935 | """
936 |
937 | max_mask = self.dataset.vf_norm == ndimage.maximum_filter(self.dataset.vf_norm, size=search_size)
938 | max_mask &= self.dataset.vf_norm > min_norm
939 | if min_expression > 0:
940 | exp_mask = np.zeros_like(max_mask)
941 | for i in range(len(self.dataset.genes)):
942 | exp_mask |= self.dataset.vf[..., i] > min_expression
943 | max_mask &= exp_mask
944 | if mask is not None:
945 | max_mask &= mask
946 | local_maxs = np.where(max_mask)
947 | self.__m__("Found %d local max vectors."%len(local_maxs[0]))
948 | self.dataset.local_maxs = local_maxs
949 | return
950 |
951 | def expand_localmax(self, r=0.99, min_pixels=7, max_pixels=1000):
952 | """
953 | Merge the vectors nearby the local max vectors.
954 | Only the vectors with the large Pearson correlation values are merged.
955 |
956 | :param r: Minimum Pearson's correlation coefficient to look for the nearby vectors.
957 | :type r: float
958 | :param min_pixels: Minimum number of pixels to merge.
959 | :type min_pixels: float
960 | :param max_pixels: Maximum number of pixels to merge.
961 | :type max_pixels: float
962 | """
963 |
964 | expanded_vecs = []
965 | self.__m__("Expanding local max vectors...")
966 | fill_dx = np.meshgrid(range(3), range(3), range(3))
967 | fill_dx = np.array(list(zip(*[np.ravel(e) - 1 for e in fill_dx])))
968 | mask = np.zeros(self.dataset.vf.shape[:-1]) # TODO: sparse?
969 | nlocalmaxs = len(self.dataset.local_maxs[0])
970 | valid_pos_list = []
971 | for cnt, idx in enumerate(range(nlocalmaxs), start=1):
972 | local_pos = tuple(i[idx] for i in self.dataset.local_maxs)
973 | filled_pos = tuple(zip(*flood_fill(local_pos, self.dataset.vf, r, min_pixels, max_pixels)))
974 | if len(filled_pos) > 0:
975 | mask[filled_pos] = 1
976 | valid_pos_list.append(local_pos)
977 | expanded_vecs.append(np.sum(self.dataset.vf[filled_pos], axis=0))
978 | if cnt % 100 == 0:
979 | self.__m__("Processed %d/%d..."%(cnt, nlocalmaxs))
980 | self.__m__("Processed %d/%d..."%(cnt, nlocalmaxs))
981 | self.dataset.expanded_vectors = np.array(expanded_vecs)
982 | self.dataset.expanded_mask = mask
983 | self.dataset.valid_local_maxs = valid_pos_list
984 | return
985 |
    def normalize_vectors_sctransform(self, use_expanded_vectors=False, normalize_vf=True, vst_kwargs={}):
        """
        Normalize and regularize vectors using SCtransform

        :param use_expanded_vectors: If True, use averaged vectors nearby local maxima
            of the vector field.
        :type use_expanded_vectors: bool
        :param normalize_vf: If True, the vector field is also normalized
            using the same parameters used to normalize the local maxima.
        :type normalize_vf: bool
        :param vst_kwargs: Optional keywords arguments for sctransform's vst function.
        :type vst_kwargs: dict
        """
        if use_expanded_vectors:
            vec = np.array(self.dataset.expanded_vectors, copy=True)
        else:
            vec = np.array(self.dataset.vf[self.dataset.local_maxs], copy=True)
        
        norm_vec, fit_params = run_sctransform(vec, **vst_kwargs)
        self.dataset.normalized_vectors = np.array(norm_vec)
        
        if normalize_vf:
            # Apply the regression model fitted on the local maxima to every
            # pixel of the vector field with a nonzero norm.
            vf_nonzero = self.dataset.vf[self.dataset.vf_norm > 0]
            nvec = vf_nonzero.shape[0]
            fit_params = np.array(fit_params).T
            # Design matrix: intercept column + log10 total expression per pixel.
            regressor_data = np.ones([nvec, 2])
            regressor_data[:, 1] = np.log10(np.sum(vf_nonzero, axis=1))
            
            # NOTE(review): assumes after transpose fit_params[0] is the
            # dispersion (theta) and rows 1: are the regression coefficients —
            # confirm against run_sctransform's return layout.
            mu = np.exp(np.dot(regressor_data, fit_params[1:, :]))
            # Pearson residuals; zero/invalid divisions are silenced here and
            # cleaned up by nan_to_num below.
            with np.errstate(divide='ignore', invalid='ignore'):
                res = (vf_nonzero - mu) / np.sqrt(mu + mu**2 / fit_params[0, :])
            self.dataset.normalized_vf = np.zeros_like(self.dataset.vf)
            self.dataset.normalized_vf[self.dataset.vf_norm > 0] = np.nan_to_num(res)
        return
1020 |
1021 | def normalize_vectors(self, use_expanded_vectors=False, normalize_gene=False, normalize_vector=False, normalize_median=False, size_after_normalization=1e4, log_transform=False, scale=False):
1022 | """
1023 | Normalize and regularize vectors
1024 |
1025 | :param use_expanded_vectors: If True, use averaged vectors nearby local maxima of the vector field.
1026 | :type use_expanded_vectors: bool
1027 | :param normalize_gene: If True, normalize vectors by sum of each gene expression across all vectors.
1028 | :type normalize_gene: bool
1029 | :param normalize_vector: If True, normalize vectors by sum of all gene expression of each vector.
1030 | :type normalize_vector: bool
1031 | :param log_transform: If True, vectors are log transformed.
1032 | :type log_transform: bool
1033 | :param scale: If True, vectors are z-scaled (mean centered and scaled by stdev).
1034 | :type scale: bool
1035 | """
1036 | if use_expanded_vectors:
1037 | vec = np.array(self.dataset.expanded_vectors, copy=True)
1038 | else:
1039 | vec = np.array(self.dataset.vf[self.dataset.local_maxs], copy=True)
1040 | if normalize_gene:
1041 | vec = preprocessing.normalize(vec, norm=norm, axis=0) * size_after_normalization # Normalize per gene
1042 | if normalize_vector:
1043 | vec = preprocessing.normalize(vec, norm="l1", axis=1) * size_after_normalization # Normalize per vector
1044 | if normalize_median:
1045 | def n(v):
1046 | s, m = np.sum(v, axis=1), np.median(v, axis=1)
1047 | s[m > 0] = s[m > 0] / m[m > 0]
1048 | s[m == 0] = 0
1049 | v[s > 0] = v[s > 0] / s[s > 0][:, np.newaxis]
1050 | v[v == 0] = 0
1051 | return v
1052 | vec = n(vec)
1053 | if log_transform:
1054 | vec = np.log2(vec + 1)
1055 | if scale:
1056 | vec = preprocessing.scale(vec)
1057 | self.dataset.normalized_vectors = vec
1058 | return
1059 |
1060 | def __correct_cluster_labels(self, cluster_labels, centroid_correction_threshold):
1061 | new_labels = np.array(cluster_labels, copy=True)
1062 | if centroid_correction_threshold < 1.0:
1063 | for cidx in np.unique(cluster_labels):
1064 | if cidx == -1:
1065 | continue
1066 | prev_midx = -1
1067 | while True:
1068 | vecs = self.dataset.normalized_vectors[new_labels == cidx]
1069 | vindices = np.where(new_labels == cidx)[0]
1070 | midx = vindices[np.argmin(np.sum(cdist(vecs, vecs), axis=0))]
1071 | if midx == prev_midx:
1072 | break
1073 | prev_midx = midx
1074 | m = self.dataset.normalized_vectors[midx]
1075 | for vidx, v in zip(vindices, vecs):
1076 | if corr(v, m) < centroid_correction_threshold:
1077 | new_labels[vidx] = -1
1078 | return new_labels
1079 |
1080 | def __calc_centroid(self, cluster_labels):
1081 | centroids = []
1082 | centroids_stdev = []
1083 | #medoids = []
1084 | for lbl in sorted(list(set(cluster_labels))):
1085 | if lbl == -1:
1086 | continue
1087 | cl_vecs = self.dataset.normalized_vectors[cluster_labels == lbl, :]
1088 | #cl_dists = scipy.spatial.distance.cdist(cl_vecs, cl_vecs, metric)
1089 | #medoid = cl_vecs[np.argmin(np.sum(cl_dists, axis=0))]
1090 | centroid = np.mean(cl_vecs, axis=0)
1091 | centroid_stdev = np.std(cl_vecs, axis=0)
1092 | #medoids.append(medoid)
1093 | centroids.append(centroid)
1094 | centroids_stdev.append(centroid_stdev)
1095 | return centroids, centroids_stdev#, medoids
1096 |
    def cluster_vectors(self, pca_dims=10, min_cluster_size=0, resolution=0.6, prune=1.0/15.0, snn_neighbors=30, max_correlation=1.0,
                        metric="correlation", subclustering=False, dbscan_eps=0.4, centroid_correction_threshold=0.8, random_state=0):
        """
        Cluster the given vectors using the specified clustering method.

        :param pca_dims: Number of principal componants used for clustering.
        :type pca_dims: int
        :param min_cluster_size: Set minimum cluster size.
        :type min_cluster_size: int
        :param resolution: Resolution for Louvain community detection.
        :type resolution: float
        :param prune: Threshold for Jaccard index (weight of SNN network). If it is smaller than prune, it is set to zero.
        :type prune: float
        :param snn_neighbors: Number of neighbors for SNN network.
        :type snn_neighbors: int
        :param max_correlation: Clusters with higher correlation to this value will be merged.
        :type max_correlation: bool
        :param metric: Metric for calculation of distance between vectors in gene expression space.
        :type metric: str
        :param subclustering: If True, each cluster will be clustered once again with DBSCAN algorithm to find more subclusters.
        :type subclustering: bool
        :param dbscan_eps: `eps` parameter of DBSCAN, used only when `subclustering` is True.
        :type dbscan_eps: float
        :param centroid_correction_threshold: Centroid will be recalculated with the vectors
            which have the correlation to the cluster medoid equal or higher than this value.
        :type centroid_correction_threshold: float
        :param random_state: Random seed or scikit-learn's random state object to replicate the same result
        :type random_state: int or random state object
        """

        vecs_normalized = self.dataset.normalized_vectors
        # Clustering operates in PCA space, not on the raw normalized vectors.
        vecs_normalized_dimreduced = PCA(n_components=pca_dims, random_state=random_state).fit_transform(vecs_normalized)

        def cluster_vecs(vecs):
            # Louvain community detection on a shared-nearest-neighbor graph
            # built from a kNN connectivity graph.
            k = min(snn_neighbors, vecs.shape[0])
            knn_graph = kneighbors_graph(vecs, k, mode='connectivity', include_self=True, metric=metric).todense()
            intersections = np.dot(knn_graph, knn_graph.T)
            snn_graph = intersections / (k + (k - intersections)) # borrowed from Seurat
            snn_graph[snn_graph < prune] = 0
            G = nx.from_numpy_matrix(snn_graph)
            partition = community.best_partition(G, resolution=resolution, random_state=random_state)
            lbls = np.array(list(partition.values()))
            return lbls

        def remove_small_clusters(lbls, lbls2=None):
            # Mark clusters smaller than min_cluster_size as noise (-1) and
            # renumber the surviving labels consecutively from 0. If lbls2 is
            # given, apply the same relabeling to it in lockstep.
            small_clusters = []
            cluster_indices = []
            lbls = np.array(lbls)
            for lbl in np.unique(lbls):
                if lbl == -1:
                    continue
                cnt = np.sum(lbls == lbl)
                if cnt < min_cluster_size:
                    small_clusters.append(lbl)
                else:
                    cluster_indices.append(lbl)
            for lbl in small_clusters:
                lbls[lbls == lbl] = -1
            tmp = np.array(lbls, copy=True)
            for i, idx in enumerate(cluster_indices):
                lbls[tmp == idx] = i
            if lbls2 is not None:
                for lbl in small_clusters:
                    lbls2[lbls2 == lbl] = -1
                tmp = np.array(lbls2, copy=True)
                for i, idx in enumerate(cluster_indices):
                    lbls2[tmp == idx] = i
                return lbls, lbls2
            else:
                return lbls

        if subclustering:
            # Two-stage clustering: Louvain super-clusters, each refined with
            # DBSCAN; final labels are numbered globally across super-clusters.
            super_lbls = cluster_vecs(vecs_normalized_dimreduced)
            dbscan = DBSCAN(eps=dbscan_eps, min_samples=min_cluster_size, metric=metric)
            all_lbls = np.zeros_like(super_lbls)
            global_lbl_idx = 0
            for super_lbl in set(list(super_lbls)):
                super_lbl_idx = np.where(super_lbls == super_lbl)[0]
                if super_lbl == -1:
                    all_lbls[super_lbl_idx] = -1
                    continue
                sub_lbls = dbscan.fit(vecs_normalized_dimreduced[super_lbl_idx]).labels_
                for sub_lbl in set(list(sub_lbls)):
                    if sub_lbl == -1:
                        all_lbls[tuple([super_lbl_idx[sub_lbls == sub_lbl]])] = -1
                        continue
                    all_lbls[tuple([super_lbl_idx[sub_lbls == sub_lbl]])] = global_lbl_idx
                    global_lbl_idx += 1
        else:
            all_lbls = cluster_vecs(vecs_normalized_dimreduced)

        # First pass: drop low-correlation members, small clusters, and
        # compute initial centroids.
        new_labels = self.__correct_cluster_labels(all_lbls, centroid_correction_threshold)
        new_labels, all_lbls = remove_small_clusters(new_labels, all_lbls)
        centroids, centroids_stdev = self.__calc_centroid(new_labels)

        # Merge clusters whose centroids correlate above max_correlation
        # (hierarchical clustering of centroids with correlation distance).
        merge_candidates = []
        if max_correlation < 1.0:
            Z = scipy.cluster.hierarchy.linkage(centroids, metric='correlation')
            clbls = scipy.cluster.hierarchy.fcluster(Z, 1 - max_correlation, 'distance')
            for i in set(clbls):
                leaf_indices = np.where(clbls == i)[0]
                if len(leaf_indices) > 1:
                    merge_candidates.append(leaf_indices)
            removed_indices = []
            for cand in merge_candidates:
                for i in cand[1:]:
                    all_lbls[all_lbls == i] = cand[0]
                    removed_indices.append(i)
            for i in sorted(removed_indices, reverse=True):
                all_lbls[all_lbls > i] -= 1

        # Second pass (runs regardless of merging): recompute filtered labels
        # and centroids from the final label assignment.
        new_labels = self.__correct_cluster_labels(all_lbls, centroid_correction_threshold)
        new_labels, all_lbls = remove_small_clusters(new_labels, all_lbls)
        centroids, centroids_stdev = self.__calc_centroid(new_labels)

        self.dataset.cluster_labels = all_lbls
        self.dataset.filtered_cluster_labels = new_labels
        self.dataset.centroids = np.array(centroids)
        self.dataset.centroids_stdev = np.array(centroids_stdev)
        #self.dataset.medoids = np.array(medoids)

        self.__m__("Found %d clusters"%len(centroids))
        return
1218 |
    def rescue_cluster(self, gene_names, expression_thresholds=[]):
        """
        Manually create a new cluster from the local maxima that express the given marker genes.

        The local maxima whose expression of *every* gene in `gene_names` exceeds
        the corresponding threshold are reassigned to a brand-new cluster, and the
        new cluster's centroid and stdev are appended to the existing arrays.

        :param gene_names: Names of the marker genes that define the rescued cluster.
        :type gene_names: list(str)
        :param expression_thresholds: Minimum expression of each marker gene. When
            empty, a per-gene threshold is derived automatically with Otsu's method.
        :type expression_thresholds: list(float)
        """
        assert len(gene_names) > 0
        assert len(expression_thresholds) == 0 or len(gene_names) == len(expression_thresholds)
        
        expression_thresholds = list(expression_thresholds)
        # Gene expression vectors at every local maximum.
        lm_vectors = self.dataset.vf[self.dataset.local_maxs[0], self.dataset.local_maxs[1], self.dataset.local_maxs[2], :]
        lm_mask = np.ones(len(lm_vectors), dtype=bool)
        for i in range(len(gene_names)):
            rg_idx = self.dataset.genes.index(gene_names[i])
            if len(expression_thresholds) == 0:
                # No explicit thresholds: derive one from the gene's density map.
                expression_threshold = filters.threshold_otsu(self.dataset.vf[..., rg_idx])
            else:
                expression_threshold = float(expression_thresholds[i])
            # Keep only the maxima expressing ALL marker genes above threshold.
            lm_mask = np.logical_and(lm_mask, lm_vectors[:, rg_idx] > expression_threshold)
        
        rg_vectors = lm_vectors[lm_mask]
        rg_centroid = np.mean(rg_vectors, axis=0)
        rg_centroid_stdev = np.std(rg_vectors, axis=0)
        
        # The new cluster takes the next unused label index.
        self.dataset.cluster_labels[lm_mask] = len(self.dataset.centroids)
        self.dataset.filtered_cluster_labels[lm_mask] = len(self.dataset.centroids)
        self.dataset.centroids = np.append(self.dataset.centroids, [rg_centroid], axis=0)
        self.dataset.centroids_stdev = np.append(self.dataset.centroids_stdev, [rg_centroid_stdev], axis=0)
1242 |
1243 | def exclude_and_merge_clusters(self, exclude=[], merge=[], centroid_correction_threshold=0.8):
1244 | """
1245 | Exclude bad clusters (including the vectors in the clusters), and merge similar clusters for the downstream analysis.
1246 |
1247 | :param exclude: List of cluster indices to be excluded.
1248 | :type exclude: list(int)
1249 | :param merge: List of list of cluster indices to be merged.
1250 | :type merge: list(list(int))
1251 | :param centroid_correction_threshold: Centroid will be recalculated with the vectors
1252 | which have the correlation to the cluster medoid equal or higher than this value.
1253 | :type centroid_correction_threshold: float
1254 | """
1255 | exclude = list(exclude)
1256 | merge = np.array(merge)
1257 | for centroids in merge:
1258 | centroids = np.unique(centroids)
1259 | for centroid in centroids[1:][::-1]:
1260 | self.dataset.cluster_labels[self.dataset.cluster_labels == centroid] = centroids[0]
1261 | exclude.append(centroid)
1262 | exclude = sorted(exclude)
1263 |
1264 | mask = np.ones(len(self.dataset.centroids), np.bool)
1265 | mask[exclude] = False
1266 |
1267 | #self.dataset.centroids = self.dataset.centroids[mask]
1268 | #self.dataset.centroids_stdev = self.dataset.centroids_stdev[mask]
1269 | #self.dataset.medoids = self.dataset.medoids[mask]
1270 |
1271 | mask = np.ones(len(self.dataset.cluster_labels), np.bool)
1272 | for centroid in exclude:
1273 | # There will be no vectors for already merged centroids - so there is no problem
1274 | mask[np.array(self.dataset.cluster_labels) == centroid] = False
1275 | self.dataset.cluster_labels = self.dataset.cluster_labels[mask]
1276 | self.dataset.local_maxs = tuple([lm[mask] for lm in self.dataset.local_maxs])
1277 |
1278 | for centroid in exclude[::-1]:
1279 | self.dataset.cluster_labels[self.dataset.cluster_labels > centroid] -= 1
1280 | self.dataset.normalized_vectors = self.dataset.normalized_vectors[mask, :]
1281 |
1282 | new_labels = self.__correct_cluster_labels(self.dataset.cluster_labels, centroid_correction_threshold)
1283 | centroids, centroids_stdev = self.__calc_centroid(new_labels)
1284 |
1285 | self.dataset.centroids = centroids
1286 | self.dataset.centroids_stdev = centroids_stdev
1287 | self.dataset.filtered_cluster_labels = new_labels
1288 |
1289 | return
1290 |
1291 | def map_celltypes(self, centroids=None):
1292 | """
1293 | Create correlation maps between the centroids and the vector field.
1294 | Each correlation map corresponds each cell type map.
1295 |
1296 | :param centroids: If given, map celltypes with the given cluster centroids.
1297 | :type centroids: list(np.array(int))
1298 | """
1299 |
1300 | if self.dataset.normalized_vf is None:
1301 | normalized_vf = self.dataset.vf
1302 | else:
1303 | normalized_vf = self.dataset.normalized_vf
1304 |
1305 | if centroids is None:
1306 | centroids = self.dataset.centroids
1307 | else:
1308 | self.dataset.centroids = centroids
1309 |
1310 | max_corr = np.zeros_like(self.dataset.vf_norm) - 1 # range from -1 to +1
1311 | max_corr_idx = np.zeros_like(self.dataset.vf_norm, dtype=int) - 1 # -1 for background
1312 | for cidx, centroid in enumerate(centroids):
1313 | ctmap = calc_ctmap(centroid, normalized_vf, self.ncores)
1314 | ctmap = np.nan_to_num(ctmap)
1315 | mask = max_corr < ctmap
1316 | max_corr[mask] = ctmap[mask]
1317 | max_corr_idx[mask] = cidx
1318 | self.dataset.max_correlations = max_corr
1319 | self.dataset.celltype_maps = max_corr_idx
1320 | return
1321 |
1322 | def filter_celltypemaps(self, min_r=0.6, min_norm=0.1, fill_blobs=True, min_blob_area=0, filter_params={}, output_mask=None):
1323 | """
1324 | Post-filter cell type maps created by `map_celltypes`.
1325 |
1326 | :param min_r: minimum threshold of the correlation.
1327 | :type min_r: float
1328 | :param min_norm: minimum threshold of the vector norm.
1329 | If a string is given instead, then the threshold is automatically determined using
1330 | sklearn's `threshold filter functions `_ (The functions start with `threshold_`).
1331 | :type min_norm: str or float
1332 | :param fill_blobs: If true, then the algorithm automatically fill holes in each blob.
1333 | :type fill_blobs: bool
1334 | :param min_blob_area: The blobs with its area less than this value will be removed.
1335 | :type min_blob_area: int
1336 | :param filter_params: Filter parameters used for the sklearn's threshold filter functions.
1337 | Not used when `min_norm` is float.
1338 | :type filter_params: dict
1339 | :param output_mask: If given, the cell type maps will be filtered using the output mask.
1340 | :type output_mask: np.ndarray(bool)
1341 | """
1342 |
1343 | if isinstance(min_norm, str):
1344 | # filter_params dict will be used for kwd params for filter_* functions.
1345 | # some functions doesn't support param 'offset', therefore temporariliy remove it from here
1346 | filter_offset = filter_params.pop('offset', 0)
1347 |
1348 | filtered_ctmaps = np.zeros_like(self.dataset.celltype_maps) - 1
1349 | mask = np.zeros_like(self.dataset.vf_norm, dtype=bool)
1350 | for cidx in range(len(self.dataset.centroids)):
1351 | ctcorr = self.dataset.get_celltype_correlation(cidx)
1352 | if isinstance(min_norm, str):
1353 | for z in range(self.dataset.shape[2]):
1354 | if min_norm in ["local", "niblack", "sauvola", "localotsu"]:
1355 | im = np.zeros(self.dataset.vf_norm.shape[:-1])
1356 | im[ctcorr[..., z] > min_r] = self.dataset.vf_norm[..., z][ctcorr[..., z] > min_r]
1357 | if min_norm == "localotsu":
1358 | max_norm = np.max(im)
1359 | im /= max_norm
1360 | selem = disk(filter_params['radius'])
1361 | min_norm_cut = filters.rank.otsu(im, selem) * max_norm
1362 | else:
1363 | filter_func = getattr(filters, "threshold_" + min_norm)
1364 | if min_norm in ["local", "niblack", "sauvola"]:
1365 | min_norm_cut = filter_func(im, **filter_params)
1366 | else:
1367 | highr_norm = self.dataset.vf_norm[..., z][ctcorr[..., z] > min_r]
1368 | #sigma = np.std(highr_norm)
1369 | if len(highr_norm) == 0 or np.max(highr_norm) == np.min(highr_norm):
1370 | min_norm_cut = np.max(self.dataset.vf_norm)
1371 | else:
1372 | min_norm_cut = filter_func(highr_norm, **filter_params)
1373 | min_norm_cut += filter_offset # manually apply filter offset
1374 | mask[..., z][np.logical_and(self.dataset.vf_norm[..., z] > min_norm_cut, ctcorr[..., z] > min_r)] = 1
1375 | else:
1376 | mask[np.logical_and(self.dataset.vf_norm > min_norm, ctcorr > min_r)] = 1
1377 |
1378 | if min_blob_area > 0 or fill_blobs:
1379 | blob_labels = measure.label(mask, background=0)
1380 | for bp in measure.regionprops(blob_labels):
1381 | if min_blob_area > 0 and bp.filled_area < min_blob_area:
1382 | for c in bp.coords:
1383 | mask[c[0], c[1], c[2]] = 0 # fill with zeros
1384 | #mask[c[0], c[1]] = 0 # fill with zeros
1385 | continue
1386 | if fill_blobs and bp.area != bp.filled_area:
1387 | minx, miny, minz, maxx, maxy, maxz = bp.bbox
1388 | mask[minx:maxx, miny:maxy, minz:maxz] |= bp.filled_image
1389 | #minr, minc, maxr, maxc = bp.bbox
1390 | #mask[minr:maxr, minc:maxc] |= bp.filled_image
1391 |
1392 | filtered_ctmaps[np.logical_and(mask == 1, np.logical_or(self.dataset.celltype_maps == -1, self.dataset.celltype_maps == cidx))] = cidx
1393 |
1394 | if isinstance(min_norm, str):
1395 | # restore offset param
1396 | filter_params['offset'] = filter_offset
1397 |
1398 | if output_mask is not None:
1399 | filtered_ctmaps[~output_mask.astype(bool)] = -1
1400 | self.dataset.filtered_celltype_maps = filtered_ctmaps
1401 |
    def bin_celltypemaps(self, step=10, radius=100):
        """
        Sweep a sphere window along a lattice on the image, and count the number of cell types in each window.

        Results are stored on the dataset: `celltype_binned_centers` (the cell type at
        each lattice point) and `celltype_binned_counts` (per-window cell type counts).

        :param step: The lattice spacing.
        :type step: int
        :param radius: The radius of the sphere window.
        :type radius: int
        """
        def make_sphere_mask(radius):
            # Boolean 3D mask of shape (2r+1, 2r+1, 2r+1), True inside the sphere.
            dia = radius*2+1
            X, Y, Z = np.ogrid[:dia, :dia, :dia]
            dist_from_center = np.sqrt((X - radius)**2 + (Y - radius)**2 + (Z - radius)**2)
            mask = dist_from_center <= radius
            return mask

        # Build a lattice of window centers with spacing `step`, aligned so the
        # lattice is centered on the image.
        centers = np.array(self.dataset.vf_norm.shape) // 2
        steps = np.array(np.floor(centers / step) * 2 + np.array(self.dataset.vf_norm.shape) % 2, dtype=int)
        starts = centers - step * np.floor(centers / step)
        ends = starts + steps * step
        X, Y, Z = [np.arange(s, e, step, dtype=int) for s, e in zip(starts, ends)]

        # Cell type at each lattice point, and per-window per-cell-type counts.
        ct_centers = np.zeros([len(X), len(Y), len(Z)], dtype=int)
        ct_counts = np.zeros([len(X), len(Y), len(Z), len(self.dataset.centroids)], dtype=int)

        ncelltypes = np.max(self.dataset.filtered_celltype_maps) + 1
        cnt_matrix = np.zeros([ncelltypes, ncelltypes])  # NOTE(review): appears unused in this method — confirm before removing
        sphere_mask = make_sphere_mask(radius)

        for xidx, x in enumerate(X):
            for yidx, y in enumerate(Y):
                for zidx, z in enumerate(Z):
                    # Window bounds [s, e) around the lattice point, plus matching
                    # slices into the sphere mask; both are clipped below whenever
                    # the window extends past an image boundary.
                    mask_slices = [slice(0, radius*2+1), slice(0, radius*2+1), slice(0, radius*2+1)]
                    s = [x - radius, y - radius, z - radius ]
                    e = [x + radius + 1, y + radius + 1, z + radius + 1]

                    for ms_idx, ms in enumerate(s):
                        if ms < 0:
                            # Clip at the lower image boundary and skip the same
                            # amount at the start of the mask.
                            mask_slices[ms_idx] = slice(abs(ms), mask_slices[ms_idx].stop)
                            s[ms_idx] = 0
                    for me_idx, me in enumerate(e):
                        ctmap_size = self.dataset.filtered_celltype_maps.shape[me_idx]
                        #ctmap_size = 50
                        if me > ctmap_size:
                            # Clip at the upper image boundary and shorten the mask
                            # by the same amount at its end.
                            mask_slices[me_idx] = slice(mask_slices[me_idx].start, (radius * 2 + 1) + ctmap_size - me)
                            e[me_idx] = ctmap_size

                    # Cell types inside the (clipped) spherical window, shifted by +1
                    # so background (-1) lands in bin 0 and is dropped by [1:] below.
                    w = self.dataset.filtered_celltype_maps[s[0]:e[0],
                                                           s[1]:e[1],
                                                           s[2]:e[2]][sphere_mask[tuple(mask_slices)]] + 1

                    ct_centers[xidx, yidx, zidx] = self.dataset.filtered_celltype_maps[x, y, z]
                    ct_counts[xidx, yidx, zidx] = np.bincount(np.ravel(w), minlength=len(self.dataset.centroids) + 1)[1:]

        self.dataset.celltype_binned_centers = ct_centers
        self.dataset.celltype_binned_counts = ct_counts
        return
1459 |
1460 | def find_domains(self, centroid_indices=[], n_clusters=10, norm_thres=0, merge_thres=0.6, merge_remote=True):
1461 | """
1462 | Find domains in the image, using the result of `bin_celltypemaps`.
1463 |
1464 | :param centroid_indices: The indices of centroids which will be used for determine tissue domains.
1465 | :type centroid_indices: list(int)
1466 | :param n_clusters: Initial number of clusters (domains) of agglomerative clustering.
1467 | :type n_clusters: int
1468 | :param norm_thres: Threshold for the total number of cell types in each window.
1469 | The window which contains the number of cell-type pixels less than this value will be ignored.
1470 | :type norm_thres: int
1471 | :param merge_thres: Threshold for merging domains. The centroids of the domains
1472 | which have higher correlation to this value will be merged.
1473 | :type merge_thres: float
1474 | :param merge_remote: If true, allow merging clusters that are not adjacent to each other.
1475 | :type merge_remote: bool
1476 | """
1477 | def find_neighbors(m, l):
1478 | neighbors = set()
1479 | for x, y, z in zip(*np.where(m == l)):
1480 | neighbors.add(m[x - 1, y , z ])
1481 | neighbors.add(m[x + 1, y , z ])
1482 | neighbors.add(m[x , y - 1, z ])
1483 | neighbors.add(m[x , y + 1, z ])
1484 | neighbors.add(m[x , y , z - 1])
1485 | neighbors.add(m[x , y , z + 1])
1486 | neighbors.add(m[x - 1, y - 1, z ])
1487 | neighbors.add(m[x + 1, y - 1, z ])
1488 | neighbors.add(m[x - 1, y + 1, z ])
1489 | neighbors.add(m[x + 1, y + 1, z ])
1490 | neighbors.add(m[x - 1, y , z - 1])
1491 | neighbors.add(m[x + 1, y , z - 1])
1492 | neighbors.add(m[x - 1, y , z + 1])
1493 | neighbors.add(m[x + 1, y , z + 1])
1494 | neighbors.add(m[x , y - 1, z - 1])
1495 | neighbors.add(m[x , y + 1, z - 1])
1496 | neighbors.add(m[x , y - 1, z + 1])
1497 | neighbors.add(m[x , y + 1, z + 1])
1498 | neighbors.add(m[x - 1, y - 1, z - 1])
1499 | neighbors.add(m[x + 1, y - 1, z - 1])
1500 | neighbors.add(m[x - 1, y - 1, z + 1])
1501 | neighbors.add(m[x + 1, y - 1, z + 1])
1502 | neighbors.add(m[x - 1, y + 1, z - 1])
1503 | neighbors.add(m[x + 1, y + 1, z - 1])
1504 | neighbors.add(m[x - 1, y + 1, z + 1])
1505 | neighbors.add(m[x + 1, y + 1, z + 1])
1506 | return neighbors
1507 |
1508 | if self.dataset.celltype_binned_counts is None:
1509 | raise AssertionError("Run 'bin_celltypemap()' method first!")
1510 |
1511 | if len(centroid_indices) > 0:
1512 | binned_ctmaps = self.dataset.celltype_binned_counts[..., centroid_indices]
1513 | else:
1514 | binned_ctmaps = self.dataset.celltype_binned_counts
1515 |
1516 | binned_ctmaps_norm = np.sum(binned_ctmaps, axis=3)
1517 |
1518 | ctvf_vecs = binned_ctmaps[binned_ctmaps_norm > norm_thres]
1519 | ctvf_vecs_normalized = preprocessing.normalize(ctvf_vecs, norm='l1', axis=1)
1520 |
1521 | clustering = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward', affinity='euclidean').fit(ctvf_vecs_normalized)
1522 | labels_predicted = clustering.labels_ + 1
1523 |
1524 | layer_map = np.zeros(binned_ctmaps_norm.shape)
1525 | layer_map[binned_ctmaps_norm > norm_thres] = labels_predicted
1526 | layer_map = measure.label(layer_map)
1527 |
1528 | if merge_thres < 1.0:
1529 | while True:
1530 | uniq_labels = np.array(list(set(list(np.ravel(layer_map))) - set([0])))
1531 | if not merge_remote:
1532 | layer_map_padded = np.pad(layer_map, 1, mode='constant', constant_values=0)
1533 | neighbors_dic = {}
1534 | for lbl in uniq_labels:
1535 | neighbors_dic[lbl] = find_neighbors(layer_map_padded, lbl)
1536 | cluster_centroids = []
1537 | for lbl in uniq_labels:
1538 | cluster_centroids.append(np.mean(binned_ctmaps[layer_map == lbl], axis=0))
1539 | max_corr = 0
1540 | #max_corr_indices = (0, 0, )
1541 | for i in range(len(uniq_labels)):
1542 | for j in range(i+1, len(uniq_labels)):
1543 | lbl_i, lbl_j = uniq_labels[i], uniq_labels[j]
1544 | if lbl_i == 0 or lbl_j == 0:
1545 | continue
1546 | corr_ij = corr(cluster_centroids[i], cluster_centroids[j])
1547 | if corr_ij > max_corr and (merge_remote or lbl_j in neighbors_dic[lbl_i]):
1548 | max_corr = corr_ij
1549 | max_corr_indices = (lbl_i, lbl_j, )
1550 | if max_corr > merge_thres:
1551 | layer_map[layer_map == max_corr_indices[1]] = max_corr_indices[0]
1552 | else:
1553 | break
1554 |
1555 | """
1556 | if min_size > 0:
1557 | labeled_layer_map = measure.label(layer_map)
1558 | labeled_layer_map_padded = np.pad(labeled_layer_map, 1, mode='constant', constant_values=0)
1559 | for prop in measure.regionprops(labeled_layer_map):
1560 | if prop.area < min_size:
1561 | find_neighbors(layer_map_padded, )
1562 | """
1563 |
1564 | uniq_labels = sorted(set(list(np.ravel(layer_map))) - set([0]))
1565 | for i, lbl in enumerate(uniq_labels, start=1):
1566 | layer_map[layer_map == lbl] = i
1567 |
1568 | resized_layer_map = zoom(layer_map, np.array(self.dataset.vf_norm.shape)/np.array(layer_map.shape), order=0) - 1
1569 | resized_layer_map2 = np.array(resized_layer_map, copy=True)
1570 | resized_layer_map2[self.dataset.filtered_celltype_maps == -1] = -1
1571 |
1572 | self.dataset.inferred_domains = resized_layer_map
1573 | self.dataset.inferred_domains_cells = resized_layer_map2
1574 |
1575 | def exclude_and_merge_domains(self, exclude=[], merge=[]):
1576 | """
1577 | Manually exclude or merge domains.
1578 |
1579 | :param exclude: Indices of the domains which will be excluded.
1580 | :type exclude: list(int)
1581 | :param merge: List of indices of the domains which will be merged.
1582 | :type merge: list(list(int))
1583 | """
1584 | for i in exclude:
1585 | self.dataset.inferred_domains[self.dataset.inferred_domains == i] = -1
1586 | self.dataset.inferred_domains_cells[self.dataset.inferred_domains_cells == i] = -1
1587 |
1588 | for i in merge:
1589 | for j in i[1:]:
1590 | self.dataset.inferred_domains[self.dataset.inferred_domains == j] = i[0]
1591 | self.dataset.inferred_domains_cells[self.dataset.inferred_domains_cells == j] = i[0]
1592 |
1593 | uniq_indices = np.unique(self.dataset.inferred_domains_cells)
1594 | if -1 in uniq_indices:
1595 | uniq_indices = uniq_indices[1:]
1596 |
1597 | for new_idx, i in enumerate(uniq_indices):
1598 | self.dataset.inferred_domains[self.dataset.inferred_domains == i] = new_idx
1599 | self.dataset.inferred_domains_cells[self.dataset.inferred_domains_cells == i] = new_idx
1600 |
1601 | def calc_cell_type_compositions(self):
1602 | """
1603 | Calculate cell type compositions in each domain.
1604 | """
1605 | cell_type_compositions = []
1606 | for i in range(np.max(self.dataset.inferred_domains) + 1):
1607 | counts = np.bincount(self.dataset.filtered_celltype_maps[self.dataset.inferred_domains == i] + 1, minlength=len(self.dataset.centroids) + 1)
1608 | cell_type_compositions.append(counts[1:])
1609 |
1610 | masked_ctmap = self.dataset.filtered_celltype_maps[self.dataset.filtered_celltype_maps != -1]
1611 | counts_all = np.array(np.bincount(masked_ctmap, minlength=len(self.dataset.centroids)), dtype=float)
1612 | cell_type_compositions.append(counts_all) # Add proportion from the whole tissue
1613 | cell_type_compositions = preprocessing.normalize(cell_type_compositions, axis=1, norm='l1')
1614 | self.dataset.inferred_domains_compositions = cell_type_compositions
1615 |
1616 |
1617 | def calc_spatial_relationship(self):
1618 | """
1619 | Calculate spatial relationship between the domains using the result of `bin_celltypemap`.
1620 | """
1621 | if self.dataset.celltype_binned_counts is None:
1622 | raise AssertionError("Run 'bin_celltypemap()' method first!")
1623 |
1624 | ct_centers = self.dataset.celltype_binned_centers
1625 |
1626 | sparel = np.zeros([len(self.dataset.centroids), len(self.dataset.centroids)])
1627 | for idx in np.unique(ct_centers):
1628 | sparel[idx, :] = np.sum(self.dataset.celltype_binned_counts[ct_centers == idx], axis=0)
1629 |
1630 | self.dataset.spatial_relationships = preprocessing.normalize(sparel, axis=1, norm='l1')
1631 |
--------------------------------------------------------------------------------