├── outline.png ├── docs ├── source │ ├── images │ │ ├── ecoprospector.png │ │ ├── selection_matrix_identity.png │ │ └── selection_matrix_propagule_pooling.png │ ├── content │ │ ├── micrm.rst │ │ ├── perturbation.rst │ │ ├── metacommunity.rst │ │ ├── community_function.rst │ │ ├── installation.rst │ │ ├── quickstart.rst │ │ ├── usertools.rst │ │ ├── selection_matrix.rst │ │ ├── protocol.rst │ │ └── mapping_file.rst │ ├── data │ │ └── input_test.csv │ ├── conf.py │ └── index.rst ├── Makefile └── make.bat ├── requirements.txt ├── commandline_tool ├── ecoprospector └── extract_species_function ├── setup.py ├── LICENSE ├── input_example.csv ├── .gitignore ├── README.md └── community_selection ├── __init__.py ├── B_community_phenotypes.py ├── E_protocols.py ├── D_perturbation_algorithms.py ├── C_selection_algorithms.py ├── usertools.py └── A_experiment_functions.py /outline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chang-Yu-Chang/ecoprospector/HEAD/outline.png -------------------------------------------------------------------------------- /docs/source/images/ecoprospector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chang-Yu-Chang/ecoprospector/HEAD/docs/source/images/ecoprospector.png -------------------------------------------------------------------------------- /docs/source/images/selection_matrix_identity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chang-Yu-Chang/ecoprospector/HEAD/docs/source/images/selection_matrix_identity.png -------------------------------------------------------------------------------- /docs/source/content/micrm.rst: -------------------------------------------------------------------------------- 1 | Microbial Consumer-Resource Model 2 | ================================= 3 | 4 | * Briefly explain MiCRM model and 
reference Bobby's paper. -------------------------------------------------------------------------------- /docs/source/content/perturbation.rst: -------------------------------------------------------------------------------- 1 | Perturbation 2 | ============= 3 | 4 | * Describe how perturbations are carried out 5 | * Where are the perturbations codeup 6 | -------------------------------------------------------------------------------- /docs/source/images/selection_matrix_propagule_pooling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chang-Yu-Chang/ecoprospector/HEAD/docs/source/images/selection_matrix_propagule_pooling.png -------------------------------------------------------------------------------- /docs/source/content/metacommunity.rst: -------------------------------------------------------------------------------- 1 | Metacommunity 2 | ============= 3 | 4 | * Metacommunity object inherited from community-simulator 5 | 6 | * Describe how it is constructed from mapping file 7 | 8 | * Describe what it contains (resource, community composition, species feature) 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | contourpy==1.1.0 2 | cvxpy==1.3.2 3 | cycler==0.11.0 4 | ecos==2.0.12 5 | fonttools==4.42.0 6 | kiwisolver==1.4.4 7 | matplotlib==3.7.2 8 | numpy==1.25.2 9 | osqp==0.6.3 10 | packaging==23.1 11 | pandas==1.2.0 12 | Pillow==10.0.0 13 | pyparsing==3.0.9 14 | python-dateutil==2.8.2 15 | pytz==2023.3 16 | qdldl==0.1.7.post0 17 | scipy==1.11.1 18 | scs==3.2.3 19 | six==1.16.0 20 | tzdata==2023.3 21 | -------------------------------------------------------------------------------- /docs/source/content/community_function.rst: -------------------------------------------------------------------------------- 1 | Community Function 2 | =================== 
3 | 4 | Six types of community function is currently available in ecoprospector, including 5 | 6 | * Additive function ``f1_additive`` and ``f1a_additive`` 7 | * Non-additive, epistatic function ``f2_interaction`` ``f2a_interaction`` 8 | * Binary function ``f3_additive_binary`` ``f4_interaction_binary`` 9 | * Invader resistance ``f5_invader_suppression`` 10 | * Resource minimization ``f6_target_resource`` 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /commandline_tool/ecoprospector: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | from community_selection.usertools import * 5 | 6 | input_csv = str(sys.argv[1]) # Input file name 7 | row_number = int(sys.argv[2]) # Which row of experiment to run 8 | 9 | assumptions = make_assumptions(input_csv, row_number) 10 | params, params_simulation , params_algorithm, plate = prepare_experiment(assumptions) 11 | simulate_community(params = params, params_simulation = params_simulation, params_algorithm = params_algorithm,plate = plate) 12 | save_plate(assumptions, plate) #Save plate (will onlys save if assumptions specify that) 13 | 14 | -------------------------------------------------------------------------------- /commandline_tool/extract_species_function: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | import pandas as pd 5 | from community_selection.usertools import * 6 | 7 | input_csv = str(sys.argv[1]) # Input file name 8 | row_number = int(sys.argv[2]) # Which row of experiment to run 9 | output_file_name = str(sys.argv[3]) 10 | 11 | assumptions = make_assumptions(input_csv, row_number) 12 | species_function = extract_species_function(assumptions) 13 | species_function.to_csv(output_file_name, index = False) 14 | print("\nGenerated per-capita species functions from: " + input_csv + 
"\nRow (0-based): " + str(row_number) + "\nOutput file name: " + output_file_name) 15 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | 4 | from setuptools import setup 5 | 6 | setup(name='ecoprospector', 7 | version='0.0.2', 8 | description='Simulate community selection protocols', 9 | url='https://github.com/Chang-Yu-Chang/ecoprospector', 10 | author=['Chang-Yu Chang', 'Jean Villa'], 11 | author_email=['chang-yu.chang@yale.edu'], 12 | license='MIT', 13 | packages = ['community_selection'], 14 | scripts = ['commandline_tool/ecoprospector', 'commandline_tool/extract_species_function'], 15 | include_package_data = True, 16 | package_data = {"": ["*.csv"]}, 17 | install_requires=["community-simulator@ git+https://github.com/Emergent-Behaviors-in-Biology/community-simulator.git@master"], 18 | zip_safe=False) 19 | -------------------------------------------------------------------------------- /docs/make.bat: 
-------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Chang-Yu Chang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/source/content/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | 5 | System requirement 6 | ------------------ 7 | 8 | * Python 3.7.3 9 | * `community-simulator `_ 10 | * Scipy, Numpy, Pandas, Matplotlib, functools, itertools, random. CVXPY is not required for our simulations as batch-culture simulations do not use the steadystate method in community-simulator. 11 | 12 | Ecoprospector package depends on [community-simulator](https://github.com/Emergent-Behaviors-in-Biology/community-simulator) (developed by the Mehta group and described in their [paper](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0230430)), which depends on Numpy, Pandas, Matplotlib, SciPy that are all included in Anaconda distribution. 13 | 14 | 15 | | 16 | 17 | Install the development version 18 | ------------------------------- 19 | 20 | Clone the github repository to a local directory :: 21 | 22 | $ git clone https://github.com/Chang-Yu-Chang/ecoprospector . 23 | 24 | Then browse to the Ecoprospector directory and install package :: 25 | 26 | $ pip install -e . 
27 | -------------------------------------------------------------------------------- /docs/source/content/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quick Start Guide 2 | ================= 3 | 4 | This page provides a quick start guide for using ecoprospector with an single input :code:`.csv` file. If you do not know how to configurate :code:`.csv` file, please read :ref:`Input Mapping File` for more details on each parameter. 5 | 6 | You can execute a experiments in a command line to quickly run an experiment. For example in Terminal on Mac, enter 7 | 8 | .. code-block:: bash 9 | 10 | $ ecoprospector mapping_file.csv 0 11 | 12 | Where mapping_file.csv is the input :code:`csv` file and i is the row (0-indexed) specifying the experiment to be run. 13 | 14 | You can also run the above code in python. The line above is equivalent as: 15 | 16 | .. code-block:: python 17 | 18 | from community_selection import * 19 | from community_selection.usertools import * 20 | assumptions = make_assumptions("mapping_file.csv", 0) 21 | params, params_simulation , params_algorithm, plate = prepare_experiment(assumptions) 22 | simulate_community(params = params, params_simulation = params_simulation, params_algorithm = params_algorithm, plate = plate) 23 | 24 | The functons are described in :ref:`User Tools` -------------------------------------------------------------------------------- /input_example.csv: -------------------------------------------------------------------------------- 1 | 
selected_function,protocol,seed,exp_id,overwrite_plate,passage_overwrite_plate,output_dir,save_function,save_composition,save_plate,function_lograte,composition_lograte,scale,n_inoc,rich_medium,monoculture,dilution,n_wells,n_propagation,n_transfer,n_transfer_selection,metacommunity_sampling,power_alpha,lognormal_mean,lognormal_sd,phi_distribution,phi_mean,phi_sd,phi_lower,phi_upper,ruggedness,function_ratio,binary_threshold,g0,cost_distribution,cost_mean,cost_sd,cost_lower,cost_upper,invader_index,invader_sampling,invader_strength,target_resource,directed_selection,knock_out,knock_in,knock_in_threshold,bottleneck,bottleneck_size,migration,n_migration,s_migration,coalescence,frac_coalescence,resource_shift,r_type,r_percent,sampling,sn,sf,Sgen,rn,rf,R0_food,food,supply,muc,sigc,c0,c1,q,sparsity,fs,fw,g,w,l,m,n,response,sigma_max,regulation,nreg,tau,r,S 2 | f1_additive,simple_screening,1,f1_additive-simple_screening-1,NA,FALSE,./,TRUE,TRUE,FALSE,1,20,1000000,1.00E+06,TRUE,FALSE,0.001,6,1,10,5,Power,0.01,8,8,Norm,0,1,0,1,NA,1,1,1,Norm,0,0,0,1,2,Gamma,10,NA,FALSE,FALSE,FALSE,NA,FALSE,NA,FALSE,1000000,NA,FALSE,NA,FALSE,NA,NA,Binary_Gamma,2100,1,0,90,1,1000,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,NA,type III,1,NA,NA,NA,NA,NA 3 | f1_additive,simple_screening,1,f1_additive-monoculture-1,NA,FALSE,./,TRUE,TRUE,FALSE,1,20,1000000,1.00E+06,TRUE,TRUE,0.001,6,1,10,5,Power,0.01,8,8,Norm,0,1,0,1,NA,1,1,1,Norm,0,0,0,1,2,Gamma,10,NA,FALSE,FALSE,FALSE,NA,FALSE,NA,FALSE,1000000,NA,FALSE,NA,FALSE,NA,NA,Binary_Gamma,2100,1,0,90,1,1000,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,NA,type III,1,NA,NA,NA,NA,NA -------------------------------------------------------------------------------- /docs/source/data/input_test.csv: -------------------------------------------------------------------------------- 1 | 
selected_function,protocol,seed,exp_id,overwrite_plate,passage_overwrite_plate,output_dir,save_function,save_composition,save_plate,function_lograte,composition_lograte,scale,n_inoc,rich_medium,monoculture,dilution,n_wells,n_propagation,n_transfer,n_transfer_selection,metacommunity_sampling,power_alpha,lognormal_mean,lognormal_sd,phi_distribution,phi_mean,phi_sd,phi_lower,phi_upper,ruggedness,function_ratio,binary_threshold,g0,cost_distribution,cost_mean,cost_sd,cost_lower,cost_upper,invader_index,invader_sampling,invader_strength,target_resource,directed_selection,knock_out,knock_in,knock_in_threshold,bottleneck,bottleneck_size,migration,n_migration,s_migration,coalescence,frac_coalescence,resource_shift,r_type,r_percent,sampling,sn,sf,Sgen,rn,rf,R0_food,food,supply,muc,sigc,c0,c1,q,sparsity,fs,fw,g,w,l,m,n,response,sigma_max,regulation,nreg,tau,r,S 2 | f1_additive,simple_screening,1,f1_additive-simple_screening-1,NA,False,/home/cc2553/project/community-selection/data/independent_f1_additive/,True,True,False,1,20,1000000,1e+06,TRUE,False,0.001,96,1,40,20,Power,0.01,8,8,Norm,0,1,0,1,NA,1,1,1,Norm,0,0,0,1,2,Gamma,10,NA,False,False,False,NA,False,NA,False,1000000,NA,False,NA,False,NA,NA,Binary_Gamma,2100,1,0,90,1,1000,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,NA,type III,1,NA,NA,NA,NA,NA 3 | f1_additive,simple_screening,1,f1_additive-monoculture-1,NA,False,/home/cc2553/project/community-selection/data/independent_f1_additive/,True,True,False,1,20,1000000,1e+06,TRUE,True,0.001,96,1,40,20,Power,0.01,8,8,Norm,0,1,0,1,NA,1,1,1,Norm,0,0,0,1,2,Gamma,10,NA,False,False,False,NA,False,NA,False,1000000,NA,False,NA,False,NA,NA,Binary_Gamma,2100,1,0,90,1,1000,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,NA,type III,1,NA,NA,NA,NA,NA 4 | -------------------------------------------------------------------------------- /docs/source/content/usertools.rst: -------------------------------------------------------------------------------- 1 | User Tools 2 | ========== 3 | 4 | Main functions in 
ecoprospector 5 | 6 | 7 | 8 | Make parameters 9 | --------------- 10 | 11 | .. code-block:: python 12 | 13 | make_assumptions(input_csv, row_number) 14 | 15 | 16 | .. confval:: input_csv 17 | 18 | :type: DataFrame 19 | :default: ``input_csv`` 20 | 21 | mapping csv file 22 | 23 | .. confval:: row_number 24 | 25 | :type: integer 26 | :default: ``0`` 27 | 28 | The row number that specifies the experiment to run (0-indexed) 29 | 30 | | 31 | 32 | Prepare and set up expeirments 33 | ------------------------------ 34 | 35 | .. code-block:: python 36 | 37 | params, params_simulation , params_algorithm, plate = prepare_experiment(assumptions) 38 | 39 | 40 | .. confval:: assumptions 41 | 42 | :type: List 43 | :default: ``assumptions`` 44 | 45 | A comprehensive list read from the input csv file 46 | 47 | 48 | | 49 | 50 | Simulate the protocol 51 | ---------------------- 52 | 53 | 54 | .. code-block:: python 55 | 56 | simulate_community(params = params, params_simulation = params_simulation, params_algorithm = params_algorithm, plate = plate) 57 | 58 | 59 | .. confval:: params 60 | 61 | :type: List 62 | :default: ``assumptions`` 63 | 64 | A comprehensive list read from the input csv file 65 | 66 | 67 | .. confval:: params_simulation 68 | 69 | :type: List 70 | :default: ``params_simulation`` 71 | 72 | Parameters related to simulating batch culture 73 | 74 | .. confval:: params_algorithm 75 | 76 | :type: List 77 | :default: ``params_algorithm`` 78 | 79 | Parameters related to protocol, community function, selection matrices, and 80 | 81 | 82 | .. 
confval:: plate 83 | 84 | :type: Metacommunity object 85 | :default: ``plate`` 86 | 87 | Object defined in this project 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Folders 7 | deprecated/* 8 | tests/* 9 | *.ipynb 10 | Rreport/* 11 | 12 | # R documents 13 | *.Rproj 14 | .Rhistory 15 | *.Rproj.user 16 | *.Rhistory 17 | *.RData 18 | *.Ruserdata 19 | *.html 20 | *.nb.html 21 | .DS_Store 22 | *.Rmd 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | .eggs/ 35 | lib/ 36 | lib64/ 37 | parts/ 38 | sdist/ 39 | var/ 40 | wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Environments 103 | .env 104 | .venv 105 | env/ 106 | venv/ 107 | ENV/ 108 | env.bak/ 109 | venv.bak/ 110 | 111 | # Spyder project settings 112 | .spyderproject 113 | .spyproject 114 | 115 | # Rope project settings 116 | .ropeproject 117 | 118 | # mkdocs documentation 119 | /site 120 | 121 | # mypy 122 | .mypy_cache/ 123 | .Rproj.user 124 | -------------------------------------------------------------------------------- /docs/source/content/selection_matrix.rst: -------------------------------------------------------------------------------- 1 | Selection Matrix 2 | =================== 3 | 4 | What is a selection matrix? 5 | ------------------------------------ 6 | A selection matrix is a map specifying how the parental communities are selected according to their function ranks, and how the selected communities are pooled or distributed to seed the offspring communities. It is a square matrix of size ``n_wells``. The columns are ranked parental communities and the rows are offspring communities. Each element in the selection matrix specifies the dilution factor used for the batch culture. 7 | 8 | .. 
image:: ../images/selection_matrix_identity.png 9 | :width: 600 10 | 11 | 12 | The selection matrices allow us to standardize most strategies of artificial community selection, for example, propagule and migrant pool approaches, into a regular form. 13 | 14 | .. image:: ../images/selection_matrix_propagule_pooling.png 15 | :width: 600 16 | 17 | 18 | How does selection matrix work in ecoprospector? 19 | ---------------------------------------------------------------------- 20 | 21 | A selection matrix is written in the form of a Python function to accommodate a varied number of communities in different independent experiments. These functions take a vector of values (the default output of :ref:`Community Function` functions) as input. The selection matrix function will read the length of the input vector, and construct a selection matrix of that length. The selection matrix is then used to guide the passaging of the metacommunity. 22 | 23 | A selection matrix must be defined during the simulation setup, i.e. stored in the ``C_selection_matrices.py``. During simulation, any particular selection matrix will be called according to :ref:`Selection Protocol`. 24 | 25 | A library of selection matrices 26 | ---------------------------------------------------------------------- 27 | 28 | We saved all the predefined selection matrices in ``C_selection_matrices.py``. These selection matrices were adapted from the selection protocols in the prior empirical and theoretical studies. 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/source/content/protocol.rst: -------------------------------------------------------------------------------- 1 | Selection Protocol 2 | ================== 3 | 4 | What is a selection protocol? 5 | --------------------------------------------------- 6 | 7 | A selection protocol is a table that defines the selected function, selection regime, and the number of community generations. 
8 | 9 | How does a selection protocol in ecoprospector look like? Inspired by batch culture experiments of microbial communities, we specify the selection protocol in a transfer/generation-wise manner. Here is an example of no-selection (simple_screening) protocol where it is simply doing nothing but passaging the plate in every transfer : 10 | 11 | [insert an example protocol table of no-selection protocol ] 12 | 13 | A selection protocol is then a table with four columns: 14 | 15 | * Protocol name: specified by ``protocol`` in the input ``csv``. 16 | * Transfer or community generation. 17 | * :ref:`Community Function`: the community function under selection at each transfer. 18 | * :ref:`Selection Matrix`: the selection regime conducted at each transfer. 19 | 20 | 21 | How to make a selection protocol 22 | -------------------------------- 23 | 24 | The selection protocol is automatically generated by ecoprospector with the mapping ``csv``. Key parameters include the specified protocol (``protocol``) the number of total transfers (``n_transfer``) and the number of selection transfers (``n_transfer_selection``). 25 | 26 | By default, ecoprospector will divide the protocol into two phases: selection and stabilization. In each transfer of the selection phase, a subset of the metacommunity is selected and used to seed the next generation. The selection matrix is consecutively implemented for ``n_transfer_selection`` times. Then for the rest of transfers until ``n_transfer``, the metacommunity is stabilized by simply passaged without selection. 27 | 28 | There are some examples of default selection protocols, which are contained in the ``E_protocols.py``. 29 | 30 | Note that users can make their own protocol without regard to the ecoprospector predefined protocols. 
To do that, make a pandas DataFrame with the same column names and include it in the ``E_protocols.py``, and make sure that: 31 | 32 | * The number of transfers does not exceed the ``n_transfer`` 33 | * The selection matrix specified in the protocol is contained in ``C_selection_matrices.py`` 34 | * Specify the new protocol name in the input ``csv``. 35 | 36 | What a selection protocol does not do 37 | --------------------------------------------------- 38 | 39 | While the table form of a selection protocol is a convenient way to standardize empirical protocols, some features that are usually specified in a “protocol” at an experimental setting are not included: 40 | 41 | * The number of communities (``n_wells``) in a metacommunity. 42 | * Dilution factor (``l``) 43 | * Incubation time (``n_propagation``) 44 | * Media or resource composition 45 | 46 | Instead these parameters, either specified in the mapping ``csv`` or generated during simulation setup, become object attributes of the :ref:`Metacommunity` during simulation. 47 | 48 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | import sphinx_rtd_theme 17 | from sphinx.locale import _ 18 | 19 | # -- Project information ----------------------------------------------------- 20 | master_doc = 'index' 21 | project = 'ecoprospector' 22 | copyright = '2020, Chang-Yu Chang' 23 | author = 'Chang-Yu Chang' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = '0.0.1' 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['recommonmark', 35 | 'sphinx_rtd_theme', 36 | 'sphinx.ext.autosectionlabel' 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = [] 46 | 47 | 48 | # -- Options for HTML output ------------------------------------------------- 49 | 50 | # The theme to use for HTML and HTML Help pages. See the documentation for 51 | # a list of builtin themes. 52 | # 53 | html_theme = 'sphinx_rtd_theme' 54 | 55 | # Add any paths that contain custom static files (such as style sheets) here, 56 | # relative to this directory. They are copied after the builtin static files, 57 | # so a file named "default.css" will overwrite the builtin "default.css". 
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']


# Extensions to theme docs
def setup(app):
    """Register the custom ``confval`` object type used throughout the docs."""
    from sphinx.domains.python import PyField
    from sphinx.util.docfields import Field

    # Field descriptors rendered for each confval directive: a class-linked
    # "Type" line and a plain "Default" line.
    confval_fields = [
        PyField(
            'type',
            label=_('Type'),
            has_arg=False,
            names=('type',),
            bodyrolename='class',
        ),
        Field(
            'default',
            label=_('Default'),
            has_arg=False,
            names=('default',),
        ),
    ]

    app.add_object_type(
        'confval',
        'confval',
        objname='configuration value',
        indextemplate='pair: %s; configuration value',
        doc_field_types=confval_fields,
    )
17 | 18 | ### Mac and Linux 19 | 20 | Install requirement 21 | 22 | ```sh 23 | # Required to build wheel for qdldl 24 | pip install cmake 25 | 26 | # Install requirements 27 | pip install -r requirements.txt 28 | ``` 29 | 30 | 31 | Download the code or clone the github repository of community simulator to a local directory and browse to the community-simulator directory and install the package 32 | 33 | ```sh 34 | cd 35 | git clone https://github.com/Emergent-Behaviors-in-Biology/community-simulator 36 | ``` 37 | 38 | Download the code or clone this github repository to a local directory and browse to the ecoprospector directory and install package 39 | 40 | ```sh 41 | cd 42 | git clone https://github.com/Chang-Yu-Chang/ecoprospector 43 | pip install -e . 44 | ``` 45 | 46 | ### Windows 47 | 48 | The parallelization features in community-simulator are not currently supported on Windows and as such we cannot guarantee that the current version of ecoprospector will work in a windows environment. We would recommend using a linux emulator for windows such as Cygwin instead. 49 | 50 | 51 | ## Usage example 52 | 53 | With the mapping file (csv), executing one experiment is simple as 54 | 55 | ```sh 56 | $ ecoprospector input_example.csv 0 57 | ``` 58 | 59 | For more examples and usage, please refer to the [documentation](https://ecoprospector.readthedocs.io/en/latest/). 60 | 61 | 62 | ## Release History 63 | 64 | * 0.0.3 65 | * Add requirements file. 
class Metacommunity(Community):
    """
    Metacommunity plate, inherited from the community-simulator package.

    Changes relative to the parent ``Community`` class:

    - The number of cells transferred in ``Passage`` is Poisson distributed
      (rather than deterministic), modeling demographic sampling noise in
      the inoculum.
    """
    def Passage(self, f, scale=None, refresh_resource=True):
        """
        Transfer cells to a fresh plate.

        Parameters
        ----------
        f : array-like
            Matrix specifying the fraction of each old well (column) to
            transfer to each new well (row).
        scale : float, optional
            Scale factor converting abundances to discrete cell counts.
            Defaults to the scale defined for the plate on initialization.
        refresh_resource : bool
            Whether the new plate comes supplied with fresh media. The
            resource concentrations in the media are assumed to be the same
            as the initial resource concentrations from the first plate.
            The "Reset" method can be used to adjust these concentrations.
        """
        # HOUSEKEEPING
        if scale is None:
            scale = self.scale  # Use scale from initialization by default
        f = np.asarray(f)  # Allow for f to be a dataframe
        self.N[self.N < 0] = 0  # Remove any negative values that may have crept in
        self.R[self.R < 0] = 0

        # DEFINE NEW VARIABLES
        N_tot = np.sum(self.N)
        R_tot = np.sum(self.R)
        N = np.zeros(np.shape(self.N))

        # MULTINOMIAL SAMPLING
        # (simulate transferring a finite fraction of a discrete collection of cells)
        # The total number of transferred cells is Poisson distributed around the
        # deterministic expectation scale*N_tot*f, then partitioned among species
        # by multinomial sampling of the relative abundances.
        for k in range(self.n_wells):
            for j in range(self.n_wells):
                if f[k, j] > 0 and N_tot[j] > 0:
                    N[:, k] += np.random.multinomial(
                        np.random.poisson(scale * N_tot[j] * f[k, j]),
                        (self.N / N_tot).values[:, j]
                    ) * 1. / scale
        self.N = pd.DataFrame(N, index=self.N.index, columns=self.N.keys())

        # In batch culture, there is no need to do multinomial sampling on the
        # resources, since they are externally replenished before they cause
        # numerical problems
        if refresh_resource:
            self.R = pd.DataFrame(np.dot(self.R, f.T), index=self.R.index, columns=self.R.keys())
            self.R = self.R + self.R0

        # In continuous culture, it is useful to eliminate the resources that are
        # going extinct, to avoid numerical instability
        else:
            R_tot = np.sum(self.R)
            R = np.zeros(np.shape(self.R))
            for k in range(self.n_wells):
                for j in range(self.n_wells):
                    if f[k, j] > 0 and R_tot[j] > 0:
                        R[:, k] += np.random.multinomial(
                            int(scale * R_tot[j] * f[k, j]),
                            (self.R / R_tot).values[:, j]
                        ) * 1. / scale
            self.R = pd.DataFrame(R, index=self.R.index, columns=self.R.keys())
-------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Ecoprospector's Tutorial 2 | ========================================= 3 | 4 | .. image:: images/ecoprospector.png 5 | :width: 1000 6 | 7 | 8 | What is ecoprospector? 9 | ====================== 10 | 11 | Ecoprospector is a Python package designed to simulate protocols of artificial selection on microbial metacommunities. Experiments are run by using a :code:`mapping_file.csv` as an input. Each row in this csv file corresponds to a single experiment and each column specifies the paramaters for that experiment. Running a single experiment specified in row :code:`i` simply involves the bash command 12 | 13 | .. code-block:: bash 14 | 15 | $ ecoprospector mapping_file.csv i 16 | 17 | | 18 | 19 | Main features 20 | ============= 21 | 22 | Ecoprospector aims to flexibly adapt major componets of commonly used experiemntal protocols so that they can be tested on in-silico microbial meta-communitities. The main features of our simulations include: 23 | 24 | * **Consumer-resource dynamics**: virtual microbial species with idiosyncratic metabolic properties interact with others in a community through secretion and uptakes. Microbial community dynamics can be adjusted using a wide range of paramaters 25 | * **Batch-culture**: the community generation is divided into serial batch culture with a tunable incubation time and number of generations. 26 | * **Community function**: any arbitrarily designed community functions can be under selection. 27 | * **Selection matrix**: the selection regimes (i.e., which parental communitues to select and how to seed the offspring communities) are standardized by selection matrix at the end of each generation. 
* **Perturbations**: at the end of any generation the top performing community can be replicated and copies can be perturbed, simulating possible manipulations (i.e. single-species invasions, resource-shifts, bottle-necking etc).
def f2_interaction(plate, params_simulation):
    """
    Additive community function with pairwise interactions (F2).

    For each community (well) with composition vector c, the function value is
    the quadratic form c^T F c, where F = plate.f2_species_smooth is an
    S_tot x S_tot matrix of per-pair contributions.

    plate = plate object from package; plate.N is an S_tot x n_wells abundance table
    params_simulation = simulation parameter dictionary (unused here; kept for a
        uniform community-phenotype signature)

    Returns a 1-D numpy array of length n_wells.
    """
    abundances = plate.N.values           # S_tot x n_wells
    pair_effects = plate.f2_species_smooth  # S_tot x S_tot

    # Vectorized c^T F c per well. Replaces the previous per-well Python loop
    # that materialized an explicit S_tot x S_tot outer product for every well.
    interaction_term = np.einsum("aw,ab,bw->w", abundances, pair_effects, abundances)

    return interaction_term
set k = np.zeros([n, n]) for binary function (species presence or absense) 99 | 100 | """ 101 | # Number of species in the pool 102 | S_tot = plate.N.shape[0] 103 | 104 | # Binary function using type III response 105 | plate_temp = plate.copy() 106 | n = 10; Sm = 1 107 | plate_temp.N = plate_temp.N / params_simulation["binary_threshold"] 108 | plate_temp.N = plate_temp.N**n / (1 + plate_temp.N**n/Sm) 109 | 110 | # Additive term 111 | additive_term = np.sum(plate_temp.N.values * plate_temp.species_function[:,None], axis = 0) 112 | 113 | # Interaction term 114 | interaction_term = np.zeros(plate_temp.N.shape[1]) 115 | for i in range(plate_temp.N.shape[1]): # For each community 116 | community_composition = np.array(plate_temp.N.iloc[:,i]).reshape(S_tot, 1) 117 | community_composition_square = np.multiply(community_composition, community_composition.reshape(1, S_tot)) 118 | interaction_term[i] = np.sum(community_composition_square * plate_temp.interaction_function) 119 | 120 | return additive_term + interaction_term 121 | 122 | def f5_invader_suppression(plate, params_simulation): 123 | """ 124 | Community function in which an indentical alien community (single or multiple species) invades the selected resident communities. 125 | This community function is the ratio between the biomass when invader grows with the community and when invader grows alone. 126 | The biomass of invader growing alone (plate.invasion_plate_t1) should have been included in the plate object attribute. 
def f6_target_resource(plate, params_simulation):
    """
    Community function that rewards depletion of a supplied target resource.

    The selected phenotype is minus the remaining amount of the target resource
    (plate.target_resource indexes a row of plate.R), so maximizing the
    function minimizes the leftover resource. The target resource by default is
    the resource in the last index. If rich medium is provided, the target
    resource amount in the initial plate is set to 0.

    Returns a list with one value per community (well).
    """
    target_resource_index = plate.target_resource
    # Negate in pandas and convert once, instead of a temporary list plus a
    # sign-flipping comprehension.
    return (-plate.R.iloc[target_resource_index, :]).tolist()
supplied resource to 0 170 | relative_resource = relative_resource/relative_resource.sum(0) #Look at relative abundance of remaining resource 171 | R_dist = np.sqrt(np.sum(np.array((np.tile(R_target,(well_tot,1)) - relative_resource.T)**2)[:,1:],axis=1)) 172 | return (np.array(R_dist.T)* -1) * (1+ np.random.normal(0,sigma,well_tot))#(so we select for positive community function) 173 | 174 | -------------------------------------------------------------------------------- /community_selection/E_protocols.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Nov 26 2019 5 | @author: changyuchang 6 | """ 7 | import pandas as pd 8 | 9 | def make_algorithm_library(): 10 | """ 11 | Show the table of algorithms in this package 12 | """ 13 | import re 14 | import pandas as pd 15 | 16 | # Find directory of community_selection modultes 17 | import community_selection 18 | module_dir = community_selection.__file__ 19 | module_dir = re.sub("__init__.py", "", module_dir) 20 | 21 | # 22 | algorithm_types = ["community_phenotypes", "selection_algorithms", "perturbation_algorithms"] 23 | algorithms = list() 24 | 25 | for i in range(len(algorithm_types)): 26 | 27 | # Open files 28 | file_algorithm_phenotype = open(module_dir + ["B", "C", "D"][i] + "_" + algorithm_types[i] + ".py", "r") 29 | 30 | # Read lines 31 | line_list = list() 32 | line = file_algorithm_phenotype.readline() 33 | cnt = 1 34 | 35 | while line: 36 | line = file_algorithm_phenotype.readline() 37 | line_list.append(line.strip()) 38 | cnt += 1 39 | 40 | # Regular expression 41 | algorithm_names = re.findall("def \w+", " ".join(line_list)) 42 | list_algorithm = [re.sub("^def ", "", x) for x in algorithm_names] 43 | 44 | # Write the files 45 | algorithms.append(pd.DataFrame({"AlgorithmType": re.sub("s$", "", algorithm_types[i]), "AlgorithmName": list_algorithm})) 46 | 47 | return pd.concat(algorithms) 48 | 49 | 
def make_protocol(params_simulation, protocol_name, selection_algorithm = None, repeated_selection = False):
    """
    Build the per-transfer schedule for one experimental protocol.

    params_simulation = parameter dictionary; uses "n_transfer" (total number
        of transfers), "n_transfer_selection" (length of the selection phase)
        and "selected_function" (community phenotype under selection)
    protocol_name = label stored in the "algorithm_name" column
    selection_algorithm = name of the selection matrix applied during the
        selection phase; ignored for "simple_screening"
    repeated_selection = if True, apply the selection algorithm at every
        transfer of the selection phase; if False, apply it only once, at the
        last transfer of the selection phase

    Returns a pandas DataFrame with one row per transfer and columns
    algorithm_name, transfer, community_phenotype, selection_algorithm.
    """
    n_transfer = params_simulation["n_transfer"]
    n_selection = params_simulation["n_transfer_selection"]
    temp_df = pd.DataFrame({
        "algorithm_name": protocol_name,
        "transfer": range(1, n_transfer + 1),
        "community_phenotype": params_simulation["selected_function"],
        # Screening never selects; overwritten below for selection protocols.
        "selection_algorithm": "no_selection"
    })
    if protocol_name != "simple_screening":
        n_maturation = n_transfer - n_selection  # transfers after the selection phase
        if repeated_selection:
            # Select at every transfer of the selection phase.
            schedule = [selection_algorithm] * n_selection + ["no_selection"] * n_maturation
        else:
            # Select once, at the end of the selection phase.
            schedule = ["no_selection"] * (n_selection - 1) + [selection_algorithm] + ["no_selection"] * n_maturation
        temp_df["selection_algorithm"] = schedule

    return temp_df
"Blouin2015", selection_algorithm = "pool_top10percent", repeated_selection = True) 84 | Blouin2015_control = make_protocol(params_simulation, protocol_name = "Blouin2015_control", selection_algorithm = "pool_top10percent_control", repeated_selection = True) 85 | Chang2020a = make_protocol(params_simulation, protocol_name = "Chang2020a", selection_algorithm = "select_top16percent", repeated_selection = True) 86 | Chang2020a_control = make_protocol(params_simulation, protocol_name = "Chang2020a_control", selection_algorithm = "select_top16percent_control", repeated_selection = True) 87 | Chang2020b = make_protocol(params_simulation, protocol_name = "Chang2020b", selection_algorithm = "select_top25percent", repeated_selection = True) 88 | Chang2020b_control = make_protocol(params_simulation, protocol_name = "Chang2020b_control", selection_algorithm = "select_top25percent_control", repeated_selection = True) 89 | Jochum2019 = make_protocol(params_simulation, protocol_name = "Jochum2019", selection_algorithm = "pool_top10percent", repeated_selection = True) 90 | Mueller2019 = make_protocol(params_simulation, protocol_name = "Mueller2019", selection_algorithm = "pool_top25percent", repeated_selection = True) 91 | Panke_Buisse2015 = make_protocol(params_simulation, protocol_name = "Panke_Buisse2015", selection_algorithm = "pool_top28percent", repeated_selection = True) 92 | Swenson2000a = make_protocol(params_simulation, protocol_name = "Swenson2000a", selection_algorithm = "pool_top20percent", repeated_selection = True) 93 | Swenson2000a_control = make_protocol(params_simulation, protocol_name = "Swenson2000a_control", selection_algorithm = "pool_top20percent_control", repeated_selection = True) 94 | Swenson2000b = make_protocol(params_simulation, protocol_name = "Swenson2000b", selection_algorithm = "select_top25percent", repeated_selection = True) 95 | Swenson2000b_control = make_protocol(params_simulation, protocol_name = "Swenson2000b_control", selection_algorithm = 
"select_top25percent_control", repeated_selection = True) 96 | Swenson2000c = make_protocol(params_simulation, protocol_name = "Swenson2000c", selection_algorithm = "pool_top20percent", repeated_selection = True) 97 | Wright2019 = make_protocol(params_simulation, protocol_name = "Wright2019", selection_algorithm = "pool_top10percent", repeated_selection = True) 98 | Wright2019_control = make_protocol(params_simulation, protocol_name = "Wright2019_control", selection_algorithm = "pool_top10percent_control", repeated_selection = True) 99 | 100 | # Sub-lineage protocols 101 | Arora2019 = make_protocol(params_simulation, protocol_name = "Arora2019", selection_algorithm = "Arora2019", repeated_selection = True) 102 | Arora2019_control = make_protocol(params_simulation, protocol_name = "Arora2019_control", selection_algorithm = "Arora2019_control", repeated_selection = True) 103 | Raynaud2019a = make_protocol(params_simulation, protocol_name = "Raynaud2019a", selection_algorithm = "Raynaud2019a", repeated_selection = True) 104 | Raynaud2019a_control = make_protocol(params_simulation, protocol_name = "Raynaud2019a_control", selection_algorithm = "Raynaud2019a_control", repeated_selection = True) 105 | Raynaud2019b = make_protocol(params_simulation, protocol_name = "Raynaud2019b", selection_algorithm = "Raynaud2019b", repeated_selection = True) 106 | Raynaud2019b_control = make_protocol(params_simulation, protocol_name = "Raynaud2019b_control", selection_algorithm = "Raynaud2019b_control", repeated_selection = True) 107 | 108 | # Theory 109 | Penn2004 = make_protocol(params_simulation, protocol_name = "Penn2004", selection_algorithm = "Williams2007a", repeated_selection = True) 110 | Williams2007a = make_protocol(params_simulation, protocol_name = "Williams2007a", selection_algorithm = "Williams2007a", repeated_selection = True) 111 | Williams2007b = make_protocol(params_simulation, protocol_name = "Williams2007b", selection_algorithm = "Williams2007b", repeated_selection 
= True) 112 | Xie2019a = make_protocol(params_simulation, protocol_name = "Xie2019a", selection_algorithm = "select_top_dog", repeated_selection = True) 113 | Xie2019b = make_protocol(params_simulation, protocol_name = "Xie2019b", selection_algorithm = "select_top10percent", repeated_selection = True) 114 | 115 | 116 | #directed_selection 117 | directed_selection = pd.DataFrame({ 118 | "algorithm_name": "directed_selection", 119 | "transfer": range(1, params_simulation["n_transfer"] + 1), 120 | "community_phenotype": params_simulation["selected_function"], 121 | "selection_algorithm": ["no_selection" for i in range(params_simulation["n_transfer_selection"]-1)] + ["select_top"] + ["no_selection" for i in range(params_simulation["n_transfer"] - params_simulation["n_transfer_selection"])] 122 | }) 123 | 124 | algorithms = pd.concat([ 125 | # Control 126 | simple_screening, select_top25, select_top10, pool_top25, pool_top10, 127 | # Experimental protocols 128 | Blouin2015, Blouin2015_control, Chang2020a, Chang2020a_control, Chang2020b, Chang2020b_control, 129 | Jochum2019, Mueller2019, Panke_Buisse2015, 130 | Swenson2000a, Swenson2000a_control, Swenson2000b, Swenson2000b_control, Swenson2000c, 131 | Wright2019, Wright2019_control, 132 | # Sub-lineage protocols 133 | Arora2019, Arora2019_control, Raynaud2019a, Raynaud2019a_control, Raynaud2019b, Raynaud2019b_control, 134 | # Theory 135 | Penn2004, Williams2007a, Williams2007b, Xie2019a, Xie2019b, 136 | directed_selection 137 | ]) 138 | 139 | 140 | return algorithms 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /community_selection/D_perturbation_algorithms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Nov 27 2019 5 | @author: changyuchang 6 | """ 7 | import numpy as np 8 | import random 9 | from 
def _metabolite_choices(old_R0, r_type, exclude = None):
    """
    List the candidate resource perturbations for one source medium.

    old_R0 = pandas Series of resource supply in the kept (source) well
    r_type = perturbation type from the mapping file
    exclude = optional resource index that must never be perturbed (the target
        resource of f6-style functions); None allows all resources

    Returns a list of resource indices (rescale types) or of (gain, lose)
    index tuples (swap types).
    """
    index = [x for x in old_R0.index if x != exclude]
    if r_type == "add":
        # Move a fraction of the most abundant resource onto a random other one
        top = old_R0.idxmax()
        return [(x, y) for x in index for y in index if x != y and x == top]
    elif r_type == "remove":
        # Move a fraction of a random supplied resource onto the least abundant one
        bottom = old_R0.idxmin()
        return [(x, y) for x in index for y in index if x != y and y == bottom and old_R0[x] > 0]
    elif r_type in ("rescale_add", "old"):
        # Scale up (or re-supply) a random resource
        return list(index)
    elif r_type == "rescale_remove":
        # Scale down a random resource that is still supplied
        return [x for x in index if old_R0[x] > 0]
    # Default: swap a fraction between any ordered pair of distinct resources
    return [(x, y) for x in index for y in index if x != y]


def resource_perturb(plate, params_simulation, keep):
    """
    Perturb the fresh-medium composition (plate.R0) of every well except `keep`.

    Each non-kept well starts from the kept well's medium and receives one
    distinct random perturbation (never reused across wells); media are then
    rescaled so every well supplies R0_food in total, and fresh media are
    re-added to plate.R so the current round already uses the new R0.

    Fixes relative to the previous version: (1) the perturbation-type dispatch
    used independent `if` statements with a trailing `else`, so for r_type
    "add"/"remove" the candidate list was silently clobbered by the default
    swap list; (2) the target-resource branch referenced an undefined variable
    `y` in the rescale comprehensions, raising NameError. Both paths now go
    through _metabolite_choices.
    """
    r_type = params_simulation["r_type"]
    # Remove the fresh media added at the last transfer; re-added at the end
    plate.R = plate.R - plate.R0
    old_R0 = plate.R0[plate.N.columns[keep]]

    # If selecting on a target resource (f6-type functions), never perturb it
    exclude = None
    if "target_resource" in params_simulation["selected_function"]:
        exclude = old_R0.index[params_simulation["target_resource"]]
    metabolite_choice = _metabolite_choices(old_R0, r_type, exclude)

    # Assign one random, distinct perturbation to each non-kept well
    for k in plate.R0.columns:
        if k == plate.R0.columns[keep]:
            continue
        plate.R0[k] = old_R0  # start every well from the kept medium
        if len(metabolite_choice) == 0:
            continue  # every possible perturbation has already been used
        r_id = random.choice(metabolite_choice)
        if r_type == "rescale_add":
            plate.R0.loc[r_id, k] = plate.R0.loc[r_id, k] * (1 + params_simulation["r_percent"])
        elif r_type == "rescale_remove":
            plate.R0.loc[r_id, k] = plate.R0.loc[r_id, k] * (1 - params_simulation["r_percent"])
        elif r_type == "old":
            plate.R0[k] = plate.R0[k] * (1 - params_simulation["R_percent"])  # dilute old resource
            plate.R0.loc[r_id, k] = plate.R0.loc[r_id, k] + (params_simulation["R0_food"] * params_simulation["R_percent"])  # add fixed percent
        else:
            # Swap types ("add", "remove", default): move a fraction of
            # resource r_id[1] onto resource r_id[0]
            plate.R0.loc[r_id[0], k] = plate.R0.loc[r_id[0], k] + (plate.R0.loc[r_id[1], k] * params_simulation["r_percent"])
            plate.R0.loc[r_id[1], k] = plate.R0.loc[r_id[1], k] * (1 - params_simulation["r_percent"])
        # Remove the chosen perturbation as an option for subsequent wells
        metabolite_choice = [x for x in metabolite_choice if x != r_id]

    # Rescale so each medium supplies R0_food in total (guards against float drift)
    plate.R0 = plate.R0 / np.sum(plate.R0) * params_simulation["R0_food"]
    # Add back fresh media so this round uses the new R0
    plate.R = plate.R + plate.R0
    return plate
def parent_migration(community_function):
    """
    Migrate the parent pool into every well except the current winner(s).

    community_function = 1-D array of community function values, one per well

    Returns a 1-D numpy array of 0/1 migration factors (0 = no migration).
    """
    scores = np.asarray(community_function)

    # Start from "migrate everywhere" ...
    migration_factor = np.ones(len(scores))

    # ... then mask out every well tied for the best score (reversed order,
    # matching the winner-ordering convention used elsewhere in the package)
    best_wells = np.where(scores >= np.max(scores))[0][::-1]
    migration_factor[best_wells] = 0

    return migration_factor
def migrate_half(community_function):
    """
    Migrate into every other well (alternating 1/0 pattern).

    Fix: the original ``[1, 0] * int(n_wells/2)`` produced a factor list one
    element short whenever the number of wells was odd (e.g. monoculture runs
    set n_wells to the species count, which need not be even). The pattern is
    now padded and truncated to exactly n_wells entries; even n_wells is
    unchanged.
    """
    # Number of wells
    n_wells = len(community_function)

    # Alternating migration pattern, exactly n_wells long
    migration_factor = ([1, 0] * ((n_wells + 1) // 2))[:n_wells]

    return migration_factor


def migrate_random(community_function):
    """
    Migrate into a random subset of wells: independent Bernoulli(0.5) draw
    per well.
    """
    # Number of wells
    n_wells = len(community_function)

    # Migration
    migration_factor = np.random.binomial(1, 0.5, size = n_wells)

    return migration_factor


# --------------------------------------------------------------------------
# community_selection/C_selection_algorithms.py
# --------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Nov 27 2019
@author: changyuchang
"""
import numpy as np
from functools import partial

def no_selection(community_function):
    """
    Direct well-to-well transfer without selection (identity transfer matrix).
    """
    n_wells = len(community_function)
    return np.eye(n_wells)

# Make selection algorithms with similar names, using partial functions
## Select top p fraction
def temp_select_top(community_function, p):
    """
    Seed each new well from one community in the top-p fraction, ranked by
    function; the winner list is cycled until every new well has a donor.
    """
    n_wells = len(community_function)
    # Function value separating the top-p fraction from the rest.
    cut_off = np.sort(community_function)[int(np.floor(n_wells * (1 - p)))]
    winner_index = np.where(community_function >= cut_off)[0][::-1]
    # Repeat the winners until the donor list covers every new well.
    donors = list(winner_index) * (int(np.ceil(1 / p) + 1))
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix

for pct in [10, 15, 16, 20, 25, 28, 30, 33, 40, 50, 60]:
    globals()['select_top%spercent' % pct] = partial(temp_select_top, p = pct/100)


## Select top p fraction, randomized control
def temp_select_top_control(community_function, p):
    """
    Control for temp_select_top: the function vector is shuffled before the
    cutoff is applied, so the "winners" are effectively random wells.
    """
    n_wells = len(community_function)
    shuffled = community_function.copy()
    np.random.shuffle(shuffled)
    cut_off = np.sort(shuffled)[int(np.floor(n_wells * (1 - p)))]
    winner_index = np.where(shuffled >= cut_off)[0][::-1]
    donors = list(winner_index) * (int(np.ceil(1 / p) + 1))
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix

for pct in [10, 15, 16, 20, 25, 28, 30, 33, 40, 50, 60]:
    globals()['select_top%spercent_control' % pct] = partial(temp_select_top_control, p = pct/100)


## Pool top p fraction
def temp_pool_top(community_function, p):
    """
    Pool the top-p fraction of communities and inoculate every new well
    with the mixture.
    """
    n_wells = len(community_function)
    cut_off = np.sort(community_function)[int(np.floor(n_wells * (1 - p)))]
    winner_index = np.where(community_function >= cut_off)[0][::-1]
    transfer_matrix = np.zeros((n_wells, n_wells))
    transfer_matrix[:, list(winner_index)] = 1
    return transfer_matrix

for pct in [10, 15, 16, 20, 25, 28, 30, 33, 40, 50, 60]:
    globals()['pool_top%spercent' % pct] = partial(temp_pool_top, p = pct/100)
## Pool top p fraction, randomized control
def temp_pool_top_control(community_function, p):
    """
    Control for temp_pool_top: the function vector is shuffled before the
    cutoff is applied, so the pooled "winners" are random wells.
    """
    n_wells = len(community_function)
    shuffled = community_function.copy()
    np.random.shuffle(shuffled)
    cut_off = np.sort(shuffled)[int(np.floor(n_wells * (1 - p)))]
    winners = np.where(shuffled >= cut_off)[0][::-1]
    transfer_matrix = np.zeros((n_wells, n_wells))
    transfer_matrix[:, winners] = 1
    return transfer_matrix

for pct in [10, 15, 16, 20, 25, 28, 30, 33, 40, 50, 60]:
    globals()['pool_top%spercent_control' % pct] = partial(temp_pool_top_control, p = pct/100)


# Sub-lineage algorithms
def Arora2019(community_function, n_rep = 3):
    """
    Arora2019: split the plate into lines of n_rep replicate wells; the
    best-scoring well of each line re-seeds the whole line.
    """
    n_wells = len(community_function)
    n_lines = int(np.ceil(n_wells / n_rep))  # Number of lines
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo, hi = line * n_rep, line * n_rep + n_rep
        segment = community_function[lo:hi]
        best = np.where(segment == np.max(segment))[0]
        transfer_matrix[lo:hi, best + lo] = 1
    return transfer_matrix


def Arora2019_control(community_function, n_rep = 3):
    """
    Same as Arora2019, except the seeding well of each line is picked
    uniformly at random instead of by function.
    """
    n_wells = len(community_function)
    n_lines = int(np.ceil(n_wells / n_rep))  # Number of lines
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo = line * n_rep
        pick = np.random.randint(0, n_rep)
        if pick + lo >= n_wells:
            # The last line may be shorter than n_rep; redraw within its length.
            pick = np.random.randint(0, n_wells % n_rep)
        transfer_matrix[lo:lo + n_rep, pick + lo] = 1
    return transfer_matrix
def Raynaud2019a(community_function, n_lines = 3):
    """
    Raynaud2019a: split the plate into n_lines lineages; the best-scoring
    well of each lineage re-seeds all wells of that lineage.
    """
    n_wells = len(community_function)
    n_rep = int(np.ceil(n_wells / n_lines))  # Replicates per lineage
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo, hi = line * n_rep, line * n_rep + n_rep
        segment = community_function[lo:hi]
        best = np.where(segment == np.max(segment))[0]
        transfer_matrix[lo:hi, best + lo] = 1
    return transfer_matrix


def Raynaud2019a_control(community_function, n_lines = 3):
    """
    Same as Raynaud2019a, except the seeding well of each lineage is picked
    uniformly at random instead of by function.
    """
    n_wells = len(community_function)
    n_rep = int(np.ceil(n_wells / n_lines))  # Replicates per lineage
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo = line * n_rep
        pick = np.random.randint(0, n_rep)
        if pick + lo >= n_wells:
            # The last lineage may be shorter than n_rep; redraw within it.
            pick = np.random.randint(0, n_wells % n_rep)
        transfer_matrix[lo:lo + n_rep, pick + lo] = 1
    return transfer_matrix


def Raynaud2019b(community_function, n_lines = 3):
    """
    Same as Raynaud2019a, except the lineage winners are pooled: every new
    well is inoculated with the mixture of all lineage winners.
    """
    n_wells = len(community_function)
    n_rep = int(np.ceil(n_wells / n_lines))  # Replicates per lineage
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo = line * n_rep
        segment = community_function[lo:lo + n_rep]
        best = np.where(segment == np.max(segment))[0]
        transfer_matrix[:, best + lo] = 1
    return transfer_matrix
def Raynaud2019b_control(community_function, n_lines = 3):
    """
    Same as Raynaud2019b, except each lineage's pooled member is picked
    uniformly at random instead of by function.
    """
    n_wells = len(community_function)
    n_rep = int(np.ceil(n_wells / n_lines))  # Replicates per lineage
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo = line * n_rep
        pick = np.random.randint(0, n_rep)
        if pick + lo >= n_wells:
            # The last lineage may be shorter than n_rep; redraw within it.
            pick = np.random.randint(0, n_wells % n_rep)
        transfer_matrix[:, pick + lo] = 1
    return transfer_matrix


def select_top(community_function):
    """
    Re-seed every new well from the single best-performing community
    (on ties, all tied wells cycle as donors).
    """
    n_wells = len(community_function)
    # Winner wells, highest first.
    winners = np.where(community_function >= np.max(community_function))[0][::-1]
    donors = list(winners) * n_wells
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix
# Other selection algorithms
def select_top_nth(community_function, n):
    """
    Re-seed every new well from the community ranked n-th from the top.
    Designed for measuring perturbation effects.
    """
    n_wells = len(community_function)
    # Value of the n-th best community.
    nth_value = np.sort(community_function)[::-1][n - 1]
    donors = list(np.where(community_function == nth_value)[0]) * n_wells
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix


def select_top_dog(community_function):
    """
    Reproduce the best community into ~60% of the new wells and the
    runner-up into the remainder.
    """
    n_wells = len(community_function)
    # Median cutoff guarantees at least two winner candidates.
    median_cut = np.sort(community_function)[int(np.round(n_wells * 0.5)) - 1]
    winners = list(np.where(community_function >= median_cut)[0][::-1])
    donors = [winners[0]] * int(0.6 * n_wells) + [winners[1]] * int(0.5 * n_wells)
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix


def Williams2007a(community_function):
    """
    Williams2007a: transfer the single best community into every new well
    through a strong (10^-4) bottleneck.
    """
    n_wells = len(community_function)
    winners = np.where(community_function == np.max(community_function))[0][::-1]
    donors = list(winners) * n_wells
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 10**(-4)  # Strong bottleneck
    return transfer_matrix
def Williams2007b(community_function, p = 0.2):
    """
    Williams2007b: pool the communities scoring strictly above the top-p
    cutoff and seed every new well from the pool through a strong (10^-4)
    bottleneck.
    """
    n_wells = len(community_function)
    cut_off = np.sort(community_function)[int(np.round(n_wells * (1 - p))) - 1]
    winners = np.where(community_function > cut_off)[0][::-1]
    transfer_matrix = np.zeros((n_wells, n_wells))
    transfer_matrix[:, winners] = 10**(-4)  # Strong bottleneck
    return transfer_matrix


def pair_top(community_function):
    """
    Pair the top ~sqrt(n) communities: each new well receives either one
    winner or a coalesced pair, with roughly two replicates per pairwise
    combination.
    """
    import itertools

    n_wells = len(community_function)
    # Top fraction scales as sqrt(n)/n.
    top_fraction = np.sqrt(n_wells) / n_wells
    cut_off = np.sort(community_function)[int(np.round(n_wells * (1 - top_fraction)))]
    winners = np.where(community_function >= cut_off)[0]
    pairs = list(itertools.combinations(winners, 2))
    donors = list(winners) + pairs * (int(np.round(1 / top_fraction)) + 1)
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        # A scalar donor seeds one column; a tuple donor coalesces two winners.
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix
# --------------------------------------------------------------------------
# community_selection/usertools.py
# --------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mar 09 2020
@author: changyuchang
"""
import numpy as np
import pandas as pd
from community_selection.A_experiment_functions import *
from community_selection.B_community_phenotypes import *
from community_selection.C_selection_algorithms import *
from community_selection.D_perturbation_algorithms import *
from community_selection.E_protocols import *


def plot_community_function(function_df):
    """Scatter-plot community phenotype against transfer number."""
    function_df.plot.scatter(x = "Transfer", y = "CommunityPhenotype")

def plot_transfer_matrix(transfer_matrix):
    """Heatmap of a transfer (selection) matrix: rows = new wells, columns = old wells."""
    import seaborn as sns
    # NOTE(review): `plt` is not imported in this module; presumably it comes
    # in through one of the star imports above -- confirm.
    fig,ax=plt.subplots()
    sns.heatmap(transfer_matrix,ax=ax)
    ax.set_xlabel('Old well',fontsize=14)
    ax.set_ylabel('New well',fontsize=14)
    ax.set_title(r'Transfer Matrix',fontsize=14)
    plt.show()

def make_assumptions(input_file, row):
    ''' Generate the assumptions dictionary from input file and row of input file

    input_file = path to the mapping .csv (one selection experiment per row)
    row = integer index of the experiment row to load

    Returns the default assumptions (a_default) overridden by the non-'NA'
    cells of the chosen row, with several derived/coerced entries filled in.
    '''
    # Load row data and default assumptions. keep_default_na=False keeps the
    # literal string 'NA' so it can be used as an explicit "use default" flag.
    row_dat = pd.read_csv(input_file, keep_default_na=False).iloc[row]
    assumptions = a_default.copy()
    original_params = MakeParams(assumptions.copy())
    # Update assumptions based on row_dat
    for k in row_dat.keys():
        # If 'NA', fall back to the default value
        if k in assumptions.keys() and row_dat[k] != 'NA' :
            assumptions.update({k :row_dat[k]})
        elif k in assumptions.keys() and row_dat[k] == 'NA' :
            continue
        # Some params whose defaults we want to keep are not stored in
        # assumptions but are generated by MakeParams
        elif k not in assumptions.keys() and k in original_params.keys() and row_dat[k] != 'NA':
            assumptions.update({k :row_dat[k]})
        elif k not in assumptions.keys() and k in original_params.keys() and row_dat[k] == 'NA':
            assumptions.update({k:original_params[k]})
        else:
            # Unknown keys: keep the given value, or NaN when 'NA'
            if row_dat[k] != 'NA':
                assumptions.update({k :row_dat[k]})
            else:
                assumptions.update({k :np.nan})

    # These two assumptions are generated from combinations of other parameters
    assumptions.update({'SA' :row_dat['sn']*np.ones(row_dat['sf']) }) # Number of consumers in each specialist family
    assumptions.update({'MA' :row_dat['rn']*np.ones(row_dat['rf']) }) # Number of resources in each class

    # MakeParams does not work with numpy type for R0_food, so convert to
    # base python if not using the default
    if not isinstance(assumptions['R0_food'],int):
        assumptions['R0_food'] = assumptions['R0_food'].item()

    # When running monoculture (every isolate in monoculture), one well per
    # species in the regional pool
    if assumptions['monoculture'] :
        assumptions.update({"n_wells": int(np.sum(assumptions["SA"]) + assumptions["Sgen"])})

    # If bottleneck is True and no size is set, the size defaults to dilution
    if assumptions['bottleneck']:
        if pd.isnull(assumptions['bottleneck_size']):
            assumptions['bottleneck_size'] =assumptions['dilution']
        else:
            assumptions['bottleneck_size'] = float(assumptions['bottleneck_size'])

    # If knock_in is True and no threshold is set, the threshold defaults to 0
    if assumptions['knock_in']:
        if pd.isnull(assumptions['knock_in_threshold']) :
            assumptions['knock_in_threshold'] =0
        else:
            assumptions['knock_in_threshold'] = float(assumptions['knock_in_threshold'])

    # If coalescence is True and no fraction is set, defaults to 50-50
    if assumptions['coalescence']:
        if pd.isnull(assumptions['frac_coalescence']):
            assumptions['frac_coalescence'] =0.5
        else:
            assumptions['frac_coalescence'] = float(assumptions['frac_coalescence'])

    # If migration is True and no n_migration is set, defaults to n_inoc
    if assumptions['migration']:
        if pd.isnull(assumptions['n_migration']):
            assumptions['n_migration'] =assumptions['n_inoc']
        else:
            assumptions['n_migration'] = int(assumptions['n_migration'])
        # NOTE(review): the nesting of this s_migration coercion under the
        # migration flag is inferred from context -- confirm against upstream.
        if pd.isnull(assumptions['s_migration']):
            pass
        else:
            assumptions['s_migration'] = int(assumptions['s_migration'])

    # If resource_shift is True and no r_percent is set, defaults to 0.1
    if assumptions['resource_shift']:
        if pd.isnull(assumptions['r_percent']):
            assumptions['r_percent'] =0.1
        else:
            assumptions['r_percent'] = float(assumptions['r_percent'])

    # Overwrite plate: update n_wells from the supplied composition file
    if isinstance(assumptions["overwrite_plate"], str) and assumptions["overwrite_plate"] != "":
        print("\nUpdating the n_wells with overwrite_plate")
        df = pd.read_csv(assumptions["overwrite_plate"])
        # Only the latest transfer in the file is used
        df = df[df.Transfer == np.max(df.Transfer)]
        if len(df["Well"].unique()) != 1:
            assumptions["n_wells"] = len(df["Well"].unique())

    if np.isnan(assumptions["ruggedness"]):
        assumptions["ruggedness"] = 0

    # f6_target_resource: default target resource is the last resource
    if "target_resource" in assumptions["selected_function"]:
        if pd.isnull(assumptions['target_resource']):
            assumptions["target_resource"] = int(assumptions["rn"]) * int(assumptions["rf"]) - 1
        else:
            assumptions["target_resource"] = int(assumptions["target_resource"])

    return assumptions
def prepare_experiment(assumptions):
    """
    Prepare the experimental setup for this simulation

    assumptions = dictionary of metaparameters

    Return: params, params_simulation, params_algorithm, plate
    """
    print("\nGenerate species parameters")
    # Seed before every stochastic draw so runs are reproducible per seed
    np.random.seed(assumptions['seed'])
    params = MakeParams(assumptions)
    if assumptions["selected_function"] == "f5_invader_suppression":
        print("\nDraw invader feature")
        params = create_invader(params, assumptions)

    print("\nDraw per-capita function and cost")
    f1_species_smooth, f1_species_rugged, f2_species_smooth, f2_species_rugged = draw_species_function(assumptions)
    params.update({"f1_species_smooth": f1_species_smooth, "f1_species_rugged": f1_species_rugged, "f2_species_smooth": f2_species_smooth, "f2_species_rugged": f2_species_rugged})
    gi = draw_species_cost(f1_species_smooth, assumptions)
    params.update({"g": gi})

    print("\nConstruct plate")
    # Re-seed so plate construction does not depend on how many draws the
    # parameter generation consumed
    np.random.seed(assumptions['seed'])
    plate = make_plate(assumptions,params)

    print("\nAdd community function to plate")
    plate = add_community_function(plate, assumptions, params)

    if not pd.isnull(assumptions["overwrite_plate"]) :
        print("\nUpdating the initial plate composition by overwrite_plate")
        plate = overwrite_plate(plate, assumptions)

    print("\nPrepare Protocol")
    # Extract protocol from the protocol database (E_protocols)
    algorithms = make_algorithms(assumptions)
    params_algorithm = algorithms[algorithms['algorithm_name'] == assumptions['protocol']]

    # params_simulation by default contains all assumptions not stored in params
    params_simulation = dict((k, assumptions[k]) for k in assumptions.keys() if k not in params.keys())

    return params, params_simulation , params_algorithm, plate
def simulate_community(params, params_simulation, params_algorithm, plate):
    """
    Simulate community dynamics by given experimental regimes

    params = parameter passed from community-simulator
    params_simulation = dictionary of parameters for running experiment
    params_algorithm = dictionary of algorithms that determine the selection regime, migration regime, and community phenotypes
    plate = Plate object specified by community-simulator

    Side effects: writes composition/function .txt files when the
    corresponding save flags are set; mutates `plate` in place.
    """
    print("\nStarting " + params_simulation["exp_id"])
    print(params_algorithm)

    # Test the community function once up front so a broken phenotype
    # function fails fast instead of mid-experiment
    globals()[params_algorithm["community_phenotype"][0]](plate, params_simulation = params_simulation)
    try:
        community_function = globals()[params_algorithm["community_phenotype"][0]](plate, params_simulation = params_simulation) # Community phenotype
    except:
        print('\nCommunity phenotype test failed')
        raise SystemExit

    # Save the inocula composition
    if params_simulation['save_composition']:
        plate_data_list = list() # Plate composition
        plate_data = reshape_plate_data(plate, params_simulation,transfer_loop_index=0) # Initial state
        plate_data_list.append(plate_data)
        composition_filename = params_simulation['output_dir'] + params_simulation['exp_id'] + '_composition.txt'

    # Save the initial community function + richness + biomass
    if params_simulation['save_function']:
        community_function_list = list()
        # Species with abundance >= one cell (1/scale) count toward richness
        richness = np.sum(plate.N >= 1/params_simulation["scale"], axis = 0) # Richness
        biomass = list(np.sum(plate.N, axis = 0)) # Biomass
        function_data = reshape_function_data(params_simulation,community_function, richness, biomass, transfer_loop_index =0)
        community_function_list.append(function_data)
        function_filename = params_simulation['output_dir'] + params_simulation['exp_id'] + '_function.txt'

    print("\nStart propogation")
    # Run simulation
    for i in range(0, params_simulation["n_transfer"]):
        # Algorithms used in this transfer
        phenotype_algorithm = params_algorithm["community_phenotype"][i]
        selection_algorithm = params_algorithm["selection_algorithm"][i]

        # Propagation
        plate.Propagate(params_simulation["n_propagation"])

        # Measure community phenotype
        community_function = globals()[phenotype_algorithm](plate, params_simulation = params_simulation) # Community phenotype

        # Append the composition to a list (subsampled by lograte)
        if params_simulation['save_composition'] and ((i+1) % params_simulation['composition_lograte'] == 0):
            plate_data = reshape_plate_data(plate, params_simulation, transfer_loop_index=i+1)
            plate_data_list.append(plate_data)

        if params_simulation['save_function'] and ((i+1) % params_simulation['function_lograte'] == 0):
            richness = np.sum(plate.N >= 1/params_simulation["scale"], axis = 0) # Richness
            biomass = list(np.sum(plate.N, axis = 0)) # Biomass
            function_data = reshape_function_data(params_simulation, community_function, richness, biomass, transfer_loop_index =i+1)
            community_function_list.append(function_data)

        # Store prior state before passaging (for coalescence)
        setattr(plate, "prior_N", plate.N)
        setattr(plate, "prior_R", plate.R)
        setattr(plate, "prior_R0", plate.R0)

        # Passage and transfer matrix
        transfer_matrix = globals()[selection_algorithm](community_function)
        if params_simulation['monoculture']:
            plate = passage_monoculture(plate, params_simulation["dilution"])
        else:
            plate.Passage(transfer_matrix * params_simulation["dilution"])

        # Perturbation (directed selection only perturbs after select_top)
        if params_simulation['directed_selection']:
            if selection_algorithm == 'select_top': # In principle it can take select_top_x% but leave it as select_top for now
                plate = perturb(plate, params_simulation, keep = np.where(community_function >= np.max(community_function))[0][0])
            # if selection_algorithm != 'select_top' and (params_algorithm.iloc[i]["algorithm_name"] != 'simple_screening'):
            #     plate = perturb(plate, params_simulation, keep = None)
            elif selection_algorithm == "no_selection":
                pass

        print("Transfer " + str(i+1))

    if params_simulation['save_composition']:
        pd.concat(plate_data_list).to_csv(composition_filename, index = False)
    if params_simulation['save_function']:
        pd.concat(community_function_list).to_csv(function_filename, index = False)
    print("\n" + params_simulation["exp_id"] + " finished")
def save_plate(assumptions, plate):
    """
    Save the initial plate in a pickle file. Like saving a frozen stock at -80C
    """
    if assumptions['save_plate']:
        import dill as pickle
        with open(assumptions['output_dir'] + assumptions['exp_id'] + ".p", "wb") as f:
            pickle.dump(plate, f)

def extract_species_function(assumptions):
    """
    Extract the per-capita species function from the community data

    Returns a pandas DataFrame: one row per species for additive functions,
    one row per species pair for interaction functions.
    """
    # Same seeding as prepare_experiment, so the drawn functions match the run
    np.random.seed(assumptions['seed'])
    params = MakeParams(assumptions)
    f1_species_smooth, f1_species_rugged, f2_species_smooth, f2_species_rugged = draw_species_function(assumptions)
    # Total species = specialists (sn per family x sf families) + generalists
    S_tot = int(assumptions["sn"]) * int(assumptions["sf"]) + int(assumptions["Sgen"])

    if "additive" in assumptions["selected_function"]:
        if assumptions["selected_function"] == "f1_additive":
            per_capita_function = f1_species_smooth
            species_function = pd.DataFrame({"SelectedFunction": assumptions["selected_function"], "Seed": np.repeat(assumptions['seed'], S_tot), "ID": range(1, S_tot+1), "PerCapitaFunction": per_capita_function})
            if "cost" in assumptions["exp_id"]: # Should read a flag instead of name
                gi = draw_species_cost(f1_species_smooth, assumptions)
                params.update({"g": gi})
                species_function = pd.DataFrame({"SelectedFunction": assumptions["selected_function"], "Seed": np.repeat(assumptions['seed'], S_tot), "ID": range(1, S_tot+1), "PerCapitaFunction": per_capita_function, "g": gi})
        elif assumptions["selected_function"] == "f1a_additive":
            per_capita_function = f1_species_rugged
            species_function = pd.DataFrame({"SelectedFunction": assumptions["selected_function"], "Seed": np.repeat(assumptions['seed'], S_tot), "ID": range(1, S_tot+1), "PerCapitaFunction": per_capita_function})


    elif "interaction" in assumptions["selected_function"]:
        if assumptions["selected_function"] == "f2_interaction":
            per_interaction_function = f2_species_smooth
        elif assumptions["selected_function"] == "f2a_interaction":
            per_interaction_function = f2_species_rugged

        # Melt the S_tot x S_tot interaction matrix into long format
        df_interaction_function = pd.DataFrame(per_interaction_function)
        df_interaction_function.columns = range(1, S_tot+1)
        df_interaction_function = df_interaction_function.assign(ID_row=range(1,S_tot+1)).melt(id_vars="ID_row", var_name = "ID_col", value_name = "PerCapitaFunction")
        df_interaction_function = df_interaction_function.assign(SelectedFunction = assumptions["selected_function"], Seed = assumptions['seed'])
        species_function = df_interaction_function[["SelectedFunction", "Seed", "ID_row", "ID_col", "PerCapitaFunction"]]

    return(species_function)

# --------------------------------------------------------------------------
# docs/source/content/mapping_file.rst
# --------------------------------------------------------------------------
Input Mapping File
==================

The input mapping ``.csv`` lists 86 essential parameters in columns and (independent) selection experiments in rows. Here is an example of mapping file with two independent experiments.

.. csv-table::
   :file: ../data/input_test.csv



The mapping file has five categories of parameters:

.. contents::
   :local:

File operation
---------------

.. confval:: selected_function

   :type: string
   :default: ``f1_additive``

   Function under selection. Available options are ``f1_additive`` and ``f2_interaction``, ``f2a_interaction``, ``f3_additive_binary``, ``f4_interaction_binary``, ``f5_invader_growth``, and ``resource_distance_community_function``.


.. confval:: protocol

   :type: string
   :default: ``simple_screening``

   Protocol to implement. Only the protocols listed in ``E_protocols.py`` can be used.


.. confval:: seed

   :type: integer
   :default: ``1``

   Random seed to initiate pseudorandom number generator.
.. confval:: exp_id

   :type: string
   :default: ``f1_additive-simple_screening-1``

   Experiment-specific ID, which will also determine the naming convention of output files. For example, the community function is saved in ``f1_additive-simple_screening-1_function.txt`` if ``save_function=True``, whereas community composition is saved in ``f1_additive-simple_screening-1_composition.txt`` if ``save_composition=True``.


.. confval:: overwrite_plate

   :type: string
   :default: ``NA``

   To replace the initial plate composition with an arbitrary plate, specify a text file of the community composition that contains four columns: Type, ID, Well, and Abundance. If an output text file (e.g., ``f1_additive-simple_screening-1_composition.txt``) is specified and it contains composition for more than two transfers, by default only the metacommunity composition of the latest transfer is read.


.. confval:: passage_overwrite_plate

   :type: boolean
   :default: ``False``

   If overwrite_plate != NA, set TRUE if the community from overwrite_plate is at equilibrium and needs an additional transfer.


.. confval:: output_dir

   :type: string
   :default: ``data/``

   Directory where the output files will be stored.


.. confval:: save_function

   :type: boolean
   :default: ``True``

   Set ``True`` to save function data.


.. confval:: save_composition

   :type: boolean
   :default: ``True``

   Set ``True`` to save composition data.


.. confval:: save_plate

   :type: boolean
   :default: ``False``

   Set ``True`` to save the initial Metacommunity in a ``pickle`` file.


.. confval:: function_lograte

   :type: integer
   :default: ``1``

   How often you save the function in transfers. Default is saving functional data from every transfer.
105 | 106 | .. confval:: composition_lograte 107 | 108 | :type: integer 109 | :default: ``20`` 110 | 111 | How often do you save the composition in transfers. 112 | 113 | | 114 | 115 | Protocol-specific parameters 116 | ---------------------------- 117 | 118 | .. confval:: scale 119 | 120 | :type: integer 121 | :default: ``1000000`` 122 | 123 | Number of cells equivalent to :math:`N_i = 1`. 124 | 125 | 126 | .. confval:: n_inoc 127 | 128 | :type: integer 129 | :default: ``1000000`` 130 | 131 | Number of cells in the initial inoculum. 132 | 133 | 134 | .. confval:: rich_medium 135 | 136 | :type: boolean 137 | :default: ``True`` 138 | 139 | Set ``True`` to generate a rich medium sampled from an uniform distribution. Set ``False`` to generate a minimal medium with only the first resource is supplied. 140 | 141 | 142 | .. confval:: monoculture 143 | 144 | :type: boolean 145 | :default: ``False`` 146 | 147 | Set ``True`` to run simple screening with all monocultures from the regional species pool. The number of wells is equal to the number of species in the regional pool. 148 | 149 | 150 | .. confval:: dilution 151 | 152 | :type: float 153 | :default: ``0.001`` 154 | 155 | Dilution factor in the batch culture. 156 | 157 | 158 | .. confval:: n_wells 159 | 160 | :type: integer 161 | :default: ``96`` 162 | 163 | Number of wells (communities) in a plate (metacommunity). 164 | 165 | 166 | .. confval:: n_propagation 167 | 168 | :type: float 169 | :default: ``1`` 170 | 171 | Incubation time of a transfer. 172 | 173 | 174 | .. confval:: n_transfer 175 | 176 | :type: integer 177 | :default: ``40`` 178 | 179 | Number of total transfers (generations) to be run in the protocol. 180 | 181 | 182 | .. confval:: n_transfer_selection 183 | 184 | :type: interger 185 | :default: ``20`` 186 | 187 | Number of transfers (generations) that consecutively executes selection matrices from the start of an experiment. 
The number of stabilization transfers equals the difference between ``n_transfer`` and ``n_transfer_selection``.
confval:: g0 279 | 280 | :type: float 281 | :default: ``1`` 282 | 283 | The baseline conversion factor of biomass per energy. 284 | 285 | 286 | .. confval:: cost_distribution 287 | 288 | :type: string 289 | :default: ``Norm`` 290 | 291 | {"Gamma", "Unif"} 292 | 293 | .. confval:: cost_mean 294 | 295 | :type: float 296 | :default: ``0`` 297 | 298 | Mean fraction of cost feeded normal distribution. Suggested maximum to 0.05. 299 | 300 | 301 | .. confval:: cost_sd 302 | 303 | :type: float 304 | :default: ``0`` 305 | 306 | Standard deviation of fraction of cost feeded into a gamma distribution. ``cost_sd = 0`` if ``cost_mean = 0``, ``cost_sd = 0.01`` if ``cost_mean > 0``. 307 | 308 | 309 | .. confval:: cost_lower 310 | 311 | :type: float 312 | :default: ``0`` 313 | 314 | Lower bound for cost if ``cost_distribution`` is set to Uniform 315 | 316 | 317 | .. confval:: cost_upper 318 | 319 | :type: float 320 | :default: ``1`` 321 | 322 | Upper bound for cost if ``cost_distribution`` is set to Uniform 323 | 324 | 325 | .. confval:: invader_index 326 | 327 | :type: integer 328 | :default: ``2`` 329 | 330 | Index of an invader. Only one index is choosen. Currently a invasive community is not allowed. 331 | 332 | 333 | .. confval:: invader_sampling 334 | 335 | :type: string 336 | :default: ``Gamma`` 337 | 338 | Sampling algorithm to generate the invader uptake rate vector. Options are ``Gaussian``, ``Binary``, ``Gamma``, ``Binary_Gamma``. 339 | 340 | 341 | .. confval:: invader_strength 342 | 343 | :type: positive float 344 | :default: ``10`` 345 | 346 | Mean utiliration vector for the invader versus the average of the species in the pool 347 | 348 | .. confval:: target_resource 349 | 350 | :type: integer 351 | :default: ``NA`` 352 | 353 | Target resource production when ``selected_function`` is set to ``f6_target_resourece`` 354 | 355 | 356 | | 357 | 358 | Directed evolution 359 | ------------------ 360 | 361 | .. 
confval:: directed_selection 362 | 363 | :type: boolean 364 | :default: ``False`` 365 | 366 | Set ``True`` to run directed selection, one of flags below in directed evolution has to be also set ``True``. 367 | 368 | 369 | .. confval:: knock_out 370 | 371 | :type: boolean 372 | :default: ``False`` 373 | 374 | Set ``True`` to perform knock out pertubation. 375 | 376 | 377 | .. confval:: knock_in 378 | 379 | :type: boolean 380 | :default: ``F`` 381 | 382 | Set ``True`` performs knock in pertubation. 383 | 384 | 385 | .. confval:: knock_in_threshold 386 | 387 | :type: float 388 | :default: ``0.95`` 389 | 390 | If ``knock_in = True``, use the default ``knock_in_threshold=0.95``, which means that top 5% species in the pool is prepared to be knocked in a community, whereas the rest 95% of are not used. 391 | 392 | 393 | .. confval:: bottleneck 394 | 395 | :type: boolean 396 | :default: ``False`` 397 | 398 | Set ``True`` to perform bottleneck pertubations. 399 | 400 | 401 | .. confval:: bottleneck_size 402 | 403 | :type: float 404 | :default: ``0.00001`` 405 | 406 | If ``bottleneck=T``, perform an bottleneck shock to the specified communities by a dilution factor default to ``bottleneck_size=0.00001``. This bottleneck dilutoon is in addition to the regular dilution factor in the batch culture ``dilution=0.001``. 407 | 408 | 409 | .. confval:: migration 410 | 411 | :type: boolean 412 | :default: ``False`` 413 | 414 | Set ``True`` to perform migration pertubations. 415 | 416 | 417 | .. confval:: n_migration 418 | 419 | :type: integer 420 | :default: ``1000000`` 421 | 422 | Number of cells in the migrant community. 423 | 424 | 425 | .. confval:: s_migration 426 | 427 | :type: integer 428 | :default: ``NA`` 429 | 430 | Number of species in the migrant community. If ``NA`` (as default), the migrant community is sampled from a regional pool where the species abundance follows power-law distribution. 
If set into an integer, ``n_migration`` cells will be equally allocated to ``s_migrations`` species from the pool to build the migrant community. 431 | 432 | 433 | .. confval:: coalescence 434 | 435 | :type: boolean 436 | :default: ``False`` 437 | 438 | Set ``True`` to perform coalescence pertubation. 439 | 440 | 441 | .. confval:: f_coalescence 442 | 443 | :type: float 444 | :default: ``0.5`` 445 | 446 | Between 0 and 1. Fraction of migrant community during coalescence. The fraction of a perturbed community is ``1-f_coalescence``. 447 | 448 | 449 | .. confval:: resource_shift 450 | 451 | :type: boolean 452 | :default: ``False`` 453 | 454 | Set ``True`` performs resource pertubations. 455 | 456 | 457 | .. confval:: r_type 458 | 459 | :type: string 460 | :default: ``add`` 461 | 462 | Type of resource pertubation. Available options are ``rescale_add``, ``rescale_remove``, ``add``, ``remove``, ``old``. A fraction ``r_percent`` of resource A is removed, and that amount of resource is added to another resource B. 463 | 464 | 465 | .. confval:: r_percent 466 | 467 | :type: float 468 | :default: ``1`` 469 | 470 | Fraction of specified resource that is removed. ``r_percent=1`` means all resource A is removed. 471 | 472 | | 473 | 474 | Community-simulator parameters 475 | ------------------------------- 476 | 477 | The parameters in this section are inherited and some with differnt values from community-simulator. 478 | 479 | .. confval:: sampling 480 | 481 | :type: string 482 | :default: ``Binary_Gamma`` 483 | 484 | Specify choice of sampling algorithm to generate the consumer uptake rate vector. Options are ``Gaussian``, ``Binary``, ``Gamma``, ``Binary_Gamma``. 485 | 486 | 487 | .. confval:: sn 488 | 489 | :type: integer 490 | :default: ``2100`` 491 | 492 | Number of microbial species in the global pool. 493 | 494 | 495 | .. confval:: sf 496 | 497 | :type: integer 498 | :default: ``1`` 499 | 500 | Number of specialist family. 501 | 502 | 503 | .. 
confval:: s_gen 504 | 505 | :type: integer 506 | :default: ``0`` 507 | 508 | Number/Richness of generalist taxa. 509 | 510 | 511 | .. confval:: rn 512 | 513 | :type: integer 514 | :default: ``90`` 515 | 516 | Number of resource types. 517 | 518 | 519 | .. confval:: rf 520 | 521 | :type: integer 522 | :default: ``1`` 523 | 524 | Number of resource classes. 525 | 526 | 527 | .. confval:: R0_food 528 | 529 | :type: float 530 | :default: ``1000`` 531 | 532 | Total resource abundance. 533 | 534 | 535 | .. confval:: food 536 | 537 | :type: float 538 | :default: ``1000`` 539 | 540 | Index of food source being supplied in the minimal medium. Only works when ``rich_medium=False``. 541 | 542 | 543 | .. confval:: supply 544 | 545 | :type: string 546 | :default: ``off`` 547 | 548 | Choice of intrinsic resoruce dynamics. Set ``off`` for batch culture where resource is not renewing within a transfer. 549 | 550 | 551 | .. confval:: muc 552 | 553 | :type: float 554 | :default: ``10`` 555 | 556 | Mean sum over a row of the preference matrix ciα. 557 | 558 | 559 | .. confval:: sigc 560 | 561 | :type: float 562 | :default: ``3`` 563 | 564 | Standard deviation of sum over a row of the preference matrix ciα. 565 | 566 | 567 | .. confval:: c0 568 | 569 | :type: float 570 | :default: ``0`` 571 | 572 | Low consumption level for binary ciα. 573 | 574 | 575 | .. confval:: c1 576 | 577 | :type: integer 578 | :default: ``1``: 579 | 580 | High consumption level for binary ciα. 581 | 582 | 583 | .. confval:: q 584 | 585 | :type: float 586 | :default: ``0`` 587 | 588 | Fraction of consumption capacity allocated to preferred resource class. 589 | 590 | 591 | .. confval:: sparsity 592 | 593 | :type: float 594 | :default: ``0.2`` 595 | 596 | Sparsity of metabolic matrix. 597 | 598 | 599 | .. confval:: fs 600 | 601 | :type: float 602 | :default: ``0.45`` 603 | 604 | Fraction of secreted byproducts allocated to the same resource class. 605 | 606 | 607 | .. 
confval:: fw 608 | 609 | :type: float 610 | :default: ``0.45`` 611 | 612 | Fraction of secreted byproducts allocated to waste resource class. 613 | 614 | 615 | .. confval:: g 616 | 617 | :type: float 618 | :default: ``1`` 619 | 620 | Conversion factor from energy uptake to growth rate (1/energy). 621 | 622 | 623 | .. confval:: w 624 | 625 | :type: float 626 | :default: ``1`` 627 | 628 | Energy content of resource α (energy/mass). 629 | 630 | 631 | .. confval:: l 632 | 633 | :type: float 634 | :default: ``0`` 635 | 636 | Leakage fraction. 637 | 638 | 639 | .. confval:: m 640 | 641 | :type: float 642 | :default: ``0`` 643 | 644 | Minimal energy uptake for maintenance of species i (energy/time). Mortality. 645 | 646 | 647 | .. confval:: n 648 | 649 | :type: integer 650 | :default: ``2`` 651 | 652 | Hill coefficient for functional response (unitless). 653 | 654 | 655 | .. confval:: response 656 | 657 | :type: string 658 | :default: ``type III`` 659 | 660 | Functional response of uptaking rates. 661 | 662 | 663 | .. confval:: sigma_max 664 | 665 | :type: float 666 | :default: ``1`` 667 | 668 | Maximum input flux (mass/time) for type III functional response. 669 | 670 | 671 | .. confval:: regulation 672 | 673 | :type: string 674 | :default: ``independent`` 675 | 676 | Metabolic regulation. 677 | 678 | 679 | .. confval:: nreg 680 | 681 | :type: integer 682 | :default: ``10`` 683 | 684 | Hill coefficient that tunes steepness of metabolic regulation. 685 | 686 | 687 | .. confval:: tau 688 | 689 | :type: float 690 | :default: ``1`` 691 | 692 | External resource supply rate when ``supply="external"`` for chemostat setting. 693 | 694 | 695 | .. confval:: r 696 | 697 | :type: string 698 | :default: ``independent`` 699 | 700 | Renewal rate for self renewing resources when ``supply="self-renewing"`` for chemostat setting. 
701 | 702 | 703 | 704 | 705 | 706 | 707 | -------------------------------------------------------------------------------- /community_selection/A_experiment_functions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Nov 26 2019 4 | @author: changyuchang 5 | """ 6 | import numpy as np 7 | from community_simulator import * 8 | from community_simulator.usertools import * 9 | import community_simulator.usertools 10 | from community_selection.__init__ import * 11 | from community_selection.B_community_phenotypes import * 12 | 13 | # Species features 14 | 15 | def new_MakeMatrices(assumptions): 16 | """ 17 | Inherited function from community-simulator package 18 | 19 | Changes: 20 | 21 | - Add BINARY_GAMMA SAMPLING 22 | """ 23 | #PREPARE VARIABLES 24 | #Force number of species to be an array: 25 | if isinstance(assumptions['MA'],numbers.Number): 26 | assumptions['MA'] = [assumptions['MA']] 27 | if isinstance(assumptions['SA'],numbers.Number): 28 | assumptions['SA'] = [assumptions['SA']] 29 | #Force numbers of species to be integers: 30 | assumptions['MA'] = np.asarray(assumptions['MA'],dtype=int) 31 | assumptions['SA'] = np.asarray(assumptions['SA'],dtype=int) 32 | assumptions['Sgen'] = int(assumptions['Sgen']) 33 | #Default waste type is last type in list: 34 | if 'waste_type' not in assumptions.keys(): 35 | assumptions['waste_type']=len(assumptions['MA'])-1 36 | 37 | #Extract total numbers of resources, consumers, resource types, and consumer families: 38 | M = np.sum(assumptions['MA']) 39 | T = len(assumptions['MA']) 40 | S = np.sum(assumptions['SA'])+assumptions['Sgen'] 41 | F = len(assumptions['SA']) 42 | M_waste = assumptions['MA'][assumptions['waste_type']] 43 | #Construct lists of names of resources, consumers, resource types, and consumer families: 44 | resource_names = ['R'+str(k) for k in range(M)] 45 | type_names = ['T'+str(k) for k in range(T)] 46 | family_names = 
['F'+str(k) for k in range(F)] 47 | consumer_names = ['S'+str(k) for k in range(S)] 48 | waste_name = type_names[assumptions['waste_type']] 49 | resource_index = [[type_names[m] for m in range(T) for k in range(assumptions['MA'][m])], 50 | resource_names] 51 | consumer_index = [[family_names[m] for m in range(F) for k in range(assumptions['SA'][m])] 52 | +['GEN' for k in range(assumptions['Sgen'])],consumer_names] 53 | 54 | #PERFORM GAUSSIAN SAMPLING 55 | if assumptions['sampling'] == 'Gaussian': 56 | #Initialize dataframe: 57 | c = pd.DataFrame(np.zeros((S,M)),columns=resource_index,index=consumer_index) 58 | #Add Gaussian-sampled values, biasing consumption of each family towards its preferred resource: 59 | for k in range(F): 60 | for j in range(T): 61 | if k==j: 62 | c_mean = (assumptions['muc']/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 63 | c_var = (assumptions['sigc']**2/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 64 | else: 65 | c_mean = (assumptions['muc']/M)*(1-assumptions['q']) 66 | c_var = (assumptions['sigc']**2/M)*(1-assumptions['q']) 67 | c.loc['F'+str(k)]['T'+str(j)] = c_mean + np.random.randn(assumptions['SA'][k],assumptions['MA'][j])*np.sqrt(c_var) 68 | if 'GEN' in c.index: 69 | c_mean = assumptions['muc']/M 70 | c_var = assumptions['sigc']**2/M 71 | c.loc['GEN'] = c_mean + np.random.randn(assumptions['Sgen'],M)*np.sqrt(c_var) 72 | 73 | #PERFORM BINARY SAMPLING 74 | elif assumptions['sampling'] == 'Binary': 75 | assert assumptions['muc'] < M*assumptions['c1'], 'muc not attainable with given M and c1.' 
76 | #Construct uniform matrix at total background consumption rate c0: 77 | c = pd.DataFrame(np.ones((S,M))*assumptions['c0']/M,columns=resource_index,index=consumer_index) 78 | #Sample binary random matrix blocks for each pair of family/resource type: 79 | for k in range(F): 80 | for j in range(T): 81 | if k==j: 82 | p = (assumptions['muc']/(M*assumptions['c1']))*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 83 | else: 84 | p = (assumptions['muc']/(M*assumptions['c1']))*(1-assumptions['q']) 85 | 86 | c.loc['F'+str(k)]['T'+str(j)] = (c.loc['F'+str(k)]['T'+str(j)].values 87 | + assumptions['c1']*BinaryRandomMatrix(assumptions['SA'][k],assumptions['MA'][j],p)) 88 | #Sample uniform binary random matrix for generalists: 89 | if 'GEN' in c.index: 90 | p = assumptions['muc']/(M*assumptions['c1']) 91 | c.loc['GEN'] = c.loc['GEN'].values + assumptions['c1']*BinaryRandomMatrix(assumptions['Sgen'],M,p) 92 | 93 | elif assumptions['sampling'] == 'Gamma': 94 | #Initialize dataframe 95 | c = pd.DataFrame(np.zeros((S,M)),columns=resource_index,index=consumer_index) 96 | #Add Gamma-sampled values, biasing consumption of each family towards its preferred resource 97 | for k in range(F): 98 | for j in range(T): 99 | if k==j: 100 | c_mean = (assumptions['muc']/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 101 | c_var = (assumptions['sigc']**2/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 102 | thetac = c_var/c_mean 103 | kc = c_mean**2/c_var 104 | c.loc['F'+str(k)]['T'+str(j)] = np.random.gamma(kc,scale=thetac,size=(assumptions['SA'][k],assumptions['MA'][j])) 105 | else: 106 | c_mean = (assumptions['muc']/M)*(1-assumptions['q']) 107 | c_var = (assumptions['sigc']**2/M)*(1-assumptions['q']) 108 | thetac = c_var/c_mean 109 | kc = c_mean**2/c_var 110 | c.loc['F'+str(k)]['T'+str(j)] = np.random.gamma(kc,scale=thetac,size=(assumptions['SA'][k],assumptions['MA'][j])) 111 | if 'GEN' in c.index: 112 | c_mean = 
assumptions['muc']/M 113 | c_var = assumptions['sigc']**2/M 114 | thetac = c_var/c_mean 115 | kc = c_mean**2/c_var 116 | c.loc['GEN'] = np.random.gamma(kc,scale=thetac,size=(assumptions['Sgen'],M)) 117 | 118 | #PERFORM UNIFORM SAMPLING 119 | elif assumptions['sampling'] == 'Uniform': 120 | #Initialize dataframe: 121 | c = pd.DataFrame(np.zeros((S,M)),columns=resource_index,index=consumer_index) 122 | #Add uniformly sampled values, biasing consumption of each family towards its preferred resource: 123 | for k in range(F): 124 | for j in range(T): 125 | if k==j: 126 | c_mean = (assumptions['muc']/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 127 | else: 128 | c_mean = (assumptions['muc']/M)*(1-assumptions['q']) 129 | c.loc['F'+str(k)]['T'+str(j)] = c_mean + (np.random.rand(assumptions['SA'][k],assumptions['MA'][j])-0.5)*assumptions['b'] 130 | if 'GEN' in c.index: 131 | c_mean = assumptions['muc']/M 132 | c.loc['GEN'] = c_mean + (np.random.rand(assumptions['Sgen'],M)-0.5)*assumptions['b'] 133 | 134 | #PERFORM BINARY_GAMMA SAMPLING 135 | elif assumptions['sampling'] == 'Binary_Gamma': 136 | assert assumptions['muc'] < M*assumptions['c1'], 'muc not attainable with given M and c1.' 
137 | #Construct uniform matrix at total background consumption rate c0: 138 | c = pd.DataFrame(np.ones((S,M))*assumptions['c0']/M,columns=resource_index,index=consumer_index) 139 | #Sample binary random matrix blocks for each pair of family/resource type: 140 | for k in range(F): 141 | for j in range(T): 142 | if k==j: 143 | p = (assumptions['muc']/(M*assumptions['c1']))*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 144 | c_mean = (assumptions['muc']/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 145 | c_var = (assumptions['sigc']**2/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 146 | else: 147 | p = (assumptions['muc']/(M*assumptions['c1']))*(1-assumptions['q']) 148 | c_mean = (assumptions['muc']/M)*(1-assumptions['q']) 149 | c_var = (assumptions['sigc']**2/M)*(1-assumptions['q']) 150 | c_mean_binary = assumptions['c0']+ assumptions['c1']*p 151 | c_var_binary = assumptions['c1']**2 *p*(1-p) 152 | c_mean_gamma = c_mean/c_mean_binary 153 | c_var_gamma = (c_var - c_var_binary*(c_mean_gamma**2))/(c_var_binary + c_mean_binary**2) 154 | thetac = c_var_gamma/c_mean_gamma 155 | kc = c_mean_gamma**2/c_var_gamma 156 | c.loc['F'+str(k)]['T'+str(j)] = (c.loc['F'+str(k)]['T'+str(j)].values + assumptions['c1']*BinaryRandomMatrix(assumptions['SA'][k],assumptions['MA'][j],p))*np.random.gamma(kc,scale=thetac,size=(assumptions['SA'][k],assumptions['MA'][j])) 157 | #Sample uniform binary random matrix for generalists: 158 | if 'GEN' in c.index: 159 | p = assumptions['muc']/(M*assumptions['c1']) 160 | c_mean = assumptions['muc']/M 161 | c_var = assumptions['sigc']**2/M 162 | c_mean_binary = assumptions['c0']+ assumptions['c1']*p 163 | c_var_binary = assumptions['c1']**2 *p*(1-p) 164 | c_mean_gamma = c_mean/c_mean_binary 165 | c_var_gamma = (c_var - c_var_binary*(c_mean_gamma**2))/(c_var_binary + c_mean_binary**2) 166 | thetac = c_var_gamma/c_mean_gamma 167 | kc = c_mean_gamma**2/c_var_gamma 168 | c.loc['GEN'] = 
# Monkey-patch community-simulator so downstream parameter construction uses
# the extended sampling schemes defined above (e.g. Binary_Gamma).
community_simulator.usertools.MakeMatrices = new_MakeMatrices

def create_invader(params, assumptions):
    """
    Draw the species features of a designated invader.

    A second parameter set is sampled using the invader-specific sampling
    scheme (``invader_sampling``), and the invader's row of the uptake
    matrix ``c`` is replaced by that freshly sampled row scaled by
    ``invader_strength``.

    NOTE(review): the incoming ``params`` argument is immediately replaced
    by ``MakeParams(assumptions)`` and never read — confirm whether
    regenerating the baseline parameters here is intentional.
    """
    invader_assumptions = assumptions.copy()
    invader_assumptions.update({"sampling": assumptions["invader_sampling"]})
    # Regenerate the baseline parameters and a parallel invader-sampled set.
    params = MakeParams(assumptions)
    invader_params = MakeParams(invader_assumptions)
    row = assumptions["invader_index"]
    params["c"].iloc[row, :] = invader_params["c"].iloc[row, :] * assumptions["invader_strength"]

    return params
def draw_species_function(assumptions):
    """
    Draw species-specific per-capita contributions to community function.

    assumptions = dictionary of metaparameters from community-simulator;
    uses SA, Sgen, phi_distribution, phi_mean, phi_sd, phi_lower,
    phi_upper, function_ratio, and ruggedness.

    Returns
    -------
    f1_species_smooth : ndarray, shape (S_tot,)
        Additive per-capita function of each species.
    f1_species_rugged : ndarray, shape (S_tot,)
        Same values with a random fraction ``ruggedness`` zeroed out.
    f2_species_smooth : ndarray, shape (S_tot, S_tot)
        Pairwise interaction contributions; diagonal set to 0.
    f2_species_rugged : ndarray, shape (S_tot, S_tot)
        Sparsified interaction contributions; diagonal set to 0.

    Raises
    ------
    ValueError
        If ``phi_distribution`` is neither "Norm" nor "Uniform".
    """
    S_tot = int(np.sum(assumptions['SA']) + assumptions['Sgen'])
    # Probability that a per-capita contribution survives sparsification.
    keep = 1 - assumptions["ruggedness"]

    if assumptions["phi_distribution"] == "Norm":
        f1_species_smooth = np.random.normal(assumptions["phi_mean"], assumptions["phi_sd"], size = S_tot)
        f1_species_rugged = f1_species_smooth * np.random.binomial(1, keep, size = S_tot)
        f2_species_smooth = np.random.normal(assumptions["phi_mean"], assumptions["phi_sd"] * assumptions["function_ratio"], size = S_tot**2).reshape(S_tot, S_tot)
        f2_species_rugged = np.random.binomial(1, keep, S_tot**2).reshape(S_tot, S_tot) * np.array(f2_species_smooth)

    elif assumptions["phi_distribution"] == "Uniform":
        f1_species_smooth = np.random.uniform(assumptions["phi_lower"], assumptions["phi_upper"], size = S_tot)
        f1_species_rugged = f1_species_smooth * np.random.binomial(1, keep, size = S_tot)
        # NOTE(review): here only the upper bound is scaled by function_ratio,
        # whereas the Norm branch scales the sd — confirm the asymmetry is intended.
        f2_species_smooth = np.random.uniform(assumptions["phi_lower"], assumptions["phi_upper"] * assumptions["function_ratio"], size = S_tot**2).reshape(S_tot, S_tot)
        f2_species_rugged = np.random.binomial(1, keep, S_tot**2).reshape(S_tot, S_tot) * np.array(f2_species_smooth)

    else:
        # Fixed: an invalid choice previously fell through to an UnboundLocalError.
        raise ValueError("phi_distribution must be 'Norm' or 'Uniform', got %r" % (assumptions["phi_distribution"],))

    # A species does not interact with itself: remove diagonals in the interaction matrices.
    np.fill_diagonal(f2_species_smooth, 0)
    np.fill_diagonal(f2_species_rugged, 0)

    return f1_species_smooth, f1_species_rugged, f2_species_smooth, f2_species_rugged
def draw_species_cost(per_capita_function, assumptions):
    """
    Draw species-specific function cost.

    k_i is a conversion factor that specifies cost per function: each
    species' growth conversion factor g_i is reduced from the baseline g0
    according to its per-capita function.

    Parameters
    ----------
    per_capita_function : ndarray
        Per-capita function of each species (e.g. f1_species_smooth).
    assumptions : dict
        Uses cost_distribution, cost_mean, cost_sd, g0, and (for the
        Uniform branch) phi_distribution.

    Returns
    -------
    ndarray of g_i values, one per species.

    Raises
    ------
    ValueError
        If ``cost_distribution`` is neither "Norm" nor "Uniform".
    """
    if assumptions["cost_distribution"] == "Norm":
        # NOTE(review): despite the "Norm" label, the cost is drawn from a
        # gamma distribution parameterized by cost_mean/cost_sd — confirm.
        if assumptions["cost_mean"] != 0:
            cost_var = assumptions["cost_sd"]**2
            cost_k = assumptions["cost_mean"]**2 / cost_var
            cost_theta = cost_var / assumptions["cost_mean"]
            cost = np.random.gamma(shape = cost_k, scale = cost_theta, size = len(per_capita_function))
            g0 = assumptions["g0"]
            gi = g0 / (1 + per_capita_function * cost)
        else:
            # Zero mean cost: every species keeps the baseline conversion factor.
            gi = np.repeat(assumptions["g0"], len(per_capita_function))

    elif assumptions["cost_distribution"] == "Uniform":
        assert assumptions["phi_distribution"] == "Uniform", "Phi should follow uniform distribution as the cost"
        gi = 1 - per_capita_function

    else:
        # Fixed: an invalid choice previously fell through to an UnboundLocalError.
        raise ValueError("cost_distribution must be 'Norm' or 'Uniform', got %r" % (assumptions["cost_distribution"],))

    return gi
def add_community_function(plate, assumptions, params):
    """
    Attach the function-related attributes to the metacommunity plate.

    For f1 and f3, adds the per-species function vectors; for f2 and f4,
    adds the pairwise interaction matrices. When ``knock_in`` is enabled, a
    monoculture plate is stabilized first so that every isolate's function
    can be measured to build the knock-in candidate list. For functions that
    target a resource, the target resource index is attached as well.

    Returns the plate with the new attributes set.
    """
    # Draw the per-capita species functions (seeded for reproducibility).
    np.random.seed(assumptions['seed'])
    smooth_add, rugged_add, smooth_int, rugged_int = draw_species_function(assumptions)

    # Species function for f1 additive community function.
    plate.f1_species_smooth = smooth_add
    plate.f1_species_rugged = rugged_add

    # Species interaction function for f2 interactive function.
    plate.f2_species_smooth = smooth_int
    plate.f2_species_rugged = rugged_int

    # Invasion function f5 or knock_in with a threshold requires growing
    # isolates in monoculture to obtain their abundance.
    if assumptions['knock_in']:
        print("\nStabilizing monoculture plate")
        # Derive a monoculture configuration: one well per pool species.
        mono_assumptions = assumptions.copy()
        mono_params = params.copy()
        mono_assumptions.update({"n_wells": np.sum(assumptions["SA"]) + assumptions["Sgen"]})
        mono_assumptions.update({"monoculture": True})

        # Build and stabilize the monoculture plate for knock-in.
        mono_plate = make_plate(mono_assumptions, mono_params)
        print("\nStabilizing monoculture plate for knock-in")
        n_stabilization = mono_assumptions["n_transfer"] - mono_assumptions["n_transfer_selection"]
        for i in range(n_stabilization):
            mono_plate.Propagate(mono_assumptions["n_propagation"])
            mono_plate = passage_monoculture(mono_plate, mono_assumptions["dilution"])
            print("Transfer " + str(i+1))
        # One final growth cycle before storing data.
        mono_plate.Propagate(mono_assumptions["n_propagation"])
        print("\nFinished stabilizing monoculture plate")

        print("\nMeasuring monocultures for preparing knock_in list")
        # Give the monoculture plate the attributes the selected function reads.
        if "f1" in assumptions["selected_function"]:
            mono_plate.f1_species_smooth = smooth_add
            mono_plate.f1_species_rugged = rugged_add
        elif "f2" in assumptions["selected_function"]:
            mono_plate.f2_species_smooth = smooth_int
            mono_plate.f2_species_rugged = rugged_int
        elif "f6" in assumptions["selected_function"]:
            mono_plate.target_resource = assumptions["target_resource"]
        # Evaluate the selected function on each monoculture; the function is
        # looked up by name in the module globals.
        plate.knock_in_species_function = globals()[assumptions["selected_function"]](mono_plate, params_simulation = mono_assumptions)
        print("\nknock_in_species_function ", plate.knock_in_species_function)

    # f6_target_resource
    if "target_resource" in assumptions["selected_function"]:
        plate.target_resource = assumptions["target_resource"]

    return plate
def sample_from_pool(plate_N, assumptions, n = None):
    """
    Sample communities from the regional species pool.

    Parameters
    ----------
    plate_N : pandas.DataFrame
        Consumer abundance table (species x wells); used as a template for
        shape and labels.
    assumptions : dict
        Uses monoculture, metacommunity_sampling, power_alpha,
        lognormal_mean, lognormal_sd, n_inoc, scale (and S for the
        'Default' branch).
    n : int, optional
        Number of cells in the inoculum; defaults to assumptions['n_inoc'].

    Returns
    -------
    pandas.DataFrame of initial abundances (species x wells), in biomass
    units (cell counts divided by 'scale').
    """
    S_tot = plate_N.shape[0] # Total number of species in the pool
    N0 = np.zeros((plate_N.shape)) # Make empty plate
    consumer_index = plate_N.index
    well_names = plate_N.columns
    if n is None:
        n = int(assumptions['n_inoc']) # if not specified n is n_inoc

    # Draw community
    if assumptions['monoculture'] == False and assumptions['metacommunity_sampling'] == 'Power':
        # Sample initial community for each well
        for k in range(plate_N.shape[1]):
            pool = np.random.power(assumptions['power_alpha'], size = S_tot) # Power-law distribution
            pool = pool/np.sum(pool) # Normalize the pool
            consumer_list = np.random.choice(S_tot, size = n, replace = True, p = pool) # Draw from the pool
            my_tab = pd.crosstab(index = consumer_list, columns = "count") # Calculate the cell count
            N0[my_tab.index.values, k] = np.ravel(my_tab.values / assumptions['scale']) # Scale to biomass
        # Make data.frame
        N0 = pd.DataFrame(N0, index = consumer_index, columns = well_names)
    elif assumptions['monoculture'] == False and assumptions['metacommunity_sampling'] == 'Lognormal':
        for k in range(plate_N.shape[1]):
            pool = np.random.lognormal(assumptions['lognormal_mean'], assumptions['lognormal_sd'], size = S_tot) # Lognormal distribution
            pool = pool/np.sum(pool) # Normalize the pool
            consumer_list = np.random.choice(S_tot, size = n, replace = True, p = pool) # Draw from the pool
            my_tab = pd.crosstab(index = consumer_list, columns = "count") # Calculate the cell count
            N0[my_tab.index.values, k] = np.ravel(my_tab.values / assumptions['scale']) # Scale to biomass
        # Make data.frame
        N0 = pd.DataFrame(N0, index = consumer_index, columns = well_names)
    elif assumptions['monoculture'] == False and assumptions['metacommunity_sampling'] == 'Default':
        # Default state from community-simulator: each species starts with an
        # abundance of 1; the richness is determined by the assumptions.
        N0 = MakeInitialState(assumptions)[0]
        if not isinstance(N0, pd.DataFrame): # add labels to consumer state
            if len(np.shape(N0)) == 1:
                N0 = N0[:, np.newaxis]
            # Fixed: these three lines previously referenced an undefined
            # variable `N` (NameError) instead of `N0`.
            column_names = ['W'+str(k) for k in range(np.shape(N0)[1])]
            species_names = ['S'+str(k) for k in range(np.shape(N0)[0])]
            N0 = pd.DataFrame(N0, columns=column_names)
            N0.index = species_names
            N0 = N0/assumptions['S']
    # Monoculture plate
    elif assumptions['monoculture'] == True:
        N0 = np.eye(plate_N.shape[0]) * assumptions['n_inoc']/assumptions['scale']
        N0 = pd.DataFrame(N0, index = consumer_index, columns = ["W" + str(i) for i in range(plate_N.shape[0])])

    return N0
index = consumer_index, columns = well_names) 360 | elif assumptions['monoculture'] == False and assumptions['metacommunity_sampling'] == 'Default': 361 | #Default was already sampled (each species starts wtih an abundance of 1. number of species in each species pool determined by 362 | #N0 = plate_N/assumptions['S'] 363 | N0 = MakeInitialState(assumptions)[0] 364 | if not isinstance(N0, pd.DataFrame):#add labels to consumer state 365 | if len(np.shape(N0)) == 1: 366 | N0 = N0[:,np.newaxis] 367 | column_names = ['W'+str(k) for k in range(np.shape(N)[1])] 368 | species_names = ['S'+str(k) for k in range(np.shape(N)[0])] 369 | N0 = pd.DataFrame(N,columns=column_names) 370 | N0.index = species_names 371 | N0 = N0/assumptions['S'] 372 | # Monoculture plate 373 | elif assumptions['monoculture'] == True: 374 | N0 = np.eye(plate_N.shape[0]) *assumptions['n_inoc']/assumptions['scale'] 375 | N0 = pd.DataFrame(N0, index = consumer_index, columns = ["W" + str(i) for i in range(plate_N.shape[0])]) 376 | 377 | return N0 378 | 379 | 380 | 381 | 382 | def sample_from_pool2(plate_N, assumptions, synthetic_community_size = 2, n = None): 383 | """ 384 | Make synthetic communities with given initial richness 385 | """ 386 | S_tot = plate_N.shape[0] 387 | N0 = np.zeros((plate_N.shape)) 388 | consumer_index = plate_N.index 389 | well_names = plate_N.columns 390 | 391 | if n is None: 392 | n = assumptions['n_inoc'] 393 | 394 | for k in range(plate_N.shape[1]): 395 | consumer_list = np.random.choice(S_tot, size = synthetic_community_size, replace = False) 396 | 397 | for v in range(synthetic_community_size): 398 | N0[consumer_list[v], k] = n / synthetic_community_size / assumptions["scale"] 399 | 400 | N0 = pd.DataFrame(N0, index = consumer_index, columns = well_names) 401 | 402 | return N0 403 | 404 | def migrate_from_pool(plate,migration_factor,params_simulation, power_law = True, n = None): 405 | """ 406 | Migrate from species pool to the plate mainly for directed selection) 407 | If 
power_law pool is true than sample n cells from species pool following power law distribution (default is same as inoculum) 408 | If power_law is false sample s_migration species from isolates with each total number of cells equivalent to n 409 | """ 410 | from community_selection.usertools import sample_from_pool 411 | if n is None: 412 | n = params_simulation['n_inoc'] 413 | if power_law: 414 | if np.sum(migration_factor) != 0: 415 | temp_params_simulation = params_simulation.copy() 416 | migration_plate = sample_from_pool(plate.N, params_simulation,n=n) * migration_factor # Migration factor is a list determined by migration algorithms and community function 417 | plate_migrated = plate.N + migration_plate 418 | else: 419 | plate_migrated = plate.N 420 | else: 421 | if np.sum(migration_factor) != 0: 422 | migration_plate = plate.N.copy() 423 | migration_plate[:] = 0 424 | for k in plate.N.columns: 425 | if migration_factor[np.where(plate.N.columns == k)[0]]>0: 426 | for j in range(0,params_simulation['s_migration']): 427 | s_id = np.random.choice(np.where(plate.N[k]==0)[0]) 428 | migration_plate[k][s_id]= n * 1/params_simulation["scale"] * 1/params_simulation['s_migration'] 429 | plate_migrated = plate.N + migration_plate 430 | else: 431 | plate_migrated = plate.N 432 | return plate_migrated 433 | 434 | def passage_monoculture(plate_mono, f, scale = None, refresh_resource = True): 435 | """ 436 | Reduced version of Passage(), for passaging a large set of wells without multinomial sampling 437 | Most code adapted from community-simulator 438 | """ 439 | self = plate_mono.copy() 440 | #HOUSEKEEPING 441 | if scale == None: 442 | scale = self.scale #Use scale from initialization by default 443 | self.N[self.N<0] = 0 #Remove any negative values that may have crept in 444 | self.R[self.R<0] = 0 445 | 446 | #DEFINE NEW VARIABLES 447 | N_tot = np.sum(self.N) 448 | R_tot = np.sum(self.R) 449 | N = np.zeros(np.shape(self.N)) 450 | 451 | #Poisson sample cells 452 | self.N = 
self.N * f *scale 453 | self.N.applymap(np.random.poisson) 454 | self.N = self.N/scale 455 | 456 | if refresh_resource: 457 | self.R = self.R * f 458 | self.R = self.R+self.R0 459 | 460 | #In continuous culture, it is useful to eliminate the resources that are 461 | #going extinct, to avoid numerical instability 462 | else: 463 | R_tot = np.sum(self.R) 464 | R = np.zeros(np.shape(self.R)) 465 | for k in range(self.n_wells): 466 | if f[k,k] > 0 and R_tot[k] > 0: 467 | R[:,k] += np.random.multinomial(int(scale*R_tot[k]*f[k,k]),(self.R/R_tot).values[:,k])*1./scale 468 | self.R = pd.DataFrame(R, index = self.R.index, columns = self.R.keys()) 469 | 470 | return self 471 | 472 | def make_medium(plate_R, assumptions): 473 | """ 474 | Design medium for the plate 475 | if assumptions['rich_medium'] == True, make rich medium 476 | """ 477 | if assumptions['rich_medium'] == True: 478 | np.random.seed(1) 479 | 480 | # Total number of resource in this universe 481 | R_tot = plate_R.shape[0] 482 | 483 | # Make empty plate 484 | R0 = np.zeros((plate_R.shape)) # Make empty plate 485 | 486 | # Resource index 487 | resource_index = plate_R.index 488 | 489 | # Well index 490 | well_names = plate_R.columns 491 | 492 | resource_pool = np.random.uniform(0, 1, size = R_tot) # Uniform distribution 493 | resource_pool = resource_pool/np.sum(resource_pool) 494 | resource_list = np.random.choice(R_tot, size = assumptions["R0_food"], replace = True, p = resource_pool) # Draw from the pool 495 | my_tab = pd.crosstab(index = resource_list, columns = "count") 496 | food_compostion = np.ravel(my_tab.values) 497 | for i in range(plate_R.shape[1]): 498 | R0[my_tab.index.values,i] = food_compostion 499 | R0 = pd.DataFrame(R0, index = resource_index, columns = well_names) 500 | else: 501 | R0 = plate_R 502 | return R0 503 | 504 | def make_plate(assumptions, params): 505 | """ 506 | prepares the plate 507 | """ 508 | # Make dynamical equations 509 | def dNdt(N,R,params): 510 | return 
MakeConsumerDynamics(assumptions)(N,R,params) 511 | def dRdt(N,R,params): 512 | return MakeResourceDynamics(assumptions)(N,R,params) 513 | 514 | dynamics = [dNdt,dRdt] 515 | 516 | # Make initial state 517 | init_state = MakeInitialState(assumptions) 518 | plate = Metacommunity(init_state, dynamics, params, scale = assumptions["scale"], parallel = False) 519 | 520 | # Add media to plate (overrides community simulator) 521 | plate.R = make_medium(plate.R, assumptions) 522 | plate.R0 = make_medium(plate.R0, assumptions) 523 | 524 | # Set the target resource to 0 when target function is resource production f6a 525 | if assumptions["selected_function"] == "f6a_target_resource": 526 | plate.R.iloc[assumptions["target_resource"],:] = 0 527 | plate.R0.iloc[assumptions["target_resource"],:] = 0 528 | 529 | # If plate is to be replaced by overwritting plate, skip the sampling 530 | if pd.isnull(assumptions["overwrite_plate"]): 531 | plate.N = sample_from_pool(plate.N, assumptions) 532 | 533 | # Remove invader in the plate 534 | if assumptions["selected_function"] == "f5_invader_suppression": 535 | plate.N.iloc[assumptions["invader_index"],:] = 0 536 | 537 | return plate 538 | 539 | # Data operation 540 | 541 | def reshape_plate_data(plate, params_simulation,transfer_loop_index): 542 | """ 543 | Reshape the plate resource and consumer matrices (wider form) into a melted data.frame (longer form) 544 | """ 545 | # Temporary function for adding variables to and melting df 546 | def melt_df(plate_df, data_type = "consumer"): 547 | # Consumers 548 | temp_df = pd.DataFrame(plate_df) 549 | total_number = temp_df.shape[0] 550 | 551 | ## Add variables 552 | temp_df["Type"] = np.repeat(data_type, total_number) 553 | temp_df["ID"] = range(total_number) 554 | temp_df["Transfer"] = np.repeat(str(transfer_loop_index), total_number) 555 | temp_df["exp_id"] = np.repeat(params_simulation['exp_id'] , total_number) 556 | 557 | ## Melt the df 558 | temp_df = pd.melt(temp_df, id_vars = 
["exp_id","Transfer", "Type", "ID"], var_name = "Well", value_name = "Abundance") 559 | temp_df = temp_df[temp_df.Abundance != 0] # Remove zero abundances 560 | return temp_df 561 | 562 | # Melt the df 563 | temp_plate = plate.copy() # Copy the original plate 564 | df_N = melt_df(temp_plate.N, data_type = "consumer") 565 | df_R = melt_df(temp_plate.R, data_type = "resource") 566 | df_R0 = melt_df(temp_plate.R0,data_type = "R0") 567 | 568 | # Concatenate dataframes 569 | merged_df = pd.concat([df_N, df_R,df_R0]) 570 | merged_df["Index"] = list(range(0, merged_df.shape[0])) 571 | merged_df.set_index("Index", inplace = True) 572 | 573 | return merged_df # Return concatenated dataframe 574 | 575 | def reshape_function_data(params_simulation,community_function, richness, biomass, transfer_loop_index): 576 | """ 577 | Reshape the community function, richness, biomass into a melted data.frame 578 | """ 579 | temp_vector1 = community_function.copy() 580 | temp_vector2 = richness.copy() 581 | temp_vector3 = biomass.copy() 582 | 583 | # Number of wells 584 | number_well = len(richness) 585 | 586 | # Make data.frame 587 | temp_df = pd.DataFrame({ 588 | "exp_id": np.repeat(params_simulation['exp_id'], number_well), 589 | "Well": ["W" + str(i) for i in range(number_well)], 590 | "Transfer": np.repeat(str(transfer_loop_index), number_well), 591 | "CommunityPhenotype": temp_vector1, 592 | "Richness": temp_vector2, 593 | "Biomass": temp_vector3}) 594 | 595 | # Turn the transfer columns as numeric 596 | temp_df[["Transfer"]] = temp_df[["Transfer"]].apply(pd.to_numeric) 597 | 598 | return temp_df 599 | 600 | def overwrite_plate(plate, assumptions): 601 | """ 602 | Overwrite the plate N, R, and R0 dataframe by the input composition file 603 | """ 604 | import os 605 | assert(os.path.isfile(assumptions['overwrite_plate'])), "The overwrite_plate does not exist" 606 | # Read the input data file 607 | df = pd.read_csv(assumptions["overwrite_plate"]) 608 | 609 | # By default, use the 
latest transfer to avoid well name conflict 610 | df = df[df.Transfer == np.max(df.Transfer)] 611 | 612 | # If only one community, repeat filling this community into n_wells wells 613 | if len(df["Well"].unique()) == 1: 614 | print("The overwrite plate has only one community (well). Replicate it to the number of wells in current plate") 615 | temp_df = df.copy() 616 | df = pd.concat([temp_df.assign(Well = "W" + str(i)) for i in range(assumptions["n_wells"])]) 617 | # Else if n_wells does not conform to the number of wells in the overwrite_plate, overwrite it 618 | else: 619 | assumptions["n_wells"] = len(df["Well"].unique()) 620 | # If the input overwrite file has multiple communities, check if it has the same number as n_wells 621 | #assert len(df["Well"].unique()) == assumptions["n_wells"], "overwrite_plate does not have the same number of wells as n_wells" 622 | # Check if the input file type has consumer, resurce and R0 623 | assert all(pd.Series(df["Type"].unique()).isin(["consumer", "resource", "R0"])), "overwrite_plate must have three types of rows: consumer, resource, R0" 624 | # Make empty dataframes 625 | N = plate.N.copy() 626 | R = plate.R.copy() 627 | R0 = plate.R.copy() 628 | # N0 629 | for w in range(assumptions["n_wells"]): 630 | temp_comm = df[(df["Well"] == ("W" + str(w))) & (df["Type"] == "consumer")][["ID", "Abundance"]] 631 | temp = np.zeros(N.shape[0]) 632 | for i in range(temp_comm.shape[0]): 633 | temp[int(temp_comm.iloc[i]["ID"])] = temp_comm.iloc[i]["Abundance"] 634 | N["W" + str(w)] = temp 635 | 636 | # R 637 | for w in range(assumptions["n_wells"]): 638 | temp_res = df[(df["Well"] == ("W" + str(w))) & (df["Type"] == "resource")][["ID", "Abundance"]] 639 | temp = np.zeros(R.shape[0]) 640 | for i in range(temp_res.shape[0]): 641 | temp[int(temp_res.iloc[i]["ID"])] = temp_res.iloc[i]["Abundance"] 642 | R["W" + str(w)] = temp 643 | # R0 644 | for w in range(assumptions["n_wells"]): 645 | temp_R0 = df[(df["Well"] == ("W" + str(w))) & 
(df["Type"] == "R0")][["ID", "Abundance"]] 646 | temp = np.zeros(R0.shape[0]) 647 | for i in range(temp_R0.shape[0]): 648 | temp[int(temp_R0.iloc[i]["ID"])] = temp_R0.iloc[i]["Abundance"] 649 | R0["W" + str(w)] = temp 650 | plate.N = N 651 | plate.N0 = N 652 | plate.R = R 653 | plate.R0 = R0 654 | 655 | # Passaage the overwrite plate 656 | if assumptions["passage_overwrite_plate"]: 657 | plate.Passage(np.eye(assumptions["n_wells"]) * assumptions["dilution"]) 658 | 659 | return(plate) 660 | --------------------------------------------------------------------------------