├── outline.png ├── docs ├── source │ ├── images │ │ ├── ecoprospector.png │ │ ├── selection_matrix_identity.png │ │ └── selection_matrix_propagule_pooling.png │ ├── content │ │ ├── micrm.rst │ │ ├── perturbation.rst │ │ ├── metacommunity.rst │ │ ├── community_function.rst │ │ ├── installation.rst │ │ ├── quickstart.rst │ │ ├── usertools.rst │ │ ├── selection_matrix.rst │ │ ├── protocol.rst │ │ └── mapping_file.rst │ ├── data │ │ └── input_test.csv │ ├── conf.py │ └── index.rst ├── Makefile └── make.bat ├── requirements.txt ├── commandline_tool ├── ecoprospector └── extract_species_function ├── setup.py ├── LICENSE ├── input_example.csv ├── .gitignore ├── README.md └── community_selection ├── __init__.py ├── B_community_phenotypes.py ├── E_protocols.py ├── D_perturbation_algorithms.py ├── C_selection_algorithms.py ├── usertools.py └── A_experiment_functions.py /outline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chang-Yu-Chang/ecoprospector/HEAD/outline.png -------------------------------------------------------------------------------- /docs/source/images/ecoprospector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chang-Yu-Chang/ecoprospector/HEAD/docs/source/images/ecoprospector.png -------------------------------------------------------------------------------- /docs/source/images/selection_matrix_identity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chang-Yu-Chang/ecoprospector/HEAD/docs/source/images/selection_matrix_identity.png -------------------------------------------------------------------------------- /docs/source/content/micrm.rst: -------------------------------------------------------------------------------- 1 | Microbial Consumer-Resource Model 2 | ================================= 3 | 4 | * Briefly explain MiCRM model and 
reference Bobby's paper. -------------------------------------------------------------------------------- /docs/source/content/perturbation.rst: -------------------------------------------------------------------------------- 1 | Perturbation 2 | ============= 3 | 4 | * Describe how perturbations are carried out 5 | * Where are the perturbations codeup 6 | -------------------------------------------------------------------------------- /docs/source/images/selection_matrix_propagule_pooling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chang-Yu-Chang/ecoprospector/HEAD/docs/source/images/selection_matrix_propagule_pooling.png -------------------------------------------------------------------------------- /docs/source/content/metacommunity.rst: -------------------------------------------------------------------------------- 1 | Metacommunity 2 | ============= 3 | 4 | * Metacommunity object inherited from community-simulator 5 | 6 | * Describe how it is constructed from mapping file 7 | 8 | * Describe what it contains (resource, community composition, species feature) 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | contourpy==1.1.0 2 | cvxpy==1.3.2 3 | cycler==0.11.0 4 | ecos==2.0.12 5 | fonttools==4.42.0 6 | kiwisolver==1.4.4 7 | matplotlib==3.7.2 8 | numpy==1.25.2 9 | osqp==0.6.3 10 | packaging==23.1 11 | pandas==1.2.0 12 | Pillow==10.0.0 13 | pyparsing==3.0.9 14 | python-dateutil==2.8.2 15 | pytz==2023.3 16 | qdldl==0.1.7.post0 17 | scipy==1.11.1 18 | scs==3.2.3 19 | six==1.16.0 20 | tzdata==2023.3 21 | -------------------------------------------------------------------------------- /docs/source/content/community_function.rst: -------------------------------------------------------------------------------- 1 | Community Function 2 | =================== 
3 | 4 | Six types of community function is currently available in ecoprospector, including 5 | 6 | * Additive function ``f1_additive`` and ``f1a_additive`` 7 | * Non-additive, epistatic function ``f2_interaction`` ``f2a_interaction`` 8 | * Binary function ``f3_additive_binary`` ``f4_interaction_binary`` 9 | * Invader resistance ``f5_invader_suppression`` 10 | * Resource minimization ``f6_target_resource`` 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /commandline_tool/ecoprospector: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | from community_selection.usertools import * 5 | 6 | input_csv = str(sys.argv[1]) # Input file name 7 | row_number = int(sys.argv[2]) # Which row of experiment to run 8 | 9 | assumptions = make_assumptions(input_csv, row_number) 10 | params, params_simulation , params_algorithm, plate = prepare_experiment(assumptions) 11 | simulate_community(params = params, params_simulation = params_simulation, params_algorithm = params_algorithm,plate = plate) 12 | save_plate(assumptions, plate) #Save plate (will onlys save if assumptions specify that) 13 | 14 | -------------------------------------------------------------------------------- /commandline_tool/extract_species_function: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | import pandas as pd 5 | from community_selection.usertools import * 6 | 7 | input_csv = str(sys.argv[1]) # Input file name 8 | row_number = int(sys.argv[2]) # Which row of experiment to run 9 | output_file_name = str(sys.argv[3]) 10 | 11 | assumptions = make_assumptions(input_csv, row_number) 12 | species_function = extract_species_function(assumptions) 13 | species_function.to_csv(output_file_name, index = False) 14 | print("\nGenerated per-capita species functions from: " + input_csv + 
"\nRow (0-based): " + str(row_number) + "\nOutput file name: " + output_file_name) 15 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | 4 | from setuptools import setup 5 | 6 | setup(name='ecoprospector', 7 | version='0.0.2', 8 | description='Simulate community selection protocols', 9 | url='https://github.com/Chang-Yu-Chang/ecoprospector', 10 | author=['Chang-Yu Chang', 'Jean Villa'], 11 | author_email=['chang-yu.chang@yale.edu'], 12 | license='MIT', 13 | packages = ['community_selection'], 14 | scripts = ['commandline_tool/ecoprospector', 'commandline_tool/extract_species_function'], 15 | include_package_data = True, 16 | package_data = {"": ["*.csv"]}, 17 | install_requires=["community-simulator@ git+https://github.com/Emergent-Behaviors-in-Biology/community-simulator.git@master"], 18 | zip_safe=False) 19 | -------------------------------------------------------------------------------- /docs/make.bat: 
-------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Chang-Yu Chang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/source/content/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | 5 | System requirement 6 | ------------------ 7 | 8 | * Python 3.7.3 9 | * `community-simulator `_ 10 | * Scipy, Numpy, Pandas, Matplotlib, functools, itertools, random. CVXPY is not required for our simulations as batch-culture simulations do not use the steadystate method in community-simulator. 11 | 12 | Ecoprospector package depends on [community-simulator](https://github.com/Emergent-Behaviors-in-Biology/community-simulator) (developed by the Mehta group and described in their [paper](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0230430)), which depends on Numpy, Pandas, Matplotlib, SciPy that are all included in Anaconda distribution. 13 | 14 | 15 | | 16 | 17 | Install the development version 18 | ------------------------------- 19 | 20 | Clone the github repository to a local directory :: 21 | 22 | $ git clone https://github.com/Chang-Yu-Chang/ecoprospector . 23 | 24 | Then browse to the Ecoprospector directory and install package :: 25 | 26 | $ pip install -e . 
27 | -------------------------------------------------------------------------------- /docs/source/content/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quick Start Guide 2 | ================= 3 | 4 | This page provides a quick start guide for using ecoprospector with an single input :code:`.csv` file. If you do not know how to configurate :code:`.csv` file, please read :ref:`Input Mapping File` for more details on each parameter. 5 | 6 | You can execute a experiments in a command line to quickly run an experiment. For example in Terminal on Mac, enter 7 | 8 | .. code-block:: bash 9 | 10 | $ ecoprospector mapping_file.csv 0 11 | 12 | Where mapping_file.csv is the input :code:`csv` file and i is the row (0-indexed) specifying the experiment to be run. 13 | 14 | You can also run the above code in python. The line above is equivalent as: 15 | 16 | .. code-block:: python 17 | 18 | from community_selection import * 19 | from community_selection.usertools import * 20 | assumptions = make_assumptions("mapping_file.csv", 0) 21 | params, params_simulation , params_algorithm, plate = prepare_experiment(assumptions) 22 | simulate_community(params = params, params_simulation = params_simulation, params_algorithm = params_algorithm, plate = plate) 23 | 24 | The functons are described in :ref:`User Tools` -------------------------------------------------------------------------------- /input_example.csv: -------------------------------------------------------------------------------- 1 | 
selected_function,protocol,seed,exp_id,overwrite_plate,passage_overwrite_plate,output_dir,save_function,save_composition,save_plate,function_lograte,composition_lograte,scale,n_inoc,rich_medium,monoculture,dilution,n_wells,n_propagation,n_transfer,n_transfer_selection,metacommunity_sampling,power_alpha,lognormal_mean,lognormal_sd,phi_distribution,phi_mean,phi_sd,phi_lower,phi_upper,ruggedness,function_ratio,binary_threshold,g0,cost_distribution,cost_mean,cost_sd,cost_lower,cost_upper,invader_index,invader_sampling,invader_strength,target_resource,directed_selection,knock_out,knock_in,knock_in_threshold,bottleneck,bottleneck_size,migration,n_migration,s_migration,coalescence,frac_coalescence,resource_shift,r_type,r_percent,sampling,sn,sf,Sgen,rn,rf,R0_food,food,supply,muc,sigc,c0,c1,q,sparsity,fs,fw,g,w,l,m,n,response,sigma_max,regulation,nreg,tau,r,S 2 | f1_additive,simple_screening,1,f1_additive-simple_screening-1,NA,FALSE,./,TRUE,TRUE,FALSE,1,20,1000000,1.00E+06,TRUE,FALSE,0.001,6,1,10,5,Power,0.01,8,8,Norm,0,1,0,1,NA,1,1,1,Norm,0,0,0,1,2,Gamma,10,NA,FALSE,FALSE,FALSE,NA,FALSE,NA,FALSE,1000000,NA,FALSE,NA,FALSE,NA,NA,Binary_Gamma,2100,1,0,90,1,1000,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,NA,type III,1,NA,NA,NA,NA,NA 3 | f1_additive,simple_screening,1,f1_additive-monoculture-1,NA,FALSE,./,TRUE,TRUE,FALSE,1,20,1000000,1.00E+06,TRUE,TRUE,0.001,6,1,10,5,Power,0.01,8,8,Norm,0,1,0,1,NA,1,1,1,Norm,0,0,0,1,2,Gamma,10,NA,FALSE,FALSE,FALSE,NA,FALSE,NA,FALSE,1000000,NA,FALSE,NA,FALSE,NA,NA,Binary_Gamma,2100,1,0,90,1,1000,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,NA,type III,1,NA,NA,NA,NA,NA -------------------------------------------------------------------------------- /docs/source/data/input_test.csv: -------------------------------------------------------------------------------- 1 | 
selected_function,protocol,seed,exp_id,overwrite_plate,passage_overwrite_plate,output_dir,save_function,save_composition,save_plate,function_lograte,composition_lograte,scale,n_inoc,rich_medium,monoculture,dilution,n_wells,n_propagation,n_transfer,n_transfer_selection,metacommunity_sampling,power_alpha,lognormal_mean,lognormal_sd,phi_distribution,phi_mean,phi_sd,phi_lower,phi_upper,ruggedness,function_ratio,binary_threshold,g0,cost_distribution,cost_mean,cost_sd,cost_lower,cost_upper,invader_index,invader_sampling,invader_strength,target_resource,directed_selection,knock_out,knock_in,knock_in_threshold,bottleneck,bottleneck_size,migration,n_migration,s_migration,coalescence,frac_coalescence,resource_shift,r_type,r_percent,sampling,sn,sf,Sgen,rn,rf,R0_food,food,supply,muc,sigc,c0,c1,q,sparsity,fs,fw,g,w,l,m,n,response,sigma_max,regulation,nreg,tau,r,S 2 | f1_additive,simple_screening,1,f1_additive-simple_screening-1,NA,False,/home/cc2553/project/community-selection/data/independent_f1_additive/,True,True,False,1,20,1000000,1e+06,TRUE,False,0.001,96,1,40,20,Power,0.01,8,8,Norm,0,1,0,1,NA,1,1,1,Norm,0,0,0,1,2,Gamma,10,NA,False,False,False,NA,False,NA,False,1000000,NA,False,NA,False,NA,NA,Binary_Gamma,2100,1,0,90,1,1000,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,NA,type III,1,NA,NA,NA,NA,NA 3 | f1_additive,simple_screening,1,f1_additive-monoculture-1,NA,False,/home/cc2553/project/community-selection/data/independent_f1_additive/,True,True,False,1,20,1000000,1e+06,TRUE,True,0.001,96,1,40,20,Power,0.01,8,8,Norm,0,1,0,1,NA,1,1,1,Norm,0,0,0,1,2,Gamma,10,NA,False,False,False,NA,False,NA,False,1000000,NA,False,NA,False,NA,NA,Binary_Gamma,2100,1,0,90,1,1000,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,NA,type III,1,NA,NA,NA,NA,NA 4 | -------------------------------------------------------------------------------- /docs/source/content/usertools.rst: -------------------------------------------------------------------------------- 1 | User Tools 2 | ========== 3 | 4 | Main functions in 
ecoprospector 5 | 6 | 7 | 8 | Make parameters 9 | --------------- 10 | 11 | .. code-block:: python 12 | 13 | make_assumptions(input_csv, row_number) 14 | 15 | 16 | .. confval:: input_csv 17 | 18 | :type: DataFrame 19 | :default: ``input_csv`` 20 | 21 | mapping csv file 22 | 23 | .. confval:: row_number 24 | 25 | :type: integer 26 | :default: ``0`` 27 | 28 | The row number that specifies the experiment to run (0-indexed) 29 | 30 | | 31 | 32 | Prepare and set up expeirments 33 | ------------------------------ 34 | 35 | .. code-block:: python 36 | 37 | params, params_simulation , params_algorithm, plate = prepare_experiment(assumptions) 38 | 39 | 40 | .. confval:: assumptions 41 | 42 | :type: List 43 | :default: ``assumptions`` 44 | 45 | A comprehensive list read from the input csv file 46 | 47 | 48 | | 49 | 50 | Simulate the protocol 51 | ---------------------- 52 | 53 | 54 | .. code-block:: python 55 | 56 | simulate_community(params = params, params_simulation = params_simulation, params_algorithm = params_algorithm, plate = plate) 57 | 58 | 59 | .. confval:: params 60 | 61 | :type: List 62 | :default: ``assumptions`` 63 | 64 | A comprehensive list read from the input csv file 65 | 66 | 67 | .. confval:: params_simulation 68 | 69 | :type: List 70 | :default: ``params_simulation`` 71 | 72 | Parameters related to simulating batch culture 73 | 74 | .. confval:: params_algorithm 75 | 76 | :type: List 77 | :default: ``params_algorithm`` 78 | 79 | Parameters related to protocol, community function, selection matrices, and 80 | 81 | 82 | .. 
confval:: plate 83 | 84 | :type: Metacommunity object 85 | :default: ``plate`` 86 | 87 | Object defined in this project 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Folders 7 | deprecated/* 8 | tests/* 9 | *.ipynb 10 | Rreport/* 11 | 12 | # R documents 13 | *.Rproj 14 | .Rhistory 15 | *.Rproj.user 16 | *.Rhistory 17 | *.RData 18 | *.Ruserdata 19 | *.html 20 | *.nb.html 21 | .DS_Store 22 | *.Rmd 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | .eggs/ 35 | lib/ 36 | lib64/ 37 | parts/ 38 | sdist/ 39 | var/ 40 | wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Environments 103 | .env 104 | .venv 105 | env/ 106 | venv/ 107 | ENV/ 108 | env.bak/ 109 | venv.bak/ 110 | 111 | # Spyder project settings 112 | .spyderproject 113 | .spyproject 114 | 115 | # Rope project settings 116 | .ropeproject 117 | 118 | # mkdocs documentation 119 | /site 120 | 121 | # mypy 122 | .mypy_cache/ 123 | .Rproj.user 124 | -------------------------------------------------------------------------------- /docs/source/content/selection_matrix.rst: -------------------------------------------------------------------------------- 1 | Selection Matrix 2 | =================== 3 | 4 | What is a selection matrix? 5 | ------------------------------------ 6 | A selection matrix is a map specifying how the parental communities are selected according to their function ranks, and how the selected communities are pooled or distributed to seed the offspring communities. It is a square matrix of size ``n_wells``. The columns are ranked parental communities and the rows are offspring communities. Each element in the selection matrix specifies the dilution factor used for the batch culture. 7 | 8 | .. 
image:: ../images/selection_matrix_identity.png 9 | :width: 600 10 | 11 | 12 | The selection matrices allow us to standardize most strategies of artificial community selection, for example, propagule and migrant pool approaches, into a regular form. 13 | 14 | .. image:: ../images/selection_matrix_propagule_pooling.png 15 | :width: 600 16 | 17 | 18 | How does selection matrix work in ecoprospector? 19 | ---------------------------------------------------------------------- 20 | 21 | A selection matrix is written in the form of a Python function to accommodate a varied number of communities in different independent experiments. These functions take a vector of values (the default output of :ref:`Community Function` functions) as input. The selection matrix function will read the length of the input vector, and construct a selection matrix of that length. The selection matrix is then used to guide the passaging of the metacommunity. 22 | 23 | A selection matrix must be defined during the simulation setup, i.e. stored in the ``C_selection_matrices.py``. During simulation, any particular selection matrix will be called according to :ref:`Selection Protocol`. 24 | 25 | A library of selection matrices 26 | ---------------------------------------------------------------------- 27 | 28 | We saved all the predefined selection matrices in ``C_selection_matrices.py``. These selection matrices were adapted from the selection protocols in the prior empirical and theoretical studies. 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/source/content/protocol.rst: -------------------------------------------------------------------------------- 1 | Selection Protocol 2 | ================== 3 | 4 | What is a selection protocol? 5 | --------------------------------------------------- 6 | 7 | A selection protocol is a table that defines the selected function, selection regime, and the number of community generations. 
8 | 9 | How does a selection protocol in ecoprospector look like? Inspired by batch culture experiments of microbial communities, we specify the selection protocol in a transfer/generation-wise manner. Here is an example of no-selection (simple_screening) protocol where it is simply doing nothing but passaging the plate in every transfer : 10 | 11 | [insert an example protocol table of no-selection protocol ] 12 | 13 | A selection protocol is then a table with four columns: 14 | 15 | * Protocol name: specified by ``protocol`` in the input ``csv``. 16 | * Transfer or community generation. 17 | * :ref:`Community Function`: the community function under selection at each transfer. 18 | * :ref:`Selection Matrix`: the selection regime conducted at each transfer. 19 | 20 | 21 | How to make a selection protocol 22 | -------------------------------- 23 | 24 | The selection protocol is automatically generated by ecoprospector with the mapping ``csv``. Key parameters include the specified protocol (``protocol``) the number of total transfers (``n_transfer``) and the number of selection transfers (``n_transfer_selection``). 25 | 26 | By default, ecoprospector will divide the protocol into two phases: selection and stabilization. In each transfer of the selection phase, a subset of the metacommunity is selected and used to seed the next generation. The selection matrix is consecutively implemented for ``n_transfer_selection`` times. Then for the rest of transfers until ``n_transfer``, the metacommunity is stabilized by simply passaged without selection. 27 | 28 | There are some examples of default selection protocols, which are contained in the ``E_protocols.py``. 29 | 30 | Note that users can make their own protocol without regard to the ecoprospector predefined protocols. 
To do that, make a pandas DataFrame with the same column names and include it in the ``E_protocols.py``, and make sure that: 31 | 32 | * The number of transfers does not exceed the ``n_transfer`` 33 | * The selection matrix specified in the protocol is contained in ``C_selection_matrices.py`` 34 | * Specify the new protocol name in the input ``csv``. 35 | 36 | What a selection protocol does not do 37 | --------------------------------------------------- 38 | 39 | While the table form of a selection protocol is a convenient way to standardize empirical protocols, some features that are usually specified in a “protocol” at an experimental setting are not included: 40 | 41 | * The number of communities (``n_wells``) in a metacommunity. 42 | * Dilution factor (``l``) 43 | * Incubation time (``n_propagation``) 44 | * Media or resource composition 45 | 46 | Instead these parameters, either specified in the mapping ``csv`` or generated during simulation setup, become object attributes of the :ref:`Metacommunity` during simulation. 47 | 48 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | import sphinx_rtd_theme 17 | from sphinx.locale import _ 18 | 19 | # -- Project information ----------------------------------------------------- 20 | master_doc = 'index' 21 | project = 'ecoprospector' 22 | copyright = '2020, Chang-Yu Chang' 23 | author = 'Chang-Yu Chang' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = '0.0.1' 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['recommonmark', 35 | 'sphinx_rtd_theme', 36 | 'sphinx.ext.autosectionlabel' 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = [] 46 | 47 | 48 | # -- Options for HTML output ------------------------------------------------- 49 | 50 | # The theme to use for HTML and HTML Help pages. See the documentation for 51 | # a list of builtin themes. 52 | # 53 | html_theme = 'sphinx_rtd_theme' 54 | 55 | # Add any paths that contain custom static files (such as style sheets) here, 56 | # relative to this directory. They are copied after the builtin static files, 57 | # so a file named "default.css" will overwrite the builtin "default.css". 
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']


# Extensions to theme docs
def setup(app):
    """Register the custom ``confval`` object type used throughout the docs."""
    from sphinx.domains.python import PyField
    from sphinx.util.docfields import Field

    # Field descriptors rendered for each confval directive: a class-linked
    # "Type" line and a plain "Default" line.
    confval_fields = [
        PyField(
            'type',
            label=_('Type'),
            has_arg=False,
            names=('type',),
            bodyrolename='class',
        ),
        Field(
            'default',
            label=_('Default'),
            has_arg=False,
            names=('default',),
        ),
    ]

    app.add_object_type(
        'confval',
        'confval',
        objname='configuration value',
        indextemplate='pair: %s; configuration value',
        doc_field_types=confval_fields,
    )
17 | 18 | ### Mac and Linux 19 | 20 | Install requirement 21 | 22 | ```sh 23 | # Required to build wheel for qdldl 24 | pip install cmake 25 | 26 | # Install requirements 27 | pip install -r requirements.txt 28 | ``` 29 | 30 | 31 | Download the code or clone the github repository of community simulator to a local directory and browse to the community-simulator directory and install the package 32 | 33 | ```sh 34 | cd 35 | git clone https://github.com/Emergent-Behaviors-in-Biology/community-simulator 36 | ``` 37 | 38 | Download the code or clone this github repository to a local directory and browse to the ecoprospector directory and install package 39 | 40 | ```sh 41 | cd 42 | git clone https://github.com/Chang-Yu-Chang/ecoprospector 43 | pip install -e . 44 | ``` 45 | 46 | ### Windows 47 | 48 | The parallelization features in community-simulator are not currently supported on Windows and as such we cannot guarantee that the current version of ecoprospector will work in a windows environment. We would recommend using a linux emulator for windows such as Cygwin instead. 49 | 50 | 51 | ## Usage example 52 | 53 | With the mapping file (csv), executing one experiment is simple as 54 | 55 | ```sh 56 | $ ecoprospector input_example.csv 0 57 | ``` 58 | 59 | For more examples and usage, please refer to the [documentation](https://ecoprospector.readthedocs.io/en/latest/). 60 | 61 | 62 | ## Release History 63 | 64 | * 0.0.3 65 | * Add requirements file. 
class Metacommunity(Community):
    """
    Metacommunity plate, inherited from the community-simulator package.

    Changes relative to the parent ``Community`` class:

    - The number of cells transferred in ``Passage`` is Poisson distributed
      (rather than deterministic), modeling demographic sampling noise in
      the inoculum.
    """
    def Passage(self, f, scale=None, refresh_resource=True):
        """
        Transfer cells to a fresh plate.

        Parameters
        ----------
        f : array-like
            Matrix specifying the fraction of each old well (column) to
            transfer to each new well (row).
        scale : float, optional
            Scale factor converting abundances to discrete cell counts.
            Defaults to the scale defined for the plate on initialization.
        refresh_resource : bool
            Whether the new plate comes supplied with fresh media. The
            resource concentrations in the media are assumed to be the same
            as the initial resource concentrations from the first plate.
            The "Reset" method can be used to adjust these concentrations.
        """
        # HOUSEKEEPING
        if scale is None:
            scale = self.scale  # Use scale from initialization by default
        f = np.asarray(f)  # Allow for f to be a dataframe
        self.N[self.N < 0] = 0  # Remove any negative values that may have crept in
        self.R[self.R < 0] = 0

        # DEFINE NEW VARIABLES
        N_tot = np.sum(self.N)
        R_tot = np.sum(self.R)
        N = np.zeros(np.shape(self.N))

        # MULTINOMIAL SAMPLING
        # (simulate transferring a finite fraction of a discrete collection of cells)
        # The total number of transferred cells is Poisson distributed around the
        # deterministic expectation scale*N_tot*f, then partitioned among species
        # by multinomial sampling of the relative abundances.
        for k in range(self.n_wells):
            for j in range(self.n_wells):
                if f[k, j] > 0 and N_tot[j] > 0:
                    N[:, k] += np.random.multinomial(
                        np.random.poisson(scale * N_tot[j] * f[k, j]),
                        (self.N / N_tot).values[:, j]
                    ) * 1. / scale
        self.N = pd.DataFrame(N, index=self.N.index, columns=self.N.keys())

        # In batch culture, there is no need to do multinomial sampling on the
        # resources, since they are externally replenished before they cause
        # numerical problems
        if refresh_resource:
            self.R = pd.DataFrame(np.dot(self.R, f.T), index=self.R.index, columns=self.R.keys())
            self.R = self.R + self.R0

        # In continuous culture, it is useful to eliminate the resources that are
        # going extinct, to avoid numerical instability
        else:
            R_tot = np.sum(self.R)
            R = np.zeros(np.shape(self.R))
            for k in range(self.n_wells):
                for j in range(self.n_wells):
                    if f[k, j] > 0 and R_tot[j] > 0:
                        R[:, k] += np.random.multinomial(
                            int(scale * R_tot[j] * f[k, j]),
                            (self.R / R_tot).values[:, j]
                        ) * 1. / scale
            self.R = pd.DataFrame(R, index=self.R.index, columns=self.R.keys())
-------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Ecoprospector's Tutorial 2 | ========================================= 3 | 4 | .. image:: images/ecoprospector.png 5 | :width: 1000 6 | 7 | 8 | What is ecoprospector? 9 | ====================== 10 | 11 | Ecoprospector is a Python package designed to simulate protocols of artificial selection on microbial metacommunities. Experiments are run by using a :code:`mapping_file.csv` as an input. Each row in this csv file corresponds to a single experiment and each column specifies the paramaters for that experiment. Running a single experiment specified in row :code:`i` simply involves the bash command 12 | 13 | .. code-block:: bash 14 | 15 | $ ecoprospector mapping_file.csv i 16 | 17 | | 18 | 19 | Main features 20 | ============= 21 | 22 | Ecoprospector aims to flexibly adapt major componets of commonly used experiemntal protocols so that they can be tested on in-silico microbial meta-communitities. The main features of our simulations include: 23 | 24 | * **Consumer-resource dynamics**: virtual microbial species with idiosyncratic metabolic properties interact with others in a community through secretion and uptakes. Microbial community dynamics can be adjusted using a wide range of paramaters 25 | * **Batch-culture**: the community generation is divided into serial batch culture with a tunable incubation time and number of generations. 26 | * **Community function**: any arbitrarily designed community functions can be under selection. 27 | * **Selection matrix**: the selection regimes (i.e., which parental communitues to select and how to seed the offspring communities) are standardized by selection matrix at the end of each generation. 
* **Perturbations**: at the end of any generation the top performing community can be replicated and copies can be perturbed, simulating possible manipulations (i.e. single-species invasions, resource-shifts, bottle-necking etc).
def f2_interaction(plate, params_simulation):
    """
    Additive community function with pairwise interactions (F2).

    For each community (well) with composition vector c, the function value is
    the quadratic form c^T F c, where F = plate.f2_species_smooth is an
    S_tot x S_tot matrix of per-pair contributions.

    plate = plate object from package; plate.N is an S_tot x n_wells abundance table
    params_simulation = simulation parameter dictionary (unused here; kept for a
        uniform community-phenotype signature)

    Returns a 1-D numpy array of length n_wells.
    """
    abundances = plate.N.values           # S_tot x n_wells
    pair_effects = plate.f2_species_smooth  # S_tot x S_tot

    # Vectorized c^T F c per well. Replaces the previous per-well Python loop
    # that materialized an explicit S_tot x S_tot outer product for every well.
    interaction_term = np.einsum("aw,ab,bw->w", abundances, pair_effects, abundances)

    return interaction_term
set k = np.zeros([n, n]) for binary function (species presence or absense) 99 | 100 | """ 101 | # Number of species in the pool 102 | S_tot = plate.N.shape[0] 103 | 104 | # Binary function using type III response 105 | plate_temp = plate.copy() 106 | n = 10; Sm = 1 107 | plate_temp.N = plate_temp.N / params_simulation["binary_threshold"] 108 | plate_temp.N = plate_temp.N**n / (1 + plate_temp.N**n/Sm) 109 | 110 | # Additive term 111 | additive_term = np.sum(plate_temp.N.values * plate_temp.species_function[:,None], axis = 0) 112 | 113 | # Interaction term 114 | interaction_term = np.zeros(plate_temp.N.shape[1]) 115 | for i in range(plate_temp.N.shape[1]): # For each community 116 | community_composition = np.array(plate_temp.N.iloc[:,i]).reshape(S_tot, 1) 117 | community_composition_square = np.multiply(community_composition, community_composition.reshape(1, S_tot)) 118 | interaction_term[i] = np.sum(community_composition_square * plate_temp.interaction_function) 119 | 120 | return additive_term + interaction_term 121 | 122 | def f5_invader_suppression(plate, params_simulation): 123 | """ 124 | Community function in which an indentical alien community (single or multiple species) invades the selected resident communities. 125 | This community function is the ratio between the biomass when invader grows with the community and when invader grows alone. 126 | The biomass of invader growing alone (plate.invasion_plate_t1) should have been included in the plate object attribute. 
def f6_target_resource(plate, params_simulation):
    """
    Community function that rewards depletion of a supplied target resource.

    The selected phenotype is minus the remaining amount of the target resource
    (plate.target_resource indexes a row of plate.R), so maximizing the
    function minimizes the leftover resource. The target resource by default is
    the resource in the last index. If rich medium is provided, the target
    resource amount in the initial plate is set to 0.

    Returns a list with one value per community (well).
    """
    target_resource_index = plate.target_resource
    # Negate in pandas and convert once, instead of a temporary list plus a
    # sign-flipping comprehension.
    return (-plate.R.iloc[target_resource_index, :]).tolist()
supplied resource to 0 170 | relative_resource = relative_resource/relative_resource.sum(0) #Look at relative abundance of remaining resource 171 | R_dist = np.sqrt(np.sum(np.array((np.tile(R_target,(well_tot,1)) - relative_resource.T)**2)[:,1:],axis=1)) 172 | return (np.array(R_dist.T)* -1) * (1+ np.random.normal(0,sigma,well_tot))#(so we select for positive community function) 173 | 174 | -------------------------------------------------------------------------------- /community_selection/E_protocols.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Nov 26 2019 5 | @author: changyuchang 6 | """ 7 | import pandas as pd 8 | 9 | def make_algorithm_library(): 10 | """ 11 | Show the table of algorithms in this package 12 | """ 13 | import re 14 | import pandas as pd 15 | 16 | # Find directory of community_selection modultes 17 | import community_selection 18 | module_dir = community_selection.__file__ 19 | module_dir = re.sub("__init__.py", "", module_dir) 20 | 21 | # 22 | algorithm_types = ["community_phenotypes", "selection_algorithms", "perturbation_algorithms"] 23 | algorithms = list() 24 | 25 | for i in range(len(algorithm_types)): 26 | 27 | # Open files 28 | file_algorithm_phenotype = open(module_dir + ["B", "C", "D"][i] + "_" + algorithm_types[i] + ".py", "r") 29 | 30 | # Read lines 31 | line_list = list() 32 | line = file_algorithm_phenotype.readline() 33 | cnt = 1 34 | 35 | while line: 36 | line = file_algorithm_phenotype.readline() 37 | line_list.append(line.strip()) 38 | cnt += 1 39 | 40 | # Regular expression 41 | algorithm_names = re.findall("def \w+", " ".join(line_list)) 42 | list_algorithm = [re.sub("^def ", "", x) for x in algorithm_names] 43 | 44 | # Write the files 45 | algorithms.append(pd.DataFrame({"AlgorithmType": re.sub("s$", "", algorithm_types[i]), "AlgorithmName": list_algorithm})) 46 | 47 | return pd.concat(algorithms) 48 | 49 | 
def make_protocol(params_simulation, protocol_name, selection_algorithm = None, repeated_selection = False):
    """
    Build the per-transfer schedule for one experimental protocol.

    params_simulation = parameter dictionary; uses "n_transfer" (total number
        of transfers), "n_transfer_selection" (length of the selection phase)
        and "selected_function" (community phenotype under selection)
    protocol_name = label stored in the "algorithm_name" column
    selection_algorithm = name of the selection matrix applied during the
        selection phase; ignored for "simple_screening"
    repeated_selection = if True, apply the selection algorithm at every
        transfer of the selection phase; if False, apply it only once, at the
        last transfer of the selection phase

    Returns a pandas DataFrame with one row per transfer and columns
    algorithm_name, transfer, community_phenotype, selection_algorithm.
    """
    n_transfer = params_simulation["n_transfer"]
    n_selection = params_simulation["n_transfer_selection"]
    temp_df = pd.DataFrame({
        "algorithm_name": protocol_name,
        "transfer": range(1, n_transfer + 1),
        "community_phenotype": params_simulation["selected_function"],
        # Screening never selects; overwritten below for selection protocols.
        "selection_algorithm": "no_selection"
    })
    if protocol_name != "simple_screening":
        n_maturation = n_transfer - n_selection  # transfers after the selection phase
        if repeated_selection:
            # Select at every transfer of the selection phase.
            schedule = [selection_algorithm] * n_selection + ["no_selection"] * n_maturation
        else:
            # Select once, at the end of the selection phase.
            schedule = ["no_selection"] * (n_selection - 1) + [selection_algorithm] + ["no_selection"] * n_maturation
        temp_df["selection_algorithm"] = schedule

    return temp_df
"Blouin2015", selection_algorithm = "pool_top10percent", repeated_selection = True) 84 | Blouin2015_control = make_protocol(params_simulation, protocol_name = "Blouin2015_control", selection_algorithm = "pool_top10percent_control", repeated_selection = True) 85 | Chang2020a = make_protocol(params_simulation, protocol_name = "Chang2020a", selection_algorithm = "select_top16percent", repeated_selection = True) 86 | Chang2020a_control = make_protocol(params_simulation, protocol_name = "Chang2020a_control", selection_algorithm = "select_top16percent_control", repeated_selection = True) 87 | Chang2020b = make_protocol(params_simulation, protocol_name = "Chang2020b", selection_algorithm = "select_top25percent", repeated_selection = True) 88 | Chang2020b_control = make_protocol(params_simulation, protocol_name = "Chang2020b_control", selection_algorithm = "select_top25percent_control", repeated_selection = True) 89 | Jochum2019 = make_protocol(params_simulation, protocol_name = "Jochum2019", selection_algorithm = "pool_top10percent", repeated_selection = True) 90 | Mueller2019 = make_protocol(params_simulation, protocol_name = "Mueller2019", selection_algorithm = "pool_top25percent", repeated_selection = True) 91 | Panke_Buisse2015 = make_protocol(params_simulation, protocol_name = "Panke_Buisse2015", selection_algorithm = "pool_top28percent", repeated_selection = True) 92 | Swenson2000a = make_protocol(params_simulation, protocol_name = "Swenson2000a", selection_algorithm = "pool_top20percent", repeated_selection = True) 93 | Swenson2000a_control = make_protocol(params_simulation, protocol_name = "Swenson2000a_control", selection_algorithm = "pool_top20percent_control", repeated_selection = True) 94 | Swenson2000b = make_protocol(params_simulation, protocol_name = "Swenson2000b", selection_algorithm = "select_top25percent", repeated_selection = True) 95 | Swenson2000b_control = make_protocol(params_simulation, protocol_name = "Swenson2000b_control", selection_algorithm = 
"select_top25percent_control", repeated_selection = True) 96 | Swenson2000c = make_protocol(params_simulation, protocol_name = "Swenson2000c", selection_algorithm = "pool_top20percent", repeated_selection = True) 97 | Wright2019 = make_protocol(params_simulation, protocol_name = "Wright2019", selection_algorithm = "pool_top10percent", repeated_selection = True) 98 | Wright2019_control = make_protocol(params_simulation, protocol_name = "Wright2019_control", selection_algorithm = "pool_top10percent_control", repeated_selection = True) 99 | 100 | # Sub-lineage protocols 101 | Arora2019 = make_protocol(params_simulation, protocol_name = "Arora2019", selection_algorithm = "Arora2019", repeated_selection = True) 102 | Arora2019_control = make_protocol(params_simulation, protocol_name = "Arora2019_control", selection_algorithm = "Arora2019_control", repeated_selection = True) 103 | Raynaud2019a = make_protocol(params_simulation, protocol_name = "Raynaud2019a", selection_algorithm = "Raynaud2019a", repeated_selection = True) 104 | Raynaud2019a_control = make_protocol(params_simulation, protocol_name = "Raynaud2019a_control", selection_algorithm = "Raynaud2019a_control", repeated_selection = True) 105 | Raynaud2019b = make_protocol(params_simulation, protocol_name = "Raynaud2019b", selection_algorithm = "Raynaud2019b", repeated_selection = True) 106 | Raynaud2019b_control = make_protocol(params_simulation, protocol_name = "Raynaud2019b_control", selection_algorithm = "Raynaud2019b_control", repeated_selection = True) 107 | 108 | # Theory 109 | Penn2004 = make_protocol(params_simulation, protocol_name = "Penn2004", selection_algorithm = "Williams2007a", repeated_selection = True) 110 | Williams2007a = make_protocol(params_simulation, protocol_name = "Williams2007a", selection_algorithm = "Williams2007a", repeated_selection = True) 111 | Williams2007b = make_protocol(params_simulation, protocol_name = "Williams2007b", selection_algorithm = "Williams2007b", repeated_selection 
= True) 112 | Xie2019a = make_protocol(params_simulation, protocol_name = "Xie2019a", selection_algorithm = "select_top_dog", repeated_selection = True) 113 | Xie2019b = make_protocol(params_simulation, protocol_name = "Xie2019b", selection_algorithm = "select_top10percent", repeated_selection = True) 114 | 115 | 116 | #directed_selection 117 | directed_selection = pd.DataFrame({ 118 | "algorithm_name": "directed_selection", 119 | "transfer": range(1, params_simulation["n_transfer"] + 1), 120 | "community_phenotype": params_simulation["selected_function"], 121 | "selection_algorithm": ["no_selection" for i in range(params_simulation["n_transfer_selection"]-1)] + ["select_top"] + ["no_selection" for i in range(params_simulation["n_transfer"] - params_simulation["n_transfer_selection"])] 122 | }) 123 | 124 | algorithms = pd.concat([ 125 | # Control 126 | simple_screening, select_top25, select_top10, pool_top25, pool_top10, 127 | # Experimental protocols 128 | Blouin2015, Blouin2015_control, Chang2020a, Chang2020a_control, Chang2020b, Chang2020b_control, 129 | Jochum2019, Mueller2019, Panke_Buisse2015, 130 | Swenson2000a, Swenson2000a_control, Swenson2000b, Swenson2000b_control, Swenson2000c, 131 | Wright2019, Wright2019_control, 132 | # Sub-lineage protocols 133 | Arora2019, Arora2019_control, Raynaud2019a, Raynaud2019a_control, Raynaud2019b, Raynaud2019b_control, 134 | # Theory 135 | Penn2004, Williams2007a, Williams2007b, Xie2019a, Xie2019b, 136 | directed_selection 137 | ]) 138 | 139 | 140 | return algorithms 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /community_selection/D_perturbation_algorithms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Nov 27 2019 5 | @author: changyuchang 6 | """ 7 | import numpy as np 8 | import random 9 | from 
def _metabolite_choices(old_R0, r_type, exclude = None):
    """
    List the candidate resource perturbations for one source medium.

    old_R0 = pandas Series of resource supply in the kept (source) well
    r_type = perturbation type from the mapping file
    exclude = optional resource index that must never be perturbed (the target
        resource of f6-style functions); None allows all resources

    Returns a list of resource indices (rescale types) or of (gain, lose)
    index tuples (swap types).
    """
    index = [x for x in old_R0.index if x != exclude]
    if r_type == "add":
        # Move a fraction of the most abundant resource onto a random other one
        top = old_R0.idxmax()
        return [(x, y) for x in index for y in index if x != y and x == top]
    elif r_type == "remove":
        # Move a fraction of a random supplied resource onto the least abundant one
        bottom = old_R0.idxmin()
        return [(x, y) for x in index for y in index if x != y and y == bottom and old_R0[x] > 0]
    elif r_type in ("rescale_add", "old"):
        # Scale up (or re-supply) a random resource
        return list(index)
    elif r_type == "rescale_remove":
        # Scale down a random resource that is still supplied
        return [x for x in index if old_R0[x] > 0]
    # Default: swap a fraction between any ordered pair of distinct resources
    return [(x, y) for x in index for y in index if x != y]


def resource_perturb(plate, params_simulation, keep):
    """
    Perturb the fresh-medium composition (plate.R0) of every well except `keep`.

    Each non-kept well starts from the kept well's medium and receives one
    distinct random perturbation (never reused across wells); media are then
    rescaled so every well supplies R0_food in total, and fresh media are
    re-added to plate.R so the current round already uses the new R0.

    Fixes relative to the previous version: (1) the perturbation-type dispatch
    used independent `if` statements with a trailing `else`, so for r_type
    "add"/"remove" the candidate list was silently clobbered by the default
    swap list; (2) the target-resource branch referenced an undefined variable
    `y` in the rescale comprehensions, raising NameError. Both paths now go
    through _metabolite_choices.
    """
    r_type = params_simulation["r_type"]
    # Remove the fresh media added at the last transfer; re-added at the end
    plate.R = plate.R - plate.R0
    old_R0 = plate.R0[plate.N.columns[keep]]

    # If selecting on a target resource (f6-type functions), never perturb it
    exclude = None
    if "target_resource" in params_simulation["selected_function"]:
        exclude = old_R0.index[params_simulation["target_resource"]]
    metabolite_choice = _metabolite_choices(old_R0, r_type, exclude)

    # Assign one random, distinct perturbation to each non-kept well
    for k in plate.R0.columns:
        if k == plate.R0.columns[keep]:
            continue
        plate.R0[k] = old_R0  # start every well from the kept medium
        if len(metabolite_choice) == 0:
            continue  # every possible perturbation has already been used
        r_id = random.choice(metabolite_choice)
        if r_type == "rescale_add":
            plate.R0.loc[r_id, k] = plate.R0.loc[r_id, k] * (1 + params_simulation["r_percent"])
        elif r_type == "rescale_remove":
            plate.R0.loc[r_id, k] = plate.R0.loc[r_id, k] * (1 - params_simulation["r_percent"])
        elif r_type == "old":
            plate.R0[k] = plate.R0[k] * (1 - params_simulation["R_percent"])  # dilute old resource
            plate.R0.loc[r_id, k] = plate.R0.loc[r_id, k] + (params_simulation["R0_food"] * params_simulation["R_percent"])  # add fixed percent
        else:
            # Swap types ("add", "remove", default): move a fraction of
            # resource r_id[1] onto resource r_id[0]
            plate.R0.loc[r_id[0], k] = plate.R0.loc[r_id[0], k] + (plate.R0.loc[r_id[1], k] * params_simulation["r_percent"])
            plate.R0.loc[r_id[1], k] = plate.R0.loc[r_id[1], k] * (1 - params_simulation["r_percent"])
        # Remove the chosen perturbation as an option for subsequent wells
        metabolite_choice = [x for x in metabolite_choice if x != r_id]

    # Rescale so each medium supplies R0_food in total (guards against float drift)
    plate.R0 = plate.R0 / np.sum(plate.R0) * params_simulation["R0_food"]
    # Add back fresh media so this round uses the new R0
    plate.R = plate.R + plate.R0
    return plate
def parent_migration(community_function):
    """
    Migrate the parent pool into every well except the current winner(s).

    community_function = 1-D array of community function values, one per well

    Returns a 1-D numpy array of 0/1 migration factors (0 = no migration).
    """
    scores = np.asarray(community_function)

    # Start from "migrate everywhere" ...
    migration_factor = np.ones(len(scores))

    # ... then mask out every well tied for the best score (reversed order,
    # matching the winner-ordering convention used elsewhere in the package)
    best_wells = np.where(scores >= np.max(scores))[0][::-1]
    migration_factor[best_wells] = 0

    return migration_factor
def migrate_half(community_function):
    """
    Migrate into every other well (alternating 1/0 pattern).

    Fix: the original ``[1, 0] * int(n_wells/2)`` produced a factor list one
    element short whenever the number of wells was odd (e.g. monoculture runs
    set n_wells to the species count, which need not be even). The pattern is
    now padded and truncated to exactly n_wells entries; even n_wells is
    unchanged.
    """
    # Number of wells
    n_wells = len(community_function)

    # Alternating migration pattern, exactly n_wells long
    migration_factor = ([1, 0] * ((n_wells + 1) // 2))[:n_wells]

    return migration_factor


def migrate_random(community_function):
    """
    Migrate into a random subset of wells: independent Bernoulli(0.5) draw
    per well.
    """
    # Number of wells
    n_wells = len(community_function)

    # Migration
    migration_factor = np.random.binomial(1, 0.5, size = n_wells)

    return migration_factor


# --------------------------------------------------------------------------
# community_selection/C_selection_algorithms.py
# --------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Nov 27 2019
@author: changyuchang
"""
import numpy as np
from functools import partial

def no_selection(community_function):
    """
    Direct well-to-well transfer without selection (identity transfer matrix).
    """
    n_wells = len(community_function)
    return np.eye(n_wells)

# Make selection algorithms with similar names, using partial functions
## Select top p fraction
def temp_select_top(community_function, p):
    """
    Seed each new well from one community in the top-p fraction, ranked by
    function; the winner list is cycled until every new well has a donor.
    """
    n_wells = len(community_function)
    # Function value separating the top-p fraction from the rest.
    cut_off = np.sort(community_function)[int(np.floor(n_wells * (1 - p)))]
    winner_index = np.where(community_function >= cut_off)[0][::-1]
    # Repeat the winners until the donor list covers every new well.
    donors = list(winner_index) * (int(np.ceil(1 / p) + 1))
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix

for pct in [10, 15, 16, 20, 25, 28, 30, 33, 40, 50, 60]:
    globals()['select_top%spercent' % pct] = partial(temp_select_top, p = pct/100)


## Select top p fraction, randomized control
def temp_select_top_control(community_function, p):
    """
    Control for temp_select_top: the function vector is shuffled before the
    cutoff is applied, so the "winners" are effectively random wells.
    """
    n_wells = len(community_function)
    shuffled = community_function.copy()
    np.random.shuffle(shuffled)
    cut_off = np.sort(shuffled)[int(np.floor(n_wells * (1 - p)))]
    winner_index = np.where(shuffled >= cut_off)[0][::-1]
    donors = list(winner_index) * (int(np.ceil(1 / p) + 1))
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix

for pct in [10, 15, 16, 20, 25, 28, 30, 33, 40, 50, 60]:
    globals()['select_top%spercent_control' % pct] = partial(temp_select_top_control, p = pct/100)


## Pool top p fraction
def temp_pool_top(community_function, p):
    """
    Pool the top-p fraction of communities and inoculate every new well
    with the mixture.
    """
    n_wells = len(community_function)
    cut_off = np.sort(community_function)[int(np.floor(n_wells * (1 - p)))]
    winner_index = np.where(community_function >= cut_off)[0][::-1]
    transfer_matrix = np.zeros((n_wells, n_wells))
    transfer_matrix[:, list(winner_index)] = 1
    return transfer_matrix

for pct in [10, 15, 16, 20, 25, 28, 30, 33, 40, 50, 60]:
    globals()['pool_top%spercent' % pct] = partial(temp_pool_top, p = pct/100)
## Pool top p fraction, randomized control
def temp_pool_top_control(community_function, p):
    """
    Control for temp_pool_top: the function vector is shuffled before the
    cutoff is applied, so the pooled "winners" are random wells.
    """
    n_wells = len(community_function)
    shuffled = community_function.copy()
    np.random.shuffle(shuffled)
    cut_off = np.sort(shuffled)[int(np.floor(n_wells * (1 - p)))]
    winners = np.where(shuffled >= cut_off)[0][::-1]
    transfer_matrix = np.zeros((n_wells, n_wells))
    transfer_matrix[:, winners] = 1
    return transfer_matrix

for pct in [10, 15, 16, 20, 25, 28, 30, 33, 40, 50, 60]:
    globals()['pool_top%spercent_control' % pct] = partial(temp_pool_top_control, p = pct/100)


# Sub-lineage algorithms
def Arora2019(community_function, n_rep = 3):
    """
    Arora2019: split the plate into lines of n_rep replicate wells; the
    best-scoring well of each line re-seeds the whole line.
    """
    n_wells = len(community_function)
    n_lines = int(np.ceil(n_wells / n_rep))  # Number of lines
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo, hi = line * n_rep, line * n_rep + n_rep
        segment = community_function[lo:hi]
        best = np.where(segment == np.max(segment))[0]
        transfer_matrix[lo:hi, best + lo] = 1
    return transfer_matrix


def Arora2019_control(community_function, n_rep = 3):
    """
    Same as Arora2019, except the seeding well of each line is picked
    uniformly at random instead of by function.
    """
    n_wells = len(community_function)
    n_lines = int(np.ceil(n_wells / n_rep))  # Number of lines
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo = line * n_rep
        pick = np.random.randint(0, n_rep)
        if pick + lo >= n_wells:
            # The last line may be shorter than n_rep; redraw within its length.
            pick = np.random.randint(0, n_wells % n_rep)
        transfer_matrix[lo:lo + n_rep, pick + lo] = 1
    return transfer_matrix
def Raynaud2019a(community_function, n_lines = 3):
    """
    Raynaud2019a: split the plate into n_lines lineages; the best-scoring
    well of each lineage re-seeds all wells of that lineage.
    """
    n_wells = len(community_function)
    n_rep = int(np.ceil(n_wells / n_lines))  # Replicates per lineage
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo, hi = line * n_rep, line * n_rep + n_rep
        segment = community_function[lo:hi]
        best = np.where(segment == np.max(segment))[0]
        transfer_matrix[lo:hi, best + lo] = 1
    return transfer_matrix


def Raynaud2019a_control(community_function, n_lines = 3):
    """
    Same as Raynaud2019a, except the seeding well of each lineage is picked
    uniformly at random instead of by function.
    """
    n_wells = len(community_function)
    n_rep = int(np.ceil(n_wells / n_lines))  # Replicates per lineage
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo = line * n_rep
        pick = np.random.randint(0, n_rep)
        if pick + lo >= n_wells:
            # The last lineage may be shorter than n_rep; redraw within it.
            pick = np.random.randint(0, n_wells % n_rep)
        transfer_matrix[lo:lo + n_rep, pick + lo] = 1
    return transfer_matrix


def Raynaud2019b(community_function, n_lines = 3):
    """
    Same as Raynaud2019a, except the lineage winners are pooled: every new
    well is inoculated with the mixture of all lineage winners.
    """
    n_wells = len(community_function)
    n_rep = int(np.ceil(n_wells / n_lines))  # Replicates per lineage
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo = line * n_rep
        segment = community_function[lo:lo + n_rep]
        best = np.where(segment == np.max(segment))[0]
        transfer_matrix[:, best + lo] = 1
    return transfer_matrix
def Raynaud2019b_control(community_function, n_lines = 3):
    """
    Same as Raynaud2019b, except each lineage's pooled member is picked
    uniformly at random instead of by function.
    """
    n_wells = len(community_function)
    n_rep = int(np.ceil(n_wells / n_lines))  # Replicates per lineage
    transfer_matrix = np.zeros((n_wells, n_wells))
    for line in range(n_lines):
        lo = line * n_rep
        pick = np.random.randint(0, n_rep)
        if pick + lo >= n_wells:
            # The last lineage may be shorter than n_rep; redraw within it.
            pick = np.random.randint(0, n_wells % n_rep)
        transfer_matrix[:, pick + lo] = 1
    return transfer_matrix


def select_top(community_function):
    """
    Re-seed every new well from the single best-performing community
    (on ties, all tied wells cycle as donors).
    """
    n_wells = len(community_function)
    # Winner wells, highest first.
    winners = np.where(community_function >= np.max(community_function))[0][::-1]
    donors = list(winners) * n_wells
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix
# Other selection algorithms
def select_top_nth(community_function, n):
    """
    Re-seed every new well from the community ranked n-th from the top.
    Designed for measuring perturbation effects.
    """
    n_wells = len(community_function)
    # Value of the n-th best community.
    nth_value = np.sort(community_function)[::-1][n - 1]
    donors = list(np.where(community_function == nth_value)[0]) * n_wells
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix


def select_top_dog(community_function):
    """
    Reproduce the best community into ~60% of the new wells and the
    runner-up into the remainder.
    """
    n_wells = len(community_function)
    # Median cutoff guarantees at least two winner candidates.
    median_cut = np.sort(community_function)[int(np.round(n_wells * 0.5)) - 1]
    winners = list(np.where(community_function >= median_cut)[0][::-1])
    donors = [winners[0]] * int(0.6 * n_wells) + [winners[1]] * int(0.5 * n_wells)
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix


def Williams2007a(community_function):
    """
    Williams2007a: transfer the single best community into every new well
    through a strong (10^-4) bottleneck.
    """
    n_wells = len(community_function)
    winners = np.where(community_function == np.max(community_function))[0][::-1]
    donors = list(winners) * n_wells
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        transfer_matrix[new_well, donors[new_well]] = 10**(-4)  # Strong bottleneck
    return transfer_matrix
def Williams2007b(community_function, p = 0.2):
    """
    Williams2007b: pool the communities scoring strictly above the top-p
    cutoff and seed every new well from the pool through a strong (10^-4)
    bottleneck.
    """
    n_wells = len(community_function)
    cut_off = np.sort(community_function)[int(np.round(n_wells * (1 - p))) - 1]
    winners = np.where(community_function > cut_off)[0][::-1]
    transfer_matrix = np.zeros((n_wells, n_wells))
    transfer_matrix[:, winners] = 10**(-4)  # Strong bottleneck
    return transfer_matrix


def pair_top(community_function):
    """
    Pair the top ~sqrt(n) communities: each new well receives either one
    winner or a coalesced pair, with roughly two replicates per pairwise
    combination.
    """
    import itertools

    n_wells = len(community_function)
    # Top fraction scales as sqrt(n)/n.
    top_fraction = np.sqrt(n_wells) / n_wells
    cut_off = np.sort(community_function)[int(np.round(n_wells * (1 - top_fraction)))]
    winners = np.where(community_function >= cut_off)[0]
    pairs = list(itertools.combinations(winners, 2))
    donors = list(winners) + pairs * (int(np.round(1 / top_fraction)) + 1)
    transfer_matrix = np.zeros((n_wells, n_wells))
    for new_well in range(n_wells):
        # A scalar donor seeds one column; a tuple donor coalesces two winners.
        transfer_matrix[new_well, donors[new_well]] = 1
    return transfer_matrix
# --------------------------------------------------------------------------
# community_selection/usertools.py
# --------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mar 09 2020
@author: changyuchang
"""
import numpy as np
import pandas as pd
from community_selection.A_experiment_functions import *
from community_selection.B_community_phenotypes import *
from community_selection.C_selection_algorithms import *
from community_selection.D_perturbation_algorithms import *
from community_selection.E_protocols import *


def plot_community_function(function_df):
    """Scatter-plot community phenotype against transfer number."""
    function_df.plot.scatter(x = "Transfer", y = "CommunityPhenotype")

def plot_transfer_matrix(transfer_matrix):
    """Heatmap of a transfer (selection) matrix: rows = new wells, columns = old wells."""
    import seaborn as sns
    # NOTE(review): `plt` is not imported in this module; presumably it comes
    # in through one of the star imports above -- confirm.
    fig,ax=plt.subplots()
    sns.heatmap(transfer_matrix,ax=ax)
    ax.set_xlabel('Old well',fontsize=14)
    ax.set_ylabel('New well',fontsize=14)
    ax.set_title(r'Transfer Matrix',fontsize=14)
    plt.show()

def make_assumptions(input_file, row):
    ''' Generate the assumptions dictionary from input file and row of input file

    input_file = path to the mapping .csv (one selection experiment per row)
    row = integer index of the experiment row to load

    Returns the default assumptions (a_default) overridden by the non-'NA'
    cells of the chosen row, with several derived/coerced entries filled in.
    '''
    # Load row data and default assumptions. keep_default_na=False keeps the
    # literal string 'NA' so it can be used as an explicit "use default" flag.
    row_dat = pd.read_csv(input_file, keep_default_na=False).iloc[row]
    assumptions = a_default.copy()
    original_params = MakeParams(assumptions.copy())
    # Update assumptions based on row_dat
    for k in row_dat.keys():
        # If 'NA', fall back to the default value
        if k in assumptions.keys() and row_dat[k] != 'NA' :
            assumptions.update({k :row_dat[k]})
        elif k in assumptions.keys() and row_dat[k] == 'NA' :
            continue
        # Some params whose defaults we want to keep are not stored in
        # assumptions but are generated by MakeParams
        elif k not in assumptions.keys() and k in original_params.keys() and row_dat[k] != 'NA':
            assumptions.update({k :row_dat[k]})
        elif k not in assumptions.keys() and k in original_params.keys() and row_dat[k] == 'NA':
            assumptions.update({k:original_params[k]})
        else:
            # Unknown keys: keep the given value, or NaN when 'NA'
            if row_dat[k] != 'NA':
                assumptions.update({k :row_dat[k]})
            else:
                assumptions.update({k :np.nan})

    # These two assumptions are generated from combinations of other parameters
    assumptions.update({'SA' :row_dat['sn']*np.ones(row_dat['sf']) }) # Number of consumers in each specialist family
    assumptions.update({'MA' :row_dat['rn']*np.ones(row_dat['rf']) }) # Number of resources in each class

    # MakeParams does not work with numpy type for R0_food, so convert to
    # base python if not using the default
    if not isinstance(assumptions['R0_food'],int):
        assumptions['R0_food'] = assumptions['R0_food'].item()

    # When running monoculture (every isolate in monoculture), one well per
    # species in the regional pool
    if assumptions['monoculture'] :
        assumptions.update({"n_wells": int(np.sum(assumptions["SA"]) + assumptions["Sgen"])})

    # If bottleneck is True and no size is set, the size defaults to dilution
    if assumptions['bottleneck']:
        if pd.isnull(assumptions['bottleneck_size']):
            assumptions['bottleneck_size'] =assumptions['dilution']
        else:
            assumptions['bottleneck_size'] = float(assumptions['bottleneck_size'])

    # If knock_in is True and no threshold is set, the threshold defaults to 0
    if assumptions['knock_in']:
        if pd.isnull(assumptions['knock_in_threshold']) :
            assumptions['knock_in_threshold'] =0
        else:
            assumptions['knock_in_threshold'] = float(assumptions['knock_in_threshold'])

    # If coalescence is True and no fraction is set, defaults to 50-50
    if assumptions['coalescence']:
        if pd.isnull(assumptions['frac_coalescence']):
            assumptions['frac_coalescence'] =0.5
        else:
            assumptions['frac_coalescence'] = float(assumptions['frac_coalescence'])

    # If migration is True and no n_migration is set, defaults to n_inoc
    if assumptions['migration']:
        if pd.isnull(assumptions['n_migration']):
            assumptions['n_migration'] =assumptions['n_inoc']
        else:
            assumptions['n_migration'] = int(assumptions['n_migration'])
        # NOTE(review): the nesting of this s_migration coercion under the
        # migration flag is inferred from context -- confirm against upstream.
        if pd.isnull(assumptions['s_migration']):
            pass
        else:
            assumptions['s_migration'] = int(assumptions['s_migration'])

    # If resource_shift is True and no r_percent is set, defaults to 0.1
    if assumptions['resource_shift']:
        if pd.isnull(assumptions['r_percent']):
            assumptions['r_percent'] =0.1
        else:
            assumptions['r_percent'] = float(assumptions['r_percent'])

    # Overwrite plate: update n_wells from the supplied composition file
    if isinstance(assumptions["overwrite_plate"], str) and assumptions["overwrite_plate"] != "":
        print("\nUpdating the n_wells with overwrite_plate")
        df = pd.read_csv(assumptions["overwrite_plate"])
        # Only the latest transfer in the file is used
        df = df[df.Transfer == np.max(df.Transfer)]
        if len(df["Well"].unique()) != 1:
            assumptions["n_wells"] = len(df["Well"].unique())

    if np.isnan(assumptions["ruggedness"]):
        assumptions["ruggedness"] = 0

    # f6_target_resource: default target resource is the last resource
    if "target_resource" in assumptions["selected_function"]:
        if pd.isnull(assumptions['target_resource']):
            assumptions["target_resource"] = int(assumptions["rn"]) * int(assumptions["rf"]) - 1
        else:
            assumptions["target_resource"] = int(assumptions["target_resource"])

    return assumptions
def prepare_experiment(assumptions):
    """
    Prepare the experimental setup for this simulation

    assumptions = dictionary of metaparameters

    Return: params, params_simulation, params_algorithm, plate
    """
    print("\nGenerate species parameters")
    # Seed before every stochastic draw so runs are reproducible per seed
    np.random.seed(assumptions['seed'])
    params = MakeParams(assumptions)
    if assumptions["selected_function"] == "f5_invader_suppression":
        print("\nDraw invader feature")
        params = create_invader(params, assumptions)

    print("\nDraw per-capita function and cost")
    f1_species_smooth, f1_species_rugged, f2_species_smooth, f2_species_rugged = draw_species_function(assumptions)
    params.update({"f1_species_smooth": f1_species_smooth, "f1_species_rugged": f1_species_rugged, "f2_species_smooth": f2_species_smooth, "f2_species_rugged": f2_species_rugged})
    gi = draw_species_cost(f1_species_smooth, assumptions)
    params.update({"g": gi})

    print("\nConstruct plate")
    # Re-seed so plate construction does not depend on how many draws the
    # parameter generation consumed
    np.random.seed(assumptions['seed'])
    plate = make_plate(assumptions,params)

    print("\nAdd community function to plate")
    plate = add_community_function(plate, assumptions, params)

    if not pd.isnull(assumptions["overwrite_plate"]) :
        print("\nUpdating the initial plate composition by overwrite_plate")
        plate = overwrite_plate(plate, assumptions)

    print("\nPrepare Protocol")
    # Extract protocol from the protocol database (E_protocols)
    algorithms = make_algorithms(assumptions)
    params_algorithm = algorithms[algorithms['algorithm_name'] == assumptions['protocol']]

    # params_simulation by default contains all assumptions not stored in params
    params_simulation = dict((k, assumptions[k]) for k in assumptions.keys() if k not in params.keys())

    return params, params_simulation , params_algorithm, plate
def simulate_community(params, params_simulation, params_algorithm, plate):
    """
    Simulate community dynamics by given experimental regimes

    params = parameter passed from community-simulator
    params_simulation = dictionary of parameters for running experiment
    params_algorithm = dictionary of algorithms that determine the selection regime, migration regime, and community phenotypes
    plate = Plate object specified by community-simulator

    Side effects: writes composition/function .txt files when the
    corresponding save flags are set; mutates `plate` in place.
    """
    print("\nStarting " + params_simulation["exp_id"])
    print(params_algorithm)

    # Test the community function once up front so a broken phenotype
    # function fails fast instead of mid-experiment
    globals()[params_algorithm["community_phenotype"][0]](plate, params_simulation = params_simulation)
    try:
        community_function = globals()[params_algorithm["community_phenotype"][0]](plate, params_simulation = params_simulation) # Community phenotype
    except:
        print('\nCommunity phenotype test failed')
        raise SystemExit

    # Save the inocula composition
    if params_simulation['save_composition']:
        plate_data_list = list() # Plate composition
        plate_data = reshape_plate_data(plate, params_simulation,transfer_loop_index=0) # Initial state
        plate_data_list.append(plate_data)
        composition_filename = params_simulation['output_dir'] + params_simulation['exp_id'] + '_composition.txt'

    # Save the initial community function + richness + biomass
    if params_simulation['save_function']:
        community_function_list = list()
        # Species with abundance >= one cell (1/scale) count toward richness
        richness = np.sum(plate.N >= 1/params_simulation["scale"], axis = 0) # Richness
        biomass = list(np.sum(plate.N, axis = 0)) # Biomass
        function_data = reshape_function_data(params_simulation,community_function, richness, biomass, transfer_loop_index =0)
        community_function_list.append(function_data)
        function_filename = params_simulation['output_dir'] + params_simulation['exp_id'] + '_function.txt'

    print("\nStart propogation")
    # Run simulation
    for i in range(0, params_simulation["n_transfer"]):
        # Algorithms used in this transfer
        phenotype_algorithm = params_algorithm["community_phenotype"][i]
        selection_algorithm = params_algorithm["selection_algorithm"][i]

        # Propagation
        plate.Propagate(params_simulation["n_propagation"])

        # Measure community phenotype
        community_function = globals()[phenotype_algorithm](plate, params_simulation = params_simulation) # Community phenotype

        # Append the composition to a list (subsampled by lograte)
        if params_simulation['save_composition'] and ((i+1) % params_simulation['composition_lograte'] == 0):
            plate_data = reshape_plate_data(plate, params_simulation, transfer_loop_index=i+1)
            plate_data_list.append(plate_data)

        if params_simulation['save_function'] and ((i+1) % params_simulation['function_lograte'] == 0):
            richness = np.sum(plate.N >= 1/params_simulation["scale"], axis = 0) # Richness
            biomass = list(np.sum(plate.N, axis = 0)) # Biomass
            function_data = reshape_function_data(params_simulation, community_function, richness, biomass, transfer_loop_index =i+1)
            community_function_list.append(function_data)

        # Store prior state before passaging (for coalescence)
        setattr(plate, "prior_N", plate.N)
        setattr(plate, "prior_R", plate.R)
        setattr(plate, "prior_R0", plate.R0)

        # Passage and transfer matrix
        transfer_matrix = globals()[selection_algorithm](community_function)
        if params_simulation['monoculture']:
            plate = passage_monoculture(plate, params_simulation["dilution"])
        else:
            plate.Passage(transfer_matrix * params_simulation["dilution"])

        # Perturbation (directed selection only perturbs after select_top)
        if params_simulation['directed_selection']:
            if selection_algorithm == 'select_top': # In principle it can take select_top_x% but leave it as select_top for now
                plate = perturb(plate, params_simulation, keep = np.where(community_function >= np.max(community_function))[0][0])
            # if selection_algorithm != 'select_top' and (params_algorithm.iloc[i]["algorithm_name"] != 'simple_screening'):
            #     plate = perturb(plate, params_simulation, keep = None)
            elif selection_algorithm == "no_selection":
                pass

        print("Transfer " + str(i+1))

    if params_simulation['save_composition']:
        pd.concat(plate_data_list).to_csv(composition_filename, index = False)
    if params_simulation['save_function']:
        pd.concat(community_function_list).to_csv(function_filename, index = False)
    print("\n" + params_simulation["exp_id"] + " finished")
def save_plate(assumptions, plate):
    """
    Save the initial plate in a pickle file. Like saving a frozen stock at -80C
    """
    if assumptions['save_plate']:
        import dill as pickle
        with open(assumptions['output_dir'] + assumptions['exp_id'] + ".p", "wb") as f:
            pickle.dump(plate, f)

def extract_species_function(assumptions):
    """
    Extract the per-capita species function from the community data

    Returns a pandas DataFrame: one row per species for additive functions,
    one row per species pair for interaction functions.
    """
    # Same seeding as prepare_experiment, so the drawn functions match the run
    np.random.seed(assumptions['seed'])
    params = MakeParams(assumptions)
    f1_species_smooth, f1_species_rugged, f2_species_smooth, f2_species_rugged = draw_species_function(assumptions)
    # Total species = specialists (sn per family x sf families) + generalists
    S_tot = int(assumptions["sn"]) * int(assumptions["sf"]) + int(assumptions["Sgen"])

    if "additive" in assumptions["selected_function"]:
        if assumptions["selected_function"] == "f1_additive":
            per_capita_function = f1_species_smooth
            species_function = pd.DataFrame({"SelectedFunction": assumptions["selected_function"], "Seed": np.repeat(assumptions['seed'], S_tot), "ID": range(1, S_tot+1), "PerCapitaFunction": per_capita_function})
            if "cost" in assumptions["exp_id"]: # Should read a flag instead of name
                gi = draw_species_cost(f1_species_smooth, assumptions)
                params.update({"g": gi})
                species_function = pd.DataFrame({"SelectedFunction": assumptions["selected_function"], "Seed": np.repeat(assumptions['seed'], S_tot), "ID": range(1, S_tot+1), "PerCapitaFunction": per_capita_function, "g": gi})
        elif assumptions["selected_function"] == "f1a_additive":
            per_capita_function = f1_species_rugged
            species_function = pd.DataFrame({"SelectedFunction": assumptions["selected_function"], "Seed": np.repeat(assumptions['seed'], S_tot), "ID": range(1, S_tot+1), "PerCapitaFunction": per_capita_function})


    elif "interaction" in assumptions["selected_function"]:
        if assumptions["selected_function"] == "f2_interaction":
            per_interaction_function = f2_species_smooth
        elif assumptions["selected_function"] == "f2a_interaction":
            per_interaction_function = f2_species_rugged

        # Melt the S_tot x S_tot interaction matrix into long format
        df_interaction_function = pd.DataFrame(per_interaction_function)
        df_interaction_function.columns = range(1, S_tot+1)
        df_interaction_function = df_interaction_function.assign(ID_row=range(1,S_tot+1)).melt(id_vars="ID_row", var_name = "ID_col", value_name = "PerCapitaFunction")
        df_interaction_function = df_interaction_function.assign(SelectedFunction = assumptions["selected_function"], Seed = assumptions['seed'])
        species_function = df_interaction_function[["SelectedFunction", "Seed", "ID_row", "ID_col", "PerCapitaFunction"]]

    return(species_function)

# --------------------------------------------------------------------------
# docs/source/content/mapping_file.rst
# --------------------------------------------------------------------------
Input Mapping File
==================

The input mapping ``.csv`` lists 86 essential parameters in columns and (independent) selection experiments in rows. Here is an example of mapping file with two independent experiments.

.. csv-table::
   :file: ../data/input_test.csv



The mapping file has five categories of parameters:

.. contents::
   :local:

File operation
---------------

.. confval:: selected_function

   :type: string
   :default: ``f1_additive``

   Function under selection. Available options are ``f1_additive`` and ``f2_interaction``, ``f2a_interaction``, ``f3_additive_binary``, ``f4_interaction_binary``, ``f5_invader_growth``, and ``resource_distance_community_function``.


.. confval:: protocol

   :type: string
   :default: ``simple_screening``

   Protocol to implement. Only the protocols listed in ``E_protocols.py`` can be used.


.. confval:: seed

   :type: integer
   :default: ``1``

   Random seed to initiate pseudorandom number generator.
.. confval:: exp_id

   :type: string
   :default: ``f1_additive-simple_screening-1``

   Experiment-specific ID, which will also determine the naming convention of output files. For example, the community function is saved in ``f1_additive-simple_screening-1_function.txt`` if ``save_function=True``, whereas community composition is saved in ``f1_additive-simple_screening-1_composition.txt`` if ``save_composition=True``.


.. confval:: overwrite_plate

   :type: string
   :default: ``NA``

   To replace the initial plate composition with an arbitrary plate, specify a text file of the community composition that contains four columns: Type, ID, Well, and Abundance. If an output text file (e.g., ``f1_additive-simple_screening-1_composition.txt``) is specified and it contains composition for more than two transfers, by default only the metacommunity composition of the latest transfer is read.


.. confval:: passage_overwrite_plate

   :type: boolean
   :default: ``False``

   If overwrite_plate != NA, set TRUE if the community from overwrite_plate is at equilibrium and needs an additional transfer.


.. confval:: output_dir

   :type: string
   :default: ``data/``

   Directory where the output files will be stored.


.. confval:: save_function

   :type: boolean
   :default: ``True``

   Set ``True`` to save function data.


.. confval:: save_composition

   :type: boolean
   :default: ``True``

   Set ``True`` to save composition data.


.. confval:: save_plate

   :type: boolean
   :default: ``False``

   Set ``True`` to save the initial Metacommunity in a ``pickle`` file.


.. confval:: function_lograte

   :type: integer
   :default: ``1``

   How often you save the function in transfers. Default is saving functional data from every transfer.
105 | 106 | .. confval:: composition_lograte 107 | 108 | :type: integer 109 | :default: ``20`` 110 | 111 | How often do you save the composition in transfers. 112 | 113 | | 114 | 115 | Protocol-specific parameters 116 | ---------------------------- 117 | 118 | .. confval:: scale 119 | 120 | :type: integer 121 | :default: ``1000000`` 122 | 123 | Number of cells equivalent to :math:`N_i = 1`. 124 | 125 | 126 | .. confval:: n_inoc 127 | 128 | :type: integer 129 | :default: ``1000000`` 130 | 131 | Number of cells in the initial inoculum. 132 | 133 | 134 | .. confval:: rich_medium 135 | 136 | :type: boolean 137 | :default: ``True`` 138 | 139 | Set ``True`` to generate a rich medium sampled from an uniform distribution. Set ``False`` to generate a minimal medium with only the first resource is supplied. 140 | 141 | 142 | .. confval:: monoculture 143 | 144 | :type: boolean 145 | :default: ``False`` 146 | 147 | Set ``True`` to run simple screening with all monocultures from the regional species pool. The number of wells is equal to the number of species in the regional pool. 148 | 149 | 150 | .. confval:: dilution 151 | 152 | :type: float 153 | :default: ``0.001`` 154 | 155 | Dilution factor in the batch culture. 156 | 157 | 158 | .. confval:: n_wells 159 | 160 | :type: integer 161 | :default: ``96`` 162 | 163 | Number of wells (communities) in a plate (metacommunity). 164 | 165 | 166 | .. confval:: n_propagation 167 | 168 | :type: float 169 | :default: ``1`` 170 | 171 | Incubation time of a transfer. 172 | 173 | 174 | .. confval:: n_transfer 175 | 176 | :type: integer 177 | :default: ``40`` 178 | 179 | Number of total transfers (generations) to be run in the protocol. 180 | 181 | 182 | .. confval:: n_transfer_selection 183 | 184 | :type: interger 185 | :default: ``20`` 186 | 187 | Number of transfers (generations) that consecutively executes selection matrices from the start of an experiment. 
The number of stabilization transfers equals the difference between ``n_transfer`` and ``n_transfer_selection``.
confval:: g0 279 | 280 | :type: float 281 | :default: ``1`` 282 | 283 | The baseline conversion factor of biomass per energy. 284 | 285 | 286 | .. confval:: cost_distribution 287 | 288 | :type: string 289 | :default: ``Norm`` 290 | 291 | {"Gamma", "Unif"} 292 | 293 | .. confval:: cost_mean 294 | 295 | :type: float 296 | :default: ``0`` 297 | 298 | Mean fraction of cost feeded normal distribution. Suggested maximum to 0.05. 299 | 300 | 301 | .. confval:: cost_sd 302 | 303 | :type: float 304 | :default: ``0`` 305 | 306 | Standard deviation of fraction of cost feeded into a gamma distribution. ``cost_sd = 0`` if ``cost_mean = 0``, ``cost_sd = 0.01`` if ``cost_mean > 0``. 307 | 308 | 309 | .. confval:: cost_lower 310 | 311 | :type: float 312 | :default: ``0`` 313 | 314 | Lower bound for cost if ``cost_distribution`` is set to Uniform 315 | 316 | 317 | .. confval:: cost_upper 318 | 319 | :type: float 320 | :default: ``1`` 321 | 322 | Upper bound for cost if ``cost_distribution`` is set to Uniform 323 | 324 | 325 | .. confval:: invader_index 326 | 327 | :type: integer 328 | :default: ``2`` 329 | 330 | Index of an invader. Only one index is choosen. Currently a invasive community is not allowed. 331 | 332 | 333 | .. confval:: invader_sampling 334 | 335 | :type: string 336 | :default: ``Gamma`` 337 | 338 | Sampling algorithm to generate the invader uptake rate vector. Options are ``Gaussian``, ``Binary``, ``Gamma``, ``Binary_Gamma``. 339 | 340 | 341 | .. confval:: invader_strength 342 | 343 | :type: positive float 344 | :default: ``10`` 345 | 346 | Mean utiliration vector for the invader versus the average of the species in the pool 347 | 348 | .. confval:: target_resource 349 | 350 | :type: integer 351 | :default: ``NA`` 352 | 353 | Target resource production when ``selected_function`` is set to ``f6_target_resourece`` 354 | 355 | 356 | | 357 | 358 | Directed evolution 359 | ------------------ 360 | 361 | .. 
confval:: directed_selection 362 | 363 | :type: boolean 364 | :default: ``False`` 365 | 366 | Set ``True`` to run directed selection, one of flags below in directed evolution has to be also set ``True``. 367 | 368 | 369 | .. confval:: knock_out 370 | 371 | :type: boolean 372 | :default: ``False`` 373 | 374 | Set ``True`` to perform knock out pertubation. 375 | 376 | 377 | .. confval:: knock_in 378 | 379 | :type: boolean 380 | :default: ``F`` 381 | 382 | Set ``True`` performs knock in pertubation. 383 | 384 | 385 | .. confval:: knock_in_threshold 386 | 387 | :type: float 388 | :default: ``0.95`` 389 | 390 | If ``knock_in = True``, use the default ``knock_in_threshold=0.95``, which means that top 5% species in the pool is prepared to be knocked in a community, whereas the rest 95% of are not used. 391 | 392 | 393 | .. confval:: bottleneck 394 | 395 | :type: boolean 396 | :default: ``False`` 397 | 398 | Set ``True`` to perform bottleneck pertubations. 399 | 400 | 401 | .. confval:: bottleneck_size 402 | 403 | :type: float 404 | :default: ``0.00001`` 405 | 406 | If ``bottleneck=T``, perform an bottleneck shock to the specified communities by a dilution factor default to ``bottleneck_size=0.00001``. This bottleneck dilutoon is in addition to the regular dilution factor in the batch culture ``dilution=0.001``. 407 | 408 | 409 | .. confval:: migration 410 | 411 | :type: boolean 412 | :default: ``False`` 413 | 414 | Set ``True`` to perform migration pertubations. 415 | 416 | 417 | .. confval:: n_migration 418 | 419 | :type: integer 420 | :default: ``1000000`` 421 | 422 | Number of cells in the migrant community. 423 | 424 | 425 | .. confval:: s_migration 426 | 427 | :type: integer 428 | :default: ``NA`` 429 | 430 | Number of species in the migrant community. If ``NA`` (as default), the migrant community is sampled from a regional pool where the species abundance follows power-law distribution. 
If set into an integer, ``n_migration`` cells will be equally allocated to ``s_migrations`` species from the pool to build the migrant community. 431 | 432 | 433 | .. confval:: coalescence 434 | 435 | :type: boolean 436 | :default: ``False`` 437 | 438 | Set ``True`` to perform coalescence pertubation. 439 | 440 | 441 | .. confval:: f_coalescence 442 | 443 | :type: float 444 | :default: ``0.5`` 445 | 446 | Between 0 and 1. Fraction of migrant community during coalescence. The fraction of a perturbed community is ``1-f_coalescence``. 447 | 448 | 449 | .. confval:: resource_shift 450 | 451 | :type: boolean 452 | :default: ``False`` 453 | 454 | Set ``True`` performs resource pertubations. 455 | 456 | 457 | .. confval:: r_type 458 | 459 | :type: string 460 | :default: ``add`` 461 | 462 | Type of resource pertubation. Available options are ``rescale_add``, ``rescale_remove``, ``add``, ``remove``, ``old``. A fraction ``r_percent`` of resource A is removed, and that amount of resource is added to another resource B. 463 | 464 | 465 | .. confval:: r_percent 466 | 467 | :type: float 468 | :default: ``1`` 469 | 470 | Fraction of specified resource that is removed. ``r_percent=1`` means all resource A is removed. 471 | 472 | | 473 | 474 | Community-simulator parameters 475 | ------------------------------- 476 | 477 | The parameters in this section are inherited and some with differnt values from community-simulator. 478 | 479 | .. confval:: sampling 480 | 481 | :type: string 482 | :default: ``Binary_Gamma`` 483 | 484 | Specify choice of sampling algorithm to generate the consumer uptake rate vector. Options are ``Gaussian``, ``Binary``, ``Gamma``, ``Binary_Gamma``. 485 | 486 | 487 | .. confval:: sn 488 | 489 | :type: integer 490 | :default: ``2100`` 491 | 492 | Number of microbial species in the global pool. 493 | 494 | 495 | .. confval:: sf 496 | 497 | :type: integer 498 | :default: ``1`` 499 | 500 | Number of specialist family. 501 | 502 | 503 | .. 
confval:: s_gen 504 | 505 | :type: integer 506 | :default: ``0`` 507 | 508 | Number/Richness of generalist taxa. 509 | 510 | 511 | .. confval:: rn 512 | 513 | :type: integer 514 | :default: ``90`` 515 | 516 | Number of resource types. 517 | 518 | 519 | .. confval:: rf 520 | 521 | :type: integer 522 | :default: ``1`` 523 | 524 | Number of resource classes. 525 | 526 | 527 | .. confval:: R0_food 528 | 529 | :type: float 530 | :default: ``1000`` 531 | 532 | Total resource abundance. 533 | 534 | 535 | .. confval:: food 536 | 537 | :type: float 538 | :default: ``1000`` 539 | 540 | Index of food source being supplied in the minimal medium. Only works when ``rich_medium=False``. 541 | 542 | 543 | .. confval:: supply 544 | 545 | :type: string 546 | :default: ``off`` 547 | 548 | Choice of intrinsic resoruce dynamics. Set ``off`` for batch culture where resource is not renewing within a transfer. 549 | 550 | 551 | .. confval:: muc 552 | 553 | :type: float 554 | :default: ``10`` 555 | 556 | Mean sum over a row of the preference matrix ciα. 557 | 558 | 559 | .. confval:: sigc 560 | 561 | :type: float 562 | :default: ``3`` 563 | 564 | Standard deviation of sum over a row of the preference matrix ciα. 565 | 566 | 567 | .. confval:: c0 568 | 569 | :type: float 570 | :default: ``0`` 571 | 572 | Low consumption level for binary ciα. 573 | 574 | 575 | .. confval:: c1 576 | 577 | :type: integer 578 | :default: ``1``: 579 | 580 | High consumption level for binary ciα. 581 | 582 | 583 | .. confval:: q 584 | 585 | :type: float 586 | :default: ``0`` 587 | 588 | Fraction of consumption capacity allocated to preferred resource class. 589 | 590 | 591 | .. confval:: sparsity 592 | 593 | :type: float 594 | :default: ``0.2`` 595 | 596 | Sparsity of metabolic matrix. 597 | 598 | 599 | .. confval:: fs 600 | 601 | :type: float 602 | :default: ``0.45`` 603 | 604 | Fraction of secreted byproducts allocated to the same resource class. 605 | 606 | 607 | .. 
confval:: fw 608 | 609 | :type: float 610 | :default: ``0.45`` 611 | 612 | Fraction of secreted byproducts allocated to waste resource class. 613 | 614 | 615 | .. confval:: g 616 | 617 | :type: float 618 | :default: ``1`` 619 | 620 | Conversion factor from energy uptake to growth rate (1/energy). 621 | 622 | 623 | .. confval:: w 624 | 625 | :type: float 626 | :default: ``1`` 627 | 628 | Energy content of resource α (energy/mass). 629 | 630 | 631 | .. confval:: l 632 | 633 | :type: float 634 | :default: ``0`` 635 | 636 | Leakage fraction. 637 | 638 | 639 | .. confval:: m 640 | 641 | :type: float 642 | :default: ``0`` 643 | 644 | Minimal energy uptake for maintenance of species i (energy/time). Mortality. 645 | 646 | 647 | .. confval:: n 648 | 649 | :type: integer 650 | :default: ``2`` 651 | 652 | Hill coefficient for functional response (unitless). 653 | 654 | 655 | .. confval:: response 656 | 657 | :type: string 658 | :default: ``type III`` 659 | 660 | Functional response of uptaking rates. 661 | 662 | 663 | .. confval:: sigma_max 664 | 665 | :type: float 666 | :default: ``1`` 667 | 668 | Maximum input flux (mass/time) for type III functional response. 669 | 670 | 671 | .. confval:: regulation 672 | 673 | :type: string 674 | :default: ``independent`` 675 | 676 | Metabolic regulation. 677 | 678 | 679 | .. confval:: nreg 680 | 681 | :type: integer 682 | :default: ``10`` 683 | 684 | Hill coefficient that tunes steepness of metabolic regulation. 685 | 686 | 687 | .. confval:: tau 688 | 689 | :type: float 690 | :default: ``1`` 691 | 692 | External resource supply rate when ``supply="external"`` for chemostat setting. 693 | 694 | 695 | .. confval:: r 696 | 697 | :type: string 698 | :default: ``independent`` 699 | 700 | Renewal rate for self renewing resources when ``supply="self-renewing"`` for chemostat setting. 
701 | 702 | 703 | 704 | 705 | 706 | 707 | -------------------------------------------------------------------------------- /community_selection/A_experiment_functions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Nov 26 2019 4 | @author: changyuchang 5 | """ 6 | import numpy as np 7 | from community_simulator import * 8 | from community_simulator.usertools import * 9 | import community_simulator.usertools 10 | from community_selection.__init__ import * 11 | from community_selection.B_community_phenotypes import * 12 | 13 | # Species features 14 | 15 | def new_MakeMatrices(assumptions): 16 | """ 17 | Inherited function from community-simulator package 18 | 19 | Changes: 20 | 21 | - Add BINARY_GAMMA SAMPLING 22 | """ 23 | #PREPARE VARIABLES 24 | #Force number of species to be an array: 25 | if isinstance(assumptions['MA'],numbers.Number): 26 | assumptions['MA'] = [assumptions['MA']] 27 | if isinstance(assumptions['SA'],numbers.Number): 28 | assumptions['SA'] = [assumptions['SA']] 29 | #Force numbers of species to be integers: 30 | assumptions['MA'] = np.asarray(assumptions['MA'],dtype=int) 31 | assumptions['SA'] = np.asarray(assumptions['SA'],dtype=int) 32 | assumptions['Sgen'] = int(assumptions['Sgen']) 33 | #Default waste type is last type in list: 34 | if 'waste_type' not in assumptions.keys(): 35 | assumptions['waste_type']=len(assumptions['MA'])-1 36 | 37 | #Extract total numbers of resources, consumers, resource types, and consumer families: 38 | M = np.sum(assumptions['MA']) 39 | T = len(assumptions['MA']) 40 | S = np.sum(assumptions['SA'])+assumptions['Sgen'] 41 | F = len(assumptions['SA']) 42 | M_waste = assumptions['MA'][assumptions['waste_type']] 43 | #Construct lists of names of resources, consumers, resource types, and consumer families: 44 | resource_names = ['R'+str(k) for k in range(M)] 45 | type_names = ['T'+str(k) for k in range(T)] 46 | family_names = 
['F'+str(k) for k in range(F)] 47 | consumer_names = ['S'+str(k) for k in range(S)] 48 | waste_name = type_names[assumptions['waste_type']] 49 | resource_index = [[type_names[m] for m in range(T) for k in range(assumptions['MA'][m])], 50 | resource_names] 51 | consumer_index = [[family_names[m] for m in range(F) for k in range(assumptions['SA'][m])] 52 | +['GEN' for k in range(assumptions['Sgen'])],consumer_names] 53 | 54 | #PERFORM GAUSSIAN SAMPLING 55 | if assumptions['sampling'] == 'Gaussian': 56 | #Initialize dataframe: 57 | c = pd.DataFrame(np.zeros((S,M)),columns=resource_index,index=consumer_index) 58 | #Add Gaussian-sampled values, biasing consumption of each family towards its preferred resource: 59 | for k in range(F): 60 | for j in range(T): 61 | if k==j: 62 | c_mean = (assumptions['muc']/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 63 | c_var = (assumptions['sigc']**2/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 64 | else: 65 | c_mean = (assumptions['muc']/M)*(1-assumptions['q']) 66 | c_var = (assumptions['sigc']**2/M)*(1-assumptions['q']) 67 | c.loc['F'+str(k)]['T'+str(j)] = c_mean + np.random.randn(assumptions['SA'][k],assumptions['MA'][j])*np.sqrt(c_var) 68 | if 'GEN' in c.index: 69 | c_mean = assumptions['muc']/M 70 | c_var = assumptions['sigc']**2/M 71 | c.loc['GEN'] = c_mean + np.random.randn(assumptions['Sgen'],M)*np.sqrt(c_var) 72 | 73 | #PERFORM BINARY SAMPLING 74 | elif assumptions['sampling'] == 'Binary': 75 | assert assumptions['muc'] < M*assumptions['c1'], 'muc not attainable with given M and c1.' 
76 | #Construct uniform matrix at total background consumption rate c0: 77 | c = pd.DataFrame(np.ones((S,M))*assumptions['c0']/M,columns=resource_index,index=consumer_index) 78 | #Sample binary random matrix blocks for each pair of family/resource type: 79 | for k in range(F): 80 | for j in range(T): 81 | if k==j: 82 | p = (assumptions['muc']/(M*assumptions['c1']))*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 83 | else: 84 | p = (assumptions['muc']/(M*assumptions['c1']))*(1-assumptions['q']) 85 | 86 | c.loc['F'+str(k)]['T'+str(j)] = (c.loc['F'+str(k)]['T'+str(j)].values 87 | + assumptions['c1']*BinaryRandomMatrix(assumptions['SA'][k],assumptions['MA'][j],p)) 88 | #Sample uniform binary random matrix for generalists: 89 | if 'GEN' in c.index: 90 | p = assumptions['muc']/(M*assumptions['c1']) 91 | c.loc['GEN'] = c.loc['GEN'].values + assumptions['c1']*BinaryRandomMatrix(assumptions['Sgen'],M,p) 92 | 93 | elif assumptions['sampling'] == 'Gamma': 94 | #Initialize dataframe 95 | c = pd.DataFrame(np.zeros((S,M)),columns=resource_index,index=consumer_index) 96 | #Add Gamma-sampled values, biasing consumption of each family towards its preferred resource 97 | for k in range(F): 98 | for j in range(T): 99 | if k==j: 100 | c_mean = (assumptions['muc']/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 101 | c_var = (assumptions['sigc']**2/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 102 | thetac = c_var/c_mean 103 | kc = c_mean**2/c_var 104 | c.loc['F'+str(k)]['T'+str(j)] = np.random.gamma(kc,scale=thetac,size=(assumptions['SA'][k],assumptions['MA'][j])) 105 | else: 106 | c_mean = (assumptions['muc']/M)*(1-assumptions['q']) 107 | c_var = (assumptions['sigc']**2/M)*(1-assumptions['q']) 108 | thetac = c_var/c_mean 109 | kc = c_mean**2/c_var 110 | c.loc['F'+str(k)]['T'+str(j)] = np.random.gamma(kc,scale=thetac,size=(assumptions['SA'][k],assumptions['MA'][j])) 111 | if 'GEN' in c.index: 112 | c_mean = 
assumptions['muc']/M 113 | c_var = assumptions['sigc']**2/M 114 | thetac = c_var/c_mean 115 | kc = c_mean**2/c_var 116 | c.loc['GEN'] = np.random.gamma(kc,scale=thetac,size=(assumptions['Sgen'],M)) 117 | 118 | #PERFORM UNIFORM SAMPLING 119 | elif assumptions['sampling'] == 'Uniform': 120 | #Initialize dataframe: 121 | c = pd.DataFrame(np.zeros((S,M)),columns=resource_index,index=consumer_index) 122 | #Add uniformly sampled values, biasing consumption of each family towards its preferred resource: 123 | for k in range(F): 124 | for j in range(T): 125 | if k==j: 126 | c_mean = (assumptions['muc']/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 127 | else: 128 | c_mean = (assumptions['muc']/M)*(1-assumptions['q']) 129 | c.loc['F'+str(k)]['T'+str(j)] = c_mean + (np.random.rand(assumptions['SA'][k],assumptions['MA'][j])-0.5)*assumptions['b'] 130 | if 'GEN' in c.index: 131 | c_mean = assumptions['muc']/M 132 | c.loc['GEN'] = c_mean + (np.random.rand(assumptions['Sgen'],M)-0.5)*assumptions['b'] 133 | 134 | #PERFORM BINARY_GAMMA SAMPLING 135 | elif assumptions['sampling'] == 'Binary_Gamma': 136 | assert assumptions['muc'] < M*assumptions['c1'], 'muc not attainable with given M and c1.' 
137 | #Construct uniform matrix at total background consumption rate c0: 138 | c = pd.DataFrame(np.ones((S,M))*assumptions['c0']/M,columns=resource_index,index=consumer_index) 139 | #Sample binary random matrix blocks for each pair of family/resource type: 140 | for k in range(F): 141 | for j in range(T): 142 | if k==j: 143 | p = (assumptions['muc']/(M*assumptions['c1']))*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 144 | c_mean = (assumptions['muc']/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 145 | c_var = (assumptions['sigc']**2/M)*(1+assumptions['q']*(M-assumptions['MA'][j])/assumptions['MA'][j]) 146 | else: 147 | p = (assumptions['muc']/(M*assumptions['c1']))*(1-assumptions['q']) 148 | c_mean = (assumptions['muc']/M)*(1-assumptions['q']) 149 | c_var = (assumptions['sigc']**2/M)*(1-assumptions['q']) 150 | c_mean_binary = assumptions['c0']+ assumptions['c1']*p 151 | c_var_binary = assumptions['c1']**2 *p*(1-p) 152 | c_mean_gamma = c_mean/c_mean_binary 153 | c_var_gamma = (c_var - c_var_binary*(c_mean_gamma**2))/(c_var_binary + c_mean_binary**2) 154 | thetac = c_var_gamma/c_mean_gamma 155 | kc = c_mean_gamma**2/c_var_gamma 156 | c.loc['F'+str(k)]['T'+str(j)] = (c.loc['F'+str(k)]['T'+str(j)].values + assumptions['c1']*BinaryRandomMatrix(assumptions['SA'][k],assumptions['MA'][j],p))*np.random.gamma(kc,scale=thetac,size=(assumptions['SA'][k],assumptions['MA'][j])) 157 | #Sample uniform binary random matrix for generalists: 158 | if 'GEN' in c.index: 159 | p = assumptions['muc']/(M*assumptions['c1']) 160 | c_mean = assumptions['muc']/M 161 | c_var = assumptions['sigc']**2/M 162 | c_mean_binary = assumptions['c0']+ assumptions['c1']*p 163 | c_var_binary = assumptions['c1']**2 *p*(1-p) 164 | c_mean_gamma = c_mean/c_mean_binary 165 | c_var_gamma = (c_var - c_var_binary*(c_mean_gamma**2))/(c_var_binary + c_mean_binary**2) 166 | thetac = c_var_gamma/c_mean_gamma 167 | kc = c_mean_gamma**2/c_var_gamma 168 | c.loc['GEN'] = 
# Monkey-patch community-simulator so downstream parameter construction uses
# the extended sampling schemes defined above (e.g. Binary_Gamma).
community_simulator.usertools.MakeMatrices = new_MakeMatrices

def create_invader(params, assumptions):
    """
    Draw the species features of a designated invader.

    A second parameter set is sampled using the invader-specific sampling
    scheme (``invader_sampling``), and the invader's row of the uptake
    matrix ``c`` is replaced by that freshly sampled row scaled by
    ``invader_strength``.

    NOTE(review): the incoming ``params`` argument is immediately replaced
    by ``MakeParams(assumptions)`` and never read — confirm whether
    regenerating the baseline parameters here is intentional.
    """
    invader_assumptions = assumptions.copy()
    invader_assumptions.update({"sampling": assumptions["invader_sampling"]})
    # Regenerate the baseline parameters and a parallel invader-sampled set.
    params = MakeParams(assumptions)
    invader_params = MakeParams(invader_assumptions)
    row = assumptions["invader_index"]
    params["c"].iloc[row, :] = invader_params["c"].iloc[row, :] * assumptions["invader_strength"]

    return params
def draw_species_function(assumptions):
    """
    Draw species-specific per-capita contributions to community function.

    assumptions = dictionary of metaparameters from community-simulator;
    uses SA, Sgen, phi_distribution, phi_mean, phi_sd, phi_lower,
    phi_upper, function_ratio, and ruggedness.

    Returns
    -------
    f1_species_smooth : ndarray, shape (S_tot,)
        Additive per-capita function of each species.
    f1_species_rugged : ndarray, shape (S_tot,)
        Same values with a random fraction ``ruggedness`` zeroed out.
    f2_species_smooth : ndarray, shape (S_tot, S_tot)
        Pairwise interaction contributions; diagonal set to 0.
    f2_species_rugged : ndarray, shape (S_tot, S_tot)
        Sparsified interaction contributions; diagonal set to 0.

    Raises
    ------
    ValueError
        If ``phi_distribution`` is neither "Norm" nor "Uniform".
    """
    S_tot = int(np.sum(assumptions['SA']) + assumptions['Sgen'])
    # Probability that a per-capita contribution survives sparsification.
    keep = 1 - assumptions["ruggedness"]

    if assumptions["phi_distribution"] == "Norm":
        f1_species_smooth = np.random.normal(assumptions["phi_mean"], assumptions["phi_sd"], size = S_tot)
        f1_species_rugged = f1_species_smooth * np.random.binomial(1, keep, size = S_tot)
        f2_species_smooth = np.random.normal(assumptions["phi_mean"], assumptions["phi_sd"] * assumptions["function_ratio"], size = S_tot**2).reshape(S_tot, S_tot)
        f2_species_rugged = np.random.binomial(1, keep, S_tot**2).reshape(S_tot, S_tot) * np.array(f2_species_smooth)

    elif assumptions["phi_distribution"] == "Uniform":
        f1_species_smooth = np.random.uniform(assumptions["phi_lower"], assumptions["phi_upper"], size = S_tot)
        f1_species_rugged = f1_species_smooth * np.random.binomial(1, keep, size = S_tot)
        # NOTE(review): here only the upper bound is scaled by function_ratio,
        # whereas the Norm branch scales the sd — confirm the asymmetry is intended.
        f2_species_smooth = np.random.uniform(assumptions["phi_lower"], assumptions["phi_upper"] * assumptions["function_ratio"], size = S_tot**2).reshape(S_tot, S_tot)
        f2_species_rugged = np.random.binomial(1, keep, S_tot**2).reshape(S_tot, S_tot) * np.array(f2_species_smooth)

    else:
        # Fixed: an invalid choice previously fell through to an UnboundLocalError.
        raise ValueError("phi_distribution must be 'Norm' or 'Uniform', got %r" % (assumptions["phi_distribution"],))

    # A species does not interact with itself: remove diagonals in the interaction matrices.
    np.fill_diagonal(f2_species_smooth, 0)
    np.fill_diagonal(f2_species_rugged, 0)

    return f1_species_smooth, f1_species_rugged, f2_species_smooth, f2_species_rugged
def draw_species_cost(per_capita_function, assumptions):
    """
    Draw species-specific function cost.

    k_i is a conversion factor that specifies cost per function: each
    species' growth conversion factor g_i is reduced from the baseline g0
    according to its per-capita function.

    Parameters
    ----------
    per_capita_function : ndarray
        Per-capita function of each species (e.g. f1_species_smooth).
    assumptions : dict
        Uses cost_distribution, cost_mean, cost_sd, g0, and (for the
        Uniform branch) phi_distribution.

    Returns
    -------
    ndarray of g_i values, one per species.

    Raises
    ------
    ValueError
        If ``cost_distribution`` is neither "Norm" nor "Uniform".
    """
    if assumptions["cost_distribution"] == "Norm":
        # NOTE(review): despite the "Norm" label, the cost is drawn from a
        # gamma distribution parameterized by cost_mean/cost_sd — confirm.
        if assumptions["cost_mean"] != 0:
            cost_var = assumptions["cost_sd"]**2
            cost_k = assumptions["cost_mean"]**2 / cost_var
            cost_theta = cost_var / assumptions["cost_mean"]
            cost = np.random.gamma(shape = cost_k, scale = cost_theta, size = len(per_capita_function))
            g0 = assumptions["g0"]
            gi = g0 / (1 + per_capita_function * cost)
        else:
            # Zero mean cost: every species keeps the baseline conversion factor.
            gi = np.repeat(assumptions["g0"], len(per_capita_function))

    elif assumptions["cost_distribution"] == "Uniform":
        assert assumptions["phi_distribution"] == "Uniform", "Phi should follow uniform distribution as the cost"
        gi = 1 - per_capita_function

    else:
        # Fixed: an invalid choice previously fell through to an UnboundLocalError.
        raise ValueError("cost_distribution must be 'Norm' or 'Uniform', got %r" % (assumptions["cost_distribution"],))

    return gi
def add_community_function(plate, assumptions, params):
    """
    Attach the function-related attributes to the metacommunity plate.

    For f1 and f3, adds the per-species function vectors; for f2 and f4,
    adds the pairwise interaction matrices. When ``knock_in`` is enabled, a
    monoculture plate is stabilized first so that every isolate's function
    can be measured to build the knock-in candidate list. For functions that
    target a resource, the target resource index is attached as well.

    Returns the plate with the new attributes set.
    """
    # Draw the per-capita species functions (seeded for reproducibility).
    np.random.seed(assumptions['seed'])
    smooth_add, rugged_add, smooth_int, rugged_int = draw_species_function(assumptions)

    # Species function for f1 additive community function.
    plate.f1_species_smooth = smooth_add
    plate.f1_species_rugged = rugged_add

    # Species interaction function for f2 interactive function.
    plate.f2_species_smooth = smooth_int
    plate.f2_species_rugged = rugged_int

    # Invasion function f5 or knock_in with a threshold requires growing
    # isolates in monoculture to obtain their abundance.
    if assumptions['knock_in']:
        print("\nStabilizing monoculture plate")
        # Derive a monoculture configuration: one well per pool species.
        mono_assumptions = assumptions.copy()
        mono_params = params.copy()
        mono_assumptions.update({"n_wells": np.sum(assumptions["SA"]) + assumptions["Sgen"]})
        mono_assumptions.update({"monoculture": True})

        # Build and stabilize the monoculture plate for knock-in.
        mono_plate = make_plate(mono_assumptions, mono_params)
        print("\nStabilizing monoculture plate for knock-in")
        n_stabilization = mono_assumptions["n_transfer"] - mono_assumptions["n_transfer_selection"]
        for i in range(n_stabilization):
            mono_plate.Propagate(mono_assumptions["n_propagation"])
            mono_plate = passage_monoculture(mono_plate, mono_assumptions["dilution"])
            print("Transfer " + str(i+1))
        # One final growth cycle before storing data.
        mono_plate.Propagate(mono_assumptions["n_propagation"])
        print("\nFinished stabilizing monoculture plate")

        print("\nMeasuring monocultures for preparing knock_in list")
        # Give the monoculture plate the attributes the selected function reads.
        if "f1" in assumptions["selected_function"]:
            mono_plate.f1_species_smooth = smooth_add
            mono_plate.f1_species_rugged = rugged_add
        elif "f2" in assumptions["selected_function"]:
            mono_plate.f2_species_smooth = smooth_int
            mono_plate.f2_species_rugged = rugged_int
        elif "f6" in assumptions["selected_function"]:
            mono_plate.target_resource = assumptions["target_resource"]
        # Evaluate the selected function on each monoculture; the function is
        # looked up by name in the module globals.
        plate.knock_in_species_function = globals()[assumptions["selected_function"]](mono_plate, params_simulation = mono_assumptions)
        print("\nknock_in_species_function ", plate.knock_in_species_function)

    # f6_target_resource
    if "target_resource" in assumptions["selected_function"]:
        plate.target_resource = assumptions["target_resource"]

    return plate
def sample_from_pool(plate_N, assumptions, n = None):
    """
    Sample communities from the regional species pool.

    Parameters
    ----------
    plate_N : pandas.DataFrame
        Consumer abundance table (species x wells); used as a template for
        shape and labels.
    assumptions : dict
        Uses monoculture, metacommunity_sampling, power_alpha,
        lognormal_mean, lognormal_sd, n_inoc, scale (and S for the
        'Default' branch).
    n : int, optional
        Number of cells in the inoculum; defaults to assumptions['n_inoc'].

    Returns
    -------
    pandas.DataFrame of initial abundances (species x wells), in biomass
    units (cell counts divided by 'scale').
    """
    S_tot = plate_N.shape[0] # Total number of species in the pool
    N0 = np.zeros((plate_N.shape)) # Make empty plate
    consumer_index = plate_N.index
    well_names = plate_N.columns
    if n is None:
        n = int(assumptions['n_inoc']) # if not specified n is n_inoc

    # Draw community
    if assumptions['monoculture'] == False and assumptions['metacommunity_sampling'] == 'Power':
        # Sample initial community for each well
        for k in range(plate_N.shape[1]):
            pool = np.random.power(assumptions['power_alpha'], size = S_tot) # Power-law distribution
            pool = pool/np.sum(pool) # Normalize the pool
            consumer_list = np.random.choice(S_tot, size = n, replace = True, p = pool) # Draw from the pool
            my_tab = pd.crosstab(index = consumer_list, columns = "count") # Calculate the cell count
            N0[my_tab.index.values, k] = np.ravel(my_tab.values / assumptions['scale']) # Scale to biomass
        # Make data.frame
        N0 = pd.DataFrame(N0, index = consumer_index, columns = well_names)
    elif assumptions['monoculture'] == False and assumptions['metacommunity_sampling'] == 'Lognormal':
        for k in range(plate_N.shape[1]):
            pool = np.random.lognormal(assumptions['lognormal_mean'], assumptions['lognormal_sd'], size = S_tot) # Lognormal distribution
            pool = pool/np.sum(pool) # Normalize the pool
            consumer_list = np.random.choice(S_tot, size = n, replace = True, p = pool) # Draw from the pool
            my_tab = pd.crosstab(index = consumer_list, columns = "count") # Calculate the cell count
            N0[my_tab.index.values, k] = np.ravel(my_tab.values / assumptions['scale']) # Scale to biomass
        # Make data.frame
        N0 = pd.DataFrame(N0, index = consumer_index, columns = well_names)
    elif assumptions['monoculture'] == False and assumptions['metacommunity_sampling'] == 'Default':
        # Default state from community-simulator: each species starts with an
        # abundance of 1; the richness is determined by the assumptions.
        N0 = MakeInitialState(assumptions)[0]
        if not isinstance(N0, pd.DataFrame): # add labels to consumer state
            if len(np.shape(N0)) == 1:
                N0 = N0[:, np.newaxis]
            # Fixed: these three lines previously referenced an undefined
            # variable `N` (NameError) instead of `N0`.
            column_names = ['W'+str(k) for k in range(np.shape(N0)[1])]
            species_names = ['S'+str(k) for k in range(np.shape(N0)[0])]
            N0 = pd.DataFrame(N0, columns=column_names)
            N0.index = species_names
            N0 = N0/assumptions['S']
    # Monoculture plate
    elif assumptions['monoculture'] == True:
        N0 = np.eye(plate_N.shape[0]) * assumptions['n_inoc']/assumptions['scale']
        N0 = pd.DataFrame(N0, index = consumer_index, columns = ["W" + str(i) for i in range(plate_N.shape[0])])

    return N0
index = consumer_index, columns = well_names) 360 | elif assumptions['monoculture'] == False and assumptions['metacommunity_sampling'] == 'Default': 361 | #Default was already sampled (each species starts wtih an abundance of 1. number of species in each species pool determined by 362 | #N0 = plate_N/assumptions['S'] 363 | N0 = MakeInitialState(assumptions)[0] 364 | if not isinstance(N0, pd.DataFrame):#add labels to consumer state 365 | if len(np.shape(N0)) == 1: 366 | N0 = N0[:,np.newaxis] 367 | column_names = ['W'+str(k) for k in range(np.shape(N)[1])] 368 | species_names = ['S'+str(k) for k in range(np.shape(N)[0])] 369 | N0 = pd.DataFrame(N,columns=column_names) 370 | N0.index = species_names 371 | N0 = N0/assumptions['S'] 372 | # Monoculture plate 373 | elif assumptions['monoculture'] == True: 374 | N0 = np.eye(plate_N.shape[0]) *assumptions['n_inoc']/assumptions['scale'] 375 | N0 = pd.DataFrame(N0, index = consumer_index, columns = ["W" + str(i) for i in range(plate_N.shape[0])]) 376 | 377 | return N0 378 | 379 | 380 | 381 | 382 | def sample_from_pool2(plate_N, assumptions, synthetic_community_size = 2, n = None): 383 | """ 384 | Make synthetic communities with given initial richness 385 | """ 386 | S_tot = plate_N.shape[0] 387 | N0 = np.zeros((plate_N.shape)) 388 | consumer_index = plate_N.index 389 | well_names = plate_N.columns 390 | 391 | if n is None: 392 | n = assumptions['n_inoc'] 393 | 394 | for k in range(plate_N.shape[1]): 395 | consumer_list = np.random.choice(S_tot, size = synthetic_community_size, replace = False) 396 | 397 | for v in range(synthetic_community_size): 398 | N0[consumer_list[v], k] = n / synthetic_community_size / assumptions["scale"] 399 | 400 | N0 = pd.DataFrame(N0, index = consumer_index, columns = well_names) 401 | 402 | return N0 403 | 404 | def migrate_from_pool(plate,migration_factor,params_simulation, power_law = True, n = None): 405 | """ 406 | Migrate from species pool to the plate mainly for directed selection) 407 | If 
power_law pool is true than sample n cells from species pool following power law distribution (default is same as inoculum) 408 | If power_law is false sample s_migration species from isolates with each total number of cells equivalent to n 409 | """ 410 | from community_selection.usertools import sample_from_pool 411 | if n is None: 412 | n = params_simulation['n_inoc'] 413 | if power_law: 414 | if np.sum(migration_factor) != 0: 415 | temp_params_simulation = params_simulation.copy() 416 | migration_plate = sample_from_pool(plate.N, params_simulation,n=n) * migration_factor # Migration factor is a list determined by migration algorithms and community function 417 | plate_migrated = plate.N + migration_plate 418 | else: 419 | plate_migrated = plate.N 420 | else: 421 | if np.sum(migration_factor) != 0: 422 | migration_plate = plate.N.copy() 423 | migration_plate[:] = 0 424 | for k in plate.N.columns: 425 | if migration_factor[np.where(plate.N.columns == k)[0]]>0: 426 | for j in range(0,params_simulation['s_migration']): 427 | s_id = np.random.choice(np.where(plate.N[k]==0)[0]) 428 | migration_plate[k][s_id]= n * 1/params_simulation["scale"] * 1/params_simulation['s_migration'] 429 | plate_migrated = plate.N + migration_plate 430 | else: 431 | plate_migrated = plate.N 432 | return plate_migrated 433 | 434 | def passage_monoculture(plate_mono, f, scale = None, refresh_resource = True): 435 | """ 436 | Reduced version of Passage(), for passaging a large set of wells without multinomial sampling 437 | Most code adapted from community-simulator 438 | """ 439 | self = plate_mono.copy() 440 | #HOUSEKEEPING 441 | if scale == None: 442 | scale = self.scale #Use scale from initialization by default 443 | self.N[self.N<0] = 0 #Remove any negative values that may have crept in 444 | self.R[self.R<0] = 0 445 | 446 | #DEFINE NEW VARIABLES 447 | N_tot = np.sum(self.N) 448 | R_tot = np.sum(self.R) 449 | N = np.zeros(np.shape(self.N)) 450 | 451 | #Poisson sample cells 452 | self.N = 
self.N * f *scale 453 | self.N.applymap(np.random.poisson) 454 | self.N = self.N/scale 455 | 456 | if refresh_resource: 457 | self.R = self.R * f 458 | self.R = self.R+self.R0 459 | 460 | #In continuous culture, it is useful to eliminate the resources that are 461 | #going extinct, to avoid numerical instability 462 | else: 463 | R_tot = np.sum(self.R) 464 | R = np.zeros(np.shape(self.R)) 465 | for k in range(self.n_wells): 466 | if f[k,k] > 0 and R_tot[k] > 0: 467 | R[:,k] += np.random.multinomial(int(scale*R_tot[k]*f[k,k]),(self.R/R_tot).values[:,k])*1./scale 468 | self.R = pd.DataFrame(R, index = self.R.index, columns = self.R.keys()) 469 | 470 | return self 471 | 472 | def make_medium(plate_R, assumptions): 473 | """ 474 | Design medium for the plate 475 | if assumptions['rich_medium'] == True, make rich medium 476 | """ 477 | if assumptions['rich_medium'] == True: 478 | np.random.seed(1) 479 | 480 | # Total number of resource in this universe 481 | R_tot = plate_R.shape[0] 482 | 483 | # Make empty plate 484 | R0 = np.zeros((plate_R.shape)) # Make empty plate 485 | 486 | # Resource index 487 | resource_index = plate_R.index 488 | 489 | # Well index 490 | well_names = plate_R.columns 491 | 492 | resource_pool = np.random.uniform(0, 1, size = R_tot) # Uniform distribution 493 | resource_pool = resource_pool/np.sum(resource_pool) 494 | resource_list = np.random.choice(R_tot, size = assumptions["R0_food"], replace = True, p = resource_pool) # Draw from the pool 495 | my_tab = pd.crosstab(index = resource_list, columns = "count") 496 | food_compostion = np.ravel(my_tab.values) 497 | for i in range(plate_R.shape[1]): 498 | R0[my_tab.index.values,i] = food_compostion 499 | R0 = pd.DataFrame(R0, index = resource_index, columns = well_names) 500 | else: 501 | R0 = plate_R 502 | return R0 503 | 504 | def make_plate(assumptions, params): 505 | """ 506 | prepares the plate 507 | """ 508 | # Make dynamical equations 509 | def dNdt(N,R,params): 510 | return 
MakeConsumerDynamics(assumptions)(N,R,params) 511 | def dRdt(N,R,params): 512 | return MakeResourceDynamics(assumptions)(N,R,params) 513 | 514 | dynamics = [dNdt,dRdt] 515 | 516 | # Make initial state 517 | init_state = MakeInitialState(assumptions) 518 | plate = Metacommunity(init_state, dynamics, params, scale = assumptions["scale"], parallel = False) 519 | 520 | # Add media to plate (overrides community simulator) 521 | plate.R = make_medium(plate.R, assumptions) 522 | plate.R0 = make_medium(plate.R0, assumptions) 523 | 524 | # Set the target resource to 0 when target function is resource production f6a 525 | if assumptions["selected_function"] == "f6a_target_resource": 526 | plate.R.iloc[assumptions["target_resource"],:] = 0 527 | plate.R0.iloc[assumptions["target_resource"],:] = 0 528 | 529 | # If plate is to be replaced by overwritting plate, skip the sampling 530 | if pd.isnull(assumptions["overwrite_plate"]): 531 | plate.N = sample_from_pool(plate.N, assumptions) 532 | 533 | # Remove invader in the plate 534 | if assumptions["selected_function"] == "f5_invader_suppression": 535 | plate.N.iloc[assumptions["invader_index"],:] = 0 536 | 537 | return plate 538 | 539 | # Data operation 540 | 541 | def reshape_plate_data(plate, params_simulation,transfer_loop_index): 542 | """ 543 | Reshape the plate resource and consumer matrices (wider form) into a melted data.frame (longer form) 544 | """ 545 | # Temporary function for adding variables to and melting df 546 | def melt_df(plate_df, data_type = "consumer"): 547 | # Consumers 548 | temp_df = pd.DataFrame(plate_df) 549 | total_number = temp_df.shape[0] 550 | 551 | ## Add variables 552 | temp_df["Type"] = np.repeat(data_type, total_number) 553 | temp_df["ID"] = range(total_number) 554 | temp_df["Transfer"] = np.repeat(str(transfer_loop_index), total_number) 555 | temp_df["exp_id"] = np.repeat(params_simulation['exp_id'] , total_number) 556 | 557 | ## Melt the df 558 | temp_df = pd.melt(temp_df, id_vars = 
["exp_id","Transfer", "Type", "ID"], var_name = "Well", value_name = "Abundance") 559 | temp_df = temp_df[temp_df.Abundance != 0] # Remove zero abundances 560 | return temp_df 561 | 562 | # Melt the df 563 | temp_plate = plate.copy() # Copy the original plate 564 | df_N = melt_df(temp_plate.N, data_type = "consumer") 565 | df_R = melt_df(temp_plate.R, data_type = "resource") 566 | df_R0 = melt_df(temp_plate.R0,data_type = "R0") 567 | 568 | # Concatenate dataframes 569 | merged_df = pd.concat([df_N, df_R,df_R0]) 570 | merged_df["Index"] = list(range(0, merged_df.shape[0])) 571 | merged_df.set_index("Index", inplace = True) 572 | 573 | return merged_df # Return concatenated dataframe 574 | 575 | def reshape_function_data(params_simulation,community_function, richness, biomass, transfer_loop_index): 576 | """ 577 | Reshape the community function, richness, biomass into a melted data.frame 578 | """ 579 | temp_vector1 = community_function.copy() 580 | temp_vector2 = richness.copy() 581 | temp_vector3 = biomass.copy() 582 | 583 | # Number of wells 584 | number_well = len(richness) 585 | 586 | # Make data.frame 587 | temp_df = pd.DataFrame({ 588 | "exp_id": np.repeat(params_simulation['exp_id'], number_well), 589 | "Well": ["W" + str(i) for i in range(number_well)], 590 | "Transfer": np.repeat(str(transfer_loop_index), number_well), 591 | "CommunityPhenotype": temp_vector1, 592 | "Richness": temp_vector2, 593 | "Biomass": temp_vector3}) 594 | 595 | # Turn the transfer columns as numeric 596 | temp_df[["Transfer"]] = temp_df[["Transfer"]].apply(pd.to_numeric) 597 | 598 | return temp_df 599 | 600 | def overwrite_plate(plate, assumptions): 601 | """ 602 | Overwrite the plate N, R, and R0 dataframe by the input composition file 603 | """ 604 | import os 605 | assert(os.path.isfile(assumptions['overwrite_plate'])), "The overwrite_plate does not exist" 606 | # Read the input data file 607 | df = pd.read_csv(assumptions["overwrite_plate"]) 608 | 609 | # By default, use the 
latest transfer to avoid well name conflict 610 | df = df[df.Transfer == np.max(df.Transfer)] 611 | 612 | # If only one community, repeat filling this community into n_wells wells 613 | if len(df["Well"].unique()) == 1: 614 | print("The overwrite plate has only one community (well). Replicate it to the number of wells in current plate") 615 | temp_df = df.copy() 616 | df = pd.concat([temp_df.assign(Well = "W" + str(i)) for i in range(assumptions["n_wells"])]) 617 | # Else if n_wells does not conform to the number of wells in the overwrite_plate, overwrite it 618 | else: 619 | assumptions["n_wells"] = len(df["Well"].unique()) 620 | # If the input overwrite file has multiple communities, check if it has the same number as n_wells 621 | #assert len(df["Well"].unique()) == assumptions["n_wells"], "overwrite_plate does not have the same number of wells as n_wells" 622 | # Check if the input file type has consumer, resurce and R0 623 | assert all(pd.Series(df["Type"].unique()).isin(["consumer", "resource", "R0"])), "overwrite_plate must have three types of rows: consumer, resource, R0" 624 | # Make empty dataframes 625 | N = plate.N.copy() 626 | R = plate.R.copy() 627 | R0 = plate.R.copy() 628 | # N0 629 | for w in range(assumptions["n_wells"]): 630 | temp_comm = df[(df["Well"] == ("W" + str(w))) & (df["Type"] == "consumer")][["ID", "Abundance"]] 631 | temp = np.zeros(N.shape[0]) 632 | for i in range(temp_comm.shape[0]): 633 | temp[int(temp_comm.iloc[i]["ID"])] = temp_comm.iloc[i]["Abundance"] 634 | N["W" + str(w)] = temp 635 | 636 | # R 637 | for w in range(assumptions["n_wells"]): 638 | temp_res = df[(df["Well"] == ("W" + str(w))) & (df["Type"] == "resource")][["ID", "Abundance"]] 639 | temp = np.zeros(R.shape[0]) 640 | for i in range(temp_res.shape[0]): 641 | temp[int(temp_res.iloc[i]["ID"])] = temp_res.iloc[i]["Abundance"] 642 | R["W" + str(w)] = temp 643 | # R0 644 | for w in range(assumptions["n_wells"]): 645 | temp_R0 = df[(df["Well"] == ("W" + str(w))) & 
(df["Type"] == "R0")][["ID", "Abundance"]] 646 | temp = np.zeros(R0.shape[0]) 647 | for i in range(temp_R0.shape[0]): 648 | temp[int(temp_R0.iloc[i]["ID"])] = temp_R0.iloc[i]["Abundance"] 649 | R0["W" + str(w)] = temp 650 | plate.N = N 651 | plate.N0 = N 652 | plate.R = R 653 | plate.R0 = R0 654 | 655 | # Passaage the overwrite plate 656 | if assumptions["passage_overwrite_plate"]: 657 | plate.Passage(np.eye(assumptions["n_wells"]) * assumptions["dilution"]) 658 | 659 | return(plate) 660 | --------------------------------------------------------------------------------