├── .github └── workflows │ └── release.yml ├── .gitignore ├── .vscode └── settings.json ├── MANIFEST.in ├── README.md ├── assets └── network_image.JPG ├── demo ├── README.md ├── data │ ├── demo_data.bed │ ├── demo_data.bim │ └── demo_data.fam ├── outputs │ ├── demo_run.7.P.expected │ └── demo_run.7.Q.expected ├── run_demo.sh └── run_diagnostics.py ├── neural_admixture ├── __init__.py ├── entry.py ├── model │ ├── __init__.py │ ├── neural_admixture.py │ └── train.py ├── src │ ├── __init__.py │ ├── inference.py │ ├── loaders.py │ ├── main.py │ ├── snp_reader.py │ ├── svd.py │ ├── utils.py │ └── utils_c │ │ ├── __int__.py │ │ ├── pack2bit.cu │ │ ├── rsvd.pyx │ │ └── utils.pyx └── tests │ └── test_placeholder.py ├── pyproject.toml ├── setup.cfg ├── setup.py └── tox.ini /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | test: 9 | name: ${{ matrix.platform }} py${{ matrix.python-version }} 10 | runs-on: ${{ matrix.platform }} 11 | strategy: 12 | fail-fast: false # Optional: so the whole matrix does not fail if one combination fails 13 | matrix: 14 | platform: [ubuntu-latest, macos-latest] # Added macos-latest 15 | python-version: ["3.10", "3.11", "3.12"] 16 | steps: 17 | - uses: actions/checkout@v4 # Updated 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v4 # Updated 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | python -m pip install setuptools tox tox-gh-actions 28 | 29 | - name: Test with tox (Linux with Xvfb) 30 | if: matrix.platform == 'ubuntu-latest' # Linux only 31 | uses: GabrielBB/xvfb-action@v1 32 | with: 33 | run: python -m tox 34 | env: 35 | PLATFORM: ${{ matrix.platform }} 36 | 37 | - name: Test with tox (macOS) 38 | if: matrix.platform == 'macos-latest' # Test directly on macOS 39 | run: python -m tox 40 | env: 41 | PLATFORM: ${{ matrix.platform }} 42 | 43 | build: 44 | name: Build wheels on ${{ matrix.platform }} 45 | needs: test 46 | runs-on: ${{ matrix.platform }} # Matrix strategy for the platform 47 | strategy: 48 | fail-fast: false # Optional 49 | matrix: 50 | platform: [ubuntu-latest, macos-latest] # Added macos-latest 51 | steps: 52 | - uses: actions/checkout@v4 # Updated 53 | 54 | - name: Set up Python for build 55 | uses: actions/setup-python@v4 # Updated 56 | with: 57 | python-version: "3.12" # Python used to run cibuildwheel 58 | 59 | # Set up OpenMP for macOS if needed 60 | - name: Setup OpenMP (macOS) 61 | if: matrix.platform == 'macos-latest' # macOS only 62 | # This step is needed if your C/C++ extensions use OpenMP. 63 | # If that is not the case, it can be omitted. 64 | shell: bash 65 | run: | 66 | echo "Checking and installing libomp for macOS if needed..." 67 | if ! brew list libomp &>/dev/null; then 68 | brew install libomp 69 | else 70 | echo "libomp is already installed." 71 | fi 72 | # The environment variables CC, CXX, CFLAGS, CXXFLAGS and LDFLAGS 73 | # may need to be set here if cibuildwheel does not pick them up 74 | # automatically, or if your build requires it.
75 | # For example, you might need to add to GITHUB_ENV: 76 | # echo "CPPFLAGS=-I$(brew --prefix libomp)/include" >> $GITHUB_ENV 77 | # echo "LDFLAGS=-L$(brew --prefix libomp)/lib" >> $GITHUB_ENV 78 | 79 | - name: Install cibuildwheel 80 | run: python -m pip install --upgrade cibuildwheel 81 | 82 | - name: Build wheels 83 | run: cibuildwheel --output-dir dist 84 | env: 85 | # Common cibuildwheel settings 86 | CIBW_SKIP: "pp* *musllinux*" # pp* for PyPy, *musllinux* if musl wheels are not wanted 87 | CIBW_BUILD_VERBOSITY: 1 88 | # macOS-specific (following best practices): 89 | # Build for x86_64 and arm64 (Apple Silicon). 90 | # 'auto' might only build for the runner's architecture (x86_64 on macos-latest). 91 | # 'auto64' is a good option to get both common architectures. 92 | CIBW_ARCHS_MACOS: ${{ matrix.platform == 'macos-latest' && 'auto64' || 'auto' }} 93 | # If specific tests are needed during the cibuildwheel build: 94 | # CIBW_TEST_REQUIRES: pytest 95 | # CIBW_TEST_COMMAND: "pytest {project}/tests" # Adjust the path to your tests 96 | 97 | - name: Upload dist as artifact 98 | uses: actions/upload-artifact@v4 # Updated 99 | with: 100 | name: dist-wheels-${{ matrix.platform }} # Unique artifact name per platform 101 | path: dist/ 102 | 103 | upload-pypi: 104 | name: Upload to PyPI 105 | needs: build # Waits for all 'build' jobs (one per platform) to finish 106 | runs-on: ubuntu-latest 107 | # Only runs on 'release published' events 108 | if: github.event_name == 'release' && github.event.action == 'published' 109 | steps: 110 | # Checkout is not needed if you only download and publish 111 | # - uses: actions/checkout@v4 112 | 113 | - name: Download all built wheels 114 | uses: actions/download-artifact@v4 # Updated 115 | with: 116 | path: dist/ # Directory where all artifacts will be downloaded 117 | pattern: dist-wheels-* # Pattern to download all wheel artifacts 118 | merge-multiple: true # Merges multiple artifacts into the 'path' directory 119 | 120 | - name: Publish package to PyPI 121 | uses: pypa/gh-action-pypi-publish@release/v1 122 | with: 123 | password: ${{ secrets.PYPI_API_TOKEN }} 124 | # gh-action-pypi-publish will upload all .whl and .tar.gz files from dist/ 125 | # Make sure you also build and upload an sdist if desired. 126 | # A 'build_sdist' job could be added for that. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Data folder contents 132 | **/data/** 133 | 134 | # wandb 135 | **/wandb/** 136 | 137 | # logs 138 | logs/ 139 | 140 | # outputs 141 | outputs/ 142 | 143 | # figures 144 | **figures** 145 | 146 | # DS_Store 147 | **/.DS_Store 148 | 149 | # Cython/C 150 | *.c 151 | # Version file 152 | neural_admixture/_version.py 153 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "vector": "cpp", 4 | "initializer_list": "cpp", 5 | "type_traits": "cpp", 6 | "xutility": "cpp" 7 | } 8 | } -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include neural_admixture/src/utils_c/*.cu -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/neural-admixture.svg) 2 | ![PyPI - Version](https://img.shields.io/pypi/v/neural-admixture) 3 | ![PyPI - License](https://img.shields.io/pypi/l/neural-admixture) 4 | ![PyPI - Status](https://img.shields.io/pypi/status/neural-admixture) 5 | [![tests](https://github.com/AI-sandbox/neural-admixture/workflows/tests/badge.svg)](https://github.com/AI-sandbox/neural-admixture/actions) 6 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/neural-admixture) 7 | [![DOI](https://zenodo.org/badge/331290967.svg)](https://zenodo.org/badge/latestdoi/331290967) 8 | 9 | # Neural ADMIXTURE 10 | 11 | Neural ADMIXTURE 
is an unsupervised global ancestry inference technique based on ADMIXTURE. By using neural networks, Neural ADMIXTURE offers high-quality ancestry assignments with a much shorter running time than ADMIXTURE's. For more information, we recommend reading [our corresponding article](https://www.nature.com/articles/s43588-023-00482-7). 12 | 13 | The software can be invoked via CLI and has a similar interface to ADMIXTURE (_e.g._ the output format is completely interchangeable). While the software runs on both CPU and GPU, we recommend using GPUs if available to take advantage of the neural network-based implementation. 14 | 15 | ![nadm_mna](assets/network_image.JPG) 16 | 17 | ## System requirements 18 | 19 | ### Hardware requirements 20 | This package requires a computer with enough RAM to handle the large datasets the network has been designed to work with. For this reason, we recommend using compute clusters whenever available to avoid memory issues. 21 | 22 | ### Software requirements 23 | 24 | The package has been tested on both Linux (CentOS 7.9.2009, Ubuntu 18.04.5 LTS) and MacOS (BigSur 11.2.3, Intel and Monterey 12.3.1, M1). It is highly recommended to use GPUs for optimal performance; make sure CUDA drivers are properly installed. 25 | 26 | ## Installation guide 27 | We recommend creating a fresh Python 3.12 environment using `conda` (or `virtualenv`) and then installing the package `neural-admixture` there. As an example, for `conda`, one should launch the following commands: 28 | 29 | ```console 30 | $ conda create -n nadmenv python=3.12 31 | $ conda activate nadmenv 32 | (nadmenv) $ pip install neural-admixture 33 | ``` 34 | 35 | **Important note:** Using GPUs greatly speeds up processing and is recommended for large datasets. 36 | 37 | Specify the number of GPUs (`--num_gpus`) and CPUs (`--num_cpus`) available on your machine to optimize performance. For MacOS users with an Apple Silicon chip, using `--num_gpus 1` will enable MPS acceleration in the software. Note that despite MPS acceleration being supported, the RAM available on laptops is probably limited, so larger datasets should be run on CUDA-capable GPUs, which the software is more optimized for. 38 | 39 | ## Usage 40 | ### Running Neural ADMIXTURE 41 | 42 | To train a model from scratch, simply invoke the following commands from the root directory of the project. For more information about all the arguments, please run `neural-admixture train --help`. If training a single-head version of the network suffices, please use the flag `--k` instead of `--min_k` and `--max_k`. Note that BED, PGEN and VCF are supported as of now.
43 | 44 | For unsupervised Neural ADMIXTURE (single-head): 45 | 46 | ```console 47 | $ neural-admixture train --k K --name RUN_NAME --data_path DATA_PATH --save_dir SAVE_PATH 48 | ``` 49 | 50 | For unsupervised Neural ADMIXTURE (multi-head): 51 | 52 | ```console 53 | $ neural-admixture train --min_k K_MIN --max_k K_MAX --name RUN_NAME --data_path DATA_PATH --save_dir SAVE_PATH 54 | ``` 55 | 56 | For supervised Neural ADMIXTURE: 57 | 58 | ```console 59 | $ neural-admixture train --k K --supervised --pops_path POPS_PATH --name RUN_NAME --data_path DATA_PATH --save_dir SAVE_PATH # only single-head support at the moment 60 | ``` 61 | 62 | As an example, the following ADMIXTURE call 63 | 64 | ```console 65 | $ ./admixture snps_data.bed 8 -s 42 66 | ``` 67 | 68 | would be mimicked in Neural ADMIXTURE by running 69 | 70 | ```console 71 | $ neural-admixture train --k 8 --data_path snps_data.bed --save_dir SAVE_PATH --init_file INIT_FILE --name snps_data --seed 42 72 | ``` 73 | 74 | with some parameters, such as the decoder initialization or the save directories, not having a direct equivalent. 75 | 76 | Several files will be output to the `SAVE_PATH` directory (the `name` parameter will be used to build the filenames): 77 | - A `.P` file, similar to ADMIXTURE. 78 | - A `.Q` file, similar to ADMIXTURE. 79 | - A `.pt` file, containing the weights of the trained network. 80 | - A `.json` file, with the configuration of the network. 81 | 82 | The last three files are required to run posterior inference using the network, so be careful not to delete them accidentally! Logs are printed to the `stdout` channel by default. If you want to save them to a file, you can use the command `tee` along with a pipe: 83 | 84 | ```console 85 | $ neural-admixture train --k 8 ... | tee run.log 86 | ``` 87 | 88 | ### Inference mode (projective analysis) 89 | 90 | ADMIXTURE allows reusing computations in the _projective analysis_ mode, in which the `P` (`F`, frequencies) matrix is fixed to an already known result and only the assignments are computed. Due to the nature of our algorithm, assignments can be computed for unseen data by simply feeding the data through the encoder. This mode can be run by typing `infer` instead of `train` right after the `neural-admixture` call. 91 | 92 | For example, assuming we have a trained Neural ADMIXTURE (named `nadm_test`) in the path `./outputs`, one could run inference on unseen data (`./data/unseen_data.bed`) via the following command: 93 | 94 | ```console 95 | $ neural-admixture infer --name nadm_test --save_dir ./outputs --out_name unseen_nadm_test --data_path ./data/unseen_data.bed 96 | ``` 97 | 98 | For this command to work, files `./outputs/nadm_test.pt` and `./outputs/nadm_test_config.json`, which are training outputs, must exist. In this case, only a `.Q` will be created, which will contain the assignments for this data (the parameter of the flag `out_name` will be used to generate the output file name). This file will be written in the `--save_dir` directory (in this case, `./outputs`). 99 | 
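Since the `.Q` and `.P` outputs are plain-text matrices (as in ADMIXTURE), they can be loaded directly with standard tools for downstream analysis. As a minimal sketch using NumPy (the file name below is an assumption based on the run above with a model trained with `K=8`; adjust it to your own output name and value of K):

```python
import numpy as np

# Q contains one row per sample and one column per ancestry component; each row sums to ~1.
Q = np.genfromtxt("./outputs/unseen_nadm_test.8.Q")
print(Q.shape)         # (num_samples, K)
print(Q[:5].round(3))  # assignments of the first five samples
```
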
100 | ### Supervised Neural ADMIXTURE 101 | 102 | The supervised version of the algorithm can be used when all samples have a corresponding population label. This can be very beneficial, especially when dealing with large imbalances in the data (_e.g._ the data contains 1K samples from Pop1 and only 50 samples from Pop2). 103 | 104 | In order to use the supervised mode, the `--pops_path` argument pointing to the file where the ancestries are defined must be passed. The latter file must be a single-column, headerless, plain-text file where row `i` denotes the ancestry for the `i`-th sample in the data. We currently do not support datasets which contain samples with missing ancestries. 105 | 
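For illustration only, a populations file for a dataset with five samples might look as follows (the labels here are made up), with one label per line, in the same order as the samples in the input file:

```console
$ cat pops.txt
AFR
AFR
EUR
EAS
AMR
```
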
106 | The supervised mode works by adding a scaled classification loss to the bottleneck of the algorithm (Equation 5 of the paper). The scaling factor can have a big impact on the performance. If it is too small, then the supervised loss will have little impact on the training, so results would be similar to an unsupervised run. On the other hand, if it is too large, then the supervision will dominate training, making the network overconfident in its predictions: essentially, one would get only binary assignments. The default value of the scaling factor is _η_ = 100, and it can be controlled using the parameter `--supervised_loss_weight`. 107 | 108 | Basically, if on validation data you are getting single-ancestry estimations when you expect admixed estimations, try setting a smaller value for the supervised loss scaling factor _η_ (`--supervised_loss_weight`). 109 | 110 | Moreover, note that the initialization method chosen will have no effect, as supervised initialization is always used in the supervised version. 111 | 112 | ## Other options 113 | - `batch_size`: number of samples used at every update. If you have memory issues, try setting a lower batch size. Defaults to 800. 114 | - `n_components`: dimension of the PCA projection for SVD. Defaults to 8. 115 | - `epochs`: maximum number of times the whole training dataset is used to update the weights. Defaults to 250. 116 | - `learning_rate`: dictates how large an update to the weights will be. If you find the loss function oscillating, try setting a lower value. If convergence is slow, try setting a higher value. Defaults to 25e-4. 117 | - `seed`: RNG seed for replication purposes. Defaults to 42. 118 | - `num_gpus`: number of GPUs to use during training. Set to 0 for CPU-only execution. Defaults to 0. 119 | 120 | ## Experiments replication 121 | 122 | The datasets _All-Chms_, _Chm-22_ and _Chm-22-Sim_ used in the Experiments section of the article can be found in [figshare](https://doi.org/10.6084/m9.figshare.19387538.v1). For descriptions of the datasets, please refer to the corresponding section in the paper. The exact hyperparameters used in the experiments to allow replication can be found in Supplementary Table 3 of the article. 123 | 124 | ## Demo 125 | 126 | To run the software with a small demo dataset, check the instructions in [the corresponding folder of the repository](https://github.com/AI-sandbox/neural-admixture/tree/main/demo). 127 | 128 | ## Troubleshooting 129 | 130 | ### CUDA issues 131 | 132 | If you get an error similar to the following (when using GPU): 133 | 134 | 135 | ``` 136 | OSError: CUDA_HOME environment variable is not set. Please set it to your CUDA install root. 137 | ``` 138 | 139 | simply installing `nvcc` using `conda`/`mamba` should fix it: 140 | 141 | ```console 142 | $ conda install -c nvidia nvcc 143 | ``` 144 | 145 | ## License 146 | 147 | **NOTICE**: This software is available for use free of charge for academic research use only. Academic users may fork this repository and modify and improve it to suit their research needs, but they also inherit these terms and must include a licensing notice to that effect. Commercial users, for-profit companies or consultants, and non-profit institutions not qualifying as "academic research" should contact the authors for a separate license. This applies to this repository directly and to any other repository that includes source, executables, or git commands that pull/clone this repository as part of its function. Such repositories, whether ours or others', must include this notice. 148 | 149 | ## Cite 150 | 151 | When using this software, please cite the following paper: 152 | 153 | ```{tex} 154 | @article{dominguezmantes23, 155 | abstract = {Characterizing the genetic structure of large cohorts has become increasingly important as genetic studies extend to massive, increasingly diverse biobanks. Popular methods decompose individual genomes into fractional cluster assignments with each cluster representing a vector of DNA variant frequencies. However, with rapidly increasing biobank sizes, these methods have become computationally intractable. Here we present Neural ADMIXTURE, a neural network autoencoder that follows the same modeling assumptions as the current standard algorithm, ADMIXTURE, while reducing the compute time by orders of magnitude surpassing even the fastest alternatives. One month of continuous compute using ADMIXTURE can be reduced to just hours with Neural ADMIXTURE. A multi-head approach allows Neural ADMIXTURE to offer even further acceleration by computing multiple cluster numbers in a single run. Furthermore, the models can be stored, allowing cluster assignment to be performed on new data in linear time without needing to share the training samples.}, 156 | author = {Dominguez Mantes, Albert and Mas Montserrat, Daniel and Bustamante, Carlos D. and Gir{\'o}-i-Nieto, Xavier and Ioannidis, Alexander G.}, 157 | doi = {10.1038/s43588-023-00482-7}, 158 | id = {Dominguez Mantes2023}, 159 | isbn = {2662-8457}, 160 | journal = {Nature Computational Science}, 161 | title = {Neural ADMIXTURE for rapid genomic clustering}, 162 | url = {https://doi.org/10.1038/s43588-023-00482-7}, 163 | year = {2023}, 164 | bdsk-url-1 = {https://doi.org/10.1038/s43588-023-00482-7}} 165 | 166 | ``` 167 | -------------------------------------------------------------------------------- /assets/network_image.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-sandbox/neural-admixture/270d8ec14135dc5ebbbfc13bc9571178b69c9f31/assets/network_image.JPG -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # Demo 2 | 3 | ## Demo data 4 | 5 | This demo uses a very small subset of the dataset _All-Chms_ used in the article (105 individuals, 8451 variants). Therefore, results are not intended to be meaningful and should not be interpreted as such. 6 | 7 | ## Running the demo 8 | 9 | Make sure the package is installed (`> pip3 install neural-admixture`) in the current environment and run the following command: 10 | 11 | ```console 12 | > sh run_demo.sh 13 | ``` 14 | 15 | This will launch a 5-epoch training of Neural ADMIXTURE. When the training's finished, the `Q` and `P` outputs are then compared against the expected outputs. Note that the output may be a bit different from the expected one depending on the hardware, even to the point where the message _Output and expected output are not similar_ is displayed. 
Therefore, seeing this message at the end of the demo does not necessarily mean that the installation is faulty. 16 | 17 | The expected output was generated using the version `1.1.2` of the software on a 2019 MacBook Air (Intel) running MacOS BigSur 11.2.3 (Python version: `3.9.7`). The demo took ~7 seconds to execute under the same conditions. Be aware that the first usage may be a bit slower due to first-time package loading, especially on a recently created environment. 18 | -------------------------------------------------------------------------------- /demo/data/demo_data.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-sandbox/neural-admixture/270d8ec14135dc5ebbbfc13bc9571178b69c9f31/demo/data/demo_data.bed -------------------------------------------------------------------------------- /demo/data/demo_data.fam: -------------------------------------------------------------------------------- 1 | 0 HG03086 0 0 0 -9 2 | 0 HG03139 0 0 0 -9 3 | 0 HG03461 0 0 0 -9 4 | 0 HG03499 0 0 0 -9 5 | 0 NA19026 0 0 0 -9 6 | 0 NA19030 0 0 0 -9 7 | 0 NA19114 0 0 0 -9 8 | 0 NA19198 0 0 0 -9 9 | 0 NA19235 0 0 0 -9 10 | 0 NA19375 0 0 0 -9 11 | 0 NA19456 0 0 0 -9 12 | 0 HGDP00460 0 0 0 -9 13 | 0 HGDP00470 0 0 0 -9 14 | 0 HGDP00940 0 0 0 -9 15 | 0 HGDP01271 0 0 0 -9 16 | 0 HG01954 0 0 0 -9 17 | 0 HG02150 0 0 0 -9 18 | 0 HG02271 0 0 0 -9 19 | 0 HGDP00710 0 0 0 -9 20 | 0 HGDP00838 0 0 0 -9 21 | 0 HGDP00849 0 0 0 -9 22 | 0 HGDP00855 0 0 0 -9 23 | 0 HGDP00857 0 0 0 -9 24 | 0 HGDP00864 0 0 0 -9 25 | 0 HGDP00865 0 0 0 -9 26 | 0 HGDP00995 0 0 0 -9 27 | 0 HGDP01012 0 0 0 -9 28 | 0 HGDP01014 0 0 0 -9 29 | 0 LP6005443-DNA_E11 0 0 0 -9 30 | 0 LP6005677-DNA_F01 0 0 0 -9 31 | 0 HG00437 0 0 0 -9 32 | 0 HG00759 0 0 0 -9 33 | 0 HG01028 0 0 0 -9 34 | 0 HG01600 0 0 0 -9 35 | 0 HG02136 0 0 0 -9 36 | 0 HG02395 0 0 0 -9 37 | 0 NA18616 0 0 0 -9 38 | 0 NA18990 0 0 0 -9 39 | 0 NA19059 0 0 0 -9 40 | 0 HGDP00775 0 0 0 -9 41 | 0 HGDP00786 0 0 0 -9 42 | 0 HGDP00954 0 0 0 -9 43 | 0 HGDP00961 0 0 0 -9 44 | 0 HGDP01331 0 0 0 -9 45 | 0 HGDP01347 0 0 0 -9 46 | 0 HG00096 0 0 0 -9 47 | 0 HG00102 0 0 0 -9 48 | 0 HG00173 0 0 0 -9 49 | 0 HG00179 0 0 0 -9 50 | 0 HG00285 0 0 0 -9 51 | 0 HG00342 0 0 0 -9 52 | 0 HG00357 0 0 0 -9 53 | 0 HG01509 0 0 0 -9 54 | 0 HG01537 0 0 0 -9 55 | 0 HG01626 0 0 0 -9 56 | 0 HG02239 0 0 0 -9 57 | 0 NA20768 0 0 0 -9 58 | 0 HGDP00534 0 0 0 -9 59 | 0 HGDP01157 0 0 0 -9 60 | 0 HGDP01380 0 0 0 -9 61 | 0 HGDP00540 0 0 0 -9 62 | 0 HGDP00544 0 0 0 -9 63 | 0 HGDP00545 0 0 0 -9 64 | 0 HGDP00546 0 0 0 -9 65 | 0 HGDP00547 0 0 0 -9 66 | 0 HGDP00549 0 0 0 -9 67 | 0 HGDP00552 0 0 0 -9 68 | 0 HGDP00554 0 0 0 -9 69 | 0 HGDP00556 0 0 0 -9 70 | 0 HGDP00655 0 0 0 -9 71 | 0 HGDP00656 0 0 0 -9 72 | 0 HGDP00661 0 0 0 -9 73 | 0 HGDP00663 0 0 0 -9 74 | 0 SS6004477 0 0 0 -9 75 | 0 LP6005519-DNA_F06 0 0 0 -9 76 | 0 HG03019 0 0 0 -9 77 | 0 HG03238 0 0 0 -9 78 | 0 HG03653 0 0 0 -9 79 | 0 HG03693 0 0 0 -9 80 | 0 HG03803 0 0 0 -9 81 | 0 HG04159 0 0 0 -9 82 | 0 NA20894 0 0 0 -9 83 | 0 NA21098 0 0 0 -9 84 | 0 NA21118 0 0 0 -9 85 | 0 HGDP00102 0 0 0 -9 86 | 0 HGDP00145 0 0 0 -9 87 | 0 HGDP00243 0 0 0 -9 88 | 0 HGDP00288 0 0 0 -9 89 | 0 HGDP00330 0 0 0 -9 90 | 0 HGDP00412 0 0 0 -9 91 | 0 HGDP00572 0 0 0 -9 92 | 0 HGDP00573 0 0 0 -9 93 | 0 HGDP00579 0 0 0 -9 94 | 0 HGDP00584 0 0 0 -9 95 | 0 HGDP00616 0 0 0 -9 96 | 0 HGDP00624 0 0 0 -9 97 | 0 HGDP00629 0 0 0 -9 98 | 0 HGDP00631 0 0 0 -9 99 | 0 HGDP00634 0 0 0 -9 100 | 0 HGDP00637 0 0 0 -9 101 | 0 HGDP00645 0 0 0 -9 102 | 0 
HGDP00647 0 0 0 -9 103 | 0 HGDP00725 0 0 0 -9 104 | 0 LP6005592-DNA_F01 0 0 0 -9 105 | 0 LP6005442-DNA_E04 0 0 0 -9 106 | -------------------------------------------------------------------------------- /demo/outputs/demo_run.7.Q.expected: -------------------------------------------------------------------------------- 1 | 1.026339083909988403e-01 6.267656385898590088e-02 5.052371025085449219e-01 1.482540965080261230e-01 2.952153794467449188e-02 6.639166921377182007e-02 8.528512716293334961e-02 2 | 9.251680970191955566e-02 6.272771954536437988e-02 5.152272582054138184e-01 1.504537910223007202e-01 2.806563116610050201e-02 6.224949285387992859e-02 8.875934779644012451e-02 3 | 9.119559079408645630e-02 5.901207029819488525e-02 5.015094280242919922e-01 1.577878296375274658e-01 2.811167947947978973e-02 5.972754955291748047e-02 1.026558503508567810e-01 4 | 1.018894985318183899e-01 6.102929264307022095e-02 5.127995014190673828e-01 1.437018811702728271e-01 2.939077094197273254e-02 6.578374654054641724e-02 8.540533483028411865e-02 5 | 9.009853750467300415e-02 6.954587250947952271e-02 5.106025338172912598e-01 1.390539556741714478e-01 2.897270396351814270e-02 6.225844100117683411e-02 9.946793317794799805e-02 6 | 1.068219617009162903e-01 6.981537491083145142e-02 5.014021992683410645e-01 1.244452074170112610e-01 2.884176932275295258e-02 6.863441318273544312e-02 1.000390872359275818e-01 7 | 9.945099055767059326e-02 5.962971597909927368e-02 5.219345688819885254e-01 1.406721621751785278e-01 2.659933827817440033e-02 5.994132906198501587e-02 9.177187085151672363e-02 8 | 8.120039850473403931e-02 6.450828909873962402e-02 5.159909725189208984e-01 1.477399617433547974e-01 2.662849240005016327e-02 5.969546735286712646e-02 1.042363792657852173e-01 9 | 9.312941133975982666e-02 5.804005637764930725e-02 5.173891186714172363e-01 1.510709822177886963e-01 2.743572555482387543e-02 6.334877759218215942e-02 8.958587795495986938e-02 10 | 9.065252542495727539e-02 6.588764488697052002e-02 5.052088499069213867e-01 1.445127427577972412e-01 2.977943420410156250e-02 6.507129967212677002e-02 9.888749569654464722e-02 11 | 1.003060191869735718e-01 6.023761630058288574e-02 5.125740170478820801e-01 1.446916162967681885e-01 2.738897874951362610e-02 6.334304809570312500e-02 9.145869314670562744e-02 12 | 8.932106941938400269e-02 7.259805500507354736e-02 4.968560934066772461e-01 1.400540322065353394e-01 2.906028926372528076e-02 8.074314892292022705e-02 9.136739373207092285e-02 13 | 8.794509619474411011e-02 6.988359242677688599e-02 5.079348683357238770e-01 1.371903270483016968e-01 2.815063670277595520e-02 7.584752142429351807e-02 9.304797649383544922e-02 14 | 9.389115124940872192e-02 6.013741344213485718e-02 5.214622020721435547e-01 1.463746279478073120e-01 2.863685600459575653e-02 6.473111361265182495e-02 8.476667106151580811e-02 15 | 9.060718864202499390e-02 8.849702775478363037e-02 4.007193446159362793e-01 1.544875353574752808e-01 3.899788483977317810e-02 7.207488268613815308e-02 1.546162217855453491e-01 16 | 1.227139681577682495e-01 4.378820955753326416e-02 5.525210499763488770e-02 4.708085581660270691e-02 2.894365973770618439e-02 6.179656386375427246e-01 8.425558358430862427e-02 17 | 1.306370943784713745e-01 4.250923916697502136e-02 5.963420122861862183e-02 5.471429973840713501e-02 2.934113703668117523e-02 6.027933359146118164e-01 8.037070184946060181e-02 18 | 1.344204545021057129e-01 3.640499711036682129e-02 4.541063681244850159e-02 4.721116647124290466e-02 2.795655094087123871e-02 6.306158900260925293e-01 7.798022776842117310e-02 19 
| 1.285841166973114014e-01 2.938763797283172607e-02 3.544783592224121094e-02 4.597333073616027832e-02 2.812908589839935303e-02 6.782383322715759277e-01 5.423955991864204407e-02 20 | 1.316325664520263672e-01 3.135866671800613403e-02 3.636519983410835266e-02 5.571527034044265747e-02 2.567203901708126068e-02 6.441005468368530273e-01 7.515570521354675293e-02 21 | 1.296698749065399170e-01 3.187648206949234009e-02 3.682581335306167603e-02 5.710158869624137878e-02 2.503978274762630463e-02 6.449305415153503418e-01 7.455591857433319092e-02 22 | 1.408576220273971558e-01 3.092189505696296692e-02 4.432411491870880127e-02 5.196523666381835938e-02 2.678558044135570526e-02 6.454180479049682617e-01 5.972749367356300354e-02 23 | 1.433526426553726196e-01 2.925603836774826050e-02 4.814311489462852478e-02 5.203331634402275085e-02 2.712922915816307068e-02 6.474248170852661133e-01 5.266081914305686951e-02 24 | 1.322397589683532715e-01 3.484867140650749207e-02 4.282678291201591492e-02 5.088277906179428101e-02 2.869553118944168091e-02 6.401270627975463867e-01 7.037942111492156982e-02 25 | 1.245993897318840027e-01 3.663603588938713074e-02 5.208056047558784485e-02 4.783252254128456116e-02 2.754941768944263458e-02 6.485846042633056641e-01 6.271743029356002808e-02 26 | 1.268509626388549805e-01 2.903451770544052124e-02 3.104041889309883118e-02 5.263415351510047913e-02 2.959198318421840668e-02 6.727272868156433105e-01 5.812075361609458923e-02 27 | 1.125282794237136841e-01 2.816665172576904297e-02 2.988037839531898499e-02 4.682679846882820129e-02 2.955091185867786407e-02 6.978623270988464355e-01 5.518467724323272705e-02 28 | 1.280308365821838379e-01 2.873385883867740631e-02 3.237454965710639954e-02 5.037575960159301758e-02 2.864145673811435699e-02 6.779161095619201660e-01 5.392739921808242798e-02 29 | 1.381522417068481445e-01 3.041531331837177277e-02 4.522873461246490479e-02 5.267472937703132629e-02 2.759821712970733643e-02 6.501670479774475098e-01 5.576367303729057312e-02 30 | 1.296372115612030029e-01 3.595152497291564941e-02 4.742358624935150146e-02 4.344658181071281433e-02 2.719729579985141754e-02 6.524921059608459473e-01 6.385172158479690552e-02 31 | 5.031718015670776367e-01 3.339315578341484070e-02 9.005539864301681519e-02 1.041764318943023682e-01 3.699044883251190186e-02 1.574783772230148315e-01 7.473435252904891968e-02 32 | 5.307685136795043945e-01 3.266777843236923218e-02 9.201218187808990479e-02 1.009290814399719238e-01 3.501508757472038269e-02 1.427633166313171387e-01 6.584403663873672485e-02 33 | 5.344495177268981934e-01 3.238227590918540955e-02 9.015206992626190186e-02 1.055869236588478088e-01 3.285622969269752502e-02 1.407350897789001465e-01 6.383786350488662720e-02 34 | 5.297114253044128418e-01 3.344405815005302429e-02 8.862984925508499146e-02 1.093922853469848633e-01 3.553711622953414917e-02 1.335639357566833496e-01 6.972134858369827271e-02 35 | 5.031564831733703613e-01 3.414725139737129211e-02 9.081640839576721191e-02 1.194441169500350952e-01 3.224276006221771240e-02 1.528746187686920166e-01 6.731840968132019043e-02 36 | 5.054782032966613770e-01 3.410077095031738281e-02 9.166014194488525391e-02 1.084112301468849182e-01 3.129023313522338867e-02 1.525653004646301270e-01 7.649414241313934326e-02 37 | 5.152413249015808105e-01 3.313977271318435669e-02 8.325566351413726807e-02 1.129371002316474915e-01 3.430519625544548035e-02 1.500560045242309570e-01 7.106497883796691895e-02 38 | 4.801708757877349854e-01 3.515897691249847412e-02 9.512817859649658203e-02 1.114207655191421509e-01 3.371969237923622131e-02 
1.659474372863769531e-01 7.845396548509597778e-02 39 | 4.634827673435211182e-01 3.558567911386489868e-02 8.953073620796203613e-02 1.151075288653373718e-01 3.593860939145088196e-02 1.696114838123321533e-01 9.074316173791885376e-02 40 | 5.338096022605895996e-01 3.308479860424995422e-02 8.245141804218292236e-02 1.023382768034934998e-01 3.353553637862205505e-02 1.464537829160690308e-01 6.832652539014816284e-02 41 | 5.311237573623657227e-01 3.514011949300765991e-02 8.429657667875289917e-02 1.016578972339630127e-01 3.369308263063430786e-02 1.471010446548461914e-01 6.698754429817199707e-02 42 | 4.662582576274871826e-01 3.578637167811393738e-02 8.024627715349197388e-02 1.063621789216995239e-01 3.887055069208145142e-02 1.882137656211853027e-01 8.426260203123092651e-02 43 | 3.810746669769287109e-01 5.164561048150062561e-02 9.330693632364273071e-02 1.053026989102363586e-01 4.337673634290695190e-02 2.019078880548477173e-01 1.233854591846466064e-01 44 | 5.241291522979736328e-01 3.322152048349380493e-02 8.234718441963195801e-02 1.054174900054931641e-01 3.382226824760437012e-02 1.606709659099578857e-01 6.039146333932876587e-02 45 | 4.732211232185363770e-01 3.731831908226013184e-02 8.505529165267944336e-02 1.171029582619667053e-01 3.691057115793228149e-02 1.743639111518859863e-01 7.602787017822265625e-02 46 | 3.867214173078536987e-02 1.334833800792694092e-01 5.717650800943374634e-02 5.105338618159294128e-02 4.267430678009986877e-02 5.972114577889442444e-02 6.172191500663757324e-01 47 | 3.589279577136039734e-02 1.580660045146942139e-01 5.707128345966339111e-02 4.850009828805923462e-02 4.622421413660049438e-02 6.099285185337066650e-02 5.932527780532836914e-01 48 | 4.241966083645820618e-02 1.382216662168502808e-01 5.955597758293151855e-02 5.077299103140830994e-02 4.327210038900375366e-02 6.462983042001724243e-02 6.011277437210083008e-01 49 | 4.063285142183303833e-02 1.322430372238159180e-01 5.227447301149368286e-02 5.575856566429138184e-02 4.955764114856719971e-02 5.809348821640014648e-02 6.114399433135986328e-01 50 | 3.933155536651611328e-02 1.335334777832031250e-01 5.858688428997993469e-02 5.006084591150283813e-02 4.220550134778022766e-02 7.250458002090454102e-02 6.037771701812744141e-01 51 | 4.944851621985435486e-02 1.150498613715171814e-01 6.179780140519142151e-02 5.691127106547355652e-02 4.300087317824363708e-02 7.119239121675491333e-02 6.025993227958679199e-01 52 | 3.847502171993255615e-02 1.484078168869018555e-01 5.625396221876144409e-02 4.606339707970619202e-02 4.585820809006690979e-02 6.171460077166557312e-02 6.032269597053527832e-01 53 | 2.968768775463104248e-02 1.720697134733200073e-01 5.343652144074440002e-02 4.643030464649200439e-02 4.550908878445625305e-02 5.158670991659164429e-02 6.012800335884094238e-01 54 | 4.580913484096527100e-02 1.587826460599899292e-01 6.780745089054107666e-02 5.100804194808006287e-02 5.042636021971702576e-02 6.368242949247360229e-02 5.624839067459106445e-01 55 | 3.537496551871299744e-02 1.759605109691619873e-01 5.835678428411483765e-02 4.561939090490341187e-02 4.463983327150344849e-02 6.336323916912078857e-02 5.766853690147399902e-01 56 | 3.504528850317001343e-02 1.407878398895263672e-01 5.314100533723831177e-02 5.043087899684906006e-02 4.244241118431091309e-02 5.353575944900512695e-02 6.246168613433837891e-01 57 | 4.284802824258804321e-02 1.683670878410339355e-01 7.272753864526748657e-02 5.772206559777259827e-02 4.950516670942306519e-02 5.956367403268814087e-02 5.492663979530334473e-01 58 | 3.654861450195312500e-02 1.552699804306030273e-01 6.272755563259124756e-02 
5.383008345961570740e-02 4.194117337465286255e-02 5.636166781187057495e-02 5.933209657669067383e-01 59 | 3.503073379397392273e-02 1.550247967243194580e-01 5.140528082847595215e-02 4.615417495369911194e-02 4.579026997089385986e-02 5.846123397350311279e-02 6.081334948539733887e-01 60 | 3.261328116059303284e-02 1.511043459177017212e-01 5.037583410739898682e-02 4.554663971066474915e-02 4.420215263962745667e-02 5.537199601531028748e-02 6.207857131958007812e-01 61 | 1.149801462888717651e-01 3.017134033143520355e-02 9.910191595554351807e-02 5.969761013984680176e-01 3.366236388683319092e-02 6.030176207423210144e-02 6.480636447668075562e-02 62 | 1.701491177082061768e-01 3.979361802339553833e-02 1.101978048682212830e-01 4.808952510356903076e-01 4.017596319317817688e-02 7.381588965654373169e-02 8.497238159179687500e-02 63 | 1.052394583821296692e-01 3.328595682978630066e-02 1.021017059683799744e-01 5.885684490203857422e-01 3.642015531659126282e-02 6.091912090778350830e-02 7.346518337726593018e-02 64 | 1.083954572677612305e-01 3.191741928458213806e-02 1.070761680603027344e-01 5.809265375137329102e-01 3.490410372614860535e-02 6.516672670841217041e-02 7.161360234022140503e-02 65 | 1.336681991815567017e-01 3.020470589399337769e-02 9.784509986639022827e-02 5.718632936477661133e-01 3.434365242719650269e-02 7.198777794837951660e-02 6.008725240826606750e-02 66 | 1.138102337718009949e-01 3.036855906248092651e-02 1.204624846577644348e-01 5.740532875061035156e-01 3.179935365915298462e-02 5.892162770032882690e-02 7.058447599411010742e-02 67 | 1.138582751154899597e-01 3.116168454289436340e-02 1.118872463703155518e-01 5.818269252777099609e-01 3.313372656702995300e-02 6.373179703950881958e-02 6.440030783414840698e-02 68 | 1.087706834077835083e-01 3.110642172396183014e-02 1.129907891154289246e-01 5.858775973320007324e-01 3.269686177372932434e-02 5.836668983101844788e-02 7.019101828336715698e-02 69 | 1.191024109721183777e-01 3.142318129539489746e-02 1.029730513691902161e-01 5.784719586372375488e-01 3.471574559807777405e-02 6.793545931577682495e-02 6.537817418575286865e-02 70 | 1.458434909582138062e-01 3.463225811719894409e-02 1.040446683764457703e-01 5.151012539863586426e-01 3.368179500102996826e-02 7.382755726575851440e-02 9.286901354789733887e-02 71 | 1.659699380397796631e-01 3.644698485732078552e-02 1.039459481835365295e-01 4.941037595272064209e-01 3.416043892502784729e-02 6.891855597496032715e-02 9.645438194274902344e-02 72 | 1.581414490938186646e-01 3.376428037881851196e-02 1.002914831042289734e-01 5.130609273910522461e-01 3.313886374235153198e-02 7.135791331529617310e-02 9.024512767791748047e-02 73 | 1.379595100879669189e-01 3.789829462766647339e-02 1.039057746529579163e-01 5.061457753181457520e-01 3.470750525593757629e-02 7.453095912933349609e-02 1.048522293567657471e-01 74 | 1.501872986555099487e-01 3.589059785008430481e-02 1.157000586390495300e-01 4.975796639919281006e-01 3.660218790173530579e-02 7.834140956401824951e-02 8.569878339767456055e-02 75 | 5.140593051910400391e-01 3.336402401328086853e-02 8.745007961988449097e-02 1.141018345952033997e-01 3.613149374723434448e-02 1.499456167221069336e-01 6.494761258363723755e-02 76 | 1.333442628383636475e-01 1.081301271915435791e-01 9.677359461784362793e-02 1.066264137625694275e-01 7.702838629484176636e-02 1.169565171003341675e-01 3.611407577991485596e-01 77 | 6.477854400873184204e-02 1.444229334592819214e-01 7.984091341495513916e-02 7.902268320322036743e-02 6.639648228883743286e-02 8.651427924633026123e-02 4.790241122245788574e-01 78 | 1.334589123725891113e-01 
1.187324225902557373e-01 9.367379546165466309e-02 1.037445440888404846e-01 8.131598681211471558e-02 1.591640561819076538e-01 3.099102675914764404e-01 79 | 1.551461219787597656e-01 9.244783222675323486e-02 1.092637777328491211e-01 1.577205061912536621e-01 7.736475765705108643e-02 1.223962604999542236e-01 2.856607437133789062e-01 80 | 1.809124648571014404e-01 1.023331582546234131e-01 1.094948425889015198e-01 1.336903423070907593e-01 7.328374683856964111e-02 1.476457715034484863e-01 2.526396811008453369e-01 81 | 1.390016973018646240e-01 9.925974160432815552e-02 1.102549731731414795e-01 1.348739862442016602e-01 7.579182088375091553e-02 1.209842562675476074e-01 3.198335766792297363e-01 82 | 1.150280237197875977e-01 1.260419189929962158e-01 7.636820524930953979e-02 1.011653915047645569e-01 8.292925357818603516e-02 1.133173108100891113e-01 3.851498961448669434e-01 83 | 1.287327706813812256e-01 1.114885360002517700e-01 1.020146384835243225e-01 1.274430602788925171e-01 7.430212944746017456e-02 1.102327182888984680e-01 3.457861840724945068e-01 84 | 1.050966903567314148e-01 1.255373656749725342e-01 9.619902819395065308e-02 1.058389917016029358e-01 7.947504520416259766e-02 1.160829290747642517e-01 3.717699348926544189e-01 85 | 2.572717666625976562e-01 9.170681238174438477e-02 1.171267703175544739e-01 1.119260489940643311e-01 5.456022918224334717e-02 1.652697026729583740e-01 2.021386623382568359e-01 86 | 5.674485117197036743e-02 1.670028120279312134e-01 7.525172829627990723e-02 7.949704676866531372e-02 6.022566929459571838e-02 7.986124604940414429e-02 4.814167022705078125e-01 87 | 5.750272795557975769e-02 1.389541327953338623e-01 6.990885734558105469e-02 6.829702854156494141e-02 6.145592033863067627e-02 8.557900041341781616e-02 5.183023810386657715e-01 88 | 7.375598698854446411e-02 1.387443691492080688e-01 6.750514358282089233e-02 9.377470612525939941e-02 6.245809048414230347e-02 9.586577117443084717e-02 4.678958952426910400e-01 89 | 6.200886517763137817e-02 1.333556771278381348e-01 6.667350977659225464e-02 7.732859253883361816e-02 6.173191592097282410e-02 7.721804827451705933e-02 5.216833949089050293e-01 90 | 6.837572902441024780e-02 1.396013051271438599e-01 6.737218797206878662e-02 8.447577059268951416e-02 6.643436104059219360e-02 1.082622036337852478e-01 4.654785096645355225e-01 91 | 4.997546225786209106e-02 1.877751648426055908e-01 7.359099388122558594e-02 6.736035645008087158e-02 5.361466109752655029e-02 6.131826341152191162e-02 5.063651204109191895e-01 92 | 3.420634195208549500e-02 1.632033735513687134e-01 5.504674836993217468e-02 6.149784475564956665e-02 4.847835376858711243e-02 5.433557555079460144e-02 5.832317471504211426e-01 93 | 3.614548593759536743e-02 2.250378727912902832e-01 6.277496367692947388e-02 5.353133752942085266e-02 5.425567552447319031e-02 6.393615156412124634e-02 5.043184161186218262e-01 94 | 3.292379155755043030e-02 1.772966533899307251e-01 5.557262524962425232e-02 5.831120535731315613e-02 4.917667433619499207e-02 5.488506332039833069e-02 5.718340873718261719e-01 95 | 5.046849697828292847e-02 2.039373368024826050e-01 8.067864179611206055e-02 7.657592743635177612e-02 5.316416919231414795e-02 6.132584065198898315e-02 4.738495647907257080e-01 96 | 5.418673902750015259e-02 2.220365852117538452e-01 9.442870318889617920e-02 7.511465251445770264e-02 5.428933724761009216e-02 6.830126047134399414e-02 4.316426813602447510e-01 97 | 4.189762473106384277e-02 1.849231272935867310e-01 1.051352918148040771e-01 7.081984728574752808e-02 4.537677019834518433e-02 6.562038511037826538e-02 
4.862270057201385498e-01 98 | 3.693402931094169617e-02 2.142885327339172363e-01 6.144808605313301086e-02 5.027781799435615540e-02 5.220115184783935547e-02 5.084617435932159424e-02 5.340041518211364746e-01 99 | 5.305769294500350952e-02 1.940278559923171997e-01 1.043965816497802734e-01 6.842438876628875732e-02 5.549040436744689941e-02 6.356114894151687622e-02 4.610420167446136475e-01 100 | 4.062966257333755493e-02 2.224535495042800903e-01 8.720973879098892212e-02 6.590262055397033691e-02 5.283495411276817322e-02 5.612201243638992310e-02 4.748474061489105225e-01 101 | 5.056896805763244629e-02 2.013969570398330688e-01 1.231455281376838684e-01 8.691898733377456665e-02 5.105160549283027649e-02 7.033866643905639648e-02 4.165792763233184814e-01 102 | 4.947416856884956360e-02 2.058863341808319092e-01 7.510770857334136963e-02 7.529673725366592407e-02 5.660678073763847351e-02 5.792718380689620972e-02 4.797011017799377441e-01 103 | 4.201318323612213135e-02 1.927892118692398071e-01 8.086338639259338379e-02 6.255368888378143311e-02 5.047253891825675964e-02 5.622312426567077637e-02 5.150848627090454102e-01 104 | 4.250044003129005432e-02 2.083901017904281616e-01 6.909964978694915771e-02 5.777588859200477600e-02 5.441035330295562744e-02 6.199487671256065369e-02 5.058286786079406738e-01 105 | 4.536971077322959900e-02 1.979545652866363525e-01 9.331937879323959351e-02 7.347242534160614014e-02 5.449029803276062012e-02 6.403182446956634521e-02 4.713617861270904541e-01 106 | -------------------------------------------------------------------------------- /demo/run_demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Running Neural ADMIXTURE on demo data..." 3 | start=`date +%s` 4 | neural-admixture train --k 7 --data_path data/demo_data.bed --save_dir outputs --name demo_run --epochs 5 --seed 42 5 | end=`date +%s` 6 | runtime=$((end-start)) 7 | echo "Demo run in ${runtime} seconds." 8 | echo "Running diagnostics..." 9 | python3 run_diagnostics.py 10 | -------------------------------------------------------------------------------- /demo/run_diagnostics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | TOL = 1e-4 5 | 6 | def run_checks(): 7 | # Load actual outputs 8 | try: 9 | Q = np.genfromtxt('./outputs/demo_run.7.Q') 10 | P = np.genfromtxt('./outputs/demo_run.7.P') 11 | except FileNotFoundError as e: 12 | print('Could not find output files. Please make sure to run the demo before running the test.') 13 | return False 14 | except Exception as e: 15 | raise e 16 | # Load expected outputs 17 | try: 18 | expected_Q = np.genfromtxt('./outputs/demo_run.7.Q.expected') 19 | expected_P = np.genfromtxt('./outputs/demo_run.7.P.expected') 20 | except FileNotFoundError as e: 21 | print('Could not expected output files. 
Please make sure they are present in the outputs folder of the demo.') 22 | return False 23 | except Exception as e: 24 | raise e 25 | # Check mean error is below tolerance 26 | return np.allclose(Q, expected_Q) and np.allclose(P, expected_P) 27 | 28 | if __name__ == '__main__': 29 | passed = run_checks() 30 | print(f'Output and expected output are {"" if passed else "NOT "}similar.') 31 | sys.exit(0) 32 | -------------------------------------------------------------------------------- /neural_admixture/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import __version__, __version_tuple__ -------------------------------------------------------------------------------- /neural_admixture/entry.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import os 4 | import torch 5 | import torch.multiprocessing as mp 6 | import platform 7 | import sys 8 | 9 | from ._version import __version__ 10 | 11 | from .src import utils 12 | from .src.svd import RSVD 13 | 14 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") 15 | log = logging.getLogger(__name__) 16 | 17 | from colorama import init, Fore, Style 18 | 19 | def print_neural_admixture_banner(version: str="2.0") -> None: 20 | """ 21 | Display the Neural Admixture banner with version and author information in color. 22 | """ 23 | init(autoreset=True) 24 | 25 | banner = fr""" 26 | {Fore.CYAN} 27 | _ _ _ ___ ____ __ __ _______ _________ _ _ _____ ______ 28 | | \ | | | | / _ \| _ \| \/ |_ _\ \ / /__ __| | | | __ \| ____| 29 | | \| | ___ _ _ _ __ __ _| | / /_\ | | | | \ / | | | \ V / | | | | | | |__) | |__ 30 | | . ` |/ _ \ | | | '__/ _` | | | _ | | | | |\/| | | | > < | | | | | | _ /| __| 31 | | |\ | __/ |_| | | | (_| | | | | | | |_| | | | |_| |_ / . \ | | | |__| | | \ \| |____ 32 | |_| \_|\___|\__,_|_| \__,_|_| \_| |_/____/|_| |_|_____/_/ \_\ |_| \____/|_| \_\______| 33 | {Style.RESET_ALL} 34 | """ 35 | 36 | info = f""" 37 | {Fore.CYAN} Version: {version}{Style.RESET_ALL} 38 | {Fore.CYAN} Authors: Joan Saurina Ricós, Albert Dominguez Mantes, 39 | Daniel Mas Montserrat, Alexander G. Ioannidis.{Style.RESET_ALL} 40 | {Fore.CYAN} Help: https://github.com/AI-sandbox/neural-admixture{Style.RESET_ALL} 41 | """ 42 | 43 | log.info("\n" + banner + info) 44 | 45 | def main(): 46 | print_neural_admixture_banner(__version__) 47 | arg_list = tuple(sys.argv) 48 | 49 | assert len(arg_list) > 1, 'Please provide either the argument "train" or "infer" to choose running mode.' 50 | 51 | # CONTROL NUMBER OF THREADS: 52 | if '--num_cpus' in arg_list: 53 | num_cpus_index = arg_list.index('--num_cpus') + 1 54 | if num_cpus_index < len(arg_list): 55 | num_cpus = int(arg_list[num_cpus_index]) 56 | num_threads = num_cpus//2 57 | else: 58 | num_cpus = 1 59 | num_threads = 1 60 | 61 | os.environ["MKL_NUM_THREADS"] = "1" 62 | os.environ["MKL_MAX_THREADS"] = "1" 63 | os.environ["OMP_NUM_THREADS"] = "1" 64 | os.environ["OMP_MAX_THREADS"] = "1" 65 | os.environ["NUMEXPR_NUM_THREADS"] = "1" 66 | os.environ["NUMEXPR_MAX_THREADS"] = "1" 67 | os.environ["OPENBLAS_NUM_THREADS"] = "1" 68 | os.environ["OPENBLAS_MAX_THREADS"] = "1" 69 | 70 | log.info(f" There are {num_cpus} CPU's available for this execution. 
Hence, using {num_threads} threads.") 71 | 72 | #CONTROL OS: 73 | system = platform.system() 74 | if system == "Linux": 75 | log.info(" Operating system is Linux!") 76 | os.environ["CC"] = "gcc" 77 | os.environ["CXX"] = "g++" 78 | elif system == "Darwin": 79 | log.info(" Operating system is Darwin (Mac OS)!") 80 | os.environ["CC"] = "clang" 81 | os.environ["CXX"] = "clang++" 82 | elif system == "Windows": 83 | log.info(" Operating system is Windows!") 84 | pass 85 | else: 86 | log.info(f"System not recognized: {system}") 87 | sys.exit(1) 88 | 89 | # CONTROL NUMBER OF DEVICES: 90 | num_gpus = 0 91 | if '--num_gpus' in arg_list: 92 | num_gpus_index = arg_list.index('--num_gpus') + 1 93 | if num_gpus_index < len(arg_list): 94 | num_gpus = int(arg_list[num_gpus_index]) 95 | 96 | max_devices = torch.cuda.device_count() if not torch.backends.mps.is_available() else 1 97 | if num_gpus > max_devices: 98 | log.warning(f" Requested {num_gpus} GPUs, but only {max_devices} are available. Using {max_devices} GPUs.") 99 | num_gpus = max_devices 100 | 101 | # CONTROL SEED: 102 | seed = int(arg_list[arg_list.index('--seed') + 1]) if '--seed' in arg_list else 42 103 | utils.set_seed(seed) 104 | 105 | # BEGIN TRAIN OF INFERENCE: 106 | if sys.argv[1]=='train': 107 | from .src import main 108 | 109 | data_path = arg_list[arg_list.index('--data_path') + 1] 110 | pops_path = arg_list[arg_list.index('--pops_path') + 1] if '--pops_path' in arg_list else None 111 | n_components = int(arg_list[arg_list.index('--n_components') + 1]) if '--n_components' in arg_list else 8 112 | data, pops, N, M = utils.read_data(data_path, pops_path) 113 | log.info("") 114 | log.info(" Running SVD...") 115 | log.info("") 116 | V = RSVD(data, N, M, n_components, seed) 117 | data = torch.as_tensor(data, dtype=torch.uint8).share_memory_() 118 | 119 | if num_gpus>1: 120 | log.info(" Entering multi-GPU training...") 121 | mp.spawn(main.main, args=(arg_list[2:], num_gpus, data, V, pops), nprocs=num_gpus) 122 | else: 123 | log.info(" Entering single-GPU or CPU training...") 124 | sys.exit(main.main(0, arg_list[2:], num_gpus, data, V, pops)) 125 | 126 | elif sys.argv[1]=='infer': 127 | from .src import inference 128 | log.info(" Entering inference...") 129 | sys.exit(inference.main(arg_list[2:])) 130 | 131 | else: 132 | log.error(f' Invalid argument {arg_list[1]}. Please run either "neural-admixture train" or "neural-admixture infer"') 133 | sys.exit(1) -------------------------------------------------------------------------------- /neural_admixture/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-sandbox/neural-admixture/270d8ec14135dc5ebbbfc13bc9571178b69c9f31/neural_admixture/model/__init__.py -------------------------------------------------------------------------------- /neural_admixture/model/neural_admixture.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import json 4 | import torch 5 | 6 | from pathlib import Path 7 | from typing import Optional, Tuple, List 8 | from tqdm.auto import tqdm 9 | 10 | from ..src.loaders import dataloader_admixture 11 | 12 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") 13 | log = logging.getLogger(__name__) 14 | 15 | class NeuralEncoder(torch.nn.Module): 16 | """ 17 | Neural network encoder component. Creates a separate linear head 18 | for each specified value of k in the provided ks list. 
19 | """ 20 | def __init__(self, input_size, ks): 21 | """ 22 | Args: 23 | input_size (int): Dimension of the input features (output of common encoder). 24 | ks (list[int]): A list of K values for which to create encoder heads. 25 | """ 26 | super().__init__() 27 | self.ks = sorted(ks) 28 | self.min_k_val = min(self.ks) 29 | self.heads = torch.nn.ModuleList([torch.nn.Linear(input_size, k_val, bias=True) for k_val in self.ks]) 30 | 31 | def _get_head_for_k(self, k_val): 32 | """Retrieves the specific encoder head for a given K value.""" 33 | index = k_val - self.min_k_val 34 | if index < 0 or index >= len(self.heads): 35 | raise ValueError(f"K value {k_val} not found in the specified ks list {self.ks}") 36 | return self.heads[index] 37 | 38 | def forward(self, X): 39 | """ 40 | Forward pass through the encoder. 41 | 42 | Args: 43 | X (torch.Tensor): Input data tensor from the common encoder. 44 | 45 | Returns: 46 | list[torch.Tensor]: A list of output tensors (hidden states), one for each K in self.ks. 47 | """ 48 | outputs = [self._get_head_for_k(k_val)(X) for k_val in self.ks] 49 | return outputs 50 | 51 | class NeuralDecoder(torch.nn.Module): 52 | """ 53 | Neural network decoder component. Creates a separate linear decoder 54 | for each specified value of k, initialized with corresponding parts 55 | of the initial P matrix. 56 | """ 57 | def __init__(self, output_size, inits, ks): 58 | """ 59 | Args: 60 | output_size (int): Dimension of the output (number of markers M). 61 | inits (torch.Tensor): The initial P matrix (M, sum(ks)). 62 | ks (list[int]): A list of K values for which to create decoders. 63 | """ 64 | super().__init__() 65 | self.output_size = output_size 66 | self.ks = sorted(ks) 67 | self.min_k_val = min(self.ks) 68 | 69 | layers = [None]*len(self.ks) 70 | ini = 0 71 | for i in range(len(self.ks)): 72 | end = ini+self.ks[i] 73 | layers[i] = torch.nn.Linear(self.ks[i], output_size, bias=False) 74 | layers[i].weight = torch.nn.Parameter(inits[ini:end].T) 75 | ini = end 76 | self.decoders = torch.nn.ModuleList(layers) 77 | 78 | def _get_decoder_for_k(self, k_val): 79 | """Retrieves the specific decoder for a given K value.""" 80 | index = k_val - self.min_k_val 81 | return self.decoders[index] 82 | 83 | def forward(self, probs): 84 | """ 85 | Forward pass through the decoder. 86 | 87 | Args: 88 | probs (list[torch.Tensor]): A list of probability tensors. 89 | 90 | Returns: 91 | list[torch.Tensor]: A list of output tensors (reconstructions). 92 | """ 93 | outputs = [] 94 | for i, k_val in enumerate(self.ks): 95 | decoder = self._get_decoder_for_k(self.ks[i]) 96 | output = decoder(probs[i]) 97 | outputs.append(torch.clamp_(output, 0, 1)) 98 | return outputs 99 | 100 | class Q_P(torch.nn.Module): 101 | """ 102 | Q_P model. 103 | 104 | Args: 105 | hidden_size (int): The size of the hidden layer. 106 | num_features (int): The number of features in the input data. 107 | k (int): The number of output classes or components. 108 | V (torch.Tensor): The projection matrix used to map inputs to PCA space. 109 | P (Optional[torch.Tensor], optional): The P matrix to be optimized. Defaults to None. 110 | is_train (bool): Indicates whether the model is in training mode (True) or inference mode (False). Defaults to True. 111 | """ 112 | def __init__(self, hidden_size: int, num_features: int, V: torch.Tensor=None, P: torch.Tensor=None, 113 | ks_list: List=[], is_train: bool=True) -> None: 114 | """ 115 | Initialize the Q_P module with the given parameters. 
116 | 117 | Args: 118 | hidden_size (int): The size of the hidden layer. 119 | num_features (int): The number of features in the input data. 120 | k (int): The number of output classes or components. 121 | activation (torch.nn.Module): The activation function to use in the encoder. 122 | V (torch.Tensor): The projection matrix used to map inputs to PCA space. 123 | P (Optional[torch.Tensor], optional): The P matrix to be optimized. Defaults to None. 124 | is_train (bool): Indicates whether the model is in training mode (True) or inference mode (False). Defaults to True. 125 | """ 126 | super(Q_P, self).__init__() 127 | self.ks_list = ks_list 128 | 129 | if V is not None: 130 | self.V = torch.nn.Parameter(V) 131 | else: 132 | self.V = None 133 | 134 | self.num_features = num_features 135 | self.batch_norm = torch.nn.RMSNorm(self.num_features, eps=1e-8) 136 | self.encoder_activation = torch.nn.ReLU(inplace=True) 137 | self.hidden_size = hidden_size 138 | self.common_encoder = torch.nn.Sequential( 139 | torch.nn.Linear(self.num_features, self.hidden_size, bias=True), 140 | self.encoder_activation) 141 | self.multihead_encoder = NeuralEncoder(self.hidden_size, ks=self.ks_list) 142 | if P is not None: 143 | self.decoders = NeuralDecoder(self.num_features, P, ks=self.ks_list) 144 | self.softmax = torch.nn.Softmax(dim=1) 145 | 146 | if is_train: 147 | self.return_func = self._return_training 148 | else: 149 | self.return_func = self._return_infer 150 | 151 | def _return_training(self, probs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 152 | return self.decoders(probs), probs 153 | 154 | def _return_infer(self, probs: torch.Tensor) -> torch.Tensor: 155 | return probs 156 | 157 | def forward(self, X: torch.Tensor) -> torch.Tensor: 158 | """ 159 | Perform a forward pass with the given batch of input data. 160 | 161 | Args: 162 | X (torch.Tensor): A tensor of input data. 163 | 164 | Returns: 165 | Union[Tuple[torch.Tensor, torch.Tensor], torch.Tensor]: 166 | - If training: A tuple containing the transformed tensor (clamped between 0 and 1) and the probability tensor. 167 | - If inference: The probability tensor. 168 | """ 169 | X = X.float() / 2 170 | X = torch.where(X == 1.5, 0.0, X) 171 | 172 | X_pca = X@self.V 173 | X_pca = self.batch_norm(X_pca) 174 | enc = self.common_encoder(X_pca) 175 | hid_states = self.multihead_encoder(enc) 176 | probs = [self.softmax(h) for h in hid_states] 177 | return self.return_func(probs), X 178 | 179 | @torch.no_grad() 180 | def restrict_P(self): 181 | """ 182 | Restrict the values of P matrix within the range [0, 1]. 183 | """ 184 | for dec in self.decoders.decoders: 185 | dec.weight.data.clamp_(0., 1.) 186 | 187 | def create_custom_adam(self, device: torch.device, lr: float=1e-5) -> torch.optim.Adam: 188 | """ 189 | Creates a custom Adam optimizer with different learning rates for different phases. 190 | 191 | Args: 192 | lr (float): Learning rate for all parameters. 193 | 194 | Returns: 195 | optim.Adam: The Adam optimizer configured with the specified learning rates. 
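
        Note: in this implementation the same learning rate is passed to every
        parameter group, and the fused Adam kernel is requested whenever the
        device is not MPS.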
196 | """ 197 | p = [ 198 | {'params': self.multihead_encoder.parameters(), 'lr': lr}, 199 | {'params': self.common_encoder.parameters(), 'lr': lr}, 200 | {'params': self.batch_norm.parameters(), 'lr': lr}, 201 | {'params': self.V, 'lr': lr}, 202 | {'params': self.decoders.parameters(), 'lr': lr} 203 | ] 204 | return torch.optim.Adam(p, betas=[0.9, 0.95], fused=device.type != 'mps') 205 | 206 | def save_config(self, name: str, save_dir: str) -> None: 207 | """ 208 | Saves the model configuration to a JSON file in the specified directory. 209 | 210 | Args: 211 | name (str): The name of the configuration file (without extension). 212 | save_dir (str): The directory where the configuration file should be saved. 213 | """ 214 | _activations = { 215 | torch.nn.modules.activation.ReLU: 'relu', 216 | torch.nn.modules.activation.Tanh: 'tanh', 217 | torch.nn.modules.activation.GELU: 'gelu' 218 | } 219 | 220 | _config = { 221 | 'ks': self.ks_list, 222 | 'num_features': self.num_features, 223 | 'hidden_size': self.hidden_size, 224 | 'activation': _activations.get(type(self.encoder_activation), str(self.encoder_activation)), 225 | } 226 | 227 | with open(Path(save_dir)/f"{name}_config.json", 'w') as fb: 228 | json.dump(_config, fb) 229 | log.info(" Configuration file saved.") 230 | return 231 | 232 | class NeuralAdmixture(): 233 | """ 234 | Neural Admixture class. 235 | 236 | Args: 237 | k (int): Number of components for clustering. 238 | epochs (int): Number of training epochs. 239 | batch_size (int): Size of each training batch. 240 | learning_rate (float): Learning rate for optimization. 241 | device (torch.device): Device to perform computations on (e.g., 'cuda' or 'cpu'). 242 | seed (int): Random seed for reproducibility. 243 | num_gpus (int): Number of GPUs available for training. 244 | master (bool): Indicates if the current process is the master process (used for logging/output control in multi-GPU settings). 245 | pack2bit (Any): Encoding used for compressing data in 2 bit format. 246 | supervised_loss_weight (Optional[float]): Weight of the supervised loss component (if using supervised training). Defaults to None. 247 | """ 248 | def __init__(self, k: int, epochs: int, batch_size: int, learning_rate: float, device: torch.device, seed: int, num_gpus: int, 249 | master: bool, pack2bit, min_k: int, max_k: int, supervised_loss_weight: Optional[float]=100): 250 | """ 251 | Initializes the NeuralAdmixture class with training parameters and settings. 252 | 253 | Args: 254 | k (int): Number of components for clustering. 255 | epochs (int): Number of training epochs. 256 | batch_size (int): Size of each training batch. 257 | learning_rate (float): Learning rate for optimization. 258 | device (torch.device): Device to perform computations on (e.g., 'cuda' or 'cpu'). 259 | seed (int): Random seed for reproducibility. 260 | num_gpus (int): Number of GPUs available for training. 261 | master (bool): Indicates if the current process is the master process (used for logging/output control in multi-GPU settings). 262 | pack2bit (Any): Encoding used for compressing data in 2 bit format. 263 | supervised_loss_weight (Optional[float]): Weight of the supervised loss component (if using supervised training). Defaults to None. 
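            min_k (int): Minimum K of the multi-head range (used when k is None).
            max_k (int): Maximum K of the multi-head range (used when k is None).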
264 |         """
265 |         super(NeuralAdmixture, self).__init__()
266 | 
267 |         # Model configuration:
268 |         self.k = k
269 |         self.min_k = min_k
270 |         self.max_k = max_k
271 | 
272 |         if k is not None:
273 |             self.ks_list = [self.k]
274 |         else:
275 |             self.ks_list = list(range(self.min_k, self.max_k + 1))
276 | 
277 |         self.num_gpus = num_gpus
278 |         self.device = device
279 |         self.master = master
280 | 
281 |         # Random seed configuration
282 |         self.seed = seed
283 |         self.generator = torch.Generator().manual_seed(self.seed)
284 | 
285 |         # Training configuration:
286 |         self.epochs = epochs
287 |         self.batch_size = batch_size//self.num_gpus if self.num_gpus>0 else batch_size
288 |         self.loss_function = torch.nn.BCELoss(reduction='sum').to(device)
289 |         self.lr = learning_rate
290 | 
291 |         # Supervised version:
292 |         self.supervised_loss_weight = supervised_loss_weight
293 |         self.loss_function_supervised = torch.nn.CrossEntropyLoss(reduction='sum')
294 | 
295 |         # Pack2bit function
296 |         self.pack2bit = pack2bit
297 | 
298 |     def initialize_model(self, P: torch.Tensor, hidden_size: int, num_features: int, V: torch.Tensor, ks_list: List) -> None:
299 |         """
300 |         Initializes the Q_P model and sets up distributed training if applicable.
301 | 
302 |         Args:
303 |             P (torch.Tensor): Tensor representing the initial P matrix (e.g., allele frequencies).
304 |             hidden_size (int): Number of units in the hidden layer of the encoder.
305 |             num_features (int): Dimensionality of the input features.
306 |             ks_list (List): List of K values; one encoder head and one decoder are created per K.
307 |             V (torch.Tensor): PCA projection matrix used to reduce input dimensionality.
308 | 
309 |         Returns:
310 |             None
311 |         """
312 |         self.base_model = Q_P(hidden_size, num_features, V, P, ks_list).to(self.device)
313 |         if self.device.type == 'cuda':
314 |             self.model = torch.compile(self.base_model)
315 |         if self.num_gpus > 1 and torch.distributed.is_initialized():
316 |             self.model = torch.nn.parallel.DistributedDataParallel(self.base_model, device_ids=[self.device],
317 |                                                                     output_device=[self.device], find_unused_parameters=False)
318 |             self.raw_model = self.model.module
319 |         else:
320 |             self.model = self.base_model
321 |             self.raw_model = self.base_model
322 | 
323 |     def launch_training(self, P: torch.Tensor, data: torch.Tensor, hidden_size:int, num_features:int,
324 |                         V: torch.Tensor, M: int, N: int, pops: Optional[torch.Tensor]=None) -> Tuple[torch.Tensor, torch.Tensor, torch.nn.Module]:
325 |         """
326 |         Launches the training process, which includes two distinct phases and a final inference step to compute Q.
327 | 
328 |         Args:
329 |             P (torch.Tensor): Initial tensor for the P matrix.
330 |             data (torch.Tensor): Input data matrix (e.g., genotype matrix).
331 |             hidden_size (int): Size of the hidden layer in the encoder.
332 |             num_features (int): Dimensionality of the encoder input after the PCA projection (number of components).
333 |             (The K values are taken from self.ks_list, which is set at construction time.)
334 |             V (torch.Tensor): PCA projection matrix used for dimensionality reduction.
335 |             M (int): Number of SNPs (columns) in the dataset.
336 |             N (int): Number of individuals (rows) in the dataset.
337 |             pops (Optional[torch.Tensor]): Optional population labels for the supervised loss (if available).
338 | 
339 |         Returns:
340 |             Tuple[torch.Tensor, torch.Tensor, torch.nn.Module]: A tuple containing:
341 |                 - Inferred Q matrices (one per K).
342 |                 - Trained P matrices (one per K).
343 |                 - Trained raw model (Q_P instance).
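            Each returned list holds one matrix per value of K in ks_list; for example
            (illustrative), with ks_list=[3, 4, 5] the Q list contains three matrices
            of shapes (N, 3), (N, 4) and (N, 5).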
344 | """ 345 | #SETUP: 346 | self.M = M 347 | self.N = N 348 | torch.set_float32_matmul_precision('medium') 349 | torch.set_flush_denormal(True) 350 | self.initialize_model(P, hidden_size, num_features, V, self.ks_list) 351 | if pops is None: 352 | pops = torch.zeros(data.size(0), device=self.device) 353 | run_epoch = self._run_epoch 354 | else: 355 | run_epoch = self._run_epoch_supervised 356 | 357 | #TRAINING: 358 | if self.master: 359 | log.info("") 360 | log.info(" Starting training...") 361 | log.info("") 362 | self.optimizer = self.raw_model.create_custom_adam(device=self.device, lr=self.lr) 363 | dataloader = dataloader_admixture(data, self.batch_size, self.num_gpus, self.seed, self.generator, pops, shuffle=True) 364 | for epoch in tqdm(range(self.epochs), desc="Epochs", file=sys.stderr): 365 | run_epoch(epoch, dataloader) 366 | 367 | #INFERENCE OF Q's: 368 | self.raw_model.return_func = self.raw_model._return_infer 369 | batch_size_inference_Q = min(data.shape[0], 5000) 370 | self.model.eval() 371 | Qs = [torch.tensor([], device=self.device) for _ in self.ks_list] 372 | with torch.inference_mode(): 373 | dataloader = dataloader_admixture(data, batch_size_inference_Q, 1 if self.num_gpus >= 1 else 0, self.seed, self.generator, pops, shuffle=False) 374 | for x_step, _ in dataloader: 375 | if self.pack2bit is not None: 376 | unpacked_step = torch.empty((x_step.shape[0], self.M), dtype=torch.uint8, device=self.device) 377 | self.pack2bit.unpack2bit_gpu_to_gpu(x_step, unpacked_step) 378 | probs, _ = self.model(unpacked_step) 379 | else: 380 | probs, _ = self.model(x_step) 381 | for i in range(len(self.ks_list)): 382 | Qs[i]= torch.cat((Qs[i], probs[i]), dim=0) 383 | 384 | if self.master: 385 | log.info("") 386 | log.info(" Training finished!") 387 | log.info("") 388 | 389 | #RETURN OUTPUT: 390 | self.display_divergences(self.k) 391 | return self.process_results(Qs) 392 | 393 | def _run_epoch(self, epoch, dataloader: torch.utils.data.DataLoader): 394 | """ 395 | Executes one epoch of training. 396 | 397 | Args: 398 | epoch (int): Number of current epoch. 399 | dataloader (Dataloader): Dataloader 400 | """ 401 | loss_acc = 0 402 | for x_step, _ in dataloader: 403 | if self.pack2bit is not None: 404 | unpacked_step = torch.empty((x_step.shape[0], self.M), dtype=torch.uint8, device=self.device) 405 | self.pack2bit.unpack2bit_gpu_to_gpu(x_step, unpacked_step) 406 | loss = self._run_step(unpacked_step) 407 | else: 408 | loss = self._run_step(x_step) 409 | loss.backward() 410 | self.optimizer.step() 411 | self.raw_model.restrict_P() 412 | 413 | loss_acc += loss.item() 414 | 415 | if epoch%5==0: 416 | log.info(f" Loss in epoch {epoch:3d} on device {self.device} is {loss_acc:,.0f}") 417 | 418 | def _run_step(self, x_step: torch.Tensor) -> torch.Tensor: 419 | """ 420 | Executes one training step. 421 | 422 | Args: 423 | x_step (torch.Tensor): Batch of X data. 424 | 425 | Returns: 426 | torch.Tensor: Computed loss for the batch. 427 | """ 428 | self.optimizer.zero_grad(set_to_none=True) 429 | recs, x_step = self.model(x_step) 430 | loss = sum((self.loss_function(rec, x_step) for rec in recs[0])) 431 | return loss 432 | 433 | def _run_epoch_supervised(self, epoch, dataloader: torch.utils.data.DataLoader): 434 | """ 435 | Executes one epoch of training (supervised version). 436 | 437 | Args: 438 | epoch (int): Number of current epoch. 439 | dataloader (Dataloader): Dataloader. 
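
        In this mode the per-batch loss is the BCE reconstruction term plus
        supervised_loss_weight times the cross-entropy between the first head's
        probabilities and pops_step (see _run_step_supervised).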
440 | """ 441 | loss_acc = 0 442 | for x_step, pops_step in dataloader: 443 | if self.pack2bit is not None: 444 | unpacked_step = torch.empty((x_step.shape[0], self.M), dtype=torch.uint8, device=self.device) 445 | self.pack2bit.unpack2bit_gpu_to_gpu(x_step, unpacked_step) 446 | loss = self._run_step_supervised(unpacked_step, pops_step) 447 | else: 448 | loss = self._run_step_supervised(x_step, pops_step) 449 | 450 | loss.backward() 451 | self.optimizer.step() 452 | self.raw_model.restrict_P() 453 | 454 | loss_acc += loss.item() 455 | 456 | if epoch%2==0: 457 | log.info(f" Loss in epoch {epoch:3d} on device {self.device} is {int(loss_acc):,.0f}") 458 | 459 | def _run_step_supervised(self, x_step: torch.Tensor, pops_step: torch.Tensor) -> torch.Tensor: 460 | """ 461 | Executes one training step. 462 | 463 | Args: 464 | X (torch.Tensor): Batch of X data. 465 | 466 | Returns: 467 | torch.Tensor: Computed loss for the batch. 468 | """ 469 | self.optimizer.zero_grad(set_to_none=True) 470 | out, x_step = self.model(x_step) 471 | loss = self.loss_function(out[0][0], x_step) 472 | loss += self.supervised_loss_weight*self.loss_function_supervised(out[1][0], pops_step) 473 | return loss 474 | 475 | def display_divergences(self, k) -> None: 476 | """ 477 | Displays pairwise Fst divergences between estimated populations. 478 | 479 | Args: 480 | k (int): Number of populations (K) used in the model. 481 | 482 | Details: 483 | - The function calculates and prints Hudson's Fst for each pair 484 | of estimated populations, providing a measure of genetic divergence. 485 | """ 486 | if self.master: 487 | for i, k in enumerate(self.ks_list): 488 | dec = self.raw_model.decoders.decoders[i].weight.data 489 | header = '\t'.join([f'Pop{p}' for p in range(k - 1)]) 490 | 491 | log.info(" Results:") 492 | log.info(f'\n Fst divergences between estimated populations: (K = {k})') 493 | log.info("") 494 | log.info(f' \t{header}') 495 | log.info(' Pop0') 496 | 497 | for j in range(1, k): 498 | output = f' Pop{j}' 499 | pop2 = dec[:, j] 500 | 501 | for l in range(j): 502 | pop1 = dec[:, l] 503 | fst = self._hudsons_fst(pop1, pop2) 504 | output += f"\t{fst:0.3f}" 505 | 506 | log.info(output) 507 | 508 | log.info("\n") 509 | 510 | def process_results(self, Qs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.nn.Module]: 511 | """ 512 | Processes and logs final results after training. 513 | 514 | Args: 515 | data (torch.Tensor): Original data tensor. 516 | Q (torch.Tensor): Learned Q matrix (assignments to populations). 517 | 518 | Returns: 519 | Tuple: Processed population matrix (P), Q matrix, and the raw model. 520 | 521 | Details: 522 | - Computes and logs the log-likelihood of the model given the data. 523 | """ 524 | if self.master: 525 | Ps = [dec.weight.data.detach().cpu().numpy() for dec in self.raw_model.decoders.decoders] 526 | Qs = [Q.cpu().numpy() for Q in Qs] 527 | else: 528 | Ps, Qs = [], [] 529 | return Qs, Ps, self.raw_model 530 | 531 | @staticmethod 532 | def _hudsons_fst(pop1: torch.Tensor, pop2: torch.Tensor) -> float: 533 | """ 534 | Computes Hudson's Fst between two populations. 535 | 536 | Args: 537 | pop1 (torch.Tensor): Frequencies for population 1. 538 | pop2 (torch.Tensor): Frequencies for population 2. 539 | 540 | Returns: 541 | float: Hudson's Fst value. 
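
        Example (toy values):
            p1 = (0.1, 0.9), p2 = (0.2, 0.8) gives
            num = mean((0.1-0.2)^2, (0.9-0.8)^2) = 0.01 and
            den = mean(0.1*0.8 + 0.2*0.9, 0.9*0.2 + 0.8*0.1) = 0.26,
            so Fst = 0.01 / 0.26 ≈ 0.038.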
542 | 
543 |         Formula:
544 |             Fst = mean((p1 - p2)^2) / mean(p1 * (1-p2) + p2 * (1-p1))
545 |         """
546 |         try:
547 |             num = torch.mean((pop1 - pop2) ** 2)
548 |             den = torch.mean(pop1 * (1 - pop2) + pop2 * (1 - pop1)) + 1e-7
549 |             return (num / den).item()
550 |         except Exception as e:
551 |             log.info(f" Error computing Hudson's Fst: {e}")
552 |             return float('nan')
553 | 
--------------------------------------------------------------------------------
/neural_admixture/model/train.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import numpy as np
4 | import sys
5 | import torch
6 | import torch.distributed as dist
7 | 
8 | from torch.utils.cpp_extension import load
9 | from sklearn.mixture import GaussianMixture as GaussianMixture
10 | from typing import Tuple
11 | from .neural_admixture import NeuralAdmixture
12 | from ..src.utils_c import utils
13 | 
14 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
15 | logging.getLogger("distributed").setLevel(logging.WARNING)
16 | 
17 | log = logging.getLogger(__name__)
18 | 
19 | def train(epochs: int, batch_size: int, learning_rate: float, K: int, seed: int, 
20 |           data: torch.Tensor, device: torch.device, num_gpus: int, hidden_size: int, 
21 |           master: bool, V: np.ndarray, pops: np.ndarray, min_k: int=None, max_k: int=None, n_components: int=None) -> Tuple[torch.Tensor, torch.Tensor, torch.nn.Module]:
22 |     """
23 |     Initializes P and Q matrices and trains a neural admixture model using GMM.
24 | 
25 |     Args:
26 |         epochs (int): Number of epochs.
27 |         batch_size (int): Batch size.
28 |         learning_rate (float): Learning rate.
29 |         K (int): Number of components (clusters).
30 |         seed (int): Random seed for reproducibility.
31 |         data (torch.Tensor): Input data array (samples x features).
32 |         device (torch.device): Device for computation (e.g., CPU or GPU).
33 |         num_gpus (int): Number of GPUs available.
34 |         hidden_size (int): Hidden layer size for the model.
35 |         master (bool): Whether or not this process is the master for printing the output.
36 |         V (np.ndarray): V matrix for PCA.
37 | 
38 |     Returns:
39 |         Tuple[torch.Tensor, torch.Tensor, torch.nn.Module]: Initialized P matrix, Q matrix, and trained model.
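
    Note:
        pops, min_k, max_k and n_components are additional arguments: pops holds
        optional population labels for supervised training, min_k/max_k define the
        K range when K is None, and n_components is the number of PCA components
        used for the GMM initialization. When a K range is used, the returned P
        and Q are lists with one matrix per K.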
40 | """ 41 | 42 | N, M = data.shape 43 | 44 | if master: 45 | 46 | if pops is None: 47 | data = data.numpy() 48 | # PCA: 49 | X_pca = np.zeros((N, n_components), dtype=np.float32) 50 | for i in range(0, N, 1024): 51 | end_idx = min(i + 1024, N) 52 | batch = data[i:end_idx, :].astype(np.float32)/2 53 | X_pca[i:end_idx] = batch@V.T 54 | 55 | # GMM: 56 | log.info("") 57 | log.info(" Running Gaussian Mixture in PCA subspace...") 58 | log.info("") 59 | if K is not None: 60 | gmm = GaussianMixture(n_components=K, n_init=5, init_params='k-means++', tol=1e-4, covariance_type='full', max_iter=100, random_state=seed) 61 | gmm.fit(X_pca) 62 | P = np.clip((gmm.means_@V), 5e-6, 1 - 5e-6) 63 | del gmm 64 | else: 65 | gmm_objs = [GaussianMixture(n_components=K, n_init=5, init_params='k-means++', tol=1e-4, covariance_type='full', max_iter=100, random_state=seed).fit(X_pca) for K in range(min_k, max_k + 1)] 66 | P = np.concatenate([np.clip((obj.means_@V), 5e-6, 1 - 5e-6) for obj in gmm_objs], axis=0) 67 | del gmm_objs 68 | del X_pca 69 | 70 | data = torch.as_tensor(data, dtype=torch.uint8, device='cpu' if device.type != 'mps' else 'mps') 71 | else: 72 | data = data.numpy() 73 | 74 | log.info("") 75 | log.info(" Running Supervised Mode...") 76 | log.info("") 77 | ancestry_dict = {anc: idx for idx, anc in enumerate(sorted(np.unique([a for a in pops])))} 78 | assert len(ancestry_dict) == K, f'Number of ancestries in training ground truth ({len(ancestry_dict)}) is not equal to the value of K ({K})' 79 | to_idx_mapper = np.vectorize(lambda x: ancestry_dict[x]) 80 | y_num = to_idx_mapper(pops[:]) 81 | P = np.vstack([data[y_num == idx, :].astype(np.float32).mean(axis=0) for idx in range(K)]) 82 | 83 | data = torch.as_tensor(data, dtype=torch.uint8, device='cpu' if device.type != 'mps' else 'mps') 84 | 85 | if torch.distributed.is_initialized(): 86 | dist.barrier() 87 | 88 | if num_gpus>1: 89 | if master: 90 | P_init = torch.as_tensor(P, dtype=torch.float32, device=device).contiguous() 91 | V = torch.as_tensor(V.T, dtype=torch.float32, device=device).contiguous() 92 | if pops is not None: 93 | pops = torch.as_tensor(y_num, dtype=torch.int64, device=device) 94 | else: 95 | if K is not None: 96 | P_init = torch.empty((K, M), dtype=torch.float32, device=device) 97 | if pops is not None: 98 | pops = torch.empty(len(pops), dtype=torch.int64, device=device) 99 | else: 100 | total_K = sum(range(min_k, max_k + 1)) 101 | P_init = torch.empty((total_K, M), dtype=torch.float32, device=device) 102 | V = torch.empty((M, n_components), dtype=torch.float32, device=device) 103 | 104 | if master: 105 | log.info(" Broadcasting to all GPUs...") 106 | if pops is not None: 107 | dist.broadcast(pops, src=0) 108 | dist.broadcast(P_init, src=0) 109 | dist.broadcast(V, src=0) 110 | dist.barrier() 111 | if master: 112 | log.info(" Finished broadcasting!") 113 | else: 114 | P_init = torch.as_tensor(P, dtype=torch.float32, device=device).contiguous() 115 | V = torch.as_tensor(V.T, dtype=torch.float32, device=device).contiguous() 116 | if pops is not None: 117 | pops = torch.as_tensor(y_num, dtype=torch.int64, device=device) 118 | 119 | if num_gpus>0 and device.type != 'mps': 120 | packed_data = torch.empty((N, (M + 3) // 4), dtype=torch.uint8, device=device) 121 | from neural_admixture import __file__ as installation_dir 122 | from pathlib import Path 123 | source_path = os.path.abspath(f"{Path(installation_dir).parent}/src/utils_c/pack2bit.cu") 124 | pack2bit = load(name="pack2bit", sources=[source_path], verbose=True) 125 | 
        pack2bit.pack2bit_cpu_to_gpu(data, packed_data)
126 |     else:
127 |         pack2bit = None
128 |         packed_data = data
129 | 
130 |     model = NeuralAdmixture(K, epochs, batch_size, learning_rate, device, seed, num_gpus, master, pack2bit, min_k, max_k)
131 |     Qs, Ps, model = model.launch_training(P_init, packed_data, hidden_size, V.shape[1], V, M, N, pops)
132 | 
133 |     if master:
134 |         data = data.cpu().numpy()
135 |         if K is not None:
136 |             P = np.ascontiguousarray(Ps[0].astype(np.float64))
137 |             Q = np.ascontiguousarray(Qs[0].astype(np.float64))
138 |             logl = utils.loglikelihood(data, P, Q, K)
139 |             log.info(f" Log-likelihood: {logl:.2f}.")
140 |         else:
141 |             for i, K in enumerate(range(min_k, max_k + 1)):
142 |                 P = np.ascontiguousarray(Ps[i].astype(np.float64))
143 |                 Q = np.ascontiguousarray(Qs[i].astype(np.float64))
144 |                 logl = utils.loglikelihood(data, P, Q, K)
145 |                 log.info(f" Log-likelihood for K={K}: {logl:.2f}.")
146 |     del data
147 |     del packed_data
148 |     return Ps, Qs, model
149 | 
--------------------------------------------------------------------------------
/neural_admixture/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI-sandbox/neural-admixture/270d8ec14135dc5ebbbfc13bc9571178b69c9f31/neural_admixture/src/__init__.py
--------------------------------------------------------------------------------
/neural_admixture/src/inference.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import sys
4 | import torch
5 | import time
6 | 
7 | from typing import List
8 | 
9 | from . import utils
10 | from .loaders import dataloader_admixture
11 | from ..model.neural_admixture import Q_P
12 | 
13 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
14 | log = logging.getLogger(__name__)
15 | 
16 | def main(argv: List[str]):
17 |     """Inference entry point
18 |     """
19 | 
20 |     # LOAD ARGUMENTS:
21 |     args = utils.parse_infer_args(argv)
22 | 
23 |     if args.num_gpus > 1:
24 |         raise ValueError("Neural Admixture does not support multi-GPU inference. Please set --num_gpus to 1 (for single GPU) or 0 (for CPU only).")
25 | 
26 |     if args.num_gpus == 1:
27 |         if torch.cuda.is_available():
28 |             device = torch.device('cuda:0')
29 |         elif torch.backends.mps.is_available():
30 |             device = torch.device('mps')
31 |         else:
32 |             raise RuntimeError("GPU was specified but no GPU was found. Please set --num_gpus to 0 for CPU only inference.")
33 |     else:
34 |         device = torch.device('cpu')
35 | 
36 |     log.info(f" There are {args.num_gpus} GPU(s) available.")
37 |     log.info("")
38 |     data_file_str = args.data_path
39 |     out_name = args.out_name
40 |     model_file_str = f'{args.save_dir}/{args.name}.pt'
41 |     config_file_str = f'{args.save_dir}/{args.name}_config.json'
42 |     seed = int(args.seed)
43 |     batch_size_inference_Q = int(args.batch_size)
44 |     generator = torch.Generator().manual_seed(seed)
45 | 
46 |     # LOAD MODEL:
47 |     try:
48 |         with open(config_file_str, 'r') as fb:
49 |             config = json.load(fb)
50 |     except FileNotFoundError as _:
51 |         log.error(f" Config file ({config_file_str}) not found. Make sure it is in the correct directory and with the correct name.")
52 |         return 1
53 |     except Exception as e:
54 |         raise e
55 | 
56 |     log.info(" Model config file loaded.
Loading weights...") 57 | state_dict = torch.load(model_file_str, map_location=device, weights_only=True) 58 | V = state_dict.get("V") 59 | model = Q_P(int(config['hidden_size']), int(config['num_features']), ks_list=config['ks'], V=V, is_train=False) 60 | model.load_state_dict(state_dict) 61 | model.to(device) 62 | log.info("") 63 | log.info(" Model weights loaded.") 64 | log.info("") 65 | 66 | # LOAD DATA: 67 | t0 = time.time() 68 | data, *_ = utils.read_data(data_file_str) 69 | data = torch.as_tensor(data, dtype=torch.uint8, device=device) 70 | 71 | # INFERENCE: 72 | model.eval() 73 | Qs = [torch.tensor([], device=device) for _ in config['ks']] 74 | log.info(" Running inference...") 75 | with torch.inference_mode(): 76 | pops = torch.zeros(data.size(0), device=device) 77 | dataloader = dataloader_admixture(data, batch_size_inference_Q, args.num_gpus, seed, generator, pops=pops, shuffle=False) 78 | for x_step, _ in dataloader: 79 | probs, _ = model(x_step) 80 | for i, _ in enumerate(config['ks']): 81 | Qs[i]= torch.cat((Qs[i], probs[i]), dim=0) 82 | log.info(" Inference run successfully! Writing outputs...!") 83 | 84 | # WRITE OUTPUTS: 85 | K = config['ks'][0] 86 | if len(config['ks'])==1: 87 | K = config['ks'][0] 88 | min_k = None 89 | max_k = None 90 | else: 91 | K = None 92 | min_k = config['ks'][0] 93 | max_k = config['ks'][-1] 94 | 95 | Qs = [Q.cpu().numpy() for Q in Qs] 96 | utils.write_outputs(Qs, out_name, K, min_k, max_k, args.save_dir) 97 | 98 | t1 = time.time() 99 | log.info("") 100 | log.info(f" Total elapsed time: {t1-t0:.2f} seconds.") 101 | log.info("") 102 | 103 | logging.shutdown() 104 | 105 | if __name__ == '__main__': 106 | main(sys.argv[1:]) 107 | -------------------------------------------------------------------------------- /neural_admixture/src/loaders.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler 4 | from torch.utils.data.distributed import DistributedSampler 5 | from typing import Tuple 6 | 7 | # DATALOADER: 8 | def dataloader_admixture(X: torch.Tensor, batch_size: int, num_gpus: int, seed: int, 9 | generator: torch.Generator, pops: torch.Tensor, shuffle: bool): 10 | """ 11 | Creates a DataLoader with batch sampler or distributed sampler for the phase 2. 12 | 13 | Parameters: 14 | - X (torch.Tensor): Input tensor X used in the `Dataset_f2`. 15 | - input (torch.Tensor): Additional input tensor used in the dataset. 16 | - batch_size (int): Size of each batch. 17 | - num_gpus (int): Number of GPUs available for distributed training. 18 | - seed (int): Seed for random number generation. 19 | - generator (torch.Generator): Random number generator instance. 20 | - pin (bool): Whether to pin memory for data loading. 21 | 22 | Returns: 23 | - Tuple[DataLoader, Union[BatchSampler, DistributedSampler]]: DataLoader object and the used sampler. 24 | """ 25 | dataset = Dataset_admixture(X, pops) 26 | if num_gpus > 1: 27 | sampler = DistributedSampler(dataset, shuffle=True, seed=seed) 28 | else: 29 | if shuffle: 30 | sampler = RandomSampler(dataset, generator=generator) 31 | else: 32 | sampler = SequentialSampler(dataset) 33 | loader = DataLoader(dataset, sampler=sampler, batch_size=batch_size) 34 | 35 | return loader 36 | 37 | # P2 DATASET: 38 | class Dataset_admixture(Dataset): 39 | """ 40 | Dataset for phase 2 of Neural Admixture. 41 | 42 | Args: 43 | X (torch.Tensor): The main data tensor. 
44 | input (torch.Tensor): The input tensor associated with the data. 45 | """ 46 | def __init__(self, X: torch.Tensor, pops: torch.Tensor): 47 | """ 48 | Args: 49 | X (torch.Tensor): The main data tensor. 50 | input (torch.Tensor): The input tensor associated with the data. 51 | """ 52 | self.X = X 53 | self.pops = pops 54 | 55 | def __len__(self) -> int: 56 | """ 57 | Returns: 58 | int: The number of elements in the dataset. 59 | """ 60 | return self.X.shape[0] 61 | 62 | def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]: 63 | """ 64 | Args: 65 | idx (int): Index of the item to retrieve. 66 | 67 | Returns: 68 | Tuple[torch.Tensor, torch.Tensor]: A tuple containing `batch_X` and `batch_input` tensors. 69 | """ 70 | batch_X = self.X[idx] 71 | batch_pops = self.pops[idx] 72 | return batch_X, batch_pops 73 | -------------------------------------------------------------------------------- /neural_admixture/src/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | import time 5 | import torch 6 | import numpy as np 7 | 8 | from pathlib import Path 9 | from typing import List 10 | from argparse import ArgumentError, ArgumentTypeError 11 | from pathlib import Path 12 | 13 | from . import utils 14 | from ..model.train import train 15 | 16 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") 17 | log = logging.getLogger(__name__) 18 | 19 | def fit_model(args: argparse.Namespace, data: torch.Tensor, device: torch.device, num_gpus: int, 20 | master: bool, V: np.ndarray, pops: np.ndarray) -> None: 21 | """ 22 | Wrapper function to start training 23 | """ 24 | (epochs, batch_size, learning_rate, save_dir, hidden_size, name, seed, n_components) = (int(args.epochs), int(args.batch_size), float(args.learning_rate), args.save_dir, 25 | int(args.hidden_size), args.name, int(args.seed), int(args.n_components)) 26 | 27 | if args.k is not None: 28 | K = int(args.k) 29 | min_k = None 30 | max_k = None 31 | else: 32 | min_k = int(args.min_k) 33 | max_k = int(args.max_k) 34 | K = None 35 | 36 | Ps, Qs, model = train(epochs, batch_size, learning_rate, K, seed, data, device, num_gpus, hidden_size, master, V, pops, min_k, max_k, n_components) 37 | 38 | if master: 39 | Path(save_dir).mkdir(parents=True, exist_ok=True) 40 | save_path = f'{save_dir}/{name}.pt' 41 | state_dict = {key: value for key, value in model.state_dict().items() if not key.startswith('decoders')} 42 | torch.save(state_dict, save_path) 43 | model.save_config(name, save_dir) 44 | utils.write_outputs(Qs, name, K, min_k, max_k, save_dir, Ps) 45 | 46 | return 47 | 48 | """ 49 | def perform_cross_validation(args: argparse.Namespace, trX: da.core.Array, device: torch.device, num_gpus: int, 50 | master: bool) -> None: 51 | 52 | Perform cross-validation and log the results. 53 | 54 | Args: 55 | args: A namespace object containing command-line arguments. 56 | trX: Training data. 57 | device: A string representing the device ('cuda:0', 'cpu', etc.) 58 | num_gpus: Number of GPUs. 
59 | 60 | if master: 61 | log.info(f'Performing {args.cv}-fold cross-validation...') 62 | cv_obj = KFold(n_splits=args.cv, random_state=args.seed, shuffle=True) 63 | cv_errs = [] 64 | 65 | for tr_idx, val_idx in tqdm(cv_obj.split(trX), desc="Cross-Validation"): 66 | with dask.config.set(**{'array.slicing.split_large_chunks': True}): 67 | trX_curr, valX_curr = trX[tr_idx], trX[val_idx] 68 | 69 | for k in range (args.k_range[0], args.k_range[1]): 70 | loglikelihood = fit_model(args, trX_curr, valX_curr, device, num_gpus, master, k=k) 71 | cv_errs.append(loglikelihood) 72 | 73 | cv_errs_reduced = utils.process_cv_loglikelihood(cv_errs) 74 | 75 | if master: 76 | for _, row in cv_errs_reduced.iterrows(): 77 | log.info(f"CV error (K={int(row['K'])}): {row['cv_error_mean']:.5f} ± {row['cv_error_std']:.3f}") 78 | 79 | utils.save_cv_error_plot(cv_errs_reduced, args.save_dir) 80 | """ 81 | 82 | def main(rank: int, argv: List[str], num_gpus: int, data: torch.Tensor, V: np.ndarray, pops: np.ndarray): 83 | """ 84 | Training entry point 85 | """ 86 | utils.ddp_setup('begin', rank, num_gpus) 87 | master = rank == 0 88 | 89 | try: 90 | if any(arg in argv for arg in ['-h', '--help']): 91 | if master: 92 | args = utils.parse_train_args(argv) 93 | return 94 | args = utils.parse_train_args(argv) 95 | 96 | if num_gpus>0: 97 | if torch.cuda.is_available(): 98 | device = torch.device(f'cuda:{int(rank)}') 99 | elif torch.backends.mps.is_available(): 100 | device = torch.device('mps') 101 | else: 102 | device = torch.device('cpu') 103 | else: 104 | device = torch.device('cpu') 105 | 106 | if master: 107 | log.info(f" There are {args.num_cpus} CPUs and {num_gpus} GPUs available for this execution.") 108 | log.info("") 109 | if args.k is not None: 110 | log.info(f" Running on K = {args.k}.") 111 | else: 112 | assert args.min_k is not None and args.max_k is not None, "You must provide either K or both min_k and max_k." 113 | min_k = int(args.min_k) 114 | max_k = int(args.max_k) 115 | assert min_k < max_k, f"min_k ({min_k}) must be less than max_k ({max_k})." 
116 | log.info(f" Running from K={min_k} to K={max_k}.") 117 | log.info("") 118 | Path(args.save_dir).mkdir(parents=True, exist_ok=True) 119 | 120 | t0 = time.time() 121 | 122 | #if args.cv is not None: 123 | # perform_cross_validation(args, trX, device, num_gpus, master) 124 | 125 | fit_model(args, data, device, num_gpus, master, V, pops) 126 | 127 | if master: 128 | t1 = time.time() 129 | log.info("") 130 | log.info(f" Total elapsed time: {t1-t0:.2f} seconds.") 131 | log.info("") 132 | 133 | logging.shutdown() 134 | utils.ddp_setup('end', rank, num_gpus) 135 | 136 | except (ArgumentError, ArgumentTypeError) as e: 137 | if master: 138 | log.error(f" Error parsing arguments") 139 | logging.shutdown() 140 | utils.ddp_setup('end', rank, num_gpus) 141 | if master: 142 | raise e 143 | 144 | except Exception as e: 145 | if master: 146 | log.error(f" Unexpected error") 147 | logging.shutdown() 148 | utils.ddp_setup('end', rank, num_gpus) 149 | if master: 150 | raise e 151 | 152 | if __name__ == '__main__': 153 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /neural_admixture/src/snp_reader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | import sys 4 | 5 | from .utils_c import utils 6 | from math import ceil 7 | from pathlib import Path 8 | 9 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") 10 | log = logging.getLogger(__name__) 11 | 12 | class SNPReader: 13 | """Wrapper to read genotype data from several formats 14 | """ 15 | 16 | def _read_bed(self, file: str) -> np.ndarray: 17 | """Reader wrapper for BED files 18 | 19 | Args: 20 | file (str): path to file. 21 | master (bool): Wheter or not this process is the master for printing the output. 22 | 23 | Returns: 24 | np.ndarray: averaged genotype Dask array of shape (n_samples, n_snps) 25 | """ 26 | log.info(" Input format is BED.") 27 | 28 | file_path = Path(file) 29 | fam_file = file_path.with_suffix(".fam") 30 | bed_file = file_path.with_suffix(".bed") 31 | 32 | with open(fam_file, "r") as fam: 33 | N = sum(1 for _ in fam) 34 | N_bytes = ceil(N / 4) 35 | 36 | with open(bed_file, "rb") as bed: 37 | B = np.fromfile(bed, dtype=np.uint8, offset=3) 38 | 39 | assert (B.shape[0] % N_bytes) == 0, "bim file doesn't match!" 40 | M = B.shape[0] // N_bytes 41 | B.shape = (M, N_bytes) 42 | 43 | G = np.zeros((N, M), dtype=np.uint8) 44 | utils.read_bed(B, G) 45 | del B 46 | return G 47 | 48 | def _read_pgen(self, file: str, master: bool) -> np.ndarray: 49 | """Reader wrapper for PGEN files""" 50 | log.info(" Input format is PGEN.") 51 | try: 52 | import pgenlib as pg 53 | except ImportError: 54 | if master: 55 | log.error(" Cannot read PGEN file as pgenlib is not installed.") 56 | sys.exit(1) 57 | 58 | pgen_reader = pg.PgenReader(str.encode(file)) 59 | num_vars = pgen_reader.get_variant_ct() 60 | num_samples = pgen_reader.get_raw_sample_ct() 61 | 62 | calldata = np.empty((num_vars, 2 * num_samples), dtype=np.uint8) 63 | pgen_reader.read_alleles_range(0, num_vars, calldata) 64 | 65 | return np.ascontiguousarray((calldata[:, ::2] + calldata[:, 1::2]).T).astype(np.uint8) 66 | 67 | def _read_vcf(self, file: str) -> np.ndarray: 68 | """Reader wrapper for VCF files 69 | 70 | Args: 71 | file (str): path to file. 72 | master (bool): Wheter or not this process is the master for printing the output. 
73 | 
74 |         Returns:
75 |             np.ndarray: averaged genotype array of shape (n_samples, n_snps)
76 |         """
77 |         log.info(" Input format is VCF.")
78 |         import allel
79 |         f_tr = allel.read_vcf(file)
80 |         calldata = f_tr["calldata/GT"].astype(np.uint8)
81 |         return np.ascontiguousarray(np.sum(calldata, axis=2, dtype=np.uint8).T)
82 | 
83 |     def read_data(self, file: str) -> np.ndarray:
84 |         """Wrapper of readers
85 | 
86 |         Args:
87 |             file (str): path to file
88 |         Returns:
89 |             np.ndarray: averaged genotype numpy array of shape (n_samples, n_snps)
90 |         """
91 |         file_extensions = Path(file).suffixes
92 | 
93 |         if '.bed' in file_extensions:
94 |             G = self._read_bed(file)
95 |         elif '.pgen' in file_extensions:
96 |             G = self._read_pgen(file, master=True)  # _read_pgen requires a 'master' flag; it only gates the error log
97 |         elif '.vcf' in file_extensions:
98 |             G = self._read_vcf(file)
99 |         else:
100 |             log.error(" Invalid format. Unrecognized file format. Make sure file ends with .bed, .pgen or .vcf .")
101 |             sys.exit(1)
102 |         return G if G.mean() < 1 else 2 - G
103 | 
--------------------------------------------------------------------------------
/neural_admixture/src/svd.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import sys
3 | import time
4 | import numpy as np
5 | 
6 | from .utils_c import rsvd
7 | 
8 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
9 | log = logging.getLogger(__name__)
10 | 
11 | 
12 | # -----------------------------------------------------------------------------
13 | # High-level randomized SVD function
14 | # -----------------------------------------------------------------------------
15 | 
16 | def svd_flip(V, U):
17 |     """
18 |     Adjust signs of V rows based on dominant signs in U columns to ensure consistent SVD output.
19 | 
20 |     Parameters:
21 |     -----------
22 |     V : np.ndarray
23 |         Matrix (e.g., V or Vt) to flip signs on rows.
24 |     U : np.ndarray
25 |         Left singular vectors matrix to determine sign direction.
26 | 
27 |     Returns:
28 |     --------
29 |     np.ndarray
30 |         Sign-corrected version of V.
31 |     """
32 |     k_components = U.shape[1]
33 |     max_abs_val_row_indices = np.argmax(np.abs(U), axis=0)
34 |     col_selector = np.arange(k_components)
35 |     elements_for_sign = U[max_abs_val_row_indices, col_selector]
36 |     signs = np.sign(elements_for_sign)
37 |     return V * signs[:, np.newaxis]
38 | 
39 | def RSVD(A_uint8, N, M, k=8, seed=42, oversampling=10, power_iterations=2):
40 |     """
41 |     Randomized SVD for a uint8 matrix A of shape (N, M), i.e. N samples by M SNPs.
42 |     Returns Vt_k of shape (k, M).
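
    Example (hypothetical shapes):
        >>> # A_uint8: (1000, 5000) uint8 genotype matrix (N=1000 samples, M=5000 SNPs)
        >>> Vt = RSVD(A_uint8, N=1000, M=5000, k=8)
        >>> Vt.shape
        (8, 5000)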
43 | """ 44 | rng = np.random.default_rng(seed) 45 | k_prime = max(k + oversampling, 20) 46 | 47 | total_start_time = time.time() 48 | log.info(" 1) Generating Ω y Y = A @ Ω...") 49 | Omega = rng.standard_normal(size=(M, k_prime), dtype=np.float32) 50 | Y = rsvd.multiply_A_omega(A_uint8, Omega) 51 | log.info(f" Time={time.time() - total_start_time:.4f}s") 52 | 53 | if power_iterations > 0: 54 | iter_start = time.time() 55 | for _ in range(power_iterations): 56 | Q_y, _ = np.linalg.qr(Y, mode='reduced') # (n, k_prime) 57 | Q_y = np.ascontiguousarray(Q_y.T) 58 | B_tmp = rsvd.multiply_QT_A(Q_y, A_uint8) # (k_prime, m) 59 | B_tmp = np.ascontiguousarray(B_tmp.T) 60 | Y = rsvd.multiply_A_omega(A_uint8, B_tmp) # (n, k_prime) 61 | log.info(f" Power iterations time={time.time() - iter_start:.4f}s") 62 | 63 | log.info(" 2) QR of Y...") 64 | qr_start = time.time() 65 | Q, _ = np.linalg.qr(Y, mode='reduced') # (n, k_prime) 66 | log.info(f" Time={time.time() - qr_start:.4f}s") 67 | 68 | log.info(" 3) B = Qᵀ @ A...") 69 | b_start = time.time() 70 | Q = np.ascontiguousarray(Q.T) 71 | B = rsvd.multiply_QT_A(Q, A_uint8) # (k_prime, m) 72 | log.info(f" Time={time.time() - b_start:.4f}s") 73 | 74 | log.info(" 4) SVD of B...") 75 | svd_start = time.time() 76 | Ut, _, Vt = np.linalg.svd(B, full_matrices=False) 77 | log.info(f" SVD time={time.time() - svd_start:.4f}s") 78 | 79 | Vt = svd_flip(Vt, Ut) 80 | 81 | log.info("") 82 | log.info(f" Total time SVD: {time.time() - total_start_time:.4f}s") 83 | log.info("") 84 | return Vt[:k, :] -------------------------------------------------------------------------------- /neural_admixture/src/utils.py: -------------------------------------------------------------------------------- 1 | import configargparse 2 | import logging 3 | import random 4 | import os 5 | import sys 6 | import numpy as np 7 | import torch 8 | 9 | from pathlib import Path 10 | from typing import List 11 | 12 | from .snp_reader import SNPReader 13 | 14 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") 15 | log = logging.getLogger(__name__) 16 | 17 | def parse_train_args(argv: List[str]): 18 | """Training arguments parser 19 | """ 20 | parser = configargparse.ArgumentParser(prog='neural-admixture train', 21 | description='Rapid population clustering with autoencoders - training mode', 22 | config_file_parser_class=configargparse.YAMLConfigFileParser) 23 | 24 | parser.add_argument('--epochs', required=False, type=int, default=250, help='Maximum number of epochs.') 25 | parser.add_argument('--batch_size', required=False, default=800, type=int, help='Batch size.') 26 | parser.add_argument('--learning_rate', required=False, default=20e-4, type=float, help='Learning rate.') 27 | 28 | parser.add_argument('--seed', required=False, type=int, default=42, help='Seed') 29 | parser.add_argument('--k', required=False, type=int, help='Number of populations/clusters.') 30 | parser.add_argument('--min_k', required=False, type=int, help='Minimum number of populations/clusters (multi-head)') 31 | parser.add_argument('--max_k', required=False, type=int, help='Maximum number of populations/clusters (multi-head)') 32 | parser.add_argument('--hidden_size', required=False, default=1024, type=int, help='Dimension of first projection in encoder.') 33 | parser.add_argument('--save_dir', required=True, type=str, help='Save model in this directory') 34 | parser.add_argument('--data_path', required=True, type=str, help='Path containing the main data') 35 | parser.add_argument('--name', 
required=True, type=str, help='Experiment/model name') 36 | 37 | parser.add_argument('--supervised_loss_weight', required=False, default=100, type=float, help='Weight given to the supervised loss') 38 | parser.add_argument('--pops_path', required=False, default='', type=str, help='Path containing the main data populations') 39 | 40 | parser.add_argument('--n_components', required=False, type=int, default=8, help='Number of components to use for the SVD initialization.') 41 | 42 | parser.add_argument('--num_gpus', required=False, default=0, type=int, help='Number of GPUs to be used in the execution.') 43 | parser.add_argument('--num_cpus', required=False, default=1, type=int, help='Number of CPUs to be used in the execution.') 44 | 45 | #parser.add_argument('--cv', required=False, default=None, type=int, help='Number of folds for cross-validation') 46 | return parser.parse_args(argv) 47 | 48 | def parse_infer_args(argv: List[str]): 49 | """Inference arguments parser 50 | """ 51 | parser = configargparse.ArgumentParser(prog='neural-admixture infer', 52 | description='Rapid population clustering with autoencoders - inference mode', 53 | config_file_parser_class=configargparse.YAMLConfigFileParser) 54 | parser.add_argument('--out_name', required=True, type=str, help='Name used to output files on inference mode.') 55 | parser.add_argument('--save_dir', required=True, type=str, help='Load model from this directory.') 56 | parser.add_argument('--data_path', required=True, type=str, help='Path containing the main data.') 57 | parser.add_argument('--name', required=True, type=str, help='Trained experiment/model name.') 58 | parser.add_argument('--batch_size', required=False, default=1000, type=int, help='Batch size.') 59 | parser.add_argument('--seed', required=False, type=int, default=42, help='Seed') 60 | 61 | parser.add_argument('--num_cpus', required=False, default=1, type=int, help='Number of CPUs to be used in the execution.') 62 | parser.add_argument('--num_gpus', required=False, default=0, type=int, help='Number of GPUs to be used in the execution.') 63 | return parser.parse_args(argv) 64 | 65 | def read_data(tr_file: str, tr_pops_f: str=None) -> np.ndarray: 66 | """ 67 | Reads SNP data from a file and applies imputation if specified.. 68 | 69 | Args: 70 | tr_file (str): Path to the SNP data file. 71 | 72 | Returns: 73 | np.ndarray: A numpy array containing the SNP data. 74 | """ 75 | snp_reader = SNPReader() 76 | data = snp_reader.read_data(tr_file) 77 | log.info(f" Data contains {data.shape[0]} samples and {data.shape[1]} SNPs.") 78 | if tr_pops_f: 79 | log.info(" Population file provided!") 80 | with open(tr_pops_f, 'r') as fb: 81 | pops = [p.strip() for p in fb.readlines()] 82 | else: 83 | pops = None 84 | return data, pops, data.shape[0], data.shape[1] 85 | 86 | def write_outputs(Qs: np.ndarray, run_name: str, K: int, min_k: int, max_k: int, out_path: str, Ps: np.ndarray = None) -> None: 87 | """ 88 | Save the Q and optional P matrices to specified output files. 89 | 90 | Args: 91 | Qs (list of numpy.ndarray): List of Q matrices to be saved. 92 | run_name (str): Identifier for the run, used in file naming. 93 | K (int): Number of clusters, included in the file name. 94 | min_k (int): Minimum number of clusters (for range output). 95 | max_k (int): Maximum number of clusters (for range output). 96 | out_path (str or Path): Directory where the output files should be saved. 97 | Ps (list of numpy.ndarray, optional): List of P matrices to be saved, if provided. 
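
    Example:
        With run_name='myrun' and K=7, the Q matrix is written to
        '<out_path>/myrun.7.Q' (and the P matrix to 'myrun.7.P' when Ps is given).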
98 | 99 | Returns: 100 | None 101 | """ 102 | out_path = Path(out_path) 103 | out_path.mkdir(parents=True, exist_ok=True) 104 | 105 | if K is not None: 106 | np.savetxt(out_path / f"{run_name}.{K}.Q", Qs[0], delimiter=' ') 107 | if Ps is not None: 108 | np.savetxt(out_path / f"{run_name}.{K}.P", Ps[0], delimiter=' ') 109 | log.info(" Q and P matrices saved.") 110 | else: 111 | log.info(" Q matrix saved.") 112 | else: 113 | for i, K in enumerate(range(min_k, max_k + 1)): 114 | np.savetxt(out_path / f"{run_name}.{K}.Q", Qs[i], delimiter=' ') 115 | if Ps is not None: 116 | np.savetxt(out_path / f"{run_name}.{K}.P", Ps[i], delimiter=' ') 117 | log.info(" Q and P matrices saved for all K." if Ps is not None else " Q matrices saved for all K.") 118 | 119 | def ddp_setup(stage: str, rank: int, world_size: int) -> None: 120 | """ 121 | Set up the distributed environment for training. 122 | 123 | Args: 124 | stage (str): Either 'begin' to initialize or 'end' to finalize the distributed process group. 125 | rank (int): The rank (ID) of the current process. 126 | world_size (int): The total number of processes participating in the training. 127 | 128 | Returns: 129 | None 130 | """ 131 | if world_size > 1: 132 | if stage == 'begin': 133 | os.environ["MASTER_ADDR"] = os.environ.get("MASTER_ADDR", "127.0.0.1") 134 | os.environ["MASTER_PORT"] = os.environ.get("MASTER_PORT", "29500") 135 | 136 | torch.cuda.set_device(rank % torch.cuda.device_count()) 137 | 138 | torch.distributed.init_process_group( 139 | backend="nccl", 140 | init_method="env://", 141 | rank=rank, 142 | world_size=world_size 143 | ) 144 | else: 145 | torch.distributed.destroy_process_group() 146 | 147 | def set_seed(seed: int) -> None: 148 | """ 149 | Set the seed for random number generators to ensure reproducibility. 150 | 151 | Args: 152 | seed (int): Seed value. 
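
    Note: besides seeding torch, CUDA, numpy and random, cuDNN is switched to
    deterministic mode (benchmark disabled), which can slow training slightly.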
153 | 154 | Returns: 155 | None 156 | """ 157 | torch.manual_seed(seed) 158 | torch.cuda.manual_seed_all(seed) 159 | np.random.seed(seed) 160 | random.seed(seed) 161 | torch.backends.cudnn.deterministic = True 162 | torch.backends.cudnn.benchmark = False 163 | -------------------------------------------------------------------------------- /neural_admixture/src/utils_c/__int__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-sandbox/neural-admixture/270d8ec14135dc5ebbbfc13bc9571178b69c9f31/neural_admixture/src/utils_c/__int__.py -------------------------------------------------------------------------------- /neural_admixture/src/utils_c/pack2bit.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include // Para std::min 6 | 7 | #define THREADS_PER_BLOCK 256 8 | #define MAX_ROWS_PER_BATCH 1024 9 | 10 | __global__ void pack2bit_kernel(const uint8_t* __restrict__ input, uint8_t* __restrict__ output, 11 | int batch_rows, int M, int packed_cols) { 12 | // Calcular índices globales 13 | int row = blockIdx.y; 14 | int chunk_idx = blockIdx.x * blockDim.x + threadIdx.x; 15 | 16 | // Salir si estamos fuera de los límites del batch 17 | if (row >= batch_rows || chunk_idx >= packed_cols) 18 | return; 19 | 20 | int row_offset_in = row * M; 21 | int row_offset_out = row * packed_cols; 22 | int in_col = chunk_idx * 4; // Cada hilo procesa 4 valores 23 | 24 | // Empaquetamiento 25 | uint8_t packed = 0; 26 | for (int i = 0; i < 4; ++i) { 27 | int idx = in_col + i; 28 | if (idx < M) { // Asegurar que no excedemos las columnas originales M 29 | uint8_t val = input[row_offset_in + idx] & 0x03; // Tomar solo los 2 bits menos significativos 30 | packed |= (val << (i * 2)); 31 | } 32 | } 33 | 34 | // Escribir resultado 35 | output[row_offset_out + chunk_idx] = packed; 36 | } 37 | 38 | __global__ void unpack2bit_kernel(const uint8_t* __restrict__ input, uint8_t* __restrict__ output, 39 | int batch_rows, int M, int packed_cols) { 40 | // Calcular índices globales 41 | int row = blockIdx.y; 42 | int chunk_idx = blockIdx.x * blockDim.x + threadIdx.x; 43 | 44 | // Salir si estamos fuera de los límites del batch 45 | if (row >= batch_rows || chunk_idx >= packed_cols) 46 | return; 47 | 48 | int row_offset_in = row * packed_cols; 49 | int row_offset_out = row * M; 50 | int out_col = chunk_idx * 4; // Cada valor empaquetado contiene 4 valores 51 | 52 | // Obtener valor empaquetado 53 | uint8_t packed = input[row_offset_in + chunk_idx]; 54 | 55 | // Desempaquetar (4 valores por byte) 56 | for (int i = 0; i < 4; ++i) { 57 | int idx = out_col + i; 58 | if (idx < M) { // Asegurar que no excedemos las columnas originales M 59 | output[row_offset_out + idx] = (packed >> (i * 2)) & 0x03; // Extraer los 2 bits correspondientes 60 | } 61 | } 62 | } 63 | 64 | // Función para comprimir datos desde CPU y dejar el resultado en GPU 65 | void pack2bit_cpu_to_gpu_cuda(torch::Tensor input_cpu, torch::Tensor output_gpu) { 66 | // Verificar que los tensores estén en los dispositivos correctos 67 | TORCH_CHECK(input_cpu.device().is_cpu(), "Input tensor must be on CPU"); 68 | TORCH_CHECK(output_gpu.device().is_cuda(), "Output tensor must be on CUDA device"); 69 | 70 | int N = input_cpu.size(0); 71 | int M = input_cpu.size(1); 72 | int packed_cols = (M + 3) / 4; 73 | 74 | // Verificar que las dimensiones coinciden 75 | TORCH_CHECK(output_gpu.size(0) == N, "Output tensor row 
dimension mismatch"); 76 | TORCH_CHECK(output_gpu.size(1) == packed_cols, "Output tensor column dimension mismatch"); 77 | 78 | // Determinar el tamaño del batch 79 | int rows_per_batch = std::min(MAX_ROWS_PER_BATCH, N); 80 | 81 | // Crear tensor temporal para batch de entrada en GPU 82 | auto options_input = torch::TensorOptions().dtype(torch::kUInt8).device(torch::kCUDA); 83 | auto input_gpu_batch_temp = torch::empty({rows_per_batch, M}, options_input); 84 | 85 | // Configuración de grid y bloques 86 | dim3 threads(THREADS_PER_BLOCK); 87 | 88 | // Procesar por lotes 89 | for (int start_row = 0; start_row < N; start_row += rows_per_batch) { 90 | int current_batch_rows = std::min(rows_per_batch, N - start_row); 91 | 92 | // Crear vistas para el batch actual en CPU y GPU 93 | auto input_cpu_batch_view = input_cpu.slice(0, start_row, start_row + current_batch_rows); 94 | 95 | // Redimensionar la vista del tensor GPU temporal si este batch es más pequeño que el máximo 96 | auto input_gpu_batch_view = (current_batch_rows < rows_per_batch) ? 97 | input_gpu_batch_temp.slice(0, 0, current_batch_rows) : input_gpu_batch_temp; 98 | 99 | // Vista del tensor de salida final en GPU para este batch 100 | auto output_gpu_batch_view = output_gpu.slice(0, start_row, start_row + current_batch_rows); 101 | 102 | // Copiar batch de CPU a GPU temporal 103 | input_gpu_batch_view.copy_(input_cpu_batch_view); 104 | 105 | // Configurar grid para este batch 106 | dim3 blocks((packed_cols + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK, current_batch_rows); 107 | 108 | // Lanzar kernel usando los datos del tensor temporal en GPU como entrada 109 | pack2bit_kernel<<>>( 110 | input_gpu_batch_view.data_ptr(), 111 | output_gpu_batch_view.data_ptr(), 112 | current_batch_rows, M, packed_cols 113 | ); 114 | 115 | cudaDeviceSynchronize(); 116 | } 117 | } 118 | 119 | // Función para descomprimir datos desde GPU y dejar el resultado en GPU 120 | void unpack2bit_gpu_to_gpu(torch::Tensor input_gpu, torch::Tensor output_gpu) { 121 | TORCH_CHECK(input_gpu.device().is_cuda(), "Input tensor must be on CUDA device"); 122 | TORCH_CHECK(output_gpu.device().is_cuda(), "Output tensor must be on CUDA device"); 123 | TORCH_CHECK(input_gpu.get_device() == output_gpu.get_device(), "Input and Output tensors must be on the same CUDA device"); 124 | 125 | int N = output_gpu.size(0); 126 | int M = output_gpu.size(1); 127 | int packed_cols = (M + 3) / 4; 128 | 129 | TORCH_CHECK(input_gpu.size(0) == N, "Input tensor row dimension mismatch"); 130 | TORCH_CHECK(input_gpu.size(1) == packed_cols, "Input tensor column dimension mismatch based on output shape"); 131 | 132 | dim3 threads(THREADS_PER_BLOCK); 133 | dim3 blocks((packed_cols + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK, N); 134 | 135 | unpack2bit_kernel<<>>( 136 | input_gpu.data_ptr(), 137 | output_gpu.data_ptr(), 138 | N, M, packed_cols 139 | ); 140 | 141 | cudaDeviceSynchronize(); 142 | } 143 | 144 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 145 | m.def("pack2bit_cpu_to_gpu", &pack2bit_cpu_to_gpu_cuda, "Pack 2-bit values from CPU tensor to GPU tensor (batched)"); 146 | m.def("unpack2bit_gpu_to_gpu", &unpack2bit_gpu_to_gpu, "Unpack 2-bit values from GPU tensor to GPU tensor (batched)"); 147 | } -------------------------------------------------------------------------------- /neural_admixture/src/utils_c/rsvd.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, 
cdivision=True, infer_types=True 2 | 3 | import time 4 | import logging 5 | import numpy as np 6 | cimport numpy as np 7 | cimport cython 8 | from cython.parallel import prange 9 | 10 | np.import_array() 11 | 12 | # ----------------------------------------------------------------------------- 13 | # Cython kernels 14 | # ----------------------------------------------------------------------------- 15 | 16 | cpdef inline void _multiply_A_omega_uint8_float_parallel( 17 | const np.uint8_t[:, ::1] A_view, 18 | const np.float32_t[:, ::1] Omega_view, 19 | np.float32_t[:, ::1] Y_view, 20 | int n_rows_A, 21 | int m_cols_A, 22 | int k_prime) nogil: 23 | cdef: 24 | int i, j, l 25 | float temp_sum 26 | 27 | for i in prange(n_rows_A, nogil=True, schedule='guided'): 28 | for j in range(k_prime): 29 | temp_sum = 0.0 30 | for l in range(m_cols_A): 31 | temp_sum = temp_sum + A_view[i, l] * Omega_view[l, j] 32 | Y_view[i, j] = temp_sum 33 | 34 | cpdef inline void _multiply_QT_A_float_uint8_parallel( 35 | const np.float32_t[:, ::1] QT_view, 36 | const np.uint8_t[:, ::1] A_view, 37 | np.float32_t[:, ::1] B_view, 38 | int k_prime_rows_QT, 39 | int n_rows_A, 40 | int m_cols_A) nogil: 41 | cdef: 42 | int i, j, l 43 | float temp_sum 44 | 45 | for i in prange(k_prime_rows_QT, nogil=True, schedule='guided'): 46 | for j in range(m_cols_A): 47 | temp_sum = 0.0 48 | for l in range(n_rows_A): 49 | temp_sum = temp_sum + QT_view[i, l] * A_view[l, j] 50 | B_view[i, j] = temp_sum 51 | 52 | # ----------------------------------------------------------------------------- 53 | # Python-callable wrappers 54 | # ----------------------------------------------------------------------------- 55 | 56 | def multiply_A_omega(np.ndarray[np.uint8_t, ndim=2, mode="c"] A_np, 57 | np.ndarray[np.float32_t, ndim=2, mode="c"] Omega_np): 58 | """ 59 | Multiplica A_np (n_rows_A x m_cols_A) por Omega_np (m_cols_A x k_prime) 60 | retornando Y_np (n_rows_A x k_prime). 61 | """ 62 | cdef int n_rows_A = A_np.shape[0] 63 | cdef int m_cols_A = A_np.shape[1] 64 | cdef int k_prime = Omega_np.shape[1] 65 | cdef np.ndarray[np.float32_t, ndim=2, mode="c"] Y_np = \ 66 | np.zeros((n_rows_A, k_prime), dtype=np.float32) 67 | 68 | _multiply_A_omega_uint8_float_parallel( 69 | A_np, Omega_np, Y_np, 70 | n_rows_A, m_cols_A, k_prime) 71 | return Y_np 72 | 73 | 74 | def multiply_QT_A(np.ndarray[np.float32_t, ndim=2, mode="c"] QT_np, 75 | np.ndarray[np.uint8_t, ndim=2, mode="c"] A_np): 76 | """ 77 | Multiplica QT_np (k_prime x n_rows_A) por A_np (n_rows_A x m_cols_A) 78 | retornando B_np (k_prime x m_cols_A). 
79 |     """
80 |     cdef int k_prime_rows_QT = QT_np.shape[0]
81 |     cdef int n_rows_A = A_np.shape[0]
82 |     cdef int m_cols_A = A_np.shape[1]
83 |     if QT_np.shape[1] != n_rows_A:
84 |         raise ValueError(
85 |             "Incompatible dimensions: QT_np.shape[1] must equal A_np.shape[0]")
86 | 
87 |     cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B_np = \
88 |         np.zeros((k_prime_rows_QT, m_cols_A), dtype=np.float32)
89 | 
90 |     _multiply_QT_A_float_uint8_parallel(
91 |         QT_np, A_np, B_np,
92 |         k_prime_rows_QT, n_rows_A, m_cols_A)
93 |     return B_np
94 | 
--------------------------------------------------------------------------------
/neural_admixture/src/utils_c/utils.pyx:
--------------------------------------------------------------------------------
1 | # cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
2 | cimport numpy as np
3 | from cython.parallel import parallel, prange
4 | from libc.math cimport fmax, fmin, log, log1p, sqrt, fmaxf, fminf, sqrtf
5 | from libc.stdlib cimport calloc, free
6 | 
7 | # Compute the reconstructed dosage for one (sample, SNP) pair
8 | cdef inline double _reconstruct(const double* p, const double* q, const size_t K) noexcept nogil:
9 |     cdef:
10 |         size_t k
11 |         double rec = 0.0
12 |     for k in range(K):
13 |         rec += q[k]*p[k]
14 |     return rec
15 | 
16 | # Log-likelihood calculation
17 | cpdef double loglikelihood(const unsigned char[:,::1] G,
18 |                            double[:,::1] P,
19 |                            const double[:,::1] Q,
20 |                            size_t K,
21 |                            double eps=1e-6) noexcept nogil:
22 |     cdef:
23 |         size_t N = G.shape[0]
24 |         size_t M = G.shape[1]
25 |         size_t i, j
26 |         double logl = 0.0
27 |         double g_d, rec
28 |         double* p
29 |     for j in prange(M):
30 |         p = &P[j,0]
31 |         for i in range(N):
32 |             if G[i,j] != 3:
33 |                 rec = _reconstruct(p, &Q[i,0], K)
34 |                 rec = fmax(eps, fmin(rec, 1.0 - eps))
35 | 
36 |                 g_d = G[i,j]
37 |                 g_d = fmax(eps, fmin(g_d, 2.0 - eps))
38 | 
39 |                 logl += g_d * log(rec) + (2.0 - g_d) * log1p(-rec)
40 |     return logl
41 | 
42 | # Read genotypes from a SNP-major PLINK .bed byte block (4 samples per byte)
43 | cpdef void read_bed(const unsigned char[:,::1] bed_source, unsigned char[:,::1] geno_target) noexcept nogil:
44 |     cdef:
45 |         size_t n_snps = geno_target.shape[1]
46 |         size_t n_samples = geno_target.shape[0]
47 |         size_t byte_count = bed_source.shape[1]
48 |         size_t snp_idx, byte_pos, sample_pos
49 |         unsigned char current_byte
50 |         unsigned char[4] lookup_table = [2, 3, 1, 0]
51 | 
52 |     with nogil, parallel():
53 |         for snp_idx in prange(n_snps):
54 |             for byte_pos in range(byte_count):
55 |                 current_byte = bed_source[snp_idx, byte_pos]
56 |                 sample_pos = byte_pos * 4
57 | 
58 |                 if sample_pos < n_samples:
59 |                     geno_target[sample_pos, snp_idx] = lookup_table[current_byte & 3]
60 | 
61 |                 if sample_pos + 1 < n_samples:
62 |                     geno_target[sample_pos + 1, snp_idx] = lookup_table[(current_byte >> 2) & 3]
63 | 
64 |                 if sample_pos + 2 < n_samples:
65 |                     geno_target[sample_pos + 2, snp_idx] = lookup_table[(current_byte >> 4) & 3]
66 | 
67 |                 if sample_pos + 3 < n_samples:
68 |                     geno_target[sample_pos + 3, snp_idx] = lookup_table[(current_byte >> 6) & 3]
69 | 
--------------------------------------------------------------------------------
/neural_admixture/tests/test_placeholder.py:
--------------------------------------------------------------------------------
1 | def test_placeholder():
2 |     assert True
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools >= 45",
4 |     "setuptools_scm[toml]>=6.2",
5 |     "Cython",
6 |     "numpy"
7 | ]
8 | build-backend = "setuptools.build_meta"
9 | 
10 | [tool.setuptools_scm]
11 | write_to = "neural_admixture/_version.py"
12 | fallback_version = "0.1.0"
13 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = neural-admixture
3 | author = Joan Saurina Ricós, Albert Dominguez Mantes
4 | author_email = joansaurina03@gmail.com, adomi@stanford.edu
5 | license = CC BY-NC 4.0
6 | description = Rapid population clustering with autoencoders
7 | long_description = file: README.md
8 | long_description_content_type = text/markdown
9 | url = https://github.com/AI-sandbox/neural-admixture
10 | classifiers =
11 |     Development Status :: 5 - Production/Stable
12 |     Intended Audience :: Science/Research
13 |     Topic :: Scientific/Engineering
14 |     Programming Language :: Python :: 3.10
15 |     Programming Language :: Python :: 3.11
16 |     Programming Language :: Python :: 3.12
17 |     Operating System :: POSIX :: Linux
18 |     Operating System :: MacOS
19 | [options]
20 | packages = find:
21 | install_requires =
22 |     configargparse>=1.5.3
23 |     Cython>=0.29.30
24 |     numpy>=2.2.5
25 |     setuptools>=50.3.1
26 |     torch<=2.4.0,>2.0.0
27 |     tqdm>=4.64.0
28 |     scikit-allel>=1.3.5
29 |     scikit-learn>=1.1.0
30 |     ninja
31 |     colorama
32 | 
33 | python_requires = >=3.10
34 | 
35 | [options.entry_points]
36 | console_scripts =
37 |     neural-admixture = neural_admixture.entry:main
38 | 
39 | [options.extras_require]
40 | testing =
41 |     tox
42 |     pytest # https://docs.pytest.org/en/latest/contents.html
43 |     pytest-cov # https://pytest-cov.readthedocs.io/en/latest/
44 |     pytest-mock
45 | 
46 | [flake8]
47 | ignore = E116, E501, E203
48 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, Extension
2 | from Cython.Build import cythonize
3 | 
4 | import numpy
5 | import os
6 | import platform
7 | import sys
8 | import logging
9 | 
10 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
11 | log = logging.getLogger(__name__)
12 | 
13 | system = platform.system()
14 | 
15 | if system == "Linux":
16 |     compile_args = ['-fopenmp', '-O3', '-ffast-math', '-march=native', '-fno-wrapv']
17 |     link_args = ['-fopenmp', '-lm']
18 |     os.environ["CC"] = "gcc"
19 |     os.environ["CXX"] = "g++"
20 | elif system == "Darwin":  # macOS
21 |     compile_args = ['-O3', '-ffast-math', '-fno-wrapv']
22 |     link_args = ['-lm']
23 |     os.environ["CC"] = "clang"
24 |     os.environ["CXX"] = "clang++"
25 | elif system == "Windows":
26 |     if os.environ.get("CC", "").endswith("gcc"):
27 |         compile_args, link_args = ['-O3', '-fopenmp'], ['-fopenmp']
28 |     else:
29 |         compile_args, link_args = ['/O2', '/openmp'], []
30 | else:
31 |     log.info(f"System not recognized: {system}")
32 |     sys.exit(1)
33 | 
34 | common_macros = [('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION')]
35 | 
36 | # Define extensions
37 | extensions = [
38 |     Extension(
39 |         name="neural_admixture.src.utils_c.utils",
40 |         sources=["neural_admixture/src/utils_c/utils.pyx"],
41 |         include_dirs=[numpy.get_include()],
42 |         extra_compile_args=compile_args,
43 |         extra_link_args=link_args,
44 |         define_macros=common_macros,
45 |     ),
46 |     Extension(
47 |         name="neural_admixture.src.utils_c.rsvd",
48 |         sources=["neural_admixture/src/utils_c/rsvd.pyx"],
49 |         include_dirs=[numpy.get_include()],
50 |         extra_compile_args=compile_args,
51 |         extra_link_args=link_args,
52 |         define_macros=common_macros,
53 |     )
54 | ]
55 | 
56 | setup(
57 |     ext_modules=cythonize(extensions),
58 |     include_package_data=True,
59 | )
60 | 
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # For more information about tox, see https://tox.readthedocs.io/en/latest/
2 | [tox]
3 | envlist = py{310,311,312}-{linux,macos}
4 | isolated_build = true
5 | 
6 | [gh-actions]
7 | python =
8 |     3.10: py310
9 |     3.11: py311
10 |     3.12: py312
11 | 
12 | [gh-actions:env]
13 | PLATFORM =
14 |     ubuntu-latest: linux
15 |     macos-latest: macos
16 | 
17 | [testenv]
18 | platform =
19 |     linux: linux
20 |     macos: darwin
21 | passenv =
22 |     CI
23 |     GITHUB_ACTIONS
24 | extras =
25 |     testing
26 | commands = pytest -v --color=yes --cov=neural_admixture --cov-report=xml
--------------------------------------------------------------------------------
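
A note on the 2-bit genotype layout: read_bed in utils.pyx walks a SNP-major PLINK .bed byte block, splits each byte into four 2-bit codes starting from the least-significant bits, and remaps them through lookup_table = [2, 3, 1, 0], so a decoded value of 3 marks a missing genotype. The NumPy sketch below is a slow reference decoder that mirrors that routine and can be used to cross-check it; the function name read_bed_reference and the use of plain NumPy are illustrative assumptions, not part of the package.

import numpy as np

def read_bed_reference(bed_source: np.ndarray, n_samples: int) -> np.ndarray:
    """Unoptimized decoder mirroring neural_admixture.src.utils_c.utils.read_bed.

    bed_source: uint8 array of shape (n_snps, n_bytes) holding the packed
    genotype block of a SNP-major PLINK .bed file (magic bytes stripped).
    Returns a (n_samples, n_snps) uint8 genotype matrix where 3 = missing.
    """
    lookup = np.array([2, 3, 1, 0], dtype=np.uint8)        # same table as read_bed
    n_snps, _ = bed_source.shape
    geno = np.empty((n_samples, n_snps), dtype=np.uint8)
    for offset in range(4):                                 # sample index modulo 4
        shift = 2 * offset                                  # bits [shift, shift+1] of each byte
        count = (n_samples - offset + 3) // 4               # number of samples with this offset
        codes = (bed_source >> shift) & 3                   # (n_snps, n_bytes) 2-bit codes
        geno[offset::4, :] = lookup[codes[:, :count]].T     # remap PLINK codes and transpose
    return geno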
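The two wrappers in rsvd.pyx are exactly the dense products a randomized SVD of a uint8 genotype matrix needs: a column-space sketch Y = A @ Omega and the projection B = Q.T @ A onto its orthonormal basis. A minimal driver in the style of Halko et al. might look like the sketch below; the function name randomized_svd_sketch, the oversampling default, and the use of NumPy's qr/svd are assumptions for illustration only (the package's own driver presumably lives in neural_admixture/src/svd.py, which is not shown in this listing).

import numpy as np
from neural_admixture.src.utils_c import rsvd

def randomized_svd_sketch(A: np.ndarray, k: int, oversample: int = 10, seed: int = 0):
    """Approximate rank-k SVD of a C-contiguous uint8 matrix A of shape (n, m)."""
    rng = np.random.default_rng(seed)
    k_prime = k + oversample
    # Range finder: sketch the column space of A and orthonormalize it.
    Omega = rng.standard_normal((A.shape[1], k_prime)).astype(np.float32)
    Y = rsvd.multiply_A_omega(A, Omega)                                    # (n, k') sketch
    Q, _ = np.linalg.qr(Y)                                                 # orthonormal basis
    # Project A onto the basis and factor the small k' x m matrix.
    B = rsvd.multiply_QT_A(np.ascontiguousarray(Q.T, dtype=np.float32), A)
    U_small, S, Vt = np.linalg.svd(B, full_matrices=False)
    U = Q @ U_small
    return U[:, :k], S[:k], Vt[:k, :]

Keeping A in uint8 and materializing only the float32 sketches of size n x k' and k' x m is what keeps the memory footprint manageable for large genotype matrices.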