├── .gitmodules
├── benchmarks
│   ├── sim5g.sh
│   ├── simHC_edges.sh
│   ├── simHC.sh
│   ├── aale_loss_vae.sh
│   ├── aale_loss_gcn3.sh
│   ├── losscoef_vae.sh
│   ├── noise_experiments.sh
│   ├── test_graphmb.sh
│   ├── strong100.sh
│   ├── aale_gnn.sh
│   ├── run_wwtp.sh
│   ├── aale.sh
│   ├── run_strong100.sh
│   ├── aale_lr.sh
│   ├── run_test.sh
│   ├── runs.sh
│   ├── aale_vae.sh
│   └── aale_bs.sh
├── src
│   └── graphmb
│       ├── version.py
│       ├── data
│       │   ├── kernel.npz
│       │   └── Bacteria.ms
│       ├── __init__.py
│       ├── arg_options.py
│       ├── dgl_dataset.py
│       ├── utils.py
│       ├── train_gnn.py
│       ├── visualize.py
│       ├── unused
│       │   ├── train_vae.py
│       │   └── train_gnn_decode.py
│       └── amber_eval.py
├── MANIFEST.in
├── .dockerignore
├── docs
│   ├── source
│   │   ├── modules.rst
│   │   ├── _build
│   │   │   └── html
│   │   │       ├── objects.inv
│   │   │       ├── _static
│   │   │       │   ├── file.png
│   │   │       │   ├── plus.png
│   │   │       │   ├── minus.png
│   │   │       │   ├── css
│   │   │       │   │   ├── fonts
│   │   │       │   │   │   ├── lato-bold.woff
│   │   │       │   │   │   ├── lato-bold.woff2
│   │   │       │   │   │   ├── lato-normal.woff
│   │   │       │   │   │   ├── lato-normal.woff2
│   │   │       │   │   │   ├── Roboto-Slab-Bold.woff
│   │   │       │   │   │   ├── Roboto-Slab-Bold.woff2
│   │   │       │   │   │   ├── fontawesome-webfont.eot
│   │   │       │   │   │   ├── fontawesome-webfont.ttf
│   │   │       │   │   │   ├── lato-bold-italic.woff
│   │   │       │   │   │   ├── lato-bold-italic.woff2
│   │   │       │   │   │   ├── lato-normal-italic.woff
│   │   │       │   │   │   ├── Roboto-Slab-Regular.woff
│   │   │       │   │   │   ├── Roboto-Slab-Regular.woff2
│   │   │       │   │   │   ├── fontawesome-webfont.woff
│   │   │       │   │   │   ├── fontawesome-webfont.woff2
│   │   │       │   │   │   └── lato-normal-italic.woff2
│   │   │       │   │   └── badge_only.css
│   │   │       │   ├── documentation_options.js
│   │   │       │   ├── js
│   │   │       │   │   ├── badge_only.js
│   │   │       │   │   ├── html5shiv.min.js
│   │   │       │   │   ├── html5shiv-printshiv.min.js
│   │   │       │   │   └── theme.js
│   │   │       │   ├── pygments.css
│   │   │       │   ├── doctools.js
│   │   │       │   └── language_data.js
│   │   │       ├── .doctrees
│   │   │       │   ├── index.doctree
│   │   │       │   ├── intro.doctree
│   │   │       │   ├── examples.doctree
│   │   │       │   ├── graphmb.doctree
│   │   │       │   ├── modules.doctree
│   │   │       │   └── environment.pickle
│   │   │       ├── .buildinfo
│   │   │       └── searchindex.js
│   │   ├── generated
│   │   │   └── graphmb.rst
│   │   ├── index.rst
│   │   ├── intro.rst
│   │   ├── graphmb.rst
│   │   ├── conf.py
│   │   ├── development.rst
│   │   └── examples.rst
│   ├── Makefile
│   └── make.bat
├── results
│   ├── graphmb 0.2 results.ods
│   └── graphbemb experiments.ods
├── pyproject.toml
├── .gitignore
├── Dockerfile
├── .github
│   └── ISSUE_TEMPLATE
│       ├── feature_request.md
│       └── bug_report.md
├── setup.py
├── LICENSE
└── CHANGELOG.md
/.gitmodules:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/benchmarks/sim5g.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
--------------------------------------------------------------------------------
/src/graphmb/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.6"
2 |
3 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include src/graphmb/data/Bacteria.ms
2 | include src/graphmb/data/kernel.npz
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 | results/
4 | venv/
5 | graphmb.egg-info/
6 | docs/
7 |
--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | graphmb
2 | =======
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | graphmb
8 |
--------------------------------------------------------------------------------
/src/graphmb/data/kernel.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/src/graphmb/data/kernel.npz
--------------------------------------------------------------------------------
/results/graphmb 0.2 results.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/results/graphmb 0.2 results.ods
--------------------------------------------------------------------------------
/docs/source/_build/html/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/objects.inv
--------------------------------------------------------------------------------
/results/graphbemb experiments.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/results/graphbemb experiments.ods
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/file.png
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/plus.png
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/minus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/minus.png
--------------------------------------------------------------------------------
/src/graphmb/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | from . import contigsdataset
3 | from . import utils
4 | from . import evaluate
5 | from . import version
6 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | build-backend = "setuptools.build_meta"
3 | requires = ["setuptools~=58.0", "pip>=19,!=20.0,!=20.0.1,<21", "wheel"]
4 |
--------------------------------------------------------------------------------
/docs/source/_build/html/.doctrees/index.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/.doctrees/index.doctree
--------------------------------------------------------------------------------
/docs/source/_build/html/.doctrees/intro.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/.doctrees/intro.doctree
--------------------------------------------------------------------------------
/docs/source/_build/html/.doctrees/examples.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/.doctrees/examples.doctree
--------------------------------------------------------------------------------
/docs/source/_build/html/.doctrees/graphmb.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/.doctrees/graphmb.doctree
--------------------------------------------------------------------------------
/docs/source/_build/html/.doctrees/modules.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/.doctrees/modules.doctree
--------------------------------------------------------------------------------
/docs/source/_build/html/.doctrees/environment.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/.doctrees/environment.pickle
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/lato-bold.woff
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/lato-bold.woff2
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/lato-normal.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/lato-normal.woff
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/lato-normal.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/lato-normal.woff2
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/lato-bold-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/lato-bold-italic.woff
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/lato-bold-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/lato-bold-italic.woff2
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/lato-normal-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/lato-normal-italic.woff
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/fonts/lato-normal-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MicrobialDarkMatter/GraphMB/HEAD/docs/source/_build/html/_static/css/fonts/lato-normal-italic.woff2
--------------------------------------------------------------------------------
/docs/source/generated/graphmb.rst:
--------------------------------------------------------------------------------
1 | graphmb
2 | =======
3 |
4 | .. automodule:: graphmb
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/docs/source/_build/html/.buildinfo:
--------------------------------------------------------------------------------
1 | # Sphinx build info version 1
2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3 | config: 1ea206695607a08625ce58412a77a94b
4 | tags: 645f666f9bcd5a90fca523b33c5a78b7
5 |
--------------------------------------------------------------------------------
/benchmarks/simHC_edges.sh:
--------------------------------------------------------------------------------
1 | python src/graphmb/main.py --cuda --assembly ../data/simHC/ --outdir results/simHC_edges/ --assembly_name assembly.fasta --depth abundance.tsv.edges_jgi --evalskip 0 --epoch 100 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 10 --outname ae_lr1e-3 --nruns 1 --embsize_gnn 32
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.edges
2 | *_out/
3 | bins/
4 | data/
5 | *_logs/
6 | embeddings/
7 | graphs/
8 | *.ckpt
9 | .vscode/
10 | venv/
11 | *.tsv
12 | *.csv
13 | *.html
14 | *.txt
15 | *.gfa
16 | *.pkl
17 | dist/
18 | build/
19 | .eggs/
20 | results/
21 | *.out
22 | *.png
23 | *.sbatch
24 | *.pyc
25 | PKG-INFO
26 | mlruns/
27 | src/graphmb/unused/*
28 | src/graphmb/__pycache__/
29 | *.log
30 |
31 |
--------------------------------------------------------------------------------
/benchmarks/simHC.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # simHC experiments
3 |
4 | python src/graphmb/main.py --cuda --assembly ../data/simHC/ --outdir results/simHC/ --assembly_name contigs.fasta --depth abundance.tsv --contignodes --evalskip 0 --epoch 100 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 10 --outname ae_lr1e-3 --nruns 1 --labels amber_ground_truth.tsv --embsize_gnn 32
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/documentation_options.js:
--------------------------------------------------------------------------------
1 | var DOCUMENTATION_OPTIONS = {
2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
3 | VERSION: 'v0.1.2',
4 | LANGUAGE: 'None',
5 | COLLAPSE_INDEX: false,
6 | BUILDER: 'html',
7 | FILE_SUFFIX: '.html',
8 | LINK_SUFFIX: '.html',
9 | HAS_SOURCE: true,
10 | SOURCELINK_SUFFIX: '.txt',
11 | NAVIGATION_WITH_KEYS: false
12 | };
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:22.04
2 | ARG DEBIAN_FRONTEND=noninteractive
3 | RUN apt-get update -y && apt-get install wget unzip vim -y
4 | RUN apt-get update -y && apt-get install -y python3 python3-pip python3-dev git && apt-get autoclean -y
5 | #RUN apt-get update && apt-get install sqlite3 libsqlite3-dev -y
6 | #RUN ln -s $(which pip3) /usr/bin/pip
7 | RUN pip install --upgrade pip
8 |
9 | #RUN make /app
10 | COPY ./ /graphmb/
11 | #COPY ./data/strong100/ /graphmb/data/strong100/
12 | WORKDIR /graphmb
13 | RUN python3 -m pip install -e .
14 | #CMD python /app/app.py
15 |
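16 | # Example usage (illustrative tag; see docs/source/development.rst for the release workflow):
17 | #   sudo docker build . -t andrelamurias/graphmb:latest
18 | #   sudo docker run -it andrelamurias/graphmb:latest graphmb --help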
--------------------------------------------------------------------------------
/benchmarks/aale_loss_vae.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | for ga in 0 0.1 0.2 0.3 0.5 1
5 | do
6 | for sa in 0 0.1 0.2 0.3 0.5 1
7 | do
8 |
9 | #### VAE+GNN0
10 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
11 | --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha $ga \
12 | --ae_alpha 1 --scg_alpha $sa --lr_gnn 1e-2 --layers_gnn 0 --negatives 10 \
13 | --outname vaegcn_lr1e-2_e512_negs10_ga${ga}_sa${sa} --nruns 3 \
14 | --embsize_gnn 64 --skip_preclustering --quick --rawfeatures
15 |
16 | done
17 | done
18 |
--------------------------------------------------------------------------------
/benchmarks/aale_loss_gcn3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | for ga in 0.1
5 | do
6 | for sa in 0.1
7 | do
8 |
9 | #### VAE+GNN3
10 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
11 | --epoch 1000 --model gcn_ae --batchsize 0 --gnn_alpha $ga \
12 | --ae_alpha 1 --scg_alpha $sa --lr_gnn 1e-2 --layers_gnn 3 --negatives 10 \
13 | --outname vaegcn3_lr1e-2_ga${ga}_sa${sa}_pv50_fv_gd_bs0 --nruns 3 \
14 | --embsize_gnn 32 --skip_preclustering --quick --rawfeatures --concatfeatures \
15 | --vaepretrain 50 --decoder_input gnn
16 |
17 | done
18 | done
19 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. GraphMB documentation master file, created by
2 | sphinx-quickstart on Tue Dec 14 10:18:04 2021.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to GraphMB's documentation!
7 | ===================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | intro
14 | examples
15 | development
16 | graphmb
17 |
18 |
19 |
20 | Indices and tables
21 | ==================
22 |
23 | * :ref:`genindex`
24 | * :ref:`modindex`
25 | * :ref:`search`
26 |
--------------------------------------------------------------------------------
/benchmarks/losscoef_vae.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | wwtp=$1
4 | bs=128
5 | emb=32
6 | lr=1e-3
7 | quick=
8 | #quick="--quick"
9 | for ga in 0 0.1 0.2 0.3 0.5 1
10 | do
11 | for sa in 0 0.1 0.2 0.3 0.5 1
12 | do
13 |
14 | #### VAE+GNN0
15 | python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
16 | --epoch 1000 --model gcn_ae --batchsize ${bs} --gnn_alpha $ga \
17 | --ae_alpha 1 --scg_alpha $sa --lr_gnn ${lr} --layers_gnn 0 --negatives 10 \
18 | --outname vaegcn_lr${lr}_e${bs}_negs10_ga${ga}_sa${sa} --nruns 3 \
19 | --embsize_gnn ${emb} --skip_preclustering --rawfeatures ${quick}
20 |
21 | done
22 | done
23 |
--------------------------------------------------------------------------------
/benchmarks/noise_experiments.sh:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | python src/graphmb/main.py --assembly ../data/strong100/ --outdir results/simdata/ --evalskip 200 --epoch 1000 \
5 | --model gcn_ae --rawfeatures --gnn_alpha 1 --ae_alpha 0 --scg_alpha 1 --lr_gnn 1e-3 \
6 | --layers_gnn 1 --read_cache --markers "" --scg_alpha 0 --noise
7 |
8 | python src/graphmb/main.py --assembly ../data/strong100/ --outdir results/strong100/ --evalskip 200 --epoch 1000 \
9 | --model gcn_ae --rawfeatures --gnn_alpha 1 --ae_alpha 0 --scg_alpha 1 --lr_gnn 1e-3 \
10 | --layers_gnn 1 --scg_alpha 0 --noise
--------------------------------------------------------------------------------
/benchmarks/test_graphmb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | DATADIR=data/
3 |
4 | #check data
5 | if [ -d $DATADIR/strong100 ]
6 | then
7 | echo "Strong100 dataset found"
8 | else
9 | echo "Error: dataset not found, downloading"
10 | cd $DATADIR; wget https://zenodo.org/record/6122610/files/strong100.zip; unzip strong100.zip
11 | fi
12 |
13 | # check venv
14 | if [ -d "./venv/" ]
15 | then
16 | echo "venv found"
17 | source venv/bin/activate
18 | else
19 | echo "venv not found"
20 | python -m venv venv
21 | source venv/bin/activate
22 | pip install -e .
23 | fi
24 |
25 | python src/graphmb/main.py --assembly $DATADIR/strong100/ --outdir results/strong100/ --markers marker_gene_stats.tsv
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is, the command you used and error you obtained.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Environment used
16 | 2. Type of data
17 | 3. How GraphMB was installed
18 | 4. Options used
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Output messages**
24 | If applicable, add your output messages to help explain your problem.
25 |
26 |
27 | **Additional context**
28 | Add any other context about the problem here.
29 |
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/js/badge_only.js:
--------------------------------------------------------------------------------
1 | !function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}});
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.https://www.sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from setuptools.command.install import install
3 | import os
4 | import subprocess
5 | from distutils.util import convert_path
6 |
7 | main_ns = {}
8 | ver_path = convert_path("src/graphmb/version.py")
9 | with open(ver_path) as ver_file:
10 | exec(ver_file.read(), main_ns)
11 |
12 | setup(
13 | name="graphmb",
14 | version=main_ns["__version__"],
15 | packages=["graphmb"],
16 | python_requires=">=3.8",
17 | package_dir={"": "src"},
18 | setup_requires=["setuptools~=58.0", "wheel", "sphinx-rtd-theme", "twine"],
19 | install_requires=[
20 | "wheel",
21 | "requests",
22 | "networkx==2.6.2",
23 | "torch==1.13.1",
24 | "tensorflow==2.11.1",
25 | "tqdm==4.61.2",
26 | "mlflow==2.6.0",
27 | "importlib_resources"
28 |
29 | ],
30 | entry_points={
31 | "console_scripts": ["graphmb=graphmb.main:main"],
32 | },
33 | include_package_data=True,
34 | )
35 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2018 The Python Packaging Authority
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
--------------------------------------------------------------------------------
/src/graphmb/data/Bacteria.ms:
--------------------------------------------------------------------------------
1 | # [Taxon Marker File]
2 | Bacteria 1 0 Bacteria 5449 [{'PF01193.19', 'PF00411.14', 'PF00416.17', 'PF01000.21', 'PF01196.14'}, {'PF00281.14', 'PF00861.17', 'PF03719.10', 'PF00828.14', 'PF00673.16', 'PF00347.18', 'PF00238.14', 'PF00333.15', 'TIGR01079', 'TIGR00967', 'PF00410.14'}, {'PF00573.17', 'PF00181.18', 'PF00366.15', 'PF00831.18', 'PF00276.15', 'PF00203.16', 'PF03947.13', 'PF00189.15', 'PF00297.17', 'PF00252.13', 'PF00237.14'}, {'PF05000.12', 'PF04998.12', 'PF00562.23', 'PF04560.15', 'PF10385.4', 'PF04997.7', 'PF04983.13', 'PF04565.11', 'PF04563.10', 'PF04561.9', 'PF00623.15'}, {'PF00572.13', 'PF00380.14'}, {'PF00298.14', 'PF00687.16', 'PF03946.9'}, {'PF01281.14', 'PF03948.9'}, {'PF08529.6', 'PF13184.1'}, {'PF00453.13', 'PF01632.14'}, {'PF00164.20', 'PF00177.16'}, {'TIGR00855', 'PF00466.15'}, {'PF02912.13', 'PF01409.15'}, {'PF00889.14', 'PF00318.15'}, {'PF00829.16', 'PF01016.14'}, {'TIGR03723', 'TIGR00329'}, {'PF01668.13'}, {'PF01250.12'}, {'PF00312.17'}, {'PF01121.15'}, {'TIGR00459'}, {'PF01245.15'}, {'TIGR00755'}, {'PF02130.12'}, {'PF02367.12'}, {'TIGR03594'}, {'PF02033.13'}, {'TIGR00615'}, {'TIGR00084'}, {'PF01018.17'}, {'PF01195.14'}, {'TIGR00019'}, {'PF01649.13'}, {'PF01795.14'}, {'TIGR00250'}, {'PF00886.14'}, {'PF06421.7'}, {'PF11987.3'}, {'PF00338.17'}, {'TIGR00392'}, {'PF01509.13'}, {'PF01746.16'}, {'PF06071.8'}, {'PF05697.8'}, {'TIGR00922'}, {'PF02978.14'}, {'PF03484.10'}, {'TIGR02075'}, {'TIGR00810'}, {'PF13603.1'}, {'PF01765.14'}, {'PF00162.14'}, {'PF12344.3'}, {'TIGR02432'}, {'TIGR00460'}, {'PF05491.8'}, {'TIGR03263'}, {'PF08459.6'}, {'TIGR00344'}]
3 |
--------------------------------------------------------------------------------
/benchmarks/strong100.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | python src/graphmb/main.py --assembly data/strong100/ --outdir results/strong100/ --evalskip 100 \
5 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
6 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 0 \
7 | --outname vae_lr1e-3_bs256 --nruns 3 --labels amber_ground_truth.tsv \
8 | --embsize_gnn 32 --quick
9 |
10 |
11 | python src/graphmb/main.py --assembly data/strong100/ --outdir results/strong100/ --evalskip 100 \
12 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
13 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-4 --layers_gnn 0 \
14 | --outname vae_lr1e-4_bs256 --nruns 3 --labels amber_ground_truth.tsv \
15 | --embsize_gnn 32 --skip_preclustering --quick
16 |
17 |
18 | python src/graphmb/main.py --assembly data/strong100/ --outdir results/strong100/ --evalskip 100 \
19 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0.1 \
20 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-4 --layers_gnn 0 --negatives 5 \
21 | --outname vaegcn_lr1e-4_edgesbatch256_negs5 --nruns 3 --labels amber_ground_truth.tsv \
22 | --embsize_gnn 32 --batchtype edges --skip_preclustering --quick
23 |
24 |
25 | python src/graphmb/main.py --assembly data/strong100/ --outdir results/strong100/ --evalskip 100 \
26 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0.1 \
27 | --ae_alpha 1 --scg_alpha 0.3 --lr_gnn 1e-4 --layers_gnn 0 --negatives 5 \
28 | --outname vaegcn_lr1e-4_bs256_negs5_scg --nruns 3 --labels amber_ground_truth.tsv \
29 | --embsize_gnn 32 --skip_preclustering --quick
--------------------------------------------------------------------------------
/benchmarks/aale_gnn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #### baseline VAE lr 1e-3
4 | #python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
5 | # --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
6 | # --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 0 \
7 | # --outname vae_lr1e-3_nodesbatch256 --nruns 3 --labels amber_ground_truth_species.tsv \
8 | # --embsize_gnn 32 --batchtype nodes
9 |
10 |
11 |
12 | #### VAE+GNN1 and VAE+GNN2
13 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
14 | --epoch 1000 --model gcn_ae --batchsize 256 --gnn_alpha 0.1 \
15 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 1 --negatives 10 \
16 | --outname vaegcn1_lr1e-3_negs10 --nruns 3 --labels amber_ground_truth_species.tsv \
17 | --embsize_gnn 64 --skip_preclustering --quick
18 |
19 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
20 | --epoch 1000 --model gcn_ae --batchsize 256 --gnn_alpha 0.1 \
21 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 2 --negatives 10 \
22 | --outname vaegcn2_lr1e-3_negs10 --nruns 3 --labels amber_ground_truth_species.tsv \
23 | --embsize_gnn 64 --skip_preclustering --quick
24 |
25 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
26 | --epoch 1000 --model gcn_ae --batchsize 256 --gnn_alpha 0.1 \
27 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 2 --negatives 10 \
28 | --outname vaegcn2_lr1e-3_negs10 --nruns 3 --labels amber_ground_truth_species.tsv \
29 | --embsize_gnn 64 --skip_preclustering --quick
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | # Change Log
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [this sample changelog](https://gist.github.com/juampynr/4c18214a8eb554084e21d6e288a18a2c).
6 |
7 | ## [0.2.5] - 2023-07-18
8 | - Update MLflow version
9 | - Fix data dir missing from src/graphmb
10 | - Add option to write bins to fasta files (--writebins)
11 |
12 | ## [0.2.4] - 2023-03-31
13 | - Update Tensorflow and MLflow versions
14 |
15 |
16 | ## [0.2.3] - 2023-02-03
17 |
18 | ### Changed
19 | - vaepretrain parameter controls the number of epochs of VAE pre-training (default 500)
20 |
21 | ## [0.2.2] - 2023-02-02
22 |
23 | ### Fixed
24 | - Correct wheel file
25 |
26 | ## [0.2.0] - 2023-02-01
27 |
28 | ### Added
29 | - VAE, GCN, SAGE and GAT models based on tensorflow (VAEG code)
30 | - SCG-based loss to train VAE and GNNs
31 | - Output assembly stats while starting
32 | - Eliminate VAMB and DGL dependencies
33 | - PyPI installation
34 |
35 | ### Changed
36 | - Code structure changed to load data outside of DGL and use DGL only for the GraphSAGE-LSTM model
37 | - Log dataloading steps
38 | - Write cache to numpy files
39 |
40 | ### Fixed
41 | - Feature files are written to specific directories (fixes #17)
42 |
43 | ## [0.1.3] - 2022-02-25
44 |
45 | bioRxiv version
46 |
47 | `pip install . --upgrade`
48 |
49 | ### Added
50 | - Dockerfile and docker image link
51 | - Set seed option
52 | - Eval interval option
53 |
54 | ### Changed
55 |
56 | - Change default file name
57 |
58 |
59 | ### Fixed
60 |
61 | - Assembly dir option is no longer mandatory, so files can be in different directories
62 | - Logging also includes errors
63 | - DGL should no longer write a file to ~/
64 |
65 |
--------------------------------------------------------------------------------
/docs/source/intro.rst:
--------------------------------------------------------------------------------
1 | Introduction
2 | ============
3 |
4 | GraphMB is a metagenomic binner developed for long-read assemblies that takes advantage of graph machine learning
5 | algorithms and the assembly graph produced by the assembler.
6 | It has been tested on (meta)Flye assemblies.
7 |
8 | Installation
9 | ************
10 |
11 | Option 1 - From wheel::
12 | pip install https://github.com/AndreLamurias/GraphMB/releases/download/v0.1.2/graphmb-0.1.2-py3-none-any.whl
13 |
14 |
15 | Option 2 - From source::
16 | git clone https://github.com/AndreLamurias/GraphMB
17 | cd GraphMB
18 | python -m venv venv; source venv/bin/activate # optional
19 | pip install .
20 |
21 |
22 | Option 3 - From anaconda::
23 | conda install -c andrelamurias graphmb
24 |
25 | Option 4 - From pip::
26 |
27 | pip install graphmb
28 |
29 |
30 | Input files
31 | ***********
32 | The only files required are the contigs in fasta format, and the assembly graph in GFA format. For optimal performance,
33 | the assembly graph should be generated with Flye 2.9, since it includes the number of reads mapping to each pair of
34 | contigs. For better results, CheckM is run on each contig using the general Bacteria marker sets. This step is
35 | optional, though: you can simply run the model for a set number of epochs and pick the last model.
36 | By default, GraphMB runs with early stopping.
37 |
38 | In summary, you need to have a directory with these files:
39 | - edges.fasta
40 | - assembly_graph.gfa
41 | - edges_depth.txt (output of `jgi_summarize_bam_contig_depths`)
42 | - marker_gene_stats.tsv (optional)
43 |
44 | You can get an example of these files at https://drive.google.com/drive/folders/1m6uTgTPUghk_q9GxfX1UNEOfn8jnIdt5?usp=sharing
45 | Download the archive from that link and extract it to data/strong100.
46 |
47 |
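48 | As a minimal example, assuming the files above were extracted to ``data/strong100`` and GraphMB was
49 | installed with one of the options above, a run could look like this (``graphmb`` is the console script
50 | that wraps ``graphmb.main``):
51 |
52 | .. code-block:: bash
53 |
54 |    graphmb --assembly data/strong100/ --outdir results/strong100/ --markers marker_gene_stats.tsv
55 |
56 | This mirrors benchmarks/test_graphmb.sh, which runs the same options through ``python src/graphmb/main.py``.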
--------------------------------------------------------------------------------
/docs/source/graphmb.rst:
--------------------------------------------------------------------------------
1 | graphmb package
2 | ===============
3 |
4 | Submodules
5 | ----------
6 |
7 | graphmb.contigsdataset module
8 | -----------------------------
9 |
10 | .. automodule:: graphmb.contigsdataset
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | graphmb.evaluate module
16 | -----------------------
17 |
18 | .. automodule:: graphmb.evaluate
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 | graphmb.utils module
24 | -------------------------------
25 |
26 | .. automodule:: graphmb.utils
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 |
31 | graphmb.models module
32 | -------------------------------
33 |
34 | .. automodule:: graphmb.models
35 | :members:
36 | :undoc-members:
37 | :show-inheritance:
38 |
39 | graphmb.gnn_models module
40 | -------------------------------
41 |
42 | .. automodule:: graphmb.gnn_models
43 | :members:
44 | :undoc-members:
45 | :show-inheritance:
46 |
47 | graphmb.train_ccvae module
48 | -------------------------------
49 |
50 | .. automodule:: graphmb.train_ccvae
51 | :members:
52 | :undoc-members:
53 | :show-inheritance:
54 |
55 | graphmb.train_gnn module
56 | -------------------------------
57 |
58 | .. automodule:: graphmb.train_gnn
59 | :members:
60 | :undoc-members:
61 | :show-inheritance:
62 |
63 | graphmb.graphsage\_unsupervised module
64 | --------------------------------------
65 |
66 | .. automodule:: graphmb.graphsage_unsupervised
67 | :members:
68 | :undoc-members:
69 | :show-inheritance:
70 |
71 | graphmb.main module
72 | -------------------
73 |
74 | .. automodule:: graphmb.main
75 | :members:
76 | :undoc-members:
77 | :show-inheritance:
78 |
79 | Module contents
80 | ---------------
81 |
82 | .. automodule:: graphmb
83 | :members:
84 | :undoc-members:
85 | :show-inheritance:
86 |
--------------------------------------------------------------------------------
/benchmarks/run_wwtp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | wwtp=$1
3 |
4 | #### baseline VAE lr 1e-2
5 | #python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
6 | # --epoch 500 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
7 | # --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-2 --layers_gnn 0 --negatives 0 \
8 | # --outname vae_lr1e-2_nb256 --nruns 3 \
9 | # --embsize_gnn 64 --quick --batchtype nodes
10 |
11 |
12 | #### baseline VAE lr 1e-3
13 | #python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
14 | # --epoch 500 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
15 | # --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 0 \
16 | # --outname vae_lr1e-3_nb256 --nruns 3 \
17 | # --embsize_gnn 64 --quick --batchtype nodes
18 |
19 | #### baseline VAE lr 1e-4
20 | #python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
21 | # --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
22 | # --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-4 --layers_gnn 0 \
23 | # --outname vae_lr1e-4_nb256 --nruns 3 \
24 | # --skip_preclustering --embsize_gnn 64 --quick --batchtype nodes
25 |
26 |
27 | #### VAE+GNN0
28 | python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
29 | --epoch 2000 --evalepoch 20 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0.1 \
30 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 10 \
31 | --outname vaegcn_lr1e-3_eb256_negs10_gnn0.1 --nruns 3 \
32 | --skip_preclustering --embsize_gnn 64 --quick
33 |
34 | #### VAE+GNN0+SCG
35 | python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 10 \
36 | --epoch 2000 --evalepoch 20 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0.1 \
37 | --ae_alpha 1 --scg_alpha 0.3 --lr_gnn 1e-3 --layers_gnn 0 --negatives 10 \
38 | --outname vaegcn_lr1e-3_eb256_negs10_scg0.1_gnn0.3 --nruns 3 \
39 | --skip_preclustering --embsize_gnn 64 --quick
40 |
--------------------------------------------------------------------------------
/benchmarks/aale.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #### baseline VAE lr 1e-3
4 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
5 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
6 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 0 \
7 | --outname vae_lr1e-3_nodesbatch256 --nruns 3 --labels amber_ground_truth_species.tsv \
8 | --embsize_gnn 64 --batchtype nodes
9 |
10 | #### baseline VAE lr 1e-4
11 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
12 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
13 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-4 --layers_gnn 0 \
14 | --outname vae_lr1e-4_nodesbatch256 --nruns 3 --labels amber_ground_truth_species.tsv \
15 | --embsize_gnn 32 --batchtype nodes --skip_preclustering
16 |
17 |
18 | #### VAE+GNN0
19 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
20 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 1 \
21 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-4 --layers_gnn 0 --negatives 10 \
22 | --outname vaegcn_lr1e-4_edgesbatch256_negs10 --nruns 3 --labels amber_ground_truth_species.tsv \
23 | --embsize_gnn 32 --batchtype edges --skip_preclustering
24 |
25 | #### VAE+GNN0+SCG
26 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
27 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 1 \
28 | --ae_alpha 1 --scg_alpha 1 --lr_gnn 1e-4 --layers_gnn 0 --negatives 10 \
29 | --outname vaegcn_lr1e-4_edgesbatch256_negs10_scg1 --nruns 3 --labels amber_ground_truth_species.tsv \
30 | --skip_preclustering
31 |
32 | #### VAE+GNN3+SCG 55+1/175+3
33 | python src/graphmb/main.py --cuda --assembly ../data/aale/ --outdir results/aale/ --evalskip 100 \
34 | --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 1 \
35 | --ae_alpha 1 --scg_alpha 1 --lr_gnn 1e-4 --layers_gnn 3 --negatives 10 \
36 | --outname vaegcn_lr1e-4_edgesbatch256_negs10_noise --nruns 3 --labels amber_ground_truth_species.tsv \
37 | --skip_preclustering
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 |
16 | sys.path.insert(0, os.path.abspath("../../src/"))
17 |
18 |
19 | # -- Project information -----------------------------------------------------
20 |
21 | project = "GraphMB"
22 | copyright = "2022, Andre Lamurias"
23 | author = "Andre Lamurias"
24 |
25 | # The full version, including alpha/beta/rc tags
26 | release = "v0.2.0"
27 |
28 |
29 | # -- General configuration ---------------------------------------------------
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | # extensions = []
35 | extensions = ["sphinx.ext.napoleon", 'sphinx.ext.autodoc', 'sphinx.ext.autosummary']
36 |
37 | # Add any paths that contain templates here, relative to this directory.
38 | templates_path = ["_templates"]
39 |
40 | # List of patterns, relative to source directory, that match files and
41 | # directories to ignore when looking for source files.
42 | # This pattern also affects html_static_path and html_extra_path.
43 | exclude_patterns = []
44 |
45 |
46 | # -- Options for HTML output -------------------------------------------------
47 |
48 | # The theme to use for HTML and HTML Help pages. See the documentation for
49 | # a list of builtin themes.
50 | #
51 | html_theme = "sphinx_rtd_theme"
52 |
53 | # Add any paths that contain custom static files (such as style sheets) here,
54 | # relative to this directory. They are copied after the builtin static files,
55 | # so a file named "default.css" will overwrite the builtin "default.css".
56 | html_static_path = ["_static"]
57 |
--------------------------------------------------------------------------------
/benchmarks/run_strong100.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | wwtp=$1
3 |
4 | #### baseline VAE lr 1e-2
5 | #python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
6 | # --epoch 500 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
7 | # --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-2 --layers_gnn 0 --negatives 0 \
8 | # --outname vae_lr1e-2_nb256 --nruns 3 \
9 | # --embsize_gnn 64 --quick --batchtype nodes
10 |
11 |
12 | #### baseline VAE lr 1e-3
13 | #python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
14 | # --epoch 500 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
15 | # --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 0 \
16 | # --outname vae_lr1e-3_nb256 --nruns 3 \
17 | # --embsize_gnn 64 --quick --batchtype nodes
18 |
19 | #### baseline VAE lr 1e-3
20 | #python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
21 | # --epoch 1000 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0 \
22 | # --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 \
23 | # --outname vae_lr1e-3_nb256 --nruns 3 --labels amber_ground_truth.tsv \
24 | # --skip_preclustering --embsize_gnn 64 --quick --batchtype nodes
25 |
26 |
27 | #### VAE+GNN0
28 | python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 100 \
29 | --epoch 2000 --evalepoch 20 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0.1 \
30 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 10 \
31 | --outname vaegcn_lr1e-3_nb256_negs10_gnn0.1 --nruns 3 --labels amber_ground_truth.tsv \
32 | --skip_preclustering --embsize_gnn 64 --batchtype nodes
33 |
34 | #### VAE+GNN0+SCG
35 | python src/graphmb/main.py --cuda --assembly ../data/$wwtp/ --outdir results/$wwtp/ --evalskip 10 \
36 | --epoch 2000 --evalepoch 20 --model gcn_ae --rawfeatures --batchsize 256 --gnn_alpha 0.1 \
37 | --ae_alpha 1 --scg_alpha 0.3 --lr_gnn 1e-3 --layers_gnn 0 --negatives 10 \
38 | --outname vaegcn_lr1e-3_nb256_negs10_scg0.1_gnn0.3 --nruns 3 --labels amber_ground_truth.tsv \
39 | --skip_preclustering --embsize_gnn 64 --batchtype nodes
40 |
--------------------------------------------------------------------------------
/docs/source/development.rst:
--------------------------------------------------------------------------------
1 | Development
2 | ===========
3 |
4 | Code structure
5 | ****************
6 |
7 | GraphMB contains options to experiment with model architecture and training,
8 | as well as with pre-processing of data and post-processing of the results.
9 | The core of GraphMB is a set of deep learning models that map contigs into an
10 | embedding space.
11 |
12 | The files **models.py**, **gnn_models.py**, and **layers.py** contain the
13 | tensorflow models used in version 0.2.
14 | These files also contain the trainer helper functions and the loss functions.
15 | The files **train_ccvae.py** and **train_gnn.py** contain the training loops of those models.
16 | **graphsage_unsupervised.py** contains the model used by v0.1, while **graphmb1.py**
17 | contains helper functions used in the initial GraphMB release (no longer used).
18 |
19 | The file **evaluate.py** contains several evaluation metrics, and a function to
20 | run a clustering algorithm on the embeddings and evaluate the output.
21 | The main clustering algorithm is in **vamb_clustering.py**, as it was originally developed
22 | for VAMB. The file **amber_eval.py** is adapted from the AMBER evaluation tool, to compute
23 | the same metrics as that tool.
24 |
25 | The file **contigsdataset.py** contains the code to read, pre-process and save a
26 | set of contigs, along with their assembly graph, depth, single-copy marker genes
27 | and embeddings. It also computes several stats on a dataset.
28 | The file **dgl_dataset.py** contains code to convert the v0.2 AssemblyDataset class
29 | to the one used by v0.1.
30 | The file **utils.py** contains some additional helper functions that did not fit
31 | elsewhere.
32 |
33 | Finally, all the running parameters are stored in **arg_options.py**.
34 | The main file **main.py** reads these parameters and executes the experiments
35 | accordingly. **version.py** is used only to store the current version number.
36 | **setup.py** defines the dependencies and other parameters to build a new version.
37 |
38 |
39 |
40 | Typical workflow for new versions
41 | **********************************
42 | Useful commands to build a new version:
43 |
44 | .. code-block:: bash
45 |
46 | python setup.py sdist bdist_wheel
47 | python -m twine upload dist/graphmb-X.X.X*
48 | cd docs; make html
49 | sudo docker build . -t andrelamurias/graphmb:X.X.X
50 | sudo docker push andrelamurias/graphmb:X.X.X
51 |
52 |
53 | Documentation
54 | ****************
55 | The documentation is stored in docs/ and uses Sphinx to generate HTML pages.
56 | The docstrings of each function and class are added automatically. If new source
57 | code files are added, they should also be added to docs/source/graphmb.rst.
58 |
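59 | For example, a stanza for **visualize.py** (present in src/graphmb but not yet listed there) would
60 | follow the pattern of the existing entries:
61 |
62 | .. code-block:: rst
63 |
64 |    graphmb.visualize module
65 |    ------------------------
66 |
67 |    .. automodule:: graphmb.visualize
68 |       :members:
69 |       :undoc-members:
70 |       :show-inheritance: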
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/js/html5shiv.min.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
3 | */
4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document);
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/css/badge_only.css:
--------------------------------------------------------------------------------
1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}
--------------------------------------------------------------------------------
/benchmarks/aale_lr.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | wwtp=$1
3 |
4 |
5 | #### baseline VAE lr 1e-3
6 | #python src/graphmb/main.py --cuda --assembly ../data/${wwtp}/ --outdir results/${wwtp}/ --evalskip 100 \
7 | # --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha 0 \
8 | # --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 0 \
9 | # --outname vae_lr1e-3_b512 --nruns 3 \
10 | # --embsize_gnn 32 --batchtype edges --rawfeatures
11 |
12 |
13 |
14 | #### VAE+GNN0, lr 1e-1
15 | python src/graphmb/main.py --cuda --assembly ../data/${wwtp}/ --outdir results/${wwtp}/ --evalskip 100 \
16 | --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha 0.1 \
17 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-1 --layers_gnn 0 --negatives 10 \
18 | --outname vaegcn_lr1e-1_eb512_negs10 --nruns 3 \
19 | --embsize_gnn 64 --skip_preclustering --quick --rawfeatures
20 |
21 | #### VAE+GNN0+SCG
22 | #python src/graphmb/main.py --cuda --assembly ../data/${wwtp}/ --outdir results/${wwtp}/ --evalskip 100 \
23 | # --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha 1 \
24 | # --ae_alpha 1 --scg_alpha 1 --lr_gnn 1e-4 --layers_gnn 0 --negatives 10 \
25 | # --outname vaegcn_lr1e-4_edgesbatch512_negs10_scg1 --nruns 3 \
26 | # --skip_preclustering
27 |
28 | #### VAE+GNN0, lr 1e-2
29 | python src/graphmb/main.py --cuda --assembly ../data/${wwtp}/ --outdir results/${wwtp}/ --evalskip 100 \
30 | --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha 0.1 \
31 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-2 --layers_gnn 0 --negatives 10 \
32 | --outname vaegcn_lr1e-2_eb512_negs10 --nruns 3 \
33 | --embsize_gnn 64 --skip_preclustering --quick --rawfeatures
34 |
35 | #### VAE+GNN0, lr 1e-3
36 | python src/graphmb/main.py --cuda --assembly ../data/${wwtp}/ --outdir results/${wwtp}/ --evalskip 100 \
37 | --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha 0.1 \
38 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-3 --layers_gnn 0 --negatives 10 \
39 | --outname vaegcn_lr1e-3_eb512_negs10 --nruns 3 \
40 | --embsize_gnn 64 --skip_preclustering --quick --rawfeatures
41 |
42 | #### VAE+GNN0, lr 5e-4 (gnn_alpha 1)
43 | python src/graphmb/main.py --cuda --assembly ../data/${wwtp}/ --outdir results/${wwtp}/ --evalskip 100 \
44 | --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha 1 \
45 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 5e-4 --layers_gnn 0 --negatives 10 \
46 | --outname vaegcn_lr5e-4_eb512_negs10 --nruns 3 \
47 | --embsize_gnn 64 --skip_preclustering --quick --rawfeatures
48 |
49 | #### VAE+GNN0, lr 1e-4 (gnn_alpha 1)
50 | python src/graphmb/main.py --cuda --assembly ../data/${wwtp}/ --outdir results/${wwtp}/ --evalskip 100 \
51 | --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha 1 \
52 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-4 --layers_gnn 0 --negatives 10 \
53 | --outname vaegcn_lr1e-4_eb512_negs10 --nruns 3 \
54 | --embsize_gnn 64 --skip_preclustering --quick --rawfeatures
55 |
56 | #### VAE+GNN0, lr 1e-5 (gnn_alpha 1)
57 | python src/graphmb/main.py --cuda --assembly ../data/${wwtp}/ --outdir results/${wwtp}/ --evalskip 100 \
58 | --epoch 1000 --model gcn_ae --batchsize 512 --gnn_alpha 1 \
59 | --ae_alpha 1 --scg_alpha 0 --lr_gnn 1e-5 --layers_gnn 0 --negatives 10 \
60 | --outname vaegcn_lr1e-5_eb512_negs10 --nruns 3 \
61 | --embsize_gnn 64 --skip_preclustering --quick --rawfeatures
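62 |
63 | # Usage: bash aale_lr.sh <wwtp_dataset>  (the argument names a directory under ../data/)
64 | # The active runs above sweep --lr_gnn over 1e-1, 1e-2, 1e-3, 5e-4, 1e-4 and 1e-5 for the
65 | # VAE+GNN0 setup (--model gcn_ae with --layers_gnn 0), three runs each (--nruns 3).
66 | # Note that the 5e-4, 1e-4 and 1e-5 runs use --gnn_alpha 1 while the higher learning rates
67 | # use --gnn_alpha 0.1, so gnn_alpha changes along with the learning rate in this sweep.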
--------------------------------------------------------------------------------
/benchmarks/run_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -x
3 | set -e
4 |
5 | export CUDA_VISIBLE_DEVICES=0
6 | dataset=$1
7 | #source venv/bin/activate
8 | #python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
9 | # --model_name vae --markers marker_gene_stats.tsv --epoch 500 \
10 | # --nruns 1 --evalepochs 20 --outname vae --batchsize 256 \
11 | # --evalskip 200 --labels amber_ground_truth_species.tsv
12 | #mv results/$dataset/vae_best_embs.pickle ../data/$dataset/
13 |
14 | #python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
15 | # --model_name sage_lstm --markers marker_gene_stats.tsv --epoch 500 \
16 | # --nruns 1 --evalepochs 20 --outname sagelstm --skip_preclustering \
17 | # --features vae_best_embs.pickle --concat_features --evalskip 10 --labels amber_ground_truth_species.tsv
18 |
19 | #python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
20 | # --model_name gcn --markers marker_gene_stats.tsv --epoch 500 \
21 | # --nruns 1 --evalepochs 20 --outname gcn \
22 | # --features vae_best_embs.pickle --concat_features --evalskip 10 --labels amber_ground_truth_species.tsv
23 |
24 |
25 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
26 | --model_name gcn_ae --markers marker_gene_stats.tsv --epoch 1000 \
27 | --nruns 1 --evalepochs 20 --outname gcnae_nognn \
28 | --layers_gnn 0 --evalskip 200 --batchsize 256 --labels amber_ground_truth_species.tsv
29 |
30 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
31 | --model_name gcn_ae --markers marker_gene_stats.tsv --epoch 1000 \
32 | --nruns 1 --evalepochs 20 --outname gcnae \
33 | --concat_features --evalskip 200 --batchsize 256 --labels amber_ground_truth_species.tsv
34 |
35 | ## with GTDB
36 |
37 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
38 | --model_name vae --markers gtdb --epoch 500 \
39 | --nruns 1 --evalepochs 20 --outname vae_gtdb --batchsize 256 \
40 | --evalskip 200 --labels amber_ground_truth_species.tsv
41 | mv results/$dataset/vae_best_embs.pickle ../data/$dataset/
42 |
43 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
44 | --model_name sage_lstm --markers gtdb --epoch 500 \
45 | --nruns 1 --evalepochs 20 --outname sagelstm_gtdb --skip_preclustering \
46 | --features vae_best_embs.pickle --concat_features --evalskip 10 --labels amber_ground_truth_species.tsv
47 |
48 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
49 | --model_name gcn --markers gtdb --epoch 500 \
50 | --nruns 1 --evalepochs 20 --outname gcn_gtdb \
51 | --features vae_best_embs.pickle --concat_features --evalskip 10 --labels amber_ground_truth_species.tsv
52 |
53 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
54 | --model_name gcn_ae --markers gtdb --epoch 1000 \
55 | --nruns 1 --evalepochs 20 --outname gcnae_gtdb_nognn \
56 | --layers_gnn 0 --evalskip 200 --batchsize 256 --labels amber_ground_truth_species.tsv
57 |
58 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
59 | --model_name gcn_ae --markers gtdb --epoch 1000 \
60 | --nruns 1 --evalepochs 20 --outname gcnae_gtdb \
61 | --concat_features --evalskip 200 --batchsize 256 --labels amber_ground_truth_species.tsv
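62 |
63 | # Usage: bash run_test.sh <dataset>  (the argument names a directory under ../data/)
64 | # The active runs above exercise the gcn_ae model with and without GNN layers
65 | # (--layers_gnn 0), first with marker_gene_stats.tsv and then with GTDB markers
66 | # (--markers gtdb), always evaluating against amber_ground_truth_species.tsv.
67 | # If best-embedding files are named after --outname, the vae_gtdb run would write
68 | # vae_gtdb_best_embs.pickle, so the mv on line 41 and the --features flags that follow
69 | # may still point at an older vae_best_embs.pickle.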
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/js/html5shiv-printshiv.min.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
3 | */
4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header 
hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document);
--------------------------------------------------------------------------------
/benchmarks/runs.sh:
--------------------------------------------------------------------------------
1 | # VAE
2 |
3 | # multiple runs with filtered nodes (SCG and connected only)
4 | ############################################################
5 |
6 |
7 | dataset=$2
8 | # GCN on pre-trained AE features
9 | export CUDA_VISIBLE_DEVICES=$1
10 | quick=" --quick --nruns 5"
11 | #quick=""
12 | addname="_fixloss"
13 |
14 | #python src/graphmb/main.py --cuda --assembly ../data/$dataset --outdir results/$dataset --model_name vae \
15 | # --markers marker_gene_stats.tsv --batchsize 256 --epoch 500 --lr_vae 1e-3 \
16 | # --nruns 5 --evalepochs 20 --outname vae_baseline
17 |
18 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
19 | --model_name gcn --markers marker_gene_stats.tsv --epoch 500 \
20 | --evalepochs 20 --outname gcn_lr1e-4$addname --lr_gnn 1e-4 \
21 | --features vae_baseline_best_embs.pickle --concat_features --evalskip 200 $quick
22 |
23 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
24 | --model_name gcn --markers marker_gene_stats.tsv --epoch 500 \
25 | --evalepochs 20 --outname gcn_lr1e-3$addname --lr_gnn 1e-3 \
26 | --features vae_best_embs.pickle --concat_features --evalskip 200 $quick
27 |
28 | # VAE+GCN model (separate losses)
29 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
30 | --model_name gcn_ae --markers marker_gene_stats.tsv --epoch 500 \
31 | --evalepochs 20 --outname gcnae_lr1e-4$addname --lr_gnn 1e-4 \
32 | --batchsize 256 --rawfeatures --gnn_alpha 0.5 --scg_alpha 100 --concat_features \
33 | --evalskip 100 --skip_preclustering $quick
34 |
35 |
36 | # GVAE model, reconloss
37 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
38 | --model_name gcn_decode --markers marker_gene_stats.tsv --epoch 500 \
39 | --evalepochs 20 --outname gcndecode_lr1e-4$addname --lr_gnn 1e-4 --batchsize 256 \
40 | --rawfeatures --gnn_alpha 0.5 --scg_alpha 0 --evalskip 100 --skip_preclustering --layers_gnn 3 $quick
41 |
42 |
43 |
44 | # Using only top 10% of edges (separate losses)
45 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
46 | --model_name gcn_ae --markers marker_gene_stats.tsv --epoch 500 \
47 | --evalepochs 20 --outname gcnae_lr1e-4_binarize$addname --lr_gnn 1e-4 \
48 | --batchsize 256 --rawfeatures --gnn_alpha 0.5 --scg_alpha 100 --concat_features \
49 | --evalskip 100 --skip_preclustering --binarize $quick
50 |
51 |
52 | # VAE+GCN augmented graph
53 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
54 | --model_name gcn_aug --markers marker_gene_stats.tsv --epoch 500 \
55 | --evalepochs 20 --outname gcnaug_lr1e-4$addname --concat_features \
56 | --lr_gnn 1e-4 --rawfeatures --evalskip 100 $quick
57 |
58 |
59 | ### extra experiments
60 |
61 | ### no edges
62 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
63 | --model_name gcn --markers marker_gene_stats.tsv --epoch 500 \
64 | --evalepochs 20 --outname gcn_lr1e-3_noedges$addname --noedges --lr_gnn 1e-3 \
65 | --features vae_best_embs.pickle --concat_features --evalskip 200 $quick
66 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
67 | --model_name gcn_ae --markers marker_gene_stats.tsv --epoch 500 \
68 | --evalepochs 20 --outname gcnae_lr1e-4_noedges$addname --noedges --lr_gnn 1e-4 \
69 | --batchsize 256 --rawfeatures --gnn_alpha 0.5 --scg_alpha 100 --concat_features \
70 | --evalskip 100 --skip_preclustering $quick
71 | # GVAE model, reconloss
72 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
73 | --model_name gcn_decode --markers marker_gene_stats.tsv --epoch 500 \
74 | --evalepochs 20 --outname gcndecode_lr1e-4_noedges$addname --noedges --lr_gnn 1e-4 --batchsize 256 \
75 | --rawfeatures --gnn_alpha 0.5 --scg_alpha 0 --evalskip 100 --skip_preclustering --layers_gnn 3 $quick
76 | python src/graphmb/main.py --cuda --assembly ../data/$dataset/ --outdir results/$dataset/ \
77 | --model_name gcn_aug --markers marker_gene_stats.tsv --epoch 500 \
78 | --evalepochs 20 --outname gcnaug_lr1e-4_noedges$addname --noedges --concat_features \
79 | --lr_gnn 1e-4 --rawfeatures --evalskip 100 $quick
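80 |
81 | # Usage: bash runs.sh <gpu_id> <dataset>  (the GPU index is exported as CUDA_VISIBLE_DEVICES)
82 | # $quick expands to "--quick --nruns 5", so every run above is repeated five times.
83 | # The "extra experiments" block repeats the gcn, gcn_ae, gcn_decode and gcn_aug
84 | # configurations with --noedges, which presumably drops the assembly-graph edges so the
85 | # contribution of the graph itself can be isolated.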
--------------------------------------------------------------------------------
/docs/source/_build/html/_static/js/theme.js:
--------------------------------------------------------------------------------
1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap(""),n("table.docutils.citation").wrap(""),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t -o