├── .github
└── workflows
│ └── pypi_release.yml
├── .gitignore
├── ArchR_h5ad
├── __init__.py
├── _compose_adata
│ ├── _add_obs_var.py
│ ├── _cleanup_anndata.py
│ ├── _compose_anndata.py
│ ├── _dict_to_adata.py
│ └── _write_h5ad.py
├── _main
│ ├── _Arrow.py
│ └── _read_arrow_to_adata.py
├── _parse_arrow
│ ├── _add_ArchR_metadata.py
│ ├── _add_matrix_parameters.py
│ └── _read_arrow_chromosome.py
└── _utility_functions
│ └── _ordered_chromosomes.py
├── LICENSE
├── README.md
└── setup.py
/.github/workflows/pypi_release.yml:
--------------------------------------------------------------------------------
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: pypi-release
on:
  release:
    types: [published]

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    # v2 of these actions runs on a retired Node.js runtime; use current majors.
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.x'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install setuptools wheel twine
    - name: Build and publish
      env:
        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
        python setup.py sdist bdist_wheel
        twine upload dist/*
31 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/ArchR_h5ad/__init__.py:
--------------------------------------------------------------------------------
1 | # __init__.py
2 |
3 | from ._main._Arrow import _Arrow as Arrow
4 | from ._main._read_arrow_to_adata import _read_arrow_to_adata as read_arrow
--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_add_obs_var.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_add_obs_var.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 | # import packages #
8 | # --------------- #
9 | import numpy as np
10 | import pandas as pd
11 |
12 |
def _add_obs(metadata, str_col_keys=["CellNames"]):
    """Split Arrow metadata into a per-row table and a dict of single values.

    Parameters:
    -----------
    metadata
        Mapping of key -> dataset. Each non-None value must expose ``.shape``
        and support ``value[:]`` (e.g. an h5py dataset or a numpy array).

    str_col_keys
        Columns of the resulting table that hold byte strings; they are
        decoded as UTF-8 and stored as categoricals.
        default: ["CellNames"]

    Returns:
    --------
    obs_df, metadata_dict
        ``obs_df`` holds every entry whose first dimension is not 1;
        ``metadata_dict`` holds the ``None`` entries and the length-1 entries.
    """

    obs_dict = {}
    metadata_dict = {}

    for key, value in metadata.items():
        if value is None:
            metadata_dict[key] = value
        elif value.shape[0] == 1:
            scalar = value[:][0]
            # Only byte strings need decoding; a numeric or already-decoded
            # single value used to raise AttributeError on ``.decode``.
            if isinstance(scalar, bytes):
                scalar = scalar.decode("utf-8")
            metadata_dict[key] = scalar
        else:
            obs_dict[key] = value[:]

    obs_df = pd.DataFrame(obs_dict)

    for col in str_col_keys:
        if col in obs_df.columns:
            obs_df[col] = pd.Categorical(obs_df[col].str.decode("utf-8"))

    return obs_df, metadata_dict
33 |
34 |
def _add_var(feature_df, str_col_keys=["seqnames", "name"]):
    """Build the ``var`` table from the Arrow ``FeatureDF`` record array.

    Columns listed in ``str_col_keys`` that are present are decoded from
    UTF-8 bytes and stored as categoricals.
    """

    table = pd.DataFrame(np.array(feature_df))

    present = [name for name in str_col_keys if name in table.columns]
    for name in present:
        decoded = table[name].str.decode("utf-8")
        table[name] = pd.Categorical(decoded)

    return table
46 |
def _add_obs_var(adata, metadata, feature_df):
    """Attach ``var``, ``obs`` and ``uns['metadata_dict']`` to ``adata`` and return it."""

    var_table = _add_var(feature_df)
    adata.var = var_table

    obs_table, scalar_metadata = _add_obs(metadata)
    adata.obs = obs_table
    adata.uns["metadata_dict"] = scalar_metadata

    return adata
--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_cleanup_anndata.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_cleanup_anndata.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 | # import packages #
8 | # --------------- #
9 | import scipy.sparse
10 |
11 |
# Lookup of sparse-format name -> scipy constructor.
format_dict = dict(
    csc=scipy.sparse.csc_matrix,
    csr=scipy.sparse.csr_matrix,
)


def _to_sparse_format(X, to="csr"):
    """Convert ``X`` with the scipy constructor registered under ``to``.

    Raises ``KeyError`` when ``to`` is not a key of ``format_dict``.
    """
    converter = format_dict[to]
    return converter(X)
19 |
def _format_adata_indices(adata):
    """Cast the ``obs`` and ``var`` indices of ``adata`` to ``str`` and return it."""

    for frame in (adata.obs, adata.var):
        frame.index = frame.index.astype(str)

    return adata
26 |
def _cleanup_anndata(adata):
    """Store ``adata.X`` as CSR and cast the obs/var indices to strings."""

    sparse_X = _to_sparse_format(adata.X, to="csr")
    adata.X = sparse_X

    adata = _format_adata_indices(adata)
    return adata
--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_compose_anndata.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_compose_anndata.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 |
8 |
9 | # import local dependencies #
10 | # ------------------------- #
11 | from ._dict_to_adata import _dict_to_adata
12 | from ._add_obs_var import _add_obs_var
13 | from ._cleanup_anndata import _cleanup_anndata
14 | from ._write_h5ad import _write_h5ad
15 |
16 |
def _compose_anndata(
    DataDict,
    metadata,
    feature_df,
    use_matrix,
    write_h5ad,
    outpath,
    silent,
):
    """Assemble an AnnData object from per-chromosome matrices.

    The matrices in ``DataDict`` are combined, annotated with ``metadata``
    and ``feature_df``, and cleaned up. The result is printed unless
    ``silent`` is set and written to disk when ``write_h5ad`` is set.

    Returns:
    --------
    adata
        The composed AnnData object.
    """

    composed = _cleanup_anndata(
        _add_obs_var(_dict_to_adata(DataDict), metadata, feature_df)
    )

    if not silent:
        print(composed)

    if write_h5ad:
        _write_h5ad(composed, use_matrix, outpath, silent)

    return composed
--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_dict_to_adata.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_dict_to_adata.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 | # import packages #
8 | # --------------- #
9 | import anndata
10 | import scipy.sparse
11 |
12 |
13 | # import local dependencies #
14 | # --------------- #
15 | from .._utility_functions._ordered_chromosomes import _ordered_chromosomes
16 |
17 |
def _dict_to_adata(DataDict):
    """Horizontally stack the per-chromosome matrices into one AnnData.

    Matrices are taken in the order given by ``_ordered_chromosomes()``;
    chromosomes that are absent from ``DataDict`` or mapped to ``None`` are
    skipped.
    """

    stacked_in_order = [
        DataDict[name]
        for name in _ordered_chromosomes()
        if name in DataDict and DataDict[name] is not None
    ]

    combined = scipy.sparse.hstack(stacked_in_order)

    return anndata.AnnData(combined, dtype=combined.dtype)
--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_write_h5ad.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_h5ad_filename.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 | # import packages #
8 | # --------------- #
9 | import licorice_font
10 | import os
11 |
12 |
def _h5ad_filepath(adata, use_matrix, outpath):
    """Return ``<outpath>/<Sample>.<use_matrix>.h5ad``.

    ``Sample`` is read from ``adata.uns['metadata_dict']``.
    """

    sample = adata.uns['metadata_dict']['Sample']
    return os.path.join(outpath, "{}.{}.h5ad".format(sample, use_matrix))
17 |
def _write_h5ad(adata, use_matrix, outpath, silent):
    """Write ``adata`` to the path built by ``_h5ad_filepath``.

    The destination is announced on stdout unless ``silent`` is set.
    """

    destination = _h5ad_filepath(adata, use_matrix, outpath)

    if not silent:
        label = licorice_font.font_format("Saving to", ["BOLD"])
        print("\n{}: {}".format(label, destination))

    adata.write_h5ad(destination)
--------------------------------------------------------------------------------
/ArchR_h5ad/_main/_Arrow.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_Arrow.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 | # import packages #
8 | # --------------- #
9 | import h5py
10 | import licorice_font
11 |
12 |
13 | # import local dependencies #
14 | # ------------------------- #
15 | from .._parse_arrow._read_arrow_chromosome import _read_arrow_chromosome
16 | from .._parse_arrow._add_ArchR_metadata import _add_ArchR_metadata
17 | from .._parse_arrow._add_matrix_parameters import _add_matrix_parameters
18 | from .._compose_adata._compose_anndata import _compose_anndata
19 |
20 |
class _Arrow:

    """Class for reading an ArchR Arrow file (HDF5) and converting it to AnnData.

    Parameters:
    -----------
    path
        Path to the ``.arrow`` file.

    matrices
        Names of matrix groups whose parameters are attached to the instance
        (as ``_params_<matrix>``) when the group exists in the file.
        default: ["GeneScoreMatrix", "TileMatrix"]

    metadata_keys
        Top-level keys whose first value is attached to the instance as
        ``_<key>``.
        default: ["ArchRVersion", "Class"]

    silent
        If True, suppress progress messages.
        default: False

    verbose
        Forwarded to the chromosome reader to print per-chromosome progress.
        default: False
    """

    def __init__(
        self,
        path,
        matrices=["GeneScoreMatrix", "TileMatrix"],
        metadata_keys=["ArchRVersion", "Class"],
        silent=False,
        verbose=False
    ):

        self._path = path
        # The file is only ever read. Opening it read-only explicitly avoids
        # the append/create default of h5py releases older than 3.0, which
        # could create an empty file when ``path`` is mistyped.
        self._file = h5py.File(self._path, "r")
        self._silent = silent
        self._verbose = verbose
        _add_ArchR_metadata(self, metadata_keys=metadata_keys)
        _add_matrix_parameters(self, matrices)

    def to_adata(self, use_matrix="GeneScoreMatrix", outpath="./", write_h5ad=True):
        """Read ``use_matrix`` from the Arrow file into AnnData.

        Parameters:
        -----------
        use_matrix
            Name of the matrix group to read.
            default: "GeneScoreMatrix"

        outpath
            Directory the ``.h5ad`` file is written to.
            default: "./"

        write_h5ad
            If True, write the result to disk.
            default: True

        Returns:
        --------
        adata
            The composed AnnData object (also stored as ``self._adata``).
        """

        self._use_matrix = use_matrix
        self._outpath = outpath

        if not self._silent:
            mtx = licorice_font.font_format(self._use_matrix, ["BOLD", "BLUE"])
            print("Reading ArchR {} to AnnData".format(mtx))

        self._DataDict = _read_arrow_chromosome(self._file, self._use_matrix, self._verbose)
        self._adata = _compose_anndata(
            DataDict=self._DataDict,
            metadata=self._file['Metadata'],
            feature_df=self._file[self._use_matrix]["Info"]["FeatureDF"],
            use_matrix=self._use_matrix,
            write_h5ad=write_h5ad,
            outpath=outpath,
            silent=self._silent,
        )

        # Returned as well as stored so ``adata = arrow.to_adata()`` works.
        return self._adata
60 |
61 |
--------------------------------------------------------------------------------
/ArchR_h5ad/_main/_read_arrow_to_adata.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_read_ArchR_to_adata.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 | # import local dependencies #
8 | # ------------------------- #
9 | from ._Arrow import _Arrow
10 |
11 |
def _read_arrow_to_adata(
    path,
    matrices=["GeneScoreMatrix", "TileMatrix"],
    metadata_keys=["ArchRVersion", "Class"],
    use_matrix="GeneScoreMatrix",
    silent=False,
    write_h5ad=True,
):
    """
    Read an ArchR ".arrow" file as AnnData (adata).

    Parameters:
    -----------
    path
        Path to the ArchR ".arrow" file.
        type: str

    matrices
        Matrix groups whose parameters are collected from the file.
        default: ["GeneScoreMatrix", "TileMatrix"]
        type: list(str)

    metadata_keys
        Keys of the high-level metadata saved by ArchR.
        default: ["ArchRVersion", "Class"]
        type: list(str)

    use_matrix
        Name of the matrix to convert.
        default: "GeneScoreMatrix"
        type: str

    silent
        If True, suppress progress messages.
        default: False
        type: bool

    write_h5ad
        If True, also write the result to an ".h5ad" file in the default
        output directory of ``_Arrow.to_adata``.
        default: True
        type: bool

    Returns:
    --------
    adata
        anndata._core.anndata.AnnData
    """

    reader = _Arrow(
        path,
        matrices=matrices,
        metadata_keys=metadata_keys,
        silent=silent,
    )
    reader.to_adata(use_matrix=use_matrix, write_h5ad=write_h5ad)

    # Read the stored result rather than the call's return value.
    return reader._adata
--------------------------------------------------------------------------------
/ArchR_h5ad/_parse_arrow/_add_ArchR_metadata.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_add_ArchR_metadata.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
def _add_ArchR_metadata(arrow, metadata_keys=["ArchRVersion", "Class"]):
    """Copy top-level metadata from ``arrow._file`` onto ``arrow``.

    For each key, the first element of ``arrow._file[key]`` is decoded as
    UTF-8 and stored on ``arrow`` as the attribute ``_<key>``.
    """
    for key in metadata_keys:
        decoded = arrow._file[key][0].decode("utf-8")
        setattr(arrow, "_{}".format(key), decoded)
--------------------------------------------------------------------------------
/ArchR_h5ad/_parse_arrow/_add_matrix_parameters.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_add_matrix_parameters.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
def _return_matrix_params(file, use_matrix):
    """Return the first ``Params`` record of ``use_matrix`` as a list.

    Values of exact type ``bytes`` are decoded as UTF-8; every other value is
    passed through unchanged.
    """

    first_record = file[use_matrix]["Info"]["Params"][:][0]

    return [
        item.decode("utf-8") if type(item) is bytes else item
        for item in first_record.tolist()
    ]
19 |
def _add_matrix_parameters(arrow, matrices=["GeneScoreMatrix", "TileMatrix"]):
    """Attach the parameters of each available matrix to ``arrow``.

    For every name in ``matrices`` that is a top-level key of ``arrow._file``,
    its parameter list is stored on ``arrow`` as ``_params_<matrix>``.
    """

    h5 = arrow._file
    top_level_keys = list(h5.keys())

    for matrix in matrices:
        if matrix not in top_level_keys:
            continue
        setattr(arrow, "_params_{}".format(matrix), _return_matrix_params(h5, matrix))
--------------------------------------------------------------------------------
/ArchR_h5ad/_parse_arrow/_read_arrow_chromosome.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_read_arrow_chromosome.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 | # import packages #
8 | # --------------- #
9 | import numpy as np
10 | import scipy.sparse
11 | from tqdm.notebook import tqdm
12 |
13 |
14 | # import local dependencies #
15 | # ------------------------- #
16 | from .._utility_functions._ordered_chromosomes import _ordered_chromosomes
17 |
18 |
def _return_sum_chromosome_axis_sums(chromosome):
    """Return the total of all ``colSums`` entries plus all ``rowSums`` entries."""

    col_total = np.asarray(chromosome["colSums"]).ravel().sum()
    row_total = np.asarray(chromosome["rowSums"]).ravel().sum()

    return np.sum([col_total, row_total])
25 |
def _return_zero_rows(colsums):
    """Return the positions (along the first axis) where ``colsums`` equals zero."""
    is_zero = colsums == 0
    return np.nonzero(is_zero)[0]
28 |
def _get_matrix_size(chromosome):

    """Return ``[len(colSums), len(rowSums)]`` counted over the flattened datasets."""

    return [np.asarray(chromosome[key]).size for key in ("colSums", "rowSums")]
37 |
def _initialize_empty_chromosome_data_matrix(chromosome):
    """Return a dense all-zero matrix of shape ``(len(colSums), len(rowSums))``."""

    shape = _get_matrix_size(chromosome)

    return np.zeros(shape)
43 |
def _return_jLengths(chromosome):
    """Return the cumulative sum of ``jLengths`` with a leading 0 (segment offsets)."""
    lengths = np.array(chromosome["jLengths"]).flatten()
    return np.concatenate((np.ravel(0), lengths)).cumsum()
46 |
def _fetch_chromosome_data_from_arrow_h5(chromosome, binary):
    """Read one chromosome group into a CSR matrix.

    The result has one row per ``colSums`` entry and one column per
    ``rowSums`` entry. Rows whose ``colSums`` entry is zero stay empty; the
    remaining rows consume, in order, consecutive runs of ``i`` / ``x`` whose
    lengths are given by ``jLengths``. ``i`` holds 1-based column indices.

    The matrix is built directly in CSR form, so no dense rows x columns
    array is allocated.
    NOTE(review): assumes indices within a run are unique - TODO confirm.

    Parameters:
    -----------
    chromosome
        Mapping (e.g. an h5py group) providing "colSums", "rowSums",
        "jLengths", "i" and, unless ``binary``, "x".

    binary
        If True, every stored entry is 1 and "x" is not read.

    Returns:
    --------
    scipy.sparse.csr_matrix of dtype float64, or None when both axis sums
    total zero.

    Raises:
    -------
    ValueError
        If ``jLengths`` has fewer entries than there are non-zero ``colSums``
        entries, or the run lengths address more entries than are stored.
    """

    col_sums = np.array(chromosome["colSums"]).flatten()
    row_sums = np.array(chromosome["rowSums"]).flatten()

    if col_sums.sum() + row_sums.sum() == 0:
        print("\tNo features / cells found in chromosome...")
        return None

    n_rows, n_cols = col_sums.shape[0], row_sums.shape[0]

    # Only rows with a non-zero column sum own a run in jLengths.
    occupied_rows = np.flatnonzero(col_sums != 0)
    run_lengths = np.array(chromosome["jLengths"]).flatten()
    if run_lengths.shape[0] < occupied_rows.shape[0]:
        raise ValueError(
            "jLengths has {} entries but {} rows are non-empty".format(
                run_lengths.shape[0], occupied_rows.shape[0]
            )
        )

    entries_per_row = np.zeros(n_rows, dtype=np.int64)
    entries_per_row[occupied_rows] = run_lengths[: occupied_rows.shape[0]]
    indptr = np.concatenate(([0], np.cumsum(entries_per_row)))

    col_index = np.array(chromosome["i"]).flatten()
    if binary:
        values = np.ones(len(col_index))
    else:
        values = np.array(chromosome["x"]).flatten()

    n_entries = int(indptr[-1])
    if n_entries > col_index.shape[0] or n_entries > values.shape[0]:
        raise ValueError(
            "jLengths addresses {} entries but only {} are stored".format(
                n_entries, min(col_index.shape[0], values.shape[0])
            )
        )

    matrix = scipy.sparse.csr_matrix(
        (
            values[:n_entries].astype(np.float64),
            col_index[:n_entries].astype(np.int64) - 1,  # 1-based -> 0-based
            indptr,
        ),
        shape=(n_rows, n_cols),
    )
    # Match what a dense -> CSR conversion yields: sorted, no stored zeros.
    matrix.sort_indices()
    matrix.eliminate_zeros()

    return matrix
88 |
89 |
def _read_arrow_chromosome(h5_file, use_matrix="GeneScoreMatrix", verbose=False):
    """Read every expected chromosome of ``use_matrix`` into a dict.

    Chromosomes are visited in the order given by ``_ordered_chromosomes()``.
    Those missing from the file are reported on stdout and skipped. Entries
    are stored as binary (all ones) when ``use_matrix`` is "TileMatrix".

    Returns:
    --------
    DataDict
        Mapping of chromosome name -> matrix, or None for an empty chromosome.
    """

    matrix_group = h5_file[use_matrix]
    available = list(matrix_group.keys())
    available.remove("Info")

    binary = use_matrix == "TileMatrix"

    if verbose:
        print("Loading chromosomes from Arrow:")

    loaded = {}
    for chrom_key in tqdm(_ordered_chromosomes(), desc="Chromosomes"):
        if chrom_key not in available:
            print(" - Warning: {} not detected!".format(chrom_key))
            continue
        if verbose:
            print("- {}".format(chrom_key))
        loaded[chrom_key] = _fetch_chromosome_data_from_arrow_h5(
            matrix_group[chrom_key], binary
        )

    return loaded
--------------------------------------------------------------------------------
/ArchR_h5ad/_utility_functions/_ordered_chromosomes.py:
--------------------------------------------------------------------------------
1 |
2 | __module_name__ = "_ordered_chromosomes.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 |
6 |
7 | # import packages #
8 | # --------------- #
9 | import numpy as np
10 |
11 |
def _ordered_chromosomes():
    """Return ``["chr1", ..., "chr22", "chrX"]`` in that order."""
    labels = [str(number) for number in range(1, 23)]
    labels.append("X")
    return ["chr{}".format(label) for label in labels]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Michael Vinyard
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ArchR-h5ad
2 |
3 | [](https://pypi.python.org/pypi/ArchR-h5ad/)
4 | [](https://badge.fury.io/py/ArchR-h5ad)
5 | [](https://github.com/psf/black)
6 |
7 | A lightweight Python package to parse **`.arrow`** files produced by [**ArchR**](https://www.archrproject.com) into [**AnnData**](https://anndata.readthedocs.io/en/stable/).
8 |
9 | ## Installation
10 |
11 | Install the latest release ([**`v0.0.12`**](https://pypi.org/project/ArchR-h5ad/0.0.12/)) from [PyPI](https://pypi.org/project/ArchR-h5ad/):
12 |
13 | ```BASH
14 | pip install ArchR_h5ad
15 | ```
16 |
17 | Alternatively, clone the repo to install the development version, locally:
18 |
19 | ```BASH
20 | git clone https://github.com/mvinyard/ArchR-h5ad.git; cd ArchR-h5ad
21 |
22 | pip install -e .
23 | ```
24 |
25 | ## A brief example
26 |
27 | As an example, we will use the data from the ArchR [hematopoiesis tutorial](https://www.archrproject.com/articles/Articles/tutorial.html#creating-an-archrproject-1).
28 |
29 | ### Option 1. Directly read a `.arrow` file to adata
30 | ```python
31 | import ArchR_h5ad
32 |
33 | arrow_path = "/home/user/data/scATAC_CD34_BMMC_R1.arrow"
34 |
35 | adata = ArchR_h5ad.read_arrow(arrow_path, use_matrix="GeneScoreMatrix")
36 | ```
37 |
38 |
39 | Alternatively, one may use the **`"TileMatrix"`** generated by ArchR.
40 |
41 | ```python
42 | adata = ArchR_h5ad.read_arrow(arrow_path, use_matrix="TileMatrix")
43 | ```
44 |
45 |
46 |
47 | ### Option 2. Instantiate the `Arrow` class.
48 |
49 | ```python
50 | arrow = ArchR_h5ad.Arrow(arrow_path)
51 | arrow.to_adata()
52 | ```
53 |
54 |
55 | ```python
56 | import numpy as np
57 |
58 | np.array(arrow.__dir__())[
59 | np.array([not i.startswith("__") for i in arrow.__dir__()])
60 | ].tolist()
61 | ```
62 |
63 |
64 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | import re
3 | import os
4 | import sys
5 |
6 |
# Read the long description through a context manager so the file handle is
# closed deterministically.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()


setup(
    name="ArchR_h5ad",
    version="0.0.12",
    python_requires=">3.6.0",
    author="Michael E. Vinyard - Harvard University - Massachussetts General Hospital - Broad Institute of MIT and Harvard",
    author_email="mvinyard@broadinstitute.org",
    # Repository location as given in the README's clone instructions.
    url="https://github.com/mvinyard/ArchR-h5ad",
    long_description=long_description,
    long_description_content_type="text/markdown",
    description="ArchR_h5ad: Read .arrow files (from ArchR) to anndata.",
    packages=[
        "ArchR_h5ad",
        "ArchR_h5ad._compose_adata",
        "ArchR_h5ad._main",
        "ArchR_h5ad._parse_arrow",
        "ArchR_h5ad._utility_functions",
    ],
    # h5py, numpy, pandas and scipy are imported directly by the package, so
    # they are declared here instead of relying on anndata to pull them in.
    install_requires=[
        "anndata>=0.7.8",
        "h5py",
        "licorice_font>=0.0.3",
        "numpy",
        "pandas",
        "scipy",
        "tqdm>=4.64.0",
    ],
    classifiers=[
        "Development Status :: 2 - Pre-Alpha",
        "Programming Language :: Python :: 3.6",
        "Intended Audience :: Science/Research",
        "Topic :: Scientific/Engineering :: Bio-Informatics",
    ],
    license="MIT",
)
37 |
--------------------------------------------------------------------------------