├── .github └── workflows │ └── pypi_release.yml ├── .gitignore ├── ArchR_h5ad ├── __init__.py ├── _compose_adata │ ├── _add_obs_var.py │ ├── _cleanup_anndata.py │ ├── _compose_anndata.py │ ├── _dict_to_adata.py │ └── _write_h5ad.py ├── _main │ ├── _Arrow.py │ └── _read_arrow_to_adata.py ├── _parse_arrow │ ├── _add_ArchR_metadata.py │ ├── _add_matrix_parameters.py │ └── _read_arrow_chromosome.py └── _utility_functions │ └── _ordered_chromosomes.py ├── LICENSE ├── README.md └── setup.py /.github/workflows/pypi_release.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: pypi-release 5 | on: 6 | release: 7 | types: [published] 8 | 9 | jobs: 10 | deploy: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: '3.x' 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install setuptools wheel twine 24 | - name: Build and publish 25 | env: 26 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 27 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 28 | run: | 29 | python setup.py sdist bdist_wheel 30 | twine upload dist/* 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /ArchR_h5ad/__init__.py: -------------------------------------------------------------------------------- 1 | # __init__.py 2 | 3 | from ._main._Arrow import _Arrow as Arrow 4 | from ._main._read_arrow_to_adata import _read_arrow_to_adata as read_arrow -------------------------------------------------------------------------------- /ArchR_h5ad/_compose_adata/_add_obs_var.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_add_obs_var.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | # import packages # 8 | # --------------- # 9 | import numpy as np 10 | import pandas as pd 11 | 12 | 13 | def _add_obs(metadata, str_col_keys=["CellNames"]): 14 | 15 | obs_dict = {} 16 | metadata_dict = {} 17 | 18 | for key, value in metadata.items(): 19 | if value is None: 20 | metadata_dict[key] = value 21 | elif value.shape[0] == 1: 22 | metadata_dict[key] = value[:][0].decode("utf-8") 23 | else: 24 | obs_dict[key] = value[:] 25 | 26 | obs_df = pd.DataFrame(obs_dict) 27 | 28 | for col in str_col_keys: 29 | if col in obs_df.columns: 30 | obs_df[col] = pd.Categorical(obs_df[col].str.decode("utf-8")) 31 | 32 | return obs_df, metadata_dict 33 | 34 | 35 | def _add_var(feature_df, str_col_keys=["seqnames", "name"]): 36 | 37 | """return Feature_DF as var""" 38 | 39 | var_df = pd.DataFrame(np.array(feature_df)) 40 | 41 | for col in str_col_keys: 42 | if col in var_df.columns: 43 | var_df[col] = pd.Categorical(var_df[col].str.decode("utf-8")) 44 | 45 | return var_df 46 | 47 | def _add_obs_var(adata, metadata, feature_df): 48 | 49 | adata.var = _add_var(feature_df) 50 | adata.obs, adata.uns['metadata_dict'] = _add_obs(metadata) 51 | 52 | return adata -------------------------------------------------------------------------------- /ArchR_h5ad/_compose_adata/_cleanup_anndata.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_cleanup_anndata.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | # import packages # 8 | # --------------- # 9 | import scipy.sparse 10 | 11 | 12 | format_dict = { 13 | "csc": scipy.sparse.csc_matrix, 14 | "csr": scipy.sparse.csr_matrix, 15 | } 16 | 17 | def _to_sparse_format(X, to="csr"): 18 | return format_dict[to](X) 19 | 20 | def _format_adata_indices(adata): 21 | 22 | adata.obs.index = adata.obs.index.astype(str) 23 | adata.var.index = adata.var.index.astype(str) 24 | 25 | return adata 26 | 27 | def _cleanup_anndata(adata): 28 | 29 | adata.X = _to_sparse_format(adata.X, to="csr") 30 | return _format_adata_indices(adata) -------------------------------------------------------------------------------- /ArchR_h5ad/_compose_adata/_compose_anndata.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_compose_anndata.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | 8 | 9 | # import local dependencies # 10 | # ------------------------- # 11 | from ._dict_to_adata import _dict_to_adata 12 | from ._add_obs_var import _add_obs_var 13 | from ._cleanup_anndata import _cleanup_anndata 14 | from ._write_h5ad import _write_h5ad 15 | 16 | 17 | def _compose_anndata(DataDict, 18 | metadata, 19 | feature_df, 20 | use_matrix, 21 | write_h5ad, 22 | outpath, 23 | silent, 24 | ): 25 | 26 | adata = _dict_to_adata(DataDict) 27 | adata = _add_obs_var(adata, metadata, feature_df) 28 | adata = _cleanup_anndata(adata) 29 | 30 | if not silent: 31 | print(adata) 32 | 33 | if write_h5ad: 34 | _write_h5ad(adata, use_matrix, outpath, silent) 35 | 36 | return adata -------------------------------------------------------------------------------- /ArchR_h5ad/_compose_adata/_dict_to_adata.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_dict_to_adata.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | # import packages # 8 | # --------------- # 9 | import anndata 10 | import scipy.sparse 11 | 12 | 13 | # import local dependencies # 14 | # --------------- # 15 | from .._utility_functions._ordered_chromosomes import _ordered_chromosomes 16 | 17 | 18 | def _dict_to_adata(DataDict): 19 | 20 | _ordered_matrices = [] 21 | for chrom in _ordered_chromosomes(): 22 | if chrom in DataDict: 23 | if DataDict[chrom] is not None: 24 | _ordered_matrices.append(DataDict[chrom]) 25 | 26 | X_ = scipy.sparse.hstack(_ordered_matrices) 27 | 28 | return anndata.AnnData(X_, dtype=X_.dtype) -------------------------------------------------------------------------------- /ArchR_h5ad/_compose_adata/_write_h5ad.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_h5ad_filename.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | # import packages # 8 | # --------------- # 9 | import licorice_font 10 | import os 11 | 12 | 13 | def _h5ad_filepath(adata, use_matrix, outpath): 14 | 15 | filename = "{}.{}.h5ad".format(adata.uns['metadata_dict']['Sample'], use_matrix) 16 | return os.path.join(outpath, filename) 17 | 18 | def _write_h5ad(adata, use_matrix, outpath, silent): 19 | 20 | h5ad_filepath = _h5ad_filepath(adata, use_matrix, outpath) 21 | if not silent: 22 | msg = licorice_font.font_format("Saving to", ["BOLD"]) 23 | print("\n{}: {}".format(msg, h5ad_filepath)) 24 | 25 | adata.write_h5ad(h5ad_filepath) -------------------------------------------------------------------------------- /ArchR_h5ad/_main/_Arrow.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_Arrow.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | # import packages # 8 | # --------------- # 9 | import h5py 10 | import licorice_font 11 | 12 | 13 | # import local dependencies # 14 | # ------------------------- # 15 | from .._parse_arrow._read_arrow_chromosome import _read_arrow_chromosome 16 | from .._parse_arrow._add_ArchR_metadata import _add_ArchR_metadata 17 | from .._parse_arrow._add_matrix_parameters import _add_matrix_parameters 18 | from .._compose_adata._compose_anndata import _compose_anndata 19 | 20 | 21 | class _Arrow: 22 | 23 | """Class for reading an Arrow File from .h5""" 24 | 25 | def __init__( 26 | self, 27 | path, 28 | matrices=["GeneScoreMatrix", "TileMatrix"], 29 | metadata_keys=["ArchRVersion", "Class"], 30 | silent=False, 31 | verbose=False 32 | ): 33 | 34 | self._path = path 35 | self._file = h5py.File(self._path) 36 | self._silent = silent 37 | self._verbose = verbose 38 | _add_ArchR_metadata(self, metadata_keys=metadata_keys) 39 | _add_matrix_parameters(self, matrices) 40 | 41 | def to_adata(self, use_matrix="GeneScoreMatrix", outpath="./", write_h5ad=True): 42 | 43 | 44 | self._use_matrix = use_matrix 45 | self._outpath = outpath 46 | 47 | if not self._silent: 48 | mtx = licorice_font.font_format(self._use_matrix, ["BOLD", "BLUE"]) 49 | print("Reading ArchR {} to AnnData".format(mtx)) 50 | 51 | self._DataDict = _read_arrow_chromosome(self._file, self._use_matrix, self._verbose) 52 | self._adata = _compose_anndata(DataDict=self._DataDict, 53 | metadata=self._file['Metadata'], 54 | feature_df=self._file[self._use_matrix]["Info"]["FeatureDF"], 55 | use_matrix=self._use_matrix, 56 | write_h5ad=write_h5ad, 57 | outpath=outpath, 58 | silent=self._silent, 59 | ) 60 | 61 | -------------------------------------------------------------------------------- /ArchR_h5ad/_main/_read_arrow_to_adata.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_read_ArchR_to_adata.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | # import local dependencies # 8 | # ------------------------- # 9 | from ._Arrow import _Arrow 10 | 11 | 12 | def _read_arrow_to_adata( 13 | path, 14 | matrices=["GeneScoreMatrix", "TileMatrix"], 15 | metadata_keys=["ArchRVersion", "Class"], 16 | use_matrix="GeneScoreMatrix", 17 | silent=False, 18 | write_h5ad=True, 19 | ): 20 | """ 21 | Read an ArchR ".arrow" file as AnnData (adata). 22 | 23 | Parameters: 24 | ----------- 25 | path 26 | path to ArchR.arrow file. 27 | type: str 28 | 29 | matrices 30 | Matrices saved in the ArchR.arrow file. 31 | default: ["GeneScoreMatrix", "TileMatrix"] 32 | type: list(str) 33 | 34 | metadata_keys 35 | Keys to high-level metadata saved by ArchR. 36 | default: ["ArchRVersion", "Class"] 37 | type: list(str) 38 | 39 | use_matrix 40 | Which matrix to use. Currently TileMatrix is not implemented. 41 | default: "GeneScoreMatrix", 42 | type: str 43 | 44 | silent 45 | If True, print extra messages. 46 | default: False 47 | type: bool 48 | 49 | write_h5ad 50 | default: True 51 | type: bool 52 | 53 | Returns: 54 | -------- 55 | adata 56 | anndata._core.anndata.AnnData 57 | 58 | Notes: 59 | ------ 60 | (1) 61 | """ 62 | 63 | arrow = _Arrow( 64 | path, matrices=matrices, metadata_keys=metadata_keys, silent=silent 65 | ) 66 | arrow.to_adata(use_matrix=use_matrix, write_h5ad=write_h5ad) 67 | 68 | return arrow._adata -------------------------------------------------------------------------------- /ArchR_h5ad/_parse_arrow/_add_ArchR_metadata.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_add_ArchR_metadata.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | def _add_ArchR_metadata(arrow, metadata_keys=["ArchRVersion", "Class"]): 8 | for key in metadata_keys: 9 | arrow.__setattr__("_{}".format(key), arrow._file[key][0].decode("utf-8")) -------------------------------------------------------------------------------- /ArchR_h5ad/_parse_arrow/_add_matrix_parameters.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_add_matrix_parameters.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | def _return_matrix_params(file, use_matrix): 8 | 9 | params = file[use_matrix]["Info"]["Params"][:][0] 10 | 11 | params_ = [] 12 | for val in params.tolist(): 13 | if type(val) is bytes: 14 | params_.append(val.decode("utf-8")) 15 | else: 16 | params_.append(val) 17 | 18 | return params_ 19 | 20 | def _add_matrix_parameters(arrow, matrices=["GeneScoreMatrix", "TileMatrix"]): 21 | 22 | file = arrow._file 23 | 24 | for matrix in matrices: 25 | if matrix in list(file.keys()): 26 | key_added = "_params_{}".format(matrix) 27 | arrow.__setattr__(key_added, _return_matrix_params(arrow._file, matrix)) -------------------------------------------------------------------------------- /ArchR_h5ad/_parse_arrow/_read_arrow_chromosome.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_read_arrow_chromosome.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | # import packages # 8 | # --------------- # 9 | import numpy as np 10 | import scipy.sparse 11 | from tqdm.notebook import tqdm 12 | 13 | 14 | # import local dependencies # 15 | # ------------------------- # 16 | from .._utility_functions._ordered_chromosomes import _ordered_chromosomes 17 | 18 | 19 | def _return_sum_chromosome_axis_sums(chromosome): 20 | 21 | colsums = np.array(chromosome["colSums"]).flatten().sum() 22 | rowsums = np.array(chromosome["rowSums"]).flatten().sum() 23 | 24 | return np.array([colsums, rowsums]).sum() 25 | 26 | def _return_zero_rows(colsums): 27 | return np.where(colsums == 0)[0] 28 | 29 | def _get_matrix_size(chromosome): 30 | 31 | """""" 32 | 33 | ncols = np.array(chromosome["colSums"]).flatten().shape[0] 34 | nrows = np.array(chromosome["rowSums"]).flatten().shape[0] 35 | 36 | return [ncols, nrows] 37 | 38 | def _initialize_empty_chromosome_data_matrix(chromosome): 39 | 40 | [ncols, nrows] = _get_matrix_size(chromosome) 41 | 42 | return np.zeros([ncols, nrows]) 43 | 44 | def _return_jLengths(chromosome): 45 | return np.append(0, np.array(chromosome["jLengths"]).flatten()).cumsum() 46 | 47 | def _fetch_chromosome_data_from_arrow_h5(chromosome, binary): 48 | 49 | colsums = np.array(chromosome["colSums"]).flatten() 50 | rowsums = np.array(chromosome["rowSums"]).flatten() 51 | 52 | axis_sums = _return_sum_chromosome_axis_sums(chromosome) 53 | zero_rows = _return_zero_rows(colsums) 54 | 55 | if axis_sums.sum() == 0: 56 | print("\tNo features / cells found in chromosome...") 57 | return None 58 | 59 | else: 60 | 61 | X_empty = _initialize_empty_chromosome_data_matrix(chromosome) 62 | j_lengths = _return_jLengths(chromosome) 63 | 64 | i = np.array(chromosome["i"]).flatten() 65 | 66 | if not binary: 67 | x = np.array(chromosome["x"]).flatten() 68 | else: 69 | x = np.ones(len(i)) 70 | 71 | row_adj = 0 72 | row_sums = [] 73 | 74 | for row in range(len(X_empty)): 75 | if not row in zero_rows: 76 | j_len_i = j_lengths[row_adj] 77 | if not row_adj == len(j_lengths): 78 | j_len_j = j_lengths[int(row_adj + 1)] 79 | else: 80 | j_len_j = j_len_i 81 | row_vals = x[j_len_i:j_len_j] 82 | row_sums.append(row_vals.sum()) 83 | idx = i[j_len_i:j_len_j] - 1 84 | row_adj += 1 85 | X_empty[row, idx] = row_vals 86 | 87 | return scipy.sparse.csr_matrix(X_empty) 88 | 89 | 90 | def _read_arrow_chromosome(h5_file, use_matrix="GeneScoreMatrix", verbose=False): 91 | 92 | chromosomes = list(h5_file[use_matrix].keys()) 93 | chromosomes.remove("Info") 94 | 95 | if use_matrix == "TileMatrix": 96 | binary = True 97 | else: 98 | binary = False 99 | 100 | DataDict = {} 101 | if verbose: 102 | print("Loading chromosomes from Arrow:") 103 | for chrom_key in tqdm(_ordered_chromosomes(), desc="Chromosomes"): 104 | if chrom_key in chromosomes: 105 | chromosome = h5_file[use_matrix][chrom_key] 106 | if verbose: 107 | print("- {}".format(chrom_key)) 108 | DataDict[chrom_key] = _fetch_chromosome_data_from_arrow_h5(chromosome, binary) 109 | else: 110 | print(" - Warning: {} not detected!".format(chrom_key)) 111 | return DataDict -------------------------------------------------------------------------------- /ArchR_h5ad/_utility_functions/_ordered_chromosomes.py: -------------------------------------------------------------------------------- 1 | 2 | __module_name__ = "_ordered_chromosomes.py" 3 | __author__ = ", ".join(["Michael E. Vinyard"]) 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",]) 5 | 6 | 7 | # import packages # 8 | # --------------- # 9 | import numpy as np 10 | 11 | 12 | def _ordered_chromosomes(): 13 | return ["chr{}".format(i) for i in np.append(np.arange(1, 23), "X")] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Michael Vinyard 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ArchR-h5ad 2 | 3 | [![PyPI pyversions](https://img.shields.io/pypi/pyversions/ArchR-h5ad.svg)](https://pypi.python.org/pypi/ArchR-h5ad/) 4 | [![PyPI version](https://badge.fury.io/py/ArchR-h5ad.svg)](https://badge.fury.io/py/ArchR-h5ad) 5 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 6 | 7 | A lightweight python package to parse **[`.arrow`]()** files produced by [**ArchR**](https://www.archrproject.com) - to [**AnnData**](https://anndata.readthedocs.io/en/stable/). 8 | 9 | ## Installation 10 | 11 | Install the last-released ([**`v0.0.12`**](https://pypi.org/project/ArchR-h5ad/0.0.12/)) distrubution from [PYPI](https://pypi.org/project/ArchR-h5ad/): 12 | 13 | ```BASH 14 | pip install ArchR_h5ad 15 | ``` 16 | 17 | Alternatively, clone the repo to install the development version, locally: 18 | 19 | ```BASH 20 | git clone https://github.com/mvinyard/ArchR-h5ad.git; cd ArchR_h5ad 21 | 22 | pip install -e . 23 | ``` 24 | 25 | ## A brief example 26 | 27 | As an example, we will use the data from the ArchR [hematopoiesis tutorial](https://www.archrproject.com/articles/Articles/tutorial.html#creating-an-archrproject-1). 28 | 29 | ### Option 1. Directly read a `.arrow` file to adata 30 | ```python 31 | import ArchR_h5ad 32 | 33 | arrow_path = "/home/user/data/scATAC_CD34_BMMC_R1.arrow" 34 | 35 | adata = ArchR_h5ad.read_arrow(arrow_path, use_matrix="GeneScoreMatrix") 36 | ``` 37 | adata-GeneScoreMatrix 38 | 39 | Alternatively, one may use the **`"TileMatrix"`** generated by ArchR. 40 | 41 | ```python 42 | adata = ArchR_h5ad.read_arrow(arrow_path, use_matrix="TileMatrix") 43 | ``` 44 | adata-TileMatrix 45 | 46 | 47 | ### Option 2. Instantiate the `Arrow` class. 48 | 49 | ```python 50 | arrow = ArchR_h5ad.Arrow(arrow_path) 51 | arrow.to_adata() 52 | ``` 53 | adata-GeneScoreMatrix-ArrowClass 54 | 55 | ```python 56 | import numpy as np 57 | 58 | np.array(arrow.__dir__())[ 59 | np.array([not i.startswith("__") for i in arrow.__dir__()]) 60 | ].tolist() 61 | ``` 62 | adata-GeneScoreMatrix-ArrowClass-attributes 63 | 64 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import re 3 | import os 4 | import sys 5 | 6 | 7 | setup( 8 | name="ArchR_h5ad", 9 | version="0.0.12", 10 | python_requires=">3.6.0", 11 | author="Michael E. Vinyard - Harvard University - Massachussetts General Hospital - Broad Institute of MIT and Harvard", 12 | author_email="mvinyard@broadinstitute.org", 13 | url="", 14 | long_description=open("README.md", encoding="utf-8").read(), 15 | long_description_content_type="text/markdown", 16 | description="ArchR_h5ad: Read .arrow files (from ArchR) to anndata.", 17 | packages=[ 18 | "ArchR_h5ad", 19 | "ArchR_h5ad._compose_adata", 20 | "ArchR_h5ad._main", 21 | "ArchR_h5ad._parse_arrow", 22 | "ArchR_h5ad._utility_functions", 23 | ], 24 | install_requires=[ 25 | "anndata>=0.7.8", 26 | "licorice_font>=0.0.3", 27 | "tqdm>=4.64.0", 28 | ], 29 | classifiers=[ 30 | "Development Status :: 2 - Pre-Alpha", 31 | "Programming Language :: Python :: 3.6", 32 | "Intended Audience :: Science/Research", 33 | "Topic :: Scientific/Engineering :: Bio-Informatics", 34 | ], 35 | license="MIT", 36 | ) 37 | --------------------------------------------------------------------------------