├── .github
    └── workflows
    │   └── pypi_release.yml
├── .gitignore
├── ArchR_h5ad
    ├── __init__.py
    ├── _compose_adata
    │   ├── _add_obs_var.py
    │   ├── _cleanup_anndata.py
    │   ├── _compose_anndata.py
    │   ├── _dict_to_adata.py
    │   └── _write_h5ad.py
    ├── _main
    │   ├── _Arrow.py
    │   └── _read_arrow_to_adata.py
    ├── _parse_arrow
    │   ├── _add_ArchR_metadata.py
    │   ├── _add_matrix_parameters.py
    │   └── _read_arrow_chromosome.py
    └── _utility_functions
    │   └── _ordered_chromosomes.py
├── LICENSE
├── README.md
└── setup.py


/.github/workflows/pypi_release.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | name: pypi-release
 5 | on:
 6 |   release:
 7 |     types: [published]
 8 | 
 9 | jobs:
10 |   deploy:
11 | 
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |     - uses: actions/checkout@v2
16 |     - name: Set up Python
17 |       uses: actions/setup-python@v2
18 |       with:
19 |         python-version: '3.x'
20 |     - name: Install dependencies
21 |       run: |
22 |         python -m pip install --upgrade pip
23 |         pip install setuptools wheel twine
24 |     - name: Build and publish
25 |       env:
26 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
27 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
28 |       run: |
29 |         python setup.py sdist bdist_wheel
30 |         twine upload dist/*
31 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/ArchR_h5ad/__init__.py:
--------------------------------------------------------------------------------
1 | # __init__.py
2 | 
3 | from ._main._Arrow import _Arrow as Arrow
4 | from ._main._read_arrow_to_adata import _read_arrow_to_adata as read_arrow


--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_add_obs_var.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_add_obs_var.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | # import packages #
 8 | # --------------- #
 9 | import numpy as np
10 | import pandas as pd
11 | 
12 |         
def _add_obs(metadata, str_col_keys=["CellNames"]):

    """
    Split Arrow metadata into a per-observation table and file-level metadata.

    Parameters:
    -----------
    metadata
        Mapping of key -> array-like (or None). Entries are read in full with
        ``value[()]``.
        NOTE(review): callers pass the ``Metadata`` h5py group - confirm that
        every member is a dataset.

    str_col_keys
        Names of obs columns holding byte strings that should be decoded as
        UTF-8 and stored as categoricals.
        default: ["CellNames"]
        type: list(str)

    Returns:
    --------
    obs_df, metadata_dict
        obs_df: pandas.DataFrame with one column per multi-element entry.
        metadata_dict: dict with the ``None`` entries and the single-element
        entries (bytes decoded to str, numpy scalars unwrapped).
    """

    obs_dict = {}
    metadata_dict = {}

    for key, value in metadata.items():
        if value is None:
            metadata_dict[key] = value
            continue

        # ``[()]`` reads the whole entry and, unlike ``[:]``, also works when
        # the entry is scalar-shaped.
        data = np.asarray(value[()])

        if data.ndim == 0 or data.shape[0] == 1:
            entry = data[()] if data.ndim == 0 else data[0]
            if isinstance(entry, bytes):
                entry = entry.decode("utf-8")
            elif isinstance(entry, np.generic):
                # Single numeric values used to crash on ``.decode``; keep
                # them as plain Python scalars instead.
                entry = entry.item()
            metadata_dict[key] = entry
        else:
            obs_dict[key] = data

    obs_df = pd.DataFrame(obs_dict)

    for col in str_col_keys:
        if col in obs_df.columns:
            # Decode element-wise so columns that already hold ``str`` values
            # pass through untouched.
            decoded = obs_df[col].map(
                lambda item: item.decode("utf-8") if isinstance(item, bytes) else item
            )
            obs_df[col] = pd.Categorical(decoded)

    return obs_df, metadata_dict
33 | 
34 | 
def _add_var(feature_df, str_col_keys=["seqnames", "name"]):

    """
    Build the ``var`` table from the matrix's FeatureDF records.

    ``feature_df`` is converted to a numpy array and wrapped in a DataFrame.
    Every column listed in ``str_col_keys`` that is present is decoded from
    UTF-8 bytes and stored as a categorical.
    """

    records = np.array(feature_df)
    var_df = pd.DataFrame(records)

    present_keys = [key for key in str_col_keys if key in var_df.columns]
    for key in present_keys:
        decoded = var_df[key].str.decode("utf-8")
        var_df[key] = pd.Categorical(decoded)

    return var_df
46 |              
def _add_obs_var(adata, metadata, feature_df):

    """
    Attach ``var``, ``obs`` and ``uns['metadata_dict']`` to ``adata``.

    ``var`` comes from ``_add_var(feature_df)``; ``obs`` and the metadata
    dictionary come from ``_add_obs(metadata)``. Returns the same ``adata``.
    """

    var_table = _add_var(feature_df)
    adata.var = var_table

    obs_table, file_metadata = _add_obs(metadata)
    adata.obs = obs_table
    adata.uns['metadata_dict'] = file_metadata

    return adata


--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_cleanup_anndata.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_cleanup_anndata.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | # import packages #
 8 | # --------------- #
 9 | import scipy.sparse
10 | 
11 | 
# Maps a short format name to the scipy.sparse constructor that produces it.
format_dict = dict(
    csc=scipy.sparse.csc_matrix,
    csr=scipy.sparse.csr_matrix,
)


def _to_sparse_format(X, to="csr"):
    """Convert ``X`` with the constructor registered under ``to`` in ``format_dict``."""
    constructor = format_dict[to]
    return constructor(X)
19 | 
def _format_adata_indices(adata):

    """Cast the ``obs`` and ``var`` indices of ``adata`` to ``str`` and return ``adata``."""

    for axis_name in ("obs", "var"):
        frame = getattr(adata, axis_name)
        frame.index = frame.index.astype(str)

    return adata
26 | 
def _cleanup_anndata(adata):

    """Store ``adata.X`` as CSR, then stringify the obs/var indices; returns ``adata``."""

    csr_X = _to_sparse_format(adata.X, to="csr")
    adata.X = csr_X
    adata = _format_adata_indices(adata)
    return adata


--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_compose_anndata.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_compose_anndata.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | 
 8 | 
 9 | # import local dependencies #
10 | # ------------------------- #
11 | from ._dict_to_adata import _dict_to_adata
12 | from ._add_obs_var import _add_obs_var
13 | from ._cleanup_anndata import _cleanup_anndata
14 | from ._write_h5ad import _write_h5ad
15 | 
16 | 
def _compose_anndata(
    DataDict,
    metadata,
    feature_df,
    use_matrix,
    write_h5ad,
    outpath,
    silent,
):

    """
    Assemble an AnnData object from parsed Arrow components.

    Steps: stack the per-chromosome matrices (``_dict_to_adata``), attach
    obs/var/uns (``_add_obs_var``), then tidy the result (``_cleanup_anndata``).
    The object is printed unless ``silent`` is truthy and is written to disk
    via ``_write_h5ad`` when ``write_h5ad`` is truthy.

    Returns:
    --------
    adata
        the composed AnnData object.
    """

    adata = _cleanup_anndata(
        _add_obs_var(_dict_to_adata(DataDict), metadata, feature_df)
    )

    show_summary = not silent
    if show_summary:
        print(adata)

    if write_h5ad:
        _write_h5ad(adata, use_matrix, outpath, silent)

    return adata


--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_dict_to_adata.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_dict_to_adata.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | # import packages #
 8 | # --------------- #
 9 | import anndata
10 | import scipy.sparse
11 | 
12 | 
13 | # import local dependencies #
14 | # --------------- #
15 | from .._utility_functions._ordered_chromosomes import _ordered_chromosomes
16 | 
17 | 
def _dict_to_adata(DataDict):

    """
    Stack per-chromosome matrices column-wise into a single AnnData object.

    Parameters:
    -----------
    DataDict
        Mapping of chromosome name -> sparse matrix (or None). Only keys
        returned by ``_ordered_chromosomes()`` are used, in that order;
        ``None`` entries are skipped.
        type: dict

    Returns:
    --------
    adata
        AnnData whose ``X`` is the horizontally stacked CSR matrix.

    Raises:
    -------
    ValueError
        If no usable matrix is present in ``DataDict``.
    """

    ordered_matrices = [
        DataDict[chrom]
        for chrom in _ordered_chromosomes()
        if chrom in DataDict and DataDict[chrom] is not None
    ]

    # Fail with an explicit message rather than an opaque error from hstack.
    if not ordered_matrices:
        raise ValueError(
            "No non-empty chromosome matrices found in DataDict; cannot compose AnnData."
        )

    # Request CSR directly: hstack otherwise returns a COO matrix.
    X_ = scipy.sparse.hstack(ordered_matrices, format="csr")

    return anndata.AnnData(X_, dtype=X_.dtype)


--------------------------------------------------------------------------------
/ArchR_h5ad/_compose_adata/_write_h5ad.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_h5ad_filename.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | # import packages #
 8 | # --------------- #
 9 | import licorice_font
10 | import os
11 | 
12 | 
def _h5ad_filepath(adata, use_matrix, outpath):

    """Return ``<outpath>/<Sample>.<use_matrix>.h5ad``, with Sample read from ``adata.uns['metadata_dict']``."""

    sample_name = adata.uns['metadata_dict']['Sample']
    basename = "{}.{}.h5ad".format(sample_name, use_matrix)
    return os.path.join(outpath, basename)
17 | 
def _write_h5ad(adata, use_matrix, outpath, silent):

    """
    Write ``adata`` to the path built by ``_h5ad_filepath``.

    Unless ``silent`` is truthy, the destination is printed first.
    """

    destination = _h5ad_filepath(adata, use_matrix, outpath)

    announce = not silent
    if announce:
        label = licorice_font.font_format("Saving to", ["BOLD"])
        print("\n{}: {}".format(label, destination))

    adata.write_h5ad(destination)


--------------------------------------------------------------------------------
/ArchR_h5ad/_main/_Arrow.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_Arrow.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | # import packages #
 8 | # --------------- #
 9 | import h5py
10 | import licorice_font
11 | 
12 | 
13 | # import local dependencies #
14 | # ------------------------- #
15 | from .._parse_arrow._read_arrow_chromosome import _read_arrow_chromosome
16 | from .._parse_arrow._add_ArchR_metadata import _add_ArchR_metadata
17 | from .._parse_arrow._add_matrix_parameters import _add_matrix_parameters
18 | from .._compose_adata._compose_anndata import _compose_anndata
19 | 
20 | 
class _Arrow:

    """
    Reader for an ArchR Arrow (HDF5) file.

    On construction the file is opened read-only, one ``_<key>`` attribute is
    set for every entry of ``metadata_keys`` and one ``_params_<matrix>``
    attribute for every entry of ``matrices`` found in the file.
    """

    def __init__(
        self,
        path,
        matrices=["GeneScoreMatrix", "TileMatrix"],
        metadata_keys=["ArchRVersion", "Class"],
        silent=False,
        verbose=False
    ):
        """
        Parameters:
        -----------
        path
            path to the Arrow file.

        matrices
            Matrix groups whose parameters should be loaded.
            default: ["GeneScoreMatrix", "TileMatrix"]

        metadata_keys
            Top-level keys read from the file and stored as attributes.
            default: ["ArchRVersion", "Class"]

        silent
            If True, suppress the status messages guarded by this flag.
            default: False

        verbose
            Passed to the chromosome reader to print per-chromosome progress.
            default: False
        """

        self._path = path
        # Open explicitly read-only: the file is only ever read here, and the
        # default mode differs between h5py releases.
        self._file = h5py.File(self._path, "r")
        self._silent = silent
        self._verbose = verbose
        _add_ArchR_metadata(self, metadata_keys=metadata_keys)
        _add_matrix_parameters(self, matrices)

    def to_adata(self, use_matrix="GeneScoreMatrix", outpath="./", write_h5ad=True):
        """
        Read ``use_matrix`` from the Arrow file and compose an AnnData object.

        Parameters:
        -----------
        use_matrix
            Name of the matrix group to read.
            default: "GeneScoreMatrix"

        outpath
            Directory the .h5ad file is written to when ``write_h5ad`` is True.
            default: "./"

        write_h5ad
            If True, write the composed AnnData to disk.
            default: True

        Returns:
        --------
        adata
            The composed AnnData object (also stored as ``self._adata``).
        """

        self._use_matrix = use_matrix
        self._outpath = outpath

        if not self._silent:
            mtx = licorice_font.font_format(self._use_matrix, ["BOLD", "BLUE"])
            print("Reading ArchR {} to AnnData".format(mtx))

        self._DataDict = _read_arrow_chromosome(self._file, self._use_matrix, self._verbose)
        self._adata = _compose_anndata(DataDict=self._DataDict,
                                       metadata=self._file['Metadata'],
                                       feature_df=self._file[self._use_matrix]["Info"]["FeatureDF"],
                                       use_matrix=self._use_matrix,
                                       write_h5ad=write_h5ad,
                                       outpath=outpath,
                                       silent=self._silent,
                                      )

        # Hand the result back so callers need not reach into a private attribute.
        return self._adata
60 |         
61 |         


--------------------------------------------------------------------------------
/ArchR_h5ad/_main/_read_arrow_to_adata.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_read_ArchR_to_adata.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | # import local dependencies #
 8 | # ------------------------- #
 9 | from ._Arrow import _Arrow
10 | 
11 | 
def _read_arrow_to_adata(
    path,
    matrices=["GeneScoreMatrix", "TileMatrix"],
    metadata_keys=["ArchRVersion", "Class"],
    use_matrix="GeneScoreMatrix",
    silent=False,
    write_h5ad=True,
    outpath="./",
):
    """
    Read an ArchR ".arrow" file as AnnData (adata).

    Parameters:
    -----------
    path
        path to ArchR.arrow file.
        type: str

    matrices
        Matrices saved in the ArchR.arrow file.
        default: ["GeneScoreMatrix", "TileMatrix"]
        type: list(str)

    metadata_keys
        Keys to high-level metadata saved by ArchR.
        default: ["ArchRVersion", "Class"]
        type: list(str)

    use_matrix
        Which matrix to read, e.g. "GeneScoreMatrix" or "TileMatrix".
        default: "GeneScoreMatrix",
        type: str

    silent
        If True, suppress status messages.
        default: False
        type: bool

    write_h5ad
        If True, also write the resulting AnnData to an .h5ad file in
        `outpath`.
        default: True
        type: bool

    outpath
        Directory the .h5ad file is written to when `write_h5ad` is True.
        default: "./"
        type: str

    Returns:
    --------
    adata
        anndata._core.anndata.AnnData
    """

    arrow = _Arrow(
        path, matrices=matrices, metadata_keys=metadata_keys, silent=silent
    )
    arrow.to_adata(use_matrix=use_matrix, outpath=outpath, write_h5ad=write_h5ad)

    return arrow._adata


--------------------------------------------------------------------------------
/ArchR_h5ad/_parse_arrow/_add_ArchR_metadata.py:
--------------------------------------------------------------------------------
1 | 
2 | __module_name__ = "_add_ArchR_metadata.py"
3 | __author__ = ", ".join(["Michael E. Vinyard"])
4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
5 | 
6 | 
def _add_ArchR_metadata(arrow, metadata_keys=["ArchRVersion", "Class"]):

    """
    Copy top-level metadata entries of the Arrow file onto ``arrow``.

    For each key, the first element of ``arrow._file[key]`` is decoded as
    UTF-8 and stored on ``arrow`` as the attribute ``_<key>``.
    """

    for key in metadata_keys:
        attr_name = "_{}".format(key)
        raw_value = arrow._file[key][0]
        setattr(arrow, attr_name, raw_value.decode("utf-8"))


--------------------------------------------------------------------------------
/ArchR_h5ad/_parse_arrow/_add_matrix_parameters.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_add_matrix_parameters.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | def _return_matrix_params(file, use_matrix):
 8 | 
 9 |     params = file[use_matrix]["Info"]["Params"][:][0]
10 | 
11 |     params_ = []
12 |     for val in params.tolist():
13 |         if type(val) is bytes:
14 |             params_.append(val.decode("utf-8"))
15 |         else:
16 |             params_.append(val)
17 | 
18 |     return params_
19 | 
def _add_matrix_parameters(arrow, matrices=["GeneScoreMatrix", "TileMatrix"]):

    """
    Store the parameters of each available matrix on ``arrow``.

    For every name in ``matrices`` that is a key of ``arrow._file``, the
    attribute ``_params_<matrix>`` is set to the result of
    ``_return_matrix_params``. Names missing from the file are skipped.
    """

    file = arrow._file

    for matrix in matrices:
        if matrix not in list(file.keys()):
            continue
        attr_name = "_params_{}".format(matrix)
        setattr(arrow, attr_name, _return_matrix_params(file, matrix))


--------------------------------------------------------------------------------
/ArchR_h5ad/_parse_arrow/_read_arrow_chromosome.py:
--------------------------------------------------------------------------------
  1 | 
  2 | __module_name__ = "_read_arrow_chromosome.py"
  3 | __author__ = ", ".join(["Michael E. Vinyard"])
  4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
  5 | 
  6 | 
  7 | # import packages #
  8 | # --------------- #
  9 | import numpy as np
 10 | import scipy.sparse
 11 | from tqdm.notebook import tqdm
 12 | 
 13 | 
 14 | # import local dependencies #
 15 | # ------------------------- #
 16 | from .._utility_functions._ordered_chromosomes import _ordered_chromosomes
 17 | 
 18 | 
 19 | def _return_sum_chromosome_axis_sums(chromosome):
 20 | 
 21 |     colsums = np.array(chromosome["colSums"]).flatten().sum()
 22 |     rowsums = np.array(chromosome["rowSums"]).flatten().sum()
 23 | 
 24 |     return np.array([colsums, rowsums]).sum()
 25 | 
 26 | def _return_zero_rows(colsums):
 27 |     return np.where(colsums == 0)[0]
 28 | 
 29 | def _get_matrix_size(chromosome):
 30 | 
 31 |     """"""
 32 | 
 33 |     ncols = np.array(chromosome["colSums"]).flatten().shape[0]
 34 |     nrows = np.array(chromosome["rowSums"]).flatten().shape[0]
 35 | 
 36 |     return [ncols, nrows]
 37 | 
 38 | def _initialize_empty_chromosome_data_matrix(chromosome):
 39 | 
 40 |     [ncols, nrows] = _get_matrix_size(chromosome)
 41 | 
 42 |     return np.zeros([ncols, nrows])
 43 | 
 44 | def _return_jLengths(chromosome):
 45 |     return np.append(0, np.array(chromosome["jLengths"]).flatten()).cumsum()
 46 | 
 47 | def _fetch_chromosome_data_from_arrow_h5(chromosome, binary):
 48 | 
 49 |     colsums = np.array(chromosome["colSums"]).flatten()
 50 |     rowsums = np.array(chromosome["rowSums"]).flatten()
 51 | 
 52 |     axis_sums = _return_sum_chromosome_axis_sums(chromosome)
 53 |     zero_rows = _return_zero_rows(colsums)
 54 | 
 55 |     if axis_sums.sum() == 0:
 56 |         print("\tNo features / cells found in chromosome...")
 57 |         return None
 58 | 
 59 |     else:
 60 | 
 61 |         X_empty = _initialize_empty_chromosome_data_matrix(chromosome)
 62 |         j_lengths = _return_jLengths(chromosome)
 63 |         
 64 |         i = np.array(chromosome["i"]).flatten()
 65 |         
 66 |         if not binary:
 67 |             x = np.array(chromosome["x"]).flatten()
 68 |         else:
 69 |             x = np.ones(len(i))
 70 | 
 71 |         row_adj = 0
 72 |         row_sums = []
 73 | 
 74 |         for row in range(len(X_empty)):
 75 |             if not row in zero_rows:
 76 |                 j_len_i = j_lengths[row_adj]
 77 |                 if not row_adj == len(j_lengths):
 78 |                     j_len_j = j_lengths[int(row_adj + 1)]
 79 |                 else:
 80 |                     j_len_j = j_len_i
 81 |                 row_vals = x[j_len_i:j_len_j]
 82 |                 row_sums.append(row_vals.sum())
 83 |                 idx = i[j_len_i:j_len_j] - 1
 84 |                 row_adj += 1
 85 |                 X_empty[row, idx] = row_vals
 86 | 
 87 |         return scipy.sparse.csr_matrix(X_empty)
 88 |     
 89 | 
 90 | def _read_arrow_chromosome(h5_file, use_matrix="GeneScoreMatrix", verbose=False):
 91 | 
 92 |     chromosomes = list(h5_file[use_matrix].keys())
 93 |     chromosomes.remove("Info")
 94 |     
 95 |     if use_matrix == "TileMatrix":
 96 |         binary = True
 97 |     else:
 98 |         binary = False
 99 | 
100 |     DataDict = {}
101 |     if verbose:
102 |         print("Loading chromosomes from Arrow:")
103 |     for chrom_key in tqdm(_ordered_chromosomes(), desc="Chromosomes"):
104 |         if chrom_key in chromosomes:
105 |             chromosome = h5_file[use_matrix][chrom_key]
106 |             if verbose:
107 |                 print("- {}".format(chrom_key))
108 |             DataDict[chrom_key] = _fetch_chromosome_data_from_arrow_h5(chromosome, binary)
109 |         else:
110 |             print(" - Warning: {} not detected!".format(chrom_key))
111 |     return DataDict


--------------------------------------------------------------------------------
/ArchR_h5ad/_utility_functions/_ordered_chromosomes.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __module_name__ = "_ordered_chromosomes.py"
 3 | __author__ = ", ".join(["Michael E. Vinyard"])
 4 | __email__ = ", ".join(["vinyard@g.harvard.edu",])
 5 | 
 6 | 
 7 | # import packages #
 8 | # --------------- #
 9 | import numpy as np
10 | 
11 | 
def _ordered_chromosomes():
    """Return the chromosome names "chr1" ... "chr22" followed by "chrX", in that order."""
    autosomes = ["chr{}".format(number) for number in range(1, 23)]
    return autosomes + ["chrX"]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Michael Vinyard
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ArchR-h5ad
 2 | 
 3 | [![PyPI pyversions](https://img.shields.io/pypi/pyversions/ArchR-h5ad.svg)](https://pypi.python.org/pypi/ArchR-h5ad/)
 4 | [![PyPI version](https://badge.fury.io/py/ArchR-h5ad.svg)](https://badge.fury.io/py/ArchR-h5ad)
 5 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 6 | 
 7 | A lightweight Python package to parse **`.arrow`** files produced by [**ArchR**](https://www.archrproject.com) into [**AnnData**](https://anndata.readthedocs.io/en/stable/) objects.
 8 | 
 9 | ## Installation
10 | 
11 | Install the latest released distribution ([**`v0.0.12`**](https://pypi.org/project/ArchR-h5ad/0.0.12/)) from [PyPI](https://pypi.org/project/ArchR-h5ad/):
12 | 
13 | ```BASH
14 | pip install ArchR_h5ad
15 | ```
16 | 
17 | Alternatively, clone the repo to install the development version, locally:
18 | 
19 | ```BASH
20 | git clone https://github.com/mvinyard/ArchR-h5ad.git; cd ArchR-h5ad
21 | 
22 | pip install -e .
23 | ```
24 | 
25 | ## A brief example
26 | 
27 | As an example, we will use the data from the ArchR [hematopoiesis tutorial](https://www.archrproject.com/articles/Articles/tutorial.html#creating-an-archrproject-1).
28 | 
29 | ### Option 1. Directly read a `.arrow` file to adata
30 | ```python
31 | import ArchR_h5ad
32 | 
33 | arrow_path = "/home/user/data/scATAC_CD34_BMMC_R1.arrow"
34 | 
35 | adata = ArchR_h5ad.read_arrow(arrow_path, use_matrix="GeneScoreMatrix")
36 | ```
37 | <img width="900" alt="adata-GeneScoreMatrix" src="https://user-images.githubusercontent.com/47393421/168927479-8e303e28-60c4-43ce-a7d0-c826304a4ca6.png">
38 | 
39 | Alternatively, one may use the **`"TileMatrix"`** generated by ArchR. 
40 | 
41 | ```python
42 | adata = ArchR_h5ad.read_arrow(arrow_path, use_matrix="TileMatrix")
43 | ```
44 | <img width="900" alt="adata-TileMatrix" src="https://user-images.githubusercontent.com/47393421/168928538-e909dd1b-d579-47a1-9dd6-0139ab6066fd.png">
45 | 
46 | 
47 | ### Option 2. Instantiate the `Arrow` class.
48 | 
49 | ```python
50 | arrow = ArchR_h5ad.Arrow(arrow_path)
51 | arrow.to_adata()
52 | ```
53 | <img width="900" alt="adata-GeneScoreMatrix-ArrowClass" src="https://user-images.githubusercontent.com/47393421/168928362-e351cb46-1d14-4aff-a9c8-cf84c1af07b5.png">
54 | 
55 | ```python
56 | import numpy as np
57 | 
58 | np.array(arrow.__dir__())[
59 |     np.array([not i.startswith("__") for i in arrow.__dir__()])
60 | ].tolist()
61 | ```
62 | <img width="900" alt="adata-GeneScoreMatrix-ArrowClass-attributes" src="https://user-images.githubusercontent.com/47393421/168928426-013057da-3bf7-4dd4-82dd-86be129a85ed.png">
63 | 
64 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | import re
 3 | import os
 4 | import sys
 5 | 
 6 | 
 7 | setup(
 8 |     name="ArchR_h5ad",
 9 |     version="0.0.12",
10 |     python_requires=">3.6.0",
11 |     author="Michael E. Vinyard - Harvard University - Massachussetts General Hospital - Broad Institute of MIT and Harvard",
12 |     author_email="mvinyard@broadinstitute.org",
13 |     url="",
14 |     long_description=open("README.md", encoding="utf-8").read(),
15 |     long_description_content_type="text/markdown",
16 |     description="ArchR_h5ad: Read .arrow files (from ArchR) to anndata.",
17 |     packages=[
18 |         "ArchR_h5ad",
19 |         "ArchR_h5ad._compose_adata",
20 |         "ArchR_h5ad._main",
21 |         "ArchR_h5ad._parse_arrow",
22 |         "ArchR_h5ad._utility_functions",
23 |     ],
24 |     install_requires=[
25 |         "anndata>=0.7.8",
26 |         "licorice_font>=0.0.3",
27 |         "tqdm>=4.64.0",
28 |     ],
29 |     classifiers=[
30 |         "Development Status :: 2 - Pre-Alpha",
31 |         "Programming Language :: Python :: 3.6",
32 |         "Intended Audience :: Science/Research",
33 |         "Topic :: Scientific/Engineering :: Bio-Informatics",
34 |     ],
35 |     license="MIT",
36 | )
37 | 


--------------------------------------------------------------------------------