├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── atlas ├── __init__.py ├── __main__.py ├── apple_health.py └── cli.py ├── examples └── apple-health-exploration-clickhouse-chdb-altair-quarto │ └── index.qmd ├── pyproject.toml └── tests └── test_expanse.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | cache: pip 23 | cache-dependency-path: pyproject.toml 24 | - name: Install dependencies 25 | run: | 26 | pip install '.[test]' 27 | - name: Run tests 28 | run: | 29 | pytest 30 | deploy: 31 | runs-on: ubuntu-latest 32 | needs: [test] 33 | environment: release 34 | permissions: 35 | id-token: write 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Set up Python 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: "3.12" 42 | cache: pip 43 | cache-dependency-path: pyproject.toml 44 | - name: Install dependencies 45 | run: | 46 | pip install setuptools wheel build 47 | - name: Build 48 | run: | 49 | python -m build 50 | - name: Publish 51 | uses: pypa/gh-action-pypi-publish@release/v1 52 | 53 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | cache: pip 21 | cache-dependency-path: pyproject.toml 22 | - name: Install dependencies 23 | run: | 24 | pip install '.[test]' 25 | - name: Run tests 26 | run: | 27 | [ -d tests ] && pytest || echo "Tests directory not found" 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info 9 | .DS_Store 10 | dist 11 | build 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
3 | Atlas 6 |
7 | 8 | # Atlas 9 | 10 | Atlas lets you explore your Apple Health data. 11 | 12 | --- 13 | 14 | [![PyPI](https://img.shields.io/pypi/v/atlas-db.svg)](https://pypi.org/project/atlas-db/) 15 | [![Tests](https://github.com/atlaslib/atlas/actions/workflows/test.yml/badge.svg)](https://github.com/atlaslib/atlas/actions/workflows/test.yml) 16 | [![Changelog](https://img.shields.io/github/v/release/atlaslib/atlas?include_prereleases&label=changelog)](https://github.com/atlaslib/atlas/releases) 17 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/atlaslib/atlas/blob/main/LICENSE) 18 | 19 | ## Installation 20 | 21 | Install Atlas using `pip`: 22 | ```bash 23 | pip install atlas-db 24 | ``` 25 | 26 | Upgrade Atlas using `pip`: 27 | ```bash 28 | pip install atlas-db --upgrade 29 | ``` 30 | 31 | Uninstall Expanse (old name) using `pipx`: 32 | ```bash 33 | pipx uninstall expanse 34 | ``` 35 | 36 | ## Explore 37 | 38 | > [!NOTE] 39 | > Here is a [Quarto notebook](https://github.com/atlaslib/atlas/blob/main/examples/apple-health-exploration-clickhouse-chdb-altair-quarto/index.qmd) with example code and SQL queries. 40 | > 41 | > The notebook uses Vega-Altair and Clickhouse (chDB) to explore Apple Health time series data in a .parquet file. The .parquet file was generated by Atlas from an Apple Health export.xml file. 42 | > ![vega-altair charts](https://github.com/atlaslib/atlas/assets/14825/96b63ae1-9bc9-4a01-aa7b-04ef4540d6eb) 43 | 44 | First we create the `.parquet` file from the `export.xml` file. 45 | 46 | ```bash 47 | atlas parquet export.xml -o ah.parquet 48 | ``` 49 | 50 | We can explore the data in many ways. 51 | 52 | It is just a table/dataframe/parquet file with 5 columns. 53 | 54 | But here we'll use `clickhouse local`: 55 | 56 | ```bash 57 | clickhouse local 58 | ``` 59 | 60 | Let's take a look at the table. 61 | 62 | ```sql 63 | DESCRIBE TABLE `ah.parquet` 64 | ``` 65 | 66 | ``` 67 | ┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ 68 | │ type │ Nullable(String) │ │ │ │ │ │ 69 | │ start │ Nullable(DateTime64(6)) │ │ │ │ │ │ 70 | │ end │ Nullable(DateTime64(6)) │ │ │ │ │ │ 71 | │ created │ Nullable(DateTime64(6)) │ │ │ │ │ │ 72 | │ value │ Nullable(String) │ │ │ │ │ │ 73 | └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ 74 | ``` 75 | 76 | What kind of "types" do we have and how many? 77 | 78 | ```sql 79 | SELECT 80 | type, 81 | COUNT(*) AS count 82 | FROM `ah.parquet` 83 | GROUP BY type 84 | ORDER BY count DESC 85 | ``` 86 | 87 | ``` 88 | ┌─type───────────────────────────┬──count─┐ 89 | │ ActiveEnergyBurned │ 879902 │ 90 | │ HeartRate │ 451854 │ 91 | │ BasalEnergyBurned │ 289031 │ 92 | │ DistanceWalkingRunning │ 260500 │ 93 | │ StepCount │ 217384 │ 94 | │ PhysicalEffort │ 69747 │ 95 | │ AppleExerciseTime │ 61363 │ 96 | │ AppleStandTime │ 58309 │ 97 | │ EnvironmentalAudioExposure │ 44535 │ 98 | │ SleepAnalysis │ 36599 │ 99 | │ WalkingStepLength │ 28281 │ 100 | │ WalkingSpeed │ 28281 │ 101 | │ RespiratoryRate │ 27829 │ 102 | │ AppleStandHour │ 25877 │ 103 | │ FlightsClimbed │ 22690 │ 104 | │ WalkingDoubleSupportPercentage │ 21900 │ 105 | │ WalkingAsymmetryPercentage │ 13820 │ 106 | │ HeartRateVariabilitySDNN │ 11961 │ 107 | │ OxygenSaturation │ 4912 │ 108 | │ StairDescentSpeed │ 4718 │ 109 | │ StairAscentSpeed │ 4249 │ 110 | │ DistanceCycling │ 2890 │ 111 | │ TimeInDaylight │ 2403 │ 112 | │ HeadphoneAudioExposure │ 2323 │ 113 | │ RestingHeartRate │ 1399 │ 114 | │ WalkingHeartRateAverage │ 1176 │ 115 | │ DistanceSwimming │ 455 │ 116 | │ SwimmingStrokeCount │ 455 │ 117 | │ AppleSleepingWristTemperature │ 442 │ 118 | │ RunningSpeed │ 391 │ 119 | │ VO2Max │ 366 │ 120 | │ RunningPower │ 173 │ 121 | │ DietaryCaffeine │ 171 │ 122 | │ AppleWalkingSteadiness │ 138 │ 123 | │ SixMinuteWalkTestDistance │ 122 │ 124 | │ HeartRateRecoveryOneMinute │ 76 │ 125 | │ RunningVerticalOscillation │ 74 │ 126 | │ RunningGroundContactTime │ 67 │ 127 | │ RunningStrideLength │ 54 │ 128 | │ MindfulSession │ 34 │ 129 | │ HighHeartRateEvent │ 18 │ 130 | │ AudioExposureEvent │ 14 │ 131 | │ BodyMass │ 14 │ 132 | │ Height │ 5 │ 133 | │ Fatigue │ 1 │ 134 | │ HKDataTypeSleepDurationGoal │ 1 │ 135 | └────────────────────────────────┴────────┘ 136 | ``` 137 | 138 | What's our total step count? 139 | 140 | > [!NOTE] 141 | > The `value` column is type `Nullable(String)` so we have to cast `toFloat64` to sum up the step values. 142 | 143 | ```sql 144 | SELECT sum(toFloat64(value)) 145 | FROM `ah.parquet` 146 | WHERE type = 'StepCount' 147 | ``` 148 | 149 | ``` 150 | ┌─sum(toFloat64(value))─┐ 151 | │ 30295811 │ 152 | └───────────────────────┘ 153 | ``` 154 | 155 | 30.295.811 (30.29 million) steps. That's a lot of steps! 156 | 157 | ## How to get the Apple Health export.xml file 158 | 159 | ![group-figma-small](https://github.com/atlaslib/atlas/assets/14825/e48971a3-bc13-4496-8fe2-5dcd292c9019) 160 | 161 | - open the Apple **Health** app on iOS 162 | - tap on your **profile picture** (or initials) at the top right 163 | - tap on **Export All Health Data** 164 | - tap on **Export** 165 | - **wait** a few seconds to a few minutes (~3min for 10 years of data) 166 | - **get the export.zip** archive via Airdrop to a Mac (or save to Files) 167 | 168 | > [!NOTE] 169 | > The **export.xml** file is **in** the **export.zip** archive. 170 | 171 | You can expand the **export.zip** file by double-clicking on it. 172 | 173 | This creates a directory named **apple_health_export** and in it is the **export.xml** file. 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | See: [Apple Support on how to export Apple Health and Fitness in XML format](https://support.apple.com/en-gb/guide/iphone/iph5ede58c3d/ios#:~:text=Share%20your%20health%20and%20fitness%20data%20in%20XML%20format) 185 | 186 | ## Usage 187 | 188 | `atlas parquet export.xml` 189 | 190 | ## Features 191 | 192 | - turn export.xml into a simple parquet file 193 | 194 | -------------------------------------------------------------------------------- /atlas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atlaslib/atlas/88d511df96be52c21ddaa7e6c4220d2733d6f1b7/atlas/__init__.py -------------------------------------------------------------------------------- /atlas/__main__.py: -------------------------------------------------------------------------------- 1 | from .cli import cli 2 | 3 | if __name__ == "__main__": 4 | cli() 5 | -------------------------------------------------------------------------------- /atlas/apple_health.py: -------------------------------------------------------------------------------- 1 | import os 2 | import polars as pl 3 | import lxml.etree as et 4 | import sys 5 | import time 6 | 7 | 8 | def to_df(file_path): 9 | start_time = time.time() 10 | last_time = time.time() 11 | 12 | print("transform apple health export.xml to parquet") 13 | print("- parsing xml", end="") 14 | sys.stdout.flush() 15 | 16 | attributes = [] 17 | 18 | for event, elem in et.iterparse(file_path, tag="Record"): 19 | attributes.append( 20 | { 21 | attr: elem.get(attr) 22 | for attr in ["value", "type", "startDate", "endDate", "creationDate"] 23 | if elem.get(attr) is not None 24 | } 25 | ) 26 | elem.clear() 27 | 28 | last_time = time.time() 29 | elapsed_time = last_time - start_time 30 | print(f" ({elapsed_time} s)") 31 | print("- loading df", end="") 32 | sys.stdout.flush() 33 | 34 | df = pl.DataFrame(attributes) 35 | 36 | last_time = time.time() 37 | elapsed_time = last_time - start_time 38 | print(f" ({elapsed_time} s)") 39 | print("- drop rows where type is missing", end="") 40 | sys.stdout.flush() 41 | 42 | df = df.filter(pl.col("type").is_not_null()) 43 | 44 | # replace remove prefixes 45 | # print(df.shape) 46 | last_time = time.time() 47 | elapsed_time = last_time - start_time 48 | print(f" ({elapsed_time} s)") 49 | print("- remove prefixes", end="") 50 | sys.stdout.flush() 51 | 52 | df = df.with_columns( 53 | pl.col("type").str.replace(r"^HKQuantityTypeIdentifier", "") 54 | ).with_columns(pl.col("type").str.replace(r"^HKCategoryTypeIdentifier", "")) 55 | df = df.select( 56 | pl.all().name.map( 57 | lambda col_name: col_name.replace("HKCharacteristicTypeIdentifier", "") 58 | ) 59 | ) 60 | 61 | # print(df.shape) 62 | last_time = time.time() 63 | elapsed_time = last_time - start_time 64 | print(f" ({elapsed_time} s)") 65 | print("- convert time strings to datetime", end="") 66 | sys.stdout.flush() 67 | 68 | # convert datetime strings to datetime (example: "2024-02-18 15:15:06 +0100") 69 | # TODO: add timezone handling (currently the timezone offset is ignored via truncation) 70 | df = df.with_columns( 71 | pl.col("startDate").str.slice(0, 19).str.to_datetime(), 72 | pl.col("endDate").str.slice(0, 19).str.to_datetime(), 73 | pl.col("creationDate").str.slice(0, 19).str.to_datetime(), 74 | ) 75 | 76 | df = df.rename({"startDate": "start", "endDate": "end", "creationDate": "created"}) 77 | 78 | # drop unneeded entries 79 | last_time = time.time() 80 | elapsed_time = last_time - start_time 81 | print(f" ({elapsed_time} s)") 82 | print("- drop unneeded entries", end="") 83 | sys.stdout.flush() 84 | 85 | # replacement map for enums 86 | replacement_map = { 87 | # 'HKCategoryValueSeverityUnspecified': '1', 88 | # 'HKCategoryValueLowCardioFitnessEventLowFitness': '1', 89 | # 'HKCategoryValueNotApplicable': '-1', 90 | # 'HKCategoryValueEnvironmentalAudioExposureEventMomentaryLimit': '1', 91 | # 'HKCategoryValueAppleStandHourStood': '1', 92 | # 'HKCategoryValueAppleStandHourIdle': '0', 93 | # 'HKCategoryValueSleepAnalysisAwake': '0', 94 | # 'HKCategoryValueSleepAnalysisAsleepCore': '1', 95 | # 'HKCategoryValueSleepAnalysisAsleepDeep': '2', 96 | # 'HKCategoryValueSleepAnalysisAsleepREM': '3', 97 | # 'HKCategoryValueSleepAnalysisAsleepUnspecified': '1', 98 | # 'HKCategoryValueSleepAnalysisInBed': '-1', 99 | # '2024-02-24 12:46:57 +0100': '-1' 100 | } 101 | 102 | # Replace string values 103 | # print(df.shape) 104 | last_time = time.time() 105 | elapsed_time = last_time - start_time 106 | print(f" ({elapsed_time} s)") 107 | print("- replace string values", end="") 108 | sys.stdout.flush() 109 | 110 | for key, val in replacement_map.items(): 111 | df = df.with_columns(pl.col("value").str.replace(key, val)) 112 | 113 | # split "value" column "23.1 kPa" -> ["23.1", "kPa"] 114 | # if there are multiple parts, only take the 0th part 115 | df = ( 116 | df.with_columns( 117 | pl.col("value") 118 | .str.splitn(" ", 2) 119 | .struct.rename_fields(["frst", "rest"]) 120 | .alias("fields"), 121 | ) 122 | .unnest("fields") 123 | .drop("rest") 124 | .drop("value") 125 | .rename({"frst": "value"}) 126 | ) 127 | 128 | last_time = time.time() 129 | elapsed_time = last_time - start_time 130 | print(f" ({elapsed_time} s)") 131 | print("- convert 'value' column to numeric", end="") 132 | sys.stdout.flush() 133 | 134 | # convert 'value' column to float 135 | # df = df.with_columns( 136 | # pl.col('value').cast(pl.Float64) 137 | # ) 138 | 139 | # sort: newest data DESC 140 | # print(df.shape) 141 | last_time = time.time() 142 | elapsed_time = last_time - start_time 143 | print(f" ({elapsed_time} s)") 144 | print("- sort: newest data first", end="") 145 | sys.stdout.flush() 146 | 147 | df = df.sort("type").sort("start", descending=True) 148 | 149 | last_time = time.time() 150 | elapsed_time = last_time - start_time 151 | print(f" ({elapsed_time} s)") 152 | 153 | print(f"\ndf data shape: {df.shape}") 154 | print(f"total elapsed time: {elapsed_time} s\n") 155 | sys.stdout.flush() 156 | 157 | return df 158 | 159 | 160 | def write_parquet(df, path=None): 161 | df.write_parquet(path, compression="zstd", compression_level=22) 162 | return 163 | -------------------------------------------------------------------------------- /atlas/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | import os 3 | import datetime as dt 4 | from atlas.apple_health import to_df, write_parquet 5 | 6 | 7 | @click.group() 8 | @click.version_option() 9 | def cli(): 10 | "turn apple health export.xml into parquet" 11 | 12 | 13 | @cli.command(name="parquet") 14 | @click.argument("path", type=click.Path(exists=True)) 15 | @click.option( 16 | "-o", 17 | "--out", 18 | help="path where to write the parquet file", 19 | ) 20 | def transform(path, out): 21 | "Command description goes here" 22 | cwd = os.getcwd() 23 | 24 | df = to_df(path) 25 | 26 | if out is None: 27 | stamp = dt.datetime.now().strftime("%Y-%m-%d") 28 | filename = f"apple-health-{stamp}.parquet" 29 | write_parquet(df, os.path.join(cwd, filename)) 30 | else: 31 | write_parquet(df, out) 32 | -------------------------------------------------------------------------------- /examples/apple-health-exploration-clickhouse-chdb-altair-quarto/index.qmd: -------------------------------------------------------------------------------- 1 | # Apple Health Exploration with ClickHouse (chDB), Altair and Quarto 2 | 3 | ### Install libs 4 | ```{python} 5 | !pip install chdb 6 | !pip install altair 7 | !pip install polars 8 | !pip install pyarrow 9 | ``` 10 | 11 | ### Parquet path 12 | ```{python} 13 | apple_health_parquet = "ah.parquet" 14 | ``` 15 | 16 | ### Imports 17 | ```{python} 18 | import altair as alt 19 | import pyarrow as pa 20 | import chdb 21 | from chdb.session import Session 22 | ``` 23 | 24 | ### Create Database 25 | ```{python} 26 | db = Session() 27 | db.query("CREATE DATABASE db") 28 | db.query("USE db") 29 | ``` 30 | 31 | ### Drop Table (if exists) 32 | ```{python} 33 | db.query("drop table if exists db.ah") 34 | ``` 35 | 36 | ### Create Table from Parquet 37 | ```{python} 38 | create_table = f""" 39 | CREATE TABLE ah 40 | ENGINE = MergeTree 41 | ORDER BY tuple() AS 42 | SELECT * 43 | FROM file('{apple_health_parquet}', Parquet) 44 | """ 45 | 46 | db.query(create_table) 47 | ``` 48 | 49 | ### Describe Table 50 | ```{python} 51 | q = "desc table ah" 52 | r = db.query(q, "PrettyCompactNoEscapes") 53 | print(r) 54 | ``` 55 | 56 | ### Types 57 | ```{python} 58 | r = db.query("select distinct(type) from ah") 59 | print(r) 60 | ``` 61 | 62 | ### Distance (Walking and Running) 63 | ```{python} 64 | q = """ 65 | from ah 66 | select sum(toFloat64(value)) as val, 67 | toString(date(end)) as cdate 68 | where type == 'DistanceWalkingRunning' 69 | group by cdate 70 | order by cdate 71 | """ 72 | s = chdb.to_df(db.query(q, "Arrow")) 73 | 74 | c = ( 75 | alt.Chart(s, title="distance over time") 76 | .mark_point() 77 | .encode( 78 | alt.X("cdate:T").axis(format="%Y", labelAngle=-45).title("days"), 79 | alt.Y("val").axis(format=".2s").title("distance"), 80 | tooltip=["cdate:T", "val"], 81 | ) 82 | ) 83 | c = c + c.transform_regression("cdate", "val", method="poly").mark_line(color="black") 84 | c.configure_mark(color="coral").properties(width=500).interactive() 85 | ``` 86 | 87 | ### VO2 Max 88 | ```{python} 89 | q = """ 90 | from ah 91 | select toFloat64(value) as vo2max, 92 | toString(date(end)) as cdate 93 | where type == 'VO2Max' 94 | order by cdate 95 | """ 96 | s = chdb.to_df(db.query(q, "Arrow")) 97 | 98 | c = ( 99 | alt.Chart(s, title="V̇O2 max over time") 100 | .mark_point() 101 | .encode( 102 | alt.X("cdate:T").axis(format="%Y %b", labelAngle=-45).title("days"), 103 | alt.Y("vo2max").axis(format=".2s").title("vo2max").scale(zero=False), 104 | tooltip=["cdate:T", "vo2max"], 105 | ) 106 | ) 107 | c = c + c.transform_regression("cdate", "vo2max", method="poly").mark_line( 108 | color="black" 109 | ) 110 | c.configure_mark(color="purple").properties(width=500).interactive() 111 | ``` 112 | 113 | ### Body Mass 114 | ```{python} 115 | q = """ 116 | from ah 117 | select sum(toFloat64(value)) as weight, 118 | toString(date(end)) as cdate 119 | where type == 'BodyMass' 120 | group by cdate 121 | having weight < 80 122 | order by cdate 123 | """ 124 | s = chdb.to_df(db.query(q, "Arrow")) 125 | 126 | c = ( 127 | alt.Chart(s, title="weight over time") 128 | .mark_line(point=True) # increased point size 129 | .encode( 130 | alt.X("cdate:T").axis(format="%Y %b", labelAngle=-45).title("days"), 131 | alt.Y("weight").axis(format=".2s").title("weight (kg)").scale(zero=False), 132 | tooltip=["cdate:T", "weight"], 133 | ) 134 | ) 135 | c.configure_mark(color="black").properties(width=500).configure_point( 136 | size=80 137 | ).interactive() 138 | ``` 139 | 140 | ### Cycling 141 | ```{python} 142 | q = """ 143 | from ah 144 | select sum(toFloat64(value)) as distance, 145 | toString(toStartOfDay(end)) as cdate 146 | where type == 'DistanceCycling' 147 | and created is not null 148 | group by cdate 149 | order by cdate 150 | """ 151 | s = db.query(q, "dataframe") 152 | 153 | alt.Chart(s, title="cycling distance over time").mark_point().encode( 154 | alt.X("cdate:T").axis(format="%Y %b", labelAngle=-45).title("days"), 155 | alt.Y("distance").axis(format=".2s").title("distance"), 156 | tooltip=["cdate:T", "distance"], 157 | ).configure_mark(color="coral").properties(width=500).interactive() 158 | ``` 159 | 160 | ### Wrist Temperature during Sleep 161 | ```{python} 162 | q = """ 163 | from ah 164 | select toFloat64(value) as temp, 165 | end as cdate 166 | where type == 'AppleSleepingWristTemperature' 167 | order by cdate 168 | """ 169 | s = chdb.to_df(db.query(q, "Arrow")) 170 | 171 | c = ( 172 | alt.Chart(s, title="wrist temperature over time") 173 | .mark_point() 174 | .encode( 175 | alt.X("yearmonthdate(cdate):T") 176 | .axis(format="%Y %b", labelAngle=-45) 177 | .title("days"), 178 | alt.Y( 179 | "temp", 180 | ) 181 | .axis(format=".4s") 182 | .title("temperature") 183 | .scale(zero=False), 184 | tooltip=["cdate:T", "temp"], 185 | ) 186 | ) 187 | c = c + c.transform_regression("cdate", "temp", method="poly").mark_line(color="black") 188 | c.configure_mark(color="green").properties(width=500).interactive() 189 | ``` 190 | 191 | ### Sleep Duration and State 192 | ```{python} 193 | import polars as pl 194 | 195 | q = """ 196 | from ah 197 | select toString(value) as val, 198 | start, 199 | end 200 | where type == 'SleepAnalysis' 201 | and toYearWeek(end) = toYearWeek(now())-5 202 | order by end desc 203 | """ 204 | s = chdb.to_df(db.query(q, "Arrow")) 205 | df = pl.from_pandas(s) 206 | 207 | # duration is end - start 208 | df = df.with_columns( 209 | pl.col("end").sub(pl.col("start")).dt.total_seconds().alias("duration") 210 | ) 211 | 212 | # add date column based on truncated end 213 | df = df.with_columns(pl.col("end").dt.date().alias("date")) 214 | 215 | # order by start desc 216 | df = df.sort("start", descending=True) 217 | 218 | category_names = { 219 | "HKCategoryValueSleepAnalysisAwake": "Awake", 220 | "HKCategoryValueSleepAnalysisAsleepCore": "Light Sleep", 221 | "HKCategoryValueSleepAnalysisAsleepDeep": "Deep Sleep", 222 | "HKCategoryValueSleepAnalysisAsleepREM": "REM Sleep", 223 | } 224 | 225 | category_colors = { 226 | "HKCategoryValueSleepAnalysisAwake": "#FFA500", # Orange 227 | "HKCategoryValueSleepAnalysisAsleepCore": "#AEC7E8", # Light Blue 228 | "HKCategoryValueSleepAnalysisAsleepDeep": "#4169E1", # Dark Blue 229 | "HKCategoryValueSleepAnalysisAsleepREM": "#00008B", # Navy Blue 230 | } 231 | 232 | c = ( 233 | alt.Chart(df.to_pandas(), title="sleep duration over time") 234 | .mark_bar(size=16) 235 | .encode( 236 | alt.X("date:T").axis(format="%Y %b %d", labelAngle=-45).title("days"), 237 | alt.Y( 238 | "duration:Q", 239 | ) 240 | .axis(format=".2s") 241 | .title("duration (s)") 242 | .scale(domain=[0, 35000]), 243 | color=alt.Color( 244 | "val:N", 245 | scale=alt.Scale( 246 | domain=list(category_colors.keys()), 247 | range=list(category_colors.values()), 248 | ), 249 | legend=alt.Legend( 250 | title="Sleep State", 251 | labelExpr="{'HKCategoryValueSleepAnalysisAwake': 'Awake', 'HKCategoryValueSleepAnalysisAsleepCore': 'Light Sleep', 'HKCategoryValueSleepAnalysisAsleepDeep': 'Deep Sleep', 'HKCategoryValueSleepAnalysisAsleepREM': 'REM Sleep'}[datum.label]", 252 | values=list(category_names.keys()), 253 | symbolFillColor="black", 254 | symbolSize=200, 255 | ), 256 | ), 257 | order=alt.Order("val", sort="ascending"), 258 | tooltip=[ 259 | "date:T", 260 | alt.Tooltip("val:N", title="Sleep State"), 261 | alt.Tooltip("duration:Q", title="Duration"), 262 | ], 263 | ) 264 | ) 265 | c 266 | ``` 267 | 268 | ## Environmental Audio Exposure 269 | ```{python} 270 | q = """ 271 | from ah 272 | select toString(toStartOfDay(end)) as cdate, 273 | toFloat64(value) as audio 274 | where type == 'EnvironmentalAudioExposure' 275 | and toYear(end) = 2024 276 | order by cdate 277 | limit 5000 278 | """ 279 | s = chdb.to_df(db.query(q, "Arrow")) 280 | 281 | c = ( 282 | ( 283 | alt.Chart(s, title="🚜 Environmental Audio Exposure") 284 | .mark_point(color="orange") 285 | .encode( 286 | alt.X( 287 | "cdate:T", 288 | axis=alt.Axis(format="%Y %b", labelAngle=-45), 289 | title="days", 290 | ), 291 | alt.Y("audio:Q").axis(title="Audio Exposure (dB)"), 292 | tooltip=[ 293 | alt.Tooltip("cdate:T", title="date"), 294 | alt.Tooltip("audio:Q", title="audio exposure (dB)"), 295 | ], 296 | ) 297 | ) 298 | .properties(width=500) 299 | .interactive() 300 | ) 301 | c = c + alt.Chart().mark_rule(color="red", size=2).encode(y=alt.datum(75)) 302 | c 303 | ``` 304 | 305 | ## Calories Burned 306 | 307 | ```{python} 308 | q = """ 309 | from ah 310 | select type, 311 | sum(toFloat64(value)) as calories, 312 | toString(date(end)) as cdate 313 | where type in ('ActiveEnergyBurned', 'BasalEnergyBurned') 314 | and toYear(end) = 2021 315 | and toMonth(end) in (10) 316 | group by cdate, type 317 | order by cdate 318 | """ 319 | s = chdb.to_df(db.query(q, "Arrow")) 320 | 321 | c = ( 322 | alt.Chart(s, title="🔥 Calories Burned") 323 | .mark_bar(size=8) 324 | .encode( 325 | alt.X("cdate:T").axis(format="%Y %b %d", labelAngle=-45).title("days"), 326 | alt.Y("calories:Q").axis(format=".2s").title("kcal"), 327 | color=alt.condition( 328 | alt.datum.type == "ActiveEnergyBurned", alt.value("red"), alt.value("blue") 329 | ), 330 | order=alt.Order("type", sort="descending"), 331 | tooltip=["cdate:T", "calories"], 332 | ) 333 | ) 334 | c = c.properties(width=500).interactive() 335 | c 336 | ``` 337 | 338 | ## Caffeine 339 | ```{python} 340 | q = """ 341 | from ah 342 | select toString(toStartOfDay(end)) as cdate, 343 | sum(toFloat64(value)) as caffeine 344 | where type == 'DietaryCaffeine' 345 | and toYear(end) = 2023 and toMonth(end) in (1, 2) 346 | group by cdate 347 | order by cdate 348 | """ 349 | s = chdb.to_df(db.query(q, "Arrow")) 350 | 351 | c = ( 352 | ( 353 | alt.Chart(s, title="☕️ Caffeine") 354 | .mark_bar() 355 | .encode( 356 | alt.X("cdate:T").axis(format="%Y %b %d", labelAngle=-45).title("days"), 357 | alt.Y("caffeine:Q").axis(title="caffeine (mg)"), 358 | tooltip=[ 359 | alt.Tooltip("cdate:T", title="date"), 360 | alt.Tooltip("caffeine:Q", title="caffeine (mg)"), 361 | ], 362 | color=alt.value("black"), 363 | ) 364 | ) 365 | .properties(width=500) 366 | .interactive() 367 | ) 368 | c 369 | ``` 370 | 371 | ### Caffeine Heatmap 372 | ```{python} 373 | q = """ 374 | from ah 375 | select toString(toStartOfDay(end)) as cdate, 376 | sum(toFloat64(value)) as caffeine 377 | where type == 'DietaryCaffeine' 378 | and toYear(end) = 2023 379 | and toMonth(end) in (1, 2) 380 | group by cdate 381 | order by cdate 382 | """ 383 | s = chdb.to_df(db.query(q, "Arrow")) 384 | 385 | c = ( 386 | alt.Chart(s, title="☕️ Coffee Heatmap (2023 Jan-Feb)") 387 | .mark_rect() 388 | .encode( 389 | alt.X("date(cdate):O").axis(format="%d", labelAngle=-45).title("days"), 390 | alt.Y("month(cdate):O").title("month"), 391 | color=alt.Color("max(caffeine):Q", scale=alt.Scale(scheme="greys")).title( 392 | "Caffeine (mg)" 393 | ), 394 | tooltip=[alt.Tooltip("date(cdate):T", title="date")], 395 | ) 396 | .properties(width=500) # added padding for whitespace 397 | .interactive() 398 | ) 399 | c 400 | ``` 401 | 402 | ### Caffeine after 17:00 403 | ```{python} 404 | q = """ 405 | from ah 406 | select toString(toStartOfDay(end)) as cdate, 407 | sum(toFloat64(value)) as caffeine, 408 | toHour(max(end)) > 17 as late 409 | where type == 'DietaryCaffeine' 410 | and toYear(end) = 2023 411 | and toMonth(end) in (1, 2) 412 | group by cdate 413 | order by cdate 414 | """ 415 | s = chdb.to_df(db.query(q, "Arrow")) 416 | 417 | c = ( 418 | alt.Chart(s, title="☕️ Coffee after 17:00 (2023 Jan-Feb)") 419 | .mark_rect() 420 | .encode( 421 | alt.X("date(cdate):O").axis(format="%d", labelAngle=-45).title("days"), 422 | alt.Y("month(cdate):O").title("month"), 423 | color=alt.Color( 424 | "late:N", scale=alt.Scale(domain=[0, 1], range=["lightgrey", "red"]) 425 | ).title("Late?"), 426 | tooltip=[alt.Tooltip("date(cdate):T", title="date")], 427 | ) 428 | .properties(width=500) 429 | .interactive() 430 | ) 431 | c 432 | ``` 433 | 434 | ### Step Count Heatmap 435 | ```{python} 436 | q = """ 437 | from ah 438 | select toString(toStartOfDay(end)) as cdate, 439 | sum(toFloat64(value)) as steps 440 | where type == 'StepCount' and toYear(end) = 2023 441 | group by cdate 442 | order by cdate 443 | """ 444 | s = chdb.to_df(db.query(q, "Arrow")) 445 | 446 | c = ( 447 | alt.Chart(s, title="👣 Step Count (2023)") 448 | .mark_rect() 449 | .encode( 450 | alt.X("date(cdate):O").axis(format="%d", labelAngle=-45).title("days"), 451 | alt.Y("month(cdate):O").title("month"), 452 | color=alt.Color("sum(steps):Q", scale=alt.Scale(scheme="greens")).title( 453 | "Steps" 454 | ), 455 | tooltip=[alt.Tooltip("date(cdate):T", title="date")], 456 | ) 457 | .properties(width=500) 458 | .interactive() 459 | ) 460 | c 461 | ``` 462 | 463 | ### Last Swimming Workout 464 | ```{python} 465 | q = """ 466 | from ah 467 | select max(start) 468 | where type = 'DistanceSwimming'; 469 | """ 470 | 471 | s = db.query(q, "PrettyCompactNoEscapes") 472 | print(s) 473 | ``` 474 | 475 | ### Swimming Workouts 476 | ```{python} 477 | q = """ 478 | from ah 479 | select toString(toStartOfWeek(end)) as week, 480 | count(type) > 0 as had_swimming_workout 481 | where type = 'DistanceSwimming' 482 | group by week 483 | order by week 484 | """ 485 | 486 | s = db.query(q, "PrettyCompactNoEscapes") 487 | print(s) 488 | ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "atlas-db" 3 | version = "0.2.11" 4 | description = "turn apple health export.xml into parquet" 5 | readme = "README.md" 6 | requires-python = ">=3.8" 7 | authors = [{name = "Thomas Schranz"}] 8 | license = {text = "Apache-2.0"} 9 | classifiers = [ 10 | "License :: OSI Approved :: Apache Software License" 11 | ] 12 | dependencies = [ 13 | "click", 14 | "lxml>=5.2.1", 15 | "polars>=0.20.22" 16 | ] 17 | 18 | [build-system] 19 | requires = ["setuptools"] 20 | build-backend = "setuptools.build_meta" 21 | 22 | [project.scripts] 23 | atlas = "atlas.cli:cli" 24 | 25 | 26 | [project.urls] 27 | Homepage = "https://github.com/atlaslib/atlas" 28 | Changelog = "https://github.com/atlaslib/atlas/releases" 29 | Issues = "https://github.com/atlaslib/atlas/issues" 30 | CI = "https://github.com/atlaslib/atlas/actions" 31 | 32 | [project.optional-dependencies] 33 | test = ["pytest"] 34 | -------------------------------------------------------------------------------- /tests/test_expanse.py: -------------------------------------------------------------------------------- 1 | def test_example_function(): 2 | assert 1 == 1 3 | --------------------------------------------------------------------------------