├── .github
│   └── workflows
│       ├── publish.yml
│       └── test.yml
├── .gitignore
├── LICENSE
├── README.md
├── atlas
│   ├── __init__.py
│   ├── __main__.py
│   ├── apple_health.py
│   └── cli.py
├── examples
│   └── apple-health-exploration-clickhouse-chdb-altair-quarto
│       └── index.qmd
├── pyproject.toml
└── tests
    └── test_expanse.py
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish Python Package
2 |
3 | on:
4 | release:
5 | types: [created]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | test:
12 | runs-on: ubuntu-latest
13 | strategy:
14 | matrix:
15 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
16 | steps:
17 | - uses: actions/checkout@v4
18 | - name: Set up Python ${{ matrix.python-version }}
19 | uses: actions/setup-python@v5
20 | with:
21 | python-version: ${{ matrix.python-version }}
22 | cache: pip
23 | cache-dependency-path: pyproject.toml
24 | - name: Install dependencies
25 | run: |
26 | pip install '.[test]'
27 | - name: Run tests
28 | run: |
29 | pytest
30 | deploy:
31 | runs-on: ubuntu-latest
32 | needs: [test]
33 | environment: release
34 | permissions:
35 | id-token: write
36 | steps:
37 | - uses: actions/checkout@v4
38 | - name: Set up Python
39 | uses: actions/setup-python@v5
40 | with:
41 | python-version: "3.12"
42 | cache: pip
43 | cache-dependency-path: pyproject.toml
44 | - name: Install dependencies
45 | run: |
46 | pip install setuptools wheel build
47 | - name: Build
48 | run: |
49 | python -m build
50 | - name: Publish
51 | uses: pypa/gh-action-pypi-publish@release/v1
52 |
53 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
# Runs the test suite on every push and pull request, across all supported
# Python versions.
name: Test

on: [push, pull_request]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
          cache-dependency-path: pyproject.toml
      - name: Install dependencies
        run: |
          pip install '.[test]'
      - name: Run tests
        run: |
          # Do not use `[ -d tests ] && pytest || echo ...` here: when pytest
          # fails, the `||` branch runs `echo`, the step exits 0 and the
          # failure is silently swallowed.
          if [ -d tests ]; then
            pytest
          else
            echo "Tests directory not found"
          fi
28 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .venv
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | venv
6 | .eggs
7 | .pytest_cache
8 | *.egg-info
9 | .DS_Store
10 | dist
11 | build
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |

6 |
7 |
8 | # Atlas
9 |
10 | Atlas lets you explore your Apple Health data.
11 |
12 | ---
13 |
14 | [](https://pypi.org/project/atlas-db/)
15 | [](https://github.com/atlaslib/atlas/actions/workflows/test.yml)
16 | [](https://github.com/atlaslib/atlas/releases)
17 | [](https://github.com/atlaslib/atlas/blob/main/LICENSE)
18 |
19 | ## Installation
20 |
21 | Install Atlas using `pip`:
22 | ```bash
23 | pip install atlas-db
24 | ```
25 |
26 | Upgrade Atlas using `pip`:
27 | ```bash
28 | pip install atlas-db --upgrade
29 | ```
30 |
31 | If you previously installed Atlas under its old name, Expanse, uninstall it using `pipx`:
32 | ```bash
33 | pipx uninstall expanse
34 | ```
35 |
36 | ## Explore
37 |
38 | > [!NOTE]
39 | > Here is a [Quarto notebook](https://github.com/atlaslib/atlas/blob/main/examples/apple-health-exploration-clickhouse-chdb-altair-quarto/index.qmd) with example code and SQL queries.
40 | >
41 | > The notebook uses Vega-Altair and Clickhouse (chDB) to explore Apple Health time series data in a .parquet file. The .parquet file was generated by Atlas from an Apple Health export.xml file.
42 | > 
43 |
44 | First we create the `.parquet` file from the `export.xml` file.
45 |
46 | ```bash
47 | atlas parquet export.xml -o ah.parquet
48 | ```
49 |
50 | We can explore the data in many ways.
51 |
52 | It is just a table/dataframe/parquet file with 5 columns.
53 |
54 | But here we'll use `clickhouse local`:
55 |
56 | ```bash
57 | clickhouse local
58 | ```
59 |
60 | Let's take a look at the table.
61 |
62 | ```sql
63 | DESCRIBE TABLE `ah.parquet`
64 | ```
65 |
66 | ```
67 | ┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
68 | │ type │ Nullable(String) │ │ │ │ │ │
69 | │ start │ Nullable(DateTime64(6)) │ │ │ │ │ │
70 | │ end │ Nullable(DateTime64(6)) │ │ │ │ │ │
71 | │ created │ Nullable(DateTime64(6)) │ │ │ │ │ │
72 | │ value │ Nullable(String) │ │ │ │ │ │
73 | └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
74 | ```
75 |
76 | What kind of "types" do we have and how many?
77 |
78 | ```sql
79 | SELECT
80 | type,
81 | COUNT(*) AS count
82 | FROM `ah.parquet`
83 | GROUP BY type
84 | ORDER BY count DESC
85 | ```
86 |
87 | ```
88 | ┌─type───────────────────────────┬──count─┐
89 | │ ActiveEnergyBurned │ 879902 │
90 | │ HeartRate │ 451854 │
91 | │ BasalEnergyBurned │ 289031 │
92 | │ DistanceWalkingRunning │ 260500 │
93 | │ StepCount │ 217384 │
94 | │ PhysicalEffort │ 69747 │
95 | │ AppleExerciseTime │ 61363 │
96 | │ AppleStandTime │ 58309 │
97 | │ EnvironmentalAudioExposure │ 44535 │
98 | │ SleepAnalysis │ 36599 │
99 | │ WalkingStepLength │ 28281 │
100 | │ WalkingSpeed │ 28281 │
101 | │ RespiratoryRate │ 27829 │
102 | │ AppleStandHour │ 25877 │
103 | │ FlightsClimbed │ 22690 │
104 | │ WalkingDoubleSupportPercentage │ 21900 │
105 | │ WalkingAsymmetryPercentage │ 13820 │
106 | │ HeartRateVariabilitySDNN │ 11961 │
107 | │ OxygenSaturation │ 4912 │
108 | │ StairDescentSpeed │ 4718 │
109 | │ StairAscentSpeed │ 4249 │
110 | │ DistanceCycling │ 2890 │
111 | │ TimeInDaylight │ 2403 │
112 | │ HeadphoneAudioExposure │ 2323 │
113 | │ RestingHeartRate │ 1399 │
114 | │ WalkingHeartRateAverage │ 1176 │
115 | │ DistanceSwimming │ 455 │
116 | │ SwimmingStrokeCount │ 455 │
117 | │ AppleSleepingWristTemperature │ 442 │
118 | │ RunningSpeed │ 391 │
119 | │ VO2Max │ 366 │
120 | │ RunningPower │ 173 │
121 | │ DietaryCaffeine │ 171 │
122 | │ AppleWalkingSteadiness │ 138 │
123 | │ SixMinuteWalkTestDistance │ 122 │
124 | │ HeartRateRecoveryOneMinute │ 76 │
125 | │ RunningVerticalOscillation │ 74 │
126 | │ RunningGroundContactTime │ 67 │
127 | │ RunningStrideLength │ 54 │
128 | │ MindfulSession │ 34 │
129 | │ HighHeartRateEvent │ 18 │
130 | │ AudioExposureEvent │ 14 │
131 | │ BodyMass │ 14 │
132 | │ Height │ 5 │
133 | │ Fatigue │ 1 │
134 | │ HKDataTypeSleepDurationGoal │ 1 │
135 | └────────────────────────────────┴────────┘
136 | ```
137 |
138 | What's our total step count?
139 |
140 | > [!NOTE]
141 | > The `value` column has the type `Nullable(String)`, so we have to cast it with `toFloat64` to sum up the step values.
142 |
143 | ```sql
144 | SELECT sum(toFloat64(value))
145 | FROM `ah.parquet`
146 | WHERE type = 'StepCount'
147 | ```
148 |
149 | ```
150 | ┌─sum(toFloat64(value))─┐
151 | │ 30295811 │
152 | └───────────────────────┘
153 | ```
154 |
155 | 30,295,811 (about 30.3 million) steps. That's a lot of steps!
156 |
157 | ## How to get the Apple Health export.xml file
158 |
159 | 
160 |
161 | - open the Apple **Health** app on iOS
162 | - tap on your **profile picture** (or initials) at the top right
163 | - tap on **Export All Health Data**
164 | - tap on **Export**
165 | - **wait** a few seconds to a few minutes (~3min for 10 years of data)
166 | - **get the export.zip** archive via AirDrop to a Mac (or save it to Files)
167 |
168 | > [!NOTE]
169 | > The **export.xml** file is **in** the **export.zip** archive.
170 |
171 | You can expand the **export.zip** file by double-clicking on it.
172 |
173 | This creates a directory named **apple_health_export** and in it is the **export.xml** file.
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 | See: [Apple Support on how to export Apple Health and Fitness in XML format](https://support.apple.com/en-gb/guide/iphone/iph5ede58c3d/ios#:~:text=Share%20your%20health%20and%20fitness%20data%20in%20XML%20format)
185 |
186 | ## Usage
187 |
188 | `atlas parquet export.xml`
189 |
190 | ## Features
191 |
192 | - turn export.xml into a simple parquet file
193 |
194 |
--------------------------------------------------------------------------------
/atlas/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atlaslib/atlas/88d511df96be52c21ddaa7e6c4220d2733d6f1b7/atlas/__init__.py
--------------------------------------------------------------------------------
/atlas/__main__.py:
--------------------------------------------------------------------------------
"""Module entry point: runs the click command group from ``cli.py``."""
from .cli import cli

# Only start the CLI when this module is executed as a script, not on import.
if __name__ == "__main__":
    cli()
5 |
--------------------------------------------------------------------------------
/atlas/apple_health.py:
--------------------------------------------------------------------------------
1 | import os
2 | import polars as pl
3 | import lxml.etree as et
4 | import sys
5 | import time
6 |
7 |
def to_df(file_path):
    """Transform an Apple Health ``export.xml`` into a polars DataFrame.

    Streams every ``Record`` element of the XML file, keeps its ``value``,
    ``type``, ``startDate``, ``endDate`` and ``creationDate`` attributes and
    turns them into a DataFrame with the columns ``type``, ``start``, ``end``,
    ``created`` and ``value``. Progress and cumulative timings are printed to
    stdout while the function runs.

    Args:
        file_path: Location of the export.xml file; passed unchanged to
            ``lxml.etree.iterparse``.

    Returns:
        A ``polars.DataFrame`` sorted by ``start`` (newest first), with ties
        ordered by ``type``. ``start``/``end``/``created`` are naive datetimes
        (the UTC offset in the source strings is dropped), ``value`` is the
        first space-separated token of the original value and stays a string.
    """
    record_attrs = ["value", "type", "startDate", "endDate", "creationDate"]
    start_time = time.time()

    def _lap(next_label=None):
        # Finish the current progress line with the cumulative elapsed time
        # and, if given, start the progress line of the next step.
        elapsed = time.time() - start_time
        print(f" ({elapsed} s)")
        if next_label is not None:
            print(next_label, end="")
            sys.stdout.flush()
        return elapsed

    print("transform apple health export.xml to parquet")
    print("- parsing xml", end="")
    sys.stdout.flush()

    # Collect one list per attribute; a missing attribute is stored as None so
    # that every column always exists and all columns have the same length.
    columns = {attr: [] for attr in record_attrs}

    for _event, elem in et.iterparse(file_path, tag="Record"):
        for attr, values in columns.items():
            values.append(elem.get(attr))
        elem.clear()
        # clear() leaves the emptied element attached to its parent; drop the
        # already-processed siblings so memory stays flat on large exports.
        parent = elem.getparent()
        if parent is not None:
            while elem.getprevious() is not None:
                del parent[0]

    _lap("- loading df")

    # Explicit string schema: no dependence on schema inference, and an export
    # without any Record still yields a frame with all expected columns.
    df = pl.DataFrame(columns, schema={attr: pl.Utf8 for attr in record_attrs})

    _lap("- drop rows where type is missing")

    df = df.filter(pl.col("type").is_not_null())

    _lap("- remove prefixes")

    # "HKQuantityTypeIdentifierStepCount" -> "StepCount", same for category types
    df = df.with_columns(
        pl.col("type").str.replace(r"^HK(?:Quantity|Category)TypeIdentifier", "")
    )

    _lap("- convert time strings to datetime")

    # convert datetime strings to datetime (example: "2024-02-18 15:15:06 +0100")
    # TODO: add timezone handling (currently the timezone offset is ignored via truncation)
    df = df.with_columns(
        pl.col("startDate", "endDate", "creationDate")
        .str.slice(0, 19)
        .str.to_datetime("%Y-%m-%d %H:%M:%S")
    )

    df = df.rename({"startDate": "start", "endDate": "end", "creationDate": "created"})

    # NOTE: this step is announced but currently does nothing.
    _lap("- drop unneeded entries")

    # replacement map for enums (currently empty; candidates kept for reference)
    replacement_map = {
        # 'HKCategoryValueSeverityUnspecified': '1',
        # 'HKCategoryValueLowCardioFitnessEventLowFitness': '1',
        # 'HKCategoryValueNotApplicable': '-1',
        # 'HKCategoryValueEnvironmentalAudioExposureEventMomentaryLimit': '1',
        # 'HKCategoryValueAppleStandHourStood': '1',
        # 'HKCategoryValueAppleStandHourIdle': '0',
        # 'HKCategoryValueSleepAnalysisAwake': '0',
        # 'HKCategoryValueSleepAnalysisAsleepCore': '1',
        # 'HKCategoryValueSleepAnalysisAsleepDeep': '2',
        # 'HKCategoryValueSleepAnalysisAsleepREM': '3',
        # 'HKCategoryValueSleepAnalysisAsleepUnspecified': '1',
        # 'HKCategoryValueSleepAnalysisInBed': '-1',
        # '2024-02-24 12:46:57 +0100': '-1'
    }

    _lap("- replace string values")

    for key, val in replacement_map.items():
        # keys are plain strings, not patterns
        df = df.with_columns(pl.col("value").str.replace(key, val, literal=True))

    # split "value" column "23.1 kPa" -> ["23.1", "kPa"]
    # if there are multiple parts, only take the 0th part
    df = (
        df.with_columns(
            pl.col("value")
            .str.splitn(" ", 2)
            .struct.rename_fields(["frst", "rest"])
            .alias("fields"),
        )
        .unnest("fields")
        .drop("rest")
        .drop("value")
        .rename({"frst": "value"})
    )

    # NOTE: this step is announced but the cast is disabled; "value" stays a string.
    _lap("- convert 'value' column to numeric")

    # df = df.with_columns(
    #     pl.col('value').cast(pl.Float64)
    # )

    _lap("- sort: newest data first")

    # One multi-key sort: two chained sorts would let the second one discard
    # the ordering of the first, since sort does not preserve order by default.
    df = df.sort(["start", "type"], descending=[True, False])

    elapsed_time = _lap()

    print(f"\ndf data shape: {df.shape}")
    print(f"total elapsed time: {elapsed_time} s\n")
    sys.stdout.flush()

    return df
158 |
159 |
def write_parquet(df, path=None):
    """Write ``df`` to ``path`` as a zstd-compressed parquet file.

    Args:
        df: Object exposing a ``write_parquet`` method (a polars DataFrame in
            this package).
        path: Destination of the parquet file. NOTE(review): the ``None``
            default is forwarded as-is to ``df.write_parquet`` -- confirm
            whether callers are ever expected to omit it.

    Returns:
        None.
    """
    parquet_options = {"compression": "zstd", "compression_level": 22}
    df.write_parquet(path, **parquet_options)
163 |
--------------------------------------------------------------------------------
/atlas/cli.py:
--------------------------------------------------------------------------------
1 | import click
2 | import os
3 | import datetime as dt
4 | from atlas.apple_health import to_df, write_parquet
5 |
6 |
# The distribution is published as "atlas-db" while the import package is
# "atlas"; name it explicitly so `--version` does not depend on auto-detection.
@click.group()
@click.version_option(package_name="atlas-db")
def cli():
    "turn apple health export.xml into parquet"


@cli.command(name="parquet")
@click.argument("path", type=click.Path(exists=True, dir_okay=False))
@click.option(
    "-o",
    "--out",
    help="path where to write the parquet file",
)
def transform(path, out):
    """Convert the Apple Health export.xml at PATH into a parquet file.

    Without --out the file is written to the current working directory as
    apple-health-YYYY-MM-DD.parquet (using today's date).
    """
    if out is None:
        # default target: date-stamped file in the current working directory
        stamp = dt.datetime.now().strftime("%Y-%m-%d")
        out = os.path.join(os.getcwd(), f"apple-health-{stamp}.parquet")

    df = to_df(path)
    write_parquet(df, out)
32 |
--------------------------------------------------------------------------------
/examples/apple-health-exploration-clickhouse-chdb-altair-quarto/index.qmd:
--------------------------------------------------------------------------------
1 | # Apple Health Exploration with ClickHouse (chDB), Altair and Quarto
2 |
3 | ### Install libs
4 | ```{python}
5 | !pip install chdb
6 | !pip install altair
7 | !pip install polars
8 | !pip install pyarrow
9 | ```
10 |
11 | ### Parquet path
12 | ```{python}
13 | apple_health_parquet = "ah.parquet"
14 | ```
15 |
16 | ### Imports
17 | ```{python}
18 | import altair as alt
19 | import pyarrow as pa
20 | import chdb
21 | from chdb.session import Session
22 | ```
23 |
24 | ### Create Database
25 | ```{python}
26 | db = Session()
27 | db.query("CREATE DATABASE db")
28 | db.query("USE db")
29 | ```
30 |
31 | ### Drop Table (if exists)
32 | ```{python}
33 | db.query("drop table if exists db.ah")
34 | ```
35 |
36 | ### Create Table from Parquet
37 | ```{python}
38 | create_table = f"""
39 | CREATE TABLE ah
40 | ENGINE = MergeTree
41 | ORDER BY tuple() AS
42 | SELECT *
43 | FROM file('{apple_health_parquet}', Parquet)
44 | """
45 |
46 | db.query(create_table)
47 | ```
48 |
49 | ### Describe Table
50 | ```{python}
51 | q = "desc table ah"
52 | r = db.query(q, "PrettyCompactNoEscapes")
53 | print(r)
54 | ```
55 |
56 | ### Types
57 | ```{python}
58 | r = db.query("select distinct(type) from ah")
59 | print(r)
60 | ```
61 |
62 | ### Distance (Walking and Running)
63 | ```{python}
64 | q = """
65 | from ah
66 | select sum(toFloat64(value)) as val,
67 | toString(date(end)) as cdate
68 | where type == 'DistanceWalkingRunning'
69 | group by cdate
70 | order by cdate
71 | """
72 | s = chdb.to_df(db.query(q, "Arrow"))
73 |
74 | c = (
75 | alt.Chart(s, title="distance over time")
76 | .mark_point()
77 | .encode(
78 | alt.X("cdate:T").axis(format="%Y", labelAngle=-45).title("days"),
79 | alt.Y("val").axis(format=".2s").title("distance"),
80 | tooltip=["cdate:T", "val"],
81 | )
82 | )
83 | c = c + c.transform_regression("cdate", "val", method="poly").mark_line(color="black")
84 | c.configure_mark(color="coral").properties(width=500).interactive()
85 | ```
86 |
87 | ### VO2 Max
88 | ```{python}
89 | q = """
90 | from ah
91 | select toFloat64(value) as vo2max,
92 | toString(date(end)) as cdate
93 | where type == 'VO2Max'
94 | order by cdate
95 | """
96 | s = chdb.to_df(db.query(q, "Arrow"))
97 |
98 | c = (
99 | alt.Chart(s, title="V̇O2 max over time")
100 | .mark_point()
101 | .encode(
102 | alt.X("cdate:T").axis(format="%Y %b", labelAngle=-45).title("days"),
103 | alt.Y("vo2max").axis(format=".2s").title("vo2max").scale(zero=False),
104 | tooltip=["cdate:T", "vo2max"],
105 | )
106 | )
107 | c = c + c.transform_regression("cdate", "vo2max", method="poly").mark_line(
108 | color="black"
109 | )
110 | c.configure_mark(color="purple").properties(width=500).interactive()
111 | ```
112 |
113 | ### Body Mass
114 | ```{python}
# Daily body mass. Body mass is a state measurement, not an additive quantity,
# so several weigh-ins on the same day are averaged rather than summed.
# NOTE(review): `having weight < 80` looks like an outlier filter tuned to the
# author's own data -- adjust or remove it for other datasets.
q = """
from ah
select avg(toFloat64(value)) as weight,
       toString(date(end)) as cdate
where type == 'BodyMass'
group by cdate
having weight < 80
order by cdate
"""
s = chdb.to_df(db.query(q, "Arrow"))

c = (
    alt.Chart(s, title="weight over time")
    .mark_line(point=True)
    .encode(
        alt.X("cdate:T").axis(format="%Y %b", labelAngle=-45).title("days"),
        alt.Y("weight").axis(format=".2s").title("weight (kg)").scale(zero=False),
        tooltip=["cdate:T", "weight"],
    )
)
# point size is increased via configure_point
c.configure_mark(color="black").properties(width=500).configure_point(
    size=80
).interactive()
138 | ```
139 |
140 | ### Cycling
141 | ```{python}
142 | q = """
143 | from ah
144 | select sum(toFloat64(value)) as distance,
145 | toString(toStartOfDay(end)) as cdate
146 | where type == 'DistanceCycling'
147 | and created is not null
148 | group by cdate
149 | order by cdate
150 | """
151 | s = db.query(q, "dataframe")
152 |
153 | alt.Chart(s, title="cycling distance over time").mark_point().encode(
154 | alt.X("cdate:T").axis(format="%Y %b", labelAngle=-45).title("days"),
155 | alt.Y("distance").axis(format=".2s").title("distance"),
156 | tooltip=["cdate:T", "distance"],
157 | ).configure_mark(color="coral").properties(width=500).interactive()
158 | ```
159 |
160 | ### Wrist Temperature during Sleep
161 | ```{python}
162 | q = """
163 | from ah
164 | select toFloat64(value) as temp,
165 | end as cdate
166 | where type == 'AppleSleepingWristTemperature'
167 | order by cdate
168 | """
169 | s = chdb.to_df(db.query(q, "Arrow"))
170 |
171 | c = (
172 | alt.Chart(s, title="wrist temperature over time")
173 | .mark_point()
174 | .encode(
175 | alt.X("yearmonthdate(cdate):T")
176 | .axis(format="%Y %b", labelAngle=-45)
177 | .title("days"),
178 | alt.Y(
179 | "temp",
180 | )
181 | .axis(format=".4s")
182 | .title("temperature")
183 | .scale(zero=False),
184 | tooltip=["cdate:T", "temp"],
185 | )
186 | )
187 | c = c + c.transform_regression("cdate", "temp", method="poly").mark_line(color="black")
188 | c.configure_mark(color="green").properties(width=500).interactive()
189 | ```
190 |
191 | ### Sleep Duration and State
192 | ```{python}
193 | import polars as pl
194 |
195 | q = """
196 | from ah
197 | select toString(value) as val,
198 | start,
199 | end
200 | where type == 'SleepAnalysis'
201 | and toYearWeek(end) = toYearWeek(now())-5
202 | order by end desc
203 | """
204 | s = chdb.to_df(db.query(q, "Arrow"))
205 | df = pl.from_pandas(s)
206 |
207 | # duration is end - start
208 | df = df.with_columns(
209 | pl.col("end").sub(pl.col("start")).dt.total_seconds().alias("duration")
210 | )
211 |
212 | # add date column based on truncated end
213 | df = df.with_columns(pl.col("end").dt.date().alias("date"))
214 |
215 | # order by start desc
216 | df = df.sort("start", descending=True)
217 |
218 | category_names = {
219 | "HKCategoryValueSleepAnalysisAwake": "Awake",
220 | "HKCategoryValueSleepAnalysisAsleepCore": "Light Sleep",
221 | "HKCategoryValueSleepAnalysisAsleepDeep": "Deep Sleep",
222 | "HKCategoryValueSleepAnalysisAsleepREM": "REM Sleep",
223 | }
224 |
225 | category_colors = {
226 | "HKCategoryValueSleepAnalysisAwake": "#FFA500", # Orange
227 | "HKCategoryValueSleepAnalysisAsleepCore": "#AEC7E8", # Light Blue
228 | "HKCategoryValueSleepAnalysisAsleepDeep": "#4169E1", # Dark Blue
229 | "HKCategoryValueSleepAnalysisAsleepREM": "#00008B", # Navy Blue
230 | }
231 |
232 | c = (
233 | alt.Chart(df.to_pandas(), title="sleep duration over time")
234 | .mark_bar(size=16)
235 | .encode(
236 | alt.X("date:T").axis(format="%Y %b %d", labelAngle=-45).title("days"),
237 | alt.Y(
238 | "duration:Q",
239 | )
240 | .axis(format=".2s")
241 | .title("duration (s)")
242 | .scale(domain=[0, 35000]),
243 | color=alt.Color(
244 | "val:N",
245 | scale=alt.Scale(
246 | domain=list(category_colors.keys()),
247 | range=list(category_colors.values()),
248 | ),
249 | legend=alt.Legend(
250 | title="Sleep State",
251 | labelExpr="{'HKCategoryValueSleepAnalysisAwake': 'Awake', 'HKCategoryValueSleepAnalysisAsleepCore': 'Light Sleep', 'HKCategoryValueSleepAnalysisAsleepDeep': 'Deep Sleep', 'HKCategoryValueSleepAnalysisAsleepREM': 'REM Sleep'}[datum.label]",
252 | values=list(category_names.keys()),
253 | symbolFillColor="black",
254 | symbolSize=200,
255 | ),
256 | ),
257 | order=alt.Order("val", sort="ascending"),
258 | tooltip=[
259 | "date:T",
260 | alt.Tooltip("val:N", title="Sleep State"),
261 | alt.Tooltip("duration:Q", title="Duration"),
262 | ],
263 | )
264 | )
265 | c
266 | ```
267 |
268 | ## Environmental Audio Exposure
269 | ```{python}
270 | q = """
271 | from ah
272 | select toString(toStartOfDay(end)) as cdate,
273 | toFloat64(value) as audio
274 | where type == 'EnvironmentalAudioExposure'
275 | and toYear(end) = 2024
276 | order by cdate
277 | limit 5000
278 | """
279 | s = chdb.to_df(db.query(q, "Arrow"))
280 |
281 | c = (
282 | (
283 | alt.Chart(s, title="🚜 Environmental Audio Exposure")
284 | .mark_point(color="orange")
285 | .encode(
286 | alt.X(
287 | "cdate:T",
288 | axis=alt.Axis(format="%Y %b", labelAngle=-45),
289 | title="days",
290 | ),
291 | alt.Y("audio:Q").axis(title="Audio Exposure (dB)"),
292 | tooltip=[
293 | alt.Tooltip("cdate:T", title="date"),
294 | alt.Tooltip("audio:Q", title="audio exposure (dB)"),
295 | ],
296 | )
297 | )
298 | .properties(width=500)
299 | .interactive()
300 | )
301 | c = c + alt.Chart().mark_rule(color="red", size=2).encode(y=alt.datum(75))
302 | c
303 | ```
304 |
305 | ## Calories Burned
306 |
307 | ```{python}
308 | q = """
309 | from ah
310 | select type,
311 | sum(toFloat64(value)) as calories,
312 | toString(date(end)) as cdate
313 | where type in ('ActiveEnergyBurned', 'BasalEnergyBurned')
314 | and toYear(end) = 2021
315 | and toMonth(end) in (10)
316 | group by cdate, type
317 | order by cdate
318 | """
319 | s = chdb.to_df(db.query(q, "Arrow"))
320 |
321 | c = (
322 | alt.Chart(s, title="🔥 Calories Burned")
323 | .mark_bar(size=8)
324 | .encode(
325 | alt.X("cdate:T").axis(format="%Y %b %d", labelAngle=-45).title("days"),
326 | alt.Y("calories:Q").axis(format=".2s").title("kcal"),
327 | color=alt.condition(
328 | alt.datum.type == "ActiveEnergyBurned", alt.value("red"), alt.value("blue")
329 | ),
330 | order=alt.Order("type", sort="descending"),
331 | tooltip=["cdate:T", "calories"],
332 | )
333 | )
334 | c = c.properties(width=500).interactive()
335 | c
336 | ```
337 |
338 | ## Caffeine
339 | ```{python}
340 | q = """
341 | from ah
342 | select toString(toStartOfDay(end)) as cdate,
343 | sum(toFloat64(value)) as caffeine
344 | where type == 'DietaryCaffeine'
345 | and toYear(end) = 2023 and toMonth(end) in (1, 2)
346 | group by cdate
347 | order by cdate
348 | """
349 | s = chdb.to_df(db.query(q, "Arrow"))
350 |
351 | c = (
352 | (
353 | alt.Chart(s, title="☕️ Caffeine")
354 | .mark_bar()
355 | .encode(
356 | alt.X("cdate:T").axis(format="%Y %b %d", labelAngle=-45).title("days"),
357 | alt.Y("caffeine:Q").axis(title="caffeine (mg)"),
358 | tooltip=[
359 | alt.Tooltip("cdate:T", title="date"),
360 | alt.Tooltip("caffeine:Q", title="caffeine (mg)"),
361 | ],
362 | color=alt.value("black"),
363 | )
364 | )
365 | .properties(width=500)
366 | .interactive()
367 | )
368 | c
369 | ```
370 |
371 | ### Caffeine Heatmap
372 | ```{python}
373 | q = """
374 | from ah
375 | select toString(toStartOfDay(end)) as cdate,
376 | sum(toFloat64(value)) as caffeine
377 | where type == 'DietaryCaffeine'
378 | and toYear(end) = 2023
379 | and toMonth(end) in (1, 2)
380 | group by cdate
381 | order by cdate
382 | """
383 | s = chdb.to_df(db.query(q, "Arrow"))
384 |
385 | c = (
386 | alt.Chart(s, title="☕️ Coffee Heatmap (2023 Jan-Feb)")
387 | .mark_rect()  # one rectangle per (day-of-month, month) cell
388 | .encode(
389 | alt.X("date(cdate):O").axis(format="%d", labelAngle=-45).title("days"),
390 | alt.Y("month(cdate):O").title("month"),
391 | color=alt.Color("max(caffeine):Q", scale=alt.Scale(scheme="greys")).title(
392 | "Caffeine (mg)"
393 | ),
394 | tooltip=[alt.Tooltip("date(cdate):T", title="date")],
395 | )
396 | .properties(width=500)  # fixed chart width
397 | .interactive()
398 | )
399 | c
400 | ```
401 |
402 | ### Caffeine after 17:00
403 | ```{python}
404 | q = """
405 | from ah
406 | select toString(toStartOfDay(end)) as cdate,
407 | sum(toFloat64(value)) as caffeine,
408 | toHour(max(end)) > 17 as late
409 | where type == 'DietaryCaffeine'
410 | and toYear(end) = 2023
411 | and toMonth(end) in (1, 2)
412 | group by cdate
413 | order by cdate
414 | """
415 | s = chdb.to_df(db.query(q, "Arrow"))
416 |
417 | c = (
418 | alt.Chart(s, title="☕️ Coffee after 17:00 (2023 Jan-Feb)")
419 | .mark_rect()
420 | .encode(
421 | alt.X("date(cdate):O").axis(format="%d", labelAngle=-45).title("days"),
422 | alt.Y("month(cdate):O").title("month"),
423 | color=alt.Color(
424 | "late:N", scale=alt.Scale(domain=[0, 1], range=["lightgrey", "red"])
425 | ).title("Late?"),
426 | tooltip=[alt.Tooltip("date(cdate):T", title="date")],
427 | )
428 | .properties(width=500)
429 | .interactive()
430 | )
431 | c
432 | ```
433 |
434 | ### Step Count Heatmap
435 | ```{python}
436 | q = """
437 | from ah
438 | select toString(toStartOfDay(end)) as cdate,
439 | sum(toFloat64(value)) as steps
440 | where type == 'StepCount' and toYear(end) = 2023
441 | group by cdate
442 | order by cdate
443 | """
444 | s = chdb.to_df(db.query(q, "Arrow"))
445 |
446 | c = (
447 | alt.Chart(s, title="👣 Step Count (2023)")
448 | .mark_rect()
449 | .encode(
450 | alt.X("date(cdate):O").axis(format="%d", labelAngle=-45).title("days"),
451 | alt.Y("month(cdate):O").title("month"),
452 | color=alt.Color("sum(steps):Q", scale=alt.Scale(scheme="greens")).title(
453 | "Steps"
454 | ),
455 | tooltip=[alt.Tooltip("date(cdate):T", title="date")],
456 | )
457 | .properties(width=500)
458 | .interactive()
459 | )
460 | c
461 | ```
462 |
463 | ### Last Swimming Workout
464 | ```{python}
465 | q = """
466 | from ah
467 | select max(start)
468 | where type = 'DistanceSwimming';
469 | """
470 |
471 | s = db.query(q, "PrettyCompactNoEscapes")
472 | print(s)
473 | ```
474 |
475 | ### Swimming Workouts
476 | ```{python}
477 | q = """
478 | from ah
479 | select toString(toStartOfWeek(end)) as week,
480 | count(type) > 0 as had_swimming_workout
481 | where type = 'DistanceSwimming'
482 | group by week
483 | order by week
484 | """
485 |
486 | s = db.query(q, "PrettyCompactNoEscapes")
487 | print(s)
488 | ```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "atlas-db"
3 | version = "0.2.11"
4 | description = "turn apple health export.xml into parquet"
5 | readme = "README.md"
6 | requires-python = ">=3.8"
7 | authors = [{name = "Thomas Schranz"}]
8 | license = {text = "Apache-2.0"}
9 | classifiers = [
10 | "License :: OSI Approved :: Apache Software License"
11 | ]
12 | dependencies = [
13 | "click",
14 | "lxml>=5.2.1",
15 | "polars>=0.20.22"
16 | ]
17 |
18 | [build-system]
19 | requires = ["setuptools"]
20 | build-backend = "setuptools.build_meta"
21 |
22 | [project.scripts]
23 | atlas = "atlas.cli:cli"
24 |
25 |
26 | [project.urls]
27 | Homepage = "https://github.com/atlaslib/atlas"
28 | Changelog = "https://github.com/atlaslib/atlas/releases"
29 | Issues = "https://github.com/atlaslib/atlas/issues"
30 | CI = "https://github.com/atlaslib/atlas/actions"
31 |
32 | [project.optional-dependencies]
33 | test = ["pytest"]
34 |
--------------------------------------------------------------------------------
/tests/test_expanse.py:
--------------------------------------------------------------------------------
1 | def test_example_function():
2 | assert 1 == 1  # tautology: placeholder assertion, exercises no project code
3 |
--------------------------------------------------------------------------------