├── fitanalysis
├── __init__.py
├── util.py
└── activity.py
├── resources
└── TSS_and_IF_Coggan_2003.pdf
├── setup.py
├── LICENSE
├── .gitignore
└── README.md
/fitanalysis/__init__.py:
--------------------------------------------------------------------------------
# Re-export Activity at the package level so callers can write
# fitanalysis.Activity instead of fitanalysis.activity.Activity.
from fitanalysis.activity import Activity


# Package version, read by setup.py at build time.
__version__ = '0.0.1'
# Explicit public API: only Activity is exported via `from fitanalysis import *`.
__all__ = ['Activity']
6 |
--------------------------------------------------------------------------------
/resources/TSS_and_IF_Coggan_2003.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mtraver/python-fitanalysis/HEAD/resources/TSS_and_IF_Coggan_2003.pdf
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging configuration for the fitanalysis library."""
try:
  # setuptools is required for install_requires to have any effect;
  # distutils.core.setup silently ignores that keyword, so the original
  # code never actually declared its dependencies at install time.
  from setuptools import setup
except ImportError:
  from distutils.core import setup
import sys

import fitanalysis


# Runtime dependencies of the package.
requires = ['fitparse', 'numpy', 'pandas']
if sys.version_info < (2, 7):
  # argparse entered the standard library in Python 2.7; older
  # interpreters need the backport from PyPI.
  requires.append('argparse')

with open('LICENSE', 'r') as f:
  license_content = f.read()

setup(name='fitanalysis',
      version=fitanalysis.__version__,
      description='Python library for analysis of ANT/Garmin .fit files',
      author='Michael Traver',
      url='https://github.com/mtraver/python-fitanalysis',
      license=license_content,
      packages=['fitanalysis'],
      install_requires=requires)
22 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Michael Traver
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # Jupyter Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # SageMath parsed files
79 | *.sage.py
80 |
81 | # Environments
82 | .env
83 | .venv
84 | env/
85 | venv/
86 | ENV/
87 |
88 | # Spyder project settings
89 | .spyderproject
90 | .spyproject
91 |
92 | # Rope project settings
93 | .ropeproject
94 |
95 | # mkdocs documentation
96 | /site
97 |
98 | # mypy
99 | .mypy_cache/
100 |
101 | .DS_Store
102 |
--------------------------------------------------------------------------------
/fitanalysis/util.py:
--------------------------------------------------------------------------------
1 | """Utility functions."""
2 | import numpy as np
3 |
4 |
def moving_average(time_series, window_len):
  """Calculates the moving average of an unevenly spaced time series.

  This moving average implementation weights each value by the time it remained
  unchanged, which conceptually matches smart recording on GPS devices: a sample
  is taken when some value changes sufficiently, so before a new sample is taken
  the previous one is assumed to be more or less constant.

  The term "area" below means a sum of time-weighted values.

  This implementation follows the SMA_last algorithm proposed
  in (Eckner, 2017) (see README for citation).

  Args:
    time_series: A pandas.Series of the values to average,
                 indexed with timestamps. The Series is not modified.
    window_len: The size of the moving average window, in seconds.

  Returns:
    A numpy array of length len(time_series) containing the
    moving average values
  """
  # Compute each sample's offset in whole seconds from the first timestamp.
  # These are kept in local arrays (the original assigned back to
  # time_series.index, mutating the caller's Series as a side effect).
  times = ((time_series.index
            - time_series.index[0]) / np.timedelta64(1, 's')).astype('int')
  values = time_series.values

  window_area = values[0] * window_len

  # It may not always be possible to construct a window of length exactly equal
  # to window_len using timestamps present in the data. To handle this, the left
  # side of the window is allowed to fall between timestamps (the right side is
  # always fixed to a timestamp in the data). Therefore we need to separately
  # compute the area of the inter-timestamp region on the left side of the
  # window so that it can be added to the window area. left_area is that value.
  left_area = window_area

  out = np.zeros(len(values))
  out[0] = values[0]

  # i is the left side of the window and j is the right.
  # Note: range, not the Python-2-only xrange, so this runs on Python 3 too.
  i = 0
  for j in range(1, len(values)):
    # Remove the last iteration's left_area as a new right window bound may
    # change the left_area required in this iteration
    window_area -= left_area

    # Expand window to the right
    window_area += values[j-1] * (times[j] - times[j-1])

    # Shrink window from the left if expanding to the right has created too
    # large a window. new_left_time may fall between timestamps present in the
    # data, which is fine, since that's handled by left_area.
    new_left_time = times[j] - window_len
    while times[i] < new_left_time:
      window_area -= values[i] * (times[i+1] - times[i])
      i += 1

    # Add left side inter-timestamp area to window
    left_area = values[max(0, i - 1)] * (times[i] - new_left_time)
    window_area += left_area

    out[j] = window_area / window_len

  return out
73 |
74 |
def print_full(df):
  """Prints a DataFrame in full, bypassing pandas' row truncation.

  Args:
    df: The pandas.DataFrame to print.
  """
  # util.py never imported pandas at module level, so the original code
  # raised NameError; import locally to keep module dependencies minimal.
  import pandas

  # option_context restores 'display.max_rows' even if printing raises,
  # unlike a manual set_option/reset_option pair.
  with pandas.option_context('display.max_rows', len(df)):
    print(df)
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # fitanalysis
2 | fitanalysis is a Python library for analysis of ANT/Garmin `.fit` files.
3 |
4 | It's geared toward cycling and allows for easy extraction of data such as the
5 | following from a `.fit` file:
6 | - elapsed time
7 | - moving time
8 | - average heart rate
9 | - average power
10 | - normalized power (based on information publicly available about
11 | TrainingPeaks' NP®)
12 | - intensity (based on information publicly available about TrainingPeaks' IF®)
13 | - training stress (based on information publicly available about
14 | TrainingPeaks' TSS®)
15 |
16 | My impetus for this project was to better understand how platforms like
17 | TrainingPeaks analyze power and heart rate data to arrive at an estimation
18 | of training stress. As such, this project attempts to match those platforms'
19 | calculations as closely as possible.
20 |
21 | # Dependencies and installation
22 | [Pandas](http://pandas.pydata.org/), [NumPy](http://www.numpy.org/), and
23 | [fitparse](https://github.com/dtcooper/python-fitparse) are required.
24 |
25 | `python setup.py install` (or `python setup.py install --user`) to install.
26 |
27 | # Example
28 |
29 | fitanalysis provides the `Activity` class.
30 |
31 | ```python
32 | import fitanalysis
33 |
34 | activity = fitanalysis.Activity('my_activity.fit')
35 |
print(activity.elapsed_time)
print(activity.moving_time)

# Also available for heart rate and cadence
print(activity.mean_power)

print(activity.norm_power)

# Intensity and training stress calculations require
# a functional threshold power value (in Watts)
print(activity.intensity(310))
print(activity.training_stress(310))
48 | ```
49 |
50 | Construction of an `Activity` parses the `.fit` file and detects periods of
51 | inactivity, as such periods must be removed from the data for heart rate-,
52 | cadence-, and power-based calculations.
53 |
54 | # Comparison of activity analysis platforms
55 |
56 | Here is a comparison for a few of my rides of varying profiles across the
57 | various platforms.
58 |
59 |
60 |
61 | |
62 | |
63 | fitanalysis |
64 | TrainingPeaks |
65 | Garmin Connect |
66 | Strava |
67 |
68 |
69 |
70 | Ride 1: epic ride 126.5 mi 15207 ft climbing |
71 |
72 |
73 | | Elapsed time |
74 | 12:19:40 |
75 | * |
76 | 12:19:20 |
77 | 12:19:40 |
78 |
79 |
80 | | Moving time |
81 | 9:07:14 |
82 | - |
83 | 9:06:12 |
84 | 9:09:26 |
85 |
86 |
87 | | Mean power |
88 | 182 W |
89 | 183 W |
90 | 183 W |
91 | 183 W |
92 |
93 |
94 | | Norm. power |
95 | 232 W |
96 | 232 W |
97 | 232 W |
98 | - |
99 |
100 |
101 | | Intensity |
102 | 0.74 |
103 | 0.74 |
104 | 0.74 |
105 | - |
106 |
107 |
108 | | Training stress |
109 | 504.0 |
110 | 505.1 |
111 | 503.2 |
112 | - |
113 |
114 |
115 |
116 | Ride 2: interval workout 11.9 mi 1352 ft climbing |
117 |
118 |
119 | | Elapsed time |
120 | 1:32:34 |
121 | * |
122 | 1:32:34 |
123 | 1:32:34 |
124 |
125 |
126 | | Moving time |
127 | 57:17 |
128 | - |
129 | 57:11 |
130 | 57:51 |
131 |
132 |
133 | | Mean power |
134 | 172 W |
135 | 168 W |
136 | 168 W |
137 | 172 W |
138 |
139 |
140 | | Norm. power |
141 | 289 W |
142 | 286 W |
143 | 287 W |
144 | - |
145 |
146 |
147 | | Intensity |
148 | 0.93 |
149 | 0.92 |
150 | 0.92 |
151 | - |
152 |
153 |
154 | | Training stress |
155 | 81.7 |
156 | 82.3 |
157 | 83.1 |
158 | - |
159 |
160 |
161 |
162 | Ride 3: tempo 25.4 mi 2451 ft climbing |
163 |
164 |
165 | | Elapsed time |
166 | 2:09:02 |
167 | 2:08:58 |
168 | 2:08:58 |
169 | 2:09:02 |
170 |
171 |
172 | | Moving time |
173 | 1:32:39 |
174 | - |
175 | 1:32:23 |
176 | 1:32:43 |
177 |
178 |
179 | | Mean power |
180 | 201 W |
181 | 201 W |
182 | 201 W |
183 | 202 W |
184 |
185 |
186 | | Norm. power |
187 | 270 W |
188 | 269 W |
189 | 270 W |
190 | - |
191 |
192 |
193 | | Intensity |
194 | 0.86 |
195 | 0.86 |
196 | 0.87 |
197 | - |
198 |
199 |
200 | | Training stress |
201 | 115.3 |
202 | 114.1 |
203 | 115.1 |
204 | - |
205 |
206 |
207 |
208 | Ride 4: "coffee pace" 13.4 mi 902 ft climbing |
209 |
210 |
211 | | Elapsed time |
212 | 1:41:24 |
213 | 1:41:23 |
214 | 1:41:23 |
215 | 1:41:24 |
216 |
217 |
218 | | Moving time |
219 | 57:15 |
220 | - |
221 | 57:02 |
222 | 57:23 |
223 |
224 |
225 | | Mean power |
226 | 138 W |
227 | 139 W |
228 | 139 W |
229 | 139 W |
230 |
231 |
232 | | Norm. power |
233 | 251 W |
234 | 252 W |
235 | 252 W |
236 | - |
237 |
238 |
239 | | Intensity |
240 | 0.80 |
241 | 0.81 |
242 | 0.81 |
243 | - |
244 |
245 |
246 | | Training stress |
247 | 61.6 |
248 | 61.6 |
249 | 61.2 |
250 | - |
251 |
252 |
253 |
254 | \- Data not available on this platform
255 |
256 | \* Didn't calculate. TrainingPeaks doesn't directly report elapsed time so it
257 | has to be manually summed from lap durations, and these rides have lots of
258 | laps.
259 |
260 | ## Conclusions
261 |
262 | - Garmin Connect is the most aggressive when calculating moving time, Strava is
263 | the most lenient, and fitanalysis falls in between.
- Mean power calculated by fitanalysis is at most 1 W different from mean power
  calculated by another platform.
- Normalized power calculated by fitanalysis is at most 2 W different from
  normalized power calculated by another platform.
268 | - Training stress calculated by fitanalysis corresponds well to other platforms
269 | across a large range.
270 |
271 | ## Autopause and inactivity handling
272 |
273 | All of the activities in the table above were recorded with autopause enabled,
274 | so they don't highlight any differences in how each platform handles long
275 | periods of inactivity. To test this I recorded a ride with autopause disabled,
276 | and then used fitanalysis to analyze it in two ways: detecting and removing
277 | periods of inactivity (the default for fitanalysis), and leaving the data as-is.
278 | This activity includes a 2-minute period of inactivity, in addition to shorter
279 | stops e.g. at stop lights.
280 |
281 |
282 |
283 | |
284 | fitanalysis (inactivity removed) |
285 | fitanalysis (inactivity not removed) |
286 | TrainingPeaks |
287 | Garmin Connect |
288 | Strava |
289 |
290 |
291 |
292 | | Elapsed time |
293 | 34:54 |
294 | 34:54 |
295 | 34:54 |
296 | 34:54 |
297 | 34:54 |
298 |
299 |
300 | | Moving time |
301 | 30:48 |
302 | 34:54 |
303 | - |
304 | 30:57 |
305 | 31:12 |
306 |
307 |
308 | | Mean power |
309 | 247 W |
310 | 219 W |
311 | 220 W |
312 | 220 W |
313 | 248 W |
314 |
315 |
316 | | Norm. power |
317 | 279 W |
318 | 271 W |
319 | 272 W |
320 | 272 W |
321 | - |
322 |
323 |
324 | | Intensity |
325 | 0.89 |
326 | 0.87 |
327 | 0.87 |
328 | 0.87 |
329 | - |
330 |
331 |
332 | | Training stress |
333 | 41.1 |
334 | 43.8 |
335 | 43.6 |
336 | 43.7 |
337 | - |
338 |
339 |
340 |
341 | Average power with periods of inactivity removed matches Strava's average power,
342 | but not TrainingPeaks or Garmin Connect. They calculate average power from the
343 | raw data.
344 |
345 | TrainingPeaks and Garmin Connect also calculate normalized power from the
346 | raw data.
347 |
348 | Garmin Connect does calculate moving time but it appears not to use it for the
349 | power calculations. If inactivity isn't removed from the power data then
350 | elapsed time should indeed be used for consistency, but the choice to remove
351 | the inactivity for the purpose of moving time calculation and not do so for
352 | power is puzzling.
353 |
354 | Because Strava removes inactivity for power calculations, both approaches seem
355 | to be accepted. It's my opinion that removing inactivity is the correct
356 | approach because, depending on the length of inactivity, not doing so can lead
357 | to an inflated or deflated estimation of the effort during periods of activity.
358 | One counter-argument I can see is for structured workouts: it may be desirable
359 | to include the rest periods in calculations of intensity and training stress
360 | because in this case the length of the rest is deliberately chosen as part of
361 | the workout. Perhaps this is the reason for TrainingPeaks' implementation?
362 |
363 | This is only one data point, so looking at some more rides would be interesting,
364 | but one takeaway from this example is this: want to inflate your TSS? Try
365 | disabling autopause (and don't take _really_ long breaks, but apparently
366 | moderately long breaks are fine).
367 |
368 | # References
369 |
370 | Coggan, Andrew. (2012, June 20). _Calculate Normalised Power for an Interval._ [Forum comment]. Retrieved June 14, 2017, from http://www.timetriallingforum.co.uk/index.php?/topic/69738-calculate-normalised-power-for-an-interval/&do=findComment&comment=978386
371 |
372 | Coggan, Andrew. (2016, February 10). _Normalized Power, Intensity Factor and Training Stress Score._ Retrieved June 14, 2017, from
373 | https://www.trainingpeaks.com/blog/normalized-power-intensity-factor-training-stress/
374 |
375 | Coggan, Andrew. (2003, March 13). _TSS and IF - at last!_ Retrieved June 14, 2017, from http://lists.topica.com/lists/wattage/read/message.html?mid=907028398&sort=d&start=9353
376 |
377 | Eckner, Andreas. (2017, April 3). _Algorithms for Unevenly Spaced Time Series: Moving Averages and Other Rolling Operators._ Retrieved June 14, 2017, from http://eckner.com/papers/Algorithms%20for%20Unevenly%20Spaced%20Time%20Series.pdf
378 |
379 | Friel, Joe. (2009, Sept 21). _Estimating Training Stress Score (TSS)._ Retrieved June 22, 2017, from https://www.trainingpeaks.com/blog/estimating-training-stress-score-tss/
380 |
381 | # License
This project is licensed under the MIT License. See the
[LICENSE](https://github.com/mtraver/python-fitanalysis/blob/master/LICENSE) file
for details.
385 |
--------------------------------------------------------------------------------
/fitanalysis/activity.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import numpy as np
3 | import pandas
4 |
5 | import fitparse
6 |
7 | import fitanalysis.util
8 |
9 |
10 | # Set to True to add a column to the DataFrame indicating whether a row would
11 | # have been removed if removal of stopped periods were enabled, but don't
12 | # actually remove it.
13 | DEBUG_EXCISE = False
14 |
15 |
class Activity(fitparse.FitFile):
  """Represents an activity recorded as a .fit file.

  Construction of an Activity parses the .fit file and detects periods of
  inactivity, as such periods must be removed from the data for heart rate-,
  cadence-, and power-based calculations.
  """

  EVENT_TYPE_START = 'start'
  EVENT_TYPE_STOP = 'stop'

  TIMER_TRIGGER_DETECTED = 'detected'

  # Speeds less than or equal to this value (in m/s) are
  # considered to be stopped
  STOPPED_THRESHOLD = 0.3

  def __init__(self, file_obj, remove_stopped_periods=True):
    """Creates an Activity from a .fit file.

    Args:
      file_obj: A file-like object representing a .fit file.
      remove_stopped_periods: If True, regions of data with speed below a
                              threshold will be removed from the data. Default
                              is True.
    """
    super(Activity, self).__init__(file_obj)

    self._remove_stopped_periods = remove_stopped_periods or DEBUG_EXCISE

    records = list(self.get_messages('record'))

    # Get elapsed time before modifying the data
    self.start_time = records[0].get('timestamp').value
    self.end_time = records[-1].get('timestamp').value
    self.elapsed_time = self.end_time - self.start_time

    # Calculated when needed and memoized here
    self._moving_time = None
    self._norm_power = None

    self.events = self._df_from_messages(
        self.get_messages('event'),
        ['event', 'event_type', 'event_group', 'timer_trigger', 'data'],
        timestamp_index=True)

    # We will build a DataFrame with these fields as columns. Values for each
    # of these fields will be extracted from each record from the .fit file.
    fields = ['timestamp', 'speed', 'heart_rate', 'power', 'cadence']

    # The primary index of the DataFrame is the "block". A block is defined as
    # a period of movement. Blocks may be defined by start/stop event messages
    # from the .fit file, or they may be detected based on speed in the case
    # that the recording device did not automatically pause recording when
    # stopped.
    blocks = []
    curr_block = -1

    # The secondary index is the duration from the start of the activity
    time_offsets = []

    # Get start/stop events from .fit file and combine with the events detected
    # from speed data, keeping the event from the .fit file if timestamps are
    # identical
    timer_events = self.events[self.events['event'] == 'timer']

    if self._remove_stopped_periods:
      # Detect start/stop events based on stopped threshold speed. If the
      # recording device did not have autopause enabled then this is the only
      # way periods of no movement can be detected and removed.
      detected_events = self._detect_start_stop_events(records)
      timer_events = timer_events.combine_first(detected_events)

    # Build the rows and indices of the DataFrame
    excise = False
    event_index = 0
    rows = []
    for record in records:
      curr_timestamp = record.get('timestamp').value

      # Match data record timestamps with event timestamps in order to mark
      # "blocks" as described above. Periods of no movement will be excised
      # (if the recording device did not have autopause enabled there will be
      # blocks of no movement that should be removed before data analysis).
      if event_index < len(timer_events) and (
          curr_timestamp >= timer_events.iloc[event_index].name):

        # Events usually have timestamps that correspond to a data timestamp,
        # but this isn't always the case. Process events until the events catch
        # up with the data.
        while True:
          event_type = timer_events.iloc[event_index]['event_type']
          trigger = timer_events.iloc[event_index]['timer_trigger']

          if event_type == self.EVENT_TYPE_START:
            curr_block += 1

            # If we've seen a start event we should not be excising data
            # TODO(mtraver) Do I care if the start event is detected or from
            # the .fit file? I don't think so.
            excise = False
          elif event_type.startswith(self.EVENT_TYPE_STOP):
            # If the stop event was detected based on speed, excise the region
            # until the next start event, because we know that it's a region of
            # data with speed under the stopped threshold.
            if trigger == self.TIMER_TRIGGER_DETECTED:
              excise = True

          event_index += 1

          # Once the event timestamp is ahead of the data timestamp we can
          # continue processing data; the next event will be processed as the
          # data timestamps catch up with it.
          if event_index >= len(timer_events) or (
              curr_timestamp < timer_events.iloc[event_index].name):
            break

      if not excise or DEBUG_EXCISE:
        # Build indices
        time_offsets.append(curr_timestamp - self.start_time)
        blocks.append(curr_block)

        row = []
        for field_name in fields:
          field = record.get(field_name)
          row.append(field.value if field is not None else None)

        if DEBUG_EXCISE:
          row.append(excise)

        rows.append(row)

    assert len(blocks) == len(time_offsets)

    if DEBUG_EXCISE:
      fields += ['excise']

    self.data = pandas.DataFrame(rows, columns=fields,
                                 index=[blocks, time_offsets])
    self.data.index.names = ['block', 'offset']

    # These fields may not exist in all .fit files,
    # so drop the columns if they're not present.
    for field in ['power', 'cadence', 'heart_rate']:
      if self.data[self.data[field].notnull()].empty:
        self.data.drop(field, axis=1, inplace=True)

    if self.has_power and self.has_cadence:
      self._clean_up_power_and_cadence()

  def _df_from_messages(self, messages, fields, timestamp_index=False):
    """Creates a DataFrame from an iterable of fitparse messages.

    Args:
      messages: Iterable of fitparse messages.
      fields: List of message fields to include in the DataFrame. Each one will
              be a separate column, and if a field isn't present in a particular
              message, its value will be set to None.
      timestamp_index: If True, message timestamps will be used as the index of
                       the DataFrame. Otherwise the default index is used.
                       Default is False.

    Returns:
      A DataFrame with one row per message and columns for each of
      the given fields.
    """
    rows = []
    timestamps = []
    for m in messages:
      timestamps.append(m.get('timestamp').value)

      row = []
      for field_name in fields:
        field = m.get(field_name)
        row.append(field.value if field is not None else None)

      rows.append(row)

    if timestamp_index:
      return pandas.DataFrame(rows, columns=fields, index=timestamps)
    else:
      return pandas.DataFrame(rows, columns=fields)

  def _detect_start_stop_events(self, records):
    """Detects periods of inactivity by comparing speed to a threshold value.

    Args:
      records: Iterable of fitparse messages. They must contain a 'speed' field.

    Returns:
      A DataFrame indexed by timestamp with these columns:
        - 'event_type': value is one of {'start','stop'}
        - 'timer_trigger': always the string 'detected', so that these
          start/stop events can be distinguished from those present in the
          .fit file.

      Each row is one event, and its timestamp is guaranteed to be that of a
      record in the given iterable of messages.

      When the speed of a record drops below the threshold speed a 'stop' event
      is created with its timestamp, and when the speed rises above the
      threshold speed a 'start' event is created with its timestamp.
    """
    stopped = False
    timestamps = []
    events = []
    for i, record in enumerate(records):
      ts = record.get('timestamp').value

      if i == 0:
        # The activity implicitly starts with its first record.
        timestamps.append(ts)
        events.append([self.EVENT_TYPE_START, self.TIMER_TRIGGER_DETECTED])
      elif record.get('speed') is not None:
        speed = record.get('speed').value
        if speed <= self.STOPPED_THRESHOLD:
          # Only emit a stop event on the transition into the stopped state.
          if not stopped:
            timestamps.append(ts)
            events.append([self.EVENT_TYPE_STOP, self.TIMER_TRIGGER_DETECTED])

          stopped = True
        else:
          # Transition out of the stopped state emits a start event.
          if stopped:
            timestamps.append(ts)
            events.append([self.EVENT_TYPE_START, self.TIMER_TRIGGER_DETECTED])
            stopped = False

    return pandas.DataFrame(events, columns=['event_type', 'timer_trigger'],
                            index=timestamps)

  def _clean_up_power_and_cadence(self):
    """Infers true value of null power and cadence values in simple cases."""
    # If cadence in NaN and power is 0, assume cadence is 0
    self.data.loc[self.data['cadence'].isnull()
                  & (self.data['power'] == 0.0), 'cadence'] = 0.0

    # If power in NaN and cadence is 0, assume power is 0
    self.data.loc[self.data['power'].isnull()
                  & (self.data['cadence'] == 0.0), 'power'] = 0.0

    # If both power and cadence are NaN, assume they're both 0
    power_and_cadence_null = (
        self.data['cadence'].isnull() & self.data['power'].isnull())
    self.data.loc[power_and_cadence_null, 'power'] = 0.0
    self.data.loc[power_and_cadence_null, 'cadence'] = 0.0

  @property
  def moving_time(self):
    """Moving time as a datetime.timedelta, memoized after first access."""
    if self._moving_time is None:
      moving_time = 0
      for _, block_df in self.data.groupby(level='block'):
        # Calculate the number of seconds elapsed since the previous data point
        # and sum them to get the moving time
        moving_time += (
            (block_df['timestamp'] - block_df['timestamp'].shift(1).fillna(
                block_df.iloc[0]['timestamp'])) / np.timedelta64(1, 's')).sum()

      self._moving_time = datetime.timedelta(seconds=moving_time)

    return self._moving_time

  @property
  def has_power(self):
    return 'power' in self.data.columns

  @property
  def has_cadence(self):
    return 'cadence' in self.data.columns

  @property
  def has_heart_rate(self):
    return 'heart_rate' in self.data.columns

  @property
  def cadence(self):
    """Series of non-null, non-zero cadence values, or None if absent."""
    if not self.has_cadence:
      return None

    if self._remove_stopped_periods:
      return self.data[
          self.data['cadence'].notnull() & (self.data['cadence'] > 0)
          & (self.data['speed'] > self.STOPPED_THRESHOLD)]['cadence']

    return self.data[
        self.data['cadence'].notnull() & (self.data['cadence'] > 0)]['cadence']

  @property
  def mean_cadence(self):
    if not self.has_cadence:
      return None

    return self.cadence.mean()

  @property
  def heart_rate(self):
    """Series of non-null heart rate values, or None if absent."""
    if not self.has_heart_rate:
      return None

    if self._remove_stopped_periods:
      # The speed comparison must be parenthesized: & binds more tightly
      # than > in Python, so without parentheses this expression evaluates
      # as (notnull & speed) > threshold, which is not a valid filter.
      return self.data[
          self.data['heart_rate'].notnull()
          & (self.data['speed'] > self.STOPPED_THRESHOLD)]['heart_rate']

    return self.data[self.data['heart_rate'].notnull()]['heart_rate']

  @property
  def mean_heart_rate(self):
    if not self.has_heart_rate:
      return None

    return self.heart_rate.mean()

  @property
  def power(self):
    """Series of non-null power values, or None if absent."""
    if not self.has_power:
      return None

    if self._remove_stopped_periods:
      # Parenthesized for the same precedence reason as in heart_rate:
      # & binds more tightly than >.
      return self.data[self.data['power'].notnull()
                       & (self.data['speed'] > self.STOPPED_THRESHOLD)]['power']

    return self.data[self.data['power'].notnull()]['power']

  @property
  def mean_power(self):
    if not self.has_power:
      return None

    return self.power.mean()

  @property
  def norm_power(self):
    """Calculates the normalized power.

    See (Coggan, 2003) cited in README for details on the rationale behind the
    calculation.

    Normalized power is based on a 30-second moving average of power. Coggan's
    algorithm specifies that the moving average should start at the 30 second
    point in the data, but this implementation does not (it starts with the
    first value, like a standard moving average). This is an acceptable
    approximation because normalized power shouldn't be relied upon for efforts
    less than 20 minutes long (Coggan, 2012), so how the first 30 seconds are
    handled doesn't make much difference. Also, the values computed by this
    implementation are very similar to those computed by TrainingPeaks, so
    changing the moving average implementation doesn't seem to be critical.

    This function also does not specially handle gaps in the data. When a pause
    is present in the data (either from autopause on the recording device or
    removal of stopped periods in post-processing) the timestamp may jump by a
    large amount from one sample to the next. Ideally this should be handled in
    some way that takes into account the physiological impact of that rest, but
    currently this algorithm does not. But again, the values computed by this
    implementation are very similar to those computed by TrainingPeaks, so
    changing gap handling doesn't seem to be critical.

    Returns:
      Normalized power as a float
    """
    if not self.has_power:
      return None

    if self._norm_power is None:
      p = self.power
      p.index = p.index.droplevel(level='block')
      # Fourth root of the mean fourth power of the 30-second moving average.
      self._norm_power = (
          np.sqrt(np.sqrt(
              np.mean(fitanalysis.util.moving_average(p, 30) ** 4))))

    return self._norm_power

  def intensity(self, ftp):
    """Calculates the intensity factor of the activity.

    Intensity factor is defined as the ratio of normalized power to FTP.
    See (Coggan, 2016) cited in README for more details.

    Args:
      ftp: Functional threshold power in Watts.

    Returns:
      Intensity factor as a float
    """
    if not self.has_power:
      return None

    return self.norm_power / float(ftp)

  def training_stress(self, ftp):
    """Calculates the training stress of the activity.

    This is essentially a power-based version of Banister's heart rate-based
    TRIMP (training impulse). Andrew Coggan's introduction of TSS and IF
    specifies that average power should be used to calculate training stress
    (Coggan, 2003), but a later post on TrainingPeaks' blog specifies that
    normalized power should be used (Friel, 2009). Normalized power is used
    here because it yields values in line with the numbers from TrainingPeaks;
    using average power does not.

    Args:
      ftp: Functional threshold power in Watts.

    Returns:
      Training stress as a float
    """
    if not self.has_power:
      return None

    return (self.moving_time.total_seconds() * self.norm_power
            * self.intensity(ftp)) / (float(ftp) * 3600.0) * 100.0
425 |
--------------------------------------------------------------------------------