├── fitanalysis
├── __init__.py
├── util.py
└── activity.py
├── resources
└── TSS_and_IF_Coggan_2003.pdf
├── setup.py
├── LICENSE
├── .gitignore
└── README.md
/fitanalysis/__init__.py:
--------------------------------------------------------------------------------
# Re-export Activity at the package level so callers can write
# fitanalysis.Activity instead of fitanalysis.activity.Activity.
from fitanalysis.activity import Activity


# Package version, read by setup.py at build time.
__version__ = '0.0.1'
# Explicit public API: only Activity is exported via `from fitanalysis import *`.
__all__ = ['Activity']
6 |
--------------------------------------------------------------------------------
/resources/TSS_and_IF_Coggan_2003.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mtraver/python-fitanalysis/HEAD/resources/TSS_and_IF_Coggan_2003.pdf
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging configuration for the fitanalysis library."""
try:
  # setuptools is required for install_requires to have any effect;
  # distutils.core.setup silently ignores that keyword, so the original
  # code never actually declared its dependencies at install time.
  from setuptools import setup
except ImportError:
  from distutils.core import setup
import sys

import fitanalysis


# Runtime dependencies of the package.
requires = ['fitparse', 'numpy', 'pandas']
if sys.version_info < (2, 7):
  # argparse entered the standard library in Python 2.7; older
  # interpreters need the backport from PyPI.
  requires.append('argparse')

with open('LICENSE', 'r') as f:
  license_content = f.read()

setup(name='fitanalysis',
      version=fitanalysis.__version__,
      description='Python library for analysis of ANT/Garmin .fit files',
      author='Michael Traver',
      url='https://github.com/mtraver/python-fitanalysis',
      license=license_content,
      packages=['fitanalysis'],
      install_requires=requires)
22 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Michael Traver
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # Jupyter Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # SageMath parsed files
79 | *.sage.py
80 |
81 | # Environments
82 | .env
83 | .venv
84 | env/
85 | venv/
86 | ENV/
87 |
88 | # Spyder project settings
89 | .spyderproject
90 | .spyproject
91 |
92 | # Rope project settings
93 | .ropeproject
94 |
95 | # mkdocs documentation
96 | /site
97 |
98 | # mypy
99 | .mypy_cache/
100 |
101 | .DS_Store
102 |
--------------------------------------------------------------------------------
/fitanalysis/util.py:
--------------------------------------------------------------------------------
1 | """Utility functions."""
2 | import numpy as np
3 |
4 |
def moving_average(time_series, window_len):
  """Calculates the moving average of an unevenly spaced time series.

  This moving average implementation weights each value by the time it remained
  unchanged, which conceptually matches smart recording on GPS devices: a sample
  is taken when some value changes sufficiently, so before a new sample is taken
  the previous one is assumed to be more or less constant.

  The term "area" below means a sum of time-weighted values.

  This implementation follows the SMA_last algorithm proposed
  in (Eckner, 2017) (see README for citation).

  Args:
    time_series: A pandas.Series of the values to average,
                 indexed with timestamps. The Series is not modified.
    window_len: The size of the moving average window, in seconds.

  Returns:
    A numpy array of length len(time_series) containing the
    moving average values
  """
  # Compute each sample's offset in whole seconds from the first timestamp.
  # These are kept in local arrays (the original assigned back to
  # time_series.index, mutating the caller's Series as a side effect).
  times = ((time_series.index
            - time_series.index[0]) / np.timedelta64(1, 's')).astype('int')
  values = time_series.values

  window_area = values[0] * window_len

  # It may not always be possible to construct a window of length exactly equal
  # to window_len using timestamps present in the data. To handle this, the left
  # side of the window is allowed to fall between timestamps (the right side is
  # always fixed to a timestamp in the data). Therefore we need to separately
  # compute the area of the inter-timestamp region on the left side of the
  # window so that it can be added to the window area. left_area is that value.
  left_area = window_area

  out = np.zeros(len(values))
  out[0] = values[0]

  # i is the left side of the window and j is the right.
  # Note: range, not the Python-2-only xrange, so this runs on Python 3 too.
  i = 0
  for j in range(1, len(values)):
    # Remove the last iteration's left_area as a new right window bound may
    # change the left_area required in this iteration
    window_area -= left_area

    # Expand window to the right
    window_area += values[j-1] * (times[j] - times[j-1])

    # Shrink window from the left if expanding to the right has created too
    # large a window. new_left_time may fall between timestamps present in the
    # data, which is fine, since that's handled by left_area.
    new_left_time = times[j] - window_len
    while times[i] < new_left_time:
      window_area -= values[i] * (times[i+1] - times[i])
      i += 1

    # Add left side inter-timestamp area to window
    left_area = values[max(0, i - 1)] * (times[i] - new_left_time)
    window_area += left_area

    out[j] = window_area / window_len

  return out
73 |
74 |
def print_full(df):
  """Prints a DataFrame in full, bypassing pandas' row truncation.

  Args:
    df: The pandas.DataFrame to print.
  """
  # util.py never imported pandas at module level, so the original code
  # raised NameError; import locally to keep module dependencies minimal.
  import pandas

  # option_context restores 'display.max_rows' even if printing raises,
  # unlike a manual set_option/reset_option pair.
  with pandas.option_context('display.max_rows', len(df)):
    print(df)
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # fitanalysis
2 | fitanalysis is a Python library for analysis of ANT/Garmin `.fit` files.
3 |
4 | It's geared toward cycling and allows for easy extraction of data such as the
5 | following from a `.fit` file:
6 | - elapsed time
7 | - moving time
8 | - average heart rate
9 | - average power
10 | - normalized power (based on information publicly available about
11 | TrainingPeaks' NP®)
12 | - intensity (based on information publicly available about TrainingPeaks' IF®)
13 | - training stress (based on information publicly available about
14 | TrainingPeaks' TSS®)
15 |
16 | My impetus for this project was to better understand how platforms like
17 | TrainingPeaks analyze power and heart rate data to arrive at an estimation
18 | of training stress. As such, this project attempts to match those platforms'
19 | calculations as closely as possible.
20 |
21 | # Dependencies and installation
22 | [Pandas](http://pandas.pydata.org/), [NumPy](http://www.numpy.org/), and
23 | [fitparse](https://github.com/dtcooper/python-fitparse) are required.
24 |
25 | `python setup.py install` (or `python setup.py install --user`) to install.
26 |
27 | # Example
28 |
29 | fitanalysis provides the `Activity` class.
30 |
31 | ```python
32 | import fitanalysis
33 |
34 | activity = fitanalysis.Activity('my_activity.fit')
35 |
print(activity.elapsed_time)
print(activity.moving_time)

# Also available for heart rate and cadence
print(activity.mean_power)

print(activity.norm_power)

# Intensity and training stress calculations require
# a functional threshold power value (in Watts)
print(activity.intensity(310))
print(activity.training_stress(310))
48 | ```
49 |
50 | Construction of an `Activity` parses the `.fit` file and detects periods of
51 | inactivity, as such periods must be removed from the data for heart rate-,
52 | cadence-, and power-based calculations.
53 |
54 | # Comparison of activity analysis platforms
55 |
56 | Here is a comparison for a few of my rides of varying profiles across the
57 | various platforms.
58 |
59 |
60 |
61 | |
62 | |
63 | fitanalysis |
64 | TrainingPeaks |
65 | Garmin Connect |
66 | Strava |
67 |
68 |
69 |
70 | Ride 1: epic ride 126.5 mi 15207 ft climbing |
71 |
72 |
73 | | Elapsed time |
74 | 12:19:40 |
75 | * |
76 | 12:19:20 |
77 | 12:19:40 |
78 |
79 |
80 | | Moving time |
81 | 9:07:14 |
82 | - |
83 | 9:06:12 |
84 | 9:09:26 |
85 |
86 |
87 | | Mean power |
88 | 182 W |
89 | 183 W |
90 | 183 W |
91 | 183 W |
92 |
93 |
94 | | Norm. power |
95 | 232 W |
96 | 232 W |
97 | 232 W |
98 | - |
99 |
100 |
101 | | Intensity |
102 | 0.74 |
103 | 0.74 |
104 | 0.74 |
105 | - |
106 |
107 |
108 | | Training stress |
109 | 504.0 |
110 | 505.1 |
111 | 503.2 |
112 | - |
113 |
114 |
115 |
116 | Ride 2: interval workout 11.9 mi 1352 ft climbing |
117 |
118 |
119 | | Elapsed time |
120 | 1:32:34 |
121 | * |
122 | 1:32:34 |
123 | 1:32:34 |
124 |
125 |
126 | | Moving time |
127 | 57:17 |
128 | - |
129 | 57:11 |
130 | 57:51 |
131 |
132 |
133 | | Mean power |
134 | 172 W |
135 | 168 W |
136 | 168 W |
137 | 172 W |
138 |
139 |
140 | | Norm. power |
141 | 289 W |
142 | 286 W |
143 | 287 W |
144 | - |
145 |
146 |
147 | | Intensity |
148 | 0.93 |
149 | 0.92 |
150 | 0.92 |
151 | - |
152 |
153 |
154 | | Training stress |
155 | 81.7 |
156 | 82.3 |
157 | 83.1 |
158 | - |
159 |
160 |
161 |
162 | Ride 3: tempo 25.4 mi 2451 ft climbing |
163 |
164 |
165 | | Elapsed time |
166 | 2:09:02 |
167 | 2:08:58 |
168 | 2:08:58 |
169 | 2:09:02 |
170 |
171 |
172 | | Moving time |
173 | 1:32:39 |
174 | - |
175 | 1:32:23 |
176 | 1:32:43 |
177 |
178 |
179 | | Mean power |
180 | 201 W |
181 | 201 W |
182 | 201 W |
183 | 202 W |
184 |
185 |
186 | | Norm. power |
187 | 270 W |
188 | 269 W |
189 | 270 W |
190 | - |
191 |
192 |
193 | | Intensity |
194 | 0.86 |
195 | 0.86 |
196 | 0.87 |
197 | - |
198 |
199 |
200 | | Training stress |
201 | 115.3 |
202 | 114.1 |
203 | 115.1 |
204 | - |
205 |
206 |
207 |
208 | Ride 4: "coffee pace" 13.4 mi 902 ft climbing |
209 |
210 |
211 | | Elapsed time |
212 | 1:41:24 |
213 | 1:41:23 |
214 | 1:41:23 |
215 | 1:41:24 |
216 |
217 |
218 | | Moving time |
219 | 57:15 |
220 | - |
221 | 57:02 |
222 | 57:23 |
223 |
224 |
225 | | Mean power |
226 | 138 W |
227 | 139 W |
228 | 139 W |
229 | 139 W |
230 |
231 |
232 | | Norm. power |
233 | 251 W |
234 | 252 W |
235 | 252 W |
236 | - |
237 |
238 |
239 | | Intensity |
240 | 0.80 |
241 | 0.81 |
242 | 0.81 |
243 | - |
244 |
245 |
246 | | Training stress |
247 | 61.6 |
248 | 61.6 |
249 | 61.2 |
250 | - |
251 |
252 |
253 |
254 | \- Data not available on this platform
255 |
256 | \* Didn't calculate. TrainingPeaks doesn't directly report elapsed time so it
257 | has to be manually summed from lap durations, and these rides have lots of
258 | laps.
259 |
260 | ## Conclusions
261 |
262 | - Garmin Connect is the most aggressive when calculating moving time, Strava is
263 | the most lenient, and fitanalysis falls in between.
- Mean power calculated by fitanalysis is at most 1 W different from mean power
  calculated by another platform.
- Normalized power calculated by fitanalysis is at most 2 W different from
  normalized power calculated by another platform.
268 | - Training stress calculated by fitanalysis corresponds well to other platforms
269 | across a large range.
270 |
271 | ## Autopause and inactivity handling
272 |
273 | All of the activities in the table above were recorded with autopause enabled,
274 | so they don't highlight any differences in how each platform handles long
275 | periods of inactivity. To test this I recorded a ride with autopause disabled,
276 | and then used fitanalysis to analyze it in two ways: detecting and removing
277 | periods of inactivity (the default for fitanalysis), and leaving the data as-is.
278 | This activity includes a 2-minute period of inactivity, in addition to shorter
279 | stops e.g. at stop lights.
280 |
281 |
282 |
283 | |
284 | fitanalysis (inactivity removed) |
285 | fitanalysis (inactivity not removed) |
286 | TrainingPeaks |
287 | Garmin Connect |
288 | Strava |
289 |
290 |
291 |
292 | | Elapsed time |
293 | 34:54 |
294 | 34:54 |
295 | 34:54 |
296 | 34:54 |
297 | 34:54 |
298 |
299 |
300 | | Moving time |
301 | 30:48 |
302 | 34:54 |
303 | - |
304 | 30:57 |
305 | 31:12 |
306 |
307 |
308 | | Mean power |
309 | 247 W |
310 | 219 W |
311 | 220 W |
312 | 220 W |
313 | 248 W |
314 |
315 |
316 | | Norm. power |
317 | 279 W |
318 | 271 W |
319 | 272 W |
320 | 272 W |
321 | - |
322 |
323 |
324 | | Intensity |
325 | 0.89 |
326 | 0.87 |
327 | 0.87 |
328 | 0.87 |
329 | - |
330 |
331 |
332 | | Training stress |
333 | 41.1 |
334 | 43.8 |
335 | 43.6 |
336 | 43.7 |
337 | - |
338 |
339 |
340 |
341 | Average power with periods of inactivity removed matches Strava's average power,
342 | but not TrainingPeaks or Garmin Connect. They calculate average power from the
343 | raw data.
344 |
345 | TrainingPeaks and Garmin Connect also calculate normalized power from the
346 | raw data.
347 |
348 | Garmin Connect does calculate moving time but it appears not to use it for the
349 | power calculations. If inactivity isn't removed from the power data then
350 | elapsed time should indeed be used for consistency, but the choice to remove
351 | the inactivity for the purpose of moving time calculation and not do so for
352 | power is puzzling.
353 |
354 | Because Strava removes inactivity for power calculations, both approaches seem
355 | to be accepted. It's my opinion that removing inactivity is the correct
356 | approach because, depending on the length of inactivity, not doing so can lead
357 | to an inflated or deflated estimation of the effort during periods of activity.
358 | One counter-argument I can see is for structured workouts: it may be desirable
359 | to include the rest periods in calculations of intensity and training stress
360 | because in this case the length of the rest is deliberately chosen as part of
361 | the workout. Perhaps this is the reason for TrainingPeaks' implementation?
362 |
363 | This is only one data point, so looking at some more rides would be interesting,
364 | but one takeaway from this example is this: want to inflate your TSS? Try
365 | disabling autopause (and don't take _really_ long breaks, but apparently
366 | moderately long breaks are fine).
367 |
368 | # References
369 |
370 | Coggan, Andrew. (2012, June 20). _Calculate Normalised Power for an Interval._ [Forum comment]. Retrieved June 14, 2017, from http://www.timetriallingforum.co.uk/index.php?/topic/69738-calculate-normalised-power-for-an-interval/&do=findComment&comment=978386
371 |
372 | Coggan, Andrew. (2016, February 10). _Normalized Power, Intensity Factor and Training Stress Score._ Retrieved June 14, 2017, from
373 | https://www.trainingpeaks.com/blog/normalized-power-intensity-factor-training-stress/
374 |
375 | Coggan, Andrew. (2003, March 13). _TSS and IF - at last!_ Retrieved June 14, 2017, from http://lists.topica.com/lists/wattage/read/message.html?mid=907028398&sort=d&start=9353
376 |
377 | Eckner, Andreas. (2017, April 3). _Algorithms for Unevenly Spaced Time Series: Moving Averages and Other Rolling Operators._ Retrieved June 14, 2017, from http://eckner.com/papers/Algorithms%20for%20Unevenly%20Spaced%20Time%20Series.pdf
378 |
379 | Friel, Joe. (2009, Sept 21). _Estimating Training Stress Score (TSS)._ Retrieved June 22, 2017, from https://www.trainingpeaks.com/blog/estimating-training-stress-score-tss/
380 |
381 | # License
This project is licensed under the MIT License. See the
[LICENSE](https://github.com/mtraver/python-fitanalysis/blob/master/LICENSE) file
for details.
385 |
--------------------------------------------------------------------------------
/fitanalysis/activity.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import numpy as np
3 | import pandas
4 |
5 | import fitparse
6 |
7 | import fitanalysis.util
8 |
9 |
10 | # Set to True to add a column to the DataFrame indicating whether a row would
11 | # have been removed if removal of stopped periods were enabled, but don't
12 | # actually remove it.
13 | DEBUG_EXCISE = False
14 |
15 |
class Activity(fitparse.FitFile):
  """Represents an activity recorded as a .fit file.

  Construction of an Activity parses the .fit file and detects periods of
  inactivity, as such periods must be removed from the data for heart rate-,
  cadence-, and power-based calculations.
  """

  EVENT_TYPE_START = 'start'
  EVENT_TYPE_STOP = 'stop'

  TIMER_TRIGGER_DETECTED = 'detected'

  # Speeds less than or equal to this value (in m/s) are
  # considered to be stopped
  STOPPED_THRESHOLD = 0.3

  def __init__(self, file_obj, remove_stopped_periods=True):
    """Creates an Activity from a .fit file.

    Args:
      file_obj: A file-like object representing a .fit file.
      remove_stopped_periods: If True, regions of data with speed below a
                              threshold will be removed from the data. Default
                              is True.
    """
    super(Activity, self).__init__(file_obj)

    self._remove_stopped_periods = remove_stopped_periods or DEBUG_EXCISE

    records = list(self.get_messages('record'))

    # Get elapsed time before modifying the data
    self.start_time = records[0].get('timestamp').value
    self.end_time = records[-1].get('timestamp').value
    self.elapsed_time = self.end_time - self.start_time

    # Calculated when needed and memoized here
    self._moving_time = None
    self._norm_power = None

    self.events = self._df_from_messages(
        self.get_messages('event'),
        ['event', 'event_type', 'event_group', 'timer_trigger', 'data'],
        timestamp_index=True)

    # We will build a DataFrame with these fields as columns. Values for each
    # of these fields will be extracted from each record from the .fit file.
    fields = ['timestamp', 'speed', 'heart_rate', 'power', 'cadence']

    # The primary index of the DataFrame is the "block". A block is defined as
    # a period of movement. Blocks may be defined by start/stop event messages
    # from the .fit file, or they may be detected based on speed in the case
    # that the recording device did not automatically pause recording when
    # stopped.
    blocks = []
    curr_block = -1

    # The secondary index is the duration from the start of the activity
    time_offsets = []

    # Get start/stop events from .fit file and combine with the events detected
    # from speed data, keeping the event from the .fit file if timestamps are
    # identical
    timer_events = self.events[self.events['event'] == 'timer']

    if self._remove_stopped_periods:
      # Detect start/stop events based on stopped threshold speed. If the
      # recording device did not have autopause enabled then this is the only
      # way periods of no movement can be detected and removed.
      detected_events = self._detect_start_stop_events(records)
      timer_events = timer_events.combine_first(detected_events)

    # Build the rows and indices of the DataFrame
    excise = False
    event_index = 0
    rows = []
    for record in records:
      curr_timestamp = record.get('timestamp').value

      # Match data record timestamps with event timestamps in order to mark
      # "blocks" as described above. Periods of no movement will be excised
      # (if the recording device did not have autopause enabled there will be
      # blocks of no movement that should be removed before data analysis).
      if event_index < len(timer_events) and (
          curr_timestamp >= timer_events.iloc[event_index].name):

        # Events usually have timestamps that correspond to a data timestamp,
        # but this isn't always the case. Process events until the events catch
        # up with the data.
        while True:
          event_type = timer_events.iloc[event_index]['event_type']
          trigger = timer_events.iloc[event_index]['timer_trigger']

          if event_type == self.EVENT_TYPE_START:
            curr_block += 1

            # If we've seen a start event we should not be excising data
            # TODO(mtraver) Do I care if the start event is detected or from
            # the .fit file? I don't think so.
            excise = False
          elif event_type.startswith(self.EVENT_TYPE_STOP):
            # If the stop event was detected based on speed, excise the region
            # until the next start event, because we know that it's a region of
            # data with speed under the stopped threshold.
            if trigger == self.TIMER_TRIGGER_DETECTED:
              excise = True

          event_index += 1

          # Once the event timestamp is ahead of the data timestamp we can
          # continue processing data; the next event will be processed as the
          # data timestamps catch up with it.
          if event_index >= len(timer_events) or (
              curr_timestamp < timer_events.iloc[event_index].name):
            break

      if not excise or DEBUG_EXCISE:
        # Build indices
        time_offsets.append(curr_timestamp - self.start_time)
        blocks.append(curr_block)

        row = []
        for field_name in fields:
          field = record.get(field_name)
          row.append(field.value if field is not None else None)

        if DEBUG_EXCISE:
          row.append(excise)

        rows.append(row)

    assert len(blocks) == len(time_offsets)

    if DEBUG_EXCISE:
      fields += ['excise']

    self.data = pandas.DataFrame(rows, columns=fields,
                                 index=[blocks, time_offsets])
    self.data.index.names = ['block', 'offset']

    # These fields may not exist in all .fit files,
    # so drop the columns if they're not present.
    for field in ['power', 'cadence', 'heart_rate']:
      if self.data[self.data[field].notnull()].empty:
        self.data.drop(field, axis=1, inplace=True)

    if self.has_power and self.has_cadence:
      self._clean_up_power_and_cadence()

  def _df_from_messages(self, messages, fields, timestamp_index=False):
    """Creates a DataFrame from an iterable of fitparse messages.

    Args:
      messages: Iterable of fitparse messages.
      fields: List of message fields to include in the DataFrame. Each one will
              be a separate column, and if a field isn't present in a particular
              message, its value will be set to None.
      timestamp_index: If True, message timestamps will be used as the index of
                       the DataFrame. Otherwise the default index is used.
                       Default is False.

    Returns:
      A DataFrame with one row per message and columns for each of
      the given fields.
    """
    rows = []
    timestamps = []
    for m in messages:
      timestamps.append(m.get('timestamp').value)

      row = []
      for field_name in fields:
        field = m.get(field_name)
        row.append(field.value if field is not None else None)

      rows.append(row)

    if timestamp_index:
      return pandas.DataFrame(rows, columns=fields, index=timestamps)
    else:
      return pandas.DataFrame(rows, columns=fields)

  def _detect_start_stop_events(self, records):
    """Detects periods of inactivity by comparing speed to a threshold value.

    Args:
      records: Iterable of fitparse messages. They must contain a 'speed' field.

    Returns:
      A DataFrame indexed by timestamp with these columns:
        - 'event_type': value is one of {'start','stop'}
        - 'timer_trigger': always the string 'detected', so that these
          start/stop events can be distinguished from those present in the
          .fit file.

      Each row is one event, and its timestamp is guaranteed to be that of a
      record in the given iterable of messages.

      When the speed of a record drops below the threshold speed a 'stop' event
      is created with its timestamp, and when the speed rises above the
      threshold speed a 'start' event is created with its timestamp.
    """
    stopped = False
    timestamps = []
    events = []
    for i, record in enumerate(records):
      ts = record.get('timestamp').value

      if i == 0:
        # The activity implicitly starts with its first record.
        timestamps.append(ts)
        events.append([self.EVENT_TYPE_START, self.TIMER_TRIGGER_DETECTED])
      elif record.get('speed') is not None:
        speed = record.get('speed').value
        if speed <= self.STOPPED_THRESHOLD:
          # Only emit a stop event on the transition into the stopped state.
          if not stopped:
            timestamps.append(ts)
            events.append([self.EVENT_TYPE_STOP, self.TIMER_TRIGGER_DETECTED])

          stopped = True
        else:
          # Transition out of the stopped state emits a start event.
          if stopped:
            timestamps.append(ts)
            events.append([self.EVENT_TYPE_START, self.TIMER_TRIGGER_DETECTED])
            stopped = False

    return pandas.DataFrame(events, columns=['event_type', 'timer_trigger'],
                            index=timestamps)

  def _clean_up_power_and_cadence(self):
    """Infers true value of null power and cadence values in simple cases."""
    # If cadence in NaN and power is 0, assume cadence is 0
    self.data.loc[self.data['cadence'].isnull()
                  & (self.data['power'] == 0.0), 'cadence'] = 0.0

    # If power in NaN and cadence is 0, assume power is 0
    self.data.loc[self.data['power'].isnull()
                  & (self.data['cadence'] == 0.0), 'power'] = 0.0

    # If both power and cadence are NaN, assume they're both 0
    power_and_cadence_null = (
        self.data['cadence'].isnull() & self.data['power'].isnull())
    self.data.loc[power_and_cadence_null, 'power'] = 0.0
    self.data.loc[power_and_cadence_null, 'cadence'] = 0.0

  @property
  def moving_time(self):
    """Moving time as a datetime.timedelta, memoized after first access."""
    if self._moving_time is None:
      moving_time = 0
      for _, block_df in self.data.groupby(level='block'):
        # Calculate the number of seconds elapsed since the previous data point
        # and sum them to get the moving time
        moving_time += (
            (block_df['timestamp'] - block_df['timestamp'].shift(1).fillna(
                block_df.iloc[0]['timestamp'])) / np.timedelta64(1, 's')).sum()

      self._moving_time = datetime.timedelta(seconds=moving_time)

    return self._moving_time

  @property
  def has_power(self):
    return 'power' in self.data.columns

  @property
  def has_cadence(self):
    return 'cadence' in self.data.columns

  @property
  def has_heart_rate(self):
    return 'heart_rate' in self.data.columns

  @property
  def cadence(self):
    """Series of non-null, non-zero cadence values, or None if absent."""
    if not self.has_cadence:
      return None

    if self._remove_stopped_periods:
      return self.data[
          self.data['cadence'].notnull() & (self.data['cadence'] > 0)
          & (self.data['speed'] > self.STOPPED_THRESHOLD)]['cadence']

    return self.data[
        self.data['cadence'].notnull() & (self.data['cadence'] > 0)]['cadence']

  @property
  def mean_cadence(self):
    if not self.has_cadence:
      return None

    return self.cadence.mean()

  @property
  def heart_rate(self):
    """Series of non-null heart rate values, or None if absent."""
    if not self.has_heart_rate:
      return None

    if self._remove_stopped_periods:
      # The speed comparison must be parenthesized: & binds more tightly
      # than > in Python, so without parentheses this expression evaluates
      # as (notnull & speed) > threshold, which is not a valid filter.
      return self.data[
          self.data['heart_rate'].notnull()
          & (self.data['speed'] > self.STOPPED_THRESHOLD)]['heart_rate']

    return self.data[self.data['heart_rate'].notnull()]['heart_rate']

  @property
  def mean_heart_rate(self):
    if not self.has_heart_rate:
      return None

    return self.heart_rate.mean()

  @property
  def power(self):
    """Series of non-null power values, or None if absent."""
    if not self.has_power:
      return None

    if self._remove_stopped_periods:
      # Parenthesized for the same precedence reason as in heart_rate:
      # & binds more tightly than >.
      return self.data[self.data['power'].notnull()
                       & (self.data['speed'] > self.STOPPED_THRESHOLD)]['power']

    return self.data[self.data['power'].notnull()]['power']

  @property
  def mean_power(self):
    if not self.has_power:
      return None

    return self.power.mean()

  @property
  def norm_power(self):
    """Calculates the normalized power.

    See (Coggan, 2003) cited in README for details on the rationale behind the
    calculation.

    Normalized power is based on a 30-second moving average of power. Coggan's
    algorithm specifies that the moving average should start at the 30 second
    point in the data, but this implementation does not (it starts with the
    first value, like a standard moving average). This is an acceptable
    approximation because normalized power shouldn't be relied upon for efforts
    less than 20 minutes long (Coggan, 2012), so how the first 30 seconds are
    handled doesn't make much difference. Also, the values computed by this
    implementation are very similar to those computed by TrainingPeaks, so
    changing the moving average implementation doesn't seem to be critical.

    This function also does not specially handle gaps in the data. When a pause
    is present in the data (either from autopause on the recording device or
    removal of stopped periods in post-processing) the timestamp may jump by a
    large amount from one sample to the next. Ideally this should be handled in
    some way that takes into account the physiological impact of that rest, but
    currently this algorithm does not. But again, the values computed by this
    implementation are very similar to those computed by TrainingPeaks, so
    changing gap handling doesn't seem to be critical.

    Returns:
      Normalized power as a float
    """
    if not self.has_power:
      return None

    if self._norm_power is None:
      p = self.power
      p.index = p.index.droplevel(level='block')
      # Fourth root of the mean fourth power of the 30-second moving average.
      self._norm_power = (
          np.sqrt(np.sqrt(
              np.mean(fitanalysis.util.moving_average(p, 30) ** 4))))

    return self._norm_power

  def intensity(self, ftp):
    """Calculates the intensity factor of the activity.

    Intensity factor is defined as the ratio of normalized power to FTP.
    See (Coggan, 2016) cited in README for more details.

    Args:
      ftp: Functional threshold power in Watts.

    Returns:
      Intensity factor as a float
    """
    if not self.has_power:
      return None

    return self.norm_power / float(ftp)

  def training_stress(self, ftp):
    """Calculates the training stress of the activity.

    This is essentially a power-based version of Banister's heart rate-based
    TRIMP (training impulse). Andrew Coggan's introduction of TSS and IF
    specifies that average power should be used to calculate training stress
    (Coggan, 2003), but a later post on TrainingPeaks' blog specifies that
    normalized power should be used (Friel, 2009). Normalized power is used
    here because it yields values in line with the numbers from TrainingPeaks;
    using average power does not.

    Args:
      ftp: Functional threshold power in Watts.

    Returns:
      Training stress as a float
    """
    if not self.has_power:
      return None

    return (self.moving_time.total_seconds() * self.norm_power
            * self.intensity(ftp)) / (float(ftp) * 3600.0) * 100.0
425 |
--------------------------------------------------------------------------------