├── fitanalysis ├── __init__.py ├── util.py └── activity.py ├── resources └── TSS_and_IF_Coggan_2003.pdf ├── setup.py ├── LICENSE ├── .gitignore └── README.md /fitanalysis/__init__.py: -------------------------------------------------------------------------------- 1 | from fitanalysis.activity import Activity 2 | 3 | 4 | __version__ = '0.0.1' 5 | __all__ = ['Activity'] 6 | -------------------------------------------------------------------------------- /resources/TSS_and_IF_Coggan_2003.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mtraver/python-fitanalysis/HEAD/resources/TSS_and_IF_Coggan_2003.pdf -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | import sys 3 | 4 | import fitanalysis 5 | 6 | 7 | requires = ['fitparse', 'numpy', 'pandas'] 8 | if sys.version_info < (2, 7): 9 | requires.append('argparse') 10 | 11 | with open('LICENSE', 'r') as f: 12 | license_content = f.read() 13 | 14 | setup(name='fitanalysis', 15 | version=fitanalysis.__version__, 16 | description='Python library for analysis of ANT/Garmin .fit files', 17 | author='Michael Traver', 18 | url='https://github.com/mtraver/python-fitanalysis', 19 | license=license_content, 20 | packages=['fitanalysis'], 21 | install_requires=requires) 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Michael Traver 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
# ----------------------------------------------------------------------------
# Non-code text that shares these source lines with the module below.
# Kept verbatim (as comments) so nothing is lost.
#
# /.gitignore (continued)
#
#   *.manifest
#   *.spec
#
#   # Installer logs
#   pip-log.txt
#   pip-delete-this-directory.txt
#
#   # Unit test / coverage reports
#   htmlcov/
#   .tox/
#   .coverage
#   .coverage.*
#   .cache
#   nosetests.xml
#   coverage.xml
#   *.cover
#   .hypothesis/
#
#   # Translations
#   *.mo
#   *.pot
#
#   # Django stuff:
#   *.log
#   local_settings.py
#
#   # Flask stuff:
#   instance/
#   .webassets-cache
#
#   # Scrapy stuff:
#   .scrapy
#
#   # Sphinx documentation
#   docs/_build/
#
#   # PyBuilder
#   target/
#
#   # Jupyter Notebook
#   .ipynb_checkpoints
#
#   # pyenv
#   .python-version
#
#   # celery beat schedule file
#   celerybeat-schedule
#
#   # SageMath parsed files
#   *.sage.py
#
#   # Environments
#   .env
#   .venv
#   env/
#   venv/
#   ENV/
#
#   # Spyder project settings
#   .spyderproject
#   .spyproject
#
#   # Rope project settings
#   .ropeproject
#
#   # mkdocs documentation
#   /site
#
#   # mypy
#   .mypy_cache/
#
#   .DS_Store
#
# ----------------------------------------------------------------------------
# /fitanalysis/util.py
# ----------------------------------------------------------------------------
"""Utility functions."""
import numpy as np
import pandas


def moving_average(time_series, window_len):
    """Calculates the moving average of an unevenly spaced time series.

    This moving average implementation weights each value by the time it
    remained unchanged, which conceptually matches smart recording on GPS
    devices: a sample is taken when some value changes sufficiently, so before
    a new sample is taken the previous one is assumed to be more or less
    constant.

    The term "area" below means a sum of time-weighted values.

    This implementation follows the SMA_last algorithm proposed
    in (Eckner, 2017) (see README for citation).

    The input series is not modified; the timestamps are converted to
    whole-second offsets from the first sample in a local array.

    Args:
        time_series: A pandas.Series of the values to average,
                     indexed with timestamps.
        window_len: The size of the moving average window, in seconds.
                    Must be positive.

    Returns:
        A numpy array of length len(time_series) containing the
        moving average values. An empty array is returned for an empty series.

    Raises:
        ValueError: If window_len is not positive.
    """
    if window_len <= 0:
        raise ValueError('window_len must be positive')

    n = len(time_series)
    if n == 0:
        return np.zeros(0)

    # Durations in (truncated) whole seconds from the first sample. Computed
    # into a local array rather than assigned back to time_series.index so the
    # caller's Series is left untouched.
    index = time_series.index
    times = np.asarray(
        ((index - index[0]) / np.timedelta64(1, 's')).astype('int'))
    values = np.asarray(time_series.values, dtype=float)

    # Before the first sample the series is assumed to hold its first value,
    # so the initial window is a full window of that value.
    window_area = values[0] * window_len

    # It may not always be possible to construct a window of length exactly
    # equal to window_len using timestamps present in the data. To handle this,
    # the left side of the window is allowed to fall between timestamps (the
    # right side is always fixed to a timestamp in the data). Therefore we need
    # to separately compute the area of the inter-timestamp region on the left
    # side of the window so that it can be added to the window area. left_area
    # is that value.
    left_area = window_area

    out = np.zeros(n)
    out[0] = values[0]

    # left is the left side of the window and right is the right side
    left = 0
    for right in range(1, n):
        # Remove the last iteration's left_area as a new right window bound may
        # change the left_area required in this iteration
        window_area -= left_area

        # Expand window to the right
        window_area += values[right - 1] * (times[right] - times[right - 1])

        # Shrink window from the left if expanding to the right has created too
        # large a window. new_left_time may fall between timestamps present in
        # the data, which is fine, since that's handled by left_area.
        new_left_time = times[right] - window_len
        while times[left] < new_left_time:
            window_area -= values[left] * (times[left + 1] - times[left])
            left += 1

        # Add left side inter-timestamp area to window
        left_area = values[max(0, left - 1)] * (times[left] - new_left_time)
        window_area += left_area

        out[right] = window_area / window_len

    return out


def print_full(df):
    """Prints a DataFrame in full.

    The 'display.max_rows' option is raised to len(df) only for the duration
    of the print, and the previous value is restored afterwards (even if
    printing raises).

    Args:
        df: The pandas.DataFrame to print.
    """
    with pandas.option_context('display.max_rows', len(df)):
        print(df)


# ----------------------------------------------------------------------------
# Non-code text that shares these source lines with the module above.
# Kept verbatim (as comments) so nothing is lost.
#
# /README.md (beginning; continues on the following source line)
#
#   # fitanalysis
#   fitanalysis is a Python library for analysis of ANT/Garmin `.fit` files.
#
#   It's geared toward cycling and allows for easy extraction of data such as
#   the following from a `.fit` file:
#   - elapsed time
#   - moving time
#   - average heart rate
#   - average power
#   - normalized power (based on information publicly available about
#     TrainingPeaks' NP®)
#   - intensity (based on information publicly available about
#     TrainingPeaks' IF®)
#   - training stress (based on information publicly available about
#     TrainingPeaks' TSS®)
#
#   My impetus for this project was to better understand how platforms like
#   TrainingPeaks analyze power and heart rate data to arrive at an estimation
#   of training stress. As such, this project attempts to match those
#   platforms' calculations as closely as possible.
#
#   # Dependencies and installation
#   [Pandas](http://pandas.pydata.org/), [NumPy](http://www.numpy.org/), and
#   [fitparse](https://github.com/dtcooper/python-fitparse) are required.
#
#   `python setup.py install` (or `python setup.py install --user`) to
#   install.
# ----------------------------------------------------------------------------
26 | 27 | # Example 28 | 29 | fitanalysis provides the `Activity` class. 30 | 31 | ```python 32 | import fitanalysis 33 | 34 | activity = fitanalysis.Activity('my_activity.fit') 35 | 36 | print activity.elapsed_time 37 | print activity.moving_time 38 | 39 | # Also available for heart rate and cadence 40 | print activity.mean_power 41 | 42 | print activity.norm_power 43 | 44 | # Intensity and training stress calculations require 45 | # a functional threshold power value (in Watts) 46 | print activity.intensity(310) 47 | print activity.training_stress(310) 48 | ``` 49 | 50 | Construction of an `Activity` parses the `.fit` file and detects periods of 51 | inactivity, as such periods must be removed from the data for heart rate-, 52 | cadence-, and power-based calculations. 53 | 54 | # Comparison of activity analysis platforms 55 | 56 | Here is a comparison for a few of my rides of varying profiles across the 57 | various platforms. 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 
241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 |
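| | fitanalysis | TrainingPeaks | Garmin Connect | Strava |
|---|---|---|---|---|
| **Ride 1: epic ride**<br>126.5 mi<br>15207 ft climbing | | | | |
| Elapsed time | 12:19:40 | \* | 12:19:20 | 12:19:40 |
| Moving time | 9:07:14 | - | 9:06:12 | 9:09:26 |
| Mean power | 182 W | 183 W | 183 W | 183 W |
| Norm. power | 232 W | 232 W | 232 W | - |
| Intensity | 0.74 | 0.74 | 0.74 | - |
| Training stress | 504.0 | 505.1 | 503.2 | - |
| **Ride 2: interval workout**<br>11.9 mi<br>1352 ft climbing | | | | |
| Elapsed time | 1:32:34 | \* | 1:32:34 | 1:32:34 |
| Moving time | 57:17 | - | 57:11 | 57:51 |
| Mean power | 172 W | 168 W | 168 W | 172 W |
| Norm. power | 289 W | 286 W | 287 W | - |
| Intensity | 0.93 | 0.92 | 0.92 | - |
| Training stress | 81.7 | 82.3 | 83.1 | - |
| **Ride 3: tempo**<br>25.4 mi<br>2451 ft climbing | | | | |
| Elapsed time | 2:09:02 | 2:08:58 | 2:08:58 | 2:09:02 |
| Moving time | 1:32:39 | - | 1:32:23 | 1:32:43 |
| Mean power | 201 W | 201 W | 201 W | 202 W |
| Norm. power | 270 W | 269 W | 270 W | - |
| Intensity | 0.86 | 0.86 | 0.87 | - |
| Training stress | 115.3 | 114.1 | 115.1 | - |
| **Ride 4: "coffee pace"**<br>13.4 mi<br>902 ft climbing | | | | |
| Elapsed time | 1:41:24 | 1:41:23 | 1:41:23 | 1:41:24 |
| Moving time | 57:15 | - | 57:02 | 57:23 |
| Mean power | 138 W | 139 W | 139 W | 139 W |
| Norm. power | 251 W | 252 W | 252 W | - |
| Intensity | 0.80 | 0.81 | 0.81 | - |
| Training stress | 61.6 | 61.6 | 61.2 | - |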
253 | 254 | \- Data not available on this platform 255 | 256 | \* Didn't calculate. TrainingPeaks doesn't directly report elapsed time so it 257 | has to be manually summed from lap durations, and these rides have lots of 258 | laps. 259 | 260 | ## Conclusions 261 | 262 | - Garmin Connect is the most aggressive when calculating moving time, Strava is 263 | the most lenient, and fitanalysis falls in between. 264 | - Mean power calculated by fitanalysis is at most 1 W different than mean power 265 | calculated by another platform. 266 | - Normalized power calculated by fitanalysis is at most 2 W different than 267 | normalized power calculated by another platform. 268 | - Training stress calculated by fitanalysis corresponds well to other platforms 269 | across a large range. 270 | 271 | ## Autopause and inactivity handling 272 | 273 | All of the activities in the table above were recorded with autopause enabled, 274 | so they don't highlight any differences in how each platform handles long 275 | periods of inactivity. To test this I recorded a ride with autopause disabled, 276 | and then used fitanalysis to analyze it in two ways: detecting and removing 277 | periods of inactivity (the default for fitanalysis), and leaving the data as-is. 278 | This activity includes a 2-minute period of inactivity, in addition to shorter 279 | stops e.g. at stop lights. 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 |
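| | fitanalysis<br>(inactivity removed) | fitanalysis<br>(inactivity not removed) | TrainingPeaks | Garmin Connect | Strava |
|---|---|---|---|---|---|
| Elapsed time | 34:54 | 34:54 | 34:54 | 34:54 | 34:54 |
| Moving time | 30:48 | 34:54 | - | 30:57 | 31:12 |
| Mean power | 247 W | 219 W | 220 W | 220 W | 248 W |
| Norm. power | 279 W | 271 W | 272 W | 272 W | - |
| Intensity | 0.89 | 0.87 | 0.87 | 0.87 | - |
| Training stress | 41.1 | 43.8 | 43.6 | 43.7 | - |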
340 | 341 | Average power with periods of inactivity removed matches Strava's average power, 342 | but not TrainingPeaks or Garmin Connect. They calculate average power from the 343 | raw data. 344 | 345 | TrainingPeaks and Garmin Connect also calculate normalized power from the 346 | raw data. 347 | 348 | Garmin Connect does calculate moving time but it appears not to use it for the 349 | power calculations. If inactivity isn't removed from the power data then 350 | elapsed time should indeed be used for consistency, but the choice to remove 351 | the inactivity for the purpose of moving time calculation and not do so for 352 | power is puzzling. 353 | 354 | Because Strava removes inactivity for power calculations, both approaches seem 355 | to be accepted. It's my opinion that removing inactivity is the correct 356 | approach because, depending on the length of inactivity, not doing so can lead 357 | to an inflated or deflated estimation of the effort during periods of activity. 358 | One counter-argument I can see is for structured workouts: it may be desirable 359 | to include the rest periods in calculations of intensity and training stress 360 | because in this case the length of the rest is deliberately chosen as part of 361 | the workout. Perhaps this is the reason for TrainingPeaks' implementation? 362 | 363 | This is only one data point, so looking at some more rides would be interesting, 364 | but one takeaway from this example is this: want to inflate your TSS? Try 365 | disabling autopause (and don't take _really_ long breaks, but apparently 366 | moderately long breaks are fine). 367 | 368 | # References 369 | 370 | Coggan, Andrew. (2012, June 20). _Calculate Normalised Power for an Interval._ [Forum comment]. Retrieved June 14, 2017, from http://www.timetriallingforum.co.uk/index.php?/topic/69738-calculate-normalised-power-for-an-interval/&do=findComment&comment=978386 371 | 372 | Coggan, Andrew. (2016, February 10). 
# ----------------------------------------------------------------------------
# Non-code text that shares these source lines with the module below.
# Kept verbatim (as comments) so nothing is lost.
#
# /README.md (end; the first entry continues the citation begun on the
# preceding source line)
#
#   _Normalized Power, Intensity Factor and Training Stress Score._ Retrieved
#   June 14, 2017, from
#   https://www.trainingpeaks.com/blog/normalized-power-intensity-factor-training-stress/
#
#   Coggan, Andrew. (2003, March 13). _TSS and IF - at last!_ Retrieved
#   June 14, 2017, from
#   http://lists.topica.com/lists/wattage/read/message.html?mid=907028398&sort=d&start=9353
#
#   Eckner, Andreas. (2017, April 3). _Algorithms for Unevenly Spaced Time
#   Series: Moving Averages and Other Rolling Operators._ Retrieved
#   June 14, 2017, from
#   http://eckner.com/papers/Algorithms%20for%20Unevenly%20Spaced%20Time%20Series.pdf
#
#   Friel, Joe. (2009, Sept 21). _Estimating Training Stress Score (TSS)._
#   Retrieved June 22, 2017, from
#   https://www.trainingpeaks.com/blog/estimating-training-stress-score-tss/
#
#   # License
#   This project is licensed under the MIT License. See
#   [LICENSE](https://github.com/mtraver/fitanalysis/blob/master/LICENSE) file
#   for details.
#
# ----------------------------------------------------------------------------
# /fitanalysis/activity.py
# ----------------------------------------------------------------------------
import datetime
import numpy as np
import pandas

import fitparse

import fitanalysis.util


# Set to True to add a column to the DataFrame indicating whether a row would
# have been removed if removal of stopped periods were enabled, but don't
# actually remove it.
DEBUG_EXCISE = False


class Activity(fitparse.FitFile):
    """Represents an activity recorded as a .fit file.

    Construction of an Activity parses the .fit file and detects periods of
    inactivity, as such periods must be removed from the data for heart rate-,
    cadence-, and power-based calculations.
    """

    EVENT_TYPE_START = 'start'
    EVENT_TYPE_STOP = 'stop'

    TIMER_TRIGGER_DETECTED = 'detected'

    # Speeds less than or equal to this value (in m/s) are
    # considered to be stopped
    STOPPED_THRESHOLD = 0.3

    def __init__(self, file_obj, remove_stopped_periods=True):
        """Creates an Activity from a .fit file.

        Args:
            file_obj: A file-like object representing a .fit file.
            remove_stopped_periods: If True, regions of data with speed below
                                    a threshold will be removed from the data.
                                    Default is True.
        """
        super(Activity, self).__init__(file_obj)

        # DEBUG_EXCISE forces detection on so the 'excise' column can be
        # computed even when the caller asked not to remove stopped periods.
        self._remove_stopped_periods = remove_stopped_periods or DEBUG_EXCISE

        records = list(self.get_messages('record'))

        # Get elapsed time before modifying the data
        self.start_time = records[0].get('timestamp').value
        self.end_time = records[-1].get('timestamp').value
        self.elapsed_time = self.end_time - self.start_time

        # Calculated when needed and memoized here
        self._moving_time = None
        self._norm_power = None

        self.events = self._df_from_messages(
            self.get_messages('event'),
            ['event', 'event_type', 'event_group', 'timer_trigger', 'data'],
            timestamp_index=True)

        # We will build a DataFrame with these fields as columns. Values for
        # each of these fields will be extracted from each record from the
        # .fit file.
        fields = ['timestamp', 'speed', 'heart_rate', 'power', 'cadence']

        # The primary index of the DataFrame is the "block". A block is defined
        # as a period of movement. Blocks may be defined by start/stop event
        # messages from the .fit file, or they may be detected based on speed
        # in the case that the recording device did not automatically pause
        # recording when stopped.
        blocks = []
        curr_block = -1

        # The secondary index is the duration from the start of the activity
        time_offsets = []

        # Get start/stop events from .fit file and combine with the events
        # detected from speed data, keeping the event from the .fit file if
        # timestamps are identical
        timer_events = self.events[self.events['event'] == 'timer']

        if self._remove_stopped_periods:
            # Detect start/stop events based on stopped threshold speed. If
            # the recording device did not have autopause enabled then this is
            # the only way periods of no movement can be detected and removed.
            detected_events = self._detect_start_stop_events(records)
            timer_events = timer_events.combine_first(detected_events)

        # Build the rows and indices of the DataFrame
        excise = False
        event_index = 0
        rows = []
        for record in records:
            curr_timestamp = record.get('timestamp').value

            # Match data record timestamps with event timestamps in order to
            # mark "blocks" as described above. Periods of no movement will be
            # excised (if the recording device did not have autopause enabled
            # there will be blocks of no movement that should be removed
            # before data analysis).
            if event_index < len(timer_events) and (
                    curr_timestamp >= timer_events.iloc[event_index].name):

                # Events usually have timestamps that correspond to a data
                # timestamp, but this isn't always the case. Process events
                # until the events catch up with the data.
                while True:
                    event_type = timer_events.iloc[event_index]['event_type']
                    trigger = timer_events.iloc[event_index]['timer_trigger']

                    if event_type == self.EVENT_TYPE_START:
                        curr_block += 1

                        # If we've seen a start event we should not be
                        # excising data
                        # TODO(mtraver) Do I care if the start event is
                        # detected or from the .fit file? I don't think so.
                        excise = False
                    elif event_type.startswith(self.EVENT_TYPE_STOP):
                        # If the stop event was detected based on speed,
                        # excise the region until the next start event,
                        # because we know that it's a region of data with
                        # speed under the stopped threshold.
                        if trigger == self.TIMER_TRIGGER_DETECTED:
                            excise = True

                    event_index += 1

                    # Once the event timestamp is ahead of the data timestamp
                    # we can continue processing data; the next event will be
                    # processed as the data timestamps catch up with it.
                    if event_index >= len(timer_events) or (
                            curr_timestamp
                            < timer_events.iloc[event_index].name):
                        break

            if not excise or DEBUG_EXCISE:
                # Build indices
                time_offsets.append(curr_timestamp - self.start_time)
                blocks.append(curr_block)

                row = []
                for field_name in fields:
                    field = record.get(field_name)
                    row.append(field.value if field is not None else None)

                if DEBUG_EXCISE:
                    row.append(excise)

                rows.append(row)

        assert len(blocks) == len(time_offsets)

        if DEBUG_EXCISE:
            fields += ['excise']

        self.data = pandas.DataFrame(rows, columns=fields,
                                     index=[blocks, time_offsets])
        self.data.index.names = ['block', 'offset']

        # These fields may not exist in all .fit files,
        # so drop the columns if they're not present.
        for field in ['power', 'cadence', 'heart_rate']:
            if self.data[self.data[field].notnull()].empty:
                self.data.drop(field, axis=1, inplace=True)

        if self.has_power and self.has_cadence:
            self._clean_up_power_and_cadence()

    def _df_from_messages(self, messages, fields, timestamp_index=False):
        """Creates a DataFrame from an iterable of fitparse messages.

        Args:
            messages: Iterable of fitparse messages.
            fields: List of message fields to include in the DataFrame. Each
                    one will be a separate column, and if a field isn't
                    present in a particular message, its value will be set to
                    None.
            timestamp_index: If True, message timestamps will be used as the
                             index of the DataFrame. Otherwise the default
                             index is used. Default is False.

        Returns:
            A DataFrame with one row per message and columns for each of
            the given fields.
        """
        rows = []
        timestamps = []
        for m in messages:
            timestamps.append(m.get('timestamp').value)

            row = []
            for field_name in fields:
                field = m.get(field_name)
                row.append(field.value if field is not None else None)

            rows.append(row)

        if timestamp_index:
            return pandas.DataFrame(rows, columns=fields, index=timestamps)
        else:
            return pandas.DataFrame(rows, columns=fields)

    def _detect_start_stop_events(self, records):
        """Detects periods of inactivity by comparing speed to a threshold.

        Args:
            records: Iterable of fitparse messages. They must contain a
                     'speed' field.

        Returns:
            A DataFrame indexed by timestamp with these columns:
                - 'event_type': value is one of {'start','stop'}
                - 'timer_trigger': always the string 'detected', so that these
                  start/stop events can be distinguished from those present in
                  the .fit file.

            Each row is one event, and its timestamp is guaranteed to be that
            of a record in the given iterable of messages.

            When the speed of a record drops below the threshold speed a
            'stop' event is created with its timestamp, and when the speed
            rises above the threshold speed a 'start' event is created with
            its timestamp.
        """
        stopped = False
        timestamps = []
        events = []
        for i, record in enumerate(records):
            ts = record.get('timestamp').value

            if i == 0:
                # The first record always opens a block
                timestamps.append(ts)
                events.append(
                    [self.EVENT_TYPE_START, self.TIMER_TRIGGER_DETECTED])
            elif record.get('speed') is not None:
                speed = record.get('speed').value
                if speed <= self.STOPPED_THRESHOLD:
                    # Only the first slow record of a run emits a stop event
                    if not stopped:
                        timestamps.append(ts)
                        events.append(
                            [self.EVENT_TYPE_STOP,
                             self.TIMER_TRIGGER_DETECTED])

                    stopped = True
                else:
                    if stopped:
                        timestamps.append(ts)
                        events.append(
                            [self.EVENT_TYPE_START,
                             self.TIMER_TRIGGER_DETECTED])
                    stopped = False

        return pandas.DataFrame(events,
                                columns=['event_type', 'timer_trigger'],
                                index=timestamps)

    def _clean_up_power_and_cadence(self):
        """Infers true value of null power and cadence in simple cases."""
        # If cadence is NaN and power is 0, assume cadence is 0
        self.data.loc[self.data['cadence'].isnull()
                      & (self.data['power'] == 0.0), 'cadence'] = 0.0

        # If power is NaN and cadence is 0, assume power is 0
        self.data.loc[self.data['power'].isnull()
                      & (self.data['cadence'] == 0.0), 'power'] = 0.0

        # If both power and cadence are NaN, assume they're both 0
        power_and_cadence_null = (
            self.data['cadence'].isnull() & self.data['power'].isnull())
        self.data.loc[power_and_cadence_null, 'power'] = 0.0
        self.data.loc[power_and_cadence_null, 'cadence'] = 0.0

    @property
    def moving_time(self):
        """Total time spent moving, as a datetime.timedelta (memoized).

        Computed as the sum, over every block, of the time elapsed between
        consecutive samples within that block.
        """
        if self._moving_time is None:
            moving_time = 0
            for _, block_df in self.data.groupby(level='block'):
                # Calculate the number of seconds elapsed since the previous
                # data point and sum them to get the moving time. The first
                # row of a block is compared against itself, contributing 0.
                moving_time += (
                    (block_df['timestamp']
                     - block_df['timestamp'].shift(1).fillna(
                         block_df.iloc[0]['timestamp']))
                    / np.timedelta64(1, 's')).sum()

            self._moving_time = datetime.timedelta(seconds=moving_time)

        return self._moving_time

    @property
    def has_power(self):
        """True if the data has a 'power' column."""
        return 'power' in self.data.columns

    @property
    def has_cadence(self):
        """True if the data has a 'cadence' column."""
        return 'cadence' in self.data.columns

    @property
    def has_heart_rate(self):
        """True if the data has a 'heart_rate' column."""
        return 'heart_rate' in self.data.columns

    @property
    def cadence(self):
        """Non-null, non-zero cadence samples, or None if there are none.

        When stopped-period removal is enabled, only rows with speed above
        STOPPED_THRESHOLD are included.
        """
        if not self.has_cadence:
            return None

        if self._remove_stopped_periods:
            return self.data[
                self.data['cadence'].notnull() & (self.data['cadence'] > 0)
                & (self.data['speed'] > self.STOPPED_THRESHOLD)]['cadence']

        return self.data[
            self.data['cadence'].notnull()
            & (self.data['cadence'] > 0)]['cadence']

    @property
    def mean_cadence(self):
        """Mean of the `cadence` series, or None if cadence is unavailable."""
        if not self.has_cadence:
            return None

        return self.cadence.mean()

    @property
    def heart_rate(self):
        """Non-null heart rate samples, or None if there are none.

        When stopped-period removal is enabled, only rows with speed above
        STOPPED_THRESHOLD are included.
        """
        if not self.has_heart_rate:
            return None

        if self._remove_stopped_periods:
            # The comparison must be parenthesized: `&` binds more tightly
            # than `>`, so without the parentheses the mask would be computed
            # as (notnull & speed) > threshold.
            return self.data[
                self.data['heart_rate'].notnull()
                & (self.data['speed']
                   > self.STOPPED_THRESHOLD)]['heart_rate']

        return self.data[self.data['heart_rate'].notnull()]['heart_rate']

    @property
    def mean_heart_rate(self):
        """Mean of the `heart_rate` series, or None if unavailable."""
        if not self.has_heart_rate:
            return None

        return self.heart_rate.mean()

    @property
    def power(self):
        """Non-null power samples, or None if there are none.

        When stopped-period removal is enabled, only rows with speed above
        STOPPED_THRESHOLD are included.
        """
        if not self.has_power:
            return None

        if self._remove_stopped_periods:
            # The comparison must be parenthesized: `&` binds more tightly
            # than `>`, so without the parentheses the mask would be computed
            # as (notnull & speed) > threshold.
            return self.data[
                self.data['power'].notnull()
                & (self.data['speed'] > self.STOPPED_THRESHOLD)]['power']

        return self.data[self.data['power'].notnull()]['power']

    @property
    def mean_power(self):
        """Mean of the `power` series, or None if power is unavailable."""
        if not self.has_power:
            return None

        return self.power.mean()

    @property
    def norm_power(self):
        """Calculates the normalized power.

        See (Coggan, 2003) cited in README for details on the rationale behind
        the calculation.

        Normalized power is based on a 30-second moving average of power.
        Coggan's algorithm specifies that the moving average should start at
        the 30 second point in the data, but this implementation does not (it
        starts with the first value, like a standard moving average). This is
        an acceptable approximation because normalized power shouldn't be
        relied upon for efforts less than 20 minutes long (Coggan, 2012), so
        how the first 30 seconds are handled doesn't make much difference.
        Also, the values computed by this implementation are very similar to
        those computed by TrainingPeaks, so changing the moving average
        implementation doesn't seem to be critical.

        This function also does not specially handle gaps in the data. When a
        pause is present in the data (either from autopause on the recording
        device or removal of stopped periods in post-processing) the timestamp
        may jump by a large amount from one sample to the next. Ideally this
        should be handled in some way that takes into account the
        physiological impact of that rest, but currently this algorithm does
        not. But again, the values computed by this implementation are very
        similar to those computed by TrainingPeaks, so changing gap handling
        doesn't seem to be critical.

        Returns:
            Normalized power as a float, or None if the activity has no
            power data.
        """
        if not self.has_power:
            return None

        if self._norm_power is None:
            p = self.power
            # Keep only the 'offset' level so the series is indexed by time
            p.index = p.index.droplevel(level='block')
            # Fourth root of the mean of the fourth power of the 30 s average
            self._norm_power = (
                np.sqrt(np.sqrt(
                    np.mean(fitanalysis.util.moving_average(p, 30) ** 4))))

        return self._norm_power

    def intensity(self, ftp):
        """Calculates the intensity factor of the activity.

        Intensity factor is defined as the ratio of normalized power to FTP.
        See (Coggan, 2016) cited in README for more details.

        Args:
            ftp: Functional threshold power in Watts.

        Returns:
            Intensity factor as a float, or None if the activity has no
            power data.
        """
        if not self.has_power:
            return None

        return self.norm_power / float(ftp)

    def training_stress(self, ftp):
        """Calculates the training stress of the activity.

        This is essentially a power-based version of Banister's heart
        rate-based TRIMP (training impulse). Andrew Coggan's introduction of
        TSS and IF specifies that average power should be used to calculate
        training stress (Coggan, 2003), but a later post on TrainingPeaks'
        blog specifies that normalized power should be used (Friel, 2009).
        Normalized power is used here because it yields values in line with
        the numbers from TrainingPeaks; using average power does not.

        Args:
            ftp: Functional threshold power in Watts.

        Returns:
            Training stress as a float, or None if the activity has no
            power data.
        """
        if not self.has_power:
            return None

        return (self.moving_time.total_seconds() * self.norm_power
                * self.intensity(ftp)) / (float(ftp) * 3600.0) * 100.0