├── README.md
├── pipeline
│   ├── endpoints.py
│   ├── headers.py
│   ├── collect.py
│   ├── core.py
│   └── utils.py
├── data
│   └── config_template.py
├── LICENSE
├── .gitignore
└── conda_environment.yml

/README.md:
--------------------------------------------------------------------------------
# clockify-timesheet

Using the Clockify API with Python & Requests to populate a timesheet
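
## Setup

A rough outline of getting the pipeline running (the module-run command below
is this README's suggestion rather than a tested instruction):

1. Create the environment: `conda env create -f conda_environment.yml`
2. Copy `data/config_template.py` to `data/config.py` and fill in your API key, workspace ID and user ID
3. From the repo root, run the pipeline with e.g. `python -m pipeline.core`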
--------------------------------------------------------------------------------
/pipeline/endpoints.py:
--------------------------------------------------------------------------------
from data import config

EP = {
    # TODO: needs pagination support -- see the fetch_all_time_entries()
    # sketch in pipeline/collect.py
    "ep_te": (
        config.ENDPOINT
        + "/workspaces/"
        + config.WORKSPACE_ID
        + "/user/"
        + config.USER_ID
        + "/time-entries?page-size=2000"
    ),
    "ep_ws": config.ENDPOINT + "/workspaces",
    "ep_pr": config.ENDPOINT + "/workspaces/" + config.WORKSPACE_ID + "/projects?",
}

--------------------------------------------------------------------------------
/data/config_template.py:
--------------------------------------------------------------------------------
"""Rename this file to config.py (so that it lives at data/config.py) and fill in the blanks."""


VERBOSE = True

# Project details

CLIENT = ""  # Add a client here for the clientName filter

# CREDENTIALS
API_KEY = {"X-Api-Key": "______________"}
WORKSPACE_ID = "________________"
USER_ID = "___________________________"  # see the Clockify docs, or the sketch below, for how to obtain your user id

# ENDPOINTS
ENDPOINT = "https://api.clockify.me/api/v1"
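
# One way to look up your USER_ID (a sketch, not part of the pipeline: it
# assumes the v1 /user endpoint returns the currently authenticated user,
# which is worth confirming against the Clockify API docs):
#
#   import requests
#   me = requests.get(ENDPOINT + "/user", headers=API_KEY)
#   print(me.json()["id"])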
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 Ben Jones

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/pipeline/headers.py:
--------------------------------------------------------------------------------
"""Used by drop_headers(): columns marked False here are dropped, columns marked True are kept."""

drop_filter = {
    "id_timeEntry": True,
    "description": True,
    "tagIds": False,
    "userId": False,
    "billable_timeEntry": True,
    "taskId": False,  # potentially very useful
    "projectId": False,  # obtained from the join
    "timeInterval": False,  # may need to drop this as part of the unzipping
    "workspaceId_timeEntry": False,  # assumed irrelevant
    "isLocked": False,  # premium feature
    "customFieldValues": False,  # premium feature
    "start": True,
    "end": True,
    "duration_timeEntry": False,  # e.g. 2020-04-16 15:38:05+00:00 PT27M29S
    "minutes": True,
    "hours_rounded": True,
    "minutes_rounded_up": True,
    "id_proj": False,
    "name": True,
    "hourlyRate": False,  ## BUILT IN ## should use this
    "clientId": False,  # could be more accurate; stays fixed even if the client name changes
    "workspaceId_proj": False,  # e.g. 5c+6984%b079&73a56e892cb
    "billable_proj": True,
    "memberships": True,  ## investigate this
    "color": True,
    "estimate": False,  # projected time for the task
    "archived": False,
    "duration_proj": False,
    "clientName": True,
    "note": False,
    "template": False,
    "public": False,
    "_merge": True,  # check all True? from the proj-times merge
}

--------------------------------------------------------------------------------
/pipeline/collect.py:
--------------------------------------------------------------------------------
import requests
import pandas as pd
from pathlib import Path

from data import config
from pipeline import utils
from pipeline import endpoints

from joblib import Memory

CACHE_FILE = Path("./data/pkl/cache.pkl")

cachedir = "./data"
memory = Memory(cachedir, verbose=1)

# From PROJECT *CONFIG*

CLIENT = config.CLIENT

# PERSONAL CREDENTIALS
API_KEY = config.API_KEY

# API base endpoint URL (as string)
ENDPOINT = config.ENDPOINT


# @memory.cache  # (ignore=[])  # nothing to ignore
def collect(pickle=False):
    """Fetch projects and time entries from Clockify, merge them, and return one dataframe."""
    if pickle:
        if CACHE_FILE.is_file():
            # TODO: add cache expiry here -- see the cache_is_fresh() sketch below
            df = pd.read_pickle(CACHE_FILE)
            return df
        else:
            return collect(pickle=False)
    else:
        ### LOAD *PROJECTS* FROM CLOCKIFY ###

        r_pr = requests.get(endpoints.EP["ep_pr"], headers=API_KEY)
        df_projects = pd.DataFrame(r_pr.json())

        ### LOAD *TIME ENTRIES* FROM CLOCKIFY ###

        r_te = requests.get(endpoints.EP["ep_te"], headers=API_KEY)
        df_times = pd.DataFrame(r_te.json())

        # Expand the timeInterval column into start/end/duration columns

        df_times = utils.expand_time(df_times)

        # print the earliest date to check how far back the data goes
        print(
            "The oldest entry starts on: \n" + str(utils.check_min_date(df_times)) + "\n"
        )

        df = utils.merge_times_proj(df_times, df_projects)

        # Hard cache of the retrieved data
        CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        df.to_pickle(CACHE_FILE)

        return df
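

# Sketch of the pagination that pipeline/endpoints.py flags as missing. It
# assumes the Clockify time-entries endpoint accepts `page`/`page-size` query
# parameters and returns an empty list past the last page -- confirm against
# the API docs before relying on it.
def fetch_all_time_entries(page_size=500):
    base = (
        config.ENDPOINT
        + "/workspaces/"
        + config.WORKSPACE_ID
        + "/user/"
        + config.USER_ID
        + "/time-entries"
    )
    entries = []
    page = 1
    while True:
        r = requests.get(
            base, headers=API_KEY, params={"page": page, "page-size": page_size}
        )
        batch = r.json()
        if not batch:  # an empty page means everything has been fetched
            break
        entries.extend(batch)
        page += 1
    return pd.DataFrame(entries)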
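
# Sketch for the cache-expiry TODO in collect(): treat the pickle as stale
# once it is older than max_age_hours. Wiring it in might look like
# `if CACHE_FILE.is_file() and cache_is_fresh(CACHE_FILE):` -- untested.
import time


def cache_is_fresh(path, max_age_hours=24):
    age_seconds = time.time() - path.stat().st_mtime
    return age_seconds < max_age_hours * 3600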
--------------------------------------------------------------------------------
/pipeline/core.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

import pandas as pd

# import seaborn as sns

from pipeline import utils
from pipeline import endpoints
from pipeline import collect


### ANSI ESCAPE CODES TO ALLOW FORMATTING IN THE TERMINAL BELOW ###
class bcolors:
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"
    ENDC = "\033[0m"
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"


print(f"{bcolors.UNDERLINE}START{bcolors.ENDC}" + " ")
# Check all is working okay
# print(utils.check_auth())  # should be 200

# print(endpoints.EP["ep_te"])


print(f"{bcolors.BOLD}COLLECT DATA{bcolors.ENDC}" + " ")
df = collect.collect(pickle=True)

# Filter by clientName as set in config: CLIENT
# df = utils.filter_by_clientName(df)

# Fill NaNs (otherwise those rows are excluded from summaries by name or client)
df = utils.NaN_filter(df)
df = utils.drop_headers(df)

# set the index to the start date/time, then order the dataframe
df = utils.date_index_order(df)

print(f"{bcolors.OKGREEN}LIST OF HEADERS\n\n{bcolors.ENDC}" + " ")
print(df.columns.tolist())

print(f"{bcolors.WARNING}PREPARE TO FILTER{bcolors.ENDC}" + " ")
# filter by month -- e.g. for February, use month=2
df = utils.filter_year_month(df, month=4, year=0)
print(f"{bcolors.OKBLUE}FILTER{bcolors.ENDC}" + " ")


""" # irrelevant -- example of resampling
week_total = df.resample("W").sum()
day_total = df.resample("B").sum() """

# Example: group by business day and 'name'

print(f"{bcolors.OKBLUE}groupby Day\n\n\n{bcolors.ENDC}" + " ")
df_grouped_daily = df.groupby([pd.Grouper(freq="B"), "name"])

print(utils.check_min_date(df))


print(df_grouped_daily.sum())
print(utils.sum_name_grouped(df))

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Directories
/data/*
!/data/config_template.py

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

CLOCKIFY API - Dec 2019.py
to_include.py

--------------------------------------------------------------------------------
/conda_environment.yml:
--------------------------------------------------------------------------------
name: clockify
channels:
  - anaconda
  - defaults
dependencies:
  - appnope=0.1.0=py38_0
  - asn1crypto=1.3.0=py38_0
  - attrs=19.3.0=py_0
  - backcall=0.1.0=py38_0
  - blas=1.0=mkl
  - bleach=3.1.0=py_0
  - ca-certificates=2020.1.1=0
  - certifi=2020.4.5.1=py38_0
  - cffi=1.14.0=py38hb5b8e2f_0
  - chardet=3.0.4=py38_1003
  - cryptography=2.8=py38ha12b0ac_0
  - cycler=0.10.0=py38_0
  - dbus=1.13.12=h90a0687_0
  - decorator=4.4.1=py_0
  - defusedxml=0.6.0=py_0
  - entrypoints=0.3=py38_0
  - expat=2.2.6=h0a44026_0
  - freetype=2.9.1=hb4e5f40_0
  - gettext=0.19.8.1=h15daf44_3
  - glib=2.63.1=hd977a24_0
  - icu=58.2=h4b95b61_1
  - idna=2.8=py38_1000
  - importlib_metadata=1.5.0=py38_0
  - intel-openmp=2019.4=233
  - ipykernel=5.1.4=py38h39e3cac_0
  - ipython=7.12.0=py38h5ca1d4c_0
  - ipython_genutils=0.2.0=py38_0
  - ipywidgets=7.5.1=py_0
  - jedi=0.16.0=py38_0
  - jinja2=2.11.1=py_0
  - joblib=0.14.1=py_0
  - jpeg=9b=he5867d9_2
  - jsonschema=3.2.0=py38_0
  - jupyter=1.0.0=py38_7
  - jupyter_client=5.3.4=py38_0
  - jupyter_console=6.1.0=py_0
  - jupyter_core=4.6.1=py38_0
  - kiwisolver=1.0.1=py38h0a44026_0
  - libcxx=4.0.1=hcfea43d_1
  - libcxxabi=4.0.1=hcfea43d_1
  - libedit=3.1.20181209=hb402a30_0
  - libffi=3.2.1=h475c297_4
  - libgfortran=3.0.1=h93005f0_2
  - libiconv=1.15=hdd342a3_7
  - libpng=1.6.37=ha441bb4_0
  - libsodium=1.0.16=h3efe00b_0
  - markupsafe=1.1.1=py38h1de35cc_0
  - matplotlib=3.1.3=py38_0
  - matplotlib-base=3.1.3=py38h9aa3819_0
  - mistune=0.8.4=py38h1de35cc_1000
  - mkl=2019.4=233
  - mkl-service=2.3.0=py38hfbe908c_0
  - mkl_fft=1.0.15=py38h5e564d8_0
  - mkl_random=1.1.0=py38h6440ff4_0
  - nbconvert=5.6.1=py38_0
  - nbformat=5.0.4=py_0
  - ncurses=6.1=h0a44026_1
  - notebook=6.0.3=py38_0
  - numpy=1.18.1=py38h7241aed_0
  - numpy-base=1.18.1=py38h6575580_1
  - openssl=1.1.1g=h1de35cc_0
  - pandas=1.0.1=py38h6c726b0_0
  - pandoc=2.2.3.2=0
  - pandocfilters=1.4.2=py38_1
  - parso=0.6.1=py_0
  - pcre=8.43=h0a44026_0
  - pexpect=4.8.0=py38_0
  - pickleshare=0.7.5=py38_1000
  - pip=20.0.2=py38_1
  - prometheus_client=0.7.1=py_0
  - prompt_toolkit=3.0.3=py_0
  - ptyprocess=0.6.0=py38_0
  - pycparser=2.19=py_0
  - pygments=2.5.2=py_0
  - pyopenssl=19.1.0=py38_0
  - pyparsing=2.4.6=py_0
  - pyqt=5.9.2=py38h655552a_2
  - pyrsistent=0.15.7=py38h1de35cc_0
  - pysocks=1.7.1=py38_0
  - python=3.8.1=h359304d_1
  - python-dateutil=2.8.1=py_0
  - pytz=2019.3=py_0
  - pyzmq=18.1.1=py38h0a44026_0
  - qt=5.9.7=h468cd18_1
  - qtconsole=4.6.0=py_1
  - readline=7.0=h1de35cc_5
  - requests=2.22.0=py38_1
  - scipy=1.4.1=py38h44e99c9_0
  - seaborn=0.10.0=py_0
  - send2trash=1.5.0=py38_0
  - setuptools=45.2.0=py38_0
  - sip=4.19.8=py38h0a44026_0
  - six=1.14.0=py38_0
  - sqlite=3.31.1=ha441bb4_0
  - terminado=0.8.3=py38_0
  - testpath=0.4.4=py_0
  - tk=8.6.8=ha441bb4_0
  - tornado=6.0.3=py38h1de35cc_3
  - traitlets=4.3.3=py38_0
  - urllib3=1.25.8=py38_0
  - wcwidth=0.1.8=py_0
  - webencodings=0.5.1=py38_1
  - wheel=0.34.2=py38_0
  - widgetsnbextension=3.5.1=py38_0
  - xz=5.2.4=h1de35cc_4
  - zeromq=4.3.1=h0a44026_3
  - zipp=2.2.0=py_0
  - zlib=1.2.11=h1de35cc_3
prefix: /Users/ben/anaconda3/envs/clockify

--------------------------------------------------------------------------------
/pipeline/utils.py:
--------------------------------------------------------------------------------
from data import config
from pipeline import headers
import pandas as pd
import requests
import numpy as np
import datetime as dt


def check_auth(dest=config.ENDPOINT, headers=config.API_KEY):
    r = requests.get(dest + "/workspaces/", headers=headers)
    if r.status_code == 200:
        return r.status_code
    else:
        print(
            "possible authorisation issue, please check config:\n" + str(r.status_code)
        )
        return r.status_code


# TODO: consider wrapping the request above in try/except -- a sketch follows
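

# A more defensive variant of check_auth() (a sketch, not called anywhere in
# the pipeline): catches connection errors instead of letting requests raise.
# The function name and the 10-second timeout are this sketch's own choices.
def check_auth_safe(dest=config.ENDPOINT, headers=config.API_KEY):
    try:
        r = requests.get(dest + "/workspaces/", headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        print("request failed before reaching the API: " + str(e))
        return None
    if r.status_code != 200:
        print(
            "possible authorisation issue, please check config:\n" + str(r.status_code)
        )
    return r.status_code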


def expand_time(times):
    """Return a new dataframe with timeInterval expanded out and hours rounded."""
    times_expanded = times.copy()
    times_expanded["start"] = times_expanded["timeInterval"].apply(
        lambda x: pd.to_datetime(x["start"])
    )
    times_expanded["end"] = times_expanded["timeInterval"].apply(
        lambda x: pd.to_datetime(x["end"])
    )
    times_expanded["duration"] = times_expanded["timeInterval"].apply(
        lambda x: x["duration"]
    )

    times_expanded["minutes"] = (times_expanded.end - times_expanded.start).apply(
        lambda x: x.total_seconds() / 60
    )

    # hours rounded to the nearest quarter
    times_expanded["hours_rounded"] = np.round(times_expanded.minutes / 60 * 4) / 4

    # track the trimmed minutes: positive values mean the entry was rounded up
    times_expanded["minutes_rounded_up"] = (
        np.round(times_expanded.minutes / 60 * 4) / 4 * 60 - times_expanded.minutes
    )

    return times_expanded


def check_min_date(df):
    return df["start"].min()


def status_trimmed(df):
    print("hours rounded total = " + str(df["hours_rounded"].sum()))
    print("sum minutes rounded up = " + str(df["minutes_rounded_up"].sum()))


def merge_times_proj(left, right):
    merged = pd.merge(
        left,  # should be the time entries
        right,  # should be the projects
        how="left",
        left_on="projectId",
        right_on="id",
        left_index=False,
        right_index=False,
        sort=True,
        suffixes=("_timeEntry", "_proj"),
        copy=True,
        indicator=True,
        validate="m:1",
    )
    return merged


def filter_by_clientName(df):
    filtered = df[df["clientName"] == config.CLIENT]
    return filtered


def date_index_order(df):
    df["startIndex"] = df["start"]  # copy the start date
    df = df.set_index(
        "startIndex", drop=True, verify_integrity=True
    )  # use the copied column as the index; the column itself is dropped
    df.index = pd.to_datetime(df.index)
    df = df.sort_index(ascending=False)  # newest entries first
    return df


def filter_year_month(df, year=0, month=0):
    """Filter a datetime-indexed dataframe by integer year/month.

    The default (0) means the current year/month; pass "all" to skip that filter.
    """

    if year == 0:
        df = df[df.index.year.isin([dt.datetime.now().year])]
    elif year == "all":
        pass  # "all" skips the year filter
    else:
        df = df[df.index.year.isin([year])]

    if month == 0:
        df = df[df.index.month.isin([dt.datetime.now().month])]
    elif month == "all":
        pass  # "all" skips the month filter
    else:
        df = df[df.index.month.isin([month])]

    return df
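
# Usage sketch for filter_year_month():
#   filter_year_month(df, month=2)                  -> February of the current year
#   filter_year_month(df, year=2020, month=4)       -> April 2020
#   filter_year_month(df, year="all", month="all")  -> no filtering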


def sum_name_grouped(df):
    df = df.groupby("name").sum()
    return df


def NaN_filter(df):
    """Replace NaNs so rows aren't excluded from the summaries by name or client."""
    df["name"].fillna("No Project", inplace=True)
    df["clientName"].fillna("No Client", inplace=True)
    return df


def drop_headers(df):
    """Drop every column that headers.drop_filter marks False."""
    to_drop = []
    for x in df.columns.tolist():
        if not headers.drop_filter.get(x, True):  # unknown columns are kept
            to_drop.append(x)
    print("Dropping:\n" + str(to_drop))
    df.drop(to_drop, axis=1, inplace=True)
    print("Dropped")
    return df
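

# Sketch of a final output step for the README's "populate a timesheet" goal:
# nothing in the pipeline writes a report yet (only the pickle cache), so this
# helper -- whose name and CSV format are this sketch's own choices -- shows
# one way it could work, e.g. export_timesheet(sum_name_grouped(df)).
def export_timesheet(df, path="timesheet.csv"):
    df.to_csv(path)
    return path

--------------------------------------------------------------------------------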