├── .gitignore ├── LICENSE ├── README.md └── caddyLog.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Dorian Wiskow 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CaddyGoAccessDataLoggerConverter 2 | #### Caddy/GoAccess data logger & converter that translates Caddy web server JSON logs into a format that GoAccess can ingest and exploit in either batch or real-time modes. 3 | 4 | [Caddy](https://caddyserver.com) is a powerful, extensible platform to serve your sites, services, and apps, written in Go. Although most people use it as a web server or proxy, it provides a really simple, performant and flexible platform to host secure static web sites and/or web applications. 5 | 6 | [GoAccess](https://goaccess.io/) is a tool for convenient and quick analysis of access logs; it shares a philosophy (if not its development language) with Caddy in that it is self-contained and stand-alone with no dependencies (and can even generate self-contained access log file reporting in a single HTML file, that can then be auto-deployed on your web site). 7 | 8 | It is currently difficult to obtain the full benefit from GoAccess with Caddy log files as the log files output by Caddy are not in a format that can be easily ingested by GoAccess (Note: common log formats are supported by both tools, but that significantly limits the analysis and reporting). 9 | 10 | #### caddyLog.py 11 | caddyLog.py solves this problem by providing a tool to convert Caddy JSON logs into a format that GoAccess can understand, and maximises the data that is shared with GoAccess in order to optimise the analysis. 12 | 13 | caddyLog.py can use the log file(s) written by Caddy as its input in either batch or live mode. In live mode, it can monitor a 'live' Caddy log file for events being appended to it in pseudo real-time and reflect those changes immediately in a converted format log file that is streamed to GoAccess for processing. 
14 | 15 | caddyLog.py can alternatively instantiate its own TCP/IP network socket server, configured to receive Caddy log data in real-time, and stream Caddy log data to a GoAccess format log file as events happen. This enables the 'live' monitoring capabilities of GoAccess to function seamlessly with Caddy in real-time. 16 | 17 | ## Usage 18 | 19 | Copy the caddyLog.py file to your computer and make it executable (**chmod +x caddyLog.py**). You can then run caddyLog.py as shown in one of the examples detailed below. 20 | 21 | ``` 22 | ./caddyLog.py -n localhost:55555 -g access.goaccess.log -j access.json 23 | 24 | set up a TCP/IP network socket server on IP address localhost:55555 25 | and output any log data streamed to it by Caddy over the network to 26 | a file named "access.goaccess.log" (containing Caddy log data converted 27 | into a format compatible with goAccess - https://goaccess.io/) AND ALSO 28 | to an output file named "access.json" (containing the complete Caddy log 29 | data in JSON format) 30 | 31 | optionally select only the -g [--outputGoAccessFilename] OR the -j 32 | [--outputJSONfilename] to output a single file of the required 33 | format 34 | ``` 35 | 36 | 37 | ``` 38 | ./caddyLog.py -i access.log -g access.goaccess.log 39 | 40 | read in the data from the file "access.log" (in JSON format) and write 41 | out a file named "access.goaccess.log" (containing the Caddy log data 42 | converted into a format compatible with goAccess). 43 | ``` 44 | 45 | ``` 46 | ./caddyLog.py -i access.log -t 600 -g access.goaccess.log 47 | 48 | read in the data from the file "access.log" (in JSON format) and write 49 | out a file named "access.goaccess.log" (containing the Caddy log data 50 | converted into a format compatible with goAccess), then repeatedly 51 | sleep for 10 minutes (600 seconds) before checking to see if any 52 | additional Caddy log data has been written to "access.log". 
#!/usr/bin/python3

""" ***********************************************************************************

   Takes Caddy log data (in JSON format) as input (provided in the form of either an
   existing Caddy log file, or alternatively streamed over a network using TCP/IP sockets)
   and converts it into a format suitable for analysis by goAccess (https://goaccess.io/).

   To use the output file, goaccess must be run with the log-format specified as shown
   below

   goaccess access.goaccess.log --log-format="%d %t %v %h %m %U %H %s %b %T %R %u" \\
            --date-format=%F --time-format=%H:%M:%S -o access.html

   Note: when running Caddy the Caddyfile must specify "format json" and set the
         output as either a file name or a network address as shown in one of
         the two examples detailed below

   Caddyfile - file output              Caddyfile - network socket stream
   -----------------------              ---------------------------------

   localhost {                          localhost {
       file_server                          file_server
       log {                                log {
           output file access.log {             output net localhost:55555
               roll_local_time true             format json
           }                                }
           format json                  }
       }
   }

*********************************************************************************** """

import getopt
import json
import signal
import socket
import sys
from datetime import datetime
from time import sleep

# Run-time configuration. processArgs() fills these in from the command line
# and main() (through its helpers) reads them afterwards.
networkAddress = ''
inputJSONfilename = ''
timeInterval = 0
outputGoAccessFilename = ''
outputJSONfilename = ''

# Text printed by longHelp(). The option letters documented here must stay in
# step with the getopt specification used in processArgs().
_LONG_HELP = '''
   ./caddyLog.py -h -n <IPaddress:port> -i <inputJSONfilename> -t <timeInterval>
                 -g <outputGoAccessFilename> -j <outputJSONfilename>

   Example(s)
   __________

   ./caddyLog.py -n localhost:55555 -g access.goaccess.log -j access.json

       set up a TCP/IP network socket server on IP address localhost:55555
       and output any log data streamed to it by Caddy over the network to
       a file named "access.goaccess.log" (containing Caddy log data converted
       into a format compatible with goAccess - https://goaccess.io/) AND ALSO
       to an output file named "access.json" (containing the complete Caddy log
       data in JSON format)

       optionally select only the -g [--outputGoAccessFilename] OR the -j
       [--outputJSONfilename] to output a single file of the required
       format

   ./caddyLog.py -i access.log -g access.goaccess.log

       read in the data from the file "access.log" (in JSON format) and write
       out a file named "access.goaccess.log" (containing the Caddy log data
       converted into a format compatible with goAccess).

   ./caddyLog.py -i access.log -t 600 -g access.goaccess.log

       read in the data from the file "access.log" (in JSON format) and write
       out a file named "access.goaccess.log" (containing the Caddy log data
       converted into a format compatible with goAccess), then repeatedly
       sleep for 10 minutes (600 seconds) before checking to see if any
       additional Caddy log data has been written to "access.log". If
       additional data has been added to "access.log", then convert it and
       append it to "access.goaccess.log" before again sleeping.

   Options and arguments
   _____________________

   -h
   --help

       -h outputs a short usage summary, --help outputs this comprehensive
       help for users of caddyLog.py

   -n <IPaddress:port>
   --networkAddress <IPaddress:port>

       Set the IPv4 address and port to be used by the caddyLog.py TCP/IP network
       socket server. This should be the same address and port as specified
       in the Caddyfile used when Caddy is run/started.

       If this option is specified caddyLog.py will run indefinitely. To terminate
       caddyLog.py use ctrl-c or stop the task/service

   -i <inputJSONfilename>
   --inputJSONfilename <inputJSONfilename>

       The filename of an existing Caddy log file to be converted and output.

   -t <timeInterval>
   --timeInterval <timeInterval>

       The time, in seconds for which caddyLog.py will sleep, after converting the
       current content of the <inputJSONfilename>, before checking to see if any
       additional Caddy log data has been appended to the <inputJSONfilename> by Caddy.
       To terminate caddyLog.py use ctrl-c or stop the task/service

       This option may only be selected when option -i [--inputJSONfilename]
       has also been specified. If this option is set to zero or omitted, caddyLog.py
       will simply convert and output the existing Caddy log data and then terminate
       when it detects the input file EOF.

   -g <outputGoAccessFilename>
   --outputGoAccessFilename <outputGoAccessFilename>

       convert the Caddy log data from either the <inputJSONfilename> OR the TCP network
       socket stream into a format suitable for input to goAccess and write it into the
       file specified

   -j <outputJSONfilename>
   --outputJSONfilename <outputJSONfilename>

       output the complete Caddy log data from the TCP network socket stream into the file
       specified. This will be a copy of the Caddy JSON log data, but by replicating
       it over a TCP/IP network socket it can be captured on an alternate server.

       This option may only be selected when option -n [--networkAddress] is specified.

   Using caddyLog.py output with goAccess
   _______________________________________

   To process the output file from caddyLog.py with goaccess, use the following command

   goaccess access.goaccess.log --log-format="%d %t %v %h %m %U %H %s %b %T %R %u" \\
            --date-format=%F --time-format=%H:%M:%S -o filename.html

   This maximises the data provided to goAccess and optimises the analysis available.
'''


def _log(message):
    """Print *message* prefixed with the current local date and time."""
    print('{} - {}'.format(datetime.now(), message))


def _usageError(message):
    """Report a command-line error, print the short usage and exit with status 2."""
    print()
    _log('ERROR: ' + message)
    shortHelp()
    sys.exit(2)


def shortHelp():
    """Print the short usage summary (shown for -h and after any usage error)."""
    print()
    print('   ./caddyLog.py -h -n <IPaddress:port> -i <inputJSONfilename> -t <timeInterval>')
    print('                 -g <outputGoAccessFilename> -j <outputJSONfilename>')
    print()
    print('   use ./caddyLog.py --help to obtain more comprehensive help')
    print()


def longHelp():
    """Print the comprehensive help text (shown for --help)."""
    print(_LONG_HELP)


def _parseNetworkAddress(address):
    """Split an "<IPaddress:port>" string into a (host, port) tuple.

    The split happens at the last ':' so the port is always the final field.
    Raises ValueError when there is no ':' separator, or when the port is not
    an integer in the range 0-65535.
    """
    host, separator, portText = address.rpartition(':')
    if not separator:
        raise ValueError('missing ":" between address and port')
    port = int(portText)
    if not 0 <= port <= 65535:
        raise ValueError('port out of range')
    return host, port


def processArgs(argv):
    """Parse the command-line arguments in *argv* into the module-level settings.

    *argv* is the argument list without the program name (sys.argv[1:]).
    On -h / --help the matching help text is printed and the process exits
    with status 0. Any invalid option, value or combination of options prints
    an error plus the short usage and exits with status 2.
    """
    global networkAddress
    global inputJSONfilename
    global timeInterval
    global outputGoAccessFilename
    global outputJSONfilename

    try:
        opts, args = getopt.getopt(
            argv,
            "hn:i:t:g:j:",
            ["help", "networkAddress=", "inputJSONfilename=", "timeInterval=",
             "outputGoAccessFilename=", "outputJSONfilename="])
    except getopt.GetoptError:
        shortHelp()
        sys.exit(2)

    for opt, arg in opts:
        # Plain equality: "opt in ('-h')" is a substring test on a string,
        # not a tuple membership test.
        if opt == '-h':
            shortHelp()
            sys.exit(0)
        elif opt == '--help':
            longHelp()
            sys.exit(0)
        elif opt in ("-n", "--networkAddress"):
            networkAddress = arg
        elif opt in ("-i", "--inputJSONfilename"):
            inputJSONfilename = arg
        elif opt in ("-g", "--outputGoAccessFilename"):
            outputGoAccessFilename = arg
        elif opt in ("-j", "--outputJSONfilename"):
            outputJSONfilename = arg
        elif opt in ("-t", "--timeInterval"):
            try:
                timeInterval = int(arg)
            except ValueError:
                _usageError('Interval must be a whole number of seconds')
            if timeInterval < 0:
                # A negative interval would match neither the "sleep" nor the
                # "finish" branch at end of file and spin forever.
                _usageError('Interval must not be negative')

    if args:
        _usageError('superfluous trailing arguments on command line')

    if networkAddress and inputJSONfilename:
        _usageError('Input can not be both TCP/IP network socket AND JSON input file')

    if networkAddress and timeInterval > 0:
        _usageError('Interval time must be omitted (or zero) when TCP/IP network socket is selected as input')

    if inputJSONfilename and outputJSONfilename:
        _usageError('Output can not be JSON file when JSON input file is also selected')

    if not networkAddress and not inputJSONfilename:
        _usageError('No input specified (use -n <IPaddress:port> or -i <inputJSONfilename>)')

    if not outputGoAccessFilename and not outputJSONfilename:
        _usageError('No output file name specified')

    if networkAddress:
        try:
            _parseNetworkAddress(networkAddress)
        except ValueError:
            _usageError('Network address must be given as <IPaddress:port>')


def _clientHost(request):
    """Return the client IP address from the "request" part of a log entry.

    "remote_addr" holds "<ip>:<port>" (IPv6 as "[<ip>]:<port>"); the port and
    any brackets are removed. An address without a port is returned unchanged.
    """
    if 'remote_addr' not in request:
        # NOTE(review): newer Caddy releases appear to log "remote_ip" and
        # "remote_port" instead of "remote_addr" - confirm against the Caddy
        # logging documentation. A KeyError here marks the entry as unusable.
        return request['remote_ip']

    address = request['remote_addr']
    if address.startswith('['):
        closing = address.find(']')
        return address[1:closing] if closing != -1 else address[1:]
    host, separator, _port = address.rpartition(':')
    return host if separator else address


def convertJSONtoGoAccess(JSONdata):
    """Convert one decoded Caddy access-log entry into a goAccess log line.

    *JSONdata* is the dict decoded from one JSON log line. The result has no
    trailing newline and holds these space-separated fields, in the order of
    the goaccess log-format "%d %t %v %h %m %U %H %s %b %T %R %u":
    date, time, virtual host, client host, method, URI, protocol, status,
    size, duration, referer and the quoted user agent.

    The timestamp is rendered in local time. A missing Referer header becomes
    'unknown' and a missing User-Agent header becomes '""'.

    Raises KeyError when a required field is absent, and TypeError/ValueError
    when "ts" or "duration" is not numeric.
    """
    request = JSONdata['request']
    stamp = datetime.fromtimestamp(JSONdata['ts'])
    date = stamp.strftime('%Y-%m-%d')               #d
    timeOfDay = stamp.strftime('%H:%M:%S')          #t
    virtualHost = request['host']                   #v
    host = _clientHost(request)                     #h
    method = request['method']                      #m
    uri = request['uri']                            #U
    proto = request['proto']                        #H
    status = str(JSONdata['status'])                #s
    size = str(JSONdata['size'])                    #b
    # Fixed-point output: str() of a float below 1e-4 gives scientific
    # notation (str(1e-05) == '1e-05'), which is not a plain decimal number.
    latency = '{:.9f}'.format(float(JSONdata['duration']))  #T

    headers = request.get('headers', {})
    if 'Referer' in headers:                        #R
        referer = headers['Referer'][0]
    else:
        referer = 'unknown'
    if 'User-Agent' in headers:                     #u
        user_agent = '"' + headers['User-Agent'][0] + '"'
    else:
        user_agent = '""'

    return ' '.join([date, timeOfDay, virtualHost, host, method, uri, proto,
                     status, size, latency, referer, user_agent])


def _parseLogLine(rawLine):
    """Decode one log line (str or bytes) into a dict.

    Returns None for blank lines. Returns None after printing a warning when
    the line is not valid JSON or does not decode to a JSON object.
    """
    rawLine = rawLine.strip()
    if not rawLine:
        return None
    try:
        JSONdata = json.loads(rawLine)
    except ValueError as error:  # JSONDecodeError and UnicodeDecodeError
        _log('WARNING: skipping log line that is not valid JSON ({})'.format(error))
        return None
    if not isinstance(JSONdata, dict):
        _log('WARNING: skipping log line that is not a JSON object')
        return None
    return JSONdata


def _recordEntry(rawLine, goAccessFile, jsonFile=None):
    """Decode *rawLine* and append it to whichever output files are open.

    *goAccessFile* and *jsonFile* are open text files or None. Returns True
    when the entry was written to at least one of them.
    """
    JSONdata = _parseLogLine(rawLine)
    if JSONdata is None:
        return False

    stored = False
    if jsonFile is not None:
        jsonFile.write(json.dumps(JSONdata) + '\n')
        stored = True
    if goAccessFile is not None:
        try:
            goAccessData = convertJSONtoGoAccess(JSONdata)
        except (KeyError, IndexError, TypeError, ValueError) as error:
            # Typically a log record that is not an access-log entry.
            _log('WARNING: skipping log entry that can not be converted ({!r})'.format(error))
        else:
            goAccessFile.write(goAccessData + '\n')
            stored = True
    return stored


def _convertFile():
    """Convert inputJSONfilename into outputGoAccessFilename.

    With timeInterval == 0 the existing content is converted once and the
    function returns. With timeInterval > 0 it keeps sleeping and converting
    newly appended lines, and never returns normally.
    """
    # Open the input first so a missing input file does not truncate an
    # existing output file.
    try:
        j = open(inputJSONfilename, encoding='utf-8')
    except FileNotFoundError:
        print()
        _log('ERROR: Input file "{}" not found'.format(inputJSONfilename))
        shortHelp()
        sys.exit(2)
    except OSError:
        print()
        _log('ERROR: Input file "{}" error'.format(inputJSONfilename))
        shortHelp()
        sys.exit(2)

    totalLogCount = 0
    batchLogCount = 0
    with j:
        try:
            with open(outputGoAccessFilename, 'w', encoding='utf-8') as g:
                _log('processing JSON input file: {}'.format(inputJSONfilename))
                pending = ''  # text of a line whose newline has not arrived yet
                while True:
                    line = j.readline()
                    if line:
                        pending += line
                        if not pending.endswith('\n'):
                            # The writer may be mid-line; wait for the rest.
                            continue
                        if _recordEntry(pending, g):
                            totalLogCount += 1
                            batchLogCount += 1
                        pending = ''
                    elif timeInterval > 0:
                        g.flush()
                        if batchLogCount:
                            _log('{} log entries written to {}'.format(batchLogCount, outputGoAccessFilename))
                            batchLogCount = 0
                        _log('sleeping for {} seconds before checking for additional log entries'.format(timeInterval))
                        sleep(timeInterval)
                    else:
                        # End of file in batch mode: a last line without a
                        # trailing newline is still a complete record.
                        if pending and _recordEntry(pending, g):
                            totalLogCount += 1
                        _log('TOTAL: {} log entries written to {}'.format(totalLogCount, outputGoAccessFilename))
                        print()
                        g.flush()
                        break
        except OSError:
            print()
            _log('ERROR: Output file "{}" error'.format(outputGoAccessFilename))
            shortHelp()
            sys.exit(2)


def _serveNetwork():
    """Listen on networkAddress and store the log entries Caddy streams to it.

    Entries are written to outputGoAccessFilename (converted) and/or
    outputJSONfilename (as JSON), whichever were requested. Connections are
    accepted one after another until the process is terminated.
    """
    host, port = _parseNetworkAddress(networkAddress)
    outputName = outputGoAccessFilename or outputJSONfilename
    totalLogCount = 0
    g = None
    j = None
    with socket.socket(family=socket.AF_INET) as sock:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            sock.bind((host, port))
            sock.listen()
        except OSError as error:
            print()
            _log('ERROR: Unable to listen on {}:{} ({})'.format(host, port, error))
            sys.exit(2)
        _log('TCP/IP NETWORK socket server created @ {}:{}'.format(host, port))
        try:
            if outputGoAccessFilename:
                g = open(outputGoAccessFilename, 'w', encoding='utf-8')
            if outputJSONfilename:
                j = open(outputJSONfilename, 'w', encoding='utf-8')
            while True:
                _log('Caddy not connected: Waiting for connection')
                connection, client_address = sock.accept()
                _log('Caddy @ {}:{} connected'.format(client_address[0], client_address[1]))
                batchLogCount = 0
                # TCP is a byte stream: one recv() may deliver part of a
                # record or several records, so buffer and split on newlines.
                buffer = b''
                try:
                    with connection:
                        while True:
                            data = connection.recv(4096)
                            if not data:
                                break
                            buffer += data
                            *completeLines, buffer = buffer.split(b'\n')
                            for rawLine in completeLines:
                                if _recordEntry(rawLine, g, j):
                                    totalLogCount += 1
                                    batchLogCount += 1
                            for output in (g, j):
                                if output is not None:
                                    output.flush()
                        # Peer closed: keep a final record sent without newline.
                        if buffer.strip() and _recordEntry(buffer, g, j):
                            totalLogCount += 1
                            batchLogCount += 1
                except ConnectionError as error:
                    _log('Caddy connection lost: {}'.format(error))
                if batchLogCount:
                    _log('{} log entries written to {}'.format(batchLogCount, outputName))
        except OSError:
            print()
            _log('ERROR: Output file "{}" error'.format(outputName))
            shortHelp()
            sys.exit(2)
        finally:
            _log('TOTAL: {} log entries written to {}'.format(totalLogCount, outputName))
            print()
            for output in (g, j):
                if output is not None:
                    output.close()


def main():
    """Parse sys.argv and run the requested conversion (file or network input)."""
    print()
    _log('INITIALISING: caddyLog.py (Caddy/GoAccess data logger & converter - copyright 2020 Dorian Wiskow)')

    processArgs(sys.argv[1:])

    if inputJSONfilename:
        _convertFile()

    if networkAddress:
        _serveNetwork()


def signal_handler(signal, frame):
    """Signal handler: announce termination and exit with status 0.

    sys.exit() raises SystemExit, so pending "with"/"finally" blocks still
    run and the output files are closed.
    """
    print()
    _log('Terminating . . . ')
    sys.exit(0)


if __name__ == "__main__":
    signal.signal(signal.SIGINT, signal_handler)
    # Stopping the task/service normally delivers SIGTERM; exit cleanly then too.
    signal.signal(signal.SIGTERM, signal_handler)
    main()