├── README.md ├── cold2frozen.py ├── frozen2deepfreeze.sh └── inputs.conf /README.md: -------------------------------------------------------------------------------- 1 | # splunkDeepfreeze 2 | Move frozen buckets to AWS S3 (and ultimately Glacier) for long term storage 3 | 4 | This tool provides 2 scripts: 5 | 6 | ## cold2frozen.py 7 | Copy this file to app/bin 8 | This script can be configured as your frozen script. It will take buckets which have expired the cold lifecycle, and will move the data outside of the splunk db path to a location of your choosing - you will need to edit the script to set the paths 9 | 10 | ## frozen2deepfreeze.sh 11 | Copy this file to app/bin 12 | This script runs as a scripted input and moves the contents of the frozen path into s3 - you will need to edit the script to set the paths 13 | 14 | ## inputs.conf 15 | Copy this file to an app/default folder to enable the scripted input 16 | 17 | If this script works for you, or you would like to see it built into a full app, please let me know how you get on! 18 | -------------------------------------------------------------------------------- /cold2frozen.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | # This is based on the splunk example script, but removes 4 | # much of the unnecessary complication for older versions. 5 | # This script is tested on Splunk 6.x on Linux. 
(Windows may need tweaking) 6 | 7 | ######## NOTE ####### 8 | # You MUST configure one of the following options 9 | 10 | # If you want a path relative to splunk home, use the follwing format: 11 | #ARCHIVE_DIR = os.path.join(os.getenv('SPLUNK_HOME'), 'frozenData') 12 | 13 | # -or- 14 | 15 | # Simply use a full base path like: 16 | #ARCHIVE_DIR = '/opt/frozenData' 17 | 18 | 19 | 20 | import sys, os, gzip, shutil, subprocess, random 21 | 22 | def archiveBucket(base, files): 23 | print 'Archiving bucket: ' + base 24 | for f in files: 25 | full = os.path.join(base, f) 26 | if os.path.isfile(full): 27 | os.remove(full) 28 | 29 | if __name__ == "__main__": 30 | if len(sys.argv) != 2: 31 | sys.exit('usage: python cold2frozen.py ') 32 | 33 | if not os.path.isdir(ARCHIVE_DIR): 34 | try: 35 | os.mkdir(ARCHIVE_DIR) 36 | except OSError: 37 | sys.stderr.write("mkdir warning: Directory '" + ARCHIVE_DIR + "' already exists\n") 38 | 39 | bucket = sys.argv[1] 40 | if not os.path.isdir(bucket): 41 | sys.exit('Given bucket is not a valid directory: ' + bucket) 42 | 43 | rawdatadir = os.path.join(bucket, 'rawdata') 44 | if not os.path.isdir(rawdatadir): 45 | sys.exit('No rawdata directory, given bucket is likely invalid: ' + bucket) 46 | 47 | files = os.listdir(bucket) 48 | journal = os.path.join(rawdatadir, 'journal.gz') 49 | if os.path.isfile(journal): 50 | archiveBucket(bucket, files) 51 | else: 52 | sys.exit('No journal file found, bucket invalid:' + bucket) 53 | 54 | if bucket.endswith('/'): 55 | bucket = bucket[:-1] 56 | 57 | indexname = os.path.basename(os.path.dirname(os.path.dirname(bucket))) 58 | destdir = os.path.join(ARCHIVE_DIR, indexname, os.path.basename(bucket)) 59 | 60 | while os.path.isdir(destdir): 61 | print 'Warning: This bucket already exists in the archive directory' 62 | print 'Adding a random extension to this directory...' 63 | destdir += '.' 
+ str(random.randrange(10)) 64 | 65 | shutil.copytree(bucket, destdir) 66 | -------------------------------------------------------------------------------- /frozen2deepfreeze.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # This script is designed to be run on a system with the AWS command line tools 4 | # correctly installed and configured, including configured keys and bucket permissions. 5 | # ...or from an AWS instance with IAM roles etc. 6 | 7 | # If you need to specify a proxy uncomment and complete the following 8 | #export HTTPS_PROXY=http://a.b.c.d:3128 9 | 10 | ARCHIVE_DIR='/opt/frozenData' 11 | S3_ARCHIVE_BUCKET='your-bucket-name' 12 | S3_REGION='eu-west-1' 13 | 14 | cd $ARCHIVE_DIR 15 | for file in $(find . -name '*.gz'); 16 | do 17 | relativepath=$(echo $file |sed 's/\.\///') 18 | bucketdetails=$(echo $relativepath|awk -F/ '{print "index="$1 " bucket="$2}') 19 | timestarted=$(date --utc +%FT%TZ) 20 | /usr/bin/aws s3 cp $file s3://$S3_ARCHIVE_BUCKET/$relativepath --region $S3_REGION --sse --only-show-error 21 | 22 | if [ $? -eq 0 ]; then 23 | folder=$(echo $file|sed -e 's,/rawdata/journal.gz,,') 24 | rm -rf $folder 25 | res='success' 26 | else 27 | res='failure' 28 | fi 29 | 30 | echo $timestarted DeepFreezing $bucketdetails result=$res 31 | done 32 | -------------------------------------------------------------------------------- /inputs.conf: -------------------------------------------------------------------------------- 1 | [script://./bin/frozen2deepfreeze.sh] 2 | disabled = 0 3 | interval = 21600 4 | sourcetype = splunkDeepfreeze 5 | --------------------------------------------------------------------------------