├── LICENSE ├── README.md └── bin ├── check_if_restart_required.sh ├── disableSplunkObjects.py ├── eventbreakerconfig.py ├── index_bucket_sizing.py ├── index_tuning_presteps.py ├── indextransfer.sh ├── indextuning.py ├── indextuning_dirchecker.py ├── indextuning_index_tuning.py ├── indextuning_indextempoutput.py ├── indextuning_utility.py ├── knowledge_obj_extraction_btool.py ├── knowledge_obj_extraction_conffiles.py ├── reownItems.sh ├── roll_and_resync_buckets.py ├── roll_and_resync_buckets_v2.py ├── roll_and_resync_buckets_v2.sh ├── splunk-props-transforms-routing.py ├── splunk_offline.service ├── splunk_offline.sh ├── syslog_migration_to_hec_assistant.py └── transfersplunkknowledgeobjects.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Splunk 2 | Other Splunk scripts which do not fit into the SplunkAdmins application 3 | 4 | Limited documentation is available for the transfer splunk knowledge objects script below 5 | 6 | ## Purpose? 
7 | Transfer Splunk knowledge objects from one search head to another without going through the deployer (except for lookup files, which cannot be created via the REST API; refer to https://github.com/beckyburwell/splunk_rest_upload_lookups for an example of using the lookup editor REST endpoints) 8 | 9 | ## How? 10 | A custom-written Python script that can be run via: 11 | 12 | ```splunk cmd python transfersplunkknowledgeobjects.py``` 13 | 14 | ## Why? 15 | 16 | Creating knowledge objects and pushing them out via the deployer is frustrating because they cannot be deleted from the search head; they must instead be removed from the deployer. 17 | 18 | Splunk does not supply any scripts for search head to search head knowledge object transfer, so I've created one. While you can copy from one search head cluster to another, you either need to trigger replication of the knowledge object (via a save) or restart all search head cluster members with a copy of the data; this script avoids these issues by working through the REST API 19 | 20 | ## Example usage 21 | ```splunk cmd python transfersplunkknowledgeobjects.py -srcURL "https://thesourceserver:8089" -destURL "https://localhost:8089" -srcUsername "admin" -srcPassword "removed" -srcApp sourceAppName -destUsername "admin" -destPassword "changeme" -destApp destAppName -printPasswords -all``` 22 | 23 | This transfers knowledge objects from thesourceserver to localhost using the username of admin; it also uses the login of "admin" on localhost to create the new objects (note that the objects are created with the same owner as on the source server unless the ```-destOwner``` option is used) 24 | 25 | The source application on "thesourceserver" is sourceAppName and the destination app on the localhost server is destAppName 26 | 27 | If any issues are encountered the ```-debugMode``` switch will supply additional logging. Also note that the ```-all``` switch transfers all knowledge objects; if you do not wish to transfer everything you can instead pass a controlled list of object types such as ```-tags, -macros, -eventtypes``` 28 | 29 | Here's an example of transferring one specific user's objects over and re-owning them to a new owner: 30 | 31 | ```splunk cmd python transfersplunkknowledgeobjects.py -srcURL "https://thesourceserver:8089" -destURL "https://localhost:8089" -srcUsername "admin" -srcPassword "removed" -srcApp sourceAppName -includeOwner aSpecificIndividual -destOwner ``` 32 | 33 | ## Parameters 34 | 35 | ```splunk cmd python transfersplunkknowledgeobjects.py -h ``` 36 | 37 | Provides the full list; a summary of the main options is below 38 | 39 | ### Controlling which knowledge objects are transferred 40 | ```-all``` (optional) migrate all knowledge objects 41 | ```-macros``` (optional) migrate macro knowledge objects 42 | ```-tags``` (optional) migrate tag knowledge objects 43 | ```-eventtypes``` (optional) migrate event types knowledge objects 44 | ```-allFieldRelated``` (optional) migrate all objects under fields 45 | ```-calcFields``` (optional) migrate calc fields knowledge objects 46 | ```-fieldAlias``` (optional) migrate field alias knowledge objects 47 | ```-fieldExtraction``` (optional) migrate field extraction knowledge objects 48 | ```-fieldTransforms``` (optional) migrate field transformation knowledge objects 49 | ```-lookupDefinition``` (optional) migrate lookup definition knowledge objects 50 | ```-workflowActions``` (optional) migrate workflow actions 51 | ```-sourcetypeRenaming``` (optional) 
migrate sourcetype renaming 52 | ```-automaticLookup``` (optional) migrate automatic lookup knowledge objects 53 | ```-datamodels``` (optional) migrate data model knowledge objects 54 | ```-dashboards``` (optional) migrate dashboards (user interface -> views) 55 | ```-savedsearches``` (optional) migrate saved search objects (this includes reports/alerts) 56 | ```-navMenu``` (optional) migrate navigation menus 57 | ```-navMenuWithDefaultOverride``` (optional) override the default nav menu in the destination app 58 | ```-viewstates``` (optional) migrate viewstates 59 | ```-times``` (optional) migrate time labels (conf-times) 60 | ```-collections``` (optional) migrate collections (kvstore collections) 61 | ```-panels``` (optional) migrate pre-built dashboard panels 62 | 63 | ### Filtering 64 | ```-noPrivate``` (optional) disable the migration of user level / private objects 65 | ```-noDisabled``` (optional) disable the migration of objects with a disabled status in Splunk 66 | ```-includeEntities``` INCLUDEENTITIES comma separated list of object values to include (double quoted) 67 | ```-excludeEntities``` EXCLUDEENTITIES comma separated list of object values to exclude (double quoted) 68 | ```-includeOwner``` INCLUDEOWNER comma separated list of owners whose objects should be transferred (double quoted) 69 | ```-excludeOwner``` EXCLUDEOWNER comma separated list of owners whose objects should not be transferred (double quoted) 70 | ```-privateOnly``` PRIVATEONLY Only transfer private objects 71 | 72 | ### Logging options 73 | ```-debugMode``` (optional) turn on DEBUG level logging (defaults to INFO) 74 | ```-printPasswords``` (optional) print passwords in the log files (dev only) 75 | 76 | ### Other ### 77 | ```-ignoreViewstatesAttribute``` (optional) when creating saved searches strip the vsid parameter/attribute before attempting to create the saved search 78 | ```-disableAlertsOrReportsOnMigration``` (optional) when creating alerts/reports, set disabled=1 (or enableSched=0) regardless of the previous setting pre-migration 79 | 80 | ## Examples 81 | This was provided by J.R. Murray in relation to this script. Pull requests or email contributions welcome: 82 | 83 | `| rest splunk_server=local /services/apps/local 84 | | search disabled=0 label="*" NOT title IN(splunkclouduf) 85 | | dedup title 86 | | fields title 87 | | rename title AS app 88 | | map search="| rest splunk_server=local /servicesNS/-/$app$/directory count=0 search=$app$ " maxsearches=200 89 | | rename eai:acl.* AS *, eai:type AS type 90 | | search (removable=1 OR NOT owner IN (nobody, splunk-system-user, admin) OR (updated=2023* OR updated=2024* NOT eai:acl.removable=1)) NOT disabled IN (true, 1) NOT app=missioncontrol 91 | | eval title="\"".title."\"" 92 | | stats values(title) AS title by app type 93 | | eval title=mvjoin(title, ",") 94 | | eval script_type=case( type=="savedsearch", "savedsearches", type=="props-lookup", "automaticLookup", type=="transforms-lookup", "lookupDefinition", type=="fvtags", "tags", type=="props-extract", "fieldExtraction", type=="fieldaliases", "fieldAlias", type=="views", "dashboards", type=="collections-conf", "collections", true(), type) 95 | | fillnull value="" 96 | | eval command=" -".script_type." -srcApp ".app." 
-includeEntities ".title | table command 97 | ` 98 | 99 | -------------------------------------------------------------------------------- /bin/check_if_restart_required.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # a simple script to check if a restart may be required for a particular app/Splunk directory as per 4 | # https://docs.splunk.com/Documentation/Splunk/latest/Indexer/Updatepeerconfigurations#Restart_or_reload_after_configuration_bundle_changes.3F 5 | 6 | date=`date +"%Y-%m-%d %H:%M:%S.%3N %z"` 7 | 8 | # https://www.baeldung.com/linux/check-variable-exists-in-list 9 | function exists_in_list() { 10 | LIST=$1 11 | DELIMITER=" " 12 | VALUE=$2 13 | echo $LIST | tr "$DELIMITER" '\n' | grep -F -q -x "$VALUE" 14 | } 15 | 16 | function usage { 17 | echo "./$(basename $0) -d --> directory to check if a restart is required (or comma separated string of directories)" 18 | } 19 | 20 | if [ $# -eq 0 ]; then 21 | usage 22 | exit 0 23 | fi 24 | 25 | while getopts "d:" o; do 26 | case "${o}" in 27 | d) 28 | dir=`echo ${OPTARG} | tr ',' ' '` 29 | ;; 30 | h) 31 | usage 32 | ;; 33 | *) 34 | usage 35 | ;; 36 | esac 37 | done 38 | 39 | reload_conf=`grep "reload.*= simple" /opt/splunk/etc/system/default/app.conf | cut -d "." -f2 | awk '{ print $1".conf" }' | sort | uniq` 40 | # this works in my environment may require further testing...triggers with access_endpoints sometimes works but it depends what was in the config 41 | # for example reload.distsearch = access_endpoints /search/distributed/bundle-replication-files 42 | # if the config file for distsearch contains [replicationBlacklist] then it won't require a restart, but if contains [replicationSettings] it may require a restart... 43 | reload_conf="${reload_conf} authentication.conf authorize.conf collections.conf indexes.conf messages.conf props.conf transforms.conf web.conf workload_pools.conf workload_rules.conf workload_policy.conf inputs.conf restmap.conf setup.xml" 44 | 45 | restart_required_any="False" 46 | 47 | echo "${date} restart script begins" 48 | 49 | echo "dir is $dir" 50 | 51 | dist_search_ignore="True" 52 | files=`ls ${app}/default/distsearch.conf ${app}/local/distsearch.conf` 53 | if [ "x$files" != "x" ]; then 54 | for file in `echo $files`; do 55 | # strip any blocks of text under the stanzas of 56 | # replicationWhitelist, replicationSettings:refineConf, replicationAllowlist, replicationBlacklist, replicationDenylist 57 | # this type of distsearch.conf should not trigger a restart 58 | # the grep removes the comments and counts non-empty lines 59 | awk ' 60 | BEGIN {skip=0} 61 | /^\[(replicationWhitelist|replicationSettings:refineConf|replicationAllowlist|replicationBlacklist|replicationDenylist)\]/ {skip=1} 62 | /^\[/ && !/^\[(replicationWhitelist|replicationSettings:refineConf|replicationAllowlist|replicationBlacklist|replicationDenylist)\]/ { skip=0 } 63 | { if (skip==0) print $0 } 64 | ' $file | grep -v "#" | grep -vc '^$' 65 | 66 | exit_code=$? 
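# note: $? captures the exit status of the final grep -vc in the pipeline above; grep exits 0 when at least one non-comment, non-blank line survives outside the replication* stanzas, meaning this distsearch.conf contains settings the script does not treat as reload-safe, so it will not be added to the reloadable list later on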
67 | if [ ${exit_code} -eq 0 ]; then 68 | dist_search_ignore="False" 69 | fi 70 | done 71 | fi 72 | 73 | server_conf_ignore="True" 74 | files=`ls ${app}/default/server.conf ${app}/local/server.conf` 75 | if [ "x$files" != "x" ]; then 76 | for file in `echo $files`; 77 | do 78 | count=`grep -vE "^(#|\[|\s*$)" ${file} 2>/dev/null | grep -v "conf_replication_" | wc -l` 79 | if [ "$count" -ne 0 ]; then 80 | server_conf_ignore="False" 81 | fi 82 | done 83 | fi 84 | 85 | # if any of these files cannot be reloaded a restart is required 86 | for app in ${dir}; 87 | do 88 | restart_required="False" 89 | default=`ls ${app}/default 2>&1 | grep -vE "No such file|data"`; 90 | local=`ls ${app}/local 2>&1 | grep -vE "No such file|data"`; 91 | combined="$default $local"; 92 | #echo $app $combined 93 | # if the app has custom triggers for reload attempt to handle this scenario 94 | custom_app_reload_default=`grep "^reload\..*= simple" ${app}/default/app.conf 2>/dev/null| cut -d "." -f2 | awk '{ print $1".conf" }'` 95 | custom_app_reload_local=`grep "^reload\..*= simple" ${app}/local/app.conf 2>/dev/null| cut -d "." -f2 | awk '{ print $1".conf" }'` 96 | custom_app_reload="$custom_app_reload_default $custom_app_reload_local" 97 | 98 | if [ "$server_conf_ignore" = "True" ]; then 99 | custom_app_reload="$custom_app_reload server.conf" 100 | fi 101 | if [ "$dist_search_ignore" = "True" ]; then 102 | custom_app_reload="$custom_app_reload distsearch.conf" 103 | fi 104 | 105 | for file in $combined; 106 | do 107 | if exists_in_list "$reload_conf" "$file"; then 108 | echo "${date} ${app}/$file in system/default/app.conf, reload=true" 109 | elif exists_in_list "$custom_app_reload" "$file"; then 110 | echo "${date} ${app}/$file in ${app}/app.conf, reload=true" 111 | else 112 | echo "${date} ${app}/$file not found, reload=false" 113 | restart_required="True" 114 | restart_required_any="True" 115 | fi 116 | done 117 | echo "${date} app=${app} restart_required=${restart_required}" 118 | done 119 | 120 | echo "${date} restart_required=${restart_required_any}" 121 | -------------------------------------------------------------------------------- /bin/disableSplunkObjects.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import logging 3 | from logging.config import dictConfig 4 | import urllib 5 | import argparse 6 | import json 7 | 8 | ########################### 9 | # 10 | # Disable Splunk Object script 11 | # Scripted disable/enable of Splunk objects, at this stage saved searches (reports & alerts) 12 | # 13 | ########################### 14 | 15 | #Setup the logging, handlers start at DEBUG level here and the root logger is dropped back to 16 | #INFO level later unless the -debugMode switch is used 17 | logging_config = dict( 18 | version = 1, 19 | formatters = { 20 | 'f': {'format': 21 | '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'} 22 | }, 23 | handlers = { 24 | 'h': {'class': 'logging.StreamHandler', 25 | 'formatter': 'f', 26 | 'level': logging.DEBUG}, 27 | 'file': {'class' : 'logging.handlers.RotatingFileHandler', 28 | 'filename' : '/tmp/disable_splunk_objects.log', 29 | 'formatter': 'f', 30 | 'maxBytes' : 2097152, 31 | 'level': logging.DEBUG} 32 | }, 33 | root = { 34 | 'handlers': ['h','file'], 35 | 'level': logging.DEBUG, 36 | }, 37 | ) 38 | 39 | dictConfig(logging_config) 40 | 41 | logger = logging.getLogger() 42 | 43 | #Create the argument parser 44 | parser = argparse.ArgumentParser(description='Disable or enable Splunk saved searches (reports and alerts) within an app 
via the REST API') 45 | parser.add_argument('-destURL', help='URL of the REST/API port of the Splunk instance, https://localhost:8089/ for example', required=True) 46 | parser.add_argument('-destApp', help='Application name on the destURL instance containing the saved searches to enable/disable', required=True) 47 | parser.add_argument('-destUsername', help='username to use for REST API of destURL argument', required=True) 48 | parser.add_argument('-destPassword', help='password to use for REST API of destURL argument', required=True) 49 | parser.add_argument('-debugMode', help='(optional) turn on DEBUG level logging (defaults to INFO)', action='store_true') 50 | parser.add_argument('-printPasswords', help='(optional) print passwords in the log files (dev only)', action='store_true') 51 | parser.add_argument('-includeEntities', help='(optional) comma separated list of object values to include (double quoted), if this option is used only this list of objects will be changed') 52 | parser.add_argument('-excludeEntities', help='(optional) comma separated list of object values to exclude (double quoted), if this option is used all other objects within the app may be disabled or enabled') 53 | parser.add_argument('-relativeDate', help='(optional) Splunk-style relative time such as -1d@d or -5d, anything modified after this point is in scope') 54 | parser.add_argument('-absoluteDate', help='(optional) Anything modified after, for example 2018-01-01 12:00:00, must be in YYYY-MM-DD HH:MM:SS format, note this will use the timezone of the destUsername in terms of timezone offset') 55 | parser.add_argument('-enableObj', help='(optional) Run the "enable" option against scheduled searches/reports', action='store_true') 56 | parser.add_argument('-disableObj', help='(optional) Run the "disable" option against scheduled searches/reports', action='store_true') 57 | 58 | args = parser.parse_args() 59 | 60 | #If we want debugMode, keep the debug logging, otherwise drop back to INFO level 61 | if not args.debugMode: 62 | logging.getLogger().setLevel(logging.INFO) 63 | 64 | #helper function as per https://stackoverflow.com/questions/31433989/return-copy-of-dictionary-excluding-specified-keys 65 | def without_keys(d, keys): 66 | return {x: d[x] for x in d if x not in keys} 67 | 68 | excludedList = [ "destPassword" ] 69 | cleanArgs = without_keys(vars(args), excludedList) 70 | logger.info("disableSplunkObjects run with arguments %s" % (cleanArgs)) 71 | 72 | includeEntities = None 73 | if args.includeEntities: 74 | includeEntities = [x.strip() for x in args.includeEntities.split(',')] 75 | 76 | excludeEntities = None 77 | if args.excludeEntities: 78 | excludeEntities = [x.strip() for x in args.excludeEntities.split(',')] 79 | 80 | logger.info("Running a query to obtain a list of objects in scope in app %s" % (args.destApp)) 81 | 82 | if args.absoluteDate: 83 | search = "| rest \"/servicesNS/-/" + args.destApp + "/directory?count=-1\" splunk_server=local" \ 84 | "| search eai:acl.app=" + args.destApp + " eai:type=\"savedsearch\"" \ 85 | "| eval updatedepoch=strptime(updated, \"%Y-%m-%dT%H:%M:%S%:z\")" \ 86 | "| where updatedepoch>strptime(\"" + args.absoluteDate + "\", \"%Y-%m-%d %H:%M:%S\")" \ 87 | "| table title, eai:acl.sharing, eai:acl.owner, updated" 88 | elif args.relativeDate: 89 | search = "| rest \"/servicesNS/-/" + args.destApp + "/directory?count=-1\" splunk_server=local" \ 90 | "| search eai:acl.app=" + args.destApp + " eai:type=\"savedsearch\"" \ 91 | "| eval updatedepoch=strptime(updated, \"%Y-%m-%dT%H:%M:%S%:z\")" \ 92 | "| 
where updatedepoch>relative_time(now(), \"" + args.relativeDate + "\")" \ 93 | "| table title, eai:acl.sharing, eai:acl.owner, updated" 94 | else: 95 | search = "| rest \"/servicesNS/-/" + args.destApp + "/directory?count=-1\" splunk_server=local" \ 96 | "| search eai:acl.app=" + args.destApp + " eai:type=\"savedsearch\"" \ 97 | "| table title, eai:acl.sharing, eai:acl.owner, updated" 98 | 99 | payload = { "search": search, "output_mode": "json", "exec_mode" : "oneshot" } 100 | url = args.destURL + "/services/search/jobs" 101 | 102 | logging.debug("Sending request to %s with username %s, payload %s" % (url, args.destUsername, payload)) 103 | 104 | res = requests.post(url, auth=(args.destUsername,args.destPassword), verify=False, data=payload) 105 | if (res.status_code != requests.codes.ok and res.status_code != 201): 106 | logger.error("URL %s status code %s reason %s, response '%s', in app %s" % (url, res.status_code, res.reason, res.text, args.destApp)) 107 | else: 108 | logger.debug("App %s with URL %s result is: '%s'" % (args.destApp, url, res.text)) 109 | 110 | #load the result 111 | jsonRes = json.loads(res.text) 112 | resList = jsonRes["results"] 113 | logging.debug("Received %s results" % (len(resList))) 114 | 115 | for aRes in resList: 116 | name = aRes["title"] 117 | sharing = aRes["eai:acl.sharing"] 118 | owner = aRes["eai:acl.owner"] 119 | lastUpdated = aRes["updated"] 120 | 121 | logging.debug("Working with name %s sharing %s owner %s updated %s in app %s" % (name, sharing, owner, lastUpdated, args.destApp)) 122 | 123 | if includeEntities: 124 | if not name in includeEntities: 125 | logger.debug("%s not in includeEntities list in app %s therefore skipping" % (name, args.destApp)) 126 | continue 127 | if excludeEntities: 128 | if name in excludeEntities: 129 | logger.debug("%s in excludeEntities list in app %s therefore skipping" % (name, args.destApp)) 130 | keep = False 131 | continue 132 | 133 | isAlert = False 134 | #If we may have to take action then we need to know if it's an alert or a report 135 | #as this is a slightly different change 136 | if args.enableObj or args.disableObj: 137 | if sharing == "user": 138 | url = args.destURL + "/servicesNS/" + owner + "/" + args.destApp + "/saved/searches/" + name 139 | else: 140 | url = args.destURL + "/servicesNS/nobody/" + args.destApp + "/saved/searches/" + name 141 | 142 | payload = { "output_mode": "json" } 143 | 144 | logging.debug("Sending request to %s with username %s payload %s" % (url, args.destUsername, payload)) 145 | res = requests.get(url, auth=(args.destUsername,args.destPassword), verify=False, data=payload) 146 | if (res.status_code != requests.codes.ok): 147 | logger.error("URL %s status code %s reason %s, response '%s', in app %s" % (url, res.status_code, res.reason, res.text, args.destApp)) 148 | break 149 | localRes = json.loads(res.text) 150 | if localRes["entry"][0]["content"].has_key("alert_condition"): 151 | logging.debug("%s of sharing level %s with owner %s appears to be an alert" % (name, sharing, owner)) 152 | isAlert = True 153 | 154 | actionTaken = False 155 | if args.enableObj: 156 | logging.debug("Firing off enable request") 157 | 158 | #If this is an alert we set the disabled flag to 0 if enabling 159 | if isAlert: 160 | payload["disabled"] = "0" 161 | else: 162 | #If it's a savedsearch we reenable the schedule 163 | payload["is_scheduled"] = "1" 164 | actionTaken = "enabled" 165 | elif args.disableObj: 166 | logging.debug("Firing off disable request") 167 | #If this is an alert we set the 
disabled flag to 1 if disabling 168 | if isAlert: 169 | payload["disabled"] = "1" 170 | else: 171 | #If it's a savedsearch we disable the schedule 172 | payload["is_scheduled"] = "0" 173 | 174 | actionTaken = "disabled" 175 | 176 | if actionTaken: 177 | logging.debug("Sending request to %s with username %s payload %s" % (url, args.destUsername, payload)) 178 | res = requests.post(url, auth=(args.destUsername,args.destPassword), verify=False, data=payload) 179 | if (res.status_code != requests.codes.ok): 180 | logger.error("URL %s status code %s reason %s, response '%s', in app %s" % (url, res.status_code, res.reason, res.text, args.destApp)) 181 | break 182 | 183 | logging.info("name %s sharing %s owner %s updated %s in app %s is now %s" % (name, sharing, owner, lastUpdated, args.destApp, actionTaken)) 184 | else: 185 | logging.info("No enable/disable flags used, name %s sharing %s owner %s updated %s is in scope in app %s" % (name, sharing, owner, lastUpdated, args.destApp)) 186 | 187 | logging.info("Done") -------------------------------------------------------------------------------- /bin/eventbreakerconfig.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import errno 4 | import argparse 5 | import logging 6 | from logging.config import dictConfig 7 | 8 | ############################### 9 | # 10 | # What does this script do? 11 | # 12 | ############################### 13 | # Attempts to browse through a directory to find all props.conf files 14 | # if a props.conf file is found then searches for a LINE_BREAKER 15 | # if the LINE_BREAKER exists creates an EVENT_BREAKER version in the new finalOutputDir 16 | # 17 | # Note that the directories are relative so if the props.conf is in /opt/splunk/etc/deployment-apps/Splunk_TA_windows/default/props.conf 18 | # then the output dir is finalOutputDir/Splunk_TA_windows/default/props.conf 19 | # 20 | # Only the relevant stanzas are included that contain a LINE_BREAKER and every item found is renamed EVENT_BREAKER 21 | logging_config = dict( 22 | version = 1, 23 | formatters = { 24 | 'f': {'format': 25 | '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'} 26 | }, 27 | handlers = { 28 | 'h': {'class': 'logging.StreamHandler', 29 | 'formatter': 'f', 30 | 'level': logging.DEBUG}, 31 | 'file': {'class' : 'logging.handlers.RotatingFileHandler', 32 | 'filename' : '/tmp/event_breaker_config.log', 33 | 'formatter': 'f', 34 | 'maxBytes' : 2097152, 35 | 'level': logging.DEBUG} 36 | }, 37 | root = { 38 | 'handlers': ['h','file'], 39 | 'level': logging.DEBUG, 40 | }, 41 | ) 42 | 43 | dictConfig(logging_config) 44 | 45 | logger = logging.getLogger() 46 | 47 | parser = argparse.ArgumentParser(description='Based on a directory containing Splunk apps, extract out all LINE_BREAKER entries and re-create as EVENT_BREAKER, also find stanzas using SHOULD_LINEMERGE=false to ensure EVENT_BREAKER_ENABLE is output to the output directory. 
Outputs directories with the same name as the input directories') 48 | parser.add_argument('-srcDir', help='Splunk application directory containing dirs with props.conf files', required=True) 49 | parser.add_argument('-destDirRoot', help='The directory where the output directory names should be created, the subdirectories will have the same application names as found in the srcDir', required=True) 50 | parser.add_argument('-debugMode', help='(optional) turn on DEBUG level logging (defaults to INFO)', action='store_true') 51 | 52 | args = parser.parse_args() 53 | 54 | if not args.debugMode: 55 | logging.getLogger().setLevel(logging.INFO) 56 | 57 | path = args.srcDir 58 | finalOutputDir = args.destDirRoot 59 | 60 | #Empty dictionary of what we need 61 | appDirsRequired = {} 62 | 63 | #List only directories and not files under a particular directory 64 | def listdirs(dir): 65 | return [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] 66 | 67 | dirList = listdirs(path) 68 | 69 | #Determine which directories have props.conf files and then parse them later 70 | for dir in dirList: 71 | #Expecting default/local at this level 72 | relativepath = path + "/" + dir 73 | subdir = listdirs(relativepath) 74 | for configDir in subdir: 75 | relativeconfigdir = relativepath + "/" + configDir 76 | files = os.listdir(relativeconfigdir) 77 | for file in files: 78 | if file=="props.conf": 79 | if not appDirsRequired.has_key(relativepath): 80 | appDirsRequired[relativepath] = [] 81 | appDirsRequired[relativepath].append(relativeconfigdir) 82 | logger.debug("Adding file %s from dir %s" % (file, relativeconfigdir)) 83 | 84 | #We use these regex'es later to find the start of any [source type name] or LINE_BREAKER lines 85 | regex1 = re.compile("^\s*(\[)(.*)") 86 | regex2 = re.compile("^\s*LINE_BREAKER(.*)") 87 | regex3 = re.compile("^\s*SHOULD_LINEMERGE\s*=\s*([^ \r\n]+)\s*") 88 | regex4 = re.compile("[^ =\t]") 89 | outputFiles = {} 90 | 91 | #Cycle through the list that have props files so we can work on them 92 | for adir in appDirsRequired.keys(): 93 | subdirs = appDirsRequired[adir] 94 | 95 | #At this point we're dealing with the default or local directory 96 | for dir in subdirs: 97 | #if it was in the list we had a props.conf to deal with 98 | aFile = dir + "/props.conf" 99 | 100 | logger.info("Open file %s" % (aFile)) 101 | 102 | #open the file read only and work with it 103 | with open(aFile) as file: 104 | currentStanza = "" 105 | first = True 106 | linebreakerfound = False 107 | eventbreaker = False 108 | 109 | fulldir = "" 110 | 111 | for line in file: 112 | line=line.rstrip() 113 | #determine if this is a stanza entry 114 | result = regex1.match(line) 115 | if result != None: 116 | #It is possible we have gone through this loop before and we're now looking at a new stanza 117 | #it's possible we have some unfinished business, such as what if SHOULD_LINEMERGE=false but we didn't 118 | #have a LINE_BREAKER, this is harmless as EVENT_BREAKER has a default but it prints annoying messages such as: 119 | #INFO ChunkedLBProcessor - Failed to find EVENT_BREAKER regex in props.conf for sourcetype::thesourcetypename. 
Reverting to the default EVENT_BREAKER regex for now 120 | #to avoid this we explictly put in the EVENT_BREAKER if it isn't there 121 | if eventbreaker and not linebreakerfound: 122 | outputFiles[fulldir].append("EVENT_BREAKER = ([\\r\\n]+)") 123 | #this scenario should ideally not occur, but it happens so print a warning 124 | elif linebreakerfound and not eventbreaker: 125 | logger.warn("For %s in %s there was a LINE_BREAKER but SHOULD_LINEMERGE not set to false therefore it won't work as expected. Fix it!" % (currentStanza, fulldir)) 126 | 127 | #Add a newline to the end of the last stanza 128 | if currentStanza != "" and fulldir != "": 129 | outputFiles[fulldir].append("") 130 | 131 | stanza = result.group(1) + result.group(2) 132 | currentStanza = stanza 133 | logger.debug("Working with stanza %s in file %s" % (stanza, aFile)) 134 | first = True 135 | linebreakerfound = False 136 | eventbreaker = False 137 | continue 138 | 139 | 140 | #If we find the LINE_BREAKER line then we need to include this stanza into the output files we're creating 141 | result = regex2.match(line) 142 | result2 = regex3.match(line) 143 | if result != None or result2 != None: 144 | #We know the directory looks something like 145 | #/opt/splunk/etc/deployment-apps/Splunk_TA_windows/default 146 | #We use the basename to determine if it's local or default 147 | #then we use dirname + basename to determine the app name above 148 | curbase = os.path.basename(dir) 149 | parent = os.path.dirname(dir) 150 | relativedir = os.path.basename(parent) 151 | 152 | fulldir = relativedir + "/" + curbase 153 | 154 | #We keep a list of output lines per-file 155 | if not outputFiles.has_key(fulldir): 156 | outputFiles[fulldir] = [] 157 | 158 | res = "" 159 | if result != None: 160 | linebreakerfound = True 161 | #We re-use the LINE_BREAKER stanza but we call it EVENT_BREAKER now 162 | #However due to bug SPL-159337 we remove non-capturing regexes as they are not supported 163 | eventbreakerstr = result.group(1).replace("?:","") 164 | #Note in some version of the Splunk forwarder not having a capture group seg faults the UF, no issues in modern versions such as 8.0.5 165 | result4 = regex4.search(eventbreakerstr) 166 | eventbreakerstr = eventbreakerstr[0:result4.start()] + eventbreakerstr[result4.start():] 167 | res = "EVENT_BREAKER " + eventbreakerstr 168 | #print "%s file EVENT_BREAKER %s\n" % (outputFiles[fulldir], eventbreakerstr) 169 | else: 170 | res = result2.group(1).lower() 171 | #if SHOULD_LINEMERGE = false then we can enable the event breaker, if not we cannot 172 | if res == "false" or res == "0": 173 | res = "EVENT_BREAKER_ENABLE = true" 174 | eventbreaker = True 175 | else: 176 | continue 177 | 178 | if first: 179 | outputFiles[fulldir].append("%s" % (currentStanza)) 180 | first = False 181 | 182 | outputFiles[fulldir].append("%s" % (res)) 183 | 184 | #it is possible there is only 1 stanza in the file so the file ends and we didn't get to write out the EVENT_BREAKER= line 185 | if eventbreaker and not linebreakerfound: 186 | outputFiles[fulldir].append("EVENT_BREAKER = ([\\r\\n]+)") 187 | #this scenario should ideally not occur, but it happens so print a warning 188 | elif linebreakerfound and not eventbreaker: 189 | logger.warn("For %s in %s there was a LINE_BREAKER but SHOULD_LINEMERGE not set to false therefore it won't work as expected. Fix it!" 
% (currentStanza, fulldir)) 190 | 191 | #We now have a list of outputFiles that we can output 192 | for outputDir in outputFiles.keys(): 193 | #Debugging info 194 | #print "%s contents %s" % (outputDir, outputFiles[outputDir]) 195 | 196 | finaldir = finalOutputDir + "/" + outputDir 197 | 198 | #make the relative directory for example /tmp/Splunk_TA_windows/default or similar 199 | logging.debug("Creating directory in %s" % (finaldir)) 200 | try: 201 | os.makedirs(finaldir) 202 | except OSError as e: 203 | if e.errno != errno.EEXIST: 204 | raise 205 | 206 | #output the props.conf and all lines that we have 207 | outputH = open(finaldir + "/props.conf", "w") 208 | for aLine in outputFiles[outputDir]: 209 | outputH.write(aLine + "\n") 210 | -------------------------------------------------------------------------------- /bin/index_bucket_sizing.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import datetime 3 | 4 | logger = logging.getLogger() 5 | 6 | 7 | #Functions exlusive to bucket sizing 8 | def run_bucket_sizing(utility, index_list, index_name_restriction, index_limit, num_hours_per_bucket, bucket_contingency, upper_comp_ratio_level, 9 | min_size_to_calculate, num_of_indexers, rep_factor_multiplier, do_not_lose_data_flag): 10 | 11 | todays_date = datetime.datetime.now().strftime("%Y-%m-%d") 12 | 13 | counter = 0 14 | auto_high_volume_sizeMB = 10240 15 | index_count = len(index_list) 16 | 17 | if index_limit < index_count: 18 | index_count = index_limit 19 | 20 | logger.info("Running queries to determine bucket sizing, licensing, et cetera") 21 | # Actually run the various Splunk query functions to find bucket sizing, compression ratios and license usage 22 | for index_name in list(index_list.keys()): 23 | # If we have a restriction on which indexes to look at, skip the loop until we hit our specific index name 24 | if index_name_restriction: 25 | if index_name != index_name_restriction: 26 | continue 27 | # useful for manual runs 28 | logger.debug("%s iteration of %s within indexLoop" % (counter, index_count)) 29 | 30 | # If we're performing a limited run quit the loop early when we reach the limit 31 | if counter > index_limit: 32 | break 33 | # function returns a float for recommended_bucket_size 34 | logger.info("index=%s, running determine_recommended_bucket_size" % (index_name)) 35 | 36 | index_list[index_name].recommended_bucket_size = utility.determine_recommended_bucket_size(index_name, num_hours_per_bucket) 37 | # Add % bucket_contingency to bucket sizing 38 | index_list[index_name].recommended_bucket_size = index_list[index_name].recommended_bucket_size * bucket_contingency 39 | 40 | # If we have run the required checks on the index mark it True, otherwise do not, this is used later and relates to the limited index runs 41 | index_list[index_name].checked = True 42 | counter = counter + 1 43 | 44 | # Keep a dictionary of indexes requiring changes and conf files we need to output 45 | indexes_requiring_changes = {} 46 | conf_files_requiring_changes = [] 47 | 48 | counter = 0 49 | logger.info("Now running bucket sizing calculations") 50 | for index_name in list(index_list.keys()): 51 | logger.debug("Working on index=%s with counter=%s" % (index_name, counter)) 52 | # If we have a restriction on which indexes to look at, skip the loop until we hit our specific index name 53 | if index_name_restriction: 54 | if index_name != index_name_restriction: 55 | continue 56 | 57 | counter = counter + 1 58 | # If we're performing a 
limited run quit the loop early when we reach the limit 59 | if counter > index_limit: 60 | break 61 | 62 | # Shorter names to the various index attributes 63 | max_hot_buckets = index_list[index_name].max_hot_buckets 64 | bucket_size = index_list[index_name].max_data_size 65 | conf_file = index_list[index_name].conf_file 66 | recommended_bucket_size = index_list[index_name].recommended_bucket_size 67 | index_list[index_name].number_recommended_bucket_size = index_list[index_name].recommended_bucket_size 68 | 69 | frozen_time_period_in_days = int(index_list[index_name].frozen_time_period_in_secs)/60/60/24 70 | avg_license_usage_per_day = index_list[index_name].avg_license_usage_per_day 71 | 72 | summary_index = index_list[index_name].summary_index 73 | 74 | # Company specific field here, the commented size per day in the indexes.conf file 75 | sizing_comment = -1 76 | if hasattr(index_list[index_name],"size_per_day_in_mb"): 77 | sizing_comment = int(index_list[index_name].size_per_day_in_mb) 78 | 79 | if not hasattr(index_list[index_name], "index_comp_ratio"): 80 | logger.warn("index=%s has no data on disk so unable to do any bucket sizing calculations" % (index_name)) 81 | continue 82 | index_comp_ratio = index_list[index_name].index_comp_ratio 83 | splunk_max_disk_usage_mb = index_list[index_name].splunk_max_disk_usage_mb 84 | oldest_data_found = index_list[index_name].oldest_data_found 85 | max_total_data_size_mb = float(index_list[index_name].max_total_data_size_mb) 86 | 87 | # If the compression ratio is unusually large warn but continue for now 88 | if index_comp_ratio > upper_comp_ratio_level: 89 | logger.info("index=%s, returned index_compression_ratio=%s, this is above the expected max_index_compression_ratio=%s, "\ 90 | "this may break calculations changing this to index_compression_ratio=%s" % (index_name, index_comp_ratio, upper_comp_ratio_level, upper_comp_ratio_level)) 91 | index_comp_ratio = upper_comp_ratio_level 92 | 93 | # If we have a really, really small amount of data such as hundreds of kilobytes the metadata can be larger than the raw data resulting in a compression ratio of 500 94 | # (i.e. the stored size is 500 times larger on disk than it is in raw data, resulting in other calculations such as bucket sizing getting broken 95 | # the alternative size calculation is used for this reason, and if the data is too small to calculate we use an upper bound on the ratio as a safety 96 | if splunk_max_disk_usage_mb > min_size_to_calculate: 97 | # If the data is poorly parsed (e.g. 
dates go well into the past) then the MB/day might be greater than what appears via dbinspect 98 | # and therefore we might need to sanity check this based on license usage * storage ratio / number of indexers / (potential hot buckets) 99 | # we add contingency to this as well 100 | alt_bucket_size_calc = ((index_list[index_name].max_license_usage_per_day * index_comp_ratio * rep_factor_multiplier) / num_of_indexers) / max_hot_buckets 101 | alt_bucket_size_calc = alt_bucket_size_calc * bucket_contingency 102 | 103 | if alt_bucket_size_calc > recommended_bucket_size: 104 | logger.info("index=%s alternative_bucket_size_calculation=%s, original recommended_bucket_size=%s, new recommended_bucket_size=%s" % (index_name, alt_bucket_size_calc, recommended_bucket_size, alt_bucket_size_calc)) 105 | recommended_bucket_size = alt_bucket_size_calc 106 | index_list[index_name].recommended_bucket_size = recommended_bucket_size 107 | index_list[index_name].number_recommended_bucket_size = recommended_bucket_size 108 | else: 109 | logger.info("index=%s had a comp_ratio=%s and a splunk_total_size=%s, this is less than the lower_bound=%s, not performing the alternative bucket size calculation, oldest_data_found=%s days old" % (index_name, index_comp_ratio, splunk_max_disk_usage_mb, min_size_to_calculate, oldest_data_found)) 110 | # We only change values where required, otherwise we output the line as we read it 111 | requires_change = False 112 | # If we didn't auto tune the bucket and it's a lot smaller or bigger than we change the values to the new numbers 113 | if bucket_size.find("auto") == -1: 114 | logger.warn("Not an auto sized bucket for index=" + index_name + " this index will be excluded from sizing") 115 | continue 116 | # It is an auto sized bucket, this makes it slightly different 117 | logger.debug("index=%s auto sized bucket with bucket_size=%s" % (index_name, bucket_size)) 118 | end = bucket_size.find("_") 119 | # With auto sized buckets we probably care more when the buckets are too small rather than too large (for now) 120 | bucket_auto_size = float(bucket_size[0:end]) 121 | perc_diff = (100 / bucket_auto_size)*recommended_bucket_size 122 | 123 | # If the calculated size of the index usage is less than 124 | # max_hot_buckets * bucket_size 125 | # Do not give out free space by increasing the bucket sizing unless its a summary index 126 | # So if calculation > calculated_size we do not increase to auto_high_volume 127 | # if calculated_size < calculated_size we ensure we are using auto setting (not auto_high_volume) 128 | # Edge case if sizing comment is specified 129 | if sizing_comment >= 0: 130 | # If we have a do not lose data flag use the larger of the average license usage or the sizing comment 131 | if do_not_lose_data_flag: 132 | if avg_license_usage_per_day > sizing_comment: 133 | license_calc_size = avg_license_usage_per_day 134 | else: 135 | license_calc_size = sizing_comment 136 | else: 137 | license_calc_size = sizing_comment 138 | else: 139 | license_calc_size = avg_license_usage_per_day 140 | 141 | decrease_required = False 142 | increase_allowed = True 143 | calculated_size = (index_comp_ratio * license_calc_size * frozen_time_period_in_days * rep_factor_multiplier)/num_of_indexers 144 | if (max_hot_buckets * bucket_auto_size) > calculated_size and not summary_index and bucket_size == "10240_auto": 145 | decrease_required = True 146 | logger.debug("index=%s requires a bucket_decrease as it has a (max_hot_buckets=%s * bucket_auto_size=%s() > ((index_comp_ratio=%s * 
license_calc_size=%s" \ 147 | " * frozen_time_period_in_days=%s * rep_factor_multiplier=%s) / num_of_indexers=%s) == %s" 148 | % (index_name, max_hot_buckets, bucket_auto_size, index_comp_ratio, license_calc_size, frozen_time_period_in_days, 149 | rep_factor_multiplier, num_of_indexers, calculated_size)) 150 | elif (max_hot_buckets * 10240) > calculated_size and not summary_index and bucket_size == "750_auto": 151 | increase_allowed = False 152 | logger.debug("index=%s lacks the license usage/agreed size for an increase (max_hot_buckets=%s * bucket_auto_size=%s) > ((index_comp_ratio=%s * license_calc_size=%s" \ 153 | " * frozen_time_period_in_days=%s * rep_factor_multiplier=%s) / num_of_indexers=%s) == %s" 154 | % (index_name, max_hot_buckets, bucket_auto_size, index_comp_ratio, license_calc_size, frozen_time_period_in_days, 155 | rep_factor_multiplier, num_of_indexers, calculated_size)) 156 | 157 | # If we expect to exceed the auto size in use, go to the auto_high_volume setting, assuming we are not already there 158 | if perc_diff > 100 and not bucket_size == "10240_auto": 159 | homepath_max_data_size_mb = index_list[index_name].homepath_max_data_size_mb 160 | 161 | #logger.debug("homepath_max_data_size_mb=%s and auto_high_volume_sizeMB * max_hot_buckets calc=%s and max_total_data_size_mb=%s" 162 | # % (homepath_max_data_size_mb, auto_high_volume_sizeMB * max_hot_buckets, max_total_data_size_mb)) 163 | #if homepath_max_data_size_mb != 0.0 and (auto_high_volume_sizeMB * max_hot_buckets) > homepath_max_data_size_mb: 164 | # logger.warn("index=%s would require an auto_high_volume (10GB) bucket but the homepath_max_data_size_mb=%s "\ 165 | # "cannot fit max_hot_buckets=%s of that size, not changing the bucket sizing" % (index_name, homepath_max_data_size_mb, max_hot_buckets)) 166 | #elif homepath_max_data_size_mb == 0.0 and (auto_high_volume_sizeMB * max_hot_buckets) > max_total_data_size_mb: 167 | # logger.warn("index=%s would require an auto_high_volume (10GB) bucket but the max_total_data_size_mb=%s "\ 168 | # "cannot fit max_hot_buckets=%s buckets of that size, not changing the bucket sizing" % (index_name, max_total_data_size_mb, max_hot_buckets)) 169 | if not increase_allowed: 170 | logger.warn("index=%s would require an auto_high_volume (10GB) bucket but it lacks the license usage/agreed size for an increase based on the calculation" \ 171 | "(max_hot_buckets=%s * bucket_size=%s) > ((index_comp_ratio=%s * license_calc_size=%s * frozen_time_period_in_days=%s * rep_factor_multiplier=%s)" \ 172 | " / num_of_indexers=%s)" 173 | % (index_name, max_hot_buckets, bucket_size, index_comp_ratio, license_calc_size, frozen_time_period_in_days, 174 | rep_factor_multiplier, num_of_indexers)) 175 | else: 176 | requires_change = "bucket" 177 | # If we don't have any change comments so far create the dictionary 178 | if not hasattr(index_list[index_name], "change_comment"): 179 | index_list[index_name].change_comment = {} 180 | # Write comments into the output files so we know what tuning occured and when 181 | index_list[index_name].change_comment['bucket'] = "# Bucket size increase required estimated %s, auto-tuned on %s\n" % (index_list[index_name].number_recommended_bucket_size, todays_date) 182 | # Simplify to auto_high_volume 183 | index_list[index_name].recommended_bucket_size = "auto_high_volume" 184 | # Update index object so index tuning is aware of this change 185 | index_list[index_name].max_data_size = "10240_auto" 186 | logger.info("index=%s file=%s current bucket size is auto tuned 
maxDataSize=%s, recommended_bucket_size=%s "\ 187 | "(will be set to auto_high_volume (size increase)), max_hot_buckets=%s" % (index_name, conf_file, bucket_size, recommended_bucket_size, max_hot_buckets)) 188 | else: 189 | # Bucket is smaller than current sizing, is it below the auto 750MB default or not, and is it currently set to a larger value? 190 | if (recommended_bucket_size < 750 and bucket_auto_size > 750) or decrease_required: 191 | requires_change = "bucket" 192 | # If we don't have any change comments so far create the dictionary 193 | if not hasattr(index_list[index_name],"change_comment"): 194 | index_list[index_name].change_comment = {} 195 | 196 | # Write comments into the output files so we know what tuning occured and when 197 | index_list[index_name].change_comment['bucket'] = "# Bucket size decrease required estimated %s, auto-tuned on %s\n" % (index_list[index_name].number_recommended_bucket_size, todays_date) 198 | index_list[index_name].recommended_bucket_size = "auto" 199 | # Update index object so index tuning is aware of this change 200 | index_list[index_name].max_data_size = "750_auto" 201 | if decrease_required: 202 | logger.info("index=%s file=%s current bucket size is auto tuned to maxDataSize=%s, recommended_bucket_size=%s "\ 203 | "(will be set to auto (size decrease)), max_hot_buckets=%s this was decreased due to not fitting auto_high_volume into " \ 204 | "the allocated index space" 205 | % (index_name, conf_file, bucket_size, recommended_bucket_size, max_hot_buckets)) 206 | else: 207 | logger.info("index=%s file=%s current bucket size is auto tuned to maxDataSize=%s, recommended_bucket_size=%s "\ 208 | "(will be set to auto (size decrease)), max_hot_buckets=%s" % (index_name, conf_file, bucket_size, recommended_bucket_size, max_hot_buckets)) 209 | 210 | # If this index requires change we record this for later 211 | if requires_change != False: 212 | indexes_requiring_changes[index_name] = requires_change 213 | logger.debug("index=%s requires changes of change_type=%s" % (index_name, requires_change)) 214 | 215 | # Add the conf file to the list we need to work on 216 | if conf_file not in conf_files_requiring_changes: 217 | conf_files_requiring_changes.append(conf_file) 218 | logger.debug("index=%s resulted in file=%s added to change list" % (index_name, conf_file)) 219 | 220 | return indexes_requiring_changes, conf_files_requiring_changes 221 | -------------------------------------------------------------------------------- /bin/index_tuning_presteps.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger() 4 | 5 | 6 | # Shared functions required by both index tuning/sizing & bucket tuning/sizing 7 | def index_tuning_presteps(utility, index_list, index_ignore_list, earliest_license, latest_license, index_name_restriction, index_limit, indexerhostnamefilter, useIntrospectionData, indexes_not_getting_sized): 8 | logger.info("Running index_tuning_presteps") 9 | conf_files_to_check = {} 10 | for index in list(index_list.keys()): 11 | conf_file = index_list[index].conf_file 12 | # ignore known system files that we should not touch 13 | # TODO default to using a local file equivalent for non-system directories 14 | if conf_file.find("/etc/system/default/") == -1 and conf_file.find("_cluster/default/") == -1: 15 | conf_files_to_check[conf_file] = True 16 | 17 | logger.debug("conf_files_to_check=\"%s\"" % (conf_files_to_check)) 18 | 19 | # parse all the conf files and look for 
comments about sizing (as this overrides settings later in the code) 20 | logger.info("Running parse_conf_files_for_sizing_comments()") 21 | # This just updates the index_list dictionary with new data 22 | utility.parse_conf_files_for_sizing_comments(index_list, conf_files_to_check) 23 | 24 | counter = 0 25 | index_count = len(index_list) 26 | 27 | if index_limit < index_count: 28 | index_count = index_limit 29 | 30 | for index_name in list(index_list.keys()): 31 | # If we have a restriction on which indexes to look at, skip the loop until we hit our specific index name 32 | if index_name_restriction: 33 | if index_name != index_name_restriction: 34 | continue 35 | 36 | # useful for manual runs 37 | logger.debug("iteration_count=%s of iteration_count=%s within loop" % (counter, index_count)) 38 | 39 | # If we're performing a limited run quit the loop early when we reach the limit 40 | if counter > index_limit: 41 | break 42 | 43 | # Actually check license usage per index over the past X days, function returns three ints 44 | logger.info("index=%s running determine_license_usage_per_day" % (index_name)) 45 | index_list[index_name].avg_license_usage_per_day, index_list[index_name].first_seen, index_list[index_name].max_license_usage_per_day = \ 46 | utility.determine_license_usage_per_day(index_name, earliest_license, latest_license) 47 | 48 | # Determine compression ratio of each index, function returns floats, index_comp_ratio is re-used during index sizing so required by both index sizing 49 | # and bucket sizing scenarios 50 | logger.info("index=%s running determine_compression_ratio" % (index_name)) 51 | index_list[index_name].index_comp_ratio, index_list[index_name].splunk_max_disk_usage_mb, index_list[index_name].oldest_data_found, index_list[index_name].newest_data_found = \ 52 | utility.determine_compression_ratio(index_name, indexerhostnamefilter, useIntrospectionData) 53 | 54 | index_list[index_name].summary_index = False 55 | avg_license_usage_per_day = index_list[index_name].avg_license_usage_per_day 56 | # Zero license usage, this could be a summary index 57 | if avg_license_usage_per_day == 0: 58 | json_result = utility.run_search_query("| metadata index=%s type=sourcetypes | table sourcetype" % (index_name)) 59 | # Load the result so that it's formatted into a dictionary instead of string 60 | 61 | # If we get no results back assume its not a summary index 62 | if "results" in json_result and len(json_result["results"]) == 1 and json_result["results"][0]["sourcetype"] == "stash": 63 | # At this point we know its a summary index 64 | # So we can use the average growth rate to determine if any sizing changes are required 65 | json_result = utility.run_search_query(""" search index=_introspection \"data.name\"=\"%s\" 66 | | bin _time span=1d 67 | | stats max(data.total_size) AS total_size by host, _time 68 | | streamstats current=f window=1 max(total_size) AS prev_total by host 69 | | eval diff=total_size - prev_total 70 | | stats avg(diff) AS avgchange by host 71 | | stats avg(avgchange) AS overallavg""" % (index_name)) 72 | 73 | # Load the result so that it's formatted into a dictionary instead of string 74 | if "results" in json_result and len(json_result["results"]) == 1: 75 | summary_usage_change_per_day = float(json_result["results"][0]["overallavg"]) 76 | logger.info("index=%s is a summary index, average_change_per_day=%s from introspection logs" % (index_name, summary_usage_change_per_day)) 77 | index_list[index_name].summary_usage_change_per_day = 
summary_usage_change_per_day 78 | index_list[index_name].summary_index = True 79 | 80 | counter = counter + 1 81 | 82 | # At this point we have indexes that we are supposed to ignore in the dictionary, we need them there so we could 83 | # ensure that we didn't suggest deleting them from the filesystem, however now we can ignore them so we do not 84 | # attempt to re-size the indexes with no license info available 85 | logger.debug("The following indexes will be ignored as per configuration index_ignore_list=\"%s\"" % (index_ignore_list)) 86 | for index in index_ignore_list: 87 | if index in index_list: 88 | indexes_not_getting_sized[index] = index_list[index] 89 | del index_list[index] 90 | logger.debug("Removing index=\"%s\" from index_list" % (index)) 91 | 92 | #Metric indexes are excluded from tuning at this stage 93 | #for index_name in list(index_list.keys()): 94 | # datatype = index_list[index_name].datatype 95 | # if datatype != 'event': 96 | # logger.info("index=%s is excluded from tuning due to not been of type events, type=%s" % (index_name, datatype)) 97 | # indexes_not_getting_sized[index_name] = index_list[index_name] 98 | # del index_list[index_name] 99 | -------------------------------------------------------------------------------- /bin/indextransfer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | #Note this script *will* need modifications to be used for another purpose 4 | #however this need to transfer files with resume functionality via shell script may occur again so keeping this! 5 | #Note that SSH keys were used for the rsync transfers, so the servers in question has access to the remote server 6 | if [ $# -lt 1 ]; then 7 | echo "Error need index argument, and optional bandwidth limit argument" 8 | exit 1 9 | fi 10 | 11 | indexname=$1 12 | 13 | #Limit at which to throttle the rsync transfer 14 | bandwidthlimit="4096" 15 | if [ $# -gt 1 ]; then 16 | bandwidthlimit=$2 17 | fi 18 | 19 | #Force SSH key to be added to known_hosts 20 | successFile=/tmp/successfultransferlist${indexname}.txt 21 | tmpFile=/tmp/tmpfile${indexname}.txt 22 | 23 | hotSourceVol=FIXME 24 | coldSourceVol=FIXME 25 | destVol=FIXME 26 | host=FIXME 27 | epochTime=FIXME 28 | 29 | #Get a list of files to transfer 30 | find ${hotSourceVol}/${indexname}/ -name "[dr]b*" | grep -v \.rbsentinel | grep -vE "/db$" | grep -v grep > $tmpFile 31 | find ${coldSourceVol}/${indexname}/ -name "[dr]b*" | grep -v \.rbsentinel | grep -vE "/colddb$" | grep -v grep >> $tmpFile 32 | 33 | #This could be done without so many mv commands but that involves checking the manuals and this is quick! 
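#The block below is the resume logic: both lists are sorted (comm needs sorted input) and
#comm -23 keeps only the lines unique to $tmpFile, i.e. candidate bucket directories that are
#not already recorded in the success file, so anything transferred on a previous run is skipped.
#Illustrative (made-up) example: if $tmpFile holds .../db_1700000000_1690000000_1 and
#.../db_1710000000_1700000000_2 and the success file already lists the first path, only the
#second remains in $tmpFile after this block.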
34 | if [ -s $successFile ]; then
35 | sort $successFile > ${successFile}.sorted
36 | mv ${successFile}.sorted $successFile
37 | sort $tmpFile > ${tmpFile}.sorted
38 | mv ${tmpFile}.sorted $tmpFile
39 | comm -23 $tmpFile $successFile > ${tmpFile}.2
40 | mv ${tmpFile}.2 $tmpFile
41 | fi
42 | 
43 | #Force SSH key to be added to known_hosts
44 | ssh -n -o "BatchMode yes" -o StrictHostKeyChecking=no -o ServerAliveInterval=30 -o ConnectTimeout=10 splunk@$host
45 | #For each candidate bucket dir, work out the destination path, create it on the remote host, then rsync buckets newer than epochTime
46 | for i in `cat $tmpFile`; do
47 | destDir=${destVol}/${indexname}/`echo $i | grep -Eo "(hot|cold)\/[^/]+" | cut -d "/" -f2`/hot
48 | ssh splunk@$host "mkdir -p $destDir"
49 | time=`echo $i | cut -d "/" -f6 | cut -d "_" -f3`
50 | if (( $time > epochTime )); then
51 | echo "`date +"%Y-%m-%d %H:%M:%S"` Dir $i is in scope for transfer, begin transfer"
52 | rsync -r -p -t -g --delete --exclude=.snapshots --bwlimit=$bandwidthlimit $i splunk@$host:$destDir
53 | if [ $? -eq 0 ]; then
54 | echo "`date +"%Y-%m-%d %H:%M:%S"` Dir $i transfer completed"
55 | echo $i >> $successFile
56 | fi
57 | fi
58 | done
59 | 
60 | 
--------------------------------------------------------------------------------
/bin/indextuning.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import re
3 | import getpass
4 | import sys
5 | import os
6 | import indextuning_utility as idx_utility
7 | import indextuning_indextempoutput
8 | import indextuning_dirchecker
9 | import datetime
10 | import shutil
11 | import argparse
12 | import logging
13 | from logging.config import dictConfig
14 | import requests
15 | from requests.auth import HTTPBasicAuth
16 | from index_bucket_sizing import run_bucket_sizing
17 | from index_tuning_presteps import index_tuning_presteps
18 | from indextuning_index_tuning import run_index_sizing
19 | 
20 | """
21 | What does this script do?
22 | 23 | Attempts to use the output of the command: 24 | splunk btool indexes list --debug 25 | To determine all indexes configured on this Splunk indexer and then provides tuning based on the previously seen license usage & bucket sizing 26 | in addition this script provides a list of directories which are located in the same directory as hot, cold or tstatsHomePath and if there is no matching 27 | index configuration suggests deletion via an output file 28 | 29 | The script takes no input, the script has multiple output options: 30 | * output example index file that can be diffed 31 | * create a git-based change 32 | * create a git-based change and create a merge request in gitlab 33 | 34 | In more depth: 35 | Index bucket sizing (max_data_size setting) 36 | * Based on output of dbinspect command determining how many hours worth of data will fit into a bucket 37 | * Based on target number of hours determines if the bucket sizing is currently appropriate 38 | * Based on recent license usage, compression ratio, number of indexers/buckets approx if the license usage seems reasonable or not as a sanity check 39 | * the above sanity check can cause issues *if* a very small amount of data exists in the index, resulting in compression ratios of 500:1 (500 times larger on disk) 40 | * min_size_to_calculate exists to prevent this from happening 41 | * upper_comp_ratio_level provides an additional safety here to cap the maximum found compression ratio 42 | * If bucket size is auto_high_volume, and tuned size*contingency is less than 750MB, then auto will be suggested 43 | * If bucket size is auto tuned, auto_high_volume will be suggested if tuned size * contingency is larger than 750MB 44 | * If bucket was using a size-based max_data_size then the new value will be suggested (not recommended, auto/auto_high_volume are usually fine) 45 | 46 | Index sizing (max_total_data_size_mb) 47 | * Based on the last X days of license usage (configurable) and the storage compression ratio based on the most recent introspection data 48 | and multiplied by the number of days before frozen, multiplied by contingency, rep factor multiplier and divided by number of indexers to determine value per indexer 49 | * If the index has recent license usage *but* it does not match the minimum number of days we require for sizing purposes we do not size the index 50 | this covers, for example an index that has 2 days of license usage and it's too early to correctly re-size it 51 | * If the index has no data on the filesystem no tuning is done but this is recorded in the log (either a new index or an unused index) 52 | * If the index has zero recent license usage over the measuring period *and* it does not have a sizing comment then we cap it at the 53 | current maximum size * contingency with a lower limit of lower_index_size_limit, this parameter prevents bucket explosion from undersizing indexes 54 | * If the index has zero recent license usage over the measuring period *and* it does have a sizing comment then we do not change it 55 | * If the index has recent license usage and it is oversized, in excess of the perc_before_adjustment then we adjust (oversized index) 56 | * If the index has recent license usage and it is undersized (including a % contingency we add), we adjust (undersized index) currently we do this even if sizing comments exist 57 | note that this particular scenario can result in the index becoming larger than the original sizing comment, it is assumed that data loss should be avoided by the script 58 | * 
Finally if oversized we sanity check that we are not dropping below the largest on-disk size * contingency on any indexer as that would result in deletion of data 59 | 60 | sizingEstimates mode 61 | * Checking max_total_data_size_mb of each index, and the size of the volumes set in Splunk, determine if we have over-allocated based 62 | on the worst case usage scenario that all max_total_data_size_mb is in use 63 | * If index tuning is occurring and this switch is passed a more accurate estimate of index sizing is provided 64 | 65 | deadIndexCheck mode 66 | * Check the relevant Splunk directories for index storage and determine if there are directories in these locations that no longer map to an index stanza in the config 67 | this commonly happens when an index is renamed leaving extra directories on the filesystem that will never delete themselves 68 | * Unless the deadIndexDelete flag is also passed in this will not actually delete any files, it will provide a listing of what appears to be extra files 69 | 70 | Indexes datatype can be event or metric, if metrics we do the same tuning but we avoid data loss as the chance of accidentally flooding a metrics index with data 71 | is very low...dbinspect still works along with other queries, and as of 7.3+ the license usage is now limited to 150 bytes/metric (it can be less now) 72 | """ 73 | 74 | #In addition to console output we write the important info into a log file, console output for debugging purposes only 75 | output_log_file = "/tmp/indextuning.log" 76 | 77 | app_hosting_exclusion_list = 'monitoring' 78 | 79 | logging_config = dict( 80 | version = 1, 81 | formatters = { 82 | 'f': {'format': 83 | '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'} 84 | }, 85 | handlers = { 86 | 'h': {'class': 'logging.StreamHandler', 87 | 'formatter': 'f', 88 | 'level': logging.DEBUG}, 89 | 'file': {'class' : 'logging.handlers.RotatingFileHandler', 90 | 'filename' : output_log_file, 91 | 'formatter': 'f', 92 | 'maxBytes' : 2097152, 93 | 'level': logging.DEBUG, 94 | 'backupCount': 5 } 95 | }, 96 | root = { 97 | 'handlers': ['h','file'], 98 | 'level': logging.DEBUG, 99 | }, 100 | ) 101 | 102 | dictConfig(logging_config) 103 | 104 | logger = logging.getLogger(__name__) 105 | 106 | # Create the argument parser 107 | parser = argparse.ArgumentParser(description="Re-tune the indexes.conf files of the current indexer based on " \ 108 | " passed in tuning parameters and an estimation of what is required") 109 | 110 | # Useful for testing only, should be set to unlimited by default, limit setting 111 | parser.add_argument('-indexLimit', help='Number of indexes to run through before tuning is stopped at that point in time', default=9999, type=int) 112 | parser.add_argument('-destURL', help='URL of the REST/API port of the Splunk instance, https://localhost:8089/ for example', default="localhost:8089") 113 | 114 | # How far to go back in queries related to bucket information 115 | parser.add_argument('-oldest_data_found', help='Earliest point in time to find bucket information', default="-16d") 116 | 117 | # Earliest time is 14 days ago and we start at midnight to ensure we get whole days worth of license usage 118 | parser.add_argument('-earliest_license', help='Earliest point in time to find license details (use @d to ensure this starts at midnight)', default="-30d@d") 119 | parser.add_argument('-latest_license', help='Latest point in time to find license details (use @d to ensure this ends at midnight)', default="@d") 120 | 
parser.add_argument('-numberOfIndexers', help='Number of indexers the tuning should be based on', default="6", type=int) 121 | 122 | # For bucket tuning, aim for 24 hours of data per bucket for now, we add contingency to this anyway 123 | parser.add_argument('-num_hours_per_bucket', help='Aim for approximate number of hours per bucket (not including contingency)', default="24") 124 | 125 | # Add 20% contingency to the result for buckets 126 | parser.add_argument('-bucket_contingency', help='Contingency multiplier for buckets', default=1.2, type=float) 127 | 128 | # Add 25% contingency to the result for index sizing purposes 129 | parser.add_argument('-sizing_continency', help='Contingency multiplier for index sizing', default=1.25, type=float) 130 | 131 | # What % do we leave spare in our indexes before we increase the size? 20% for now 132 | # Note an additional safety that checks the max on disk size per index can also override this 133 | # By having the sizing_continency larger than this setting we hope that re-sizing will rarely occur once increased correctly 134 | # as it would have to change quite a bit from the average... 135 | parser.add_argument('-undersizing_continency', help='Contingency multiplier before an index is increased (1.2 is if the index is undersized by less than 20 perc. do nothing)', default=1.2, type=float) 136 | 137 | # What host= filter do we use to find index on-disk sizing information for the compression ratio? 138 | parser.add_argument('-indexerhostnamefilter', help='Host names for the indexer servers for determining size on disk queries', default="pucq-sp*-*") 139 | 140 | # What's the smallest index size we should go to, to avoid bucket explosion? At least max_hot_buckets*max_data_size + some room? Hardcoded for now 141 | parser.add_argument('-lower_index_size_limit', help='Minimum index size limit to avoid bucket explosion', default=3000, type=int) 142 | 143 | # What's the minimum MB we can leave for a bucket in cold for a really small index? 144 | parser.add_argument('-smallbucket_size', help='Minimum cold section size for a small bucket', default=800, type=int) 145 | 146 | # If the index is oversized by more than this % then adjustments will occur 147 | # this prevents the script from making small adjustments to index sizing for no reason, it has to be more than 20% oversized 148 | # if we utilise 0.8 as the perc_before_adjustment 149 | # If undersized we always attempt to adjust immediately 150 | parser.add_argument('-perc_before_adjustment', help='Multiplier for downsizing an oversized index (0.8 means that the index must be more than 20 perc. oversized for any adjustment to occur)', default=0.8, type=float) 151 | 152 | # Path to store temporary files such as the indexes.conf outputs while this is running 153 | parser.add_argument('-workingPath', help='Path used for temporary storage of index.conf output', default="/tmp/indextuningtemp") 154 | 155 | # Number of days of license data required for sizing to occur, if we don't have this many days available do not attempt to size 156 | parser.add_argument('-min_days_of_license_for_sizing', help="Number of days of license data required for sizing to occur, " \ 157 | "if we don't have this many days available do not attempt to resize", default=15, type=int) 158 | 159 | # How many MB of data should we have before we even attempt to calculate bucket sizing on a per indexer basis? 
160 | # where a number too small results in a giant compression ratio where stored size > raw data 161 | parser.add_argument('-min_size_to_calculate', help="Minimum bucket size before calculation is attempted", default=100.0, type=float) 162 | 163 | # Exclude default directories from /opt/splunk/var/lib/splunk and should not be deleted as they won't exist in the indexes.conf files 164 | parser.add_argument('-excludedDirs', help="List of default directories to exclude from been listed in "\ 165 | " the deletion section", default='kvstore,.snapshots,lost+found,authDb,hashDb,persistentstorage,fishbucket,main,$_index_name') 166 | 167 | # If the compression ratio is above this level we throw a warning 168 | parser.add_argument('-upper_comp_ratio_level', help="Comp ratio limit where a warning in thrown rather than calculation "\ 169 | "done on the bucket sizing if this limit is exceeded", default=6.0, type=float) 170 | 171 | # Multiply license usage by this multiplier to take into account index replication 172 | parser.add_argument('-rep_factor_multiplier', help='Multiply by rep factor (for example if 2 copies of raw/indexed data, 2.0 works', default=2.0, type=float) 173 | 174 | parser.add_argument('-debugMode', help='(optional) turn on DEBUG level logging (defaults to INFO)', action='store_true') 175 | parser.add_argument('-username', help='Username to login to the remote Splunk instance with', required=True) 176 | parser.add_argument('-password', help='Password to login to the remote Splunk instance with', required=True) 177 | parser.add_argument('-indexNameRestriction', help='List of index names to run against (defaults to all indexes)') 178 | parser.add_argument('-deadIndexCheck', help='Only use the utility to run the dead index check, no index tuning required', action='store_true') 179 | parser.add_argument('-deadIndexDelete', help='After running the dead index check perform an rm -R on the directories "\ 180 | "which appear to be no longer in use, use with caution', action='store_true') 181 | parser.add_argument('-do_not_lose_data_flag', help='By default the index sizing estimate overrides any attempt to prevent data loss, "\ 182 | "use this switch to provide extra storage to prevent data loss (free storage!)', action='store_true') 183 | parser.add_argument('-sizingEstimates', help='Only run sizing estimates (do not resize indexes)', action='store_true') 184 | parser.add_argument('-indexTuning', help='Run index tuning & bucket tuning (resize indexes + buckets)', action='store_true') 185 | parser.add_argument('-bucketTuning', help='Only run bucket tuning (resize buckets only)', action='store_true') 186 | parser.add_argument('-indexSizing', help='Only run index tuning (resize indexes only)', action='store_true') 187 | parser.add_argument('-all', help='Run index sizing and sizing estimates, along with a list of unused indexes', action='store_true') 188 | parser.add_argument('-useIntrospectionData', help='Use introspection data rather than the REST API data for index bucket calculations, "\ 189 | "slightly faster but earliest time may not be 100 percent accurate (data is likely older than logged)', action='store_true') 190 | parser.add_argument('-workWithGit', help='Check changes back into git', action='store_true') 191 | parser.add_argument('-gitWorkingDir', help='Directory to perform git clone / pull / commits in', default="/tmp/indextuning_git_temp") 192 | parser.add_argument('-gitFirstConnection', help='Run an SSH command to add the git repo to the list of trusted SSH fingerprints') 193 | 
parser.add_argument('-gitRepoURL', help='URL of the git repository (using an SSH key)') 194 | parser.add_argument('-gitBranch', help='git branch prefix to use to create to send to remote repo', default="indextuning") 195 | parser.add_argument('-gitRoot', help='What is the directory under the git repository where the master-apps directory sits?', default="/master-apps") 196 | parser.add_argument('-gitLabToken', help='The gitLab private token or access token, if supplied a merge request is created using this token') 197 | parser.add_argument('-gitLabURL', help='URL of the remote gitlab server to work with', default="https://localhost/api/v4/projects/1/merge_requests") 198 | parser.add_argument('-outputTempFilesWithTuning', help='Output files into the working path with tuning results (optional)', action='store_true') 199 | parser.add_argument('-no_sizing_comments', help='Do not add auto-sizing comments on entries that do not have a commented size (optional)', action='store_true') 200 | 201 | # Skip indexes where the size on disk is greater than the estimated size (i.e. those who have serious balance issues or are exceeding usage expectations) 202 | parser.add_argument('-skipProblemIndexes', help="Skip re-sizing attempts on the index size for any index perceived as a problem "\ 203 | "(for example using more disk than expected or similar)", action='store_true') 204 | 205 | # helper function as per https://stackoverflow.com/questions/31433989/return-copy-of-dictionary-excluding-specified-keys 206 | def without_keys(d, keys): 207 | return {x: d[x] for x in d if x not in keys} 208 | 209 | """ 210 | Initial setup 211 | """ 212 | 213 | args = parser.parse_args() 214 | 215 | # If we want debugMode, keep the debug logging, otherwise drop back to INFO level 216 | if not args.debugMode: 217 | logging.getLogger().setLevel(logging.INFO) 218 | 219 | args.excludedDirs = args.excludedDirs.split(",") 220 | 221 | if args.all: 222 | args.sizingEstimates = True 223 | args.indexTuning = True 224 | args.deadIndexCheck = True 225 | 226 | if args.indexTuning: 227 | args.bucketTuning = True 228 | args.indexSizing = True 229 | 230 | # index_tuning_exclusion_list populating option 231 | # | makeresults | eval index="_internal,_audit,_telemetry,_thefishbucket,_introspection,history,default,splunklogger,notable_summary,ioc,threat_activity,endpoint_summary,whois,notable,risk,cim_modactions,cim_summary,xtreme_contexts" | makemv delim="," index | mvexpand index | table index | outputlookup index_tuning_exclusion_list 232 | if args.bucketTuning or args.indexSizing: 233 | index_ignore_url = 'https://' + args.destURL + '/servicesNS/nobody/' + app_hosting_exclusion_list + '/storage/collections/data/index_tuning_exclusion_list' 234 | logger.debug("Attempting to obtain index ignore list via rest call to url=%s" % (index_ignore_url)) 235 | res = requests.get(index_ignore_url, verify=False, auth=HTTPBasicAuth(args.username, args.password)) 236 | 237 | if res.status_code != requests.codes.ok: 238 | logger.fatal("Failure to obtain index_ignore_list list on url=%s in status_code=%s reason=%s response=\"%s\"" % (index_ignore_url, res.status_code, res.reason, res.text)) 239 | sys.exit(-1) 240 | 241 | logger.debug("index ignore list response=\"%s\"" % (res.text)) 242 | index_ignore_json = res.json() 243 | if len(index_ignore_json) != 0: 244 | index_ignore_list = [ item['index'] for item in index_ignore_json ] 245 | logger.info("index=%s added to ignore list for tuning purposes" % (index_ignore_list)) 246 | else: 247 | logger.warn("Found 
no indexes to add to ignore list for tuning purposes, this normally indicates a problem that should be checked") 248 | 249 | # Setup the utility class with the username, password and URL we have available 250 | logger.debug("Creating utility object with username=%s, destURL=%s, oldest_data_found=%s" % (args.username, args.destURL, args.oldest_data_found)) 251 | utility = idx_utility.utility(args.username, args.password, args.destURL, args.oldest_data_found) 252 | 253 | # Determine the index/volume list we are working with 254 | index_list, vol_list = utility.parse_btool_output() 255 | 256 | # We only care about unique dirs to check and unique directories that are unused 257 | dead_index_dir_list = {} 258 | dead_summary_dir_list = {} 259 | 260 | # Cleanup previous runs 261 | if os.path.isdir(args.workingPath): 262 | logger.debug("Deleting old dir=%s after previous run" % (args.workingPath)) 263 | shutil.rmtree(args.workingPath) 264 | 265 | excludedList = [ "password", "gitLabToken" ] 266 | clean_args = without_keys(vars(args), excludedList) 267 | logger.info("Index sizing script running with args=\"%s\"" % (clean_args)) 268 | 269 | """ 270 | Check for directories on the filesystem that do not appear to be related to any index 271 | 272 | Determine locations that exist on the filesystem, and if they don't exist in the list from the btool output 273 | they are likely dead index directories from indexes that were once here but now removed 274 | TODO this could be a function and moved outside the main code 275 | Note this is here because later we modify the index_list to remove ignored indexes 276 | where now we want them in the list to ensure we do not attempt to delete in use directories! 277 | 278 | the checkdirs function returns the top level directories from the indexes.conf which we should be checking, for example /opt/splunk/var/lib/splunk 279 | Should not be passing around objects but this will do for now 280 | """ 281 | index_dir_check_res = False 282 | if args.deadIndexCheck: 283 | index_dir_check_res = indextuning_dirchecker.check_for_dead_dirs(index_list, vol_list, args.excludedDirs, utility) 284 | 285 | # We need to ignore these indexes for re-sizing purposes 286 | # but we need the details of these indexes for sizing estimates done later... 
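# (Indexes that index_tuning_presteps removes from index_list, for example anything on the
# KV store ignore list retrieved above, get parked in this dict so the sizingEstimates
# section further down can still include them in the overall disk usage estimates)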
287 | indexes_not_getting_sized = {} 288 | 289 | """ 290 | Begin main logic section 291 | """ 292 | # We may or may not want to run the entire complex tuning script 293 | if args.bucketTuning or args.indexSizing: 294 | index_tuning_presteps(utility, index_list, index_ignore_list, args.earliest_license, args.latest_license, args.indexNameRestriction, args.indexLimit, args.indexerhostnamefilter, args.useIntrospectionData, indexes_not_getting_sized) 295 | 296 | indexes_requiring_changes = {} 297 | conf_files_requiring_changes = [] 298 | 299 | if args.bucketTuning: 300 | (indexes_requiring_changes, conf_files_requiring_changes) = run_bucket_sizing(utility, index_list, args.indexNameRestriction, args.indexLimit, args.num_hours_per_bucket, 301 | args.bucket_contingency, args.upper_comp_ratio_level, args.min_size_to_calculate, args.numberOfIndexers, args.rep_factor_multiplier, args.do_not_lose_data_flag) 302 | 303 | if args.indexSizing: 304 | (conf_files_requiring_changes, indexes_requiring_changes, calculated_size_total) = run_index_sizing(utility, index_list, args.indexNameRestriction, args.indexLimit, 305 | args.numberOfIndexers, args.lower_index_size_limit, args.sizing_continency, args.min_days_of_license_for_sizing, args.perc_before_adjustment, args.do_not_lose_data_flag, 306 | args.undersizing_continency, args.smallbucket_size, args.skipProblemIndexes, indexes_requiring_changes, conf_files_requiring_changes, args.rep_factor_multiplier, args.upper_comp_ratio_level, 307 | args.no_sizing_comments) 308 | 309 | if args.outputTempFilesWithTuning: 310 | indextuning_indextempoutput.output_index_files_into_temp_dir(conf_files_requiring_changes, index_list, args.workingPath, indexes_requiring_changes) 311 | 312 | if args.workWithGit: 313 | success = True 314 | if not args.gitRepoURL: 315 | logger.warn("git repo URL not supplied, not performing any git work") 316 | success = False 317 | 318 | if success: 319 | if os.path.isdir(args.gitWorkingDir): 320 | logger.debug("git directory exists, determine if git is working by running git status") 321 | (output, stderr, res) = utility.run_os_process("cd " + args.gitWorkingDir + args.gitRoot + "; git status", logger) 322 | 323 | if res == False: 324 | logger.info("Failure!") 325 | # Something failed? Easiest solution is to wipe the directory + allow re-clone to occur or similar 326 | # if that fails we cancel the git component completely perhaps?! 327 | logger.warn("git error occurred while attempting to work with dir={} stdout=\"{}\" stderr=\"{}\"".format(args.gitWorkingDir, output, stderr)) 328 | shutil.rmtree(args.gitWorkingDir) 329 | os.makedirs(args.gitWorkingDir) 330 | logger.debug("Attempting clone again from url={}".format(args.gitRepoURL)) 331 | 332 | if args.gitFirstConnection: 333 | # This is a once off make it a switch?! 334 | (output, stderr, res) = utility.run_os_process("ssh -n -o \"BatchMode yes\" -o StrictHostKeyChecking=no " + args.gitRepoURL[:args.gitRepoURL.find(":")], logger) 335 | if res == False: 336 | logger.warn("Unexpected failure while attempting to trust the remote git repo. 
stdout=\"%s\", stderr=\"%s\"" % (output, stderr)) 337 | 338 | (output, stderr, res) = utility.run_os_process("cd %s; git clone %s" % (args.gitWorkingDir, args.gitRepoURL), logger, timeout=120) 339 | if res == False: 340 | logger.warn("git clone failed for some reason...on url=%s stdout=\"%s\", stderr=\"%s\"" % (args.gitRepoURL, output, stderr)) 341 | else: 342 | logger.debug("git command result is res=%s" % (res)) 343 | logger.info("Success, git is working as expected") 344 | else: 345 | if not os.path.isdir(args.gitWorkingDir): 346 | os.makedirs(args.gitWorkingDir) 347 | (output, stderr, res) = utility.run_os_process("cd %s; git clone %s" % (args.gitWorkingDir, args.gitRepoURL), logger, timeout=120) 348 | if res == False: 349 | logger.warn("git clone failed for some reason...on url %s, output is '%s', stderr is '%s'" % (args.gitRepoURL, output, stderr)) 350 | 351 | git_path = args.gitWorkingDir + args.gitRoot 352 | 353 | # Always start from master and the current version 354 | (output, stderr, res) = utility.run_os_process("cd %s; git checkout master; git pull" % (git_path), logger) 355 | if res == False: 356 | logger.warn("git checkout master or git pull failed, stdout=\"%s\" stderr=\"%s\"" % (output, stderr)) 357 | # TODO below is copy and paste, should be a method/function 358 | logger.warn("git error occurred while attempting to work with dir={} stdout=\"{}\" stderr=\"{}\"".format(args.gitWorkingDir, output, stderr)) 359 | shutil.rmtree(args.gitWorkingDir) 360 | os.makedirs(args.gitWorkingDir) 361 | logger.debug("git attempting clone again from url={}".format(args.gitRepoURL)) 362 | 363 | if args.gitFirstConnection: 364 | # This is a once off make it a switch?! 365 | (output, stderr, res) = utility.run_os_process("ssh -n -o \"BatchMode yes\" -o StrictHostKeyChecking=no " + args.gitRepoURL[:args.gitRepoURL.find(":")], logger) 366 | if res == False: 367 | logger.warn("git unexpected failure while attempting to trust the remote git repo url=%s, stdout=\"%s\", stderr=\"%s\"" % (args.gitRepoURL, output, stderr)) 368 | 369 | (output, stderr, res) = utility.run_os_process("cd %s; git clone %s" % (args.gitWorkingDir, args.gitRepoURL), logger, timeout=120) 370 | if res == False: 371 | logger.warn("git clone failed for some reason...on url=%s stdout=\"%s\", stderr=\"%s\"" % (args.gitRepoURL, output, stderr)) 372 | 373 | # At this point we've written out the potential updates 374 | indextuning_indextempoutput.output_index_files_into_temp_dir(conf_files_requiring_changes, index_list, git_path, indexes_requiring_changes, replace_slashes=False) 375 | (output, stderr, res) = utility.run_os_process("cd %s; git status | grep \"nothing to commit\"" % (git_path), logger) 376 | if res == False: 377 | # We have one or more files to commit, do something 378 | # Then we git checkout -b indextuning_20181220_1120 379 | todays_date = datetime.datetime.now().strftime("%Y-%m-%d_%H%M") 380 | (output, stderr, res) = utility.run_os_process("cd {0}; git checkout -b {1}_{2} 2>&1; git commit -am \"Updated by index auto-tuning algorithm on {2}\" 2>&1; git push origin {1}_{2} 2>&1".format(git_path, args.gitBranch, todays_date), logger) 381 | if res == False: 382 | logger.warn("git failure while creating new branch and pushing to remote git repo stdout=\"%s\" stderr=\"%s\"" % (output, stderr)) 383 | else: 384 | logger.info("Changes commited into git and pushed without warnings, stdout=\"%s\", stderr=\"%s\"" % (output, stderr)) 385 | if args.gitLabToken and args.gitLabURL: 386 | res = requests.post(args.gitLabURL, 387 | 
headers = { 'Private-Token': args.gitLabToken }, 388 | data={ 'target_branch' : 'master', 389 | 'source_branch' : args.gitBranch + "_" + todays_date, 390 | 'title' : 'Automated merge request from index tuning script on ' + todays_date }, 391 | verify=False) 392 | 393 | if res.status_code != requests.codes.ok and res.status_code != 201: 394 | logger.error("git url=%s statuscode=%s reason=%s response=\"%s\"" % (args.gitLabURL, res.status_code, res.reason, res.text)) 395 | else: 396 | logger.debug("gitlab res=\"%s\"" % (res.text)) 397 | elif args.gitLabToken: 398 | logger.warn("gitLabToken supplied but the gitLabURL has not been provided, will not create a merge request") 399 | else: 400 | logger.info("No changes to be checked into git") 401 | 402 | # If we asked for sizing estimates only, and we're not running the dead index check only option 403 | if args.sizingEstimates: 404 | total_index_allocation = 0 405 | total_estimated_index_allocation = 0 406 | 407 | #Ugly hack until I find a better way to do this 408 | total_growth_per_day_mb = [0 for i in range(0,365)] 409 | 410 | for index in list(index_list.keys()): 411 | max_total_data_size_mb = index_list[index].max_total_data_size_mb 412 | total_index_allocation = total_index_allocation + max_total_data_size_mb 413 | logger.debug("index=%s max_total_data_size_mb=%s" % (index, max_total_data_size_mb)) 414 | if hasattr(index_list[index], "estimated_total_data_size"): 415 | estimated_total_data_size = index_list[index].estimated_total_data_size 416 | logger.info("index=%s estimated_total_data_size=%s, perc_of_current_disk_utilised=%s, days_until_full_compared_to_frozen=%s, days_until_full_disk_calculation=%s, "\ 417 | " current_max_on_disk=%s, estimated_total_data_size_with_contingency=%s, perc_utilised_on_estimate=%s, days_until_full_disk_calculation_on_estimate=%s" 418 | % (index, estimated_total_data_size, index_list[index].perc_utilised, index_list[index].days_until_full, 419 | index_list[index].days_until_full_disk_calculation, index_list[index].splunk_max_disk_usage_mb, 420 | index_list[index].estimated_total_data_size_with_contingency, index_list[index].perc_utilised_on_estimate, 421 | index_list[index].days_until_full_disk_calculation_on_estimate)) 422 | 423 | # If the index is not yet full it will likely consume further disk space on the indexing tier... 
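            # Illustrative example with made-up numbers: an index estimated at
            # estimated_total_data_size=30000 (MB) with frozen_time_period_in_secs=7776000
            # (90 days) is assumed to keep growing by roughly 30000 / 90 = ~333 MB/day until
            # it is full, so ~333 is added to each of the next days_until_full entries of
            # total_growth_per_day_mb below.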
424 | if index_list[index].days_until_full > 0: 425 | total_growth_per_day_calc = estimated_total_data_size / ((index_list[index].frozen_time_period_in_secs)/60/60/24) 426 | 427 | if index_list[index].days_until_full > 365: 428 | days_until_full = 365 429 | else: 430 | days_until_full = index_list[index].days_until_full 431 | for entry in range(days_until_full): 432 | total_growth_per_day_mb[entry] = total_growth_per_day_mb[entry] + total_growth_per_day_calc 433 | 434 | total_estimated_index_allocation = total_estimated_index_allocation + estimated_total_data_size 435 | 436 | for index in list(indexes_not_getting_sized.keys()): 437 | max_total_data_size_mb = indexes_not_getting_sized[index].max_total_data_size_mb 438 | total_index_allocation = total_index_allocation + max_total_data_size_mb 439 | logger.debug("index=%s max_total_data_size_mb=%s (indexes_not_getting_sized)" % (index, max_total_data_size_mb)) 440 | if hasattr(indexes_not_getting_sized[index], "estimated_total_data_size"): 441 | estimated_total_data_size = indexes_not_getting_sized[index].estimated_total_data_size 442 | logger.info("index=%s estimated_total_data_size=%s (indexes_not_getting_sized), perc_of_current_disk_utilised=%s, days_until_full_compared_to_frozen=%s, " \ 443 | " days_until_full_disk_calculation=%s, current_max_on_disk=%s, estimated_total_data_size_with_contingency=%s, perc_utilised_on_estimate=%s, " \ 444 | " days_until_full_disk_calculation_on_estimate=%s" 445 | % (index, estimated_total_data_size, indexes_not_getting_sized[index].perc_utilised, indexes_not_getting_sized[index].days_until_full, 446 | indexes_not_getting_sized[index].days_until_full_disk_calculation, indexes_not_getting_sized[index].splunk_max_disk_usage_mb, 447 | indexes_not_getting_sized[index].estimated_total_data_size_with_contingency, indexes_not_getting_sized[index].perc_utilised_on_estimate, 448 | indexes_not_getting_sized[index].days_until_full_disk_calculation_on_estimate)) 449 | 450 | # If the index is not yet full it will likely consume further disk space on the indexing tier... 
451 | if indexes_not_getting_sized[index].days_until_full > 0: 452 | total_growth_per_day_calc = estimated_total_data_size / ((indexes_not_getting_sized[index].frozen_time_period_in_secs)/60/60/24) 453 | 454 | if indexes_not_getting_sized[index].days_until_full > 365: 455 | days_until_full = 365 456 | else: 457 | days_until_full = indexes_not_getting_sized[index].days_until_full 458 | 459 | for entry in range(days_until_full): 460 | total_growth_per_day_mb[entry] = total_growth_per_day_mb[entry] + total_growth_per_day_calc 461 | 462 | total_estimated_index_allocation = total_estimated_index_allocation + estimated_total_data_size 463 | 464 | total_vol_size = 0 465 | total_in_use_currently = 0 466 | for vol in list(vol_list.keys()): 467 | if hasattr(vol_list[vol], "max_vol_data_size_mb"): 468 | vol_size = vol_list[vol].max_vol_data_size_mb 469 | 470 | # Determine current disk utilisation for this volume 471 | stat = os.statvfs(vol_list[vol].path) 472 | used_in_mb = ((stat.f_blocks-stat.f_bfree)*stat.f_bsize)/1024/1024 473 | if vol != "_splunk_summaries": 474 | total_vol_size = total_vol_size + vol_size 475 | total_in_use_currently = total_in_use_currently + used_in_mb 476 | logger.info("volume=%s max_vol_data_size_mb=%s used_in_mb=%s" % (vol, vol_size, used_in_mb)) 477 | else: 478 | logger.info("volume=%s, has no maxVolumedata_size_mb setting" % (vol)) 479 | logger.info("Summary: total_index_allocated=%s total_volume_allocated=%s (excluding _splunk_summaries)" % (total_index_allocation, total_vol_size)) 480 | 481 | total_available = total_vol_size - total_in_use_currently 482 | logger.debug("total_available=%s, total_vol_size=%s, total_in_use_currently=%s" % (total_available, total_vol_size, total_in_use_currently)) 483 | 484 | day_counter = 0 485 | while total_available > 0 and day_counter < 365: 486 | total_available = total_available - total_growth_per_day_mb[day_counter] 487 | day_counter = day_counter + 1 488 | 489 | if day_counter >= 365: 490 | logger.info("Based on a combined available volume size of %s with %s in use currently, leaving %s available, I am calculating we will not run out of disk in the next 365 days" 491 | % (total_vol_size, total_in_use_currently, total_available)) 492 | else: 493 | logger.info("Based on a combined available volume size of %s with %s in use currently, leaving %s available, I am calculating %s days before we run out of disk" 494 | % (total_vol_size, total_in_use_currently, total_available, day_counter)) 495 | 496 | 497 | if total_estimated_index_allocation > 0: 498 | if args.indexLimit < len(index_list): 499 | logger.warn("Estimated size cannot be accurate as we have looked at index_limit=%s of total_indexes=%s" % (args.indexLimit, len(index_list))) 500 | logger.info("estimated_index_allocation=%s" % (total_estimated_index_allocation)) 501 | 502 | if index_dir_check_res: 503 | hot_dirs_checked = list(index_dir_check_res["hot_dirs_checked"].keys()) 504 | cold_dirs_checked = list(index_dir_check_res["cold_dirs_checked"].keys()) 505 | summaries_dirs_checked = list(index_dir_check_res["summaries_dirs_checked"].keys()) 506 | dead_hot_dirs = index_dir_check_res["hot_dirs_dead"] 507 | dead_cold_dirs = index_dir_check_res["cold_dirs_dead"] 508 | dead_summary_dirs = index_dir_check_res["summaries_dirs_dead"] 509 | 510 | #Note that duplicate values can be returned if the same path is used for hot/cold or summaries, for example /opt/splunk/var/lib/splunk 511 | #may be duplicated... 
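    # For reference, index_dir_check_res (built by check_for_dead_dirs) is shaped roughly like:
    #   { "hot_dirs_checked": {"/opt/splunk/var/lib/splunk": True, ...},
    #     "hot_dirs_dead": {"leftover_index_dir": ["/opt/splunk/var/lib/splunk"], ...},
    #     "cold_dirs_checked": {...}, "cold_dirs_dead": {...},
    #     "summaries_dirs_checked": {...}, "summaries_dirs_dead": {...} }
    # where each *_dead entry maps a leftover directory name to the parent path(s) it was found
    # under (the directory names above are illustrative only)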
512 | #output the remaining data around "dead" indexes which have directories on the filesystem but no matching config 513 | logger.info("The following directories were checked to ensure that they are still in use by Splunk indexes in the indexes hot path=\"%s\"" % (hot_dirs_checked)) 514 | if len(list(dead_hot_dirs.keys())) > 0: 515 | logger.info("The below list were located in the above directories but no mention in the btool output, these should likely be removed from the filesystem:") 516 | for line in list(dead_hot_dirs.keys()): 517 | for entry in dead_hot_dirs[line]: 518 | #We escaped spaces for the shell, but we do not want spaces escaped for python 519 | line = line.replace('\\ ',' ') 520 | thedir = entry + "/" + line 521 | if args.deadIndexDelete and not line == "\\$_index_name": 522 | if os.path.isdir(thedir): 523 | logger.info("Wiping directory %s" % (thedir)) 524 | shutil.rmtree(thedir) 525 | else: 526 | logger.warn("dir=%s does not exist, no deletion required" % (thedir)) 527 | else: 528 | logger.info(thedir) 529 | else: 530 | logger.info("No dead hot dirs found") 531 | 532 | #output the remaining data around "dead" indexes which have directories on the filesystem but no matching config 533 | logger.info("The following directories were checked to ensure that they are still in use by Splunk indexes in the indexes cold path=\"%s\"" % (cold_dirs_checked)) 534 | if len(list(dead_cold_dirs.keys())) > 0: 535 | logger.info("The below list were located in the above directories but no mention in the btool output, these should likely be removed from the filesystem:") 536 | for line in list(dead_cold_dirs.keys()): 537 | for entry in dead_cold_dirs[line]: 538 | #We escaped spaces for the shell, but we do not want spaces escaped for python 539 | line = line.replace('\\ ',' ') 540 | thedir = entry + "/" + line 541 | if args.deadIndexDelete and not line == "\\$_index_name": 542 | if os.path.isdir(thedir): 543 | logger.info("dir=%s deleted due to not existing and deadIndexDelete flag enabled" % (thedir)) 544 | shutil.rmtree(thedir) 545 | else: 546 | logger.warn("dir=%s does not exist, no deletion required" % (thedir)) 547 | else: 548 | logger.info(thedir) 549 | else: 550 | logger.info("No dead cold dirs found") 551 | 552 | #output the remaining data around "dead" indexes which have directories on the filesystem but no matching config 553 | logger.info("The following directories were checked to ensure that they are still in use by Splunk indexes in the summaries path=\"%s\"" % (summaries_dirs_checked)) 554 | if len(list(dead_summary_dirs.keys())) > 0: 555 | logger.info("The below list were located in the above directories but no mention in the btool output, these should likely be removed from the filesystem:") 556 | for line in list(dead_summary_dirs.keys()): 557 | for entry in dead_summary_dirs[line]: 558 | #We escaped spaces for the shell, but we do not want spaces escaped for python 559 | line = line.replace('\\ ',' ') 560 | thedir = entry + "/" + line 561 | if args.deadIndexDelete and not line == "\\$_index_name": 562 | if os.path.isdir(thedir): 563 | logger.info("dir=%s deleted due to not existing and deadIndexDelete flag enabled" % (thedir)) 564 | shutil.rmtree(thedir) 565 | else: 566 | logger.warn("dir=%s does not exist, no deletion required" % (thedir)) 567 | else: 568 | logger.info(thedir) 569 | else: 570 | logger.info("No dead summary dirs found") 571 | 572 | if args.deadIndexDelete: 573 | unique_directories_checked = hot_dirs_checked + cold_dirs_checked + summaries_dirs_checked 574 
| unique_directories_checked = list(set(unique_directories_checked)) 575 | 576 | for a_dir in unique_directories_checked: 577 | sub_dir_list = utility.listdirs(a_dir) 578 | for a_sub_dir in sub_dir_list: 579 | try: 580 | os.rmdir(a_sub_dir) 581 | except OSError as e: 582 | continue 583 | logger.info("dir=%s directory was deleted" % (a_sub_dir)) 584 | 585 | if len(list(dead_hot_dirs.keys())) > 0 and not args.deadIndexDelete: 586 | for line in list(dead_hot_dirs.keys()): 587 | for entry in dead_hot_dirs[line]: 588 | print(entry + "/" + line) 589 | 590 | if len(list(dead_cold_dirs.keys())) > 0 and not args.deadIndexDelete: 591 | for line in list(dead_cold_dirs.keys()): 592 | for entry in dead_cold_dirs[line]: 593 | print(entry + "/" + line) 594 | 595 | if len(list(dead_summary_dirs.keys())) > 0 and not args.deadIndexDelete: 596 | for line in list(dead_summary_dirs.keys()): 597 | for entry in dead_summary_dirs[line]: 598 | print(entry + "/" + line) 599 | 600 | logger.info("End index sizing script with args=\"%s\"" % (clean_args)) 601 | -------------------------------------------------------------------------------- /bin/indextuning_dirchecker.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import logging 4 | 5 | logger = logging.getLogger() 6 | 7 | # check_dirs checks that the directories on the filesystem relate to a real index and have not 8 | # accidentally been left here by indexes that have been deleted 9 | # If they have been left here it suggests a list of directories that could be deleted 10 | def check_for_dead_dirs(index_list, vol_list, excluded_dirs, utility): 11 | 12 | index_dirs_to_check_hot = {} 13 | index_dirs_to_check_cold = {} 14 | summary_dirs_to_check = {} 15 | index_dirs_to_check_thawed = {} 16 | 17 | # Splunk uses the $SPLUNK_DB variable to specify the default location of the data 18 | splunkDBLoc = os.environ['SPLUNK_DB'] 19 | for index in index_list: 20 | # expecting something similar to 21 | # home_path = volume:hot/$_index_name/db 22 | # cold_path = volume:cold/$_index_name/colddb 23 | # tstats_home_path = volume:_splunk_summaries/$_index_name/datamodel_summary 24 | home_path = index_list[index].home_path 25 | cold_path = index_list[index].cold_path 26 | tstats_home_path = index_list[index].tstats_home_path 27 | thawed_path = index_list[index].thawed_path 28 | if hasattr(index_list[index], "cold_to_frozen_dir"): 29 | cold_to_frozen_dir = index_list[index].cold_to_frozen_dir 30 | else: 31 | cold_to_frozen_dir = False 32 | 33 | logger.debug("dead dirs prechanges index=%s home_path=%s cold_path=%s tstats_home_path=%s thawed_path=%s cold_to_frozen_dir=%s" % (index, home_path, cold_path, tstats_home_path, thawed_path, cold_to_frozen_dir)) 34 | # Ok we found a volume, replace it with the full directory path for the dir function to work 35 | if (home_path.find("volume:") != -1): 36 | end = home_path.find("/") 37 | findVol = home_path[7:end] 38 | home_path = home_path.replace("volume:%s" % (findVol), vol_list[findVol].path) 39 | 40 | # Ok we found a volume, replace it with the full directory path for the dir function to work 41 | if (cold_path.find("volume:") != -1): 42 | end = cold_path.find("/") 43 | findVol = cold_path[7:end] 44 | cold_path = cold_path.replace("volume:%s" % (findVol), vol_list[findVol].path) 45 | 46 | # Ok we found a volume, replace it with the full directory path for the dir function to work 47 | if (tstats_home_path.find("volume:") != -1): 48 | end = 
tstats_home_path.find("/") 49 | findVol = tstats_home_path[7:end] 50 | tstats_home_path = tstats_home_path.replace("volume:%s" % (findVol), vol_list[findVol].path) 51 | 52 | home_path = home_path.replace("$SPLUNK_DB", splunkDBLoc) 53 | cold_path = cold_path.replace("$SPLUNK_DB", splunkDBLoc) 54 | tstats_home_path = tstats_home_path.replace("$SPLUNK_DB", splunkDBLoc) 55 | thawed_path = thawed_path.replace("$SPLUNK_DB", splunkDBLoc) 56 | if cold_to_frozen_dir: 57 | cold_to_frozen_dir = cold_to_frozen_dir.replace("$SPLUNK_DB", splunkDBLoc) 58 | 59 | # $_index_name is just a variable for the index name stanza 60 | home_path = home_path.replace("$_index_name", index).replace("//","/").lower() 61 | cold_path = cold_path.replace("$_index_name", index).replace("//","/").lower() 62 | tstats_home_path = tstats_home_path.replace("$_index_name", index).replace("//","/").lower() 63 | thawed_path = thawed_path.replace("$_index_name", index).replace("//","/").lower() 64 | if cold_to_frozen_dir: 65 | cold_to_frozen_dir = cold_to_frozen_dir.replace("//","/").lower() 66 | 67 | # Splunk changes any directory specified in mixed case for home_path/cold_path/tstats_home_path locations to lowercase 68 | # btool does not therefore we lower() here 69 | index_list[index].home_path = home_path 70 | index_list[index].cold_path = cold_path 71 | index_list[index].tstats_home_path = tstats_home_path 72 | index_list[index].thawed_path = thawed_path 73 | index_list[index].cold_to_frozen_dir = cold_to_frozen_dir 74 | 75 | # Drop off the /db/, /cold_path, or /datamodel directories off the end of, for example /opt/splunk/var/lib/splunk/_internaldb/db 76 | home_path = home_path[:home_path.rfind("/")] 77 | cold_path = cold_path[:cold_path.rfind("/")] 78 | tstats_home_path = tstats_home_path[:tstats_home_path.rfind("/")] 79 | thawed_path = thawed_path[:thawed_path.rfind("/")] 80 | if index_list[index].cold_to_frozen_dir: 81 | cold_to_frozen_dir = cold_to_frozen_dir[:cold_to_frozen_dir.rfind("/")] 82 | else: 83 | cold_to_frozen_dir = False 84 | 85 | # drop off the / off the end, for example /opt/splunk/var/lib/splunk/_internaldb 86 | # this leaves a top level directory such as /opt/splunk/var/lib/splunk 87 | home_path_dir = home_path[:home_path.rfind("/")] 88 | cold_path_dir = cold_path[:cold_path.rfind("/")] 89 | tstats_home_path_dir = tstats_home_path[:tstats_home_path.rfind("/")] 90 | thawed_path_dir = thawed_path[:thawed_path.rfind("/")] 91 | 92 | # keep the dictionary up-to-date with directories that must be checked 93 | index_dirs_to_check_hot[home_path_dir] = True 94 | index_dirs_to_check_cold[cold_path_dir] = True 95 | summary_dirs_to_check[tstats_home_path_dir] = True 96 | index_dirs_to_check_thawed[thawed_path_dir] = True 97 | 98 | logger.debug("dead dirs postchanges index=%s home_path=%s cold_path=%s tstats_home_path=%s thawed_path=%s cold_to_frozen_dir=%s" % (index, home_path, cold_path, tstats_home_path, thawed_path, cold_to_frozen_dir)) 99 | 100 | # At this point we know what indexes we need to check 101 | dead_index_dir_list_hot = check_dirs(index_list, index_dirs_to_check_hot, excluded_dirs, utility) 102 | dead_index_dir_list_cold = check_dirs(index_list, index_dirs_to_check_cold, excluded_dirs, utility) 103 | dead_index_dir_list_summaries = check_dirs(index_list, summary_dirs_to_check, excluded_dirs, utility) 104 | dead_index_dir_list_thawed = check_dirs(index_list, index_dirs_to_check_thawed, excluded_dirs, utility) 105 | 106 | logger.debug("Returning these lists to be checked: dead_index_dir_list_hot=\"%s\", 
dead_index_dir_list_cold=\"%s\", dead_index_dir_list_summaries=\"%s\", dead_index_dir_list_thawed=\"%s\"" 107 | % (dead_index_dir_list_hot, dead_index_dir_list_cold, dead_index_dir_list_summaries, dead_index_dir_list_thawed)) 108 | return { "hot_dirs_checked" : index_dirs_to_check_hot, "hot_dirs_dead": dead_index_dir_list_hot, "cold_dirs_checked" : index_dirs_to_check_cold, 109 | "cold_dirs_dead" : dead_index_dir_list_cold, "summaries_dirs_checked" : summary_dirs_to_check, "summaries_dirs_dead" : dead_index_dir_list_summaries, 110 | "thawed_dirs_checked" : dead_index_dir_list_thawed } 111 | 112 | def check_dirs(index_list, dirsToCheck, excluded_dirs, utility): 113 | dead_dir_list = {} 114 | # For each directory that we should be checking we check if we have an index that relates to the sub-directories, if not it's probably an old directory 115 | # left around by an index that has been removed from the config but left on the filesystem 116 | for dirs in list(dirsToCheck.keys()): 117 | # list the directories we see under the specified paths, ignoring files 118 | logger.debug("Now checking directory=%s" % (dirs)) 119 | try: 120 | dirlist = utility.listdirs(dirs) 121 | except OSError as e: 122 | if e.strerror.find("No such file or directory") != -1: 123 | print(e) 124 | 125 | for dir in dirlist: 126 | found = False 127 | logger.debug("Checking subdir=%s of dir=%s" % (dir, dirs)) 128 | # If we cannot find any mention of this index name then most likely it exists from a previous config / needs cleanup 129 | abs_dir = dirs + "/" + dir 130 | for index in index_list: 131 | home_path = index_list[index].home_path 132 | home_path = home_path[:home_path.rfind("/")] 133 | cold_path = index_list[index].cold_path 134 | cold_path = cold_path[:cold_path.rfind("/")] 135 | tstats_home_path = index_list[index].tstats_home_path 136 | tstats_home_path = tstats_home_path[:tstats_home_path.rfind("/")] 137 | thawed_path = index_list[index].thawed_path 138 | thawed_path = thawed_path[:thawed_path.rfind("/")] 139 | if index_list[index].cold_to_frozen_dir: 140 | cold_to_frozen_dir = index_list[index].cold_to_frozen_dir 141 | cold_to_frozen_dir2 = index_list[index].cold_to_frozen_dir 142 | cold_to_frozen_dir = cold_to_frozen_dir[:cold_to_frozen_dir.rfind("/")] 143 | else: 144 | cold_to_frozen_dir = False 145 | cold_to_frozen_dir2 = False 146 | 147 | # logger.debug("home path is %s" % (home_path)) 148 | if abs_dir==home_path or abs_dir==cold_path or abs_dir==tstats_home_path or abs_dir==thawed_path or abs_dir==cold_to_frozen_dir: 149 | found = True 150 | break 151 | else: 152 | # don't include the excluded directories 153 | if dir in excluded_dirs: 154 | logger.debug("dir=%s is excluded so marking it found" % (dir)) 155 | found = True 156 | break 157 | if not found: 158 | logger.debug("dir=%s not found in the btool listing for splunk btool indexes list --debug" % (dir)) 159 | # If someone created the $_index_name on the filesystem... 
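                # (assumption from the function name only, the utility implementation is not shown
                # here: replace_dollar_symbols appears to escape/strip the literal "$" so a directory
                # actually named "$_index_name" on disk can be reported and later deleted safely)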
160 | dead_dir = utility.replace_dollar_symbols(dir) 161 | if not dead_dir in dead_dir_list: 162 | dead_dir_list[dead_dir] = [] 163 | dead_dir_list[dead_dir].append(dirs) 164 | logger.debug("dir=%s appears to be unused, adding to the list to be removed" % (dead_dir)) 165 | 166 | try: 167 | sub_dir_list = utility.listdirs(abs_dir) 168 | logger.debug("Working with sub_dirs=\"%s\" from abs_dir=%s" % (sub_dir_list, abs_dir)) 169 | except OSError as e: 170 | if e.strerror.find("No such file or directory") != -1: 171 | logger.error(e) 172 | continue 173 | 174 | found2 = False 175 | for a_dir in sub_dir_list: 176 | #These are always excluded as they should never be deleted 177 | if dir in excluded_dirs: 178 | continue 179 | abs_dir2 = abs_dir + "/" + a_dir 180 | for index in index_list: 181 | if abs_dir2==index_list[index].home_path or abs_dir2==index_list[index].cold_path or abs_dir2==index_list[index].tstats_home_path \ 182 | or abs_dir2==index_list[index].thawed_path or abs_dir2==cold_to_frozen_dir2: 183 | found2 = True 184 | break 185 | if not found2: 186 | logger.debug("dir=%s not found in the btool listing for splunk btool indexes list --debug / home_path's" % (abs_dir2)) 187 | #If someone created the $_index_name on the filesystem... 188 | dead_dir = utility.replace_dollar_symbols(dir + "/" + a_dir) 189 | if not dead_dir in dead_dir_list: 190 | dead_dir_list[dead_dir] = [] 191 | dead_dir_list[dead_dir].append(dirs) 192 | logger.debug("dir=%s appears to be unused, adding to the list" % (dead_dir)) 193 | 194 | return dead_dir_list 195 | -------------------------------------------------------------------------------- /bin/indextuning_indextempoutput.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from io import open 4 | import logging 5 | import six 6 | 7 | logger = logging.getLogger() 8 | 9 | ############################### 10 | # 11 | # Begin index file re-writing into temp directory 12 | # This function exists to read a (real) indexes.conf file from the filesystem and to change and/or add any lines that 13 | # we require into the indexes.conf file 14 | # We write that out to a new directory so that we can run differencing between existing and new file 15 | # 16 | ############################### 17 | def output_index_files_into_temp_dir(conf_files_requiring_change, index_list, path, indexes_requiring_changes, replace_slashes=True): 18 | #Create the required directory 19 | try: 20 | os.mkdir(path, 0o750) 21 | except OSError: 22 | #Debug level because this directory may already exist after previous runs 23 | logger.debug("Creation of the dir=%s failed" % path) 24 | 25 | # At this point we have a list of files requiring changes, a list of indexes with that file that require changing 26 | # we now read through the file and output an equivalent file in the working path that is the tuned version 27 | # we can (outside this script) diff the 2 files and / or implement the new file as required on the cluster master 28 | regex = re.compile(r"^\s*([^= ]+)") 29 | 30 | for a_file in conf_files_requiring_change: 31 | with open(a_file) as file: 32 | # TODO find a nicer way to do this 33 | # there is no obvious way to determine the end of an index stanza entry or any stanza entry in the indexes.conf file, therefore we know 34 | # that we have finished the stanza entry when we have either reached a new entry or the end of the file 35 | # however that means we'd have [indexxxx]...\n\n\n[nextindexyyy]... 
36 | # to ensure we have [indexxxx]...\n\n[nextindexyyy]...the script prints 2 lines behind to the file... 37 | previous_line = False 38 | prev_previous_line = False 39 | index_name = "" 40 | changes_required = False 41 | max_data_size_done = False 42 | max_total_data_size_done = False 43 | 44 | # name the output file based on the location on disk of the conf file 45 | # which means we replace / with _ symbols 46 | if replace_slashes: 47 | output_file = a_file[a_file.find("slave-apps"):].replace("/", "_") 48 | else: 49 | output_file = a_file[a_file.find("slave-apps")+11:] 50 | # output a new file in the working directory with our tuning modifications 51 | output_h = open(path + "/" + output_file, "w") 52 | 53 | for line in file: 54 | logger.debug("Working with line: %s" % (line)) 55 | if (prev_previous_line): 56 | output_h.write(six.text_type(prev_previous_line)) 57 | 58 | # We found a stanza 59 | if (line.find("[") == 0): 60 | # We don't need to do much with a volume stanza 61 | if (line.find("[volume:") == -1): 62 | # We have moved onto a new index entry, but did we finish our previous job? 63 | # It's possible that maxTotalDataSizeMB was never specified in the stanza as it's optional 64 | # therefore we now write it out 65 | if (changes_required != False): 66 | output_edge_case(changes_required, index_list, max_data_size_done, max_total_data_size_done, output_h, index_name) 67 | 68 | # Some items are written into every index entry such as maxDataSize and maxTotalDataSize 69 | max_data_size_done = False 70 | max_total_data_size_done = False 71 | 72 | end = line.find("]") 73 | index_name = line[1:end] 74 | if (index_name in indexes_requiring_changes and index_list[index_name].checked): 75 | changes_required = indexes_requiring_changes[index_name].split("_") 76 | logger.debug("index list info=\"%s\"" % (index_list[index_name])) 77 | else: 78 | changes_required = False 79 | else: 80 | changes_required = False 81 | 82 | # We are somewhere after the [index...] 
stanza 83 | if (changes_required != False): 84 | result = regex.match(line) 85 | stanza = result.group(1) 86 | 87 | # If we have changes and we come across the stanza that requires changes, write it out, potentially with a comment we created earlier 88 | if (("bucket" in changes_required) and stanza == "maxDataSize"): 89 | recommended_bucket_size = index_list[index_name].recommended_bucket_size 90 | comment = index_list[index_name].change_comment['bucket'] 91 | #strip off the newline character from the line before adding to the log, otherwise the log has random newlines in it 92 | logger.debug("old_line=%s, new_line=%s (newline) maxDataSize=%s" % (line[:-1], comment[:-1], recommended_bucket_size)) 93 | #overwrite the old line with the new one 94 | line = "%smaxDataSize = %s\n" % (comment, recommended_bucket_size) 95 | max_data_size_done = True 96 | elif (("sizing" in changes_required) and stanza == "maxTotalDataSizeMB"): 97 | calc_max_total_data_size_mb = index_list[index_name].calc_max_total_data_size_mb 98 | comment = index_list[index_name].change_comment['sizing'] 99 | # strip off the newline character from the line before adding to the log 100 | logger.debug("old_line=%s, new_line=%s (newline) maxTotalDataSizeMB=%s" % (line[:-1], comment[:-1], calc_max_total_data_size_mb)) 101 | line = "%smaxTotalDataSizeMB = %s\n" % (comment, calc_max_total_data_size_mb) 102 | max_total_data_size_done = True 103 | elif (("sizing" in changes_required) and stanza == "homePath.maxDataSizeMB"): 104 | homepath_max_data_size_mb = index_list[index_name].homepath_max_data_size_mb 105 | # strip off the newline character from the line before adding to the log 106 | logger.debug("old_line=%s, new_line=\"homePath.maxDataSizeMB=%s\"" % (line[:-1], homepath_max_data_size_mb)) 107 | line = "homePath.maxDataSizeMB = %s\n" % (homepath_max_data_size_mb) 108 | elif (("sizing" in changes_required) and stanza == "coldPath.maxDataSizeMB"): 109 | coldpath_max_datasize_mb = index_list[index_name].coldpath_max_datasize_mb 110 | # strip off the newline character from the line before adding to the log 111 | logger.debug("old_line %s, new_line=\"coldPath.maxDataSizeMB=%s\"" % (line[:-1], coldpath_max_datasize_mb)) 112 | line = "coldPath.maxDataSizeMB = %s\n" % (coldpath_max_datasize_mb) 113 | # record the previous, previous line if we have recorded a previous line already 114 | if (previous_line): 115 | prev_previous_line = previous_line 116 | previous_line = line 117 | 118 | # This is an edge case but what if changes required and they were not done already 119 | # and we hit the end of the file? 
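        # (for example, a hypothetical stanza such as [lastindex] at the very end of the conf file that
        # needs a new maxTotalDataSizeMB but never declared one, so the loop above never emitted it)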
120 | # Then we print out all the required information now 121 | if (changes_required == False): 122 | pass 123 | else: 124 | output_edge_case(changes_required, index_list, max_data_size_done, max_total_data_size_done, output_h, index_name) 125 | 126 | # print out the remaining lines 127 | output_h.write(six.text_type(prev_previous_line)) 128 | output_h.write(six.text_type(previous_line)) 129 | 130 | # After we get to a new index entry we might have missed stanzas from the last index entry we were working on 131 | # add them to the output file now 132 | def output_edge_case(changes_required, index_list, max_data_size_done, max_total_data_size_done, output_h, index_name): 133 | if ("bucket" in changes_required and not "sizing" in changes_required and not max_data_size_done): 134 | recommended_bucket_size = index_list[index_name].recommended_bucket_size 135 | comment = index_list[index_name].change_comment['bucket'] 136 | logger.debug("Never found this so writing it now line=\"%s\" (newline) line=\"maxDataSize=%s\" with a preceding comment=\"%s\"" % (comment, recommended_bucket_size, comment[:-1])) 137 | #Write the comment before the bucket sizing, so we record why this was changed 138 | output_h.write(six.text_type(comment)) 139 | output_h.write(six.text_type("maxDataSize = %s\n" % (recommended_bucket_size))) 140 | elif ("sizing" in changes_required and not "bucket" in changes_required and not max_total_data_size_done): 141 | calc_max_total_data_size_mb = index_list[index_name].calc_max_total_data_size_mb 142 | comment = index_list[index_name].change_comment['sizing'] 143 | output_h.write(six.text_type(comment)) 144 | logger.debug("Never found this so writing it now line=\"%s\" (newline) line=\"maxTotalDataSizeMB=%s\"" % (comment[:-1], calc_max_total_data_size_mb)) 145 | output_h.write(six.text_type("maxTotalDataSizeMB = %s\n" % (calc_max_total_data_size_mb))) 146 | elif ("bucket" in changes_required and "sizing" in changes_required): 147 | recommended_bucket_size = index_list[index_name].recommended_bucket_size 148 | calc_max_total_data_size_mb = index_list[index_name].calc_max_total_data_size_mb 149 | 150 | # If we have not yet written the maxDataSize or maxTotalDataSize entries we write them together 151 | if (not max_data_size_done): 152 | comment = index_list[index_name].change_comment['bucket'] 153 | logger.debug("Never found this so writing it now line=\"%s\" (newline) line=\"maxDataSize=%s\"" % (comment[:-1], recommended_bucket_size)) 154 | output_h.write(six.text_type(comment)) 155 | output_h.write(six.text_type("maxDataSize = %s\n" % (recommended_bucket_size))) 156 | if (not max_total_data_size_done): 157 | comment = index_list[index_name].change_comment['sizing'] 158 | logger.debug("Never found this so writing it now line=\"%s\" (newline) line=\"maxTotalDataSizeMB=%s\"" % (comment[:-1], calc_max_total_data_size_mb)) 159 | output_h.write(six.text_type(comment)) 160 | output_h.write(six.text_type("maxTotalDataSizeMB = %s\n" % (calc_max_total_data_size_mb))) 161 | #If we have a sizing comment to add and it was not added, do it now... 
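    # Reminder: changes_required is the underscore-delimited change list split by the caller, e.g. a
    # hypothetical indexes_requiring_changes["myindex"] == "bucket_sizing_sizingcomment" arrives here
    # as ["bucket", "sizing", "sizingcomment"]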
162 | if (changes_required != False and "sizingcomment" in changes_required): 163 | comment = index_list[index_name].change_comment['sizingcomment'] 164 | output_h.write(six.text_type(comment)) 165 | logger.debug("Wrote the sizing comment=\"%s\"" % (comment[:-1])) -------------------------------------------------------------------------------- /bin/indextuning_utility.py: -------------------------------------------------------------------------------- 1 | import six.moves.urllib.request, six.moves.urllib.parse, six.moves.urllib.error 2 | import requests 3 | from xml.dom import minidom 4 | import re 5 | from subprocess import Popen, PIPE, check_output 6 | import threading 7 | import six.moves.queue 8 | from time import sleep 9 | import sys 10 | import os 11 | import logging 12 | from io import open 13 | 14 | logger = logging.getLogger() 15 | 16 | # Splunk volume 17 | class volume: 18 | def __init__(self, name): 19 | self.name = name 20 | 21 | 22 | # Splunk index 23 | class index: 24 | def __init__(self, name): 25 | self.name = name 26 | 27 | 28 | # Index Tuning Utility Class 29 | # runs queries against Splunk or Splunk commands 30 | class utility: 31 | username = "" 32 | password = "" 33 | splunkrest = "" 34 | earliest_time = "" 35 | 36 | def __init__(self, username, password, splunkrest, earliest_time): 37 | self.username = username 38 | self.password = password 39 | self.splunkrest = splunkrest 40 | self.earliest_time = earliest_time 41 | 42 | # Run a search query against the Splunk restful API 43 | def run_search_query(self, searchQuery): 44 | baseurl = 'https://' + self.splunkrest 45 | 46 | # For troubleshooting 47 | logger.debug("SearchQuery=\n" + searchQuery) 48 | 49 | # Run the search 50 | url = baseurl + '/services/search/jobs' 51 | data = {'search': searchQuery, 'earliest_time': self.earliest_time, 52 | 'latest_time': 'now', 'output_mode': "json", 53 | "exec_mode": "oneshot", "timeout": 0 } 54 | logger.debug("Running against URL=%s with username=%s "\ 55 | "with data_load=%s" % (url, self.username, data)) 56 | res = requests.post(url, auth=(self.username, self.password), 57 | data=data, verify=False) 58 | 59 | if res.status_code != requests.codes.ok: 60 | logger.error("Failed to run search on URL=%s with username=%s "\ 61 | "response_code=%s, reason=%s, text=%s, payload=%s" 62 | % (url, self.username, res.status_code, 63 | res.reason, res.text, data)) 64 | sys.exit(-1) 65 | 66 | if res.text != "": 67 | logger.debug("Result from query=%s" % (res.text)) 68 | else: 69 | logger.warn("Result from query=%s URL=%s with username=%s "\ 70 | "response_code=%s, reason=%s, text=%s, payload=%s" 71 | % (res.text, url, self.username, res.status_code, 72 | res.reason, res.text, data)) 73 | res = requests.post(url, auth=(self.username, self.password), 74 | data=data, verify=False) 75 | if res.text != "": 76 | logger.info("Query retry suceeded") 77 | else: 78 | logger.warn("Query retry failed") 79 | 80 | return res.json() 81 | 82 | ############## 83 | # 84 | # Read the btool output of splunk btool indexes list --debug 85 | # 86 | ############## 87 | # Run the btool command and parse the output 88 | def parse_btool_output(self): 89 | # Keep a large dictionary of the indexes and associated information 90 | indexes = {} 91 | volumes = {} 92 | 93 | # Run python btool and read the output, use debug switch to obtain 94 | # file names 95 | logger.debug("Running /opt/splunk/bin/splunk btool indexes list --debug") 96 | output = check_output(["/opt/splunk/bin/splunk", "btool", "indexes", 97 | "list", 
"--debug"]) 98 | output = output.split("\n") 99 | 100 | # If we are currently inside a [volume...] stanza or not... 101 | disabled = False 102 | 103 | # Work line by line on the btool output 104 | for line in output: 105 | # don't attempt to parse empty lines 106 | if line == "": 107 | continue 108 | 109 | logger.debug("Working with line=" + line) 110 | # Search for the [] 111 | # Split the string /opt/splunk/etc/slave-apps/_cluster/local/indexes.conf [_internal] 112 | # into 2 pieces 113 | regex = re.compile(r"^([^ ]+) +(.*)") 114 | result = regex.match(line) 115 | conf_file = result.group(1) 116 | string_res = result.group(2) 117 | logger.debug("conf_file=%s, string_res=%s" % (conf_file, string_res)) 118 | # Further split a line such as: 119 | # homePath.maxDataSizeMB = 0 120 | # into 2 pieces... 121 | # we utilise this later in the code 122 | regex2 = re.compile(r"^([^= ]+)\s*=\s*(.*)") 123 | 124 | stanza = "" 125 | value = "" 126 | if string_res[0] == "[": 127 | # Skip volume entries 128 | if string_res.find("[volume") == -1: 129 | # Slice it out of the string 130 | index_name = string_res[1:len(string_res)-1] 131 | cur_index = index(index_name) 132 | cur_index.conf_file = conf_file 133 | in_index_mode = True 134 | logger.debug("Working with index=" + str(cur_index)) 135 | else: 136 | # skip the volume:... part and the ] at the end 137 | vol_name = string_res[8:len(string_res)-1] 138 | vol = volume(vol_name) 139 | vol.conf_file = conf_file 140 | in_index_mode = False 141 | logger.debug("Working with volume=" + vol_name) 142 | # We are done with this round of the loop 143 | continue 144 | else: 145 | result2 = regex2.match(string_res) 146 | stanza = result2.group(1) 147 | value = result2.group(2) 148 | 149 | logger.debug("Working with stanza=%s and value=%s" % (stanza, value)) 150 | if stanza == "disabled": 151 | if value == "1": 152 | disabled = True 153 | # Path exists only within volumes 154 | elif stanza == "path" and not in_index_mode: 155 | vol.path = value 156 | elif stanza == "coldPath.maxDataSizeMB" and in_index_mode: 157 | cur_index.coldpath_max_datasize_mb = int(value) 158 | elif stanza == "coldToFrozenDir" and in_index_mode: 159 | if value != "": 160 | cur_index.cold_to_frozen_dir = value 161 | elif stanza == "datatype" and in_index_mode: 162 | cur_index.datatype = value 163 | elif stanza == "frozenTimePeriodInSecs" and in_index_mode: 164 | cur_index.frozen_time_period_in_secs = int(value) 165 | elif stanza == "homePath.maxDataSizeMB" and in_index_mode: 166 | cur_index.homepath_max_data_size_mb = int(value) 167 | elif stanza == "homePath" and in_index_mode: 168 | cur_index.home_path = value 169 | elif stanza == "coldPath" and in_index_mode: 170 | cur_index.cold_path = value 171 | elif stanza == "maxDataSize" and in_index_mode: 172 | cur_index.max_data_size = value 173 | if cur_index.max_data_size.find("auto_high_volume") != -1: 174 | cur_index.max_data_size = "10240_auto" 175 | elif cur_index.max_data_size.find("auto") != -1: 176 | cur_index.max_data_size = "750_auto" 177 | elif stanza == "maxHotBuckets" and in_index_mode: 178 | cur_index.max_hot_buckets = float(value) 179 | # This setting only appears in volumes 180 | elif stanza == "maxVolumeDataSizeMB" and not in_index_mode: 181 | vol.max_vol_data_size_mb = int(value) 182 | elif stanza == "maxTotalDataSizeMB" and in_index_mode: 183 | cur_index.max_total_data_size_mb = int(value) 184 | elif stanza == "thawedPath" and in_index_mode: 185 | cur_index.thawed_path = value 186 | 187 | if not hasattr(cur_index, "home_path"): 188 
| logger.warn("index=%s does not have a homePath, not recording this index" % index_name) 189 | continue 190 | # btool prints in lexicographical order so therefore we can assume 191 | # tstatsHomePath is last for an index 192 | elif stanza == "tstatsHomePath" and in_index_mode: 193 | # Do not record disabled indexes 194 | if disabled: 195 | disabled = False 196 | continue 197 | cur_index.tstats_home_path = value 198 | # btool uses alphabetical order so this is the last entry in 199 | # the list 200 | indexes[cur_index.name] = cur_index 201 | logger.debug("Recording index=%s into indexes dict" % (cur_index.name)) 202 | elif stanza == "warmToColdScript" and not in_index_mode: 203 | logger.debug("Recording vol=%s into volumes dict" % (vol.name)) 204 | volumes[vol.name] = vol 205 | 206 | return indexes, volumes 207 | 208 | ##################### 209 | # 210 | # Comment lines specific to this environment, pull out the lines that we 211 | # need related to # .. @ 1GB/day ... 212 | # 213 | ##################### 214 | # Example lines to find the bits of 215 | # maximum storage for all buckets (6 months @ 1GB/day @ 50% (15% + 35%) 216 | # compression) 217 | # maximum storage for all buckets (2 months @ 0.1GB/day @ 50% (15% + 35%) 218 | # compression) 219 | def parse_conf_files_for_sizing_comments(self, indexes, conf_files): 220 | find_comments_rex = re.compile(r"^#[^@]+@\s+([0-9\.]+)\s*([^/%]+)/([^ ]+)") 221 | 222 | for a_file in list(conf_files.keys()): 223 | with open(a_file) as file: 224 | index_name = "" 225 | 226 | for line in file: 227 | if line.find("[volume:") != -1: 228 | pass 229 | # Now looking at an index entry and not a volume entry 230 | elif line.find("[") == 0: 231 | start = line.find("[")+1 232 | end = line.find("]") 233 | index_name = line[start:end] 234 | # comments with sizing have @ symbols, nothing else 235 | # has the @ symbol in the btool indexes list output 236 | elif line.find("@") != -1: 237 | result = find_comments_rex.match(line) 238 | if not result: 239 | continue 240 | # Size as in number (0.1 for example), unit as 241 | # GB perX as in day or similar 242 | size = result.group(1) 243 | unit = result.group(2).upper() 244 | # perX = result.group(3) 245 | calc_size = 0 246 | if unit == "GB": 247 | calc_size = int(float(size)*1024) 248 | elif unit == "TB": 249 | calc_size = int(float(size)*1024*1024) 250 | else: 251 | # Assume MB 252 | calc_size = int(size) 253 | 254 | # Record the size in MB 255 | logger.debug("index=%s found size=%s, unit=%s, calculated=%s" % (index_name, size, unit, calc_size)) 256 | if hasattr(indexes[index_name], "size_per_day_in_mb"): 257 | logger.info("index=%s found size=%s, unit=%s, calculated=%s, but this index already has calculated size of calculated=%s, not changing it" 258 | % (index_name, size, unit, calc_size, indexes[index_name].size_per_day_in_mb)) 259 | else: 260 | indexes[index_name].size_per_day_in_mb = calc_size 261 | 262 | ################ 263 | # 264 | # End environment specific 265 | # 266 | ################ 267 | 268 | """ 269 | Determine for the specified index the ratio of the index size compared to 270 | the raw data volume overall (monitoring console keeps these stats) 271 | this works just fine *if* the number of hours is valid, if there are data 272 | parsing issues then it may look like a bucket has 6000 hours worth of 273 | data but in reality it's just timestamp parsing problem(s) 274 | """ 275 | def determine_recommended_bucket_size(self, index_name, num_hours_per_bucket): 276 | # added rawSize>0 as streaming hot buckets 
appear to show 0 rawSize but 277 | # a sizeOnDiskMB is measured so this results in an unusually large 278 | # MB/hour number! 279 | json_result = self.run_search_query( 280 | " | dbinspect index=%s | eval hours=(endEpoch-startEpoch)/60/60 "\ 281 | " | where hours>1 AND rawSize>0 | eval sizePerHour=sizeOnDiskMB/hours " \ 282 | " | stats avg(sizePerHour) AS averageSizePerHour " \ 283 | " | eval bucket_size=averageSizePerHour*%s " \ 284 | " | fields bucket_size" % (index_name, num_hours_per_bucket)) 285 | 286 | 287 | logger.debug("Bucket size json_result=%s" % (json_result)) 288 | 289 | if "results" not in json_result or len(json_result["results"]) != 1: 290 | logger.info("No results found for index=%s with dbinspect command" % (index_name)) 291 | return float(0) 292 | 293 | bucket_size = float(json_result["results"][0]["bucket_size"]) 294 | logger.debug("index=%s bucket_size=%s") 295 | 296 | return bucket_size 297 | 298 | # Over a time period determine how much license was used on average per day, 299 | # maxmimum during the period and also return number of days of license data 300 | def determine_license_usage_per_day(self, index_name, earliest_license, 301 | latest_license): 302 | # Generic query 303 | json_result = self.run_search_query( 304 | "search index=_internal source=*license_usage.log sourcetype=splunkd "\ 305 | "earliest=%s latest=%s idx=%s "\ 306 | "| bin _time span=1d "\ 307 | "| stats sum(b) AS totalBytes by idx, _time "\ 308 | "| stats avg(totalBytes) AS avgBytesPerDay, "\ 309 | " max(totalBytes) AS maxBytesPerDay, earliest(_time) AS firstSeen "\ 310 | "| eval avgMBPerDay=round(avgBytesPerDay/1024/1024), "\ 311 | " maxMBPerDay=round(maxBytesPerDay/1024/1024), "\ 312 | " firstSeen=(now()-firstSeen)/60/60/24 "\ 313 | "| fields avgMBPerDay, maxMBPerDay, firstSeen" \ 314 | % (earliest_license, latest_license, index_name)) 315 | 316 | logger.debug("index=%s earliest_time=%s latest_time=%s json_result=%s" % (index_name, earliest_license, latest_license, json_result)) 317 | 318 | if "results" not in json_result or len(json_result["results"]) != 1: 319 | logger.info("No results found for license query for index=%s earliest_time=%s" % (index_name, earliest_license)) 320 | return 0, 0, 0 321 | 322 | # We just want the avgMBPerDay in license usage terms 323 | avg_mb_per_day = json_result["results"][0]["avgMBPerDay"] 324 | days_of_license_usage = json_result["results"][0]["firstSeen"] 325 | max_mb_per_day = json_result["results"][0]["maxMBPerDay"] 326 | 327 | logger.debug("determine_license_usage_per_day recording index=%s avg_mb_per_day=%s days_of_license_usage=%s max_mb_per_day=%s" % (index_name, avg_mb_per_day, days_of_license_usage, max_mb_per_day)) 328 | # We return the average MB per day, days of license usage available and the max, all as the int type 329 | return int(avg_mb_per_day), int(float(days_of_license_usage)), int(max_mb_per_day) 330 | 331 | """ 332 | There are many potential ways to determine the index compression ratio, 333 | multiple options were considered 334 | * Use dbinspect and view the last 14 days, there are 2 flaws with this 335 | logic, the first is that this will show all buckets modified during 336 | the last 14 days 337 | The second issue is that dbinspect shows the parsed time, so any data 338 | with time parsing will have unusual earliest/latest times so we cannot 339 | confirm what data was added during that time period, and therefore 340 | cannot accurately calculate how much data the last 2 weeks 341 | of license usage used 342 | * The second option 
was to use introspection data for the growth info 343 | this works fine *except* we cannot determine how much was deleted 344 | therefore we cannot accurately calculate the data size for the 345 | license period we measure 346 | 347 | Note that the REST API also works instead of the introspection index but 348 | this was faster and slightly easier in some ways... 349 | """ 350 | def determine_compression_ratio(self, index_name, indexerhostnamefilter, 351 | use_introspection_data): 352 | # Introspection data is quicker but slightly less accurate for the 353 | # earliest time, therefore it's a trade off in terms of 354 | # which one we choose 355 | if use_introspection_data: 356 | json_result = self.run_search_query( 357 | "search earliest=-10m index=_introspection component=Indexes host=%s data.name=%s "\ 358 | "| eval minTime=coalesce('data.bucket_dirs.cold.event_min_time', 'data.bucket_dirs.home.event_min_time'), "\ 359 | " maxTime=coalesce('data.bucket_dirs.home.event_max_time', 'data.bucket_dirs.cold.event_max_time') "\ 360 | "| stats first(data.total_size) AS total_size, "\ 361 | " first(data.total_raw_size) AS total_raw_size, "\ 362 | " min(minTime) AS earliest_time, "\ 363 | " max(maxTime) AS newest_time by data.name, host "\ 364 | "| stats sum(total_size) AS total_size, "\ 365 | " sum(total_raw_size) AS total_raw_size, "\ 366 | " max(total_size) AS max_total_size, "\ 367 | " min(earliest_time) AS earliest_time, "\ 368 | " max(newest_time) AS newest_time by data.name "\ 369 | "| eval ratio=total_size/total_raw_size, "\ 370 | " earliest_time = ceiling((now() - earliest_time) / 86400) , "\ 371 | " newest_time = floor((now() - newest_time) / 86400)" \ 372 | "| eval earliest_time = if(isnotnull(earliest_time), "\ 373 | " earliest_time, 0) "\ 374 | "| fields ratio, max_total_size, earliest_time, newest_time" 375 | % (indexerhostnamefilter, index_name)) 376 | else: 377 | search = "| rest /services/data/indexes/%s splunk_server=%s "\ 378 | "| join title splunk_server type=outer "\ 379 | "[| rest /services/data/indexes-extended/%s splunk_server=%s]" % (index_name, indexerhostnamefilter, index_name, indexerhostnamefilter) 380 | 381 | search = search + """| eval minTime=strptime(minTime,"%Y-%m-%dT%H:%M:%S%z"), maxTime=strptime(maxTime,"%Y-%m-%dT%H:%M:%S%z") 382 | | stats sum(currentDBSizeMB) AS total_size, 383 | max(currentDBSizeMB) AS max_total_size, 384 | sum(total_raw_size) AS total_raw_size, 385 | min(minTime) AS earliest_time 386 | max(maxTime) AS newest_time 387 | | eval ratio=total_size/total_raw_size, 388 | earliest_time = ceiling((now() - earliest_time) / 86400), 389 | newest_time = floor((now() - newest_time) / 86400) 390 | | eval earliest_time = if(isnotnull(earliest_time), earliest_time, 0) 391 | | fields ratio, max_total_size, earliest_time, newest_time""" 392 | json_result = self.run_search_query(search) 393 | 394 | logger.debug("determine_compression_ratio index_name=%s use_introspection_data=%s json_result=%s"% (index_name, use_introspection_data, json_result)) 395 | 396 | # If we don't have both results we have no data for this index 397 | if "results" not in json_result or len(json_result["results"]) != 1: 398 | logger.error("No results='%s' from querying index=%s with indexhostnamefilter=%s" % (json_result, index_name, indexerhostnamefilter)) 399 | # Return a comp ratio of 0.5 as a guess just in case... 
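            # (same shape as the successful return below: ratio, max_total_size, earliest_time, newest_time,
            #  where the two time values are ages in days from the ceiling/floor((now() - ...) / 86400) evals)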
400 | return float(0.5), float(0), int(0), int(0) 401 | 402 | if len(json_result["results"][0]) != 4: 403 | logger.error("Unexpected results, expected 4 results and got results='%s' "\ 404 | "from querying index=%s with indexhostnamefilter=%s" 405 | % (json_result, index_name, indexerhostnamefilter)) 406 | # Return a comp ratio of 0.5 as a guess just in case... 407 | return float(0.5), float(0), int(0), int(0) 408 | 409 | ratio = float(json_result["results"][0]["ratio"]) 410 | maxtotalsize = float(json_result["results"][0]["max_total_size"]) 411 | earliest_time = json_result["results"][0]["earliest_time"] 412 | newest_time = json_result["results"][0]["newest_time"] 413 | 414 | logger.debug("determine_compression_ratio index=%s ratio=%s maxtotalsize=%s earliest_time=%s newest_time=%s" % (index_name, ratio, maxtotalsize, earliest_time, newest_time)) 415 | 416 | return ratio, maxtotalsize, earliest_time, newest_time 417 | 418 | # List only directories and not files under a particular directory 419 | def listdirs(self, dir): 420 | return [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] 421 | 422 | # If copying and pasting into the shell we want $ symbols escaped and space symbols escaped 423 | def replace_dollar_symbols(self, thestr): 424 | if thestr.find("$") != -1 or thestr.find(" ") != -1: 425 | logger.debug(r"Replacing $ symbol with \$ and ' ' with '\ ' in case someone copies this output into a shell command") 426 | thestr = thestr.replace(r"$", r"\$") 427 | thestr = thestr.replace(" ", r"\ ") 428 | return thestr 429 | 430 | # Run an OS process with a timeout, this way if a command gets "stuck" 431 | # waiting for input it is killed 432 | # Had inconsistent results using Popen without a threaded process 433 | # thanks to https://stackoverflow.com/questions/6893968/how-to-get-the-return-value-from-a-thread-in-python 434 | def run_os_process(self, command, logger, timeout=20): 435 | def target(q): 436 | logger.debug("Begin OS process run of command=\"%s\"" % (command)) 437 | process = Popen(command, stdout=PIPE, stderr=PIPE, shell=True) 438 | (stdoutdata, stderrdata) = process.communicate() 439 | if process.returncode != 0: 440 | logger.debug("OS process exited with non-zero_code=%s \ 441 | , for command=\"%s\"" % (process.returncode, command)) 442 | q.put(stdoutdata) 443 | q.put(stderrdata) 444 | q.put(False) 445 | else: 446 | logger.debug("OS process exited with code=0, for command=\"%s\"" % (command)) 447 | q.put(stdoutdata) 448 | q.put(stderrdata) 449 | q.put(True) 450 | 451 | #Keep the arguments in the queue for use once the thread finishes 452 | q = six.moves.queue.Queue() 453 | thread = threading.Thread(target=target, args=(q,)) 454 | thread.daemon = False 455 | thread.start() 456 | thread.result_queue = q 457 | thread.join(timeout) 458 | if thread.is_alive(): 459 | process.terminate() 460 | thread.join() 461 | logger.warn("OS timeout=%s seconds while running command=\"%s\"" % (timeout, command)) 462 | return "", "timeout after %s seconds" % (timeout), False 463 | logger.debug("Successful run of OS process command=\"%s\" within timeout=%s" % (command, timeout)) 464 | 465 | return q.get(), q.get(), q.get() 466 | -------------------------------------------------------------------------------- /bin/knowledge_obj_extraction_btool.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen, PIPE, check_output 2 | import re 3 | import logging 4 | from logging.config import dictConfig 5 | import argparse 6 | import 
os 7 | from configparser import RawConfigParser 8 | import csv 9 | import urllib.parse 10 | import sys 11 | 12 | #################################################################################################### 13 | # 14 | # knowledge_obj_extraction_btool 15 | # 16 | # The idea of this script is to use the btool command to combine configuration 17 | # and to then output the combined config + metadata file into a new directory 18 | # 19 | # A filter.csv file can be used the format can be: 20 | # type, name 21 | # savedsearches,mysavedsearch 22 | # views,myview 23 | # 24 | # If a type is not in the filter then all are included by default (i.e. if you don't have a views line, all views are included) 25 | # 26 | #################################################################################################### 27 | 28 | #Setup the logging, the plan was to default to INFO and change to DEBUG level but it's currently the 29 | #opposite version of this 30 | logging_config = dict( 31 | version = 1, 32 | formatters = { 33 | 'f': {'format': 34 | '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'} 35 | }, 36 | handlers = { 37 | 'h': {'class': 'logging.StreamHandler', 38 | 'formatter': 'f', 39 | 'level': logging.DEBUG}, 40 | 'file': {'class' : 'logging.handlers.RotatingFileHandler', 41 | 'filename' : '/tmp/knowledgeobj_extraction.log', 42 | 'formatter': 'f', 43 | 'maxBytes' : 10485760, 44 | 'level': logging.DEBUG, 45 | 'backupCount': 5 } 46 | }, 47 | root = { 48 | 'handlers': ['h','file'], 49 | 'level': logging.DEBUG, 50 | }, 51 | ) 52 | 53 | dictConfig(logging_config) 54 | 55 | logger = logging.getLogger() 56 | 57 | #Create the argument parser 58 | parser = argparse.ArgumentParser(description='Print splunk configuration from savedsearches or other files to standard out, ignoring system default config') 59 | parser.add_argument('-splunkhome', help='Directory of SPLUNK_HOME so that bin/splunk btool can be run or etc/apps/metadata can be parsed', required=False, default="/opt/splunk") 60 | parser.add_argument('-type', help='Type of knowledge object to extract', required=True, choices=['app','collections','commands','datamodels','eventtypes','lookups','macros','panels','props','savedsearches','tags','times','transforms','views','workflow_actions', 'all']) 61 | parser.add_argument('-app', help='Splunk app to extract the knowledge object from', required=True) 62 | parser.add_argument('-filterCSV', help='A CSV list to filter the names of the objects, lines should be type/name, or just the name if filtering without the "all" type', required=False) 63 | parser.add_argument('-debugMode', help='(optional) turn on DEBUG level logging (defaults to INFO)', action='store_true') 64 | parser.add_argument('-outputDir', help='Directory to output files to', required=True) 65 | parser.add_argument('-doNotMergeDefault', help='(optional) if enabled then default/savedsearches.conf is separate from local/savedsearches.conf (otherwise only local exists). 
If using this option you may want to use knowledge_obj_extraction_conffiles instead...', action='store_true') 66 | 67 | args = parser.parse_args() 68 | 69 | #If we want debugMode, keep the debug logging, otherwise drop back to INFO level 70 | if not args.debugMode: 71 | logging.getLogger().setLevel(logging.INFO) 72 | 73 | logger.info("knowledge object extraction starts") 74 | 75 | # Run the btool command and parse the output 76 | def parse_btool_output(splunkhome, splunk_type, app, do_not_merge_default, filter_list, filter_type): 77 | # these types do not have .conf files as such 78 | if splunk_type == "lookups" or splunk_type == "views": 79 | logger.info("Skipping views/lookups in btool, this is only required for metadata") 80 | return [], [] 81 | 82 | # Run python btool and read the output, use debug switch to obtain 83 | # file names 84 | logger.debug(f"Running {splunkhome}/bin/splunk btool {splunk_type} --list --debug") 85 | output = check_output([f"{splunkhome}/bin/splunk", "btool", splunk_type, 86 | "list", "--debug", f"--app={app}"]).decode('utf-8') 87 | output = output.split("\n") 88 | 89 | # if we end with a newline remove it from the list 90 | if output[-1] == "": 91 | output.pop() 92 | 93 | # Work line by line on the btool output 94 | string_res_list = [] 95 | string_res_list_default = [] 96 | # backslashes are stripped by btool but required for multi-line searches or descriptions 97 | # this implementation is complicated, the knwoled_obj_extraction_conffiles is much more straightforward 98 | # however it cannot merge config... 99 | backslash_required = True 100 | last_conf_file = "" 101 | stanza_printed = False 102 | 103 | for line in output: 104 | logger.debug(f"Working with line={line}") 105 | 106 | # Search for the [] 107 | # Split the string /opt/splunk/etc/slave-apps/_cluster/local/indexes.conf [_internal] 108 | # into 2 pieces 109 | regex = re.compile(r"^(/[^ ]+) +(.*)") 110 | result = regex.match(line) 111 | if not result: 112 | logger.debug(f"No match on line={line} assuming the whole line is the result...") 113 | string_res = line 114 | conf_file = last_conf_file 115 | else: 116 | conf_file = result.group(1) 117 | last_conf_file = conf_file 118 | string_res = result.group(2) 119 | 120 | # we may or may not be outputting a default config file 121 | if do_not_merge_default and conf_file.find("/default/") != -1: 122 | default_file = True 123 | else: 124 | default_file = False 125 | 126 | # vsid's are legacy configuration related to viewstates, we don't need them 127 | if string_res.find("vsid") == 0: 128 | logger.info(f"skipping vsid line, string_res={string_res} line={line}") 129 | continue 130 | 131 | # it's a stanza line [mysearch] 132 | if len(string_res) > 0 and string_res[0] == "[": 133 | stanza_name = string_res[1:len(string_res)-1] 134 | logger.debug(f"working with stanza={stanza_name}") 135 | 136 | # check the filter list and if we find a match don't add empty lines in 137 | filter_lookup = f"{splunk_type}_{stanza_name}" 138 | if len(filter_list) > 0 and splunk_type in filter_type and not filter_lookup in filter_list: 139 | logger.info(f"stanza={stanza_name} with filter={filter_lookup} not found in filter list, skipping") 140 | continue 141 | 142 | if default_file: 143 | string_res_list_default.append("") 144 | stanza_printed = True 145 | else: 146 | string_res_list.append("") 147 | stanza_printed = False 148 | if len(string_res_list_default) > 0 and string_res_list_default[-1] != "": 149 | string_res_list_default.append("") 150 | 151 | # if we are in the stanza 
name that is filtered, don't output the lines from it 152 | filter_lookup = f"{splunk_type}_{stanza_name}" 153 | if len(filter_list) > 0 and splunk_type in filter_type and not filter_lookup in filter_list: 154 | logger.info(f"stanza={stanza_name} with filter={filter_lookup} not found in filter list, skipping") 155 | continue 156 | 157 | if line == "": 158 | # blank lines can be kept with the \ on them 159 | if line.find(splunkhome) != 0: 160 | if default_file: 161 | if not stanza_printed: 162 | string_res_list_default.append(f"[{stanza_name}]") 163 | stanza_printed = True 164 | string_res_list_default.append(string_res_list_default.pop() + "\\") 165 | else: 166 | string_res_list.append(string_res_list.pop() + "\\") 167 | backslash_required = False 168 | 169 | if default_file: 170 | if not stanza_printed: 171 | string_res_list_default.append(f"[{stanza_name}]") 172 | stanza_printed = True 173 | string_res_list_default.append("\\") 174 | else: 175 | string_res_list.append("\\") 176 | continue 177 | logger.debug(f"conf_file={conf_file}, string_res={string_res}") 178 | 179 | if backslash_required and line.find(splunkhome) != 0: 180 | if default_file: 181 | if not stanza_printed: 182 | string_res_list_default.append(f"[{stanza_name}]") 183 | stanza_printed = True 184 | string_res_list_default.append(string_res_list_default.pop() + "\\") 185 | else: 186 | string_res_list.append(string_res_list.pop() + "\\") 187 | backslash_required = False 188 | 189 | if line.find(splunkhome) != 0: 190 | backslash_required = True 191 | 192 | if default_file: 193 | if not stanza_printed: 194 | string_res_list_default.append(f"[{stanza_name}]") 195 | stanza_printed = True 196 | string_res_list_default.append(string_res) 197 | else: 198 | string_res_list.append(string_res) 199 | return string_res_list_default, string_res_list 200 | 201 | # all is the alias for just about any configuration we want to retrieve from the metadata/config excluding viewstates 202 | if args.type=='all': 203 | splunk_type = ['app','collections','commands','datamodels','eventtypes','lookups','macros','panels','props','savedsearches','tags','times','transforms','views','workflow_actions','nav'] 204 | else: 205 | splunk_type = [ args.type ] 206 | 207 | # Create the directories we will need 208 | if not os.path.isdir(args.outputDir): 209 | os.mkdir(args.outputDir) 210 | 211 | local_dir = f'{args.outputDir}/local' 212 | if not os.path.isdir(local_dir): 213 | os.mkdir(local_dir) 214 | 215 | metadata_dir = f'{args.outputDir}/metadata' 216 | if not os.path.isdir(metadata_dir): 217 | os.mkdir(metadata_dir) 218 | 219 | default_dir = f'{args.outputDir}/default' 220 | if args.doNotMergeDefault and not os.path.isdir(default_dir): 221 | os.mkdir(default_dir) 222 | 223 | ################ 224 | # 225 | # Setup filtering 226 | # 227 | ################ 228 | filter_list = [] 229 | filter_list_encoded = [] 230 | filter_type = {} 231 | if args.filterCSV: 232 | with open(args.filterCSV, newline='') as csvfile: 233 | reader = csv.DictReader(csvfile) 234 | for row in reader: 235 | if 'type' in row: 236 | logger.debug(f"Reading type={row['type']}, name={row['name']} from file={args.filterCSV}") 237 | filter_list.append(f"{row['type']}_{row['name']}") 238 | # the encoded version is for the metadata file, metadata url encodes the stanza names 239 | encoded_name = urllib.parse.quote(row['name']) 240 | filter_list_encoded.append(f"{row['type']}/{encoded_name}") 241 | logger.debug(f"Adding {row['type']}/{encoded_name} to the encoded list") 242 | # add to dict for 
later filtering 243 | filter_type[row['type']] = None 244 | else: 245 | if args.type=='all': 246 | logger.error("args.type=all however CSV does not have a type and name column, these are required to filter with all") 247 | sys.exit(1) 248 | logger.debug(f"Reading {args.type}_{row['name']} from file={args.filterCSV}") 249 | filter_list.append(f"{args.type}_{row['name']}") 250 | # the encoded version is for the metadata file, metadata url encodes the stanza names 251 | encoded_name = urllib.parse.quote(f"{args.type}_{row['name']}") 252 | filter_list_encoded.append(encoded_name) 253 | filter_list_encoded.append(f"{args.type}/{encoded_name}") 254 | logger.debug(f"Adding {args.type}/{encoded_name} to the encoded list") 255 | # add to dict for later filtering 256 | filter_type[args.type] = None 257 | 258 | # keep a list of filtered types for when we parse the config 259 | filter_type = list(filter_type.keys()) 260 | 261 | # each splunk_type is a separate configuration file 262 | for a_type in splunk_type: 263 | string_res_list_default, string_res_list = parse_btool_output(args.splunkhome, a_type, args.app, args.doNotMergeDefault, filter_list, filter_type) 264 | 265 | if len(string_res_list) > 0: 266 | with open(f'{local_dir}/{a_type}.conf', 'w') as f: 267 | for line in string_res_list: 268 | f.write(f"{line}\n") 269 | 270 | if args.doNotMergeDefault: 271 | if len(string_res_list_default) > 0: 272 | with open(f'{default_dir}/{a_type}.conf', 'w') as f: 273 | for line in string_res_list_default: 274 | f.write(f"{line}\n") 275 | 276 | logger.info("knowledge object extraction ends") 277 | 278 | logger.info("Metadata extraction begins") 279 | 280 | # remove the [] stanza entry and leave all others 281 | def readline_generator(fp, splunk_type, filter_list, filter_type): 282 | line = fp.readline() 283 | skip = False 284 | stanza = "" 285 | while line: 286 | if line[0] == "[": 287 | stanza = line[1:len(line)-1] 288 | if stanza.find(f"{splunk_type}/") == 0: 289 | logger.debug(f"stanza matches type expected, type={splunk_type} stanza={stanza} line={line}") 290 | skip = False 291 | else: 292 | skip = True 293 | 294 | if stanza != "": 295 | filter_name = stanza[:-1] 296 | else: 297 | filter_name = "" 298 | 299 | # the metadata parser cannot handle the [] empty stanza, so don't output the line 300 | if stanza == "]": 301 | skip = True 302 | elif len(filter_list) > 0 and splunk_type in filter_type and not filter_name in filter_list: 303 | logger.info(f"type={splunk_type} stanza={stanza} line={line} however it was not in the filtered list so skipping this entry") 304 | skip = True 305 | if not skip: 306 | logger.debug(f"readline_gen={line}") 307 | yield line 308 | line = fp.readline() 309 | 310 | configur = RawConfigParser(strict=False) 311 | 312 | # if we are included default.metadata files... 313 | if args.doNotMergeDefault: 314 | configur_default = RawConfigParser(strict=False) 315 | # this is not efficient but this script is only used a few times... 
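    # (inefficient because the same default.meta is re-read once per knowledge object type; the
    #  readline_generator() call below only yields lines from stanzas whose names begin with
    #  "<type>/", e.g. "savedsearches/")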
316 | for a_type in splunk_type: 317 | default_meta_file = f"{args.splunkhome}/etc/apps/{args.app}/metadata/default.meta" 318 | if not os.path.isfile(default_meta_file): 319 | logger.warning(f"file={default_meta_file} does not exist skipping this file") 320 | continue 321 | # read the default.meta first 322 | logger.debug(f"Opening file {default_meta_file} for parsing") 323 | fp = open(default_meta_file, 'r') 324 | configur_default.read_file(readline_generator(fp, a_type, filter_list_encoded, filter_type)) 325 | fp.close() 326 | else: 327 | # this is not efficient but this script is only used a few times... 328 | for a_type in splunk_type: 329 | # read the default.meta first 330 | default_meta_file = f"{args.splunkhome}/etc/apps/{args.app}/metadata/default.meta" 331 | if not os.path.isfile(default_meta_file): 332 | logger.warning(f"file={default_meta_file} does not exist skipping this file") 333 | continue 334 | logger.debug(f"Opening file {default_meta_file} for parsing") 335 | fp = open(default_meta_file, 'r') 336 | configur.read_file(readline_generator(fp, a_type, filter_list_encoded, filter_type)) 337 | fp.close() 338 | 339 | # for the local.meta file 340 | for a_type in splunk_type: 341 | # let local.meta override default.meta where appropriate 342 | local_meta_file = f"{args.splunkhome}/etc/apps/{args.app}/metadata/local.meta" 343 | if not os.path.isfile(local_meta_file): 344 | logger.warning(f"file={local_meta_file} does not exist skipping this file") 345 | continue 346 | logger.debug(f"Opening file {local_meta_file} for parsing") 347 | fp = open(local_meta_file, 'r') 348 | configur.read_file(readline_generator(fp, a_type, filter_list_encoded, filter_type)) 349 | fp.close() 350 | 351 | # write out the new (potentially) combined metadata file for migration 352 | with open(f'{metadata_dir}/local.meta', 'w') as f: 353 | configur.write(f) 354 | 355 | if args.doNotMergeDefault: 356 | with open(f'{metadata_dir}/default.meta', 'w') as f: 357 | configur_default.write(f) 358 | 359 | # if we have no data we just created an empty file so remove it 360 | if os.path.isfile(f'{metadata_dir}/local.meta') and os.stat(f'{metadata_dir}/local.meta').st_size == 0: 361 | os.remove(f'{metadata_dir}/local.meta') 362 | if os.path.isfile(f'{metadata_dir}/default.meta') and os.stat(f'{metadata_dir}/default.meta').st_size == 0: 363 | os.remove(f'{metadata_dir}/default.meta') 364 | 365 | logger.info("Metadata extraction ends, please note you must add the [] stanzas back in manually (or use the knowledge_obj_extraction_conffiles.py version)") -------------------------------------------------------------------------------- /bin/knowledge_obj_extraction_conffiles.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from logging.config import dictConfig 3 | import argparse 4 | import os 5 | import csv 6 | import urllib.parse 7 | import sys 8 | import shutil 9 | 10 | #################################################################################################### 11 | # 12 | # knowledge_obj_extraction_conffiles 13 | # 14 | # The idea of this script is to extract configuration from .conf files and metadata files 15 | # and output the result to a new directory 16 | # 17 | # A filter.csv file can be used the format can be: 18 | # type, name 19 | # savedsearches,mysavedsearch 20 | # views,myview 21 | # 22 | # If a type is not in the filter then all are included by default (i.e. 
if you don't have a views line, all views are included) 23 | # 24 | #################################################################################################### 25 | 26 | #Setup the logging, the plan was to default to INFO and change to DEBUG level but it's currently the 27 | #opposite version of this 28 | logging_config = dict( 29 | version = 1, 30 | formatters = { 31 | 'f': {'format': 32 | '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'} 33 | }, 34 | handlers = { 35 | 'h': {'class': 'logging.StreamHandler', 36 | 'formatter': 'f', 37 | 'level': logging.DEBUG}, 38 | 'file': {'class' : 'logging.handlers.RotatingFileHandler', 39 | 'filename' : '/tmp/knowledgeobj_extraction.log', 40 | 'formatter': 'f', 41 | 'maxBytes' : 10485760, 42 | 'level': logging.DEBUG, 43 | 'backupCount': 5 } 44 | }, 45 | root = { 46 | 'handlers': ['h','file'], 47 | 'level': logging.DEBUG, 48 | }, 49 | ) 50 | 51 | dictConfig(logging_config) 52 | 53 | logger = logging.getLogger() 54 | 55 | #Create the argument parser 56 | parser = argparse.ArgumentParser(description='Print splunk configuration from savedsearches or other files to standard out, ignoring system default config') 57 | parser.add_argument('-splunkhome', help='Directory of SPLUNK_HOME so that bin/splunk btool can be run or etc/apps/metadata can be parsed', required=False, default="/opt/splunk") 58 | parser.add_argument('-type', help='Type of knowledge object to extract', required=True, choices=['app','collections','commands','datamodels','eventtypes','lookups','macros','panels','props','savedsearches','tags','times','transforms','views','workflow_actions', 'all']) 59 | parser.add_argument('-app', help='Splunk app to extract the knowledge object from', required=True) 60 | parser.add_argument('-filterCSV', help='A CSV list to filter the names of the objects, lines should be type/name, or just the name if filtering without the "all" type', required=False) 61 | parser.add_argument('-debugMode', help='(optional) turn on DEBUG level logging (defaults to INFO)', action='store_true') 62 | parser.add_argument('-outputDir', help='Directory to output files to', required=True) 63 | parser.add_argument('-migrateUserDirs', help='Copy the $SPLUNK_HOME/etc/users/ as well into outputDir?', action='store_true') 64 | parser.add_argument('-outputAppName', help='Specify the app to be created in $outputDir/users//config, defaults to the -app name passed in if not specified') 65 | 66 | args = parser.parse_args() 67 | 68 | #If we want debugMode, keep the debug logging, otherwise drop back to INFO level 69 | if not args.debugMode: 70 | logging.getLogger().setLevel(logging.INFO) 71 | 72 | logger.info("knowledge object extraction starts") 73 | 74 | # parse a single config file and extract any filtered lines 75 | def parse_single_conf_file(conf_file, app, splunk_type, filter_type, filter_list): 76 | string_res_list = [] 77 | 78 | # if the file does not exist in this app we just return an empty list 79 | if not os.path.isfile(conf_file): 80 | logger.info(f"conf_file={conf_file} not found in app={app}") 81 | return string_res_list 82 | 83 | with open(conf_file, "r") as fp: 84 | stanza_name = "" 85 | # read through each line of the file 86 | for line in fp: 87 | logger.debug(f"Working with line={line} file={conf_file}") 88 | # vsid's are legacy configuration related to viewstates, we don't need them 89 | if line.find("vsid") == 0: 90 | logger.info(f"skipping vsid line, line={line}") 91 | continue 92 | 93 | # it's a stanza line [mysearch] 94 | if len(line) > 0 and line[0] == 
"[": 95 | stanza_name = line[1:len(line)-2] 96 | logger.debug(f"working with stanza={stanza_name}") 97 | 98 | # if we are in the stanza name that is filtered, don't output the lines from it 99 | # we filter only if we have >0 entries for the said filter_type (i.e. we might filter savedsearches but not views/lookups/et cetera) 100 | filter_lookup = f"{splunk_type}_{stanza_name}" 101 | if len(filter_list) > 0 and splunk_type in filter_type and not filter_lookup in filter_list: 102 | logger.info(f"stanza={stanza_name} with filter={filter_lookup} not found in filter list, skipping") 103 | continue 104 | 105 | string_res_list.append(line) 106 | return string_res_list 107 | 108 | # parse all conf files for an app 109 | def parse_conf_files(app_dir, splunk_type, app, filter_list, filter_type): 110 | # these types do not have .conf files as such 111 | if splunk_type in ["lookups", "views", "appserver", "static"]: 112 | logger.info("Skipping views/lookups/appserver/static in conf file reading, this is only required for metadata or file copying") 113 | return [], [] 114 | 115 | # attempt to parse the default/.conf files 116 | conf_file = f"{app_dir}/default/{splunk_type}.conf" 117 | string_res_list_default = parse_single_conf_file(conf_file, app, splunk_type, filter_type, filter_list) 118 | 119 | # attempt to parse the local/.conf files 120 | conf_file = f"{app_dir}/local/{splunk_type}.conf" 121 | string_res_list = parse_single_conf_file(conf_file, app, splunk_type, filter_type, filter_list) 122 | 123 | return string_res_list_default, string_res_list 124 | 125 | # parse metadata files, these are slightly different to conf files 126 | def parse_metadata_file(metadata_file, app, splunk_type, filter_type, filter_list): 127 | string_res_list = [] 128 | if not os.path.isfile(metadata_file): 129 | logger.info(f"metadata_file={metadata_file} not found in app={app}") 130 | return string_res_list 131 | 132 | with open(metadata_file, "r") as fp: 133 | for line in fp: 134 | logger.debug(f"Working with line={line}") 135 | if len(line.strip()) == 0 or line.find("#") == 0: 136 | continue 137 | elif len(line) > 0 and line[0] == "[": 138 | stanza_name = line[1:len(line)-2] 139 | logger.debug(f"working with stanza={stanza_name}") 140 | 141 | # we may have [props] or we might have [savedsearches/mysearch] 142 | if stanza_name.find("/") != -1: 143 | stanza_type = stanza_name[0:stanza_name.find("/")] 144 | else: 145 | stanza_type = stanza_name 146 | logger.debug(f"stanza_type={stanza_type}") 147 | 148 | if not stanza_type in splunk_type and not stanza_name=="": 149 | logger.info(f"skipping splunk_type={splunk_type} stanza_type={stanza_type}") 150 | continue 151 | # check the filter list 152 | elif len(filter_list) > 0 and stanza_type in filter_type and not stanza_name in filter_list: 153 | logger.info(f"stanza={stanza_name} not found in filter list, skipping") 154 | continue 155 | 156 | # hack to change system to none / prevent global objects unless requested 157 | if line.find("export = system") == 0: 158 | line="export = none" 159 | 160 | string_res_list.append(line) 161 | return string_res_list 162 | 163 | # all is the alias for just about any configuration we want to retrieve from the metadata/config excluding viewstates 164 | if args.type=='all': 165 | splunk_type = ['app','collections','commands','datamodels','eventtypes','lookups','macros','panels','props','savedsearches','tags','times','transforms','views','workflow_actions','nav','appserver','static'] 166 | else: 167 | splunk_type = [ args.type ] 168 | 169 | 
local_dir = f'{args.outputDir}/apps/local' 170 | if not os.path.isdir(local_dir): 171 | os.makedirs(local_dir, exist_ok=True) 172 | 173 | metadata_dir = f'{args.outputDir}/apps/metadata' 174 | if not os.path.isdir(metadata_dir): 175 | os.mkdir(metadata_dir) 176 | 177 | default_dir = f'{args.outputDir}/apps/default' 178 | if not os.path.isdir(default_dir): 179 | os.mkdir(default_dir) 180 | 181 | lookups_dir = f'{args.outputDir}/apps/lookups' 182 | 183 | ################ 184 | # 185 | # Setup filtering 186 | # 187 | ################ 188 | filter_list = [] 189 | filter_list_encoded = [] 190 | filter_type = {} 191 | if args.filterCSV: 192 | with open(args.filterCSV, newline='') as csvfile: 193 | reader = csv.DictReader(csvfile) 194 | for row in reader: 195 | if 'type' in row: 196 | logger.debug(f"Reading type={row['type']}, name={row['name']} from file={args.filterCSV}") 197 | filter_list.append(f"{row['type']}_{row['name']}") 198 | # the encoded version is for the metadata file, metadata url encodes the stanza names 199 | encoded_name = urllib.parse.quote(row['name']) 200 | filter_list_encoded.append(f"{row['type']}/{encoded_name}") 201 | logger.debug(f"Adding {row['type']}/{encoded_name} to the encoded list") 202 | # add to dict for later filtering 203 | filter_type[row['type']] = None 204 | else: 205 | if args.type=='all': 206 | logger.error("args.type=all however CSV does not have a type and name column, these are required to filter with all") 207 | sys.exit(1) 208 | logger.debug(f"Reading {args.type}_{row['name']} from file={args.filterCSV}") 209 | filter_list.append(f"{args.type}_{row['name']}") 210 | # the encoded version is for the metadata file, metadata url encodes the stanza names 211 | encoded_name = urllib.parse.quote(row['name']) 212 | filter_list_encoded.append(f"{args.type}/{encoded_name}") 213 | logger.debug(f"Adding {args.type}/{encoded_name} to the encoded list") 214 | filter_type[args.type] = None 215 | 216 | # keep a list of filtered types for when we parse the config 217 | filter_type = list(filter_type.keys()) 218 | 219 | user_dirs_to_check = [] 220 | if args.migrateUserDirs: 221 | user_dirs = os.listdir(f"{args.splunkhome}/etc/users") 222 | logger.debug(f"Checking user_dirs={user_dirs}") 223 | for user in user_dirs: 224 | a_user_dir = f"{args.splunkhome}/etc/users/{user}" 225 | if not os.path.isdir(a_user_dir): 226 | continue 227 | if args.app in os.listdir(a_user_dir): 228 | user_dir_found = f"{a_user_dir}/{args.app}" 229 | logger.debug(f"Appending directory user_dir_found={user_dir_found}") 230 | user_dirs_to_check.append(user_dir_found) 231 | 232 | # each splunk_type is a separate configuration file 233 | for a_type in splunk_type: 234 | app_dir = f"{args.splunkhome}/etc/apps/{args.app}" 235 | string_res_list_default, string_res_list = parse_conf_files(app_dir, a_type, args.app, filter_list, filter_type) 236 | 237 | if len(string_res_list) > 0: 238 | with open(f'{local_dir}/{a_type}.conf', 'w') as f: 239 | for line in string_res_list: 240 | f.write(f"{line}") 241 | 242 | if len(string_res_list_default) > 0: 243 | with open(f'{default_dir}/{a_type}.conf', 'w') as f: 244 | for line in string_res_list_default: 245 | f.write(f"{line}") 246 | 247 | # deal with user-level objects too 248 | if args.migrateUserDirs: 249 | for a_dir in user_dirs_to_check: 250 | string_res_list_default, string_res_list = parse_conf_files(a_dir, a_type, args.app, filter_list, filter_type) 251 | if len(string_res_list) > 0: 252 | user_dir = os.path.basename(os.path.dirname(a_dir)) 253 | if 
args.outputAppName: 254 | output_app_name = args.outputAppName 255 | else: 256 | output_app_name = args.app 257 | dest_dir = f"{args.outputDir}/users/{user_dir}/{output_app_name}" 258 | os.makedirs(f"{dest_dir}/local", exist_ok=True) 259 | with open(f'{dest_dir}/local/{a_type}.conf', 'w') as f: 260 | for line in string_res_list: 261 | f.write(f"{line}") 262 | 263 | logger.info("knowledge object extraction ends") 264 | 265 | logger.info("Metadata extraction begins") 266 | 267 | # there is only 1 default metadata file for all types of config 268 | default_meta_file = f"{args.splunkhome}/etc/apps/{args.app}/metadata/default.meta" 269 | if not os.path.isfile(default_meta_file): 270 | logger.warning(f"file={default_meta_file} does not exist skipping this file") 271 | else: 272 | string_res_list = parse_metadata_file(default_meta_file, args.app, splunk_type, filter_type, filter_list_encoded) 273 | if len(string_res_list) > 0: 274 | with open(f'{metadata_dir}/default.meta', 'w') as f: 275 | for line in string_res_list: 276 | f.write(f"{line}") 277 | 278 | 279 | # there is only 1 local metadata file for all types of config 280 | local_meta_file = f"{args.splunkhome}/etc/apps/{args.app}/metadata/local.meta" 281 | if not os.path.isfile(local_meta_file): 282 | logger.warning(f"file={local_meta_file} does not exist skipping this file") 283 | else: 284 | string_res_list = parse_metadata_file(local_meta_file, args.app, splunk_type, filter_type, filter_list_encoded) 285 | if len(string_res_list) > 0: 286 | with open(f'{metadata_dir}/local.meta', 'w') as f: 287 | for line in string_res_list: 288 | f.write(f"{line}") 289 | 290 | # if we are migrating user directories we may need user metadata too 291 | if args.migrateUserDirs: 292 | for a_dir in user_dirs_to_check: 293 | user_dir = os.path.basename(os.path.dirname(a_dir)) 294 | string_res_list = parse_metadata_file(f"{a_dir}/metadata/local.meta", args.app, splunk_type, filter_type, filter_list_encoded) 295 | 296 | if len(string_res_list) > 0: 297 | if args.outputAppName: 298 | output_app_name = args.outputAppName 299 | else: 300 | output_app_name = args.app 301 | metadata_dir = f"{args.outputDir}/users/{user_dir}/{output_app_name}/metadata" 302 | os.makedirs(metadata_dir, exist_ok=True) 303 | with open(f"{metadata_dir}/local.meta", 'w') as f: 304 | for line in string_res_list: 305 | f.write(f"{line}") 306 | 307 | logger.info("Metadata extraction ends") 308 | 309 | ################ 310 | # 311 | # Copy other files (i.e. 
files that exist on the filesystem) 312 | # 313 | ################ 314 | default_src_dir = f"{args.splunkhome}/etc/apps/{args.app}/default" 315 | local_src_dir = f"{args.splunkhome}/etc/apps/{args.app}/local" 316 | lookups_src_dir = f"{args.splunkhome}/etc/apps/{args.app}/lookups" 317 | 318 | def find_and_copy_files(dir, dest_dir, type, extension, filter_type, filter_list): 319 | if os.path.isdir(dir): 320 | files = os.listdir(dir) 321 | for file_name in files: 322 | lookup = f"{type}_{file_name[0:file_name.find(extension)]}" 323 | if len(filter_list) > 0 and type in filter_type and not lookup in filter_list: 324 | logger.info(f"dir={dir} file={file_name} lookup={lookup} did not match filter list, skipping") 325 | else: 326 | logger.debug(f"copying file dir={dir} file={file_name} lookup={lookup} to dest_dir={dest_dir}") 327 | if not os.path.isdir(dest_dir): 328 | # create all required directories 329 | os.makedirs(dest_dir, exist_ok=True) 330 | if not os.path.isfile(f"{dir}/{file_name}"): 331 | logger.info(f"dir={dir} file={file_name} is not a file, skipping") 332 | continue 333 | shutil.copy2(f"{dir}/{file_name}", dest_dir) 334 | 335 | for type in splunk_type: 336 | if type == "datamodels": 337 | datamodels_src = f"{default_src_dir}/data/models" 338 | datamodels_dest = f"{default_dir}/data/models" 339 | find_and_copy_files(datamodels_src, datamodels_dest, "datamodels", ".json", filter_type, filter_list) 340 | datamodels_src = f"{local_src_dir}/data/models" 341 | datamodels_dest = f"{local_dir}/data/models" 342 | find_and_copy_files(datamodels_src, datamodels_dest, "datamodels", ".json", filter_type, filter_list) 343 | elif type == "lookups": 344 | find_and_copy_files(lookups_src_dir, lookups_dir, "lookups", ".csv", filter_type, filter_list) 345 | elif type == "panels": 346 | panels_src = f"{default_src_dir}/data/ui/panels" 347 | panels_dst = f"{default_dir}/data/ui/panels" 348 | find_and_copy_files(panels_src, panels_dst, "panels", ".xml", filter_type, filter_list) 349 | panels_src = f"{local_src_dir}/data/ui/panels" 350 | panels_dst = f"{local_dir}/data/ui/panels" 351 | find_and_copy_files(panels_src, panels_dst, "panels", ".xml", filter_type, filter_list) 352 | elif type=="views": 353 | views_src = f"{default_src_dir}/data/ui/views" 354 | views_dst = f"{default_dir}/data/ui/views" 355 | find_and_copy_files(views_src, views_dst, "views", ".xml", filter_type, filter_list) 356 | views_src = f"{local_src_dir}/data/ui/views" 357 | views_dst = f"{local_dir}/data/ui/views" 358 | find_and_copy_files(views_src, views_dst, "views", ".xml", filter_type, filter_list) 359 | elif type=="static": 360 | static_src = f"{args.splunkhome}/etc/apps/{args.app}/static" 361 | static_dst = f"{args.outputDir}/apps/static" 362 | find_and_copy_files(static_src, static_dst, "static", ".png", filter_type, filter_list) 363 | elif type=="appserver": 364 | appserver_src = f"{args.splunkhome}/etc/apps/{args.app}/appserver" 365 | appserver_dst = f"{args.outputDir}/apps/appserver" 366 | shutil.copytree(appserver_src, appserver_dst) 367 | elif type=="nav": 368 | default_nav = f"{default_src_dir}/data/ui/nav/default.xml" 369 | if os.path.isfile(default_nav): 370 | nav_dir = f"{default_dir}/data/ui/nav" 371 | if not os.path.isdir(nav_dir): 372 | os.makedirs(nav_dir, exist_ok=True) 373 | shutil.copy2(default_nav, nav_dir) 374 | local_nav = f"{local_src_dir}/data/ui/nav/default.xml" 375 | if os.path.isfile(local_nav): 376 | nav_dir = f"{local_dir}/data/ui/nav" 377 | if not os.path.isdir(nav_dir): 378 | os.makedirs(nav_dir,
exist_ok=True) 379 | shutil.copy2(local_nav, nav_dir) 380 | 381 | # if we are migrating user directories we may need other files as well 382 | if args.migrateUserDirs: 383 | for a_dir in user_dirs_to_check: 384 | user_dir = os.path.basename(os.path.dirname(a_dir)) 385 | 386 | if args.outputAppName: 387 | output_app_name = args.outputAppName 388 | else: 389 | output_app_name = args.app 390 | 391 | for type in splunk_type: 392 | if type == "datamodels": 393 | datamodels_src = f"{a_dir}/local/data/models" 394 | datamodels_dest = f"{args.outputDir}/users/{user_dir}/{output_app_name}/local/data/models" 395 | logger.debug(f"datamodels_src={datamodels_src} datamodels_dest={datamodels_dest}") 396 | find_and_copy_files(datamodels_src, datamodels_dest, "datamodels", ".json", filter_type, filter_list) 397 | elif type == "lookups": 398 | lookups_src = f"{a_dir}/lookups" 399 | lookups_dest = f"{args.outputDir}/users/{user_dir}/{output_app_name}/lookups" 400 | logger.debug(f"src={lookups_src} dest={lookups_dest}") 401 | find_and_copy_files(lookups_src, lookups_dest, "lookups", ".csv", filter_type, filter_list) 402 | elif type == "panels": 403 | panels_src = f"{a_dir}/local/data/ui/panels" 404 | panels_dst = f"{args.outputDir}/users/{user_dir}/{output_app_name}/local/data/ui/panels" 405 | logger.debug(f"src={panels_src} dest={panels_dst}") 406 | find_and_copy_files(panels_src, panels_dst, "panels", ".xml", filter_type, filter_list) 407 | elif type=="views": 408 | views_src = f"{a_dir}/local/data/ui/views" 409 | views_dst = f"{args.outputDir}/users/{user_dir}/{output_app_name}/local/data/ui/views" 410 | logger.debug(f"src={views_src} dest={views_dst}") 411 | find_and_copy_files(views_src, views_dst, "views", ".xml", filter_type, filter_list) 412 | -------------------------------------------------------------------------------- /bin/reownItems.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ $# -lt 2 ]; then 4 | echo "Please pass in the username to work on" 5 | echo "Please pass in the 2nd username to own the items to" 6 | echo "Please pass in a 3rd argument to actually run otherwise this runs in debug mode" 7 | fi 8 | 9 | debugMode="true" 10 | if [ $# -eq 3 ]; then 11 | debugMode="false" 12 | fi 13 | 14 | username="$1" 15 | newOwner="$2" 16 | grep -R $username /opt/splunk/etc/* | grep -v "\.js" | grep -E "\.meta|\.conf" | cut -d ":" -f1 | sort | uniq > /tmp/allFilesFoundToReown.txt 17 | for aFile in `cat /tmp/allFilesFoundToReown.txt`; do 18 | echo $aFile 19 | #Hardcoding because /opt/splunk/etc/apps/ 20 | type=`echo $aFile | cut -d "/" -f 5` 21 | 22 | app="" 23 | if [ "$type" = "users" ]; then 24 | app=`echo $aFile | cut -d "/" -f 7` 25 | else 26 | app=`echo $aFile | cut -d "/" -f 6` 27 | fi 28 | 29 | #Extract the lines for [views/...] 
or similar and combine it with the "owner = " line somewhere below it if it should exist 30 | #Then remove the [ ] around the view/props/savedsearch 31 | grep -E "^\[|owner" $aFile | sed -e ':a' -e 'N' -e '$!ba' -e 's/\nowner/ owner/g' | grep $username | cut -d "]" -f1 | cut -d "[" -f2 > /tmp/allEntitiesToReown.txt 32 | 33 | #For each entity we have to reown them 34 | for entity in `cat /tmp/allEntitiesToReown.txt`; do 35 | entityType=`echo $entity | cut -d "/" -f1` 36 | entityName=`echo $entity | cut -d "/" -f2` 37 | entityName2=`echo $entity | cut -d "/" -f3` 38 | 39 | echo "app is $app and owner is $newOwner" 40 | if [ "$entityType" = "savedsearches" ] ; then 41 | echo "Saved search" 42 | sharing=`$SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/saved/searches/$entityName" | grep sharing | cut -d ">" -f2 | cut -d "<" -f1` 43 | echo $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/saved/searches/$entityName/acl" -post:owner $newOwner -post:sharing $sharing 44 | if [ $debugMode = "false" ]; then 45 | $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/saved/searches/$entityName/acl" -post:owner $newOwner -post:sharing $sharing 46 | fi 47 | elif [ "$entityType" = "views" ] ; then 48 | echo "view type" 49 | sharing=`$SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/data/ui/views/$entityName" | grep sharing | cut -d ">" -f2 | cut -d "<" -f1` 50 | echo $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/data/ui/views/$entityName/acl" -post:owner $newOwner -post:sharing $sharing 51 | if [ $debugMode = "false" ]; then 52 | $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/data/ui/views/$entityName/acl" -post:owner $newOwner -post:sharing $sharing 53 | fi 54 | #Props are 3 level deep 55 | elif [ "$entityType" = "props" ] ; then 56 | echo "props type" 57 | echo $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/data/props/extractions/$entityName%20%3A%20$entityName2" 58 | sharing=`$SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/data/props/extractions/$entityName%20%3A%20$entityName2" | grep sharing | cut -d ">" -f2 | cut -d "<" -f1` 59 | #echo `$SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/data/props/extractions/$entityName%20%3A%20$entityName2"` 60 | echo $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/data/props/extractions/$entityName%20%3A%20$entityName2/acl" -post:owner $newOwner -post:sharing $sharing 61 | if [ $debugMode = "false" ]; then 62 | $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/$newOwner/$app/data/props/extractions/$entityName%20%3A%20$entityName2/acl" -post:owner $newOwner -post:sharing $sharing 63 | fi 64 | elif [ "$entityType" = "datamodels" ]; then 65 | echo "data models" 66 | echo $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/nobody/$app/data/models/$entityName/acl" -post:owner $newOwner -post:sharing $sharing 67 | if [ $debugMode = "false" ]; then 68 | $SPLUNK_HOME/bin/splunk _internal call "/servicesNS/nobody/$app/data/models/$entityName/acl" -post:owner $newOwner -post:sharing $sharing 69 | fi 70 | fi 71 | done 72 | done 73 | -------------------------------------------------------------------------------- /bin/roll_and_resync_buckets.py: -------------------------------------------------------------------------------- 1 | #run this with splunk cmd python3 or python3 2 | import requests 3 | import json 4 | base_url="https://localhost:8089" 5 | url = base_url + 
"/services/cluster/master/buckets?filter=meets_multisite_replication_count=false&output_mode=json&count=0" 6 | auth=('admin','changeme') 7 | res=requests.get(url,auth=auth,verify=False) 8 | 9 | dict = json.loads(res.text) 10 | 11 | roll_bucket_url = base_url + "/services/cluster/master/control/control/roll-hot-buckets" 12 | resync_bucket_url = base_url + "/services/cluster/master/control/control/resync_bucket_from_peer" 13 | for entry in dict['entry']: 14 | #print(entry['content']) 15 | rep_count_by_site = entry['content']['rep_count_by_site'] 16 | site_count = len(rep_count_by_site) 17 | copy_count = 2 18 | if site_count == 1: 19 | copy_count = list(entry['content']['rep_count_by_site'].values())[0] 20 | name = entry['name'] 21 | if rep_count_by_site == 1 and copy_count < 2: 22 | print(f'{name} is only found on 1 site, rolling hot bucket') 23 | data = { 'bucket_id': name } 24 | print(f'requests.post({roll_bucket_url}, data={data}, verify=False)') 25 | res=requests.post(roll_bucket_url, auth=auth, data=data, verify=False) 26 | if res.status_code != requests.codes.ok: 27 | print(f'code={res.status_code} text={res.text}') 28 | else: 29 | print(f'{name} is found on {rep_count_by_site} sites with {copy_count}, resyncing hot bucket') 30 | peer = list(entry['content']['peers'].keys())[0] 31 | data = { 'bucket_id': name, 'peer': peer } 32 | print(f'requests.post({resync_bucket_url}, data={data}, verify=False)') 33 | res=requests.post(resync_bucket_url, auth=auth, data=data, verify=False) 34 | if res.status_code != requests.codes.ok: 35 | print(f'code={res.status_code} text={res.text}') 36 | -------------------------------------------------------------------------------- /bin/roll_and_resync_buckets_v2.py: -------------------------------------------------------------------------------- 1 | #run this with splunk cmd python3 or python3 2 | import requests 3 | import json 4 | import time 5 | import sys 6 | 7 | wait_for_seconds = 60 * 10 8 | 9 | base_url="https://localhost:8089" 10 | 11 | try: 12 | with open('/opt/splunk/.password','r') as file: 13 | password = file.readlines()[0].strip() 14 | except: 15 | print("Unable to open password file") 16 | sys.exit(2) 17 | 18 | # role with capabilities /opt/splunk/bin/splunk _internal call /services/authorization/roles -post:capabilities edit_indexer_cluster -post:capabilities list_indexer_cluster -post:name roll_buckets_automated -auth ... 
required 19 | auth=('roll_buckets_automated', password) 20 | 21 | url = base_url + "/services/cluster/manager/fixup?output_mode=json&count=0&level=replication_factor" 22 | res = requests.get(url,auth=auth,verify=False) 23 | 24 | dict = json.loads(res.text) 25 | print(f"status_code={res.status_code} on url={url}") 26 | 27 | roll_bucket_url = base_url + "/services/cluster/master/control/control/roll-hot-buckets" 28 | resync_bucket_url = base_url + "/services/cluster/master/control/control/resync_bucket_from_peer" 29 | 30 | current_time = round(time.time()) 31 | 32 | resync_required = False 33 | 34 | for entry in dict['entry']: 35 | data_latest = entry['content']['latest'] 36 | data_initial = entry['content']['initial'] 37 | if data_latest['reason'].find("bucket hasn't rolled yet") != -1: 38 | name = entry['name'] 39 | reason = data_latest['reason'] 40 | print(f"bucket={name} requires roll due to {reason}") 41 | bucket_timestamp = data_initial['timestamp'] 42 | diff = current_time - bucket_timestamp 43 | if diff > wait_for_seconds*2: 44 | resync_required = True 45 | elif diff > wait_for_seconds: 46 | print(f'bucket={name} requires roll due to {reason}, and is beyond {wait_for_seconds} seconds') 47 | data = { 'bucket_id': name } 48 | print(f'requests.post("{roll_bucket_url}", data={data}, verify=False)') 49 | res=requests.post(roll_bucket_url, auth=auth, data=data, verify=False) 50 | if res.status_code != requests.codes.ok: 51 | print(f'bucket={name} code={res.status_code} text={res.text}') 52 | 53 | # by this time we have tried to roll the buckets, so now a re-sync might be required instead 54 | if diff > (wait_for_seconds*2): 55 | url = base_url + "/services/cluster/master/buckets/" + name + "?output_mode=json" 56 | res = requests.get(url,auth=auth,verify=False) 57 | dict_buckets = json.loads(res.text) 58 | print(f"status_code={res.status_code} on url={url}") 59 | peer = list(dict_buckets['entry'][0]['content']['peers'].keys())[0] 60 | data = { 'bucket_id': name, 'peer': peer } 61 | print(f'requests.post("{resync_bucket_url}", data={data}, verify=False)') 62 | res=requests.post(resync_bucket_url, auth=auth, data=data, verify=False) 63 | if res.status_code != requests.codes.ok: 64 | print(f'bucket={name} code={res.status_code} text={res.text}') 65 | 66 | time.sleep(1) 67 | -------------------------------------------------------------------------------- /bin/roll_and_resync_buckets_v2.sh: -------------------------------------------------------------------------------- 1 | count=`ps -ef | grep /opt/splunk/etc/scripts/roll_and_resync_buckets_v2.py | grep -v grep | wc -l` 2 | 3 | if [ $count -ne 0 ]; then 4 | pid=`ps -ef | grep /opt/splunk/etc/scripts/roll_and_resync_buckets_v2.py | grep -v grep | awk '{ print $2 }'` 5 | file_mod_time=$(stat --format='%Y' /proc/${pid}) 6 | # Get the current time in seconds since the epoch 7 | current_time=$(date +"%s") 8 | time_difference=$((current_time - file_mod_time)) 9 | if [ "$time_difference" -gt 3600 ]; then 10 | echo "$pid has continued to run for >1 hour, killing and starting again" 11 | kill $pid 12 | else 13 | exit 1 14 | fi 15 | fi 16 | 17 | /opt/splunk/bin/splunk cmd python3 /opt/splunk/etc/scripts/roll_and_resync_buckets_v2.py 2>&1 | tee /tmp/roll_and_resync_buckets_v2_Splunkd.log 18 | -------------------------------------------------------------------------------- /bin/splunk_offline.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Offline Splunk indexers pre-shutdown of OS 3 |
After=Splunkd.service 4 | 5 | [Service] 6 | Type=oneshot 7 | ExecStop=/opt/splunk/scripts/splunk_offline.sh 8 | TimeoutStopSec=600 9 | RemainAfterExit=yes 10 | 11 | [Install] 12 | WantedBy=shutdown.target reboot.target halt.target multi-user.target Splunkd.service 13 | -------------------------------------------------------------------------------- /bin/splunk_offline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | log=/opt/splunk/var/log/splunk/splunk_offline_script.log 3 | echo "`date` splunk_offline script begins" | tee -a ${log} 4 | #/opt/splunk/scripts/collect_stacks.sh -s 100 -b -o /opt/splunk/stacks & 5 | # this requires a role with edit_indexer_cluster=enabled 6 | /opt/splunk/bin/splunk offline -auth splunk_offline_automated:add_password_here 2>&1 | tee -a ${log} 7 | #tail -n 10 /opt/splunk/var/log/splunk/splunkd.log | tee -a ${log} 8 | /opt/splunk/bin/splunk status 2>&1 | tee -a ${log} 9 | echo "`date` splunk_offline script exiting" | tee -a ${log} 10 | # we run as root so change ownership back to splunk 11 | chown splunk:splunk /opt/splunk/var/log/splunk/splunk_offline_script.log 12 | -------------------------------------------------------------------------------- /bin/syslog_migration_to_hec_assistant.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from logging.config import dictConfig 3 | import argparse 4 | import os 5 | import re 6 | 7 | #################################################################################################### 8 | # 9 | # syslog_migration_to_hec_assistant 10 | # 11 | #################################################################################################### 12 | 13 | #Setup the logging, the plan was to default to INFO and change to DEBUG level but it's currently the 14 | #opposite version of this 15 | logging_config = dict( 16 | version = 1, 17 | formatters = { 18 | 'f': {'format': 19 | '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'} 20 | }, 21 | handlers = { 22 | 'h': {'class': 'logging.StreamHandler', 23 | 'formatter': 'f', 24 | 'level': logging.DEBUG}, 25 | 'file': {'class' : 'logging.handlers.RotatingFileHandler', 26 | 'filename' : '/tmp/syslog_migration_to_hec_assistant.log', 27 | 'formatter': 'f', 28 | 'maxBytes' : 10485760, 29 | 'level': logging.DEBUG, 30 | 'backupCount': 5 } 31 | }, 32 | root = { 33 | 'handlers': ['h','file'], 34 | 'level': logging.DEBUG, 35 | }, 36 | ) 37 | 38 | dictConfig(logging_config) 39 | 40 | logger = logging.getLogger() 41 | 42 | #Create the argument parser 43 | parser = argparse.ArgumentParser(description='Print splunk configuration from savedsearches or other files to standard out, ignoring system default config') 44 | parser.add_argument('-inputfile', help='Input file to read (inputs.conf)', required=True) 45 | parser.add_argument('-debugMode', help='(optional) turn on DEBUG level logging (defaults to INFO)', action='store_true') 46 | parser.add_argument('-syslogngdir', help='Directory to syslog-ng config to search', required=True) 47 | parser.add_argument('-overwritemode', help='Do you want to overrwite the syslog-ng config files with updated HEC versions?', action='store_true') 48 | 49 | args = parser.parse_args() 50 | 51 | #If we want debugMode, keep the debug logging, otherwise drop back to INFO level 52 | if not args.debugMode: 53 | logging.getLogger().setLevel(logging.INFO) 54 | 55 | logger.info("syslog_migration_to_hec_assistant starts") 56 | 57 | def 
parse_syslog_conf(conf_file): 58 | # if the file does not exist in this app we just return an empty list 59 | if not os.path.isfile(conf_file): 60 | logger.info(f"conf_file={conf_file} not found") 61 | return "" 62 | 63 | logger.info(f"working on conf_file={conf_file}") 64 | with open(conf_file, "r") as fp: 65 | file_match = "" 66 | text_match = "" 67 | # read through each line of the file 68 | for line in fp: 69 | logger.debug(f"Working with line={line} file={conf_file}") 70 | if line[0] == "#": 71 | continue 72 | 73 | # it's a stanza line [monitor:///] 74 | if line.find("file(\"") != -1 and line.find("key-file") == -1 and line.find("cert-file") == -1: 75 | # we're within the file line 76 | # rex match the parts we need if they exist such as template, file, 77 | # map out the templates to the new hec templates where appropriate 78 | # print the updated HEC template 79 | # print file location 80 | file_match = re.match(r".*?file\(\"\s*([^\"]+)", line) 81 | template_match = re.match(r".*?template\(\s*([^\)]+)", line) 82 | logger.debug(f"file_match={file_match.group(1)}") 83 | 84 | if template_match: 85 | text_match = template_match.group(1) 86 | text_match = text_match + "_hec" 87 | #print(text_match) 88 | 89 | if file_match == "": 90 | return "", text_match 91 | return file_match.group(1), text_match 92 | 93 | 94 | files = os.listdir(args.syslogngdir) 95 | files = [args.syslogngdir+'/'+f for f in files if os.path.isfile(args.syslogngdir+'/'+f)] #Filtering only the files. 96 | logger.debug(f"syslog-files={files}") 97 | 98 | all_syslog_files= {} 99 | for a_file in files: 100 | all_syslog_files[a_file] = parse_syslog_conf(a_file) 101 | 102 | #print(all_syslog_files) 103 | 104 | # parse a single config file and extract any filtered lines 105 | def parse_single_conf_file(conf_file, all_syslog_config): 106 | all_syslog_dest_config = {} 107 | 108 | # if the file does not exist in this app we just return an empty list 109 | if not os.path.isfile(conf_file): 110 | logger.info(f"conf_file={conf_file} not found") 111 | return {} 112 | 113 | with open(conf_file, "r") as fp: 114 | stanza_name = "" 115 | # read through each line of the file 116 | current_stanza = "" 117 | for line in fp: 118 | logger.debug(f"Working with line={line} file={conf_file}") 119 | if line[0] == "#": 120 | continue 121 | 122 | # it's a stanza line [monitor:///] 123 | if len(line) > 0 and line[0] == "[": 124 | if current_stanza != "": 125 | for key in all_syslog_config.keys(): 126 | syslog_file_name, template = all_syslog_config[key] 127 | # we sometimes use /.../ at the end of the line 128 | if file_name.find("/.../") != -1: 129 | logger.debug(f"before file_name={file_name}") 130 | file_name = file_name[0:-5] 131 | logger.debug(f"after file_name={file_name}") 132 | if syslog_file_name.find(file_name) != -1: 133 | #print(f"potential match with stanza_name={stanza_name} key={key} syslog_file_name={syslog_file_name} template={template}") 134 | if host_segment != "": 135 | syslog_file_name_split = syslog_file_name.split('/') 136 | host_variable = syslog_file_name_split[int(host_segment)][1:] 137 | logger.debug(f"host_variable={host_variable}") 138 | else: 139 | host_variable = "HOST" 140 | if tcp_routing != "": 141 | logger.debug(f"tcp_routing={tcp_routing}") 142 | if index == "" or sourcetype == "": 143 | logger.debug("index or sourcetype not set?") 144 | if host_variable != "HOST": 145 | template = template + "_" + host_variable 146 | destination_name = syslog_file_name[16:syslog_file_name.find("/",17)] 147 | source_name = 
syslog_file_name[0:syslog_file_name.find("/",17)] 148 | new_dest = f"""destination d_hec_{destination_name} {{ 149 | http( 150 | url("https://localhost:8088/services/collector/event") 151 | method("POST") 152 | log-fifo-size(100000000) 153 | workers(5) 154 | batch-lines(5000) 155 | batch-bytes(4096kb) 156 | batch-timeout(3000) 157 | timeout(30) 158 | user_agent("syslog-ng User Agent") 159 | headers("Authorization: Splunk d_hec_{destination_name}") 160 | persist-name("{destination_name}") 161 | disk-buffer( 162 | reliable(no) 163 | disk-buf-size(73400320) 164 | dir("/var/log/syslog/buffers/{destination_name}") 165 | ) 166 | retries(5) 167 | tls( 168 | peer-verify(no) 169 | ) 170 | #body('{{ "event": "$(template {template})", "source": "{source_name}", "time": "${{R_UNIXTIME}}", "host": "${{{host_variable}}}" }}') 171 | body('$(template {template})') 172 | ); 173 | }}; 174 | """ 175 | all_syslog_dest_config[key] = [ new_dest, "d_hec_" + destination_name ] 176 | file_short_name = key[key.rfind('/')+1:key.rfind(".")] 177 | print(f""" 178 | [http://syslog-{file_short_name}] 179 | description=HEC token for {file_short_name} 180 | token = d_hec_{destination_name} 181 | index = {index}indexes = {index}source = {source_name}\nsourcetype = {sourcetype.strip()}""") 182 | 183 | if tcp_routing != "": 184 | print(f"outputgroup={tcp_routing}") 185 | stanza_name = line[1:len(line)-2] 186 | current_stanza = stanza_name 187 | file_name = stanza_name[10:] 188 | logger.debug(f"file_name={file_name}") 189 | logger.debug(f"working with stanza={stanza_name}") 190 | index = "" 191 | sourcetype = "" 192 | host_segment = "" 193 | tcp_routing = "" 194 | elif line.find("index") != -1: 195 | index = re.sub("\s*index\s*=\s*","",line) 196 | logger.debug(f"index={index}") 197 | elif line.find("sourcetype") != -1: 198 | sourcetype = re.sub("\s*sourcetype\s*=\s*","",line) 199 | logger.debug(f"sourcetype={sourcetype}") 200 | elif line.find("host_segment") != -1: 201 | host_segment = re.sub("\s*host_segment\s*=\s*","",line) 202 | logger.debug(f"host_segment={host_segment}") 203 | elif line.find("_TCP_ROUTING") != -1: 204 | tcp_routing = re.sub("\s*_TCP_ROUTING\s*=\s*","",line) 205 | logger.debug(f"tcp_routing={tcp_routing}") 206 | 207 | return all_syslog_dest_config 208 | 209 | all_syslog_dest_config = parse_single_conf_file(args.inputfile, all_syslog_files) 210 | #print(all_syslog_config) 211 | #print(all_syslog_dest_config) 212 | 213 | def replace_within_syslog_conf(conf_file, syslog_dest_config, dest_name): 214 | # if the file does not exist in this app we just return an empty list 215 | if not os.path.isfile(conf_file): 216 | logger.info(f"conf_file={conf_file} not found") 217 | return "" 218 | 219 | all_lines = "" 220 | logger.info(f"working on conf_file={conf_file}") 221 | with open(conf_file, "r") as fp: 222 | # read through each line of the file 223 | inside_dest = False 224 | for line in fp: 225 | logger.debug(f"Working with line={line} file={conf_file}") 226 | if line[0] == "#": 227 | all_lines = all_lines + line 228 | continue 229 | # it's a stanza line [monitor:///] 230 | if line.find("destination ") != -1 and line.find("{") != -1: 231 | inside_dest = True 232 | elif inside_dest and line.find("};") != -1: 233 | inside_dest = False 234 | all_lines = all_lines + '\n' + syslog_dest_config + '\n' 235 | elif inside_dest: 236 | continue 237 | elif line.find("destination(") != -1 or line.find("destination (") != -1: 238 | # this is a destination line so destination (d_file_udp_destname); 239 | # modify it to our new 
destination 240 | all_lines = all_lines + f" destination({dest_name});\n" 241 | else: 242 | all_lines = all_lines + line 243 | return all_lines 244 | 245 | for a_file in files: 246 | if a_file in all_syslog_dest_config: 247 | new_file = replace_within_syslog_conf(a_file, all_syslog_dest_config[a_file][0], all_syslog_dest_config[a_file][1]) 248 | if args.overwritemode: 249 | with open(a_file, 'w') as f: 250 | f.write(new_file) 251 | else: 252 | print(new_file) 253 | --------------------------------------------------------------------------------
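For illustration only, a minimal sketch (not a file in this repo) of the type/name filter CSV that knowledge_obj_extraction_conffiles.py reads via args.filterCSV, and of the two filter keys it derives from each row; the in-memory CSV and its sample rows are invented for the example.

import csv
import io
import urllib.parse

# invented example rows; a real filter CSV needs 'type' and 'name' columns
sample_csv = io.StringIO("type,name\nsavedsearches,My Alert: errors\nviews,my_dashboard\n")

filter_list = []
filter_list_encoded = []
for row in csv.DictReader(sample_csv):
    # .conf stanzas are matched against "<type>_<name>"
    filter_list.append(f"{row['type']}_{row['name']}")
    # metadata stanzas are matched against "<type>/<url-encoded name>"
    filter_list_encoded.append(f"{row['type']}/{urllib.parse.quote(row['name'])}")

print(filter_list)          # ['savedsearches_My Alert: errors', 'views_my_dashboard']
print(filter_list_encoded)  # ['savedsearches/My%20Alert%3A%20errors', 'views/my_dashboard']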
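For illustration only, a hedged sketch of the same ACL re-own call that reownItems.sh issues through "splunk _internal call", written with the requests module for a single saved search; the hostname, credentials, app, new owner and search name below are placeholders, not values used by the script.

import urllib.parse
import requests

base_url = "https://localhost:8089"
auth = ("admin", "changeme")   # placeholder credentials
app = "search"                 # placeholder app
new_owner = "newuser"          # placeholder new owner
search_name = urllib.parse.quote("My saved search", safe="")

entity_url = f"{base_url}/servicesNS/{new_owner}/{app}/saved/searches/{search_name}"
acl_url = f"{entity_url}/acl"

# read the current sharing level first, as the shell script does
res = requests.get(entity_url, auth=auth, params={"output_mode": "json"}, verify=False)
sharing = res.json()["entry"][0]["acl"]["sharing"]

# re-own the object while preserving its sharing level
res = requests.post(acl_url, auth=auth, data={"owner": new_owner, "sharing": sharing}, verify=False)
print(res.status_code, res.text)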
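For illustration only, a hedged sketch of smoke-testing a HEC token before pointing one of the d_hec_* syslog-ng destinations generated by syslog_migration_to_hec_assistant.py at it; the token, index and sourcetype values are placeholders, and the endpoint and verify=False mirror the https://localhost:8088/services/collector/event URL and peer-verify(no) used in the generated destination.

import json
import requests

hec_url = "https://localhost:8088/services/collector/event"
hec_token = "00000000-0000-0000-0000-000000000000"  # placeholder token

event = {
    "event": "<14>Jan  1 00:00:00 testhost test: hello from the migration smoke test",
    "host": "testhost",
    "source": "syslog_migration_smoke_test",
    "sourcetype": "syslog",   # placeholder sourcetype
    "index": "main",          # placeholder index
}

# post a single test event through the token; expect HTTP 200 and {"text":"Success",...}
res = requests.post(
    hec_url,
    headers={"Authorization": f"Splunk {hec_token}"},
    data=json.dumps(event),
    verify=False,
)
print(res.status_code, res.text)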