├── .github └── workflows │ └── secrets-scanner.yaml ├── .gitignore ├── Dockerfile ├── LICENSE.txt ├── README.md └── scalyr /.github/workflows/secrets-scanner.yaml: -------------------------------------------------------------------------------- 1 | name: TruffleHog Secrets Scan 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | schedule: 10 | - cron: '0 4 * * *' 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | TruffleHog: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout code 21 | uses: actions/checkout@v3 22 | with: 23 | fetch-depth: 0 24 | 25 | # Special check which ensures that the clone performed above is not shallow. We need the 26 | # complete git history for scanning to work correctly in all the situations. In some cases 27 | # if a shallow clone is used, trufflehog won't fail with an error, but it would simply 28 | # not detect any files and that could be dangerous. 29 | - name: Shallow repo check 30 | run: | 31 | if git rev-parse --is-shallow-repository | grep -q "true"; then 32 | echo "Encountered a shallow repository, trufflehog may not work as expected!" 33 | exit 1 34 | fi 35 | 36 | - name: scan-pr 37 | uses: trufflesecurity/trufflehog@main 38 | if: ${{ github.event_name == 'pull_request' }} 39 | with: 40 | path: ./ 41 | base: ${{ github.event.repository.default_branch }} 42 | head: HEAD 43 | extra_args: --debug --only-verified 44 | 45 | - name: scan-push 46 | uses: trufflesecurity/trufflehog@main 47 | if: ${{ github.event_name == 'push' }} 48 | with: 49 | path: ./ 50 | base: "" 51 | head: ${{ github.ref_name }} 52 | extra_args: --debug --only-verified 53 | 54 | # As part of cron trigger we scan the whole repo directory. 55 | # NOTE: Since trufflehog GHA is meant to be used in context of push / pr it can't be 56 | # used directly to scan the whole repo directory. This may take a while, but it's a good idea 57 | # to run it on a daily basis. 
58 | - name: scan-cron 59 | if: ${{ github.event_name == 'schedule' }} 60 | run: | 61 | docker run --rm -v "$PWD:/workdir" trufflesecurity/trufflehog:latest git \ 62 | file:///workdir --fail --no-update --debug --only-verified 63 | 64 | - name: Notify Slack on Failure 65 | if: ${{ failure() && github.ref_name == 'master' }} 66 | uses: act10ns/slack@ed1309ab9862e57e9e583e51c7889486b9a00b0f # v2.0.0 67 | env: 68 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 69 | with: 70 | status: ${{ job.status }} 71 | steps: ${{ toJson(steps) }} 72 | channel: '#eng-dataset-o11y' 73 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3-alpine3.16 2 | 3 | COPY scalyr /bin/scalyr 4 | RUN chmod u+x /bin/scalyr 5 | ENTRYPOINT ["/bin/scalyr"] 6 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | scalyr-tool 2 | =========== 3 | 4 | Command-line tool for accessing Scalyr services. 
The following commands are currently supported: 5 | 6 | - [**query**](#querying-logs): Retrieve log data 7 | - [**power-query**](#power-queries): Execute PowerQuery 8 | - [**numeric-query**](#fetching-numeric-data): Retrieve numeric / graph data 9 | - [**facet-query**](#fetching-facet-counts): Retrieve common values for a field 10 | - [**timeseries-query**](#fetching-numeric-data-using-a-timeseries): Retrieve numeric / graph data from a timeseries 11 | - [**get-file**](#retrieving-configuration-files): Fetch a configuration file 12 | - [**put-file**](#creating-or-updating-configuration-files): Create or update a configuration file 13 | - [**delete-file**](#creating-or-updating-configuration-files): Delete a configuration file 14 | - [**list-files**](#listing-configuration-files): List all configuration files 15 | - [**tail**](#tailing-logs): Provide a live 'tail' of a log 16 | 17 | 18 | ## Installation 19 | 20 | Simply download the script file and make it executable. For instance: 21 | 22 | curl https://raw.githubusercontent.com/scalyr/scalyr-tool/master/scalyr > scalyr 23 | chmod u+x scalyr 24 | mv scalyr (some directory on your command path) 25 | 26 | You also need to make your Scalyr API tokens available to the tool. You can specify the token 27 | on the command line using the `--token` argument. However, it is more convenient to store your 28 | tokens in environment variables. This also keeps the tokens out of your console window and 29 | command history. On Unix systems, you can add the following to a file like `.bash_profile`: 30 | 31 | export scalyr_readlog_token='XXX' 32 | export scalyr_readconfig_token='YYY' 33 | export scalyr_writeconfig_token='ZZZ' 34 | 35 | The values for XXX, YYY, and ZZZ can be found at [scalyr.com/keys](https://www.scalyr.com/keys) -- look 36 | for "Read Logs", "Read Config", and "Write Config" tokens, respectively. 
37 | 38 | Setting a custom Scalyr server can be done using the `--server` argument but also via environment variable: 39 | 40 | export scalyr_server='https://eu.scalyr.com' 41 | 42 | After adding these to `.bash_profile`, make sure to also paste them into your current console session, 43 | so that they take effect immediately. Alternatively, run `source ~/.bash_profile`. 44 | 45 | ## Querying logs 46 | 47 | The "query" command allows you to search and filter your logs, or simply retrieve raw log data. The 48 | capabilities are similar to the regular [log view](https://www.scalyr.com/events?mode=log), though you 49 | can retrieve more data at once and have several output format options. 50 | 51 | Here are some usage examples: 52 | 53 | # Display the last 10 log records 54 | scalyr query 55 | 56 | # Display the last 100 log records, showing only timestamp, severity, and message. 57 | # (Timestamp and severity are always displayed.) 58 | scalyr query --count=100 --columns='timestamp,severity,message' 59 | 60 | # Display the first 10 log records beginning at 3:00 PM today, from host100. 61 | scalyr query '$serverHost="host100"' --start='3:00 PM' 62 | 63 | # Display the last 1000 entries in the log tagged as source=accessLog. Print only the status 64 | # and path, in CSV format. 65 | scalyr query '$source="accessLog"' --output=csv --columns='status,uriPath' --count=1000 66 | 67 | Complete argument list: 68 | 69 | scalyr query [filter] [options...] 70 | The filter specifies which log records to return. It uses the same syntax as the "Expression" 71 | field in the [log view](https://www.scalyr.com/events?mode=log). 72 | 73 | --start=xxx 74 | Specify the beginning of the time range to query. Uses the same syntax as the "Start" field in 75 | the log view. Defaults to 1 day ago, or to 1 day before the end time if an end time is given. 76 | --end=xxx 77 | Specify the end of the time range to query. Uses the same syntax as the "End" field in the log 78 | view. 
Defaults to the current time, or to 1 day after the start time if a start time is given. 79 | --count=nnn 80 | How many log records to retrieve, from 1 to 5000. Defaults to 10. 81 | --mode=head|tail 82 | Whether to display log records from the start or end of the time range. Defaults to head if a 83 | start time is given, otherwise to tail. 84 | --columns="..." 85 | Which log attributes to display. Used mainly for logs for which you have specified a parser to 86 | extract attributes from the raw text. Specify one or more attribute names, separated by commas. 87 | --output=multiline|singleline|csv|json|json-pretty 88 | How to display the log records (see below). 89 | --version 90 | Prints the current version number of this tool. 91 | --priority=high|low 92 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 93 | operations where a delay of a second or so is acceptable. Rate limits are tighter for high- 94 | priority queries. 95 | --token=xxx 96 | Specify the API token. For this command, should be a "Read Logs" token. 97 | --verbose 98 | Writes detailed progress information to stderr. 99 | --proxy=: 100 | An address to connect through when using a proxy. If not set will also take the value from one of the following 101 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 102 | 103 | #### Output formats 104 | 105 | By default, the query command outputs log records in a fairly verbose format designed for manual 106 | viewing. You can get something more like a classic log view by specifying a columns list, as shown in 107 | one of the examples above. 108 | 109 | The 'singleline' output option is similar to the default, but places all of a record's attributes on 110 | a single line. This is denser, but can be harder to read. 
111 | 112 | The 'csv' output option emits one line per log record, in Excel comma-separated-value format (with 113 | `CRLF` as the line separator, per the [spec](https://tools.ietf.org/html/rfc4180#page-2)). To use 114 | this option, you must specify the `--columns` argument. 115 | 116 | The 'json-pretty' output option also emits the JSON response from the server, but prettyprinted. 117 | 118 | #### Usage limits 119 | 120 | Your command line and API queries are limited to 30,000 milliseconds of server processing time, 121 | replenished at 36,000 milliseconds per hour. If you exceed this limit, your queries will be intermittently 122 | refused. (Your other uses of Scalyr, such as log uploading or queries via the web site, will not be impacted.) 123 | If you need a higher limit, drop us a line at support@scalyr.com. 124 | 125 | ## Power Queries 126 | 127 | The "power-query" command allows you to execute a PowerQuery. The 128 | capabilities are similar to the regular [PowerQuery](https://www.scalyr.com/query), though you 129 | can retrieve more data at once and have several output format options. 130 | 131 | Here are some usage examples: 132 | 133 | # Display log volume summary by forlogfile for the last 24 hours 134 | scalyr power-query "tag='logVolume' metric='logBytes' | group sum(value) by forlogfile" --start="24h" 135 | 136 | # Display a table of requests, errors and error rate for the last 7 days, in pretty-printed JSON 137 | scalyr power-query "dataset = 'accesslog' | group requests = count(), errors = count(status == 404) \ 138 | by uriPath | let rate = errors / requests | filter rate > 0.01 | sort -rate" --start="7d" --end="0d" \ 139 | --output=json-pretty 140 | 141 | Complete argument list: 142 | 143 | scalyr power-query [query] [options...] 144 | The query specifies the PowerQuery. It uses the same syntax as the "PowerQueries" 145 | page which is documented [here](https://app.scalyr.com/help/power-queries). 
146 | 147 | --start=xxx 148 | Specify the beginning of the time range to query. Uses the same syntax as the "Start" field in 149 | the PowerQueries page. This field is required. 150 | --end=xxx 151 | Specify the end of the time range to query. Uses the same syntax as the "End" field in the PowerQueries page. 152 | Defaults to 1 day after the start time if a start time is given. 153 | --output=csv|json|json-pretty 154 | How to display the log records (see below). 155 | --version 156 | Prints the current version number of this tool. 157 | --priority=high|low 158 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 159 | operations where a delay of a second or so is acceptable. Rate limits are tighter for high- 160 | priority queries. 161 | --token=xxx 162 | Specify the API token. For this command, should be a "Read Logs" token. 163 | --verbose 164 | Writes detailed progress information to stderr. 165 | --proxy=: 166 | An address to connect through when using a proxy. If not set will also take the value from one of the following 167 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 168 | 169 | #### Output formats 170 | 171 | By default, the power-query command outputs in 'csv' format, which emits one line per log record, in Excel 172 | comma-separated-value format (with `CRLF` as the line separator, per the [spec](https://tools.ietf.org/html/rfc4180#page-2)). 173 | 174 | The 'json' output option, not surprisingly, emits a JSON response. 175 | 176 | The 'json-pretty' output option also emits the JSON response from the server, but prettyprinted. 177 | 178 | #### Usage limits 179 | 180 | Your command line and API queries are limited to 30,000 milliseconds of server processing time, 181 | replenished at 36,000 milliseconds per hour. If you exceed this limit, your queries will be intermittently 182 | refused. 
(Your other uses of Scalyr, such as log uploading or queries via the web site, will not be impacted.) 183 | If you need a higher limit, drop us a line at support@scalyr.com. 184 | 185 | ## Tailing logs 186 | 187 | The 'tail' command is similar to the '[query](#querying-logs)' command, except it runs continually, printing query results to stdout. 188 | 189 | Here are some usage examples: 190 | 191 | # Display a live tail of all log records 192 | scalyr tail 193 | 194 | # Display a live tail of all log records from host100. 195 | scalyr tail '$serverHost="host100"' 196 | 197 | # Display a live tail of all log records containing the text [WARN] 198 | # Note: the [] need to be quoted to be processed as text by Scalyr. 199 | # You also need to quote/escape the quotes so they are not eaten by the shell 200 | scalyr tail '"[WARN]"' 201 | 202 | # Display a live tail of log messages, including attributes 203 | scalyr tail --output multiline 204 | 205 | Complete argument list: 206 | 207 | scalyr tail [filter] [options...] 208 | The filter specifies which log records to return. It uses the same syntax as the "Expression" 209 | field in the [log view](https://www.scalyr.com/events?mode=log). 210 | 211 | --lines K, 212 | -n K 213 | Output the previous K lines when starting the tail. Defaults to 10. 214 | 215 | --output multiline|singleline|messageonly 216 | Similar to the multiline and singleline options for the 'query' command, but also has a 'messageonly' 217 | mode that will only display the raw log message, and not any additional attributes. 218 | Defaults to 'messageonly'. 219 | 220 | #### Usage limits 221 | 222 | The 'tail' command is currently restricted to read a maximum of 1,000 log records per 10 seconds. Additionally, 223 | tails will automatically expire after 10 mins. Please contact support@scalyr.com if you require an increase to 224 | these limits. 
225 | 226 | #### Server clocks 227 | 228 | If the clocks on the servers sending log messages to Scalyr are significantly out of sync then some messages may not appear in the live tail. For example, if you send us a new log message with a timestamp old enough that it's not in the 1,000 most recent messages when it arrives at the Scalyr servers, then it will not be displayed by the live tail tool. 229 | 230 | ## Fetching numeric data 231 | 232 | The "numeric-query" command allows you to retrieve numeric data, e.g. for graphing. You can count the 233 | rate of events matching some criterion (e.g. error rate), or retrieve a numeric field (e.g. response 234 | size). 235 | 236 | A numeric query is equivalent to a [timeseries-query](#fetching-numeric-data-using-a-timeseries) with argument 237 | `--no-create-summaries` and without `--only-use-summaries`. If you will be invoking the same query repeatedly (e.g. in a script), 238 | you may want to use the timeseries query command rather than `numeric-query`. 239 | 240 | The commands take the same options and return the same data, but for `timeseries-query` invocations without 241 | `--no-create-summaries` we create a timeseries on the backend for each unique filter/function pair. 242 | This query will execute near-instantaneously, and avoid consuming your account's query budget (see below). 243 | 244 | Here are some usage examples: 245 | 246 | # Count the rate (per second) of occurrences of "/login" in all logs, in each of the last 24 hours 247 | scalyr numeric-query '"/login"' --start 24h --buckets 24 248 | 249 | # Display the average response size of all requests in the last hour 250 | scalyr numeric-query '$dataset="accesslog"' --function 'bytes' --start 1h 251 | 252 | Complete argument list: 253 | 254 | scalyr numeric-query [filter] --start xxx [options...] 255 | The filter specifies which log records to process. 
It uses the same syntax as the "Expression" 256 | field in the [log view](https://www.scalyr.com/events?mode=log). 257 | 258 | --function=xxx 259 | The value to compute from the matching events. You can use any function listed in 260 | https://www.scalyr.com/help/query-language#graphFunctions, except for fraction(expr). For 261 | example: 'mean(x)' or 'median(responseTime)', if x and responseTime are fields of your log. 262 | You can also specify a simple field name, such as 'responseTime', to return the mean value of 263 | that field. If you omit the function argument, the rate of matching events per second will be 264 | returned. Specifying 'rate' yields the same result. Finally, you can specify "count", to compute 265 | the number of matching events in each time period (as defined by the "buckets" option). 266 | --start=xxx 267 | Specify the beginning of the time range to query. Uses the same syntax as the "Start" field in 268 | the log view. You must specify this argument. 269 | --end=xxx 270 | Specify the end of the time range to query. Uses the same syntax as the "End" field in the log 271 | view. Defaults to the current time. 272 | --buckets=nnn 273 | The number of numeric values to return. The time range is divided into this many equal slices. 274 | For instance, suppose you query a four-hour period, with buckets = 4. The query will return four 275 | numbers, each covering a one-hour period. You may specify a value from 1 to 5000; 1 is the default. 276 | --output=csv|json|json-pretty 277 | How to display the results. 'csv' prints all values on a single line, separated by commas. 278 | 'json' prints the raw JSON response from the server, as documented at 279 | https://www.scalyr.com/help/api#numericQuery. 'json-pretty' also prints the JSON response, 280 | but prettyprinted. 281 | --priority=high|low 282 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 283 | operations where a delay of a second or so is acceptable. 
Rate limits are tighter for high- 284 | priority queries. 285 | --token=xxx 286 | Specify the API token. For this command, should be a "Read Logs" token. 287 | --version 288 | Prints the current version number of this tool. 289 | --verbose 290 | Writes detailed progress information to stderr. 291 | --proxy=: 292 | An address to connect through when using a proxy. If not set will also take the value from one of the following 293 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 294 | 295 | ## Fetching facet counts 296 | 297 | The "facet-query' command allows you to retrieve the most common values for a field. For instance, you can 298 | find the most common URLs accessed on your site, the most common user-agent strings, or the most common 299 | response codes returned. (If a very large number of events match your search criteria, the results will be 300 | based on a random subsample of at least 500,000 matching events.) 301 | 302 | The default output format is CSV, sorted by count desc: 303 | 304 | ``` 305 | count,value 306 | 4,value-the-first 307 | 2,"other value" 308 | ``` 309 | 310 | _Note that CSV output uses `CRLF` as the line separator._ 311 | 312 | 313 | Here are some usage examples: 314 | 315 | curl 'https://www.scalyr.com/api/facetQuery?queryType=facet&field=uriPath&startTime=1h&token=XXX' 316 | 317 | # Display the most common HTTP request URLs in the last 24 hours. 318 | scalyr facet-query '$dataset="accesslog"' uriPath --start 24h 319 | 320 | # Display the most common HTTP response codes for requests to index.html. 321 | scalyr facet-query 'uriPath="/index.html"' status --start 24h 322 | 323 | Complete argument list: 324 | 325 | scalyr facet-query filter field --start xxx [options...] 326 | The filter specifies which log records to process. It uses the same syntax as the "Expression" 327 | field in the [log view](https://www.scalyr.com/events?mode=log). 
328 | 329 | --count=nnn 330 | How many distinct values to return. You may specify a value from 1 to 1000; 100 is the default. 331 | --start=xxx 332 | Specify the beginning of the time range to query. Uses the same syntax as the "Start" field in 333 | the log view. You must specify this argument. 334 | --end=xxx 335 | Specify the end of the time range to query. Uses the same syntax as the "End" field in the log 336 | view. Defaults to the current time. 337 | --output=csv|json|json-pretty 338 | How to display the results. 'csv' prints one value (and its count) per line, separated by commas. 339 | 'json' prints the raw JSON response from the server, as documented at 340 | https://www.scalyr.com/help/api#numericQuery. 'json-pretty' also prints the JSON response, 341 | but prettyprinted. 342 | --priority=high|low 343 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 344 | operations where a delay of a second or so is acceptable. Rate limits are tighter for high- 345 | priority queries. 346 | --token=xxx 347 | Specify the API token. For this command, should be a "Read Logs" token. 348 | --version 349 | Prints the current version number of this tool. 350 | --verbose 351 | Writes detailed progress information to stderr. 352 | --proxy=: 353 | An address to connect through when using a proxy. If not set will also take the value from one of the following 354 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 355 | 356 | 357 | #### Usage limits 358 | 359 | Your command line and API queries are limited to 30,000 milliseconds of server processing time, 360 | replenished at 36,000 milliseconds per hour. If you exceed this limit, your queries will be intermittently 361 | refused. (Your other uses of Scalyr, such as log uploading or queries via the web site, will not be impacted.) 362 | If you need a higher limit, drop us a line at support@scalyr.com. 
363 | 364 | 365 | ## Fetching numeric data using a timeseries 366 | 367 | A timeseries precomputes a numeric query, allowing you to execute queries almost instantaneously, and without 368 | consuming your account's query budget. This is especially useful if you are using the Scalyr API to feed a 369 | home-built dashboard, alerting system, or other automated tool. Note that the [Scalyr API](https://www.scalyr.com/help/api#timeseriesQuery) 370 | allows multiple timeseries queries in a single API invocation, but the command-line tool only supports 371 | one query at a time. 372 | 373 | When a new timeseries is defined, we immediately start live updating of that timeseries from the ingestion pipeline. 374 | In addition, we begin a background process to extend the timeseries backward in time, so that it covers the full 375 | timespan of your query. This backfill process is automatic, and if you later issue the same query with an even 376 | earlier start time, we will extend the backfill to cover that as well. 377 | To change this behavior, use `--no-create-summaries`. 378 | 379 | A related argument, `--only-use-summaries`, controls whether this API call should only use preexisting timeseries or should 380 | execute the queries against the event database if no matching summary exists. If this argument is used, then your API call 381 | is guaranteed to return quickly and to execute inexpensively, but with possibly empty results. If this argument is not used, 382 | the call may be slower & more expensive, but will be complete. 383 | For example, issuing a new query over the past 3 weeks with `--only-use-summaries` will return quickly 384 | no matter what, but will initially return empty results until backfill (covering the past 3 weeks) is complete. 385 | This can be a cost-effective way to seed a new timeseries with a long backfill period when you don't need 386 | results right away. 
[numeric-query](#fetching-numeric-data)
418 | --output=csv|json|json-pretty 419 | How to display the results. 'csv' prints all values on a single line, separated by commas. 420 | 'json' prints the raw JSON response from the server, as documented at 421 | https://www.scalyr.com/help/api#numericQuery. 'json-pretty' also prints the JSON response, 422 | but prettyprinted. 423 | --priority=high|low 424 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 425 | operations where a delay of a second or so is acceptable. Rate limits are tighter for high- 426 | priority queries. 427 | --only-use-summaries 428 | Specifies to only query summaries, and not to search the column store for any summaries not yet populated. 429 | No results will be returned unless the summaries queried have been backfilled. 430 | --no-create-summaries 431 | Specifies to not create summaries for this query. 432 | --token=xxx 433 | Specify the API token. For this command, should be a "Read Logs" token. 434 | --version 435 | Prints the current version number of this tool. 436 | --verbose 437 | Writes detailed progress information to stderr. 438 | --proxy=: 439 | An address to connect through when using a proxy. If not set will also take the value from one of the following 440 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 441 | 442 | 443 | 444 | 445 | ## Retrieving configuration files 446 | 447 | The "get-file" command allows you to retrieve a configuration file, writing the file text to stdout. 448 | Configuration files are used to define log parsers, dashboards, alerting rules, and more. Any page 449 | on the Scalyr web site which contains a full-page text editor, is editing a configuration file. 
450 | 451 | Using the get-file command is simple: 452 | 453 | # Display the alerts file 454 | scalyr get-file /alerts 455 | 456 | # Display the "Foo" dashboard 457 | scalyr get-file /dashboards/Foo 458 | 459 | Complete argument list: 460 | 461 | scalyr get-file file-path [options...] 462 | 463 | --version 464 | Prints the current version number of this tool. 465 | --token=xxx 466 | Specify the API token. For this command, should be a "Read Config" token. 467 | --verbose 468 | Writes detailed progress information to stderr. 469 | --proxy=: 470 | An address to connect through when using a proxy. If not set will also take the value from one of the following 471 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 472 | 473 | 474 | ## Creating or updating configuration files 475 | 476 | The "put-file" command allows you to create or overwrite a configuration file, taking the new 477 | file content from stdin. 478 | 479 | Using the put-file command is simple: 480 | 481 | # Overwrite the alerts file 482 | scalyr put-file /alerts < alerts.json 483 | 484 | # Create or overwrite the "Foo" dashboard 485 | scalyr put-file /dashboards/Foo < fooDashboard.json 486 | 487 | Complete argument list: 488 | 489 | scalyr put-file file-path [options...] 490 | 491 | --version 492 | Prints the current version number of this tool. 493 | --token=xxx 494 | Specify the API token. For this command, should be a "Write Config" token. 495 | --verbose 496 | Writes detailed progress information to stderr. 497 | --proxy=: 498 | An address to connect through when using a proxy. If not set will also take the value from one of the following 499 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 500 | --validate 501 | Validate if input is a valid HOCON. Validation relies on pyhocon parser 502 | (see https://github.com/chimpler/pyhocon). 
503 | 504 | ## Deleting configuration files 505 | 506 | The "delete-file" command allows you to delete a configuration file: 507 | 508 | Using the delete-file command is simple: 509 | 510 | # Delete the "Foo" dashboard 511 | scalyr delete-file /dashboards/Foo 512 | 513 | ## Listing configuration files 514 | 515 | The "list-files" command lists all configuration files: 516 | 517 | scalyr list-files 518 | 519 | Complete argument list: 520 | 521 | scalyr list-files [options...] 522 | 523 | --version 524 | Prints the current version number of this tool. 525 | --token=xxx 526 | Specify the API token. For this command, should be a "Read Config" token. 527 | --verbose 528 | Writes detailed progress information to stderr. 529 | --proxy=: 530 | An address to connect through when using a proxy. If not set will also take the value from one of the following 531 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 532 | 533 | 534 | ## TODO 535 | 536 | Add option to use LF, rather than CRLF, when outputting CSV (for `facet-query` in particular). 537 | 538 | 539 | ## Revision History 540 | 541 | #### Feb. 21, 2014: version 0.1 542 | 543 | Initial release. 544 | -------------------------------------------------------------------------------- /scalyr: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Scalyr command-line utility 4 | 5 | import sys 6 | import os 7 | import argparse 8 | import time 9 | import datetime 10 | import json 11 | import csv 12 | 13 | # unicode string type, set to str initially as 'str' exists in both 14 | # python 2 and 3. If python 2 we will override it with the unicode 15 | # type. 
# If python 3 we'll do nothing as 'str' is the unicode type for python 3
_unicode_type = str

# are we python2 or not
_python2 = False

try:
    # Python 2 versions
    import httplib
    import StringIO
    from urlparse import urlparse

    # if we are here then it's python 2 so set _unicode_type to unicode
    _unicode_type = unicode
    _python2 = True

except ImportError:
    # Python 3+ versions
    import http.client as httplib
    import io as StringIO
    from urllib.parse import urlparse

from collections import deque

# Define some constants
TOOL_VERSION = "0.4"


# Return the API token from the command line or environment variables.
#
# args: parsed command-line arguments; must have a 'token' attribute
# environmentVariableName: environment variable consulted when --token was not given
# permissionType: human-readable permission name, used only in the error message
#
# Exits the process with status 1 if no token can be found.
def getApiToken(args, environmentVariableName, permissionType):
    apiToken = args.token
    if apiToken == '':
        apiToken = os.getenv(environmentVariableName, '')
    if apiToken == '':
        print_stderr('Please specify an API token granting ' + permissionType + ' permission. You can place it in the')
        print_stderr('command line as --token "XXX", or in the environment variable "' + environmentVariableName + '".')
        print_stderr('Use of an environment variable is recommended, to avoid displaying your API token in the')
        print_stderr('console or your command history. You can find API tokens at https://www.scalyr.com/keys.')
        sys.exit(1)

    return apiToken


# Return (proxyAddress, useProxy) for this invocation: the --proxy argument if
# given, otherwise the first matching proxy environment variable.
# NOTE: as written, an https_proxy/HTTPS_PROXY setting overrides an earlier
# http_proxy/HTTP_PROXY match (the second 'if' is not chained to the first).
def getProxyAddress(args):
    proxyAddress = args.proxy

    if proxyAddress is None:
        # Check if we have a environment variable
        if os.environ.get('http_proxy'):
            proxyAddress = os.environ.get('http_proxy')
        elif os.environ.get('HTTP_PROXY'):
            proxyAddress = os.environ.get('HTTP_PROXY')
        if os.environ.get('https_proxy'):
            proxyAddress = os.environ.get('https_proxy')
        elif os.environ.get('HTTPS_PROXY'):
            proxyAddress = os.environ.get('HTTPS_PROXY')
    # Setup proxy connection

    if proxyAddress:
        useProxy = True
    else:
        useProxy = False

    return proxyAddress, useProxy


# Split a proxy URL (e.g. "http://proxy:8080") into its components.
def parseProxyAddress(url):
    parsedProxyAddress = urlparse(url)

    return parsedProxyAddress


# Print a string to stderr.
def print_stderr(message):
    sys.stderr.write(str(message) + '\n')


def output_encoded(message):
    """Converts a string to ensure the output will be utf8 so that output can be safely redirected
    to a file without causing conversion errors. This works differently in Python2 vs Python3

    In Python2, we need to explicitly convert all unicode strings to utf8 otherwise we'll get a conversion
    error if output contains unicode *and* output has been redirected to a file.
    In Python3, output is utf8 by default and we need to leave the string alone - if we manually convert
    python3 strings to utf8 then the output will wrap all strings in b''

    """
    if _python2 and type(message) is _unicode_type:
        return message.encode('utf-8')

    return message


# Send a request to the server, and return the parsed JSON response.
# args: Our parsed command-line arguments
# uri: Request path for this RPC, e.g. "api/query"
# parameterDict: The dictionary to be sent (JSON-encoded) to the server as the request body
#
# Returns a (parsedResponse, responseBody) tuple. Exits the process with
# status 1 on a non-200 HTTP status, unparseable JSON, or a non-"success"
# status field in the response.
def sendRequest(args, uri, parameterDict):
    parameterJson = json.dumps(parameterDict)

    queryStartTime = datetime.datetime.now()

    verbose = args.verbose
    if verbose:
        print_stderr("Using arguments: %s" % args)

    # Allow to set custom scalyr_server via environment variable
    serverAddress = args.server

    if serverAddress is None:
        serverAddress = os.environ.get('scalyr_server', 'https://www.scalyr.com')

    proxyAddress, useProxy = getProxyAddress(args)

    if verbose:
        if useProxy:
            print_stderr("Using proxy: %s" % proxyAddress)
        else:
            print_stderr("No proxy configuration found")

    # Strip the scheme off the server address and remember whether to use TLS.
    useSSL = True
    if serverAddress.startswith("http://"):
        useSSL = False
        serverAddress = serverAddress[7:]
    elif serverAddress.startswith("https://"):
        serverAddress = serverAddress[8:]

    if verbose:
        if useSSL:
            protocol = 'https'
        else:
            protocol = 'http'
        print_stderr("Connecting to %s via %s" % (serverAddress, protocol))

    # Open the connection, tunnelling through the proxy when one is configured.
    conn = None
    if useSSL and useProxy:
        proxyAddress = parseProxyAddress(proxyAddress)
        conn = httplib.HTTPSConnection(proxyAddress.hostname, proxyAddress.port)
        conn.set_tunnel(serverAddress)
    elif useProxy:
        proxyAddress = parseProxyAddress(proxyAddress)
        conn = httplib.HTTPConnection(proxyAddress.hostname, proxyAddress.port)
        conn.set_tunnel(serverAddress)
    elif useSSL:
        conn = httplib.HTTPSConnection(serverAddress)
    else:
        conn = httplib.HTTPConnection(serverAddress)

    headers = {"Content-type": "application/json"}

    if verbose:
        print_stderr("Request headers:")
        for i in headers:
            print_stderr("  %s: %s" % (i, headers[i]))
        print_stderr("Request body:")
        print_stderr(json.dumps(json.loads(parameterJson), sort_keys=True, indent=2, separators=(',', ': ')))

    conn.request("POST", uri, parameterJson, headers)

    # Retrieve and parse the response.
    response = conn.getresponse()
    responseBody = response.read().decode('utf8')

    if verbose or (response.status != 200):
        print_stderr("After %s seconds, Scalyr server returned %s bytes; status %d / %s" % (
            int((datetime.datetime.now() - queryStartTime).total_seconds() * 1000) / 1000.0, len(responseBody),
            response.status, response.reason))

    if response.status != 200:
        print_stderr('Response body: ' + responseBody)
        sys.exit(1)

    try:
        parsedResponse = json.loads(responseBody)
    except ValueError:
        print_stderr('Scalyr server returned invalid response:')
        print_stderr(responseBody)
        sys.exit(1)

    status = parsedResponse['status']
    if not status.startswith('success'):
        print_stderr('Scalyr server returned error: %s (%s)' % (parsedResponse['message'], status))
        sys.exit(1)

    return (parsedResponse, responseBody)


# Implement the "scalyr get-file" command: fetch a configuration file and
# write its content to stdout (metadata goes to stderr).
def commandGetFile(parser):
    parser.add_argument('filepath',
                        help='server pathname of the file to retrieve, e.g. "/scalyr/alerts"')
    args = parser.parse_args()

    # Send the request to the server.
    response, rawResponse = sendRequest(args, '/getFile', {
        "token": getApiToken(args, 'scalyr_readconfig_token', 'Read Config'),
        "path": args.filepath,
    })

    # Print the file content.
    if response['status'] == 'success/noSuchFile':
        print_stderr('File "%s" does not exist' % (args.filepath))
    else:
        # createDate/modDate are millisecond timestamps.
        createDate = datetime.datetime.fromtimestamp(int(response['createDate']) / 1000)
        modDate = datetime.datetime.fromtimestamp(int(response['modDate']) / 1000)

        print_stderr('Retrieved file "%s", version %d, created %s, modified %s, length %s' % (
            args.filepath, response['version'], createDate, modDate, len(response['content'])))
        print(output_encoded(response['content']))


# Implement the "scalyr put-file" command: create or overwrite a configuration
# file with content read from stdin. With --validate, the content is first
# checked to be parseable HOCON via the optional pyhocon package.
def commandPutFile(parser):
    # Parse the command-line arguments.
    parser.add_argument('filepath',
                        help='server pathname of the file to upload, e.g. "/scalyr/alerts"')
    parser.add_argument('--validate',
                        action='store_true',
                        help='validate if input is a valid HOCON')
    args = parser.parse_args()

    content = sys.stdin.read()
    if args.validate:

        from importlib import import_module
        # Import pyhocon lazily so it is only required when --validate is used.
        # Catch ImportError rather than ModuleNotFoundError: ModuleNotFoundError
        # does not exist on Python 2 (and subclasses ImportError on Python 3),
        # so this stays compatible with both interpreters.
        try:
            module = import_module("pyhocon.converter")
            configFactory = getattr(module, "ConfigFactory")
        except ImportError:
            print_stderr('"pyhocon" is needed for validation:'
                         ' run "pip install pyhocon" to install')
            sys.exit(1)
        try:
            configFactory.parse_string(content)
        # broad exception clause to cover
        # pyhocon and pyparsing exceptions
        except Exception as err:
            print_stderr('Input is invalid, error: %s' % err)
            sys.exit(1)

    response, rawResponse = sendRequest(args, '/putFile', {
        "token": getApiToken(args, 'scalyr_writeconfig_token', 'Write Config'),
        "path": args.filepath,
        "content": content
    })

    # Confirm file was updated.
    print_stderr('File "%s" updated' % (args.filepath))

# Implement the "scalyr delete-file" command.
def commandDeleteFile(parser):
    """Implement "scalyr delete-file": delete a configuration file on the server."""
    # Parse the command-line arguments.
    parser.add_argument('filepath',
                        help='server pathname of the file to delete, e.g. "/scalyr/alerts"')
    args = parser.parse_args()

    # Send the request to the server. Deletion is expressed as a putFile call
    # with the "deleteFile" flag set, so a "Write Config" token is required.
    response, rawResponse = sendRequest(args, '/putFile', {
        "token": getApiToken(args, 'scalyr_writeconfig_token', 'Write Config'),
        "path": args.filepath,
        "deleteFile": True
    })

    # Confirm file was deleted.
    print_stderr('File "%s" deleted' % (args.filepath))


# Implement the "scalyr list-files" command.
def commandListFiles(parser):
    """List the pathnames of all configuration files, one per line on stdout."""
    # Parse the command-line arguments.
    args = parser.parse_args()

    # Send the request to the server.
    response, rawResponse = sendRequest(args, '/listFiles', {
        "token": getApiToken(args, 'scalyr_readconfig_token', 'Read Config')
    })

    # Print the file content.
    paths = response['paths']
    for i in range(len(paths)):
        print(paths[i])


# Implement the "scalyr query" command.
def commandQuery(parser):
    """Retrieve log records matching a filter and print them in the requested format."""
    # Parse the command-line arguments.
    parser.add_argument('filter', nargs='?', default='',
                        help='search term or filter expression')
    parser.add_argument('--start', default='',
                        help='beginning of the time range to query')
    parser.add_argument('--end', default='',
                        help='end of the time range to query')
    parser.add_argument('--count', type=int, default=10,
                        help='how many log records to retrieve (1-5000)')
    parser.add_argument('--mode', choices=['head', 'tail', ''], default='',
                        help='specifies whether to show logs from the beginning or end of the time range')
    parser.add_argument('--columns', default='',
                        help='comma-delimited list of columns to display; defaults to all')
    parser.add_argument('--output', choices=['singleline', 'multiline', 'csv', 'json', 'json-pretty'],
                        default='multiline',
                        help='specifies the format in which matching log messages are displayed')
    parser.add_argument('--priority', choices=['high', 'low'], default='high',
                        help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.')
    parser.add_argument('--continuationToken', default='',
                        help='specifies the continuation token to use from a previous request')

    args = parser.parse_args()

    columns = args.columns
    output = args.output
    # CSV output needs an explicit column list to form the header row.
    if output == 'csv' and columns == '':
        print_stderr('For CSV output, you must supply a nonempty --columns option')
        sys.exit(1)

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    params = {
        "token": apiToken,
        "queryType": "log",
        "filter": args.filter,
        "startTime": args.start,
        "endTime": args.end,
        "maxCount": args.count,
        "pageMode": args.mode,
        "columns": columns,
        "priority": args.priority
    }

    if args.continuationToken:
        params['continuationToken'] = args.continuationToken

    # Send the query to the server.
    response, rawResponse = sendRequest(args, '/api/query', params)

    # Print the log records.
    matches = response['matches']

    if args.output == 'json':
        print(output_encoded(rawResponse))
    elif args.output == 'json-pretty':
        # NOTE(review): --no_escape_unicode is not registered on this parser;
        # presumably added by the shared top-level parser. Also, passing the
        # flag directly as ensure_ascii (no negation) looks inverted given the
        # option name — verify against the global option's action/default.
        print(output_encoded(json.dumps(response, ensure_ascii=args.no_escape_unicode, sort_keys=True, indent=2,
                                        separators=(',', ': '))))
    elif args.output == 'csv':
        columnList = columns.split(',')

        # Reusable row buffer, seeded with the column names for the header row.
        ar = []
        for i in range(len(columnList)):
            ar.append(columnList[i])

        csvBuffer = StringIO.StringIO()
        csvWriter = csv.writer(csvBuffer, dialect='excel')
        csvWriter.writerow(ar)
        for i in range(len(matches)):
            match = matches[i]
            # assumes each match carries an 'attributes' dict when columns are
            # requested — TODO confirm against the /api/query response schema
            attributes = match.get('attributes')
            # NOTE: the inner loop reuses the name `i`; harmless here because
            # `match` was captured above, but fragile if this code is edited.
            for i in range(len(columnList)):
                column = columnList[i]
                # A column may live either at the top level of the match or
                # inside its attributes; missing columns become empty cells.
                if column in match:
                    ar[i] = output_encoded(match.get(column))
                elif column in attributes:
                    ar[i] = output_encoded(attributes.get(column))
                else:
                    ar[i] = ''
            csvWriter.writerow(ar)

        print(csvBuffer.getvalue())
    else:
        # Readable text format (singleline or multiline)
        for i in range(len(matches)):
            printReadableRow(args.output, matches[i])


def printReadableRow(output, match):
    """Print one matched event in 'singleline', 'multiline', or 'messageonly' format."""
    # Timestamps arrive as nanoseconds since the epoch.
    rawTimestamp = match.get('timestamp')
    timestamp = ""
    if rawTimestamp:
        timestamp = str(datetime.datetime.fromtimestamp(int(rawTimestamp) / 1E9)) + ": "

    rawSeverity = match.get('severity')
    severity = ""
    # 'or 0 == rawSeverity' keeps severity 0 (falsy) from being dropped.
    if rawSeverity or 0 == rawSeverity:
        # One-letter label indexed by the numeric severity level.
        severity = ['L', 'K', 'J', 'I', 'W', 'E', 'F'][rawSeverity] + " "

    # get message, minus any trailing whitespace
    message = match.get('message')
    if not message:
        message = ''
        # With no message there is nothing for 'messageonly' to show, so fall
        # back to 'singleline' which at least displays the attributes.
        if output == 'messageonly':
            output = 'singleline'

    message = message.rstrip()

    message = output_encoded(message)
    attributes = match.get('attributes')

    if output == 'singleline':
        thisline = '%s%s%s' % (timestamp, severity, message)
        for attrName in sorted(attributes.keys()):
            thisline += ' %s=%s' % (attrName, attributes[attrName])
        print(thisline)
    elif output == 'messageonly':
        print(message)
    else:
        # multiline: message on one line, one attribute per following line.
        print('%s%s%s' % (timestamp, severity, message))
        for attrName in sorted(attributes.keys()):
            print('  %s = %s' % (attrName, attributes[attrName]))


# Print the output of a numeric-query or timeseries-query command.
def printNumericResults(values, outputFormat, rawResponse, response):
    """Print the output of a numeric-query or timeseries-query command.

    values: the list of numbers returned by the server
    outputFormat: 'json', 'json-pretty', or anything else for CSV
    rawResponse: the raw JSON text from the server (printed verbatim for 'json')
    response: the parsed response dict (pretty-printed for 'json-pretty')
    """
    if outputFormat == 'json':
        print(rawResponse)
    elif outputFormat == 'json-pretty':
        print(json.dumps(response, sort_keys=True, indent=2, separators=(',', ': ')))
    else:
        # csv: all values on a single comma-separated line.
        print(','.join(map(str, values)))


def get_match_unique_id(match):
    """Return a (timestamp, session) tuple used to de-duplicate tail matches."""
    # assumes every match carries 'timestamp' and 'session' keys — TODO confirm
    return (match['timestamp'], match['session'])


def liveTail(apiToken, args):
    """Poll the query API and print new matching events, tail -f style.

    Repeatedly issues a low-priority 'tail' query over the last 10 minutes,
    printing only events not seen in a previous poll. The loop self-limits to
    10 minutes of total runtime to minimize server load.
    """
    maximum_tail_time = 10 * 60
    readback_limit = 10 * 60  # 10 minutes
    max_matches = 1000
    poll_interval = 10

    initial_lines = args.lines

    # The server returns at most max_matches rows, so the initial backlog
    # printout is capped at the same limit.
    if initial_lines > max_matches:
        print_stderr("Output of previous lines is limited to 1000 lines only")
        initial_lines = max_matches

    start_time = time.time()
    current_time = time.time()
    first = True

    # Bounded FIFO of ids already printed; old ids age out automatically.
    previous_matches = deque(maxlen=max_matches)

    print_stderr("Beginning live tail..")
    while current_time - start_time < maximum_tail_time:

        # Send the query to the server.
        response, rawResponse = sendRequest(args, '/api/query', {
            "token": apiToken,
            "queryType": "log",
            "filter": args.filter,
            "startTime": int(current_time - readback_limit),
            "endTime": int(current_time),
            "maxCount": max_matches,
            "pageMode": "tail",
            "columns": '',
            "priority": "low"
        })

        # get a list of matches that we haven't seen yet
        new_matches = [match for match in response['matches'] if get_match_unique_id(match) not in previous_matches]

        # if the "not in previous_matches" test didn't find any duplicates, that means there have been
        # so many new log events that our new query didn't overlap with the previous query. Therefore, we'll
        # have missed some of the intervening messages.
        row_count = len(new_matches)
        if not first and row_count >= max_matches:
            print_stderr("[WARN] Too many messages for tail to keep up; some messages are not being shown.")

        # print out any new matches, and add the ids to the list of matches we have already seen.
        # Also keep track of how many rows we have seen already, so the first time through we can
        # print the last few lines
        for match in new_matches:
            # On the first pass, skip rows until only `initial_lines` remain.
            if not first or row_count <= initial_lines:
                printReadableRow(args.output, match)
            previous_matches.append(get_match_unique_id(match))
            row_count -= 1

        # sleep for any remaining time
        new_time = time.time()
        elapsed_time = new_time - current_time
        remaining_time = poll_interval - elapsed_time
        if remaining_time > 0:
            time.sleep(remaining_time)

        current_time = time.time()
        first = False
    print_stderr(
        "----\nTo minimize server load, live tail expires after 10 minutes. If this is bothersome, please let us know at support@scalyr.com.\n")


# Implement the 'scalyr tail' command
def commandTail(parser):
    """Parse tail-specific arguments and run liveTail until it expires or Ctrl-C."""
    # Build the args
    parser.add_argument('filter', nargs='?', default='',
                        help='search term or filter expression')
    parser.add_argument('--output', choices=['singleline', 'multiline', 'messageonly'], default='messageonly',
                        help='specifies the format in which matching log messages are displayed')
    parser.add_argument('--lines', '-n', type=int, default=10,
                        help='Output the previous \'n\' lines at the start of the tail')

    # Get the API token
    args = parser.parse_args()
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    try:
        liveTail(apiToken, args)
    except KeyboardInterrupt:
        # Ctrl-C ends the tail cleanly rather than with a traceback.
        print_stderr("\nLive tail has quit.")


# Implement the "scalyr numeric-query" command.
def commandNumericQuery(parser):
    """Implement "scalyr numeric-query": compute a numeric value from matching events.

    Registers the subcommand's options on `parser`, issues the
    /api/numericQuery request, and prints the resulting values via
    printNumericResults in the format chosen by --output.
    """
    # Option table for this subcommand: (flags, keyword arguments) pairs,
    # registered in order on the supplied parser.
    optionSpecs = [
        (('filter',), dict(nargs=1, default='',
                           help='search term or filter expression')),
        (('--function',), dict(default='',
                               help='the value to compute from the events matching the filter')),
        (('--start',), dict(required=True,
                            help='beginning of the time range to query')),
        (('--end',), dict(default='',
                          help='end of the time range to query')),
        (('--buckets',), dict(type=int, default=1,
                              help='how many buckets to divide the time interval into (1-5000)')),
        (('--output',), dict(choices=['csv', 'json', 'json-pretty'], default='csv',
                             help='specifies the format in which numbers are emitted')),
        (('--priority',), dict(choices=['high', 'low'], default='high',
                               help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.')),
    ]
    for flags, options in optionSpecs:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    # Build the request body and send the query to the server. Note that
    # 'filter' is a one-element list (nargs=1), hence the [0].
    requestBody = {
        "token": apiToken,
        "queryType": "numeric",
        "filter": args.filter[0],
        "function": args.function,
        "startTime": args.start,
        "endTime": args.end,
        "buckets": args.buckets,
        "priority": args.priority
    }
    response, rawResponse = sendRequest(args, '/api/numericQuery', requestBody)

    printNumericResults(response['values'], args.output, rawResponse, response)


# Print the output of a facet-query command.
def printFacetResults(matchCount, values, outputFormat, rawResponse, response, ensure_ascii=True):
    """Print the output of a facet-query command.

    matchCount: total number of matching events (currently unused here)
    values: list of {'value': ..., 'count': ...} dicts from the server
    outputFormat: 'json', 'json-pretty', or anything else for CSV
    rawResponse: raw JSON text (printed verbatim for 'json')
    response: parsed response dict (pretty-printed for 'json-pretty')
    ensure_ascii: passed through to json.dumps for 'json-pretty'
    """
    if outputFormat == 'json':
        print(output_encoded(rawResponse))
    elif outputFormat == 'json-pretty':
        print(output_encoded(
            json.dumps(response, ensure_ascii=ensure_ascii, sort_keys=True, indent=2, separators=(',', ': '))))
    else:
        # csv: header row, then one (count, value) row per facet value.
        csvBuffer = StringIO.StringIO()
        csvWriter = csv.writer(csvBuffer, dialect='excel')
        csvWriter.writerow(['count', 'value'])
        for i in range(len(values)):
            valueAndCount = values[i]
            csvWriter.writerow([valueAndCount.get('count'), output_encoded(valueAndCount.get('value'))])
        print(csvBuffer.getvalue())


# Implement the "scalyr facet-query" command.
def commandFacetQuery(parser):
    """Retrieve the most common values of a field and print them."""
    # Parse the command-line arguments.
    parser.add_argument('filter', nargs=1,
                        help='search term or filter expression')
    parser.add_argument('field', nargs=1,
                        help='the field whose values should be retrieved')
    parser.add_argument('--count', type=int, default=100,
                        help='maximum number of unique values to retrieve (1-1000)')
    parser.add_argument('--start', required=True,
                        help='beginning of the time range to query')
    parser.add_argument('--end', default='',
                        help='end of the time range to query')
    parser.add_argument('--output', choices=['csv', 'json', 'json-pretty'], default='csv',
                        help='specifies the format in which values are emitted')
    parser.add_argument('--priority', choices=['high', 'low'], default='high',
                        help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.')
    args = parser.parse_args()

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    # Send the query to the server. 'filter' and 'field' are one-element
    # lists (nargs=1), hence the [0] indexing.
    response, rawResponse = sendRequest(args, '/api/facetQuery', {
        "token": apiToken,
        "queryType": "facet",
        "filter": args.filter[0],
        "field": args.field[0],
        "maxCount": args.count,
        "startTime": args.start,
        "endTime": args.end,
        "priority": args.priority
    })

    # NOTE(review): --no_escape_unicode is not registered on this parser;
    # presumably added by the shared top-level parser — confirm.
    printFacetResults(response['matchCount'], response['values'], args.output, rawResponse, response,
                      ensure_ascii=args.no_escape_unicode)


# Print the output of a power-query command.
def printPowerResults(outputFormat, rawResponse, response, ensure_ascii=True):
    """Print the output of a power-query command in JSON or CSV form.

    In CSV mode, server warnings and an omitted-events notice (when the
    server dropped rows due to memory limits) are emitted before the data.
    """
    matchingEvents, omittedEvents = response['matchingEvents'], response['omittedEvents']
    columns, values, warnings = response['columns'], response['values'], response['warnings']

    # Extract a column's display name from its descriptor dict.
    def col_name(col):
        return col.get('name')

    if outputFormat == 'json':
        print(output_encoded(rawResponse))
    elif outputFormat == 'json-pretty':
        print(output_encoded(
            json.dumps(response, ensure_ascii=ensure_ascii, sort_keys=True, indent=2, separators=(',', ': '))))
    else:
        # csv
        csvBuffer = StringIO.StringIO()
        csvWriter = csv.writer(csvBuffer, dialect='excel')

        if (len(warnings) > 0):
            csvWriter.writerow(warnings)
        if (omittedEvents > 0):
            csvWriter.writerow(["%d of %d events (%.1f%%) were omitted due to memory limits" % (
                omittedEvents, matchingEvents, (omittedEvents * 100) / matchingEvents)])

        # Header row of column names, then one row per result tuple.
        csvWriter.writerow(map(output_encoded, map(col_name, columns)))
        for i in range(len(values)):
            csvWriter.writerow(map(output_encoded, values[i]))
        print(csvBuffer.getvalue())


# Implement the "scalyr power-query" command.
def commandPowerQuery(parser):
    """Execute a PowerQuery and print its result table."""
    # Parse the command-line arguments.
    parser.add_argument('filter', nargs=1,
                        help='scalyr power query')
    parser.add_argument('--start', required=True,
                        help='beginning of the time range to query')
    parser.add_argument('--end', default='',
                        help='end of the time range to query')
    parser.add_argument('--output', choices=['csv', 'json', 'json-pretty'], default='csv',
                        help='specifies the format in which values are emitted')
    parser.add_argument('--priority', choices=['high', 'low'], default='high',
                        help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.')
    args = parser.parse_args()

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    # Send the query to the server.
    response, rawResponse = sendRequest(args, '/api/powerQuery', {
        "token": apiToken,
        "queryType": "complex",
        "query": args.filter[0],
        "startTime": args.start,
        "endTime": args.end,
        "priority": args.priority
    })

    # NOTE(review): relies on args.no_escape_unicode from the shared
    # top-level parser — confirm.
    printPowerResults(args.output, rawResponse, response, ensure_ascii=args.no_escape_unicode)


# Implement the "scalyr timeseries-query" command.
def commandTimeseriesQuery(parser):
    # Parse the command-line arguments.
684 | parser.add_argument('filter', nargs=1, default='', 685 | help='search term or filter expression') 686 | parser.add_argument('--function', default='', 687 | help='the value to compute from the events matching the filter') 688 | parser.add_argument('--start', required=True, 689 | help='beginning of the time range to query') 690 | parser.add_argument('--end', default='', 691 | help='end of the time range to query') 692 | parser.add_argument('--buckets', type=int, default=1, 693 | help='how many buckets to divide the time interval into (1-5000)') 694 | parser.add_argument('--output', choices=['csv', 'json', 'json-pretty'], default='csv', 695 | help='specifies the format in which numbers are emitted') 696 | parser.add_argument('--priority', choices=['high', 'low'], default='high', 697 | help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.') 698 | parser.add_argument('--only-use-summaries', dest='onlyUseSummaries', action='store_true', default=False, 699 | help='specifies to query only summaries, and will not search the column store for any summaries not yet populated') 700 | 701 | # Note: we are inverting this (negative) argument into args.createSummaries. Use of this argument will set createSummaries = False in the query. 702 | parser.add_argument('--no-create-summaries', dest='createSummaries', action='store_false', default=True, 703 | help='specifies to not create summaries for this query') 704 | args = parser.parse_args() 705 | 706 | # Get the API token. 
707 | apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs') 708 | 709 | # build the query 710 | query = { 711 | "queryType": "numeric", 712 | "filter": args.filter[0], 713 | "function": args.function, 714 | "startTime": args.start, 715 | "endTime": args.end, 716 | "buckets": args.buckets, 717 | "priority": args.priority, 718 | "onlyUseSummaries": args.onlyUseSummaries, 719 | "createSummaries": args.createSummaries 720 | } 721 | 722 | # Send the query to the server. 723 | response, rawResponse = sendRequest(args, '/api/timeseriesQuery', { 724 | "token": apiToken, 725 | "queries": [query] 726 | }) 727 | 728 | # Print the results. 729 | printNumericResults(response['results'][0]['values'], args.output, rawResponse, response) 730 | 731 | 732 | def scalyrToolCli(): 733 | # All available commands 734 | all_commands = { 735 | 'query': commandQuery, 736 | 'tail': commandTail, 737 | 'numeric-query': commandNumericQuery, 738 | 'facet-query': commandFacetQuery, 739 | 'power-query': commandPowerQuery, 740 | 'timeseries-query': commandTimeseriesQuery, 741 | 'timerseries-query': commandTimeseriesQuery, # mispelling; kept for backwards compatibility 742 | 'get-file': commandGetFile, 743 | 'put-file': commandPutFile, 744 | 'delete-file': commandDeleteFile, 745 | 'list-files': commandListFiles, 746 | } 747 | 748 | # Define arguments that are the same for all commands 749 | parser = argparse.ArgumentParser( 750 | description='Scalyr command-line tool. See https://github.com/scalyr/scalyr-tool for documentation.') 751 | parser.add_argument('command', choices=all_commands.keys(), 752 | help='specifies the action to be performed') 753 | parser.add_argument('--version', action='version', version='%(prog)s ' + TOOL_VERSION) 754 | parser.add_argument('--server', 755 | help='URL for the Scalyr API server. Defaults to https://www.scalyr.com. 
If you are using eu.scalyr.com then this should be set to https://eu.scalyr.com.') 756 | parser.add_argument('--token', default='', 757 | help='API access token') 758 | parser.add_argument('--verbose', action='store_true', default=False, 759 | help='enables additional diagnostic output') 760 | 761 | parser.add_argument('--no-escape-unicode', action="store_false", 762 | help='When true, the json-pretty output format will show unicode characters rather than escaped unicode characters (the default for json-pretty is to use escaped characters)') 763 | 764 | parser.add_argument('--proxy', help='Proxy to connect through') 765 | 766 | command = None 767 | # Because the options are command-specific, we cannot fully parse the arguments until we know the 768 | # command... but we also need to know which command to invoke so that we can add the command-specific 769 | # options and then execute the command. So, here, we just guess what the command is by looking over all the 770 | # arguments and seeing the first one that matches one of the possible commands. 771 | for arg in sys.argv[1:]: 772 | if arg in all_commands: 773 | command = arg 774 | 775 | # If we could not find a possible command, then just try to parse the commandline with the current options, which 776 | # we know will fail since there's no valid value for 'command'. 777 | if command is None: 778 | tmp_args = parser.parse_args() 779 | 780 | # Invoke the command's function from the all_command's mapping. We really should change this to a more 781 | # object oriented approach. 782 | command_func = all_commands[command] 783 | command_func(parser) 784 | 785 | 786 | if __name__ == '__main__': 787 | scalyrToolCli() 788 | --------------------------------------------------------------------------------