├── .github └── workflows │ └── secrets-scanner.yaml ├── .gitignore ├── Dockerfile ├── LICENSE.txt ├── README.md └── scalyr /.github/workflows/secrets-scanner.yaml: -------------------------------------------------------------------------------- 1 | name: TruffleHog Secrets Scan 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | schedule: 10 | - cron: '0 4 * * *' 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | TruffleHog: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout code 21 | uses: actions/checkout@v3 22 | with: 23 | fetch-depth: 0 24 | 25 | # Special check which ensures that the clone performed above is not shallow. We need the 26 | # complete git history for scanning to work correctly in all the situations. In some cases 27 | # if a shallow clone is used, trufflehog won't fail with an error, but it would simply 28 | # not detect any files and that could be dangerous. 29 | - name: Shallow repo check 30 | run: | 31 | if git rev-parse --is-shallow-repository | grep -q "true"; then 32 | echo "Encountered a shallow repository, trufflehog may not work as expected!" 33 | exit 1 34 | fi 35 | 36 | - name: scan-pr 37 | uses: trufflesecurity/trufflehog@main 38 | if: ${{ github.event_name == 'pull_request' }} 39 | with: 40 | path: ./ 41 | base: ${{ github.event.repository.default_branch }} 42 | head: HEAD 43 | extra_args: --debug --only-verified 44 | 45 | - name: scan-push 46 | uses: trufflesecurity/trufflehog@main 47 | if: ${{ github.event_name == 'push' }} 48 | with: 49 | path: ./ 50 | base: "" 51 | head: ${{ github.ref_name }} 52 | extra_args: --debug --only-verified 53 | 54 | # As part of cron trigger we scan the whole repo directory. 55 | # NOTE: Since trufflehog GHA is meant to be used in context of push / pr it can't be 56 | # used directly to scan the whole repo directory. This may take a while, but it's a good idea 57 | # to run it on a daily basis. 
58 | - name: scan-cron 59 | if: ${{ github.event_name == 'schedule' }} 60 | run: | 61 | docker run --rm -v "$PWD:/workdir" trufflesecurity/trufflehog:latest git \ 62 | file:///workdir --fail --no-update --debug --only-verified 63 | 64 | - name: Notify Slack on Failure 65 | if: ${{ failure() && github.ref_name == 'master' }} 66 | uses: act10ns/slack@ed1309ab9862e57e9e583e51c7889486b9a00b0f # v2.0.0 67 | env: 68 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 69 | with: 70 | status: ${{ job.status }} 71 | steps: ${{ toJson(steps) }} 72 | channel: '#eng-dataset-o11y' 73 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3-alpine3.16 2 | 3 | COPY scalyr /bin/scalyr 4 | RUN chmod u+x /bin/scalyr 5 | ENTRYPOINT ["/bin/scalyr"] 6 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | scalyr-tool 2 | =========== 3 | 4 | Command-line tool for accessing Scalyr services. 
The following commands are currently supported: 5 | 6 | - [**query**](#querying-logs): Retrieve log data 7 | - [**power-query**](#power-queries): Execute PowerQuery 8 | - [**numeric-query**](#fetching-numeric-data): Retrieve numeric / graph data 9 | - [**facet-query**](#fetching-facet-counts): Retrieve common values for a field 10 | - [**timeseries-query**](#fetching-numeric-data-using-a-timeseries): Retrieve numeric / graph data from a timeseries 11 | - [**get-file**](#retrieving-configuration-files): Fetch a configuration file 12 | - [**put-file**](#creating-or-updating-configuration-files): Create or update a configuration file 13 | - [**delete-file**](#creating-or-updating-configuration-files): Delete a configuration file 14 | - [**list-files**](#listing-configuration-files): List all configuration files 15 | - [**tail**](#tailing-logs): Provide a live 'tail' of a log 16 | 17 | 18 | ## Installation 19 | 20 | Simply download the script file and make it executable. For instance: 21 | 22 | curl https://raw.githubusercontent.com/scalyr/scalyr-tool/master/scalyr > scalyr 23 | chmod u+x scalyr 24 | mv scalyr (some directory on your command path) 25 | 26 | You also need to make your Scalyr API tokens available to the tool. You can specify the token 27 | on the command line using the `--token` argument. However, it is more convenient to store your 28 | tokens in environment variables. This also keeps the tokens out of your console window and 29 | command history. On Unix systems, you can add the following to a file like `.bash_profile`: 30 | 31 | export scalyr_readlog_token='XXX' 32 | export scalyr_readconfig_token='YYY' 33 | export scalyr_writeconfig_token='ZZZ' 34 | 35 | The values for XXX, YYY, and ZZZ can be found at [scalyr.com/keys](https://www.scalyr.com/keys) -- look 36 | for "Read Logs", "Read Config", and "Write Config" tokens, respectively. 
37 | 38 | Setting a custom Scalyr server can be done using the `--server` argument but also via environment variable: 39 | 40 | export scalyr_server='https://eu.scalyr.com' 41 | 42 | After adding these to `.bash_profile`, make sure to also paste them into your current console session, 43 | so that they take effect immediately. Alternatively, run `source ~/.bash_profile`. 44 | 45 | ## Querying logs 46 | 47 | The "query" command allows you to search and filter your logs, or simply retrieve raw log data. The 48 | capabilities are similar to the regular [log view](https://www.scalyr.com/events?mode=log), though you 49 | can retrieve more data at once and have several output format options. 50 | 51 | Here are some usage examples: 52 | 53 | # Display the last 10 log records 54 | scalyr query 55 | 56 | # Display the last 100 log records, showing only timestamp, severity, and message. 57 | # (Timestamp and severity are always displayed.) 58 | scalyr query --count=100 --columns='timestamp,severity,message' 59 | 60 | # Display the first 10 log records beginning at 3:00 PM today, from host100. 61 | scalyr query '$serverHost="host100"' --start='3:00 PM' 62 | 63 | # Display the last 1000 entries in the log tagged as source=accessLog. Print only the status 64 | # and path, in CSV format. 65 | scalyr query '$source="accessLog"' --output=csv --columns='status,uriPath' --count=1000 66 | 67 | Complete argument list: 68 | 69 | scalyr query [filter] [options...] 70 | The filter specifies which log records to return. It uses the same syntax as the "Expression" 71 | field in the [log view](https://www.scalyr.com/events?mode=log). 72 | 73 | --start=xxx 74 | Specify the beginning of the time range to query. Uses the same syntax as the "Start" field in 75 | the log view. Defaults to 1 day ago, or to 1 day before the end time if an end time is given. 76 | --end=xxx 77 | Specify the end of the time range to query. Uses the same syntax as the "End" field in the log 78 | view. 
Defaults to the current time, or to 1 day after the start time if a start time is given. 79 | --count=nnn 80 | How many log records to retrieve, from 1 to 5000. Defaults to 10. 81 | --mode=head|tail 82 | Whether to display log records from the start or end of the time range. Defaults to head if a 83 | start time is given, otherwise to tail. 84 | --columns="..." 85 | Which log attributes to display. Used mainly for logs for which you have specified a parser to 86 | extract attributes from the raw text. Specify one or more attribute names, separated by commas. 87 | --output=multiline|singleline|csv|json|json-pretty 88 | How to display the log records (see below). 89 | --version 90 | Prints the current version number of this tool. 91 | --priority=high|low 92 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 93 | operations where a delay of a second or so is acceptable. Rate limits are tighter for high- 94 | priority queries. 95 | --token=xxx 96 | Specify the API token. For this command, should be a "Read Logs" token. 97 | --verbose 98 | Writes detailed progress information to stderr. 99 | --proxy=: 100 | An address to connect through when using a proxy. If not set will also take the value from one of the following 101 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 102 | 103 | #### Output formats 104 | 105 | By default, the query command outputs log records in a fairly verbose format designed for manual 106 | viewing. You can get something more like a classic log view by specifying a columns list, as shown in 107 | one of the examples above. 108 | 109 | The 'singleline' output option is similar to the default, but places all of a record's attributes on 110 | a single line. This is denser, but can be harder to read. 
111 | 112 | The 'csv' output option emits one line per log record, in Excel comma-separated-value format (with 113 | `CRLF` as the line separator, per the [spec](https://tools.ietf.org/html/rfc4180#page-2)). To use 114 | this option, you must specify the `--columns` argument. 115 | 116 | The 'json-pretty' output option also emits the JSON response from the server, but prettyprinted. 117 | 118 | #### Usage limits 119 | 120 | Your command line and API queries are limited to 30,000 milliseconds of server processing time, 121 | replenished at 36,000 milliseconds per hour. If you exceed this limit, your queries will be intermittently 122 | refused. (Your other uses of Scalyr, such as log uploading or queries via the web site, will not be impacted.) 123 | If you need a higher limit, drop us a line at support@scalyr.com. 124 | 125 | ## Power Queries 126 | 127 | The "power-query" command allows you to execute a PowerQuery. The 128 | capabilities are similar to the regular [PowerQuery](https://www.scalyr.com/query), though you 129 | can retrieve more data at once and have several output format options. 130 | 131 | Here are some usage examples: 132 | 133 | # Display log volume summary by forlogfile for the last 24 hours 134 | scalyr power-query "tag='logVolume' metric='logBytes' | group sum(value) by forlogfile" --start="24h" 135 | 136 | # Display a table of requests, errors and error rate for the last 7 days, in pretty-printed JSON 137 | scalyr power-query "dataset = 'accesslog' | group requests = count(), errors = count(status == 404) \ 138 | by uriPath | let rate = errors / requests | filter rate > 0.01 | sort -rate" --start="7d" --end="0d" \ 139 | --output=json-pretty 140 | 141 | Complete argument list: 142 | 143 | scalyr power-query [query] [options...] 144 | The query specifies the PowerQuery. It uses the same syntax as the "PowerQueries" 145 | page which is documented [here](https://app.scalyr.com/help/power-queries). 
146 | 147 | --start=xxx 148 | Specify the beginning of the time range to query. Uses the same syntax as the "Start" field in 149 | the PowerQueries page. This field is required. 150 | --end=xxx 151 | Specify the end of the time range to query. Uses the same syntax as the "End" field in the PowerQueries page. 152 | Defaults to 1 day after the start time if a start time is given. 153 | --output=csv|json|json-pretty 154 | How to display the log records (see below). 155 | --version 156 | Prints the current version number of this tool. 157 | --priority=high|low 158 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 159 | operations where a delay of a second or so is acceptable. Rate limits are tighter for high- 160 | priority queries. 161 | --token=xxx 162 | Specify the API token. For this command, should be a "Read Logs" token. 163 | --verbose 164 | Writes detailed progress information to stderr. 165 | --proxy=: 166 | An address to connect through when using a proxy. If not set will also take the value from one of the following 167 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 168 | 169 | #### Output formats 170 | 171 | By default, the power-query command outputs in 'csv' format, which emits one line per log record, in Excel 172 | comma-separated-value format (with `CRLF` as the line separator, per the [spec](https://tools.ietf.org/html/rfc4180#page-2)). 173 | 174 | The 'json' output option, not surprisingly, emits a JSON response. 175 | 176 | The 'json-pretty' output option also emits the JSON response from the server, but prettyprinted. 177 | 178 | #### Usage limits 179 | 180 | Your command line and API queries are limited to 30,000 milliseconds of server processing time, 181 | replenished at 36,000 milliseconds per hour. If you exceed this limit, your queries will be intermittently 182 | refused. 
(Your other uses of Scalyr, such as log uploading or queries via the web site, will not be impacted.) 183 | If you need a higher limit, drop us a line at support@scalyr.com. 184 | 185 | ## Tailing logs 186 | 187 | The 'tail' command is similar to the '[query](#querying-logs)' command, except it runs continually, printing query results to stdout. 188 | 189 | Here are some usage examples: 190 | 191 | # Display a live tail of all log records 192 | scalyr tail 193 | 194 | # Display a live tail of all log records from host100. 195 | scalyr tail '$serverHost="host100"' 196 | 197 | # Display a live tail of all log records containing the text [WARN] 198 | # Note: the [] need to be quoted to be processed as text by Scalyr. 199 | # You also need to quote/escape the quotes so they are not eaten by the shell 200 | scalyr tail '"[WARN]"' 201 | 202 | # Display a live tail of log messages, including attributes 203 | scalyr tail --output multiline 204 | 205 | Complete argument list: 206 | 207 | scalyr tail [filter] [options...] 208 | The filter specifies which log records to return. It uses the same syntax as the "Expression" 209 | field in the [log view](https://www.scalyr.com/events?mode=log). 210 | 211 | --lines K, 212 | -n K 213 | Output the previous K lines when starting the tail. Defaults to 10. 214 | 215 | --output multiline|singleline|messageonly 216 | Similar to the multiline and singleline options for the 'query' command, but also has a 'messageonly' 217 | mode that will only display the raw log message, and not any additional attributes. 218 | Defaults to 'messageonly'. 219 | 220 | #### Usage limits 221 | 222 | The 'tail' command is currently restricted to read a maximum of 1,000 log records per 10 seconds. Additionally, 223 | tails will automatically expire after 10 mins. Please contact support@scalyr.com if you require an increase to 224 | these limits. 
225 | 226 | #### Server clocks 227 | 228 | If the clocks on the servers sending log messages to Scalyr are significantly out of sync then some messages may not appear in the live tail. For example, if you send us a new log message with a timestamp old enough that it's not in the 1,000 most recent messages when it arrives at the Scalyr servers, then it will not be displayed by the live tail tool. 229 | 230 | ## Fetching numeric data 231 | 232 | The "numeric-query" command allows you to retrieve numeric data, e.g. for graphing. You can count the 233 | rate of events matching some criterion (e.g. error rate), or retrieve a numeric field (e.g. response 234 | size). 235 | 236 | A numeric query is equivalent to a [timeseries-query](#fetching-numeric-data-using-a-timeseries) with argument 237 | `--no-create-summaries` and without `--only-use-summaries`. If you will be invoking the same query repeatedly (e.g. in a script), 238 | you may want to use the timeseries query command rather than `numeric-query`. 239 | 240 | The commands take the same options and return the same data, but for `timeseries-query` invocations without 241 | `--no-create-summaries` we create a timeseries on the backend for each unique filter/function pair. 242 | This query will execute near-instantaneously, and avoid consuming your account's query budget (see below). 243 | 244 | Here are some usage examples: 245 | 246 | # Count the rate (per second) of occurrences of "/login" in all logs, in each of the last 24 hours 247 | scalyr numeric-query '"/login"' --start 24h --buckets 24 248 | 249 | # Display the average response size of all requests in the last hour 250 | scalyr numeric-query '$dataset="accesslog"' --function 'bytes' --start 1h 251 | 252 | Complete argument list: 253 | 254 | scalyr numeric-query [filter] --start xxx [options...] 255 | The filter specifies which log records to process. 
It uses the same syntax as the "Expression" 256 | field in the [log view](https://www.scalyr.com/events?mode=log). 257 | 258 | --function=xxx 259 | The value to compute from the matching events. You can use any function listed in 260 | https://www.scalyr.com/help/query-language#graphFunctions, except for fraction(expr). For 261 | example: 'mean(x)' or 'median(responseTime)', if x and responseTime are fields of your log. 262 | You can also specify a simple field name, such as 'responseTime', to return the mean value of 263 | that field. If you omit the function argument, the rate of matching events per second will be 264 | returned. Specifying 'rate' yields the same result. Finally, you can specify "count", to compute 265 | the number of matching events in each time period (as defined by the "buckets" option). 266 | --start=xxx 267 | Specify the beginning of the time range to query. Uses the same syntax as the "Start" field in 268 | the log view. You must specify this argument. 269 | --end=xxx 270 | Specify the end of the time range to query. Uses the same syntax as the "End" field in the log 271 | view. Defaults to the current time. 272 | --buckets=nnn 273 | The number of numeric values to return. The time range is divided into this many equal slices. 274 | For instance, suppose you query a four-hour period, with buckets = 4. The query will return four 275 | numbers, each covering a one-hour period. You may specify a value from 1 to 5000; 1 is the default. 276 | --output=csv|json|json-pretty 277 | How to display the results. 'csv' prints all values on a single line, separated by commas. 278 | 'json' prints the raw JSON response from the server, as documented at 279 | https://www.scalyr.com/help/api#numericQuery. 'json-pretty' also prints the JSON response, 280 | but prettyprinted. 281 | --priority=high|low 282 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 283 | operations where a delay of a second or so is acceptable. 
Rate limits are tighter for high- 284 | priority queries. 285 | --token=xxx 286 | Specify the API token. For this command, should be a "Read Logs" token. 287 | --version 288 | Prints the current version number of this tool. 289 | --verbose 290 | Writes detailed progress information to stderr. 291 | --proxy=: 292 | An address to connect through when using a proxy. If not set will also take the value from one of the following 293 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 294 | 295 | ## Fetching facet counts 296 | 297 | The "facet-query' command allows you to retrieve the most common values for a field. For instance, you can 298 | find the most common URLs accessed on your site, the most common user-agent strings, or the most common 299 | response codes returned. (If a very large number of events match your search criteria, the results will be 300 | based on a random subsample of at least 500,000 matching events.) 301 | 302 | The default output format is CSV, sorted by count desc: 303 | 304 | ``` 305 | count,value 306 | 4,value-the-first 307 | 2,"other value" 308 | ``` 309 | 310 | _Note that CSV output uses `CRLF` as the line separator._ 311 | 312 | 313 | Here are some usage examples: 314 | 315 | curl 'https://www.scalyr.com/api/facetQuery?queryType=facet&field=uriPath&startTime=1h&token=XXX' 316 | 317 | # Display the most common HTTP request URLs in the last 24 hours. 318 | scalyr facet-query '$dataset="accesslog"' uriPath --start 24h 319 | 320 | # Display the most common HTTP response codes for requests to index.html. 321 | scalyr facet-query 'uriPath="/index.html"' status --start 24h 322 | 323 | Complete argument list: 324 | 325 | scalyr facet-query filter field --start xxx [options...] 326 | The filter specifies which log records to process. It uses the same syntax as the "Expression" 327 | field in the [log view](https://www.scalyr.com/events?mode=log). 
328 | 329 | --count=nnn 330 | How many distinct values to return. You may specify a value from 1 to 1000; 100 is the default. 331 | --start=xxx 332 | Specify the beginning of the time range to query. Uses the same syntax as the "Start" field in 333 | the log view. You must specify this argument. 334 | --end=xxx 335 | Specify the end of the time range to query. Uses the same syntax as the "End" field in the log 336 | view. Defaults to the current time. 337 | --output=csv|json|json-pretty 338 | How to display the results. 'csv' prints one value (and its count) per line, separated by commas. 339 | 'json' prints the raw JSON response from the server, as documented at 340 | https://www.scalyr.com/help/api#numericQuery. 'json-pretty' also prints the JSON response, 341 | but prettyprinted. 342 | --priority=high|low 343 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 344 | operations where a delay of a second or so is acceptable. Rate limits are tighter for high- 345 | priority queries. 346 | --token=xxx 347 | Specify the API token. For this command, should be a "Read Logs" token. 348 | --version 349 | Prints the current version number of this tool. 350 | --verbose 351 | Writes detailed progress information to stderr. 352 | --proxy=: 353 | An address to connect through when using a proxy. If not set will also take the value from one of the following 354 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 355 | 356 | 357 | #### Usage limits 358 | 359 | Your command line and API queries are limited to 30,000 milliseconds of server processing time, 360 | replenished at 36,000 milliseconds per hour. If you exceed this limit, your queries will be intermittently 361 | refused. (Your other uses of Scalyr, such as log uploading or queries via the web site, will not be impacted.) 362 | If you need a higher limit, drop us a line at support@scalyr.com. 
363 | 364 | 365 | ## Fetching numeric data using a timeseries 366 | 367 | A timeseries precomputes a numeric query, allowing you to execute queries almost instantaneously, and without 368 | consuming your account's query budget. This is especially useful if you are using the Scalyr API to feed a 369 | home-built dashboard, alerting system, or other automated tool. Note that the [Scalyr API](https://www.scalyr.com/help/api#timeseriesQuery) 370 | allows multiple timeseries queries in a single API invocation, but the command-line tool only supports 371 | one query at a time. 372 | 373 | When a new timeseries is defined, we immediately start live updating of that timeseries from the ingestion pipeline. 374 | In addition, we begin a background process to extend the timeseries backward in time, so that it covers the full 375 | timespan of your query. This backfill process is automatic, and if you later issue the same query with an even 376 | earlier start time, we will extend the backfill to cover that as well. 377 | To change this behavior, use `--no-create-summaries`. 378 | 379 | A related argument, `--only-use-summaries`, controls whether this API call should only use preexisting timeseries or should 380 | execute the queries against the event database if no matching summary exists. If this argument is used, then your API call 381 | is guaranteed to return quickly and to execute inexpensively, but with possibly empty results. If this argument is not used, 382 | the call may be slower & more expensive, but will be complete. 383 | For example, issuing a new query over the past 3 weeks with `--only-use-summaries` will return quickly 384 | no matter what, but will initially return empty results until backfill (covering the past 3 weeks) is complete. 385 | This can be a cost-effective way to seed a new timeseries with a long backfill period when you don't need 386 | results right away. 
[numeric-query](#fetching-numeric-data)
418 | --output=csv|json|json-pretty 419 | How to display the results. 'csv' prints all values on a single line, separated by commas. 420 | 'json' prints the raw JSON response from the server, as documented at 421 | https://www.scalyr.com/help/api#numericQuery. 'json-pretty' also prints the JSON response, 422 | but prettyprinted. 423 | --priority=high|low 424 | Specifies the execution priority for this query; defaults to "high". Use "low" for scripted 425 | operations where a delay of a second or so is acceptable. Rate limits are tighter for high- 426 | priority queries. 427 | --only-use-summaries 428 | Specifies to only query summaries, and not to search the column store for any summaries not yet populated. 429 | No results will be returned unless the summaries queried have been backfilled. 430 | --no-create-summaries 431 | Specifies to not create summaries for this query. 432 | --token=xxx 433 | Specify the API token. For this command, should be a "Read Logs" token. 434 | --version 435 | Prints the current version number of this tool. 436 | --verbose 437 | Writes detailed progress information to stderr. 438 | --proxy=: 439 | An address to connect through when using a proxy. If not set will also take the value from one of the following 440 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 441 | 442 | 443 | 444 | 445 | ## Retrieving configuration files 446 | 447 | The "get-file" command allows you to retrieve a configuration file, writing the file text to stdout. 448 | Configuration files are used to define log parsers, dashboards, alerting rules, and more. Any page 449 | on the Scalyr web site which contains a full-page text editor, is editing a configuration file. 
450 | 451 | Using the get-file command is simple: 452 | 453 | # Display the alerts file 454 | scalyr get-file /alerts 455 | 456 | # Display the "Foo" dashboard 457 | scalyr get-file /dashboards/Foo 458 | 459 | Complete argument list: 460 | 461 | scalyr get-file file-path [options...] 462 | 463 | --version 464 | Prints the current version number of this tool. 465 | --token=xxx 466 | Specify the API token. For this command, should be a "Read Config" token. 467 | --verbose 468 | Writes detailed progress information to stderr. 469 | --proxy=: 470 | An address to connect through when using a proxy. If not set will also take the value from one of the following 471 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 472 | 473 | 474 | ## Creating or updating configuration files 475 | 476 | The "put-file" command allows you to create or overwrite a configuration file, taking the new 477 | file content from stdin. 478 | 479 | Using the put-file command is simple: 480 | 481 | # Overwrite the alerts file 482 | scalyr put-file /alerts < alerts.json 483 | 484 | # Create or overwrite the "Foo" dashboard 485 | scalyr put-file /dashboards/Foo < fooDashboard.json 486 | 487 | Complete argument list: 488 | 489 | scalyr put-file file-path [options...] 490 | 491 | --version 492 | Prints the current version number of this tool. 493 | --token=xxx 494 | Specify the API token. For this command, should be a "Write Config" token. 495 | --verbose 496 | Writes detailed progress information to stderr. 497 | --proxy=: 498 | An address to connect through when using a proxy. If not set will also take the value from one of the following 499 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 500 | --validate 501 | Validate if input is a valid HOCON. Validation relies on pyhocon parser 502 | (see https://github.com/chimpler/pyhocon). 
503 | 504 | ## Deleting configuration files 505 | 506 | The "delete-file" command allows you to delete a configuration file: 507 | 508 | Using the delete-file command is simple: 509 | 510 | # Delete the "Foo" dashboard 511 | scalyr delete-file /dashboards/Foo 512 | 513 | ## Listing configuration files 514 | 515 | The "list-files" command lists all configuration files: 516 | 517 | scalyr list-files 518 | 519 | Complete argument list: 520 | 521 | scalyr list-files [options...] 522 | 523 | --version 524 | Prints the current version number of this tool. 525 | --token=xxx 526 | Specify the API token. For this command, should be a "Read Config" token. 527 | --verbose 528 | Writes detailed progress information to stderr. 529 | --proxy=: 530 | An address to connect through when using a proxy. If not set will also take the value from one of the following 531 | environment variables if any are set: http_proxy, HTTP_PROXY, https_proxy, HTTPS_PROXY 532 | 533 | 534 | ## TODO 535 | 536 | Add option to use LF, rather than CRLF, when outputting CSV (for `facet-query` in particular). 537 | 538 | 539 | ## Revision History 540 | 541 | #### Feb. 21, 2014: version 0.1 542 | 543 | Initial release. 544 | -------------------------------------------------------------------------------- /scalyr: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Scalyr command-line utility 4 | 5 | import sys 6 | import os 7 | import argparse 8 | import time 9 | import datetime 10 | import json 11 | import csv 12 | 13 | # unicode string type, set to str initially as 'str' exists in both 14 | # python 2 and 3. If python 2 we will override it with the unicode 15 | # type. 
# If python 3 we'll do nothing as 'str' is the unicode type for python 3
_unicode_type = str

# are we python2 or not
_python2 = False

try:
    # Python 2 versions
    import httplib
    import StringIO
    from urlparse import urlparse

    # if we are here then it's python 2 so set _unicode_type to unicode
    _unicode_type = unicode
    _python2 = True

except ImportError:
    # Python 3+ versions
    import http.client as httplib
    import io as StringIO
    from urllib.parse import urlparse

from collections import deque

# Define some constants
TOOL_VERSION = "0.4"


# Return the API token from the command line or environment variables.
#
# args: parsed command-line arguments; must have a 'token' attribute
# environmentVariableName: environment variable consulted when --token was not given
# permissionType: human-readable permission name, used only in the error message
#
# Exits the process with status 1 if no token can be found.
def getApiToken(args, environmentVariableName, permissionType):
    apiToken = args.token
    if apiToken == '':
        apiToken = os.getenv(environmentVariableName, '')
    if apiToken == '':
        print_stderr('Please specify an API token granting ' + permissionType + ' permission. You can place it in the')
        print_stderr('command line as --token "XXX", or in the environment variable "' + environmentVariableName + '".')
        print_stderr('Use of an environment variable is recommended, to avoid displaying your API token in the')
        print_stderr('console or your command history. You can find API tokens at https://www.scalyr.com/keys.')
        sys.exit(1)

    return apiToken


# Return (proxyAddress, useProxy) for this invocation: the --proxy argument if
# given, otherwise the first matching proxy environment variable.
# NOTE: as written, an https_proxy/HTTPS_PROXY setting overrides an earlier
# http_proxy/HTTP_PROXY match (the second 'if' is not chained to the first).
def getProxyAddress(args):
    proxyAddress = args.proxy

    if proxyAddress is None:
        # Check if we have a environment variable
        if os.environ.get('http_proxy'):
            proxyAddress = os.environ.get('http_proxy')
        elif os.environ.get('HTTP_PROXY'):
            proxyAddress = os.environ.get('HTTP_PROXY')
        if os.environ.get('https_proxy'):
            proxyAddress = os.environ.get('https_proxy')
        elif os.environ.get('HTTPS_PROXY'):
            proxyAddress = os.environ.get('HTTPS_PROXY')
    # Setup proxy connection

    if proxyAddress:
        useProxy = True
    else:
        useProxy = False

    return proxyAddress, useProxy


# Split a proxy URL (e.g. "http://proxy:8080") into its components.
def parseProxyAddress(url):
    parsedProxyAddress = urlparse(url)

    return parsedProxyAddress


# Print a string to stderr.
def print_stderr(message):
    sys.stderr.write(str(message) + '\n')


def output_encoded(message):
    """Converts a string to ensure the output will be utf8 so that output can be safely redirected
    to a file without causing conversion errors. This works differently in Python2 vs Python3

    In Python2, we need to explicitly convert all unicode strings to utf8 otherwise we'll get a conversion
    error if output contains unicode *and* output has been redirected to a file.
    In Python3, output is utf8 by default and we need to leave the string alone - if we manually convert
    python3 strings to utf8 then the output will wrap all strings in b''

    """
    if _python2 and type(message) is _unicode_type:
        return message.encode('utf-8')

    return message


# Send a request to the server, and return the parsed JSON response.
# args: Our parsed command-line arguments
# uri: Request path for this RPC, e.g. "api/query"
# parameterDict: The dictionary to be sent (JSON-encoded) to the server as the request body
#
# Returns a (parsedResponse, responseBody) tuple. Exits the process with
# status 1 on a non-200 HTTP status, unparseable JSON, or a non-"success"
# status field in the response.
def sendRequest(args, uri, parameterDict):
    parameterJson = json.dumps(parameterDict)

    queryStartTime = datetime.datetime.now()

    verbose = args.verbose
    if verbose:
        print_stderr("Using arguments: %s" % args)

    # Allow to set custom scalyr_server via environment variable
    serverAddress = args.server

    if serverAddress is None:
        serverAddress = os.environ.get('scalyr_server', 'https://www.scalyr.com')

    proxyAddress, useProxy = getProxyAddress(args)

    if verbose:
        if useProxy:
            print_stderr("Using proxy: %s" % proxyAddress)
        else:
            print_stderr("No proxy configuration found")

    # Strip the scheme off the server address and remember whether to use TLS.
    useSSL = True
    if serverAddress.startswith("http://"):
        useSSL = False
        serverAddress = serverAddress[7:]
    elif serverAddress.startswith("https://"):
        serverAddress = serverAddress[8:]

    if verbose:
        if useSSL:
            protocol = 'https'
        else:
            protocol = 'http'
        print_stderr("Connecting to %s via %s" % (serverAddress, protocol))

    # Open the connection, tunnelling through the proxy when one is configured.
    conn = None
    if useSSL and useProxy:
        proxyAddress = parseProxyAddress(proxyAddress)
        conn = httplib.HTTPSConnection(proxyAddress.hostname, proxyAddress.port)
        conn.set_tunnel(serverAddress)
    elif useProxy:
        proxyAddress = parseProxyAddress(proxyAddress)
        conn = httplib.HTTPConnection(proxyAddress.hostname, proxyAddress.port)
        conn.set_tunnel(serverAddress)
    elif useSSL:
        conn = httplib.HTTPSConnection(serverAddress)
    else:
        conn = httplib.HTTPConnection(serverAddress)

    headers = {"Content-type": "application/json"}

    if verbose:
        print_stderr("Request headers:")
        for i in headers:
            print_stderr("  %s: %s" % (i, headers[i]))
        print_stderr("Request body:")
        print_stderr(json.dumps(json.loads(parameterJson), sort_keys=True, indent=2, separators=(',', ': ')))

    conn.request("POST", uri, parameterJson, headers)

    # Retrieve and parse the response.
    response = conn.getresponse()
    responseBody = response.read().decode('utf8')

    if verbose or (response.status != 200):
        print_stderr("After %s seconds, Scalyr server returned %s bytes; status %d / %s" % (
            int((datetime.datetime.now() - queryStartTime).total_seconds() * 1000) / 1000.0, len(responseBody),
            response.status, response.reason))

    if response.status != 200:
        print_stderr('Response body: ' + responseBody)
        sys.exit(1)

    try:
        parsedResponse = json.loads(responseBody)
    except ValueError:
        print_stderr('Scalyr server returned invalid response:')
        print_stderr(responseBody)
        sys.exit(1)

    status = parsedResponse['status']
    if not status.startswith('success'):
        print_stderr('Scalyr server returned error: %s (%s)' % (parsedResponse['message'], status))
        sys.exit(1)

    return (parsedResponse, responseBody)


# Implement the "scalyr get-file" command: fetch a configuration file and
# write its content to stdout (metadata goes to stderr).
def commandGetFile(parser):
    parser.add_argument('filepath',
                        help='server pathname of the file to retrieve, e.g. "/scalyr/alerts"')
    args = parser.parse_args()

    # Send the request to the server.
    response, rawResponse = sendRequest(args, '/getFile', {
        "token": getApiToken(args, 'scalyr_readconfig_token', 'Read Config'),
        "path": args.filepath,
    })

    # Print the file content.
    if response['status'] == 'success/noSuchFile':
        print_stderr('File "%s" does not exist' % (args.filepath))
    else:
        # createDate/modDate are millisecond timestamps.
        createDate = datetime.datetime.fromtimestamp(int(response['createDate']) / 1000)
        modDate = datetime.datetime.fromtimestamp(int(response['modDate']) / 1000)

        print_stderr('Retrieved file "%s", version %d, created %s, modified %s, length %s' % (
            args.filepath, response['version'], createDate, modDate, len(response['content'])))
        print(output_encoded(response['content']))


# Implement the "scalyr put-file" command: create or overwrite a configuration
# file with content read from stdin. With --validate, the content is first
# checked to be parseable HOCON via the optional pyhocon package.
def commandPutFile(parser):
    # Parse the command-line arguments.
    parser.add_argument('filepath',
                        help='server pathname of the file to upload, e.g. "/scalyr/alerts"')
    parser.add_argument('--validate',
                        action='store_true',
                        help='validate if input is a valid HOCON')
    args = parser.parse_args()

    content = sys.stdin.read()
    if args.validate:

        from importlib import import_module
        # Import pyhocon lazily so it is only required when --validate is used.
        # Catch ImportError rather than ModuleNotFoundError: ModuleNotFoundError
        # does not exist on Python 2 (and subclasses ImportError on Python 3),
        # so this stays compatible with both interpreters.
        try:
            module = import_module("pyhocon.converter")
            configFactory = getattr(module, "ConfigFactory")
        except ImportError:
            print_stderr('"pyhocon" is needed for validation:'
                         ' run "pip install pyhocon" to install')
            sys.exit(1)
        try:
            configFactory.parse_string(content)
        # broad exception clause to cover
        # pyhocon and pyparsing exceptions
        except Exception as err:
            print_stderr('Input is invalid, error: %s' % err)
            sys.exit(1)

    response, rawResponse = sendRequest(args, '/putFile', {
        "token": getApiToken(args, 'scalyr_writeconfig_token', 'Write Config'),
        "path": args.filepath,
        "content": content
    })

    # Confirm file was updated.
    print_stderr('File "%s" updated' % (args.filepath))

# Implement the "scalyr delete-file" command.
def commandDeleteFile(parser):
    """Implement "scalyr delete-file": delete a configuration file on the server."""
    # Parse the command-line arguments.
    parser.add_argument('filepath',
                        help='server pathname of the file to delete, e.g. "/scalyr/alerts"')
    args = parser.parse_args()

    # Send the request to the server. Deletion is expressed as a putFile call
    # with the "deleteFile" flag set, so a "Write Config" token is required.
    response, rawResponse = sendRequest(args, '/putFile', {
        "token": getApiToken(args, 'scalyr_writeconfig_token', 'Write Config'),
        "path": args.filepath,
        "deleteFile": True
    })

    # Confirm file was deleted.
    print_stderr('File "%s" deleted' % (args.filepath))


# Implement the "scalyr list-files" command.
def commandListFiles(parser):
    """List the pathnames of all configuration files, one per line on stdout."""
    # Parse the command-line arguments.
    args = parser.parse_args()

    # Send the request to the server.
    response, rawResponse = sendRequest(args, '/listFiles', {
        "token": getApiToken(args, 'scalyr_readconfig_token', 'Read Config')
    })

    # Print the file content.
    paths = response['paths']
    for i in range(len(paths)):
        print(paths[i])


# Implement the "scalyr query" command.
def commandQuery(parser):
    """Retrieve log records matching a filter and print them in the requested format."""
    # Parse the command-line arguments.
    parser.add_argument('filter', nargs='?', default='',
                        help='search term or filter expression')
    parser.add_argument('--start', default='',
                        help='beginning of the time range to query')
    parser.add_argument('--end', default='',
                        help='end of the time range to query')
    parser.add_argument('--count', type=int, default=10,
                        help='how many log records to retrieve (1-5000)')
    parser.add_argument('--mode', choices=['head', 'tail', ''], default='',
                        help='specifies whether to show logs from the beginning or end of the time range')
    parser.add_argument('--columns', default='',
                        help='comma-delimited list of columns to display; defaults to all')
    parser.add_argument('--output', choices=['singleline', 'multiline', 'csv', 'json', 'json-pretty'],
                        default='multiline',
                        help='specifies the format in which matching log messages are displayed')
    parser.add_argument('--priority', choices=['high', 'low'], default='high',
                        help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.')
    parser.add_argument('--continuationToken', default='',
                        help='specifies the continuation token to use from a previous request')

    args = parser.parse_args()

    columns = args.columns
    output = args.output
    # CSV output needs an explicit column list to form the header row.
    if output == 'csv' and columns == '':
        print_stderr('For CSV output, you must supply a nonempty --columns option')
        sys.exit(1)

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    params = {
        "token": apiToken,
        "queryType": "log",
        "filter": args.filter,
        "startTime": args.start,
        "endTime": args.end,
        "maxCount": args.count,
        "pageMode": args.mode,
        "columns": columns,
        "priority": args.priority
    }

    if args.continuationToken:
        params['continuationToken'] = args.continuationToken

    # Send the query to the server.
    response, rawResponse = sendRequest(args, '/api/query', params)

    # Print the log records.
    matches = response['matches']

    if args.output == 'json':
        print(output_encoded(rawResponse))
    elif args.output == 'json-pretty':
        # NOTE(review): --no_escape_unicode is not registered on this parser;
        # presumably added by the shared top-level parser. Also, passing the
        # flag directly as ensure_ascii (no negation) looks inverted given the
        # option name — verify against the global option's action/default.
        print(output_encoded(json.dumps(response, ensure_ascii=args.no_escape_unicode, sort_keys=True, indent=2,
                                        separators=(',', ': '))))
    elif args.output == 'csv':
        columnList = columns.split(',')

        # Reusable row buffer, seeded with the column names for the header row.
        ar = []
        for i in range(len(columnList)):
            ar.append(columnList[i])

        csvBuffer = StringIO.StringIO()
        csvWriter = csv.writer(csvBuffer, dialect='excel')
        csvWriter.writerow(ar)
        for i in range(len(matches)):
            match = matches[i]
            # assumes each match carries an 'attributes' dict when columns are
            # requested — TODO confirm against the /api/query response schema
            attributes = match.get('attributes')
            # NOTE: the inner loop reuses the name `i`; harmless here because
            # `match` was captured above, but fragile if this code is edited.
            for i in range(len(columnList)):
                column = columnList[i]
                # A column may live either at the top level of the match or
                # inside its attributes; missing columns become empty cells.
                if column in match:
                    ar[i] = output_encoded(match.get(column))
                elif column in attributes:
                    ar[i] = output_encoded(attributes.get(column))
                else:
                    ar[i] = ''
            csvWriter.writerow(ar)

        print(csvBuffer.getvalue())
    else:
        # Readable text format (singleline or multiline)
        for i in range(len(matches)):
            printReadableRow(args.output, matches[i])


def printReadableRow(output, match):
    """Print one matched event in 'singleline', 'multiline', or 'messageonly' format."""
    # Timestamps arrive as nanoseconds since the epoch.
    rawTimestamp = match.get('timestamp')
    timestamp = ""
    if rawTimestamp:
        timestamp = str(datetime.datetime.fromtimestamp(int(rawTimestamp) / 1E9)) + ": "

    rawSeverity = match.get('severity')
    severity = ""
    # 'or 0 == rawSeverity' keeps severity 0 (falsy) from being dropped.
    if rawSeverity or 0 == rawSeverity:
        # One-letter label indexed by the numeric severity level.
        severity = ['L', 'K', 'J', 'I', 'W', 'E', 'F'][rawSeverity] + " "

    # get message, minus any trailing whitespace
    message = match.get('message')
    if not message:
        message = ''
        # With no message there is nothing for 'messageonly' to show, so fall
        # back to 'singleline' which at least displays the attributes.
        if output == 'messageonly':
            output = 'singleline'

    message = message.rstrip()

    message = output_encoded(message)
    attributes = match.get('attributes')

    if output == 'singleline':
        thisline = '%s%s%s' % (timestamp, severity, message)
        for attrName in sorted(attributes.keys()):
            thisline += ' %s=%s' % (attrName, attributes[attrName])
        print(thisline)
    elif output == 'messageonly':
        print(message)
    else:
        # multiline: message on one line, one attribute per following line.
        print('%s%s%s' % (timestamp, severity, message))
        for attrName in sorted(attributes.keys()):
            print('  %s = %s' % (attrName, attributes[attrName]))


# Print the output of a numeric-query or timeseries-query command.
def printNumericResults(values, outputFormat, rawResponse, response):
    """Print the output of a numeric-query or timeseries-query command.

    values: the list of numbers returned by the server
    outputFormat: 'json', 'json-pretty', or anything else for CSV
    rawResponse: the raw JSON text from the server (printed verbatim for 'json')
    response: the parsed response dict (pretty-printed for 'json-pretty')
    """
    if outputFormat == 'json':
        print(rawResponse)
    elif outputFormat == 'json-pretty':
        print(json.dumps(response, sort_keys=True, indent=2, separators=(',', ': ')))
    else:
        # csv: all values on a single comma-separated line.
        print(','.join(map(str, values)))


def get_match_unique_id(match):
    """Return a (timestamp, session) tuple used to de-duplicate tail matches."""
    # assumes every match carries 'timestamp' and 'session' keys — TODO confirm
    return (match['timestamp'], match['session'])


def liveTail(apiToken, args):
    """Poll the query API and print new matching events, tail -f style.

    Repeatedly issues a low-priority 'tail' query over the last 10 minutes,
    printing only events not seen in a previous poll. The loop self-limits to
    10 minutes of total runtime to minimize server load.
    """
    maximum_tail_time = 10 * 60
    readback_limit = 10 * 60  # 10 minutes
    max_matches = 1000
    poll_interval = 10

    initial_lines = args.lines

    # The server returns at most max_matches rows, so the initial backlog
    # printout is capped at the same limit.
    if initial_lines > max_matches:
        print_stderr("Output of previous lines is limited to 1000 lines only")
        initial_lines = max_matches

    start_time = time.time()
    current_time = time.time()
    first = True

    # Bounded FIFO of ids already printed; old ids age out automatically.
    previous_matches = deque(maxlen=max_matches)

    print_stderr("Beginning live tail..")
    while current_time - start_time < maximum_tail_time:

        # Send the query to the server.
        response, rawResponse = sendRequest(args, '/api/query', {
            "token": apiToken,
            "queryType": "log",
            "filter": args.filter,
            "startTime": int(current_time - readback_limit),
            "endTime": int(current_time),
            "maxCount": max_matches,
            "pageMode": "tail",
            "columns": '',
            "priority": "low"
        })

        # get a list of matches that we haven't seen yet
        new_matches = [match for match in response['matches'] if get_match_unique_id(match) not in previous_matches]

        # if the "not in previous_matches" test didn't find any duplicates, that means there have been
        # so many new log events that our new query didn't overlap with the previous query. Therefore, we'll
        # have missed some of the intervening messages.
        row_count = len(new_matches)
        if not first and row_count >= max_matches:
            print_stderr("[WARN] Too many messages for tail to keep up; some messages are not being shown.")

        # print out any new matches, and add the ids to the list of matches we have already seen.
        # Also keep track of how many rows we have seen already, so the first time through we can
        # print the last few lines
        for match in new_matches:
            # On the first pass, skip rows until only `initial_lines` remain.
            if not first or row_count <= initial_lines:
                printReadableRow(args.output, match)
            previous_matches.append(get_match_unique_id(match))
            row_count -= 1

        # sleep for any remaining time
        new_time = time.time()
        elapsed_time = new_time - current_time
        remaining_time = poll_interval - elapsed_time
        if remaining_time > 0:
            time.sleep(remaining_time)

        current_time = time.time()
        first = False
    print_stderr(
        "----\nTo minimize server load, live tail expires after 10 minutes. If this is bothersome, please let us know at support@scalyr.com.\n")


# Implement the 'scalyr tail' command
def commandTail(parser):
    """Parse tail-specific arguments and run liveTail until it expires or Ctrl-C."""
    # Build the args
    parser.add_argument('filter', nargs='?', default='',
                        help='search term or filter expression')
    parser.add_argument('--output', choices=['singleline', 'multiline', 'messageonly'], default='messageonly',
                        help='specifies the format in which matching log messages are displayed')
    parser.add_argument('--lines', '-n', type=int, default=10,
                        help='Output the previous \'n\' lines at the start of the tail')

    # Get the API token
    args = parser.parse_args()
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    try:
        liveTail(apiToken, args)
    except KeyboardInterrupt:
        # Ctrl-C ends the tail cleanly rather than with a traceback.
        print_stderr("\nLive tail has quit.")


# Implement the "scalyr numeric-query" command.
def commandNumericQuery(parser):
    """Implement "scalyr numeric-query": compute a numeric value from matching events.

    Registers the subcommand's options on `parser`, issues the
    /api/numericQuery request, and prints the resulting values via
    printNumericResults in the format chosen by --output.
    """
    # Option table for this subcommand: (flags, keyword arguments) pairs,
    # registered in order on the supplied parser.
    optionSpecs = [
        (('filter',), dict(nargs=1, default='',
                           help='search term or filter expression')),
        (('--function',), dict(default='',
                               help='the value to compute from the events matching the filter')),
        (('--start',), dict(required=True,
                            help='beginning of the time range to query')),
        (('--end',), dict(default='',
                          help='end of the time range to query')),
        (('--buckets',), dict(type=int, default=1,
                              help='how many buckets to divide the time interval into (1-5000)')),
        (('--output',), dict(choices=['csv', 'json', 'json-pretty'], default='csv',
                             help='specifies the format in which numbers are emitted')),
        (('--priority',), dict(choices=['high', 'low'], default='high',
                               help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.')),
    ]
    for flags, options in optionSpecs:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    # Build the request body and send the query to the server. Note that
    # 'filter' is a one-element list (nargs=1), hence the [0].
    requestBody = {
        "token": apiToken,
        "queryType": "numeric",
        "filter": args.filter[0],
        "function": args.function,
        "startTime": args.start,
        "endTime": args.end,
        "buckets": args.buckets,
        "priority": args.priority
    }
    response, rawResponse = sendRequest(args, '/api/numericQuery', requestBody)

    printNumericResults(response['values'], args.output, rawResponse, response)


# Print the output of a facet-query command.
def printFacetResults(matchCount, values, outputFormat, rawResponse, response, ensure_ascii=True):
    """Print the output of a facet-query command.

    matchCount: total number of matching events (currently unused here)
    values: list of {'value': ..., 'count': ...} dicts from the server
    outputFormat: 'json', 'json-pretty', or anything else for CSV
    rawResponse: raw JSON text (printed verbatim for 'json')
    response: parsed response dict (pretty-printed for 'json-pretty')
    ensure_ascii: passed through to json.dumps for 'json-pretty'
    """
    if outputFormat == 'json':
        print(output_encoded(rawResponse))
    elif outputFormat == 'json-pretty':
        print(output_encoded(
            json.dumps(response, ensure_ascii=ensure_ascii, sort_keys=True, indent=2, separators=(',', ': '))))
    else:
        # csv: header row, then one (count, value) row per facet value.
        csvBuffer = StringIO.StringIO()
        csvWriter = csv.writer(csvBuffer, dialect='excel')
        csvWriter.writerow(['count', 'value'])
        for i in range(len(values)):
            valueAndCount = values[i]
            csvWriter.writerow([valueAndCount.get('count'), output_encoded(valueAndCount.get('value'))])
        print(csvBuffer.getvalue())


# Implement the "scalyr facet-query" command.
def commandFacetQuery(parser):
    """Retrieve the most common values of a field and print them."""
    # Parse the command-line arguments.
    parser.add_argument('filter', nargs=1,
                        help='search term or filter expression')
    parser.add_argument('field', nargs=1,
                        help='the field whose values should be retrieved')
    parser.add_argument('--count', type=int, default=100,
                        help='maximum number of unique values to retrieve (1-1000)')
    parser.add_argument('--start', required=True,
                        help='beginning of the time range to query')
    parser.add_argument('--end', default='',
                        help='end of the time range to query')
    parser.add_argument('--output', choices=['csv', 'json', 'json-pretty'], default='csv',
                        help='specifies the format in which values are emitted')
    parser.add_argument('--priority', choices=['high', 'low'], default='high',
                        help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.')
    args = parser.parse_args()

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    # Send the query to the server. 'filter' and 'field' are one-element
    # lists (nargs=1), hence the [0] indexing.
    response, rawResponse = sendRequest(args, '/api/facetQuery', {
        "token": apiToken,
        "queryType": "facet",
        "filter": args.filter[0],
        "field": args.field[0],
        "maxCount": args.count,
        "startTime": args.start,
        "endTime": args.end,
        "priority": args.priority
    })

    # NOTE(review): --no_escape_unicode is not registered on this parser;
    # presumably added by the shared top-level parser — confirm.
    printFacetResults(response['matchCount'], response['values'], args.output, rawResponse, response,
                      ensure_ascii=args.no_escape_unicode)


# Print the output of a power-query command.
def printPowerResults(outputFormat, rawResponse, response, ensure_ascii=True):
    """Print the output of a power-query command in JSON or CSV form.

    In CSV mode, server warnings and an omitted-events notice (when the
    server dropped rows due to memory limits) are emitted before the data.
    """
    matchingEvents, omittedEvents = response['matchingEvents'], response['omittedEvents']
    columns, values, warnings = response['columns'], response['values'], response['warnings']

    # Extract a column's display name from its descriptor dict.
    def col_name(col):
        return col.get('name')

    if outputFormat == 'json':
        print(output_encoded(rawResponse))
    elif outputFormat == 'json-pretty':
        print(output_encoded(
            json.dumps(response, ensure_ascii=ensure_ascii, sort_keys=True, indent=2, separators=(',', ': '))))
    else:
        # csv
        csvBuffer = StringIO.StringIO()
        csvWriter = csv.writer(csvBuffer, dialect='excel')

        if (len(warnings) > 0):
            csvWriter.writerow(warnings)
        if (omittedEvents > 0):
            csvWriter.writerow(["%d of %d events (%.1f%%) were omitted due to memory limits" % (
                omittedEvents, matchingEvents, (omittedEvents * 100) / matchingEvents)])

        # Header row of column names, then one row per result tuple.
        csvWriter.writerow(map(output_encoded, map(col_name, columns)))
        for i in range(len(values)):
            csvWriter.writerow(map(output_encoded, values[i]))
        print(csvBuffer.getvalue())


# Implement the "scalyr power-query" command.
def commandPowerQuery(parser):
    """Execute a PowerQuery and print its result table."""
    # Parse the command-line arguments.
    parser.add_argument('filter', nargs=1,
                        help='scalyr power query')
    parser.add_argument('--start', required=True,
                        help='beginning of the time range to query')
    parser.add_argument('--end', default='',
                        help='end of the time range to query')
    parser.add_argument('--output', choices=['csv', 'json', 'json-pretty'], default='csv',
                        help='specifies the format in which values are emitted')
    parser.add_argument('--priority', choices=['high', 'low'], default='high',
                        help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.')
    args = parser.parse_args()

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    # Send the query to the server.
    response, rawResponse = sendRequest(args, '/api/powerQuery', {
        "token": apiToken,
        "queryType": "complex",
        "query": args.filter[0],
        "startTime": args.start,
        "endTime": args.end,
        "priority": args.priority
    })

    # NOTE(review): relies on args.no_escape_unicode from the shared
    # top-level parser — confirm.
    printPowerResults(args.output, rawResponse, response, ensure_ascii=args.no_escape_unicode)


# Implement the "scalyr timeseries-query" command.
def commandTimeseriesQuery(parser):
    # Parse the command-line arguments.
684 | parser.add_argument('filter', nargs=1, default='', 685 | help='search term or filter expression') 686 | parser.add_argument('--function', default='', 687 | help='the value to compute from the events matching the filter') 688 | parser.add_argument('--start', required=True, 689 | help='beginning of the time range to query') 690 | parser.add_argument('--end', default='', 691 | help='end of the time range to query') 692 | parser.add_argument('--buckets', type=int, default=1, 693 | help='how many buckets to divide the time interval into (1-5000)') 694 | parser.add_argument('--output', choices=['csv', 'json', 'json-pretty'], default='csv', 695 | help='specifies the format in which numbers are emitted') 696 | parser.add_argument('--priority', choices=['high', 'low'], default='high', 697 | help='specifies the execution priority for this query. Use low for scripted operations where a delay of a second or so is acceptable.') 698 | parser.add_argument('--only-use-summaries', dest='onlyUseSummaries', action='store_true', default=False, 699 | help='specifies to query only summaries, and will not search the column store for any summaries not yet populated') 700 | 701 | # Note: we are inverting this (negative) argument into args.createSummaries. Use of this argument will set createSummaries = False in the query. 702 | parser.add_argument('--no-create-summaries', dest='createSummaries', action='store_false', default=True, 703 | help='specifies to not create summaries for this query') 704 | args = parser.parse_args() 705 | 706 | # Get the API token. 
707 | apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs') 708 | 709 | # build the query 710 | query = { 711 | "queryType": "numeric", 712 | "filter": args.filter[0], 713 | "function": args.function, 714 | "startTime": args.start, 715 | "endTime": args.end, 716 | "buckets": args.buckets, 717 | "priority": args.priority, 718 | "onlyUseSummaries": args.onlyUseSummaries, 719 | "createSummaries": args.createSummaries 720 | } 721 | 722 | # Send the query to the server. 723 | response, rawResponse = sendRequest(args, '/api/timeseriesQuery', { 724 | "token": apiToken, 725 | "queries": [query] 726 | }) 727 | 728 | # Print the results. 729 | printNumericResults(response['results'][0]['values'], args.output, rawResponse, response) 730 | 731 | 732 | def scalyrToolCli(): 733 | # All available commands 734 | all_commands = { 735 | 'query': commandQuery, 736 | 'tail': commandTail, 737 | 'numeric-query': commandNumericQuery, 738 | 'facet-query': commandFacetQuery, 739 | 'power-query': commandPowerQuery, 740 | 'timeseries-query': commandTimeseriesQuery, 741 | 'timerseries-query': commandTimeseriesQuery, # mispelling; kept for backwards compatibility 742 | 'get-file': commandGetFile, 743 | 'put-file': commandPutFile, 744 | 'delete-file': commandDeleteFile, 745 | 'list-files': commandListFiles, 746 | } 747 | 748 | # Define arguments that are the same for all commands 749 | parser = argparse.ArgumentParser( 750 | description='Scalyr command-line tool. See https://github.com/scalyr/scalyr-tool for documentation.') 751 | parser.add_argument('command', choices=all_commands.keys(), 752 | help='specifies the action to be performed') 753 | parser.add_argument('--version', action='version', version='%(prog)s ' + TOOL_VERSION) 754 | parser.add_argument('--server', 755 | help='URL for the Scalyr API server. Defaults to https://www.scalyr.com. 
If you are using eu.scalyr.com then this should be set to https://eu.scalyr.com.') 756 | parser.add_argument('--token', default='', 757 | help='API access token') 758 | parser.add_argument('--verbose', action='store_true', default=False, 759 | help='enables additional diagnostic output') 760 | 761 | parser.add_argument('--no-escape-unicode', action="store_false", 762 | help='When true, the json-pretty output format will show unicode characters rather than escaped unicode characters (the default for json-pretty is to use escaped characters)') 763 | 764 | parser.add_argument('--proxy', help='Proxy to connect through') 765 | 766 | command = None 767 | # Because the options are command-specific, we cannot fully parse the arguments until we know the 768 | # command... but we also need to know which command to invoke so that we can add the command-specific 769 | # options and then execute the command. So, here, we just guess what the command is by looking over all the 770 | # arguments and seeing the first one that matches one of the possible commands. 771 | for arg in sys.argv[1:]: 772 | if arg in all_commands: 773 | command = arg 774 | 775 | # If we could not find a possible command, then just try to parse the commandline with the current options, which 776 | # we know will fail since there's no valid value for 'command'. 777 | if command is None: 778 | tmp_args = parser.parse_args() 779 | 780 | # Invoke the command's function from the all_command's mapping. We really should change this to a more 781 | # object oriented approach. 782 | command_func = all_commands[command] 783 | command_func(parser) 784 | 785 | 786 | if __name__ == '__main__': 787 | scalyrToolCli() 788 | --------------------------------------------------------------------------------