├── LICENSE
├── README.md
├── anomaly_detector.py
├── arima_train.py
├── compare_fourier_prophet.py
├── data
│   └── prometheus.example.com
│       └── go_goroutines
│           ├── 20180617.json.bz2
│           └── 20180618
│               ├── 20180618.json.bz2
│               └── 201806182359.json.bz2
├── exp_smoothing_train.py
├── format_to_pandas.py
├── format_to_pandas_demo.sh
├── fourier_train.py
├── graphing_ts.py
├── holt_winters_train.py
├── imgs
│   ├── anomaly_detection1.png
│   ├── anomaly_detection2.png
│   ├── arima3.png
│   ├── compare_prophet_fourier3.png
│   ├── compare_prophet_fourier4.png
│   ├── compare_prophet_fourier5.png
│   ├── detect_anomaly_accumulator.png
│   ├── detect_anomaly_combined.png
│   ├── detect_anomaly_tail_prob.png
│   ├── example_ts.png
│   ├── exp_smoothing3.png
│   ├── forecasting_data.png
│   ├── fourier3.png
│   ├── fourier4.png
│   ├── fourier_extrapolation.png
│   ├── fourier_extrapolation_behind.png
│   ├── histogram_graph.png
│   ├── histogram_graph2.png
│   ├── imgs
│   │   ├── detect_anomaly_accumulator.png
│   │   ├── detect_anomaly_combined.png
│   │   ├── detect_anomaly_tail_prob.png
│   │   ├── example_ts.png
│   │   ├── fourier_extrapolation.png
│   │   ├── fourier_extrapolation_behind.png
│   │   └── partitioned_ts.png
│   ├── metadata_instance_label_cloudprovider_aws_api_request_duration_seconds.png
│   ├── metadata_instance_label_kubelet_docker.png
│   ├── metadata_operation_label_kubelet_docker2.png
│   ├── metadata_operation_label_legend_kubelet_docker.png
│   ├── partitioned_ts.png
│   ├── prophet.png
│   ├── prophet3.png
│   ├── summary_graph3.png
│   ├── summary_graph4.png
│   ├── t-sne_embedding.png
│   └── t_sne_embedding2.png
├── metadata_analysis
│   ├── get_single_ts_all.py
│   ├── graph_metadata.py
│   ├── graph_specific_ts.py
│   ├── plot_metadata_labels.py
│   └── t_sne_for_metadata.py
├── notebooks
│   ├── .ipynb_checkpoints
│   │   ├── ARIMA modelling-checkpoint.ipynb
│   │   ├── Exponential Smoothing and ARIMA on Real Data-checkpoint.ipynb
│   │   ├── Parse Json to Pandas Dataframes-checkpoint.ipynb
│   │   └── Prophet Model Forecasting-checkpoint.ipynb
│   ├── ARIMA modelling.ipynb
│   ├── Anomaly Detection Decision Rules.ipynb
│   ├── Counter Gauge Metric Analysis.ipynb
│   ├── Exponential Smoothing Real Data.ipynb
│   ├── Exponential Smoothing and ARIMA on Real Data.ipynb
│   ├── Fourier Analysis Forecasting.ipynb
│   ├── Parse Json to Pandas Dataframes.ipynb
│   ├── Prophet Model Forecasting.ipynb
│   ├── Verify Alerts with Spark and Explanatory Statistics - CEPH.ipynb
│   └── imgs
│       ├── arima.png
│       ├── detect_anomaly_accumulator.png
│       ├── detect_anomaly_combined.png
│       ├── detect_anomaly_tail_prob.png
│       ├── example_ts.png
│       ├── exp_smoothing.png
│       ├── fourier_extrapolation.png
│       ├── fourier_extrapolation_behind.png
│       ├── imgs
│       │   ├── detect_anomaly_accumulator.png
│       │   ├── detect_anomaly_combined.png
│       │   ├── detect_anomaly_tail_prob.png
│       │   ├── example_ts.png
│       │   ├── fourier_extrapolation.png
│       │   ├── fourier_extrapolation_behind.png
│       │   └── partitioned_ts.png
│       ├── kubelet_docker_instance_label.png
│       ├── kubelet_docker_op_type_label.png
│       ├── partitioned_ts.png
│       ├── prophet.png
│       └── t-sne_embedding.png
├── presentations
│   ├── devconf_presentation.pdf
│   ├── final_presentation.pdf
│   ├── lightning_talk.ppdf
│   ├── mid-summer_presentation.pdf
│   └── pipeline_arch.png
├── prophet_train.py
└── run_compare_mdls.sh
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data Science on Prometheus Metrics
2 | ## **Table of Contents**
3 | 1. [Metric Types](#metrics)
4 | 2. [Metadata Analysis](#metadata)
5 | 3. [Data Preprocessing](#preprocessing)
6 | 4. [Time Series Forecasting](#forcasting)
7 | 5. [Anomaly Detection Decision Rules](#ad)
8 | 6. [Anomaly Detection for Histograms and Summaries](#ad-hist-summ)
9 | 7. [Conclusion](#conclusion)
10 | ## **The Metrics**
11 | #### Anatomy of Metrics
12 | For more information, visit https://prometheus.io/docs/concepts/metric_types/
13 |
14 | Gauge Metric for a given target:
15 | * a time series
16 |
17 | Counter Metric for a given target:
18 | * a monotonically increasing time series
19 |
20 | Histogram Metric for a given target:
21 | * *a collection of bucket time series* - n buckets in the histogram correspond to n time series. The +Inf bucket time series is the same as the count time series.
22 | * *a count time series* -
23 | a cumulative count of all observations we have seen thus far
24 | * *a sum time series* - a cumulative sum of all observations we have seen thus far
25 |
26 | Summary Metric for a given target:
27 | * *quantile time series* - there are n quantiles corresponding to n time series
28 | * *count time series* - a cumulative count of all observations we have seen thus far
29 | * *sum time series* - a cumulative sum of all observations we have seen thus far
30 |
31 | #### Key Insights on Metrics
32 | 1. The instantaneous value of a counter is useless on its own - it depends on when Prometheus last reset the counter and is often not indicative of the state of the system. Counters are only useful when we look at how they change over time. For this reason, whenever we work with a counter we automatically preprocess it into a difference series, where difference(t) = raw(t) - raw(t-1).
33 | 2. The metrics are received and stored in the form of packets. All quantile packets are stored in a quantile folder, all count packets in a count folder, etc. We parse these packets and reformat them into dictionaries for each time series type and metric: the key is the metadata (target info and metric content labels) and the value is a Pandas DataFrame of timestamps and values for that unique metadata configuration. Essentially, each key-value pair represents a single incoming time series. A minimal sketch of both steps is shown below; for more information, check out [this notebook](notebooks/Parse%20Json%20to%20Pandas%20Dataframes.ipynb).
34 |
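To make these two points concrete, here is a minimal sketch. The packet layout mirrors what `format_to_pandas.py` reads (a `metric` label dictionary plus `values` pairs of timestamp and value), but the label values and samples themselves are made up:

```python
import numpy as np
import pandas as pd

# A single scraped packet: a label dictionary plus [timestamp, value] pairs (toy values).
pkt = {
    "metric": {"__name__": "kubelet_docker_operations_latency_microseconds",
               "instance": "ip-172-31-70-31.us-east-2.compute.internal",
               "quantile": "0.9"},
    "values": [[1529280000, "112000"], [1529280030, "115500"], [1529280060, "113200"]],
}

# Point 2: one DataFrame per unique metadata configuration, keyed by the stringified labels.
df = pd.DataFrame(pkt["values"], columns=["timestamps", "values"])
df["timestamps"] = pd.to_datetime(df["timestamps"], unit="s")
df["values"] = pd.to_numeric(df["values"])
data_dictionary = {str(pkt["metric"]): df}

# Point 1: a counter is only meaningful as a change, so convert the raw series into a
# difference series, difference(t) = raw(t) - raw(t-1), mirroring calc_delta() in the
# training scripts.
raw = np.array(df["values"])
diff = raw - np.roll(raw, 1)
diff[0] = 0
print(diff)
```
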
35 | ## **The Metadata**
36 |
37 | A given time series is assigned a unique metadata label configuration which includes user-specified and default labels. We used some basic visualization techniques for metadata analysis including graphing and dimensionality reduction.
38 |
39 | One of the default labels that occurs in every metric and metadata packet is the instance. The instance is an IP address which corresponds to the scraped target. However, these IP addresses are refreshed over time. In the graphs below, we can see that certain instances pop up and go away at different times. Because an instance change typically results from one or two targets going down, most instances are re-assigned simultaneously, which means that we cannot pinpoint the continuity between the old and new instance labels.
40 |
41 | Metadata analysis scripts can be found in [this folder](metadata_analysis/).
42 | 
43 | **Graph 1:** instance values over time for all targets of kubelet_docker_operation_latency_microseconds_quantile. Every value on the y-axis corresponds to a specific instance ip address (ex: ip-172-31-70-31.us-east-2.compute.internal)
44 |
45 | 
46 | **Graph 2:** instance values over time for all targets of cloudprovider_aws_api_request_duration_seconds. Every value on the y-axis corresponds to a specific instance ip address (ex: ip-172-31-70-31.us-east-2.compute.internal)
47 |
48 |
49 | 
50 | **Graph 3:** operation_type values over time for all targets of kubelet_docker_operation_latency_microseconds_quantile. legend provided below. Note that all operation_types exist for all timestamps except for remove_image.
51 | 
52 |
53 | #### T-SNE Embedding of dataset
54 | T-SNE embedding is a dimensionality reduction technique for mapping high-dimensional data into a lower dimension for visualization. In this case, our dimensions are the labels in the metadata. We used the standard t-SNE implementation from scikit-learn and represented our categorical data as numerical values from 1 to len(label values). The result is shown below.
55 |
56 | [Here](metadata_analysis/t_sne_for_metadata.py) is the script for generating these visualizations.
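Below is a simplified, self-contained sketch of the same idea rather than the full script: each categorical label is mapped to integers and the rows are embedded into two dimensions with scikit-learn's t-SNE (the metadata here is toy data):

```python
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Toy metadata: each row is one time series, each column is a label.
metadata = pd.DataFrame({
    "instance": ["ip-a", "ip-b", "ip-a", "ip-c", "ip-b", "ip-c"],
    "operation_type": ["create", "remove", "create", "list", "list", "remove"],
    "quantile": ["0.5", "0.9", "0.99", "0.5", "0.9", "0.99"],
})

# Represent each categorical label as integers 1..len(label values).
encoded = metadata.apply(lambda col: pd.factorize(col)[0] + 1)

# Embed the label space into 2 dimensions for visualization.
embedding = TSNE(n_components=2, perplexity=3, random_state=0).fit_transform(encoded.values)

plt.scatter(embedding[:, 0], embedding[:, 1], c=encoded["instance"], cmap="tab10")
plt.title("t-SNE embedding of metadata labels (toy data)")
plt.show()
```
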
57 | 
58 | **Graph 4:** t-sne embedding for all data points in kubelet_docker_operation_latency_microseconds_quantile. Note that the data seems to be clustered in some way.
59 | 
60 | **Graph 5:** t-sne embedding for all data points in kubelet_docker_operation_latency_microseconds_quantile colored by instance (there are too many instances for colors, so colors are re-used). Note that the coloring corresponds to the clustering, which indicates that our data is likely clustered by instance.
61 |
62 | ## **Data Preprocessing**
63 | In order to run forecasting and time series analysis on our Prometheus data, we had to reformat the metric packets received from Prometheus into a format which can be fed into a forecasting model. We found Pandas DataFrames to be very useful for this purpose.
64 |
65 | I designed a script that converts a collection of JSON files in a specified directory into a dictionary of DataFrames housed in a pickle file.
66 |
67 | **Input Format**: a directory which houses .json or .json.bz2 files. Each of these files may contain one or more JSON packets. The files can be read directly from a local folder or remotely from a Ceph folder (additional configuration may be required for Ceph retrieval). All of the JSON files are expected to come from a single metric.
68 |
69 | **Output Format**: a pickle file which houses a dictionary of Pandas DataFrames. The keys are the metadata configurations in a string format. To use this output, here are a few helpful commands:
70 |
71 |
72 | `data_dictionary = pickle.load(my_output_file)`
73 | `data_dictionary["{'__name__': 'kubelet_docker_operations_latency_microseconds','beta_kubernetes_io_arch': 'amd64'..."]`
74 |
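A slightly fuller sketch of reading the output; the path below assumes the demo run in [format_to_pandas_demo.sh](format_to_pandas_demo.sh), so adjust it to wherever your output pickle lives:

```python
import pickle

# Output files are named <output_dir>/<metric>.pkl by format_to_pandas.py.
with open("data/prometheus.example.com_pkl/go_goroutines.pkl", "rb") as f:
    data_dictionary = pickle.load(f)

# Keys are stringified metadata dicts; each value is a DataFrame for that time series.
first_key = next(iter(data_dictionary))
print(first_key)
print(data_dictionary[first_key].head())
```
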
75 | **Or Manually using the Command Line Arguments**
76 |
77 | `python format_to_pandas.py --metric METRIC --input INPUT_DIR --output OUTPUT_DIR`
78 |
79 | For more information about the command line arguments, use the help message `./format_to_pandas.py --help` or have a look at the [sample run](format_to_pandas_demo.sh) on sample data.
80 |
81 |
82 | [This](https://docs.google.com/spreadsheets/d/1CB14X5xd1dPH2x9m_ko_2rfz6BrilPZPYcJA_kQWBUo/edit?usp=sharing) spreadsheet has a working list of metrics and their associated data sparsity.
83 | ## **Forecasting**
84 |
85 | 
86 | **Graph 6:** A time series from kubelet_docker_operations_latency_microseconds with the following metadata: *{'name': 'http_request_duration_microseconds', 'app': 'openshift-web-console', 'handler': 'prometheus', 'instance': '10.129.0.18:8443', 'job': 'kubernetes-service-endpoints', 'kubernetes_name': 'webconsole', 'kubernetes_namespace': 'openshift-web-console', 'quantile': '0.9'}*
87 | #### Exponential Smoothing
88 | [This notebook](notebooks/Exponential%20Smoothing%20and%20ARIMA%20on%20Real%20Data.ipynb) has an introduction to Exponential Smoothing and a few examples.
89 | The implementation for this model came from the [statsmodels python package](http://www.statsmodels.org/dev/tsa.html)
90 | 
91 | **Graph 7:** Exponential Smoothing on a Time Series. Note that the forecast (yhat) stays flat at the last smoothed training value, which means this model does not capture the seasonality or volatility of the series.
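For reference, here is a minimal sketch of fitting this model with statsmodels on a toy series (the full training script in this repo is [exp_smoothing_train.py](exp_smoothing_train.py)):

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.api import SimpleExpSmoothing

# Toy series standing in for one Prometheus time series.
y_train = pd.Series(5 + np.sin(np.linspace(0, 20, 200)))

fit = SimpleExpSmoothing(y_train).fit()
yhat = fit.forecast(50)  # flat forecast: every future step equals the last smoothed level
print(yhat[:3])
```
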
92 | #### ARIMA Modelling
93 | [This notebook](notebooks/ARIMA%20modelling.ipynb) has an introduction to ARIMA and a few examples of ARIMA modelling.
94 | The implementation for this model came from the [statsmodels python package](http://www.statsmodels.org/dev/tsa.html).
95 | 
96 | **Graph 8:** ARIMA Modelling on a Time Series. Note that the forecast (yhat) decays to the mean very quickly. It seems that this model does not take into account the seasonality of the data. For this example, we used ARIMA(1,0,1).
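For reference, a minimal sketch of an ARIMA(1,0,1) fit on a toy series. Note that [arima_train.py](arima_train.py) in this repo uses the older `statsmodels.tsa.arima_model.ARIMA` interface; the sketch below uses the current `statsmodels.tsa.arima.model.ARIMA`:

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

# Toy series standing in for one Prometheus time series.
y_train = pd.Series(5 + np.sin(np.linspace(0, 20, 200)) + np.random.normal(0, 0.1, 200))

model = ARIMA(y_train, order=(1, 0, 1))   # (p, d, q) as in Graph 8
result = model.fit()
yhat = result.forecast(steps=50)          # the forecast decays toward the series mean
print(yhat[:3])
```
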
97 | #### Prophet Modelling
98 | [This notebook](notebooks/Prophet%20Model%20Forecasting.ipynb) has an introduction to Prophet and a few examples of Prophet modelling. [Here](https://peerj.com/preprints/3190.pdf) is Facebook's paper on Prophet modelling.
99 | 
100 | **Graph 9:** Prophet Modelling on a Time Series. Note that the model seems to train according to trend, and the bounds (yhat_upper and yhat_lower) are reasonably accurate. This specific example likely provides too little data for Prophet to detect anomalies accurately.
101 | 
102 | **Graph 10:** Prophet Modelling on a Time Series from kubelet_docker_operations_latency_microseconds. Notice how there are large gaps in the training and testing data. This is characteristic of many of the time series we get from Prometheus because there are often dead times in the systems. Prophet seems to handle these gaps pretty well.
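For reference, the basic Prophet workflow looks roughly like this (a sketch with toy data; [prophet_train.py](prophet_train.py) and the linked notebook contain the real version). Prophet expects a DataFrame with `ds` (timestamps) and `y` (values) columns; the package was published as `fbprophet` at the time of this project and is now released as `prophet`:

```python
import pandas as pd
from prophet import Prophet  # "from fbprophet import Prophet" in older releases

# Toy training frame with Prophet's expected column names.
train_df = pd.DataFrame({
    "ds": pd.date_range("2018-06-17", periods=200, freq="H"),
    "y": range(200),
})

m = Prophet()
m.fit(train_df)
future = m.make_future_dataframe(periods=48, freq="H")  # extend 48 hours past training
forecast = m.predict(future)
print(forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail())
```
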
103 | #### Fourier Extrapolation
104 | [This notebook](notebooks/Fourier%20Analysis%20Forecasting.ipynb) has an introduction to Fourier Analysis and a few examples.
105 | 
106 | **Graph 11:** Fourier Extrapolation on a Time Series. Note that this model does an excellent job of reproducing the seasonality of the training set. It responds very well when there is a clear pattern in the data.
107 | 
108 | **Graph 12:** Fourier Extrapolation on a Time Series from kubelet_docker_operations_latency_microseconds. Note that this forecast seems to be shifted upwards and reproduces the point-wise peaks in the training set.
109 | #### Model Comparisons
110 | Comparing Prophet and Fourier
111 | 
112 | **Graph 13:** A Comparison between Prophet and Fourier for a time series from kubelet_docker_operations_latency_microseconds. Prophet does a good job of predicting the mean, while Fourier accurately predicts the seasonality of the data.
113 | 
114 | **Graph 14:** A Comparison between Prophet and Fourier for a time series from kubelet_docker_operations_latency_microseconds. Prophet does a good job of predicting the mean, while Fourier accurately predicts the seasonality of the data.
115 | 
116 | **Graph 15:** A Comparison between Prophet and Fourier for a time series from kubelet_docker_operations_latency_microseconds. A and B are the same forecasts from above, while C and D represent two specific types of data. C had a training set with a few point-wise extremes. We can see here that Fourier is very sensitive to those extremities and will attempt to model them in the forecast, while Prophet does a good job of identifying the mean. In D, we have a training set which has a drastic change in seasonality halfway through. Again, Prophet seems rather robust against that sudden change while Fourier continues the original pattern with a slight variation.
117 | #### Further Research
118 | * RNNs (LSTMs)
119 | * Generalization to additional metrics
120 | * Verification from domain experts
121 | ## **Anomaly Detection Decision Rules**
122 | [This notebook](notebooks/Anomaly%20Detection%20Decision%20Rules.ipynb) provides details on the anomaly detection decision rules we've employed.
123 |
124 | ## **Anomaly Detection for Histograms and Summaries**
125 | #### **Histogram and Summary Visualization**
126 | Let's begin by taking a look at the graphs of histogram and summary components over time.
127 |
128 | 
129 | **Graph 18:** The buckets of apiserver_admission_controller_admission_latencies_seconds_count over time. While at first glance it looks like there is only the 976562.5 bucket, this is actually a graph of all the buckets; they all contain the same values. This indicates that the buckets are likely mis-configured, but they are nevertheless treated as the same value for our purposes in anomaly detection.
130 | 
131 | **Graph 19:** The buckets of cloudprovider_aws_api_request_duration_seconds over time. From this graph we can see that a majority of the time series raw data values lie in between 0.25 and 10.
132 | 
133 | **Graph 20:** The quantiles of kubelet_docker_operations_latency_microseconds over time. The 0.5 quantile will always be below the 0.9 quantile which will always be below the 0.99 quantile.
134 | 
135 | **Graph 21:** The quantiles of kubelet_docker_operations_latency_microseconds over time. Notice that the 0.99 quantile is much more volatile than the median. This is because the 0.99 quantile represents the extreme values whereas the 0.5 quantile represents the median which is often much more stable.
136 | #### **Key Observations**
137 | Histograms and Summaries can be seen as a collection of gauges and counters. We can take each quantile/bucket, count, and sum as an individual time series and apply anomaly detection on them individually, or we can apply some sort of anomaly detection based on their correlation.
138 |
139 | #### **Anomaly Detection Rule Outline**
140 | Below is an example of an alerting decision scheme for Summary metrics. It can be extended to Histogram metrics by replacing the quantiles with buckets.
141 | Let's define the function AD(time series) as follows:
142 |
143 |
144 | AD (time series):
145 | Return True if anomaly detected
146 | Return False otherwise
147 |
148 | Let's define quantiles as the set of all quantile time series.
149 | Below is a block decision chain for when to send an alert if an anomaly is detected in some subset of these time series.
150 |
151 | If 0.5 ∈ quantiles:
152 | If AD(0.5) OR [ AD(quant1) AND AD(quant2) AND AD(quant3) ] : (∀ quant1, quant2, quant3 ∈ quantiles)
153 | Send alert
154 | Else if AD(quant1) AND AD(quant2): (∀ quant1, quant2 ∈ quantiles)
155 | Send alert
156 | If AD(sum) AND AD(count):
157 | Send alert
158 | Else if AD(quant) AND [ AD(sum) OR AD(count) ] : (∀ quant ∈ quantiles)
159 | Send alert
160 |
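A rough Python translation of this decision chain is sketched below. Here `ad` stands in for any per-series anomaly detector returning True/False, and the quantifiers above are read as "any three" / "any two" of the quantile series, which is an assumption about the intended semantics:

```python
def should_alert(ad, quantile_series, sum_series, count_series):
    # quantile_series: dict mapping quantile label (e.g. "0.5") -> time series
    flags = {q: ad(ts) for q, ts in quantile_series.items()}
    n_flagged = sum(flags.values())

    if "0.5" in quantile_series:
        if flags["0.5"] or n_flagged >= 3:
            return True
    elif n_flagged >= 2:
        return True

    if ad(sum_series) and ad(count_series):
        return True
    if n_flagged >= 1 and (ad(sum_series) or ad(count_series)):
        return True
    return False
```
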
161 |
162 | ## **Conclusion**
163 | My project aimed to explore the connections between metadata labels, time series forecasting, and anomaly detection in an attempt to gain valuable quantitative insights into Prometheus metrics. The summer started with a dive into Spark and Prophet modelling. We found Spark to be very clunky for quick analysis of Prometheus data, so we next moved to working with local data in Pandas DataFrames. With these DataFrames, we applied various metadata analysis and forecasting techniques to our data.
164 |
165 | In addition to the analysis here, my teammates worked on deploying a real-time anomaly detection pipeline on Prometheus metrics for sending automatic alerts to developers.
166 |
167 | For future work, we would like to do more forecast and anomaly detection testing on a variety of metrics. Since we have the pipeline working, this will probably require setting up real-time anomaly detection for different targets and monitoring them in Grafana.
168 |
--------------------------------------------------------------------------------
/anomaly_detector.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | from scipy.stats import norm
5 |
6 | import matplotlib.transforms as mtransforms
7 |
8 | class Accumulator:
9 | def __init__(self,thresh):
10 | self._counter = 0
11 | self.thresh = thresh
12 | def inc(self, val):
13 | self._counter += val
14 | def count(self):
15 | return self._counter
16 |
17 | class AnomalyDetector:
18 | def __init__(self, window=8000, small_window=80, epsilon=0.61, bounds_thresh=22000, peak_thresh=130000, acc_thresh=1000):
19 | # accumulator parameters
20 | self.large_window = window
21 | self.small_window = small_window
22 | self.epsilon = epsilon
23 | # tail probability parameters
24 | self.bounds_thresh = bounds_thresh
25 | self.peak_thresh = peak_thresh
26 | self.acc_thresh = acc_thresh
27 |
28 | def anomaly_tail_distribution(self, w, w_prime):
29 | if len(w) != self.large_window:
30 | return "ERROR: input values do not match window size"
31 | mu = np.mean(w)
32 | std = np.std(w)
33 | mu_bar = np.mean(w_prime)
34 |
35 | L_t = norm.sf(((mu_bar - mu)/std))
36 | # print(L_t)
37 | if L_t >= 1 - self.epsilon:
38 | return 1
39 | return 0
40 |
41 | def anomaly_accumulator(self, y, y_hat):
42 | s_t = []
43 | anomaly_inds = []
44 | acc_thresh = self.acc_thresh
45 | acc = Accumulator(acc_thresh)
46 | for i in range(0, len(y_hat)):
47 | diff = y_hat[i] - y[i]
48 | if abs(diff) > self.bounds_thresh:
49 | # upper bound anomaly, increment counter
50 | acc.inc(1)
51 | elif y[i] > self.peak_thresh:
52 | # found peak, decrement so that acc will decay to 0
53 | acc.inc(-3)
54 | else:
55 | # no anomaly, decrement by 2
56 | acc.inc(-2)
57 |
58 | if acc.count() > acc.thresh:
59 | anomaly_inds.append(i)
60 |
61 | s_t.append(max(diff, 0))
62 | return s_t, anomaly_inds
63 | def get_anomalies(self, y, y_hat):
64 | if len(y) != len(y_hat):
65 | return "ERROR: lengths of inputs do not match"
66 | s_t, anomaly_inds_acc = self.anomaly_accumulator(y, y_hat)
67 | cum_window = self.large_window+self.small_window
68 |
69 | anomaly_inds_tail = []
70 | print("st:", len(s_t))
71 | print("cum_wind:", cum_window)
72 | for i in range(0,(len(s_t)-cum_window)):
73 | window = s_t[i:int(i+self.large_window)]
74 | small_window = s_t[int(i+self.large_window):int(i+cum_window)]
75 | val = self.anomaly_tail_distribution(window, small_window)
76 | anomaly_inds_tail.append(val)
77 | anomaly_inds_tail = np.argwhere(anomaly_inds_tail).flatten()
78 |
79 | print("a_i_tail: ", len(anomaly_inds_tail))
80 | print("a_i_accum: ", len(anomaly_inds_acc))
81 | # get intersection of both
82 | set_tail = set(anomaly_inds_tail)
83 | set_acc = set(anomaly_inds_acc)
84 | flag_anomaly = set_tail.intersection(set_acc)
85 | return flag_anomaly
86 |
87 | def detect_anomalies(predictions, data):
88 | if len(predictions) != len(data) :
89 | raise IndexError
90 |
91 | # parameters
92 | lower_bound_thresh = predictions["yhat_lower"].min()
93 | upper_bound_thresh = predictions["yhat_upper"].max()
94 | diff_thresh = 2*data["values"].std()
95 | acc_thresh = int(0.1*np.shape(predictions)[0])
96 | epsilon = .1
97 |
98 | diffs = []
99 | acc = Accumulator(acc_thresh)
100 | preds = np.array(predictions["yhat"])
101 | dat = np.array(data["values"])
102 | for i in range(0, np.shape(predictions)[0]):
103 | diff = preds[i] - dat[i]
104 | if abs(diff) > diff_thresh:
105 | # upper bound anomaly, increment counter
106 | acc.inc(1)
107 | elif dat[i] < lower_bound_thresh:
108 | # found trough, decrement so that acc will decay to 0
109 | acc.inc(-3)
110 | elif dat[i] > upper_bound_thresh:
111 | # found peak, decrement so that acc will decay to 0
112 | acc.inc(-3)
113 | else:
114 | # no anomaly, decrement by 2
115 | acc.inc(-2)
116 |
117 | diffs.append(max(diff, 0))
118 |
119 | if acc.count() > acc.thresh:
120 | acc_anomaly = True
121 | else:
122 | acc_anomaly = False
123 | w_size = int(0.8*len(data))
124 | w_prime_size = len(data) - w_size
125 |
126 | w = diffs[0:w_size]
127 | w_prime = diffs[w_size:]
128 |
129 | w_mu = np.mean(w)
130 | w_std = np.std(w)
131 | w_prime_mu = np.mean(w_prime)
132 |
133 | if w_std == 0:
134 | L_t = 0
135 | else:
136 | L_t = 1 - norm.sf((w_prime_mu - w_mu)/w_std)
137 |
138 | print(L_t)
139 | if L_t >= 1 - epsilon:
140 | tail_prob_anomaly = True
141 | else:
142 | tail_prob_anomaly = False
143 |
144 | return acc_anomaly and tail_prob_anomaly
145 |
146 |
147 |
148 | def graph(train, test, forecast, anomalies, metric_name):
149 | len_train = len(train)
150 | fig = plt.figure(figsize=(20,10))
151 | ax = plt.axes()
152 | ax.plot(np.array(train["timestamps"]), np.array(train["values"]), 'b', label = 'train', linewidth = 3)
153 | ax.plot(np.array(test["timestamps"]), np.array(test["values"]), 'g', label = 'test', linewidth = 3)
154 | ax.plot(np.array(forecast["ds"]), np.array(forecast["yhat"]), 'y', label = 'yhat')
155 | title = "Forecast for " + metric_name
156 | ax.set_title(title)
157 | ax.set_xlabel("Timestamp")
158 | ax.set_ylabel("Value")
159 | trans = mtransforms.blended_transform_factory(ax.transData, ax.transAxes)
160 | for a in anomalies:
161 | bool_arr = np.repeat(False,len(forecast))
162 | for i in range(a,a+100):
163 | bool_arr[i] = True
164 | ax.fill_between(np.array(forecast["ds"]),0,1, where=bool_arr, facecolor='red', alpha=0.5, transform=trans)
165 | plt.legend(loc=3)
166 | plt.show()
167 |
168 | metric_name = "http_request_duration_microseconds_quantile_728"
169 | filename = "../fourier_forecasts/forecast_" + metric_name + ".pkl"
170 | pkl_file = open(filename, "rb")
171 | forecast = pickle.load(pkl_file)
172 | train = pickle.load(pkl_file)
173 | test = pickle.load(pkl_file)
174 | pkl_file.close()
175 | forecast = forecast[np.shape(train)[0]:]
176 | print(len(forecast))
177 | print(len(test))
178 |
179 | inc = 0
180 | anomaly_inds = []
181 | for i in range(0,len(test)-100,100):
182 | if detect_anomalies(forecast[i:i+100], test[i:i+100]) :
183 | inc += 1
184 | anomaly_inds.append(i)
185 | print(inc)
186 |
187 | #ad = AnomalyDetector()
188 | #anomaly_inds = ad.get_anomalies(test, forecast[-len(test):])
189 | graph(train, test, forecast, anomaly_inds, metric_name)
190 |
--------------------------------------------------------------------------------
/arima_train.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import pandas as pd
4 | import warnings
5 | import matplotlib.pyplot as plt
6 | warnings.filterwarnings("ignore")
7 | import collections
8 | import argparse
9 | from statsmodels.tsa.arima_model import ARIMA
10 | import statsmodels.api as sm
11 | from datetime import datetime
12 | from pandas.plotting import autocorrelation_plot
13 |
14 |
15 | class Exp_Smoothing:
16 | def __init__(self, train, test):
17 | self.train = np.array(train["values"])
18 | self.ds_train = np.array(train["timestamps"])
19 | self.test = np.array(test["values"])
20 | self.ds_test = np.array(test["timestamps"])
21 |
22 | def fit_model(self, n_predict):
23 | start_date = min(self.ds_train)
24 | print(type(start_date))
25 | dates = sm.tsa.datetools.dates_from_range("2018m3", length=len(self.ds_train))
26 |
27 | df_train = pd.Series(self.train, index=dates)
28 | #autocorrelation_plot(df_train)
29 | #plt.show()
30 | model = ARIMA(df_train, order=(1,0,1))
31 | model_fit = model.fit(disp=0)
32 |         self.forecast = model_fit.forecast(steps=n_predict)
33 |
34 | ds = self.ds_test
35 |
36 | self.forecast = pd.DataFrame({"ds": ds, "yhat": self.forecast[0]})
37 | print(len(self.forecast["yhat"]))
38 | print(len(self.test))
39 | return self.forecast
40 |
41 | def graph(self, metric_name, key):
42 | plt.figure(figsize=(40,10))
43 |
44 |
45 | plt.plot(self.ds_train, self.train, 'b', label = 'train', linewidth = 3)
46 | print(np.array(self.forecast["yhat"]))
47 | plt.plot(self.ds_test, self.test, 'k', label = 'test', linewidth = 3)
48 | plt.plot(np.array(self.ds_test), np.array(self.forecast["yhat"]), 'g', label = 'yhat')
49 | # pl.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat_upper"]), 'y', label = 'yhat_upper')
50 | # pl.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat_lower"]), 'y', label = 'yhat_lower')
51 |
52 | plt.legend()
53 | plt.savefig("../testing/exp_smoothing_graphs/graph_" + metric_name + "_" + str(key) + ".png")
54 | plt.show()
55 |
56 |
57 |
58 | def calc_delta(vals):
59 | diff = vals - np.roll(vals, 1)
60 | diff[0] = 0
61 | return diff
62 |
63 | def monotonically_inc(vals):
64 | # check corner case
65 | if len(vals) == 1:
66 | return True
67 | diff = calc_delta(vals)
68 | diff[np.where(vals == 0)] = 0
69 |
70 | if ((diff < 0).sum() == 0):
71 | return True
72 | else:
73 | return False
74 |
75 | if __name__ == "__main__":
76 |     parser = argparse.ArgumentParser(description="run ARIMA training on time series")
77 |
78 | parser.add_argument("--metric", type=str, help='metric name', required=True)
79 |
80 | parser.add_argument("--key", type=int, help='key number')
81 |
82 | args = parser.parse_args()
83 |
84 | metric_name = args.metric
85 |
86 | pkl_file = open("../pkl_data/" + metric_name + "_dataframes.pkl", "rb")
87 | dfs = pickle.load(pkl_file)
88 | pkl_file.close()
89 | key_vals = list(dfs.keys())
90 |
91 | selected = [args.key]
92 | for ind in selected:
93 | key = key_vals[ind]
94 | df = dfs[key]
95 | df = df.sort_values(by=['timestamps'])
96 |
97 | print(key)
98 | df["values"] = df["values"].apply(pd.to_numeric)
99 | vals = np.array(df["values"].tolist())
100 |
101 | # check if metric is a counter, if so, run AD on difference
102 | if monotonically_inc(vals):
103 | print("monotonically_inc")
104 | vals = calc_delta(vals)
105 | df["values"] = vals
106 |
107 | train = df[0:int(0.7*len(vals))]
108 | test = df[int(0.7*len(vals)):]
109 | print(len(test))
110 | es = Exp_Smoothing(train, test)
111 | forecast = es.fit_model(len(test))
112 |
113 | f = open("../testing/exp_smoothing_forecasts/forecast_" + metric_name + "_" + str(args.key) + ".pkl", "wb")
114 | pickle.dump(forecast, f)
115 | pickle.dump(train, f)
116 | pickle.dump(test,f)
117 | f.close()
118 |
119 | es.graph(metric_name, args.key)
120 |
--------------------------------------------------------------------------------
/compare_fourier_prophet.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | import argparse
5 |
6 | def graph(train_df, test_df, p_forecast, f_forecast, metric, key):
7 | fig = plt.figure(figsize=(40,10))
8 | forecast_ds = np.array(f_forecast["ds"])
9 | print(len(forecast_ds))
10 | print(len(train_df))
11 | forecast_ds = forecast_ds[int(train_df["values"].count()):]
12 |
13 |
14 | plt.plot(np.array(train_df["ds"]), np.array(train_df["y"]),'b', label="train", linewidth=3)
15 | plt.plot(np.array(test_df["ds"]), np.array(test_df["y"]), 'k', label="test", linewidth=3)
16 |
17 | plt.savefig( "../testing/compare_fourier_prophet/" + str(key) + "_raw_" + metric + ".png", transparent=True)
18 | prophet = np.array(p_forecast["yhat"])
19 | prophet_upper = np.array(p_forecast["yhat_upper"])
20 | prophet_lower = np.array(p_forecast["yhat_lower"])
21 |
22 | fourier = f_forecast["yhat"]
23 | fourier = fourier[len(train_df["values"]):]
24 | print(len(forecast_ds))
25 | print(len(fourier))
26 | plt.plot(forecast_ds, fourier, 'g', label="fourier_yhat", linewidth=3)
27 | plt.savefig( "../testing/compare_fourier_prophet/" + str(key) + "_fourier_" + metric + ".png", transparent=True)
28 |
29 | prophet = prophet[len(train_df["values"]):]
30 | prophet_upper = prophet_upper[len(train_df["values"]):]
31 | prophet_lower = prophet_lower[len(train_df["values"]):]
32 | plt.plot(forecast_ds, prophet, '*y', label="prophet_yhat", linewidth=3)
33 | plt.plot(forecast_ds, prophet_upper, 'y', label="yhat_upper", linewidth=3)
34 | plt.plot(forecast_ds, prophet_lower, 'y', label="yhat_lower", linewidth=3)
35 |
36 |
37 | plt.plot()
38 | plt.xlabel("Timestamp")
39 | plt.ylabel("Value")
40 | plt.legend(loc=1)
41 | plt.title("Prophet Model Forecast")
42 | plt.savefig( "../testing/compare_fourier_prophet/" + str(key) + "_compare_" + metric + ".png", transparent=True)
43 | plt.close()
44 |
45 |
46 | fig = plt.figure(figsize=(40,10))
47 | forecast_ds = np.array(f_forecast["ds"])
48 | forecast_ds = forecast_ds[len(train_df["values"]):]
49 |
50 |
51 | plt.plot(np.array(train_df["ds"]), np.array(train_df["y"]),'b', label="train", linewidth=3)
52 | plt.plot(np.array(test_df["ds"]), np.array(test_df["y"]), 'k', label="test", linewidth=3)
53 |
54 | prophet = np.array(p_forecast["yhat"])
55 | prophet_upper = np.array(p_forecast["yhat_upper"])
56 | prophet_lower = np.array(p_forecast["yhat_lower"])
57 | prophet = prophet[len(train_df["values"]):]
58 | prophet_upper = prophet_upper[len(train_df["values"]):]
59 | prophet_lower = prophet_lower[len(train_df["values"]):]
60 | plt.plot(forecast_ds, prophet, '*y', label="prophet_yhat", linewidth=3)
61 | plt.plot(forecast_ds, prophet_upper, 'y', label="yhat_upper", linewidth=3)
62 | plt.plot(forecast_ds, prophet_lower, 'y', label="yhat_lower", linewidth=3)
63 | plt.savefig( "../testing/compare_fourier_prophet/" + str(key) + "_prophet_" + metric + ".png", transparent=True)
64 | plt.close()
65 | if __name__ == '__main__':
66 |     parser = argparse.ArgumentParser(description="compare Prophet and Fourier forecasts on time series")
67 |
68 | parser.add_argument("--metric", type=str, help='metric name', required=True)
69 | parser.add_argument("--key", type=int, help='key number')
70 |
71 | args = parser.parse_args()
72 |
73 | fname = "../prophet_forecasts/prophet_model_" + args.metric + "_" + str(args.key) + ".pkl"
74 | f = open(fname, "rb")
75 | p_forecast = pickle.load(f)
76 | print(len(p_forecast))
77 | p_train = pickle.load(f)
78 | print(len(p_train))
79 | p_test = pickle.load(f)
80 | print(len(p_test))
81 | f.close()
82 |
83 | fname = "../fourier_forecasts/forecast_" + args.metric + "_" + str(args.key) + ".pkl"
84 | f = open(fname, "rb")
85 | f_forecast = pickle.load(f)
86 | print(len(f_forecast))
87 | f_train = pickle.load(f)
88 | print(len(f_train))
89 | f_test = pickle.load(f)
90 | print(len(f_test))
91 | f.close()
92 |
93 | graph(p_train, p_test, p_forecast, f_forecast, args.metric, args.key)
94 |
--------------------------------------------------------------------------------
/data/prometheus.example.com/go_goroutines/20180617.json.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/data/prometheus.example.com/go_goroutines/20180617.json.bz2
--------------------------------------------------------------------------------
/data/prometheus.example.com/go_goroutines/20180618/20180618.json.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/data/prometheus.example.com/go_goroutines/20180618/20180618.json.bz2
--------------------------------------------------------------------------------
/data/prometheus.example.com/go_goroutines/20180618/201806182359.json.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/data/prometheus.example.com/go_goroutines/20180618/201806182359.json.bz2
--------------------------------------------------------------------------------
/exp_smoothing_train.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import pandas as pd
4 | import warnings
5 | import matplotlib.pyplot as plt
6 | warnings.filterwarnings("ignore")
7 | import collections
8 | import argparse
9 | from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
10 |
11 |
12 | class Exp_Smoothing:
13 | def __init__(self, train, test):
14 | self.train = np.array(train["values"])
15 | self.ds_train = np.array(train["timestamps"])
16 | self.test = np.array(test["values"])
17 | self.ds_test = np.array(test["timestamps"])
18 |
19 | def fit_model(self, n_predict):
20 |
21 | fit = SimpleExpSmoothing(self.train).fit()
22 | forecast = fit.forecast(n_predict)
23 |
24 | ds = self.ds_test
25 |
26 | self.forecast = pd.DataFrame({"ds": ds, "yhat": forecast})
27 |
28 | return self.forecast
29 |
30 | def graph(self, metric_name, key):
31 | plt.figure(figsize=(40,10))
32 |
33 | plt.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat"]), 'g', label = 'yhat')
34 | plt.plot(self.ds_train, self.train, 'b', label = 'train', linewidth = 3)
35 | plt.plot(self.ds_test, self.test, 'k', label = 'test', linewidth = 3)
36 | # pl.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat_upper"]), 'y', label = 'yhat_upper')
37 | # pl.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat_lower"]), 'y', label = 'yhat_lower')
38 |
39 | plt.legend()
40 | plt.savefig("../testing/exp_smoothing_graphs/graph_" + metric_name + "_" + str(key) + ".png")
41 | plt.show()
42 |
43 |
44 |
45 | def calc_delta(vals):
46 | diff = vals - np.roll(vals, 1)
47 | diff[0] = 0
48 | return diff
49 |
50 | def monotonically_inc(vals):
51 | # check corner case
52 | if len(vals) == 1:
53 | return True
54 | diff = calc_delta(vals)
55 | diff[np.where(vals == 0)] = 0
56 |
57 | if ((diff < 0).sum() == 0):
58 | return True
59 | else:
60 | return False
61 |
62 | if __name__ == "__main__":
63 |     parser = argparse.ArgumentParser(description="run exponential smoothing training on time series")
64 |
65 | parser.add_argument("--metric", type=str, help='metric name', required=True)
66 |
67 | parser.add_argument("--key", type=int, help='key number')
68 |
69 | args = parser.parse_args()
70 |
71 | metric_name = args.metric
72 |
73 | pkl_file = open("../pkl_data/" + metric_name + "_dataframes.pkl", "rb")
74 | dfs = pickle.load(pkl_file)
75 | pkl_file.close()
76 | key_vals = list(dfs.keys())
77 |
78 | selected = [args.key]
79 | for ind in selected:
80 | key = key_vals[ind]
81 | df = dfs[key]
82 | df = df.sort_values(by=['timestamps'])
83 |
84 | print(key)
85 | df["values"] = df["values"].apply(pd.to_numeric)
86 | vals = np.array(df["values"].tolist())
87 |
88 | # check if metric is a counter, if so, run AD on difference
89 | if monotonically_inc(vals):
90 | print("monotonically_inc")
91 | vals = calc_delta(vals)
92 | df["values"] = vals
93 |
94 | train = df[0:int(0.7*len(vals))]
95 | test = df[int(0.7*len(vals)):]
96 |
97 | es = Exp_Smoothing(train, test)
98 | forecast = es.fit_model(test.shape[0])
99 |
100 | f = open("../testing/exp_smoothing_forecasts/forecast_" + metric_name + "_" + str(args.key) + ".pkl", "wb")
101 | pickle.dump(forecast, f)
102 | pickle.dump(train, f)
103 | pickle.dump(test,f)
104 | f.close()
105 |
106 | es.graph(metric_name, args.key)
107 |
--------------------------------------------------------------------------------
/format_to_pandas.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import json
3 | import pandas as pd
4 | import fnmatch
5 | import os
6 | import bz2
7 | import pickle
8 | import argparse
9 | import gc
10 | from pprint import pprint
11 |
12 | # read files in list and convert to pandas dataframes
13 | def load_files(files):
14 | dfs = {}
15 | for file in files:
16 | # check file format and read appropriately
17 | if file.endswith('json'):
18 | f = open(file, 'rb')
19 | else:
20 | f = bz2.BZ2File(file, 'rb')
21 |
22 | jsons = json.load(f)
23 | f.close()
24 |
25 | # iterate through packets in file
26 | for pkt in jsons:
27 | # create a new dataframe with packet timestamp and values
28 | df = pd.DataFrame.from_dict(pkt["values"])
29 | df = df.rename( columns={0:"ds", 1:"y"})
30 | df["ds"] = pd.to_datetime(df["ds"], unit='s')
31 | df = df.sort_values(by=["ds"])
32 | df.y = pd.to_numeric(df['y'], errors='coerce')
33 | df = df.dropna()
34 | md = str(pkt["metric"])
35 | # append generated dataframe and metadata to collection
36 | try:
37 | dfs[md] = dfs[md].append(df, ignore_index=True)
38 |             except KeyError:
39 | dfs[md] = df
40 | return dfs
41 |
42 | # take a list of dataframes and their metadata and collapse to a
43 | # collection of unique time series (based on unique metadata)
44 | def collapse_to_unique(dfs_master, dfs_new):
45 | # iterate through metadata
46 | dfs_remaining = {}
47 | for md in dfs_new.keys():
48 | try:
49 | # find metadata in our master list
50 | # if this throws an error, simply add it to the list
51 | dfs_master[md] = dfs_master[md].append(dfs_new[md], ignore_index=True)
52 |         except KeyError:
53 | dfs_remaining[md] = dfs_new[md]
54 | return dfs_master, dfs_remaining
55 |
56 | # create pickle file containing data
57 | def save_checkpoint(pds, file):
58 | if file[-4:] != ".pkl":
59 | file = file + ".pkl"
60 | f = open(file, "wb")
61 | pickle.dump(pds, f)
62 | f.close()
63 | return file
64 |
65 | # load pickle file containing data
66 | def load_checkpoint(file):
67 | f = open(file, "rb")
68 | pds = pickle.load(f)
69 | f.close()
70 | return pds
71 |
72 | # load all files and convert to a list of pandas dataframes
73 | def convert_to_pandas(files, batch_size):
74 | checkpoints = []
75 | # # separate files into batches
76 | batches = [files[batch_size*i:batch_size*(i+1)] for i in range(int(len(files)/batch_size) + 1)]
77 | print("Batches: ", len(batches))
78 | i = 0
79 | for batch in batches:
80 | print("Load batch %i" % i, end="\r")
81 | i += 1
82 | # get new portion of dataframes and add to master set
83 | pds_new = load_files(batch)
84 | cp = save_checkpoint(pds_new, "raw_" + str(i))
85 | checkpoints.append(cp)
86 | gc.collect()
87 | print("Loaded %i batches" % i)
88 |
89 | pds = []
90 | # iterate checkpoint by checkpoint and add data to unique collection
91 | # of time series
92 | collapsed_fs = []
93 | i = 0
94 | for cp in checkpoints:
95 | i += 1
96 | print("Processing batch %i" % i, end="\r")
97 | pds_new = load_checkpoint(cp)
98 | # load data in batches and combine dataframes
99 | for f in collapsed_fs:
100 | pds = load_checkpoint(f)
101 | pds, pds_new = collapse_to_unique(pds, pds_new)
102 | save_checkpoint(pds, f)
103 | gc.collect()
104 | if len(pds_new) > 0:
105 | f_new = save_checkpoint(pds_new, "collapsed_" + str(i))
106 | # print("Generated ", f_new)
107 | collapsed_fs.append(f_new)
108 | gc.collect()
109 | print("Processed %i batches" % i)
110 | return pds
111 |
112 | # get main input arguments and return formatted data
113 | def read_input(data_folder, metric, batch_size):
114 | # metric-specific data folder
115 | folder = os.path.join(data_folder, metric)
116 |
117 | # get all files in folder
118 | files = []
119 | for root, d_names, f_names in os.walk(folder):
120 | for f in f_names:
121 | if f.endswith('bz2') or f.endswith('json'):
122 | files.append(os.path.join(root, f))
123 | files.sort()
124 | print("Processing %s files" % len(files))
125 |
126 | pd_frames = convert_to_pandas(files, batch_size)
127 |
128 | return pd_frames
129 |
130 | # remove all temp pickle files generated during this program
131 | # TODO: use tempfiles for temporary files
132 | def combine_checkpoints(master_file):
133 | df = {}
134 | files = os.listdir()
135 | for file in files:
136 | if fnmatch.fnmatch(file, "collapsed_*.pkl"):
137 | try:
138 | f = open(file, "rb")
139 | dfs = pickle.load(f)
140 | f.close()
141 | df.update(dfs)
142 | except:
143 | continue
144 |             os.remove(file)
145 | elif fnmatch.fnmatch(file, "raw_*.pkl"):
146 |             os.remove(file)
147 | f = open(master_file + ".pkl", "wb")
148 | pickle.dump(df, f)
149 | f.close()
150 |
151 | def main():
152 | print("Formatting Data")
153 | pd_frames = read_input(args.input, args.metric, args.batch_size)
154 | print("Conversion successful")
155 |
156 |     os.makedirs(args.output, exist_ok=True)
157 | master_file = os.path.join(args.output, args.metric)
158 |
159 | combine_checkpoints(master_file)
160 |
161 | print("Saved data:", master_file)
162 |
163 | if __name__ == '__main__':
164 |
165 | parser = argparse.ArgumentParser(description="format time series data into an array of pandas dataframes. input folder architecture: input folder must contain a folder with the metric name. Inside the metric folder will be sum/, count/, quant/, or bucket/ according to the metric_type. ex: data/metric_name/files. data/ is input directory")
166 |
167 | parser.add_argument("--metric", type=str, help='metric name', required=True)
168 |
169 | parser.add_argument("-i", "--input", default='', help='input directory')
170 |
171 | parser.add_argument("-o", "--output", default='', help='output directory')
172 |
173 | parser.add_argument("--batch_size", default=1, type=int, help="number of data files to process at once. use this flag if handling big dataset (recommended: 20)")
174 |
175 |
176 | args = parser.parse_args()
177 |
178 | main()
--------------------------------------------------------------------------------
/format_to_pandas_demo.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # mkdir data/prometheus.example.com/
4 |
5 | time python format_to_pandas.py \
6 | --metric go_goroutines \
7 | --input data/prometheus.example.com \
8 | --output data/prometheus.example.com_pkl \
9 | --batch_size 20
10 |
--------------------------------------------------------------------------------
/fourier_train.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from numpy import fft
4 | import pandas as pd
5 | import warnings
6 | import matplotlib.pyplot as plt
7 | warnings.filterwarnings("ignore")
8 | import collections
9 | import argparse
10 |
11 | class FourierForecast:
12 | def __init__(self, train, test):
13 | self.train = np.array(train["values"])
14 | self.ds_train = np.array(train["timestamps"])
15 | self.test = np.array(test["values"])
16 | self.ds_test = np.array(test["timestamps"])
17 |
18 | def fourierExtrapolation(self, n_predict, n_harm):
19 |         n = self.train.size # number of samples in the training series
20 | t = np.arange(0, n)
21 | p = np.polyfit(t, self.train, 1) # find linear trend in x
22 | train_notrend = self.train - p[0] * t # detrended x
23 | train_freqdom = fft.fft(train_notrend) # detrended x in frequency domain
24 | f = fft.fftfreq(n) # frequencies
25 | indexes = np.arange(n).tolist()
26 |
27 | # sort indexes by frequency, lower -> higher
28 | indexes.sort(key = lambda i:np.absolute(f[i]))
29 |
30 | t = np.arange(0, n + n_predict)
31 | restored_sig = np.zeros(t.size)
32 | for i in indexes[:1 + n_harm * 2]:
33 | ampli = np.absolute(train_freqdom[i]) / n # amplitude
34 | phase = np.angle(train_freqdom[i]) # phase
35 | restored_sig += ampli * np.cos(2 * np.pi * f[i] * t + phase)
36 | return restored_sig + p[0] * t
37 |
38 | def fit_model(self, n_predict):
39 |
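        # The confidence band is flat: max(train) plus one standard deviation
        # on top, min(train) minus one standard deviation on the bottom,
        # clamped at 0 when the training minimum is positive.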
40 | minimum = np.min(self.train)
41 | stddev = np.std(self.train)
42 |
43 | upper = np.max(self.train) + stddev
44 | lower = minimum - stddev
45 |
46 | if minimum > 0:
47 | lower = max(0, lower)
48 |
49 | # n_harm = 1/3 of number of data points was chosen by visual inspection
50 | n_harm = int(len(self.train)/3)
51 | forecast = self.fourierExtrapolation(n_predict, n_harm)
52 |
53 | ds = np.append(self.ds_train, self.ds_test)
54 |
55 | self.forecast = pd.DataFrame({"ds": ds, "yhat": forecast, "yhat_upper": upper,"yhat_lower": lower})
56 |
57 | return self.forecast
58 |
59 | def graph(self):
60 | plt.figure(figsize=(40,10))
61 | # ds = np.arange(0, len(np.array(self.forecast["ds"])))
62 | # ds_train = np.arange(0,len(self.ds_train))
63 | # ds_test = np.arange(len(self.ds_train),len(self.ds_train) + len(self.ds_test))
64 | # plt.plot(ds_train, self.train, 'b', label = 'train', linewidth = 3)
65 | # plt.plot(ds_test, self.test, 'g', label = 'test', linewidth = 3)
66 | # plt.plot(ds, np.array(self.forecast["yhat"]), 'y', label = 'yhat')
67 | ds_forecast = np.array(self.forecast["ds"])
68 | forecast = np.array(self.forecast["yhat"])
69 |
70 | ds_forecast = ds_forecast[len(self.ds_train):]
71 | forecast = forecast[len(self.ds_train):]
72 | plt.plot(self.ds_train, self.train, 'b', label = 'train', linewidth = 3)
73 | plt.plot(self.ds_test, self.test, 'g', label = 'test', linewidth = 3)
74 | plt.plot(ds_forecast,forecast, 'y', label = 'yhat')
75 |
76 | # plt.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat_upper"]), 'y', label = 'yhat_upper')
77 | # plt.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat_lower"]), 'y', label = 'yhat_lower')
78 |
79 | plt.legend()
80 |
81 | def calc_delta(vals):
82 | diff = vals - np.roll(vals, 1)
83 | diff[0] = 0
84 | return diff
85 |
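# Heuristic counter check: a Prometheus counter only ever increases (apart
# from resets to zero), so a series with no negative deltas (ignoring points
# where the value is exactly 0, i.e. resets) is treated as a counter and is
# differenced with calc_delta before modelling.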
86 | def monotonically_inc(vals):
87 | # check corner case
88 | if len(vals) == 1:
89 | return True
90 | diff = calc_delta(vals)
91 | diff[np.where(vals == 0)] = 0
92 |
93 | if ((diff < 0).sum() == 0):
94 | return True
95 | else:
96 | return False
97 |
98 |
99 | if __name__ == "__main__":
100 |     parser = argparse.ArgumentParser(description="run Fourier extrapolation training on a time series")
101 |
102 | parser.add_argument("--metric", type=str, help='metric name', required=True)
103 |
104 | parser.add_argument("--key", type=int, help='key number')
105 |
106 | args = parser.parse_args()
107 |
108 | metric_name = args.metric
109 |
110 | pkl_file = open("../pkl_data/" + metric_name + "_dataframes.pkl", "rb")
111 | #pkl_file = open("../data/real_data_test.pkl", "rb")
112 | dfs = pickle.load(pkl_file)
113 | pkl_file.close()
114 | key_vals = list(dfs.keys())
115 |
116 | selected = [args.key]
117 | for ind in selected:
118 | key = key_vals[ind]
119 | #df = dfs["{'__name__': 'http_request_duration_microseconds', 'beta_kubernetes_io_arch': 'amd64', 'beta_kubernetes_io_instance_type': 'm4.xlarge', 'beta_kubernetes_io_os': 'linux', 'failure_domain_beta_kubernetes_io_region': 'us-east-2', 'failure_domain_beta_kubernetes_io_zone': 'us-east-2a', 'handler': 'prometheus', 'hostname': 'free-stg-node-compute-e0756', 'instance': 'ip-172-31-76-144.us-east-2.compute.internal', 'job': 'kubernetes-nodes-exporter', 'kubernetes_io_hostname': 'ip-172-31-76-144.us-east-2.compute.internal', 'logging_infra_fluentd': 'true', 'node_role_kubernetes_io_compute': 'true', 'quantile': '0.99', 'region': 'us-east-2', 'type': 'compute'}"]
120 | df = dfs[key]
121 | # df["timestamps"] = df["ds"]
122 | # df["values"] = df["y"]
123 | df = df.sort_values(by=['timestamps'])
124 |
125 | print(key)
126 | df["values"] = df["values"].apply(pd.to_numeric)
127 | vals = np.array(df["values"].tolist())
128 |
129 | # check if metric is a counter, if so, run AD on difference
130 | if monotonically_inc(vals):
131 | print("monotonically_inc")
132 | vals = calc_delta(vals)
133 | df["values"] = vals
134 |
135 | train = df[0:int(0.7*len(vals))]
136 | test = df[int(0.7*len(vals)):]
137 |
138 | # graph(vals)
139 | ff = FourierForecast(train, test)
140 | forecast = ff.fit_model(test.shape[0])
141 |
142 | f = open("../fourier_forecasts/forecast_" + metric_name + "_" + str(args.key) + ".pkl", "wb")
143 | pickle.dump(forecast, f)
144 | pickle.dump(train, f)
145 | pickle.dump(test,f)
146 | f.close()
147 |
148 | ff.graph()
149 | plt.savefig("../presentation/graphs/" + str(args.key) + "_" + args.metric + ".png", transparent=True)
150 | plt.close()
151 |
--------------------------------------------------------------------------------
/graphing_ts.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from numpy import fft
4 | import pandas as pd
5 | import warnings
6 | import matplotlib.pyplot as plt
7 | warnings.filterwarnings("ignore")
8 | from scipy.stats import chisquare
9 | import collections
10 | import bz2
11 | def fourierExtrapolation(x, n_predict, n_harm):
12 | n = x.size
13 | #n_harm = 100 # number of harmonics in model
14 | t = np.arange(0, n)
15 | p = np.polyfit(t, x, 1) # find linear trend in x
16 | x_notrend = x - p[0] * t # detrended x
17 | x_freqdom = fft.fft(x_notrend) # detrended x in frequency domain
18 | f = fft.fftfreq(n) # frequencies
19 | indexes = np.arange(n).tolist()
20 | # sort indexes by frequency, lower -> higher
21 | indexes.sort(key = lambda i:np.absolute(f[i]))
22 |
23 | t = np.arange(0, n + n_predict)
24 | restored_sig = np.zeros(t.size)
25 | for i in indexes[:1 + n_harm * 2]:
26 | ampli = np.absolute(x_freqdom[i]) / n # amplitude
27 | phase = np.angle(x_freqdom[i]) # phase
28 | restored_sig += ampli * np.cos(2 * np.pi * f[i] * t + phase)
29 | return restored_sig + p[0] * t
30 |
31 | def fit_model(train, n_predict):
32 |
33 | model = collections.namedtuple('model',['upper','lower','forecast'])
34 |
35 | minimum = np.min(train)
36 | stddev = np.std(train)
37 |
38 | model.upper = np.max(train) + stddev
39 | model.lower = minimum - stddev
40 | if minimum > 0:
41 | model.lower = max(0, model.lower)
42 |
43 | # n_harm = 1/3 of number of data points was chosen by visual inspection
44 | n_harm = int(len(train)/3)
45 |
46 | model.forecast = fourierExtrapolation(train, n_predict, n_harm)
47 |
48 | return model
49 |
50 | def window_AD(forecast, test, win_size):
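    # Histogram-based window check (a sketch of the idea in this exploratory
    # script): bin a window of test values into num_bins bins, fold forecast
    # values that fall outside the bin range into the end bins, apply add-one
    # smoothing to both histograms, and compare them with a chi-square test;
    # the resulting p-values are collected as a window-level anomaly score.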
51 | num_bins = 5
52 |
53 | new_forecast = forecast[-len(test):]
54 | # windows = [np.arange(win_size*i,win_size*(i+1)) for i in range(int(len(test)/win_size) + 1)]
55 | # windows[-1] = np.arange(windows[-1][0], len(test))
56 | win_test = test[1:win_size]
57 | win_forecast = new_forecast[1:win_size]
58 | p_vals = []
59 | for j in range(0, len(test)):
60 | print(j+len(forecast)-len(test))
61 | win_test = test[1:win_size]
62 | for i in range(0,len(test)):
63 | test_hist, bin_edges = np.histogram(win_test, bins=num_bins)
64 |             big_vals = np.where(win_forecast > bin_edges[-1])[0] # indices of forecast points above the top bin edge
65 |             small_vals = np.where(win_forecast < bin_edges[0])[0] # indices of forecast points below the bottom bin edge
66 | f_hist, bin_edges = np.histogram(win_forecast, bins=bin_edges)
67 | # print(np.sum(test_hist))
68 | # print(np.sum(f_hist))
69 | f_hist[-1] = f_hist[-1] + len(big_vals)
70 | f_hist[0] = f_hist[0] + len(small_vals)
71 | test_hist = [x+1 for x in test_hist]
72 | f_hist = [x+1 for x in f_hist]
73 | # print(test_hist)
74 | # print(f_hist)
75 | vals = chisquare(f_hist, f_exp=test_hist )
76 | # print(vals[1])
77 | p_vals.append(vals[1])
78 | # win_test = np.roll(win_test, 1)
79 | new_forecast = np.roll(new_forecast, 1)
80 | win_forecast = new_forecast[1:win_size]
81 | # if p_val > .75:
82 | # return True
83 | print(np.max(np.array(p_vals)))
84 | p_vals = []
85 | test = np.roll(test, 1)
86 | return False
87 |
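# Decision rule: flag a point-wise anomaly whenever the test data leaves the
# flat [lower, upper] band produced by fit_model; otherwise fall back to the
# histogram-based window check above.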
88 | def detect_anomalies(model, test):
89 | if np.max(test) > model.upper:
90 |         print("point-wise anomaly: upper bound exceeded")
91 | #return "point-wise anomaly - upper bound exceeded\nbound: " + str(model.upper) + "\nexceeded value: " + str(np.max(test))
92 | if np.min(test) < model.lower:
93 |         print("point-wise anomaly: lower bound exceeded")
94 | #return "point-wise anomaly - lower bound exceeded"
95 | else:
96 | # run histogram-based AD
97 | if window_AD(model.forecast, test, 60):
98 | return "5-min window anomaly detected"
99 | return "running histogram-based AD"
100 | return "no anomalies detected"
101 |
102 | def graph(series):
103 | x_series = np.arange(series.size)
104 | plt.plot(x_series, series, 'b', label = 'x', linewidth = 3)
105 | # pl.plot(x_test, test, 'g*', label = 'x', linewidth = 3)
106 | #pl.plot(x_extrapolation, extrapolation, 'r', label = 'extrapolation')
107 |
108 | plt.legend()
109 | pkl_file = open("../pkl_data/http_request_duration_microseconds_quantile_dataframes.pkl", "rb")
110 | dfs = pickle.load(pkl_file)
111 | pkl_file.close()
112 | print(type(dfs))
113 | key_vals = list(dfs.keys())
114 | print(len(key_vals))
115 |
116 |
117 | pkl_file = open("../data/real_data_test.pkl", "wb")
118 | pickle.dump(dfs, pkl_file)
119 | pkl_file.close()
120 | i = 0
121 | for key in key_vals[0:800]:
122 | print(key)
123 |     # debug override (single hard-coded series): df = dfs["{'__name__': 'http_request_duration_microseconds', 'beta_kubernetes_io_arch': 'amd64', 'beta_kubernetes_io_instance_type': 'm4.xlarge', 'beta_kubernetes_io_os': 'linux', 'failure_domain_beta_kubernetes_io_region': 'us-east-2', 'failure_domain_beta_kubernetes_io_zone': 'us-east-2a', 'handler': 'prometheus', 'hostname': 'free-stg-node-compute-e0756', 'instance': 'ip-172-31-76-144.us-east-2.compute.internal', 'job': 'kubernetes-nodes-exporter', 'kubernetes_io_hostname': 'ip-172-31-76-144.us-east-2.compute.internal', 'logging_infra_fluentd': 'true', 'node_role_kubernetes_io_compute': 'true', 'quantile': '0.99', 'region': 'us-east-2', 'type': 'compute'}"]
124 |     df = dfs[key]
125 | df["values"] = df["values"].apply(pd.to_numeric)
126 | df = df.sort_values(by=['timestamps'])
127 | vals = np.array(df["values"].tolist())
128 | # train = vals[0:int(0.7*len(vals))]
129 | # test = vals[int(0.7*len(vals)):]
130 | # print(np.max(test))
131 | # print(np.where(test == np.max(test)))
132 | # x_vals = np.arange(0,len(vals))
133 | # x_test = x_vals[int(0.7*len(vals)):]
134 | # x_train = x_vals[0:int(0.7*len(vals))]
135 | # mdl = fit_model(train, len(test))
136 | print(i)
137 | i += 1
138 | # print(detect_anomalies(mdl, test))
139 |
140 | graph(vals)
141 | # plt.show()
142 | plt.savefig("../time_series_graphing/graphs_http_total/fourier_" + str(i) + ".png")
143 | plt.close()
--------------------------------------------------------------------------------
/holt_winters_train.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import pandas as pd
4 | import warnings
5 | import matplotlib.pyplot as plt
6 | warnings.filterwarnings("ignore")
7 | import collections
8 | import argparse
9 | from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
10 |
11 |
12 | class Exp_Smoothing:
13 | def __init__(self, train, test):
14 | self.train = np.array(train["values"])
15 | self.ds_train = np.array(train["timestamps"])
16 | self.test = np.array(test["values"])
17 | self.ds_test = np.array(test["timestamps"])
18 |
19 | def fit_model(self, n_predict):
20 |
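        # Holt-Winters triple exponential smoothing: additive trend and
        # additive seasonality with a fixed seasonal period of 4 samples (not
        # tuned per metric), with a Box-Cox transform applied before fitting.
        # Note that Box-Cox assumes strictly positive values, so this can fail
        # on differenced counter series.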
21 | fit = ExponentialSmoothing(self.train, seasonal_periods=4, trend='add', seasonal='add').fit(use_boxcox=True)
22 | forecast = fit.forecast(n_predict)
23 |
24 | ds = self.ds_test
25 |
26 | self.forecast = pd.DataFrame({"ds": ds, "yhat": forecast})
27 |
28 | return self.forecast
29 |
30 | def graph(self, metric_name, key):
31 | plt.figure(figsize=(40,10))
32 |
33 | plt.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat"]), 'y', label = 'yhat')
34 | plt.plot(self.ds_train, self.train, '*b', label = 'train', linewidth = 3)
35 | plt.plot(self.ds_test, self.test, '*g', label = 'test', linewidth = 3)
36 | # pl.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat_upper"]), 'y', label = 'yhat_upper')
37 | # pl.plot(np.array(self.forecast["ds"]), np.array(self.forecast["yhat_lower"]), 'y', label = 'yhat_lower')
38 |
39 | plt.legend()
40 | plt.savefig("../testing/exp_smoothing_graphs/graph_" + metric_name + "_" + str(key) + ".png")
41 | plt.show()
42 |
43 |
44 |
45 | def calc_delta(vals):
46 | diff = vals - np.roll(vals, 1)
47 | diff[0] = 0
48 | return diff
49 |
50 | def monotonically_inc(vals):
51 | # check corner case
52 | if len(vals) == 1:
53 | return True
54 | diff = calc_delta(vals)
55 | diff[np.where(vals == 0)] = 0
56 |
57 | if ((diff < 0).sum() == 0):
58 | return True
59 | else:
60 | return False
61 |
62 | if __name__ == "__main__":
63 |     parser = argparse.ArgumentParser(description="run Holt-Winters exponential smoothing training on a time series")
64 |
65 | parser.add_argument("--metric", type=str, help='metric name', required=True)
66 |
67 | parser.add_argument("--key", type=int, help='key number')
68 |
69 | args = parser.parse_args()
70 |
71 | metric_name = args.metric
72 |
73 | pkl_file = open("../pkl_data/" + metric_name + "_dataframes.pkl", "rb")
74 | dfs = pickle.load(pkl_file)
75 | pkl_file.close()
76 | key_vals = list(dfs.keys())
77 |
78 | selected = [args.key]
79 | for ind in selected:
80 | key = key_vals[ind]
81 | df = dfs[key]
82 | df = df.sort_values(by=['timestamps'])
83 |
84 | print(key)
85 | df["values"] = df["values"].apply(pd.to_numeric)
86 | vals = np.array(df["values"].tolist())
87 |
88 | # check if metric is a counter, if so, run AD on difference
89 | if monotonically_inc(vals):
90 | print("monotonically_inc")
91 | vals = calc_delta(vals)
92 | df["values"] = vals
93 |
94 | train = df[0:int(0.7*len(vals))]
95 | test = df[int(0.7*len(vals)):]
96 |
97 | es = Exp_Smoothing(train, test)
98 | forecast = es.fit_model(test.shape[0])
99 |
100 | f = open("../testing/exp_smoothing_forecasts/forecast_" + metric_name + "_" + str(args.key) + ".pkl", "wb")
101 | pickle.dump(forecast, f)
102 | pickle.dump(train, f)
103 | pickle.dump(test,f)
104 | f.close()
105 |
106 | es.graph(metric_name, args.key)
107 |
--------------------------------------------------------------------------------
/imgs/anomaly_detection1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/anomaly_detection1.png
--------------------------------------------------------------------------------
/imgs/anomaly_detection2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/anomaly_detection2.png
--------------------------------------------------------------------------------
/imgs/arima3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/arima3.png
--------------------------------------------------------------------------------
/imgs/compare_prophet_fourier3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/compare_prophet_fourier3.png
--------------------------------------------------------------------------------
/imgs/compare_prophet_fourier4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/compare_prophet_fourier4.png
--------------------------------------------------------------------------------
/imgs/compare_prophet_fourier5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/compare_prophet_fourier5.png
--------------------------------------------------------------------------------
/imgs/detect_anomaly_accumulator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/detect_anomaly_accumulator.png
--------------------------------------------------------------------------------
/imgs/detect_anomaly_combined.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/detect_anomaly_combined.png
--------------------------------------------------------------------------------
/imgs/detect_anomaly_tail_prob.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/detect_anomaly_tail_prob.png
--------------------------------------------------------------------------------
/imgs/example_ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/example_ts.png
--------------------------------------------------------------------------------
/imgs/exp_smoothing3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/exp_smoothing3.png
--------------------------------------------------------------------------------
/imgs/forecasting_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/forecasting_data.png
--------------------------------------------------------------------------------
/imgs/fourier3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/fourier3.png
--------------------------------------------------------------------------------
/imgs/fourier4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/fourier4.png
--------------------------------------------------------------------------------
/imgs/fourier_extrapolation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/fourier_extrapolation.png
--------------------------------------------------------------------------------
/imgs/fourier_extrapolation_behind.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/fourier_extrapolation_behind.png
--------------------------------------------------------------------------------
/imgs/histogram_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/histogram_graph.png
--------------------------------------------------------------------------------
/imgs/histogram_graph2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/histogram_graph2.png
--------------------------------------------------------------------------------
/imgs/imgs/detect_anomaly_accumulator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/imgs/detect_anomaly_accumulator.png
--------------------------------------------------------------------------------
/imgs/imgs/detect_anomaly_combined.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/imgs/detect_anomaly_combined.png
--------------------------------------------------------------------------------
/imgs/imgs/detect_anomaly_tail_prob.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/imgs/detect_anomaly_tail_prob.png
--------------------------------------------------------------------------------
/imgs/imgs/example_ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/imgs/example_ts.png
--------------------------------------------------------------------------------
/imgs/imgs/fourier_extrapolation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/imgs/fourier_extrapolation.png
--------------------------------------------------------------------------------
/imgs/imgs/fourier_extrapolation_behind.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/imgs/fourier_extrapolation_behind.png
--------------------------------------------------------------------------------
/imgs/imgs/partitioned_ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/imgs/partitioned_ts.png
--------------------------------------------------------------------------------
/imgs/metadata_instance_label_cloudprovider_aws_api_request_duration_seconds.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/metadata_instance_label_cloudprovider_aws_api_request_duration_seconds.png
--------------------------------------------------------------------------------
/imgs/metadata_instance_label_kubelet_docker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/metadata_instance_label_kubelet_docker.png
--------------------------------------------------------------------------------
/imgs/metadata_operation_label_kubelet_docker2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/metadata_operation_label_kubelet_docker2.png
--------------------------------------------------------------------------------
/imgs/metadata_operation_label_legend_kubelet_docker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/metadata_operation_label_legend_kubelet_docker.png
--------------------------------------------------------------------------------
/imgs/partitioned_ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/partitioned_ts.png
--------------------------------------------------------------------------------
/imgs/prophet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/prophet.png
--------------------------------------------------------------------------------
/imgs/prophet3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/prophet3.png
--------------------------------------------------------------------------------
/imgs/summary_graph3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/summary_graph3.png
--------------------------------------------------------------------------------
/imgs/summary_graph4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/summary_graph4.png
--------------------------------------------------------------------------------
/imgs/t-sne_embedding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/t-sne_embedding.png
--------------------------------------------------------------------------------
/imgs/t_sne_embedding2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/imgs/t_sne_embedding2.png
--------------------------------------------------------------------------------
/metadata_analysis/get_single_ts_all.py:
--------------------------------------------------------------------------------
1 | metadata = {'__name__': 'kubelet_docker_operations_latency_microseconds', 'beta_kubernetes_io_arch': 'amd64', 'beta_kubernetes_io_instance_type': 'm4.xlarge', 'beta_kubernetes_io_os': 'linux', 'clam_controller_enabled': 'True', 'clam_server_enabled': 'True', 'failure_domain_beta_kubernetes_io_region': 'us-east-2', 'failure_domain_beta_kubernetes_io_zone': 'us-east-2a', 'fluentd_test': 'true', 'hostname': 'free-stg-master-5c6a9', 'instance': 'ip-172-31-73-251.us-east-2.compute.internal', 'job': 'kubernetes-nodes', 'kubernetes_io_hostname': 'ip-172-31-73-251.us-east-2.compute.internal', 'node_role_kubernetes_io_compute': 'true', 'node_role_kubernetes_io_master': 'true', 'operation_type': 'version', 'quantile': '0.5', 'region': 'us-east-2', 'type': 'master'}
2 |
3 | import json
4 | from datetime import datetime
5 | import matplotlib.pyplot as plt
6 | import matplotlib.dates as dt
7 | import pandas as pd
8 | import re
9 | import string
10 | import random
11 | import numpy as np
12 | import fnmatch
13 | import os
14 | import sys
15 | import bz2
16 | import pickle
17 | if len(sys.argv) != 2:
18 | print("incorrect number of command line arguments")
19 | print("received: ", len(sys.argv))
20 | print("expected: 2")
21 | exit(1)
22 |
23 | m_name = sys.argv[1]
24 | metric_type = ""
25 | data_folder = "../data/"
26 |
27 | # find bucket/quantile, sum, and count files in metric folder
28 | filename_bkt = []
29 |
30 | try:
31 | for file in os.listdir(data_folder + m_name + "/bucket/"):
32 | if fnmatch.fnmatch(file, "*.json.bz2"):
33 | metric_type = "hist"
34 | f_name = data_folder + m_name + "/bucket/" + file
35 | filename_bkt.append(f_name)
36 | except:
37 | for file in os.listdir(data_folder + m_name + "/quantile/quant2/"):
38 | if fnmatch.fnmatch(file, "*.json"):
39 | metric_type = "summary"
40 | f_name = data_folder + m_name + "/quantile/quant2/" + file
41 | filename_bkt.append(f_name)
42 |
43 | print("Metric: ", m_name)
44 | if metric_type == "hist":
45 | label = "le"
46 | elif metric_type == "summary":
47 | label = "quantile"
48 | else:
49 | print("no metric type detected")
50 | exit(1)
51 |
52 | print("Metric: ", m_name)
53 | if metric_type == "hist":
54 | label = "le"
55 | elif metric_type == "summary":
56 | label = "quantile"
57 | else:
58 | print("no metric type detected")
59 | exit(1)
60 |
61 | results_folder = "../results/"
62 |
63 |
64 | # load appropriate data
65 | inc = 0
66 | num_jsons = 10
67 | jsons_bkt = []
68 | print(len(filename_bkt))
69 | dfs = []
70 | for file in filename_bkt:
71 | f = open(file, 'rb')
72 | # f = bz2.BZ2File(file, 'rb')
73 | one_json = json.load(f)
74 | f.close()
75 | for pkt in one_json:
76 | df = pd.DataFrame.from_dict(pkt["values"])
77 | df = df.rename( columns={0:"timestamps", 1:"values"})
78 | df["timestamps"] = pd.to_datetime(df["timestamps"], unit='s')
79 | df = df.sort_values(by=["timestamps"])
80 | meta_keys = np.array(list(pkt["metric"].keys()))
81 | meta_vals = np.array(list(pkt["metric"].values()))
82 | md = dict(zip(meta_keys, meta_vals))
83 | if md == metadata:
84 | dfs.append(df)
85 | print(len(dfs))
86 | # if inc == num_jsons:
87 | # break
88 | print(inc)
89 | inc += 1
90 |
91 |
92 | file = "df_one_ts" + ".pkl"
93 | pickle_file = open(file, "wb")
94 | pickle.dump(dfs, pickle_file)
95 | pickle_file.close()
96 |
97 |
98 |
--------------------------------------------------------------------------------
/metadata_analysis/graph_metadata.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datetime import datetime
3 | import matplotlib.pyplot as plt
4 | import matplotlib.dates as dt
5 | import re
6 | import string
7 | import random
8 | import numpy as np
9 | import fnmatch
10 | import os
11 | import sys
12 | import bz2
13 | from matplotlib.backends.backend_pdf import PdfPages
14 | from matplotlib.collections import EventCollection
15 | import time
16 |
17 | colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
18 | if len(sys.argv) != 2:
19 | print("incorrect number of command line arguments")
20 | print("received: ", len(sys.argv))
21 | print("expected: 2")
22 | exit(1)
23 |
24 | m_name = sys.argv[1]
25 | metric_type = ""
26 | data_folder = "../data/"
27 |
28 | # find bucket/quantile, sum, and count files in metric folder
29 | filename_bkt = []
30 |
31 | try:
32 | for file in os.listdir(data_folder + m_name + "/bucket/"):
33 | if fnmatch.fnmatch(file, "*.json.bz2"):
34 | metric_type = "hist"
35 | f_name = data_folder + m_name + "/bucket/" + file
36 | filename_bkt.append(f_name)
37 | except:
38 | for file in os.listdir(data_folder + m_name + "/quantile/"):
39 | if fnmatch.fnmatch(file, "*.json.bz2"):
40 | metric_type = "summary"
41 | f_name = data_folder + m_name + "/quantile/" + file
42 | filename_bkt.append(f_name)
43 |
44 | print("Metric: ", m_name)
45 | if metric_type == "hist":
46 | label = "le"
47 | elif metric_type == "summary":
48 | label = "quantile"
49 | else:
50 | print("no metric type detected")
51 | exit(1)
52 |
53 | results_folder = "../results/"
54 | png_name = results_folder + m_name + '_graphs.png'
55 | png_name2 = results_folder + m_name + '_graphs_legend.png'
56 |
57 | def parse_jsons(jsons, select_label="__name__"):
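    # Walk every packet in the loaded JSON dumps and collect, per label, the
    # set of label values seen (master_md), plus each packet's timestamps and
    # metadata, so that a chosen label can later be plotted against time.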
58 | times = []
59 | master_md = {}
60 | md = []
61 | for one_json in jsons:
62 | for pkt in one_json:
63 | timestamps = []
64 | timestamps_int = []
65 | for i in pkt["values"]:
66 | timestamps.append(datetime.fromtimestamp(float(i[0])))
67 | metadata = pkt["metric"]
68 | for lbl in metadata.keys():
69 | lbl_val = metadata[lbl]
70 | if lbl in master_md.keys():
71 | if lbl_val in master_md[lbl]:
72 | continue
73 | else:
74 | master_md[lbl].append(lbl_val)
75 | else:
76 | temp_list = []
77 | temp_list.append(lbl_val)
78 | master_md[lbl] = temp_list
79 | times.append(timestamps)
80 | md.append(metadata)
81 | return master_md, times, md
82 |
83 |
84 | fig = plt.figure(figsize=(20,10))
85 | lbls = set()
86 | label_axis = {}
87 | select_label = ""
88 | while(True):
89 | master_md = []
90 | times = []
91 | times_int = []
92 | md = []
93 | #filename_bkt = filename_bkt[1:2]
94 | if len(filename_bkt) == 0:
95 | break
96 | print(len(filename_bkt))
97 |
98 | jsons_bkt = []
99 | for i in range(0,len(filename_bkt)):
100 | file = filename_bkt[i]
101 | print(i)
102 | print(file)
103 | f = bz2.BZ2File(file, 'rb')
104 | jsons_bkt.append(json.load(f))
105 | f.close()
106 | if i >= 50:
107 | break
108 |
109 | try:
110 | filename_bkt = filename_bkt[50:]
111 | except:
112 | filename_bkt = []
113 |
114 | master_md, times, md = parse_jsons(jsons_bkt, label)
115 |
116 | for lbl in master_md:
117 | print("\n==", lbl, len(master_md[lbl]))
118 | for lbl_val in master_md[lbl]:
119 | print("\t", lbl_val)
120 |
121 | if select_label == "":
122 | select_label = input("\n\nSelect a label to graph:\n")
123 | try:
124 | label_vals = master_md[select_label]
125 | except:
126 | print("Not a valid label. Exiting..")
127 | exit(1)
128 |
129 | graph = {}
130 | for md_i in range(0, len(md)):
131 | metadata = md[md_i]
132 | try:
133 | label_val = metadata[select_label]
134 | except:
135 | continue
136 |
137 | try:
138 | graph[label_val].extend(times[md_i])
139 | except:
140 | graph[label_val] = times[md_i]
141 |
142 |
143 | for j in graph.keys():
144 | lbls.add(j)
145 | print("number of label values: ", len(graph.keys()))
146 |
147 | for i in lbls:
148 | print(i)
149 | try:
150 | x = dt.date2num(graph[i])
151 | except:
152 | continue
153 | try:
154 | val = label_axis[i]
155 | y = [val]*len(x)
156 | except:
157 | val = len(label_axis)
158 | label_axis[i] = val
159 | y = [val]*len(x)
160 |
161 | plt.plot(x, y, ',', color=colors[(val+1)%len(colors)])
162 |
163 |
164 | del metadata
165 | del md
166 | del master_md
167 | del times
168 |
169 | title = select_label
170 | plt.gcf().autofmt_xdate()
171 | plt.suptitle(m_name)
172 | plt.title(title)
173 | plt.xlabel("Timestamp")
174 | plt.xticks(rotation=25)
175 | plt.ylabel("Value")
176 | plt.yticks(np.arange(len(label_axis.keys())))
177 |
178 | ax = plt.gca()
179 | xfmt = dt.DateFormatter('%Y-%m-%d %H:%M:%S')
180 | ax.xaxis.set_major_formatter(xfmt)
181 | #plt.show()
182 | plt.savefig(png_name)
183 | plt.close()
184 |
185 | # plot the legend table
186 | plt.figure(figsize=(20,10))
187 | print(label_axis.keys())
188 | n_lbls = np.array(list(label_axis.keys()))
189 | n_lbls.shape = (len(lbls), 1)
190 | vals = np.array(list(label_axis.values()))
191 | vals.shape = (len(vals), 1)
192 | table_vals = np.append(vals, n_lbls, 1)
193 | t = plt.table(cellText=table_vals, colLabels=["Number", label], cellLoc='center', loc='center')
194 | # t.set_fontsize(18)
195 | t.scale(1,3)
196 | plt.axis("off")
197 | plt.title("LEGEND")
198 | plt.savefig(png_name2)
--------------------------------------------------------------------------------
/metadata_analysis/graph_specific_ts.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datetime import datetime
3 | import matplotlib.pyplot as plt
4 | import matplotlib.dates as dt
5 | import re
6 | import string
7 | import random
8 | import numpy as np
9 | import fnmatch
10 | import os
11 | import sys
12 | import ast
13 | from matplotlib.backends.backend_pdf import PdfPages
14 |
15 | if len(sys.argv) != 2:
16 | print("incorrect number of command line arguments")
17 | print("received: ", len(sys.argv))
18 | print("expected: 2")
19 | exit(1)
20 |
21 | file = sys.argv[1]
22 | lines = [line.rstrip('\n') for line in open(file)]
23 | m_name = lines[0]
24 | target_metadata = lines[1]
25 | target_metadata = target_metadata.replace("'", "\"")
26 | target_metadata = json.loads(target_metadata)
27 |
28 | data_folder = "../data/"
29 | metric_type = "hist"
30 | #metadata = "{'beta_kubernetes_io_arch': 'amd64', 'beta_kubernetes_io_fluentd_ds_ready': 'true', 'beta_kubernetes_io_instance_type': 'm4.xlarge', 'beta_kubernetes_io_os': 'linux', 'clam_controller_enabled': 'True', 'failure_domain_beta_kubernetes_io_region': 'us-east-2', 'failure_domain_beta_kubernetes_io_zone': 'us-east-2a', 'fluentd_test': 'true', 'hostname': 'free-stg-master-03fb6', 'instance': 'ip-172-31-78-254.us-east-2.compute.internal', 'job': 'kubernetes-nodes', 'kubernetes_io_hostname': 'ip-172-31-78-254.us-east-2.compute.internal', 'node_role_kubernetes_io_master': 'true', 'operation_type': 'list_images', 'region': 'us-east-2', 'type': 'master'}"
31 |
32 | # find bucket/quantile, sum, and count files in metric folder
33 | for file in os.listdir(data_folder + m_name + "/"):
34 | if fnmatch.fnmatch(file, "bucket_*.json"):
35 | metric_type = "hist"
36 | filename_bkt = data_folder + m_name + "/" + file
37 | elif fnmatch.fnmatch(file, "quantile_*.json"):
38 | metric_type = "summary"
39 | filename_bkt = data_folder + m_name + "/" + file
40 | if fnmatch.fnmatch(file, "count_*.json"):
41 | filename_cnt = data_folder + m_name + "/" + file
42 | if fnmatch.fnmatch(file, "sum_*.json"):
43 | filename_sum = data_folder + m_name + "/" + file
44 | if metric_type == "hist" or metric_type == "summary":
45 | print("Metric: ", m_name)
46 | else:
47 | print("no metric type detected")
48 | exit(1)
49 |
50 | results_folder = "../results/"
51 | pp_graph = PdfPages(results_folder + m_name + '_graphs.pdf')
52 | pp_hist = PdfPages(results_folder + m_name + '_hists.pdf')
53 |
54 | # load appropriate data
55 | f = open(filename_bkt)
56 | jsonFile_bkt = json.load(f)
57 | f.close()
58 |
59 | f2 = open(filename_cnt)
60 | jsonFile_cnt = json.load(f2)
61 | f2.close()
62 |
63 | f3 = open(filename_sum)
64 | jsonFile_sum = json.load(f3)
65 | f3.close()
66 |
67 | # each index corresponds to one graph
68 | # each graph is a list of lists
69 | # list of list of lists
70 | # graph = all_series[i]
71 | # one_series = graph[i]
72 | # one_data_point = one_series[i]
73 | b_val = []
74 | b_time = []
75 | b_md = []
76 | b_le = []
77 | for pkt in jsonFile_bkt:
78 | timestamps = []
79 | vals = []
80 | for i in pkt["values"]:
81 | if i[1] != 'NaN':
82 | vals.append(float(i[1]))
83 | timestamps.append(datetime.fromtimestamp(float(i[0])))
84 | metadata = pkt["metric"]
85 | if metric_type == "hist":
86 | le = metadata["le"]
87 | del metadata["le"]
88 | elif metric_type == "summary":
89 | le = metadata["quantile"]
90 | del metadata["quantile"]
91 | metric_name = metadata["__name__"]
92 | del metadata["__name__"]
93 | if metadata == target_metadata:
94 | metadata = str(metadata)
95 | if len(vals) > 0:
96 | b_val.append(vals)
97 | b_time.append(timestamps)
98 | b_md.append(metadata)
99 | b_le.append(le)
100 |
101 | s_val = []
102 | s_time = []
103 | s_md = []
104 | for pkt in jsonFile_sum:
105 | timestamps = []
106 | vals = []
107 | for i in pkt["values"]:
108 | if i[1] != 'NaN':
109 | vals.append(float(i[1]))
110 | timestamps.append(datetime.fromtimestamp(float(i[0])))
111 | metadata = pkt["metric"]
112 | metric_name = metadata["__name__"]
113 | del metadata["__name__"]
114 | metadata = str(metadata)
115 | if len(vals) > 0:
116 | s_val.append(vals)
117 | s_time.append(timestamps)
118 | s_md.append(metadata)
119 |
120 | c_val = []
121 | c_time = []
122 | c_md = []
123 | for pkt in jsonFile_cnt:
124 | timestamps = []
125 | vals = []
126 | for i in pkt["values"]:
127 | if i[1] != 'NaN':
128 | vals.append(float(i[1]))
129 | timestamps.append(datetime.fromtimestamp(float(i[0])))
130 | metadata = pkt["metric"]
131 | metric_name = metadata["__name__"]
132 | del metadata["__name__"]
133 | metadata = str(metadata)
134 | if len(vals) > 0:
135 | c_val.append(vals)
136 | c_time.append(timestamps)
137 | c_md.append(metadata)
138 |
139 |
140 |
141 | graphs = {}
142 | graph_label = []
143 | graph_xs = {}
144 | for md_i in range(0,len(b_md)):
145 | metadata = str(b_md[md_i])
146 | label = b_le[md_i]
147 | try:
148 | graphs[metadata][label].extend(b_val[md_i])
149 | graph_xs[metadata][label].extend(b_time[md_i])
150 | except:
151 | try:
152 | graphs[metadata][label] = b_val[md_i]
153 | graph_xs[metadata][label] = b_time[md_i]
154 | except:
155 | label_dict = {}
156 | label_dict[label] = b_val[md_i]
157 | label_t_dict = {}
158 | label_t_dict[label] = b_time[md_i]
159 | graphs[metadata] = label_dict
160 | graph_xs[metadata] = label_t_dict
161 |
162 |
163 |
164 | inc = 0
165 | print("number of graphs: ", len(graphs.keys()))
166 | for i in graphs.keys():
167 | if (inc+1) % 50 == 0:
168 | pp_graph.close()
169 | pp_graph = PdfPages(results_folder + str(inc+1) + "_" + m_name + '_graphs.pdf')
170 | pp_hist.close()
171 | pp_hist = PdfPages(results_folder + str(inc+1) + "_" + m_name + '_hists.pdf')
172 | print(inc)
173 | graph_title = i
174 | xs = graph_xs[i]
175 | ys = graphs[i]
176 | #if graph_title == "{'instance': '172.31.65.74:8444', 'job': 'kubernetes-controllers', 'request': 'detach_volume'}":
177 |
178 | title = re.sub("(.{200})", "\\1\n", graph_title, 0, re.DOTALL)
179 | if len(graph_title) > 50:
180 | graph_title= graph_title[1:50]
181 | plt.figure(figsize=(20,10))
182 | for j in ys.keys():
183 | plt.plot(xs[j], ys[j], '*')
184 | plt.gcf().autofmt_xdate()
185 | plt.suptitle(metric_name)
186 | plt.title(title)
187 | plt.legend(ys.keys())
188 | plt.xlabel("Timestamp")
189 | plt.ylabel("Value")
190 |
191 | #savefile = "graphs/" + insts[i] + "_" + graph_title + ".png"
192 | plt.savefig(pp_graph, format='pdf')
193 | plt.close()
194 |
195 | main_title = re.sub("(.{200})", "\\1\n", graph_title, 0, re.DOTALL)
196 | if len(graph_title) > 50:
197 | graph_title= graph_title[1:50]
198 | plt.figure(figsize=(20,10))
199 | for j in ys.keys():
200 | time = xs[j][0]
201 | break
202 | for j in range(0, len(s_time[inc])):
203 | if s_time[inc][j] == time:
204 | sum_val = s_val[inc][j]
205 | break
206 | for j in range(0, len(c_time[inc])):
207 | if c_time[inc][j] == time:
208 | count_val = c_val[inc][j]
209 | break
210 |
211 |
212 | graph_label = list(xs.keys())
213 | tmp = graph_label
214 | tmp.sort()
215 | if metric_type == "hist":
216 | inf = tmp[0]
217 |
218 | # take away the +Inf bucket
219 | tmp = tmp[1::]
220 |
221 | # sort the remaining integers/floats
222 | tmp.sort(key=float)
223 |
224 | # append +Inf to the end
225 | tmp.append(inf)
226 |
227 | sorted_y = []
228 | for j in tmp:
229 | for k in graph_label:
230 | if j == k:
231 | sorted_y.append(ys[k][0])
232 | break
233 |
234 | graph_label = tmp
235 | bar_vals = np.arange(len(graph_label))
236 | plt.bar(bar_vals, height =sorted_y)
237 | plt.xticks(bar_vals, graph_label)
238 | plt.gcf().autofmt_xdate()
239 | plt.suptitle(main_title)
240 | title = "Count: " + str(count_val) + ", Sum: " + str(sum_val)
241 | plt.title(title, fontsize=20)
242 | plt.xlabel("Bucket")
243 | plt.ylabel("Value" )
244 |
245 | # #savefile = "hists/" + insts[i] + ".png"
246 | plt.savefig(pp_hist, format='pdf')
247 | plt.close()
248 | inc += 1
249 |
250 | pp_graph.close()
251 | pp_hist.close()
--------------------------------------------------------------------------------
/metadata_analysis/plot_metadata_labels.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datetime import datetime
3 | import matplotlib.pyplot as plt
4 | import matplotlib.dates as dt
5 | import re
6 | import string
7 | import random
8 | import numpy as np
9 | import bz2
10 | from matplotlib.backends.backend_pdf import PdfPages
11 | pp = PdfPages('label_hists2.pdf')
12 | import os
13 |
14 | label = "instance"
15 | folder = "kubelet_docker_operations_latency_microseconds/"
16 | files = os.listdir(folder)
17 | jsons = []
18 |
19 | inc = 0
20 | print(len(files))
21 | md = []
22 | for file in files:
23 | inc += 1
24 | print(inc)
25 | filen = folder + file
26 | try:
27 | f = bz2.BZ2File(filen, 'rb')
28 | jsonFile = json.load(f)
29 | f.close()
30 | except IsADirectoryError:
31 | continue
32 | for pkt in jsonFile:
33 | metadata = pkt["metric"]
34 | del metadata["__name__"]
35 | md.append(metadata)
36 |
37 | lbls = {}
38 | for i in range(0, len(md)):
39 | for key in md[i].keys():
40 | if key in lbls.keys():
41 | lbls[key].append(md[i][key])
42 | else:
43 | lbls[key] = [md[i][key]]
44 |
45 | for key in lbls.keys():
46 | vals = lbls[key]
47 | plt.figure(figsize=(10,5))
48 | plt.hist(vals)
49 | #plt.gcf().autofmt_xdate()
50 | #plt.legend(lbl)
51 | plt.title(key)
52 | plt.xlabel("Label Value")
53 | plt.ylabel("Count")
54 | plt.savefig(pp, format='pdf')
55 | plt.close()
56 |
57 | pp.close()
--------------------------------------------------------------------------------
/metadata_analysis/t_sne_for_metadata.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datetime import datetime
3 | import matplotlib.pyplot as plt
4 | import matplotlib.dates as dt
5 | import re
6 | import string
7 | import random
8 | import numpy as np
9 | import fnmatch
10 | import os
11 | import sys
12 | import bz2
13 | from matplotlib.backends.backend_pdf import PdfPages
14 | from matplotlib.collections import EventCollection
15 | from sklearn.manifold import TSNE
16 | import time
17 | import pickle
18 |
19 | if len(sys.argv) != 2:
20 | print("incorrect number of command line arguments")
21 | print("received: ", len(sys.argv))
22 | print("expected: 2")
23 | exit(1)
24 |
25 | m_name = sys.argv[1]
26 | metric_type = ""
27 | data_folder = "../data/"
28 |
29 | # find bucket/quantile, sum, and count files in metric folder
30 | filename_bkt = []
31 |
32 | try:
33 | for file in os.listdir(data_folder + m_name + "/bucket/"):
34 | if fnmatch.fnmatch(file, "*.json.bz2"):
35 | metric_type = "hist"
36 | f_name = data_folder + m_name + "/bucket/" + file
37 | filename_bkt.append(f_name)
38 | except:
39 | for file in os.listdir(data_folder + m_name + "/quantile/"):
40 | if fnmatch.fnmatch(file, "*.json.bz2"):
41 | metric_type = "summary"
42 | f_name = data_folder + m_name + "/quantile/" + file
43 | filename_bkt.append(f_name)
44 |
45 | print("Metric: ", m_name)
46 | if metric_type == "hist":
47 | label = "le"
48 | elif metric_type == "summary":
49 | label = "quantile"
50 | else:
51 | print("no metric type detected")
52 | exit(1)
53 |
54 | results_folder = "../results/"
55 | png_name = results_folder + m_name + '_graphs.png'
56 | png_name2 = results_folder + m_name + '_graphs_legend.png'
57 |
58 | def parse_jsons(jsons, select_label="__name__"):
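    # Feature construction sketch: each metadata label is mapped to one column
    # of a fixed-width 200-dimensional vector (in order of first appearance),
    # and each distinct label value is mapped to a small integer id, so every
    # time series becomes one integer-coded row of X that is later embedded
    # with t-SNE.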
59 | X = np.zeros(shape=[1, 200])
60 | master_labels = []
61 | label_ints = []
62 | mds = []
63 | for one_json in jsons:
64 | for row in range(0, len(one_json)):
65 | metadata = one_json[row]["metric"]
66 | labels = list(metadata.keys())
67 | label_vals = list(metadata.values())
68 | x_feature = np.zeros(shape=[1,200])
69 | for i in range(0, len(labels)):
70 | flag = True
71 | for j in range(0,len(master_labels)):
72 | if master_labels[j] == labels[i]:
73 | if label_vals[i] in label_ints[j]:
74 | x_feature[0,j] = label_ints[j][label_vals[i]]
75 | else:
76 |                             x_feature[0,j] = label_ints[j][label_vals[i]] = len(label_ints[j])+1 # record the new value id in the feature vector as well
77 | flag = False
78 | if flag:
79 | master_labels.append(labels[i])
80 | label_ints_tmp = {}
81 | label_ints_tmp[label_vals[i]] = 1
82 | x_feature[0,len(label_ints)] = label_ints_tmp[label_vals[i]]
83 | label_ints.append(label_ints_tmp)
84 |
85 | mds.append(metadata)
86 | X = np.vstack((X, x_feature))
87 | X = X[1:,:]
88 | return X, master_labels, label_ints, mds
89 |
90 | jsons_bkt = []
91 | for i in range(0,len(filename_bkt)):
92 | file = filename_bkt[i]
93 | print(i)
94 | print(file)
95 | f = bz2.BZ2File(file, 'rb')
96 | jsons_bkt.append(json.load(f))
97 | f.close()
98 | if i >= 15:
99 | break
100 |
101 | X, master_labels, label_ints, mds = parse_jsons(jsons_bkt, label)
102 |
103 | X_embedded = TSNE(n_components=2).fit_transform(X)
104 | file = open("x_vals", "wb")
105 | pickle.dump(X, file)
106 | pickle.dump(X_embedded, file)
107 | pickle.dump(master_labels, file)
108 | pickle.dump(label_ints, file)
109 | pickle.dump(mds, file)
110 | file.close()
111 |
112 |
113 | print(X_embedded.shape)
114 | plt.figure(figsize=(20,10))
115 | plt.scatter(X_embedded[:,0], X_embedded[:,1],cmap=plt.cm.Spectral)
116 | plt.show()
117 | # fig = plt.figure(figsize=(20,10))
118 | # lbls = set()
119 | # label_axis = {}
120 | # select_label = ""
121 | # while(True):
122 | # master_md = []
123 | # times = []
124 | # times_int = []
125 | # md = []
126 | # #filename_bkt = filename_bkt[1:2]
127 | # if len(filename_bkt) == 0:
128 | # break
129 | # print(len(filename_bkt))
130 |
131 | # jsons_bkt = []
132 | # for i in range(0,len(filename_bkt)):
133 | # file = filename_bkt[i]
134 | # print(i)
135 | # print(file)
136 | # f = bz2.BZ2File(file, 'rb')
137 | # jsons_bkt.append(json.load(f))
138 | # f.close()
139 | # if i >= 50:
140 | # break
141 |
142 | # try:
143 | # filename_bkt = filename_bkt[50:]
144 | # except:
145 | # filename_bkt = []
146 |
147 | # master_md, times, md = parse_jsons(jsons_bkt, label)
148 |
149 | # for lbl in master_md:
150 | # print("\n==", lbl, len(master_md[lbl]))
151 | # for lbl_val in master_md[lbl]:
152 | # print("\t", lbl_val)
153 |
154 | # if select_label == "":
155 | # select_label = input("\n\nSelect a label to graph:\n")
156 | # try:
157 | # label_vals = master_md[select_label]
158 | # except:
159 | # print("Not a valid label. Exiting..")
160 | # exit(1)
161 |
162 | # graph = {}
163 | # for md_i in range(0, len(md)):
164 | # metadata = md[md_i]
165 | # try:
166 | # label_val = metadata[select_label]
167 | # except:
168 | # continue
169 |
170 | # try:
171 | # graph[label_val].extend(times[md_i])
172 | # except:
173 | # graph[label_val] = times[md_i]
174 |
175 |
176 | # for j in graph.keys():
177 | # lbls.add(j)
178 | # print("number of label values: ", len(graph.keys()))
179 |
180 | # for i in lbls:
181 | # print(i)
182 | # try:
183 | # x = dt.date2num(graph[i])
184 | # except:
185 | # continue
186 | # try:
187 | # val = label_axis[i]
188 | # y = [val]*len(x)
189 | # except:
190 | # val = len(label_axis)
191 | # label_axis[i] = val
192 | # y = [val]*len(x)
193 |
194 | # plt.plot(x, y, ',', color=colors[(val+1)%len(colors)])
195 |
196 |
197 | # del metadata
198 | # del md
199 | # del master_md
200 | # del times
201 |
202 | # title = select_label
203 | # plt.gcf().autofmt_xdate()
204 | # plt.suptitle(m_name)
205 | # plt.title(title)
206 | # plt.xlabel("Timestamp")
207 | # plt.xticks(rotation=25)
208 | # plt.ylabel("Value")
209 | # plt.yticks(np.arange(len(label_axis.keys())))
210 |
211 | # ax = plt.gca()
212 | # xfmt = dt.DateFormatter('%Y-%m-%d %H:%M:%S')
213 | # ax.xaxis.set_major_formatter(xfmt)
214 | # #plt.show()
215 | # plt.savefig(png_name)
216 | # plt.close()
217 |
218 | # # plot the legend table
219 | # plt.figure(figsize=(20,10))
220 | # print(label_axis.keys())
221 | # n_lbls = np.array(list(label_axis.keys()))
222 | # n_lbls.shape = (len(lbls), 1)
223 | # vals = np.array(list(label_axis.values()))
224 | # vals.shape = (len(vals), 1)
225 | # table_vals = np.append(vals, n_lbls, 1)
226 | # t = plt.table(cellText=table_vals, colLabels=["Number", label], cellLoc='center', loc='center')
227 | # # t.set_fontsize(18)
228 | # t.scale(1,3)
229 | # plt.axis("off")
230 | # plt.title("LEGEND")
231 | # plt.savefig(png_name2)
--------------------------------------------------------------------------------
/notebooks/.ipynb_checkpoints/Parse Json to Pandas Dataframes-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Parse Json to Pandas Dataframes\n",
8 |     "This script is used to convert json packets into a dictionary where the key is a unique metadata configuration and the value is a Pandas dataframe. The Pandas dataframe has a ds column and a y column holding the timestamps and the corresponding values of the time series. The dictionary is then stored in a Pickle file."
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import json\n",
18 | "import pandas as pd\n",
19 | "import fnmatch\n",
20 | "import os\n",
21 | "import bz2\n",
22 | "import pickle\n",
23 | "import gc"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "# read files in list and convert to pandas dataframes\n",
33 | "def load_files(files, file_format):\n",
34 | " dfs = {}\n",
35 | " for file in files:\n",
36 | " # check file format and read appropriately\n",
37 | " if file_format == \".json\":\n",
38 | " f = open(file, 'rb')\n",
39 | " else:\n",
40 | " f = bz2.BZ2File(file, 'rb')\n",
41 | " jsons = json.load(f)\n",
42 | " f.close()\n",
43 | "\n",
44 | " # iterate through packets in file\n",
45 | " for pkt in jsons:\n",
46 | " # create a new dataframe with packet timestamp and values\n",
47 | " df = pd.DataFrame.from_dict(pkt[\"values\"])\n",
48 | " df = df.rename( columns={0:\"ds\", 1:\"y\"})\n",
49 | " df[\"ds\"] = pd.to_datetime(df[\"ds\"], unit='s')\n",
50 | " df = df.sort_values(by=[\"ds\"])\n",
51 | " df.y = pd.to_numeric(df['y'], errors='coerce')\n",
52 | " df = df.dropna()\n",
53 | " md = str(pkt[\"metric\"])\n",
54 | " # append generated dataframe and metadata to collection\n",
55 | " try:\n",
56 | " dfs[md] = dfs[md].append(df, ignore_index=True)\n",
57 | " except:\n",
58 | " dfs[md] = df\n",
59 | " return dfs"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 3,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "# take a list of dataframes and their metadata and collapse to a\n",
69 | "# collection of unique time series (based on unique metadata)\n",
70 | "def collapse_to_unique(dfs_master, dfs_new):\n",
71 | " # iterate through metadata\n",
72 | " dfs_remaining = {}\n",
73 | " for md in dfs_new.keys():\n",
74 | " try:\n",
75 | " # find metadata in our master list\n",
76 | " # if this throws an error, simply add it to the list\n",
77 | " dfs_master[md] = dfs_master[md].append(dfs_new[md], ignore_index=True)\n",
78 | " except:\n",
79 | " dfs_remaining[md] = dfs_new[md]\n",
80 | " return dfs_master, dfs_remaining"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 4,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "# create pickle file containing data\n",
90 | "def save_checkpoint(pds, file):\n",
91 | " if file[-4:] != \".pkl\":\n",
92 | " file = file + \".pkl\"\n",
93 | " f = open(file, \"wb\")\n",
94 | " pickle.dump(pds, f)\n",
95 | " f.close()\n",
96 | " return file\n",
97 | "\n",
98 | "# load pickle file containing data\n",
99 | "def load_checkpoint(file):\n",
100 | " f = open(file, \"rb\")\n",
101 | " pds = pickle.load(f)\n",
102 | " f.close()\n",
103 | " return pds\n",
104 | "# remove all temp pickle files generated during this program\n",
105 | "def combine_checkpoints(master_file):\n",
106 | " df = {}\n",
107 | " files = os.listdir()\n",
108 | " for file in files:\n",
109 | " if fnmatch.fnmatch(file, \"collapsed_*.pkl\"):\n",
110 | " try:\n",
111 | " f = open(file, \"rb\")\n",
112 | " dfs = pickle.load(f)\n",
113 | " f.close()\n",
114 | " df.update(dfs)\n",
115 | " except:\n",
116 | " continue\n",
117 | " os.system(\"rm \" + file)\n",
118 | " elif fnmatch.fnmatch(file, \"raw_*.pkl\"):\n",
119 | " os.system(\"rm \" + file)\n",
120 | " f = open(master_file + \".pkl\", \"wb\")\n",
121 | " pickle.dump(df, f)\n",
122 | " f.close()"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 5,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "# load all files and convert to a list of pandas dataframes\n",
132 | "def convert_to_pandas(files, file_format, batch_size):\n",
133 | " checkpoints = []\n",
134 | " # # separate files into batches\n",
135 | " batches = [files[batch_size*i:batch_size*(i+1)] for i in range(int(len(files)/batch_size) + 1)]\n",
136 | " print(\"num_batches\", len(batches))\n",
137 | " i = 0\n",
138 | " for batch in batches:\n",
139 | " print(i)\n",
140 | " i += 1\n",
141 | " # get new portion of dataframes and add to master set\n",
142 | " pds_new = load_files(batch, file_format)\n",
143 | " cp = save_checkpoint(pds_new, \"raw_\" + str(i))\n",
144 | " checkpoints.append(cp)\n",
145 | " gc.collect()\n",
146 | "\n",
147 | " pds = []\n",
148 | " # iterate checkpoint by checkpoint and add data to unique collection\n",
149 | " # of time series\n",
150 | " collapsed_fs = []\n",
151 | " i = 0\n",
152 | " for cp in checkpoints:\n",
153 | " i += 1\n",
154 | " print(i)\n",
155 | " pds_new = load_checkpoint(cp)\n",
156 | " print(i)\n",
157 | " # load data in batches and combine dataframes\n",
158 | " for f in collapsed_fs:\n",
159 | " pds = load_checkpoint(f)\n",
160 | " pds, pds_new = collapse_to_unique(pds, pds_new)\n",
161 | " save_checkpoint(pds, f)\n",
162 | " gc.collect()\n",
163 | " if len(pds_new) > 0:\n",
164 | " f_new = save_checkpoint(pds_new, \"collapsed_\" + str(i)) \n",
165 | " print(\"Generated \", f_new)\n",
166 | " collapsed_fs.append(f_new) \n",
167 | " print(i)\n",
168 | " gc.collect()\n",
169 | " return pds"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 6,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "# get all appropriately formatted files in a folder\n",
179 | "def retrieve_filenames(path, file_format):\n",
180 | " filenames = []\n",
181 | " for file in os.listdir(path):\n",
182 | " # check if this file has correct ending (regex)\n",
183 | " if fnmatch.fnmatch(file, \"*\" + file_format):\n",
184 | " f_name = path + file\n",
185 | " filenames.append(f_name)\n",
186 | " return filenames\n",
187 | "\n",
188 | "# get main input arguments and return formatted data\n",
189 | "def read_input(data_folder, metric, file_format, batch_size):\n",
190 | " # metric-specific data folder\n",
191 | " folder = data_folder + metric + \"/\"\n",
192 | " # get all files in folder\n",
193 | " files = os.listdir(folder)\n",
194 | "\n",
195 | " # automatically detect metric type\n",
196 | " if \"quantile\" in files:\n",
197 | " metric_type = \"summary\"\n",
198 | " label = \"quantile\"\n",
199 | " filenames = retrieve_filenames(folder + \"quantile/\", file_format)\n",
200 | "# filenames_count = retrieve_filenames(folder + \"count/\", file_format)\n",
201 | "# filenames_sum = retrieve_filenames(folder + \"sum/\", file_format)\n",
202 | " elif \"bucket\" in files:\n",
203 | " metric_type = \"histogram\"\n",
204 | " label = \"le\"\n",
205 | " filenames = retrieve_filenames(folder + \"bucket/\", file_format)\n",
206 | "# filenames_count = retrieve_filenames(folder + \"count/\", file_format)\n",
207 | "# filenames_sum = retrieve_filenames(folder + \"sum/\", file_format)\n",
208 | " else:\n",
209 | " metric_type = \"counter/gauge\"\n",
210 | " label = \"\"\n",
211 | " filenames = retrieve_filenames(folder, file_format)\n",
212 | " \n",
213 | " pd_frames = convert_to_pandas(filenames, file_format, batch_size)\n",
214 | "\n",
215 | " return pd_frames"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 7,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "def main():\n",
225 | " print(\"Formatting Data\")\n",
226 | " pd_frames = read_input(input_dir, metric, fformat, batch_size)\n",
227 | " print(\"Conversion successful\")\n",
228 | "\n",
229 | " master_file = output_dir + metric\n",
230 | "\n",
231 | " combine_checkpoints(master_file)\n",
232 | "\n",
233 | " print(\"Saved data:\", master_file)"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {},
240 | "outputs": [
241 | {
242 | "name": "stdout",
243 | "output_type": "stream",
244 | "text": [
245 | "Formatting Data\n",
246 | "num_batches 11\n",
247 | "0\n"
248 | ]
249 | }
250 | ],
251 | "source": [
252 | "if __name__ == '__main__':\n",
253 | "\n",
254 | " # input parameters\n",
255 | " metric = \"http_request_duration_microseconds\"\n",
256 | " fformat='.json.bz2'\n",
257 | " input_dir = \"data/\"\n",
258 | " output_dir = \"\"\n",
259 | " batch_size= 20\n",
260 | "\n",
261 | " main()"
262 | ]
263 | }
264 | ],
265 | "metadata": {
266 | "kernelspec": {
267 | "display_name": "Python 3",
268 | "language": "python",
269 | "name": "python3"
270 | },
271 | "language_info": {
272 | "codemirror_mode": {
273 | "name": "ipython",
274 | "version": 3
275 | },
276 | "file_extension": ".py",
277 | "mimetype": "text/x-python",
278 | "name": "python",
279 | "nbconvert_exporter": "python",
280 | "pygments_lexer": "ipython3",
281 | "version": "3.6.5"
282 | }
283 | },
284 | "nbformat": 4,
285 | "nbformat_minor": 2
286 | }
287 |
--------------------------------------------------------------------------------
/notebooks/.ipynb_checkpoints/Prophet Model Forecasting-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "Requirement already satisfied: fbprophet in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (0.3.post2)\n",
13 | "Requirement already satisfied: Cython>=0.22 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from fbprophet) (0.28.2)\n",
14 | "Requirement already satisfied: pystan>=2.14 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from fbprophet) (2.17.1.0)\n",
15 | "Requirement already satisfied: numpy>=1.10.0 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from fbprophet) (1.14.3)\n",
16 | "Requirement already satisfied: pandas>=0.20.1 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from fbprophet) (0.23.0)\n",
17 | "Requirement already satisfied: matplotlib>=2.0.0 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from fbprophet) (2.2.2)\n",
18 | "Requirement already satisfied: python-dateutil>=2.5.0 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from pandas>=0.20.1->fbprophet) (2.7.3)\n",
19 | "Requirement already satisfied: pytz>=2011k in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from pandas>=0.20.1->fbprophet) (2018.4)\n",
20 | "Requirement already satisfied: cycler>=0.10 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from matplotlib>=2.0.0->fbprophet) (0.10.0)\n",
21 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from matplotlib>=2.0.0->fbprophet) (2.2.0)\n",
22 | "Requirement already satisfied: six>=1.10 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from matplotlib>=2.0.0->fbprophet) (1.11.0)\n",
23 | "Requirement already satisfied: kiwisolver>=1.0.1 in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from matplotlib>=2.0.0->fbprophet) (1.0.1)\n",
24 | "Requirement already satisfied: setuptools in /home/nfrumkin/anaconda3/lib/python3.6/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.0.0->fbprophet) (39.1.0)\n",
25 | "\u001b[33mYou are using pip version 10.0.1, however version 18.0 is available.\n",
26 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
27 | ]
28 | }
29 | ],
30 | "source": [
31 | "!pip install fbprophet\n",
32 | "from fbprophet import Prophet\n",
33 | "import pandas as pd\n",
34 | "import numpy as np\n",
35 | "import matplotlib.pylab as plt\n",
36 | "import datetime as dt\n",
37 | "import pickle"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 2,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "class ProphetForecast:\n",
47 | " def __init__(self, train, test):\n",
48 | " self.train = train\n",
49 | " self.test = test\n",
50 | "\n",
51 | " def fit_model(self, n_predict):\n",
52 | " m = Prophet(daily_seasonality=False, weekly_seasonality=False, yearly_seasonality=False)\n",
53 | " m.fit(self.train)\n",
54 | " future = m.make_future_dataframe(periods= len(self.test),freq= '1MIN')\n",
55 | " self.forecast = m.predict(future)\n",
56 | "\n",
57 | " return self.forecast\n",
58 | "\n",
59 | " def graph(self):\n",
60 | " fig = plt.figure(figsize=(40,10))\n",
61 | " plt.plot(np.array(self.train[\"ds\"]), np.array(self.train[\"y\"]),'b', label=\"train\", linewidth=3)\n",
62 | " plt.plot(np.array(self.test[\"ds\"]), np.array(self.test[\"y\"]), 'g', label=\"test\", linewidth=3)\n",
63 | "\n",
64 | " forecast_ds = np.array(self.forecast[\"ds\"])\n",
65 | " plt.plot(forecast_ds, np.array(self.forecast[\"yhat\"]), 'o', label=\"yhat\", linewidth=3)\n",
66 | " plt.plot(forecast_ds, np.array(self.forecast[\"yhat_upper\"]), 'y', label=\"yhat_upper\", linewidth=3)\n",
67 | " plt.plot(forecast_ds, np.array(self.forecast[\"yhat_lower\"]), 'y', label=\"yhat_lower\", linewidth=3)\n",
68 | " plt.xlabel(\"Timestamp\")\n",
69 | " plt.ylabel(\"Value\")\n",
70 | " plt.legend(loc=1)\n",
71 | " plt.title(\"Prophet Model Forecast\")\n",
72 | " plt.show()"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 3,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "def calc_delta(vals):\n",
82 | " diff = vals - np.roll(vals, 1)\n",
83 | " diff[0] = 0\n",
84 | " return diff\n",
85 | "\n",
86 | "def monotonically_inc(vals):\n",
87 | " # check corner case\n",
88 | " if len(vals) == 1:\n",
89 | " return True\n",
90 | " diff = calc_delta(vals)\n",
91 | " diff[np.where(vals == 0)] = 0\n",
92 | "\n",
93 | " if ((diff < 0).sum() == 0):\n",
94 | " return True\n",
95 | " else:\n",
96 | " return False"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 4,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "metric_name = \"http_request_duration_microseconds_quantile\"\n",
106 | "pkl_file = open(\"data/\" + metric_name + \"_dataframes.pkl\", \"rb\")\n",
107 | "dfs = pickle.load(pkl_file)\n",
108 | "pkl_file.close()\n",
109 | "key_vals = list(dfs.keys())"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [
117 | {
118 | "name": "stdout",
119 | "output_type": "stream",
120 | "text": [
121 | "{'__name__': 'http_request_duration_microseconds', 'beta_kubernetes_io_arch': 'amd64', 'beta_kubernetes_io_instance_type': 'm4.xlarge', 'beta_kubernetes_io_os': 'linux', 'failure_domain_beta_kubernetes_io_region': 'us-east-2', 'failure_domain_beta_kubernetes_io_zone': 'us-east-2a', 'handler': 'prometheus', 'hostname': 'free-stg-node-compute-e0756', 'instance': 'ip-172-31-76-144.us-east-2.compute.internal', 'job': 'kubernetes-nodes-exporter', 'kubernetes_io_hostname': 'ip-172-31-76-144.us-east-2.compute.internal', 'logging_infra_fluentd': 'true', 'node_role_kubernetes_io_compute': 'true', 'quantile': '0.99', 'region': 'us-east-2', 'type': 'compute'}\n"
122 | ]
123 | }
124 | ],
125 | "source": [
126 | "selected = [728,738]\n",
127 | "for ind in selected:\n",
128 | " key = key_vals[ind]\n",
129 | " df = dfs[key]\n",
130 | " df = df.sort_values(by=['timestamps'])\n",
131 | " print(key)\n",
132 | " df[\"values\"] = df[\"values\"].apply(pd.to_numeric)\n",
133 | " vals = np.array(df[\"values\"].tolist())\n",
134 | "\n",
135 | " df[\"ds\"] = df[\"timestamps\"]\n",
136 | " df[\"y\"] = df[\"values\"]\n",
137 | " # check if metric is a counter, if so, run AD on difference\n",
138 | " if monotonically_inc(vals):\n",
139 | " print(\"monotonically_inc\")\n",
140 | " vals = calc_delta(vals)\n",
141 | " df[\"values\"] = vals.tolist()\n",
142 | "\n",
143 | " train = df[0:int(0.7*len(vals))]\n",
144 | " test = df[int(0.7*len(vals)):]\n",
145 | "\n",
146 | " pf = ProphetForecast(train, test)\n",
147 | " forecast = pf.fit_model(len(test))\n",
148 | "\n",
149 | " pf.graph()"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": []
158 | }
159 | ],
160 | "metadata": {
161 | "kernelspec": {
162 | "display_name": "Python 3",
163 | "language": "python",
164 | "name": "python3"
165 | },
166 | "language_info": {
167 | "codemirror_mode": {
168 | "name": "ipython",
169 | "version": 3
170 | },
171 | "file_extension": ".py",
172 | "mimetype": "text/x-python",
173 | "name": "python",
174 | "nbconvert_exporter": "python",
175 | "pygments_lexer": "ipython3",
176 | "version": "3.6.5"
177 | }
178 | },
179 | "nbformat": 4,
180 | "nbformat_minor": 2
181 | }
182 |
--------------------------------------------------------------------------------
/notebooks/Parse Json to Pandas Dataframes.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Parse Json to Pandas Dataframes\n",
8 | "This script is used to convert json packets into a dictionary where the key is a unique metadata configuration and the value is a Pandas dataframe. The Pandas dataframe has a ds column and a y column corresponding to the timestamp and corresponding value in the time series. The dictionary is then stored in a Pickle file."
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import json\n",
18 | "import pandas as pd\n",
19 | "import fnmatch\n",
20 | "import os\n",
21 | "import bz2\n",
22 | "import pickle\n",
23 | "import gc"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "# read files in list and convert to pandas dataframes\n",
33 | "def load_files(files, file_format):\n",
34 | " dfs = {}\n",
35 | " for file in files:\n",
36 | " # check file format and read appropriately\n",
37 | " if file_format == \".json\":\n",
38 | " f = open(file, 'rb')\n",
39 | " else:\n",
40 | " f = bz2.BZ2File(file, 'rb')\n",
41 | " jsons = json.load(f)\n",
42 | " f.close()\n",
43 | "\n",
44 | " # iterate through packets in file\n",
45 | " for pkt in jsons:\n",
46 | " # create a new dataframe with packet timestamp and values\n",
47 | " df = pd.DataFrame.from_dict(pkt[\"values\"])\n",
48 | " df = df.rename( columns={0:\"ds\", 1:\"y\"})\n",
49 | " df[\"ds\"] = pd.to_datetime(df[\"ds\"], unit='s')\n",
50 | " df = df.sort_values(by=[\"ds\"])\n",
51 | " df.y = pd.to_numeric(df['y'], errors='coerce')\n",
52 | " df = df.dropna()\n",
53 | " md = str(pkt[\"metric\"])\n",
54 | " # append generated dataframe and metadata to collection\n",
55 | " try:\n",
56 | " dfs[md] = dfs[md].append(df, ignore_index=True)\n",
57 | " except:\n",
58 | " dfs[md] = df\n",
59 | " return dfs"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 3,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "# take a list of dataframes and their metadata and collapse to a\n",
69 | "# collection of unique time series (based on unique metadata)\n",
70 | "def collapse_to_unique(dfs_master, dfs_new):\n",
71 | " # iterate through metadata\n",
72 | " dfs_remaining = {}\n",
73 | " for md in dfs_new.keys():\n",
74 | " try:\n",
75 | " # find metadata in our master list\n",
76 | " # if this throws an error, simply add it to the list\n",
77 | " dfs_master[md] = dfs_master[md].append(dfs_new[md], ignore_index=True)\n",
78 | " except:\n",
79 | " dfs_remaining[md] = dfs_new[md]\n",
80 | " return dfs_master, dfs_remaining"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 4,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "# create pickle file containing data\n",
90 | "def save_checkpoint(pds, file):\n",
91 | " if file[-4:] != \".pkl\":\n",
92 | " file = file + \".pkl\"\n",
93 | " f = open(file, \"wb\")\n",
94 | " pickle.dump(pds, f)\n",
95 | " f.close()\n",
96 | " return file\n",
97 | "\n",
98 | "# load pickle file containing data\n",
99 | "def load_checkpoint(file):\n",
100 | " f = open(file, \"rb\")\n",
101 | " pds = pickle.load(f)\n",
102 | " f.close()\n",
103 | " return pds\n",
104 | "# remove all temp pickle files generated during this program\n",
105 | "def combine_checkpoints(master_file):\n",
106 | " df = {}\n",
107 | " files = os.listdir()\n",
108 | " for file in files:\n",
109 | " if fnmatch.fnmatch(file, \"collapsed_*.pkl\"):\n",
110 | " try:\n",
111 | " f = open(file, \"rb\")\n",
112 | " dfs = pickle.load(f)\n",
113 | " f.close()\n",
114 | " df.update(dfs)\n",
115 | " except:\n",
116 | " continue\n",
117 | " os.system(\"rm \" + file)\n",
118 | " elif fnmatch.fnmatch(file, \"raw_*.pkl\"):\n",
119 | " os.system(\"rm \" + file)\n",
120 | " f = open(master_file + \".pkl\", \"wb\")\n",
121 | " pickle.dump(df, f)\n",
122 | " f.close()"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 5,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "# load all files and convert to a list of pandas dataframes\n",
132 | "def convert_to_pandas(files, file_format, batch_size):\n",
133 | " checkpoints = []\n",
134 | " # # separate files into batches\n",
135 | " batches = [files[batch_size*i:batch_size*(i+1)] for i in range(int(len(files)/batch_size) + 1)]\n",
136 | " print(\"num_batches\", len(batches))\n",
137 | " i = 0\n",
138 | " for batch in batches:\n",
139 | " print(i)\n",
140 | " i += 1\n",
141 | " # get new portion of dataframes and add to master set\n",
142 | " pds_new = load_files(batch, file_format)\n",
143 | " cp = save_checkpoint(pds_new, \"raw_\" + str(i))\n",
144 | " checkpoints.append(cp)\n",
145 | " gc.collect()\n",
146 | "\n",
147 | " pds = []\n",
148 | " # iterate checkpoint by checkpoint and add data to unique collection\n",
149 | " # of time series\n",
150 | " collapsed_fs = []\n",
151 | " i = 0\n",
152 | " for cp in checkpoints:\n",
153 | " i += 1\n",
154 | " print(i)\n",
155 | " pds_new = load_checkpoint(cp)\n",
156 | " print(i)\n",
157 | " # load data in batches and combine dataframes\n",
158 | " for f in collapsed_fs:\n",
159 | " pds = load_checkpoint(f)\n",
160 | " pds, pds_new = collapse_to_unique(pds, pds_new)\n",
161 | " save_checkpoint(pds, f)\n",
162 | " gc.collect()\n",
163 | " if len(pds_new) > 0:\n",
164 | " f_new = save_checkpoint(pds_new, \"collapsed_\" + str(i)) \n",
165 | " print(\"Generated \", f_new)\n",
166 | " collapsed_fs.append(f_new) \n",
167 | " print(i)\n",
168 | " gc.collect()\n",
169 | " return pds"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 6,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "# get all appropriately formatted files in a folder\n",
179 | "def retrieve_filenames(path, file_format):\n",
180 | " filenames = []\n",
181 | " for file in os.listdir(path):\n",
182 | " # check if this file has correct ending (regex)\n",
183 | " if fnmatch.fnmatch(file, \"*\" + file_format):\n",
184 | " f_name = path + file\n",
185 | " filenames.append(f_name)\n",
186 | " return filenames\n",
187 | "\n",
188 | "# get main input arguments and return formatted data\n",
189 | "def read_input(data_folder, metric, file_format, batch_size):\n",
190 | " # metric-specific data folder\n",
191 | " folder = data_folder + metric + \"/\"\n",
192 | " # get all files in folder\n",
193 | " files = os.listdir(folder)\n",
194 | "\n",
195 | " # automatically detect metric type\n",
196 | " if \"quantile\" in files:\n",
197 | " metric_type = \"summary\"\n",
198 | " label = \"quantile\"\n",
199 | " filenames = retrieve_filenames(folder + \"quantile/\", file_format)\n",
200 | "# filenames_count = retrieve_filenames(folder + \"count/\", file_format)\n",
201 | "# filenames_sum = retrieve_filenames(folder + \"sum/\", file_format)\n",
202 | " elif \"bucket\" in files:\n",
203 | " metric_type = \"histogram\"\n",
204 | " label = \"le\"\n",
205 | " filenames = retrieve_filenames(folder + \"bucket/\", file_format)\n",
206 | "# filenames_count = retrieve_filenames(folder + \"count/\", file_format)\n",
207 | "# filenames_sum = retrieve_filenames(folder + \"sum/\", file_format)\n",
208 | " else:\n",
209 | " metric_type = \"counter/gauge\"\n",
210 | " label = \"\"\n",
211 | " filenames = retrieve_filenames(folder, file_format)\n",
212 | " \n",
213 | " pd_frames = convert_to_pandas(filenames, file_format, batch_size)\n",
214 | "\n",
215 | " return pd_frames"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 7,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "def main():\n",
225 | " print(\"Formatting Data\")\n",
226 | " pd_frames = read_input(input_dir, metric, fformat, batch_size)\n",
227 | " print(\"Conversion successful\")\n",
228 | "\n",
229 | " master_file = output_dir + metric\n",
230 | "\n",
231 | " combine_checkpoints(master_file)\n",
232 | "\n",
233 | " print(\"Saved data:\", master_file)"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 8,
239 | "metadata": {},
240 | "outputs": [
241 | {
242 | "name": "stdout",
243 | "output_type": "stream",
244 | "text": [
245 | "Formatting Data\n",
246 | "num_batches 11\n",
247 | "0\n",
248 | "1\n",
249 | "2\n",
250 | "3\n",
251 | "4\n",
252 | "5\n",
253 | "6\n",
254 | "7\n",
255 | "8\n",
256 | "9\n",
257 | "10\n",
258 | "1\n",
259 | "1\n",
260 | "Generated collapsed_1.pkl\n",
261 | "1\n",
262 | "2\n",
263 | "2\n",
264 | "Generated collapsed_2.pkl\n",
265 | "2\n",
266 | "3\n",
267 | "3\n",
268 | "Generated collapsed_3.pkl\n",
269 | "3\n",
270 | "4\n",
271 | "4\n",
272 | "Generated collapsed_4.pkl\n",
273 | "4\n",
274 | "5\n",
275 | "5\n",
276 | "Generated collapsed_5.pkl\n",
277 | "5\n",
278 | "6\n",
279 | "6\n",
280 | "Generated collapsed_6.pkl\n",
281 | "6\n",
282 | "7\n",
283 | "7\n",
284 | "Generated collapsed_7.pkl\n",
285 | "7\n",
286 | "8\n",
287 | "8\n",
288 | "Generated collapsed_8.pkl\n",
289 | "8\n",
290 | "9\n",
291 | "9\n",
292 | "Generated collapsed_9.pkl\n",
293 | "9\n",
294 | "10\n",
295 | "10\n",
296 | "Generated collapsed_10.pkl\n",
297 | "10\n",
298 | "11\n",
299 | "11\n",
300 | "11\n",
301 | "Conversion successful\n",
302 | "Saved data: http_request_duration_microseconds\n"
303 | ]
304 | }
305 | ],
306 | "source": [
307 | "if __name__ == '__main__':\n",
308 | "\n",
309 | " # input parameters\n",
310 | " metric = \"http_request_duration_microseconds\"\n",
311 | " fformat='.json.bz2'\n",
312 | " input_dir = \"data/\"\n",
313 | " output_dir = \"\"\n",
314 | " batch_size= 20\n",
315 | "\n",
316 | " main()"
317 | ]
318 | }
319 | ],
320 | "metadata": {
321 | "kernelspec": {
322 | "display_name": "Python 3",
323 | "language": "python",
324 | "name": "python3"
325 | },
326 | "language_info": {
327 | "codemirror_mode": {
328 | "name": "ipython",
329 | "version": 3
330 | },
331 | "file_extension": ".py",
332 | "mimetype": "text/x-python",
333 | "name": "python",
334 | "nbconvert_exporter": "python",
335 | "pygments_lexer": "ipython3",
336 | "version": "3.6.5"
337 | }
338 | },
339 | "nbformat": 4,
340 | "nbformat_minor": 2
341 | }
342 |
--------------------------------------------------------------------------------
/notebooks/imgs/arima.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/arima.png
--------------------------------------------------------------------------------
/notebooks/imgs/detect_anomaly_accumulator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/detect_anomaly_accumulator.png
--------------------------------------------------------------------------------
/notebooks/imgs/detect_anomaly_combined.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/detect_anomaly_combined.png
--------------------------------------------------------------------------------
/notebooks/imgs/detect_anomaly_tail_prob.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/detect_anomaly_tail_prob.png
--------------------------------------------------------------------------------
/notebooks/imgs/example_ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/example_ts.png
--------------------------------------------------------------------------------
/notebooks/imgs/exp_smoothing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/exp_smoothing.png
--------------------------------------------------------------------------------
/notebooks/imgs/fourier_extrapolation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/fourier_extrapolation.png
--------------------------------------------------------------------------------
/notebooks/imgs/fourier_extrapolation_behind.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/fourier_extrapolation_behind.png
--------------------------------------------------------------------------------
/notebooks/imgs/imgs/detect_anomaly_accumulator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/imgs/detect_anomaly_accumulator.png
--------------------------------------------------------------------------------
/notebooks/imgs/imgs/detect_anomaly_combined.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/imgs/detect_anomaly_combined.png
--------------------------------------------------------------------------------
/notebooks/imgs/imgs/detect_anomaly_tail_prob.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/imgs/detect_anomaly_tail_prob.png
--------------------------------------------------------------------------------
/notebooks/imgs/imgs/example_ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/imgs/example_ts.png
--------------------------------------------------------------------------------
/notebooks/imgs/imgs/fourier_extrapolation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/imgs/fourier_extrapolation.png
--------------------------------------------------------------------------------
/notebooks/imgs/imgs/fourier_extrapolation_behind.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/imgs/fourier_extrapolation_behind.png
--------------------------------------------------------------------------------
/notebooks/imgs/imgs/partitioned_ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/imgs/partitioned_ts.png
--------------------------------------------------------------------------------
/notebooks/imgs/kubelet_docker_instance_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/kubelet_docker_instance_label.png
--------------------------------------------------------------------------------
/notebooks/imgs/kubelet_docker_op_type_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/kubelet_docker_op_type_label.png
--------------------------------------------------------------------------------
/notebooks/imgs/partitioned_ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/partitioned_ts.png
--------------------------------------------------------------------------------
/notebooks/imgs/prophet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/prophet.png
--------------------------------------------------------------------------------
/notebooks/imgs/t-sne_embedding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/notebooks/imgs/t-sne_embedding.png
--------------------------------------------------------------------------------
/presentations/devconf_presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/presentations/devconf_presentation.pdf
--------------------------------------------------------------------------------
/presentations/final_presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/presentations/final_presentation.pdf
--------------------------------------------------------------------------------
/presentations/lightning_talk.ppdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/presentations/lightning_talk.ppdf
--------------------------------------------------------------------------------
/presentations/mid-summer_presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/presentations/mid-summer_presentation.pdf
--------------------------------------------------------------------------------
/presentations/pipeline_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AICoE/prometheus-data-science/72cc52b808274fc5b7b6623ef42e5abec7255667/presentations/pipeline_arch.png
--------------------------------------------------------------------------------
/prophet_train.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | from fbprophet import Prophet
3 | import pandas as pd
4 | import numpy as np
5 | import matplotlib.pylab as plt
6 | import datetime as dt
7 | import argparse
8 |
9 | class ProphetForecast:
10 | def __init__(self, train, test):
11 | self.train = train
12 | self.test = test
13 |
14 | def fit_model(self, n_predict):
15 | m = Prophet(daily_seasonality=False, weekly_seasonality=False, yearly_seasonality=False)
16 | m.fit(self.train)
17 | future = m.make_future_dataframe(periods= len(self.test),freq= '1MIN')
18 | self.forecast = m.predict(future)
19 |
20 | return self.forecast
21 |
22 | def graph(self):
23 | fig = plt.figure(figsize=(40,10))
24 | # plt.plot(np.array(self.train["ds"]), np.array(self.train["y"]),'b', label="train", linewidth=3)
25 | # plt.plot(np.array(self.test["ds"]), np.array(self.test["y"]), 'g', label="test", linewidth=3)
26 | ds_forecast = np.array(self.forecast["ds"])
27 | forecast = np.array(self.forecast["yhat"])
28 |
29 | forecast_lower = np.array(self.forecast["yhat_lower"])
30 | forecast_upper = np.array(self.forecast["yhat_upper"])
31 |
32 | ds_forecast = ds_forecast[len(self.train["y"]):]
33 | forecast = forecast[len(self.train["y"]):]
34 | forecast_upper = forecast_upper[len(self.train["y"]):]
35 | forecast_lower = forecast_lower[len(self.train["y"]):]
36 | plt.plot(self.train["ds"], self.train["y"], 'b', label = 'train', linewidth = 3)
37 | plt.plot(self.test["ds"], self.test["y"], 'g', label = 'test', linewidth = 3)
38 | plt.plot(ds_forecast,forecast, 'y', label = 'yhat')
39 | forecast_ds = np.array(self.forecast["ds"])
40 | # plt.plot(forecast_ds, np.array(self.forecast["yhat"]), 'o', label="yhat", linewidth=3)
41 | plt.plot(ds_forecast, forecast_upper, 'y', label="yhat_upper", linewidth=3)
42 | plt.plot(ds_forecast, forecast_lower, 'y', label="yhat_lower", linewidth=3)
43 | plt.xlabel("Timestamp")
44 | plt.ylabel("Value")
45 | plt.legend(loc=1)
46 | plt.title("Prophet Model Forecast")
47 |
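48 | # Prometheus counters only ever increase (apart from resets to zero), so when a
49 | # series is detected as monotonically increasing the model is fit on its
50 | # per-sample delta rather than on the raw cumulative values.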
48 | def calc_delta(vals):
49 | diff = vals - np.roll(vals, 1)
50 | diff[0] = 0
51 | return diff
52 |
53 | def monotonically_inc(vals):
54 | # check corner case
55 | if len(vals) == 1:
56 | return True
57 | diff = calc_delta(vals)
58 | diff[np.where(vals == 0)] = 0
59 |
60 | if ((diff < 0).sum() == 0):
61 | return True
62 | else:
63 | return False
64 |
65 | if __name__ == "__main__":
66 |
67 | parser = argparse.ArgumentParser(description="run Prophet training on time series")
68 |
69 | parser.add_argument("--metric", type=str, help='metric name', required=True)
70 |
71 | parser.add_argument("--key", type=int, help='key number')
72 | args = parser.parse_args()
73 |
74 | metric_name = args.metric
75 | # pkl_file = open("../pkl_data/" + metric_name + "_dataframes.pkl", "rb")
76 | pkl_file = open("../data/real_data_test.pkl", "rb")
77 | dfs = pickle.load(pkl_file)
78 | pkl_file.close()
79 | key_vals = list(dfs.keys())
80 |
81 | selected = [args.key]
82 | for ind in selected:
83 | key = key_vals[ind]
84 | df = dfs[key]
85 | #df = dfs["{'__name__': 'kubelet_docker_operations_latency_microseconds', 'beta_kubernetes_io_arch': 'amd64', 'beta_kubernetes_io_os': 'linux', 'instance': 'cpt-0001.ocp.prod.upshift.eng.rdu2.redhat.com', 'job': 'kubernetes-nodes', 'kubernetes_io_hostname': 'cpt-0001.ocp.prod.upshift.eng.rdu2.redhat.com', 'operation_type': 'version', 'provider': 'rhos', 'quantile': '0.5', 'region': 'compute', 'size': 'small'}"]
86 | df["ds"] = df["timestamps"]
87 | df["y"] = df["values"]
88 | df = df.sort_values(by=['ds'])
89 | print(key)
90 | df["y"] = df["y"].apply(pd.to_numeric)
91 | vals = np.array(df["y"].tolist())
92 |
95 | # check if metric is a counter, if so, run AD on difference
96 | if monotonically_inc(vals):
97 | print("monotonically_inc")
98 | vals = calc_delta(vals)
99 | df["y"] = vals.tolist()
100 |
101 | train = df[0:int(0.7*len(vals))]
102 | test = df[int(0.7*len(vals)):]
103 |
104 | pf = ProphetForecast(train, test)
105 | forecast = pf.fit_model(len(test))
106 |
107 | f = open("../prophet_forecasts/prophet_model_" + metric_name + "_" + str(args.key) + ".pkl", "wb")
108 | pickle.dump(forecast,f)
109 | print(type(forecast))
110 | pickle.dump(train, f)
111 | pickle.dump(test,f)
112 | f.close()
113 |
114 | pf.graph()
115 | plt.savefig("../presentation/graphs/prophet_" + str(args.key) + "_" + args.metric + ".png", transparent=True)
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
/run_compare_mdls.sh:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | METRIC='http_request_duration_microseconds_quantile'
4 | KEY=60
5 | python prophet_train.py --metric $METRIC --key $KEY
6 | python fourier_train.py --metric $METRIC --key $KEY
7 | python compare_fourier_prophet.py --metric $METRIC --key $KEY
8 |
--------------------------------------------------------------------------------