├── .circleci └── config.yml ├── .gitignore ├── DATAFORMAT.md ├── Dockerfile ├── METRICS.md ├── Makefile ├── README.md ├── docker-compose.yaml ├── requirements.txt ├── setup.py ├── tests ├── helpers │ ├── __init__.py │ └── utils.py ├── test_integration_function.py ├── test_process_output.py └── test_usage_report.py └── usage_report ├── __init__.py ├── annotations ├── annotations_fxhealth.json ├── annotations_hardware.json ├── annotations_webusage.json └── readme.md ├── resources └── experiments.json ├── usage_report.py └── utils ├── __init__.py ├── activeuser.py ├── avg_daily_usage.py ├── avg_intensity.py ├── helpers.py ├── localedistribution.py ├── newuser.py ├── osdistribution.py ├── pct_addon.py ├── pct_latest_version.py ├── process_output.py ├── s3_utils.py ├── top10addons.py └── trackingprotection.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | #################### 2 | # CircleCI configuration reference: 3 | # https://circleci.com/docs/2.0/configuration-reference 4 | #################### 5 | 6 | version: 2 7 | 8 | ##################################################### 9 | # Jobs: see https://circleci.com/docs/2.0/jobs-steps/ 10 | ##################################################### 11 | 12 | jobs: 13 | test: 14 | docker: 15 | - image: mozilla/cidockerbases:docker-latest 16 | steps: 17 | - checkout 18 | - setup_remote_docker 19 | - run: 20 | name: Build image 21 | command: make build 22 | - run: 23 | name: Test Code 24 | command: make test 25 | - run: 26 | name: Lint 27 | command: make lint 28 | 29 | workflows: 30 | version: 2 31 | build-test-deploy: 32 | jobs: 33 | - test: 34 | filters: 35 | tags: 36 | only: /.*/ 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .tox 2 | .pytest_cache 3 | .coverage* 4 | Fx_Usage_Report.egg-info 5 | *.pyc 6 | -------------------------------------------------------------------------------- /DATAFORMAT.md: -------------------------------------------------------------------------------- 1 | # Data Format 2 | 3 | This job will first process the metrics into pandas dataframes with the following fields: 4 | 5 | `submission_date_s3|country| metric1| metric2| etc.|` 6 | 7 | or 8 | 9 | `submission_date_s3|country| metric| dimension| value|` 10 | 11 | However, because Ensemble requires the data to be in a specific Ensemble JSON format, the data is kept in a different reshaped form: 12 | 13 | ``` 14 | { 15 | "Germany": [ 16 | { 17 | "date": "2017-01-01", 18 | "metrics": { 19 | "YAU": 999, 20 | "etc": "etc", 21 | "locale": { 22 | "DE": 0.99, 23 | "etc": "etc" 24 | } 25 | } 26 | }, 27 | { 28 | "date": "etc", 29 | "metrics": { 30 | "etc": "etc" 31 | } 32 | } 33 | ], 34 | "United States": [ 35 | { 36 | "date": "etc", 37 | "metrics": { 38 | "etc": "etc" 39 | } 40 | } 41 | ] 42 | } 43 | ``` 44 | 45 | The job will use the processed pandas tables to update the Ensemble JSON kept in the S3 bucket. 
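For orientation, here is a minimal sketch of that reshaping and history-update step. The production logic lives in `usage_report/utils/process_output.py` (`all_metrics_per_day` builds the per-day entries and `update_history` merges them into the existing JSON); the function names and pandas manipulation below are illustrative only. Dimensional tables (locale distribution, top add-ons) are folded into the same per-day `metrics` object as nested dictionaries, which is why `locale` appears as a sub-object in the example above.

```python
# Minimal sketch of the reshape + history update; the production logic lives in
# usage_report/utils/process_output.py (all_metrics_per_day / update_history).
# Function names and the exact pandas manipulation here are illustrative only.
import pandas as pd


def reshape_week(usage_df):
    """Fold `submission_date_s3|country|metric1|metric2|...` rows into
    {country: [{"date": ..., "metrics": {...}}]} as described above."""
    output = {}
    for _, row in usage_df.iterrows():
        # e.g. "20180201" -> "2018-02-01", the date format used in the JSON
        date = pd.to_datetime(row["submission_date_s3"]).strftime("%Y-%m-%d")
        metrics = row.drop(["submission_date_s3", "country"]).to_dict()
        output.setdefault(row["country"], []).append({"date": date, "metrics": metrics})
    return output


def append_to_history(new_week, history):
    """Append the new week's entries to the historical JSON pulled from S3
    (or start a fresh history when none exists yet)."""
    history = history or {}
    for country, entries in new_week.items():
        history.setdefault(country, []).extend(entries)
    return history
```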
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8 2 | 3 | # add a non-privileged user for running the application 4 | RUN groupadd --gid 10001 app && \ 5 | useradd -g app --uid 10001 --shell /usr/sbin/nologin --create-home --home-dir /app app 6 | 7 | WORKDIR /app 8 | 9 | # Install python 10 | RUN apt-get update && \ 11 | apt-get -y --no-install-recommends install python2.7 python-pip python-setuptools 12 | 13 | ENV PYTHONPATH $PYTHONPATH:/app/usage_report:/app/tests 14 | 15 | COPY requirements.txt /app 16 | RUN pip install --upgrade pip 17 | RUN pip install -r requirements.txt 18 | 19 | COPY . /app 20 | 21 | USER app 22 | -------------------------------------------------------------------------------- /METRICS.md: -------------------------------------------------------------------------------- 1 | # Metric Descriptions 2 | 3 | #### User Activity 4 | 5 | | Metric name / Code Ref | Description | 6 | |------------------------------|-------------| 7 | | Yearly Active User / `YAU` | The number of clients who used Firefox in the past 365 days. | 8 | | Monthly Active Users / `MAU` | The number of clients who used Firefox in the past 28 days. | 9 | | Daily Usage / `avg_daily_usage(hours)` | Average daily use of a typical client from the past 7 days. Calculated by getting the average daily use for each client from the last week (on days they used), and then averaging across all clients. | 10 | | Average Intensity / `avg_intensity` | Average daily intensity of use of a typical client from the past 7 days. Intensity of use is defined as the proportion of the time a client is interacting with the browser when the browser is open. Calculated by getting the average daily intensity for each client from the last week (on days they used), and then averaging across all clients. | 11 | | New Profile Rate / `pct_new_user` | Percentage of WAU (clients who used Firefox in the past 7 days) that are new clients (created profile that week). | 12 | | Latest Version / `pct_latest_version` | Percentage of WAU on the newest version (or newer) of Firefox (for that week). Note, Firefox updates are often released with different throttling rates (i.e. 10% of population in week 1, etc.). | 13 | 14 | #### Usage Behavior 15 | 16 | | Metric name / Code Ref | Description | 17 | |------------------------------|-------------| 18 | | Top Languages / `locale, locale, pct_on_locale` | Percentage of WAU on each language setting (locale). Top 5 per week only. | 19 | | Always On Tracking Protection / `pct_TP` | Percentage of WAU with Always On Tracking Protection enabled for default browsing. Note, this pref was not exposed to users until Firefox 57 (2017-11-14) and does not include Private Browsing Mode. | 20 | | Has Add-on / `pct_addon` | Percentage of WAU with at least 1 user installed addon. | 21 | | Top Add-ons / `top10addons, addon_name, pct_with_addon` | The top 10 most common user installed addons from the last 7 days. 
| 22 | 23 | 24 | 25 |
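#### Calculation Sketch

The Daily Usage and Average Intensity metrics above are both computed as a two-step average: a per-client daily mean (over the days each client was active), followed by a mean across clients. The real implementations live in `usage_report/utils/avg_daily_usage.py` (`get_daily_avg_session`) and `usage_report/utils/avg_intensity.py` (`get_avg_intensity`); the sketch below only illustrates that averaging. It assumes `subsession_length` is recorded in seconds and that each `active_tick` covers about 5 seconds (consistent with the unit-test fixtures, where 20 ticks against a 100-second subsession gives an intensity of 1.0), and it omits the date filtering and country breakdown that the real code performs.

```python
# Illustrative sketch only -- see usage_report/utils/avg_daily_usage.py and
# usage_report/utils/avg_intensity.py for the production implementations.
from pyspark.sql import functions as F


def sketch_avg_daily_usage(week_df):
    # week_df: main_summary rows already filtered to the 7-day window.
    per_client_day = (week_df
                      .groupBy("client_id", "submission_date_s3")
                      .agg(F.sum("subsession_length").alias("seconds")))
    # Average over the days each client actually used the browser...
    per_client = per_client_day.groupBy("client_id").agg(F.avg("seconds").alias("avg_seconds"))
    # ...then average across clients and convert seconds to hours.
    return per_client.agg((F.avg("avg_seconds") / 3600).alias("avg_daily_usage(hours)"))


def sketch_avg_intensity(week_df):
    # Intensity: fraction of open-browser time spent interacting,
    # assuming one active tick is roughly 5 seconds of activity.
    per_client_day = (week_df
                      .groupBy("client_id", "submission_date_s3")
                      .agg((F.sum("active_ticks") * 5 / F.sum("subsession_length"))
                           .alias("intensity")))
    per_client = per_client_day.groupBy("client_id").agg(F.avg("intensity").alias("avg_intensity"))
    return per_client.agg(F.avg("avg_intensity").alias("avg_intensity"))
```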
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help clean test coverage release build 2 | 3 | help: 4 | @echo " lint - check style with flake8" 5 | @echo " test - run tests quickly with the default Python" 6 | @echo " build - Builds the docker images for the docker-compose setup" 7 | @echo " clean - Stops and removes all docker containers" 8 | @echo " run - Run a command" 9 | @echo " shell - Opens a Bash shell" 10 | 11 | lint: 12 | docker-compose run app flake8 usage_report tests --max-line-length 100 13 | 14 | test: 15 | docker-compose run app py.test 16 | 17 | build: 18 | docker-compose build 19 | 20 | clean: stop 21 | docker-compose rm -f 22 | 23 | shell: 24 | docker-compose run app bash 25 | 26 | run: 27 | docker-compose run app $(COMMAND) 28 | 29 | stop: 30 | docker-compose down 31 | docker-compose stop 32 | 33 | up: 34 | docker-compose up 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Firefox Public Data 2 | 3 | The [Firefox Public Data](https://metrics.mozilla.com/protected/usage-report-demo/dashboard/user-activity) (FxPD) project is a public-facing website which tracks various metrics over time and helps the general public understand what kind of data is being tracked by Mozilla and how it is used. It is modeled after and evolved out of the [Firefox Hardware Report](https://hardware.metrics.mozilla.com/), which is now included as a part of FxPD. 4 | 5 | This repository contains the code used to pull and process the data for the **User Activity** and **Usage Behavior** subsections of the **Desktop** section of the report. 6 | 7 | The website itself is generated by the [Ensemble](https://github.com/mozilla/ensemble) and [Ensemble Transposer](https://github.com/mozilla/ensemble-transposer) repos. 8 | 9 | # Data 10 | 11 | The data is pulled from Firefox desktop [telemetry](https://wiki.mozilla.org/Telemetry), specifically the [main summary](https://docs.telemetry.mozilla.org/datasets/batch_view/main_summary/reference.html) view of the data. 12 | 13 | The data is on a weekly resolution (one datapoint per week), and includes the metrics below. The metrics are estimated from a 10% sample of the Release, Beta, ESR, and Other channels, and broken down by the top 10 countries and a worldwide overall aggregate. The historical data is kept in an S3 bucket as a JSON file. 14 | 15 | This job (the repo) is designed to be run once a week and will produce the data for a single week. It will then update the historical data in the S3 bucket. 16 | 17 | For backfills, this job needs to be run for each week of the backfill. 18 | 19 | 20 | #### Metrics 21 | 22 | For the list of metrics, see [METRICS.md](METRICS.md). 23 | 24 | #### Data Structure 25 | 26 | For a description of the structure of the data output, see [DATAFORMAT.md](DATAFORMAT.md). 27 | 28 | # Developing 29 | 30 | #### Run the Job 31 | 32 | To initiate a test run of this job, you can clone this repo onto an ATMO cluster. First run 33 | 34 | $ pip install py4j --upgrade 35 | 36 | from your cluster console to get the latest version of `py4j`. 37 | 38 | 39 | Next, from the repo's top-level directory, run: 40 | 41 | $ python usage_report/usage_report.py --date [some date, e.g. 20180201] --no-output 42 | 43 | which will aggregate usage statistics from the last 7 days by default. When testing, it is recommended to set the `--lag-days` flag to `1` for quicker iterations, e.g. 44 | 45 | $ python usage_report/usage_report.py --date 20180201 --lag-days 1 --no-output 46 | 47 | *Note: there is currently no output to S3, so testing like this is not a problem. However, when testing runs in this way, always make sure to include the flag* `--no-output` 48 | 49 | #### Testing 50 | 51 | Each metric has its own set of unit tests. Code to extract a particular metric is found in `.py` files in `usage_report/utils/`, which are integrated in `usage_report/usage_report.py`. 52 | 53 | To run these tests, first ensure you have Docker installed. Then build the container using 54 | 55 | $ make build 56 | 57 | then run the tests with 58 | 59 | $ make test 60 | 61 | finally, 62 | 63 | $ make lint 64 | 65 | runs the linter. 66 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | app: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | restart: "no" 9 | command: "true" 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | arrow==0.10.0 2 | boto3==1.9.199 3 | click==6.7 4 | click_datetime==0.2 5 | flake8==3.7.8 6 | numpy==1.13.3 7 | pandas==0.24.2 8 | pyspark==2.2.2 9 | pytest==4.6.4 10 | scipy==1.0.0rc1 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages 3 | 4 | test_deps = [ 5 | 'coverage', 6 | 'pytest', 7 | 'pytest-cov', 8 | 'pytest-timeout', 9 | 'moto', 10 | 'mock', 11 | ] 12 | 13 | extras = { 14 | 'testing': test_deps, 15 | } 16 | 17 | setup( 18 | name='fx_usage_report', 19 | version='0.1', 20 | description='Python ETL job for the Firefox Usage Report', 21 | author='Firefox Public Data Platform', 22 | author_email='fx-public-data@mozilla.com', 23 | url='https://github.com/mozilla/Fx_Usage_Report.git', 24 | packages=find_packages(exclude=['tests']), 25 | include_package_data=True, 26 | install_requires=[ 27 | 'arrow==0.10.0', 28 | 'click==6.7', 29 | 'click_datetime==0.2', 30 | 'numpy==1.13.3', 31 | 'pyspark==2.2.0.post0', 32 | 'scipy==1.0.0rc1', 33 | ], 34 | tests_require=test_deps, 35 | extras_require=extras, 36 | ) 37 | -------------------------------------------------------------------------------- /tests/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/Fx_Usage_Report/489ca258b14776c01f3021080b2dd686d239dea3/tests/helpers/__init__.py -------------------------------------------------------------------------------- /tests/helpers/utils.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import Row 2 | from collections import OrderedDict 3 | 4 | 5 | def is_same(spark, df, expected, verbose=False): 6 | expected_df = spark.sparkContext \ 7 | .parallelize(expected) \ 8 | .map(lambda r: Row(**OrderedDict(sorted(r.items())))) \ 9 | .toDF() 10 | 11 | cols = sorted(df.columns) 12 | intersection = df.select(*cols).intersect(expected_df) 13 
| df_len, expected_len, actual_len = df.count(), expected_df.count(), intersection.count() 14 | 15 | if verbose: 16 | print "\nInput Dataframe\n" 17 | print df.select(*cols).collect() 18 | print "\nExpected Dataframe\n" 19 | print expected_df.collect() 20 | 21 | assert df_len == expected_len 22 | assert actual_len == expected_len, "Missing {} Rows".format(expected_len - actual_len) 23 | -------------------------------------------------------------------------------- /tests/test_integration_function.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | from helpers.utils import is_same 4 | from usage_report.usage_report import agg_usage, get_spark 5 | from pyspark.sql import Row 6 | 7 | # Makes utils available 8 | pytest.register_assert_rewrite('tests.helpers.utils') 9 | 10 | 11 | @pytest.fixture 12 | def spark(): 13 | return get_spark() 14 | 15 | 16 | @pytest.fixture 17 | def main_summary_data_multiple(): 18 | ''' data with multiple counties and days including the following cases: 19 | - multiple countries 20 | a)include countries that are not in country list 21 | b)include countries into country_list that are not in data 22 | - clients with only pings from outside date range 23 | - clients with some pings from outside date range 24 | ''' 25 | a1 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 26 | foreign_install=False, is_system=False), 27 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 28 | foreign_install=False, is_system=True)] 29 | 30 | return ( 31 | (("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 32 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, 'en-US'), 33 | ("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 34 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, "en-US"), 35 | ("20180201", 100, 20, "DE", "client2", "58.0", 17563, 36 | "Darwin", 10.0, a1, None, "DE"), # 17563 -> 20180201 37 | ("20180201", 100, 20, "MX", "client3", "58.0", 17563, 38 | "Darwin", 10.0, a1, None, "en-US"), 39 | ("20180201", 100, 20, "DE", "client4", "58.0", 17554, 40 | "Darwin", 10.0, a1, None, "en-US"), 41 | ("20180131", 100, 20, "DE", "client5", "58.0", 17363, 42 | "Darwin", 10.0, a1, None, "DE"), 43 | ("20180101", 100, 20, "DE", "client5", "57.0", 17364, 44 | "Darwin", 10.0, a1, None, "DE"), 45 | ("20180101", 100, 20, "DE", "client6", "57.0", 17364, 46 | "Darwin", 10.0, a1, None, "DE")), 47 | ["submission_date_s3", "subsession_length", "active_ticks", 48 | "country", "client_id", "app_version", "profile_creation_date", 49 | "os", "os_version", "active_addons", "histogram_parent_tracking_protection_enabled", 50 | "locale"] 51 | ) 52 | 53 | 54 | @pytest.fixture 55 | def main_summary_data_null_value(): 56 | ''' data with all/some of a given field are null, '', or zero 57 | - 'app_version' is all showing '' 58 | - 'profile_creation_date' has None 59 | - 'active_ticks' has zero 60 | - 'client2' has multiple fields missing 61 | ''' 62 | a1 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 63 | foreign_install=False, is_system=False), 64 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 65 | foreign_install=False, is_system=True)] 66 | 67 | return ( 68 | (("20180201", 100, 20, "DE", "client1", "", 17060, 69 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, "en-US"), 70 | ("20180201", 100, 20, "DE", "client1", "", 17060, 71 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, "en-US"), 72 | ("20180201", 100, 0, "DE", "client2", "", None, 73 | "Darwin", 
10.0, a1, None, "DE"), # 17564 -> 20180201 74 | ("20180201", 100, 20, "DE", "client4", "", 17554, 75 | "Darwin", 10.0, a1, None, "en-US"), 76 | ("20180131", 100, 20, "DE", "client5", "", 17563, 77 | "Darwin", 10.0, a1, None, "DE")), 78 | ["submission_date_s3", "subsession_length", "active_ticks", 79 | "country", "client_id", "app_version", "profile_creation_date", 80 | "os", "os_version", "active_addons", "histogram_parent_tracking_protection_enabled", 81 | "locale"] 82 | ) 83 | 84 | 85 | def test_integration_multiple_countries_and_days_no_country_list(spark, main_summary_data_multiple): 86 | ''' tests without country list for data including the following cases: 87 | - multiple countries 88 | a)include countries that are not in country list 89 | b)include countries into country_list that are not in data 90 | - clients with only pings from outside date range 91 | - clients with some pings from outside date range 92 | ''' 93 | main_summary = spark.createDataFrame(*main_summary_data_multiple) 94 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 95 | period=7, sample_factor=100.0 / 1, 96 | country_list=None) 97 | 98 | expected_usage = [ 99 | { 100 | "submission_date_s3": "20180201", 101 | "country": "All", 102 | "avg_daily_usage(hours)": 600.0 / 3600 / 5.0, 103 | "avg_intensity": 1.0, 104 | "pct_latest_version": 80.0, 105 | "pct_TP": 20.0, 106 | "MAU": 500, 107 | "YAU": 600, 108 | "pct_new_user": 40.0, 109 | "pct_addon": 100.0 110 | } 111 | ] 112 | 113 | expected_locales = [ 114 | { 115 | "country": "All", 116 | "submission_date_s3": "20180201", 117 | "locale": "en-US", 118 | "pct_on_locale": 60.0 119 | }, 120 | { 121 | "country": "All", 122 | "submission_date_s3": "20180201", 123 | "locale": "DE", 124 | "pct_on_locale": 40.0 125 | } 126 | ] 127 | 128 | expected_addons = [ 129 | { 130 | "country": "All", 131 | "submission_date_s3": "20180201", 132 | "addon_id": u'disableSHA1rollout', 133 | "addon_name": u'SHA-1 deprecation staged rollout', 134 | "pct_with_addon": 100.0 135 | } 136 | ] 137 | 138 | is_same(spark, usage, expected_usage) 139 | is_same(spark, locales, expected_locales) 140 | is_same(spark, top10addon, expected_addons) 141 | 142 | 143 | def test_integration_multiple_countries_and_days_country_list(spark, main_summary_data_multiple): 144 | ''' tests with country list for data including the following cases: 145 | - multiple countries 146 | a)include countries that are not in country list 147 | b)include countries into country_list that are not in data 148 | - clients with only pings from outside date range 149 | - clients with some pings from outside date range 150 | ''' 151 | main_summary = spark.createDataFrame(*main_summary_data_multiple) 152 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 153 | period=7, sample_factor=100.0 / 1, 154 | country_list=['DE', 'CN']) 155 | 156 | expected_usage = [ 157 | { 158 | "submission_date_s3": "20180201", 159 | "country": "All", 160 | "avg_daily_usage(hours)": 600.0 / 3600 / 5.0, 161 | "avg_intensity": 1.0, 162 | "pct_latest_version": 80.0, 163 | "pct_TP": 20.0, 164 | "MAU": 500, 165 | "YAU": 600, 166 | "pct_new_user": 40.0, 167 | "pct_addon": 100.0 168 | }, 169 | { 170 | "submission_date_s3": "20180201", 171 | "country": "DE", 172 | "avg_daily_usage(hours)": 500.0 / 3600 / 4.0, 173 | "avg_intensity": 1.0, 174 | "pct_latest_version": 75.0, 175 | "pct_TP": 25.0, 176 | "MAU": 400, 177 | "YAU": 500, 178 | "pct_new_user": 25.0, 179 | "pct_addon": 100.0 180 | }, 181 | 182 | ] 183 | 184 | expected_locales = 
[ 185 | { 186 | "country": "All", 187 | "submission_date_s3": "20180201", 188 | "locale": "en-US", 189 | "pct_on_locale": 60.0 190 | }, 191 | { 192 | "country": "All", 193 | "submission_date_s3": "20180201", 194 | "locale": "DE", 195 | "pct_on_locale": 40.0 196 | }, 197 | { 198 | "country": "DE", 199 | "submission_date_s3": "20180201", 200 | "locale": "en-US", 201 | "pct_on_locale": 50.0 202 | }, 203 | { 204 | "country": "DE", 205 | "submission_date_s3": "20180201", 206 | "locale": "DE", 207 | "pct_on_locale": 50.0 208 | } 209 | ] 210 | 211 | expected_addons = [ 212 | { 213 | "country": "All", 214 | "submission_date_s3": "20180201", 215 | "addon_id": u'disableSHA1rollout', 216 | "addon_name": u'SHA-1 deprecation staged rollout', 217 | "pct_with_addon": 100.0 218 | }, 219 | { 220 | "country": "DE", 221 | "submission_date_s3": "20180201", 222 | "addon_id": u'disableSHA1rollout', 223 | "addon_name": u'SHA-1 deprecation staged rollout', 224 | "pct_with_addon": 100.0 225 | } 226 | ] 227 | 228 | is_same(spark, usage, expected_usage) 229 | is_same(spark, locales, expected_locales) 230 | is_same(spark, top10addon, expected_addons) 231 | 232 | 233 | def test_integration_missing_fields_no_country_list(spark, main_summary_data_null_value): 234 | ''' tests without country list for data with all/some of a given field are null, '', or zero 235 | ''' 236 | main_summary = spark.createDataFrame(*main_summary_data_null_value) 237 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 238 | period=7, sample_factor=100.0 / 1, 239 | country_list=None) 240 | 241 | expected_usage = [ 242 | { 243 | "submission_date_s3": "20180201", 244 | "country": "All", 245 | "avg_daily_usage(hours)": 500.0 / 3600 / 4.0, 246 | "avg_intensity": 0.75, 247 | "pct_latest_version": 0.0, 248 | "pct_TP": 25.0, 249 | "MAU": 400, 250 | "YAU": 400, 251 | "pct_new_user": 25.0, 252 | "pct_addon": 100.0 253 | } 254 | ] 255 | 256 | expected_locales = [ 257 | { 258 | "country": "All", 259 | "submission_date_s3": "20180201", 260 | "locale": "en-US", 261 | "pct_on_locale": 50.0 262 | }, 263 | { 264 | "country": "All", 265 | "submission_date_s3": "20180201", 266 | "locale": "DE", 267 | "pct_on_locale": 50.0 268 | } 269 | ] 270 | 271 | expected_addons = [ 272 | { 273 | "country": "All", 274 | "submission_date_s3": "20180201", 275 | "addon_id": u'disableSHA1rollout', 276 | "addon_name": u'SHA-1 deprecation staged rollout', 277 | "pct_with_addon": 100.0 278 | } 279 | ] 280 | 281 | is_same(spark, usage, expected_usage) 282 | is_same(spark, locales, expected_locales) 283 | is_same(spark, top10addon, expected_addons) 284 | 285 | 286 | def test_integration_missing_fields_country_list(spark, main_summary_data_null_value): 287 | ''' tests with country list for data with all/some of a given field are null, '', or zero 288 | ''' 289 | main_summary = spark.createDataFrame(*main_summary_data_null_value) 290 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 291 | period=7, sample_factor=100.0 / 1, 292 | country_list=['DE']) 293 | 294 | expected_usage = [ 295 | { 296 | "submission_date_s3": "20180201", 297 | "country": "All", 298 | "avg_daily_usage(hours)": 500.0 / 3600 / 4.0, 299 | "avg_intensity": 0.75, 300 | "pct_latest_version": 0.0, 301 | "pct_TP": 25.0, 302 | "MAU": 400, 303 | "YAU": 400, 304 | "pct_new_user": 25.0, 305 | "pct_addon": 100.0 306 | }, 307 | { 308 | "submission_date_s3": "20180201", 309 | "country": "DE", 310 | "avg_daily_usage(hours)": 500.0 / 3600 / 4.0, 311 | "avg_intensity": 0.75, 312 | 
"pct_latest_version": 0.0, 313 | "pct_TP": 25.0, 314 | "MAU": 400, 315 | "YAU": 400, 316 | "pct_new_user": 25.0, 317 | "pct_addon": 100.0 318 | } 319 | ] 320 | 321 | expected_locales = [ 322 | { 323 | "country": "All", 324 | "submission_date_s3": "20180201", 325 | "locale": "en-US", 326 | "pct_on_locale": 50.0 327 | }, 328 | { 329 | "country": "All", 330 | "submission_date_s3": "20180201", 331 | "locale": "DE", 332 | "pct_on_locale": 50.0 333 | }, 334 | { 335 | "country": "DE", 336 | "submission_date_s3": "20180201", 337 | "locale": "en-US", 338 | "pct_on_locale": 50.0 339 | }, 340 | { 341 | "country": "DE", 342 | "submission_date_s3": "20180201", 343 | "locale": "DE", 344 | "pct_on_locale": 50.0 345 | } 346 | ] 347 | 348 | expected_addons = [ 349 | { 350 | "country": "All", 351 | "submission_date_s3": "20180201", 352 | "addon_id": u'disableSHA1rollout', 353 | "addon_name": u'SHA-1 deprecation staged rollout', 354 | "pct_with_addon": 100.0 355 | }, 356 | { 357 | "country": "DE", 358 | "submission_date_s3": "20180201", 359 | "addon_id": u'disableSHA1rollout', 360 | "addon_name": u'SHA-1 deprecation staged rollout', 361 | "pct_with_addon": 100.0 362 | } 363 | ] 364 | 365 | is_same(spark, usage, expected_usage) 366 | is_same(spark, locales, expected_locales) 367 | is_same(spark, top10addon, expected_addons) 368 | -------------------------------------------------------------------------------- /tests/test_process_output.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from usage_report.usage_report import agg_usage, get_spark 3 | from pyspark.sql import Row 4 | from usage_report.utils.process_output import all_metrics_per_day 5 | from usage_report.utils.process_output import update_history 6 | 7 | 8 | # Makes utils available 9 | pytest.register_assert_rewrite('tests.helpers.utils') 10 | 11 | 12 | @pytest.fixture 13 | def spark(): 14 | return get_spark() 15 | 16 | 17 | @pytest.fixture 18 | def main_summary_data(): 19 | a1 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 20 | foreign_install=False, is_system=False), 21 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 22 | foreign_install=False, is_system=True)] 23 | 24 | return ( 25 | (("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 26 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, 'en-US'), 27 | ("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 28 | "Windows_NT", 10.0, a1, {}, "en-US"), 29 | ("20180201", 100, 20, "DE", "client2", "58.0", 17563, 30 | "Darwin", 10.0, a1, None, "DE")), # 17563 -> 20180201 31 | ["submission_date_s3", "subsession_length", "active_ticks", 32 | "country", "client_id", "app_version", "profile_creation_date", 33 | "os", "os_version", "active_addons", "histogram_parent_tracking_protection_enabled", 34 | "locale"] 35 | ) 36 | 37 | 38 | def test_processing_one_day(spark, main_summary_data): 39 | main_summary = spark.createDataFrame(*main_summary_data) 40 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 41 | period=1, sample_factor=100.0 / 1, 42 | country_list=['DE']) 43 | usage_df = usage.toPandas() 44 | locales_df = locales.toPandas() 45 | top10addon_df = top10addon.toPandas() 46 | 47 | fxhealth, webusage = all_metrics_per_day(['DE'], 48 | usage_pd_df=usage_df, 49 | locales_pd_df=locales_df, 50 | topaddons_pd_df=top10addon_df) 51 | 52 | expected_fxhealth = { 53 | 'DE': {"date": "2018-02-01", 54 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 55 | 
"avg_intensity": 1.0, 56 | "pct_latest_version": 50.0, 57 | "MAU": 200.0, 58 | "YAU": 200.0, 59 | "pct_new_user": 50.0}}, 60 | 'All': {"date": "2018-02-01", 61 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 62 | "avg_intensity": 1.0, 63 | "pct_latest_version": 50.0, 64 | "MAU": 200.0, 65 | "YAU": 200.0, 66 | "pct_new_user": 50.0}} 67 | } 68 | 69 | expected_webusage = { 70 | 'DE': {"date": "2018-02-01", 71 | "metrics": {"pct_TP": 50.0, 72 | "pct_addon": 100.0, 73 | "locale": {u"en-US": 50.0, 74 | u"DE": 50.0}, 75 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}}, 76 | 'All': {"date": "2018-02-01", 77 | "metrics": {"pct_TP": 50.0, 78 | "pct_addon": 100.0, 79 | "locale": {u"en-US": 50.0, 80 | u"DE": 50.0}, 81 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 82 | } 83 | 84 | assert expected_fxhealth == fxhealth 85 | assert expected_webusage == webusage 86 | 87 | 88 | def test_update_history_fxhealth_with_history(spark, main_summary_data): 89 | main_summary = spark.createDataFrame(*main_summary_data) 90 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 91 | period=1, sample_factor=100.0 / 1, 92 | country_list=['DE']) 93 | usage_df = usage.toPandas() 94 | locales_df = locales.toPandas() 95 | top10addon_df = top10addon.toPandas() 96 | 97 | fxhealth, webusage = all_metrics_per_day(['DE'], 98 | usage_pd_df=usage_df, 99 | locales_pd_df=locales_df, 100 | topaddons_pd_df=top10addon_df) 101 | 102 | old_fxhealth = { 103 | 'DE': [ 104 | {"date": "2018-01-01", 105 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 106 | "avg_intensity": 1.0, 107 | "pct_latest_version": 50.0, 108 | "MAU": 200.0, 109 | "YAU": 200.0, 110 | "pct_new_user": 50.0}} 111 | ], 112 | 'All': [ 113 | {"date": "2018-01-01", 114 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 115 | "avg_intensity": 1.0, 116 | "pct_latest_version": 50.0, 117 | "MAU": 200.0, 118 | "YAU": 200.0, 119 | "pct_new_user": 50.0}} 120 | ] 121 | } 122 | updated_fxhealth = update_history(fxhealth, old_fxhealth) 123 | 124 | expected_fxhealth = { 125 | 'DE': [ 126 | {"date": "2018-01-01", 127 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 128 | "avg_intensity": 1.0, 129 | "pct_latest_version": 50.0, 130 | "MAU": 200.0, 131 | "YAU": 200.0, 132 | "pct_new_user": 50.0}}, 133 | {"date": "2018-02-01", 134 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 135 | "avg_intensity": 1.0, 136 | "pct_latest_version": 50.0, 137 | "MAU": 200.0, 138 | "YAU": 200.0, 139 | "pct_new_user": 50.0}} 140 | ], 141 | 'All': [ 142 | {"date": "2018-01-01", 143 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 144 | "avg_intensity": 1.0, 145 | "pct_latest_version": 50.0, 146 | "MAU": 200.0, 147 | "YAU": 200.0, 148 | "pct_new_user": 50.0}}, 149 | {"date": "2018-02-01", 150 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 151 | "avg_intensity": 1.0, 152 | "pct_latest_version": 50.0, 153 | "MAU": 200.0, 154 | "YAU": 200.0, 155 | "pct_new_user": 50.0}} 156 | ] 157 | } 158 | 159 | assert expected_fxhealth == updated_fxhealth 160 | 161 | 162 | def test_update_history_fxhealth_without_history(spark, main_summary_data): 163 | main_summary = spark.createDataFrame(*main_summary_data) 164 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 165 | period=1, sample_factor=100.0 / 1, 166 | country_list=['DE']) 167 | usage_df = usage.toPandas() 168 | locales_df = locales.toPandas() 169 | top10addon_df = top10addon.toPandas() 170 | 171 | fxhealth, webusage = 
all_metrics_per_day(['DE'], 172 | usage_pd_df=usage_df, 173 | locales_pd_df=locales_df, 174 | topaddons_pd_df=top10addon_df) 175 | 176 | updated_fxhealth = update_history(fxhealth, None) 177 | 178 | expected_fxhealth = { 179 | 'DE': [ 180 | {"date": "2018-02-01", 181 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 182 | "avg_intensity": 1.0, 183 | "pct_latest_version": 50.0, 184 | "MAU": 200.0, 185 | "YAU": 200.0, 186 | "pct_new_user": 50.0}} 187 | ], 188 | 'All': [ 189 | {"date": "2018-02-01", 190 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 191 | "avg_intensity": 1.0, 192 | "pct_latest_version": 50.0, 193 | "MAU": 200.0, 194 | "YAU": 200.0, 195 | "pct_new_user": 50.0}} 196 | ] 197 | } 198 | 199 | assert expected_fxhealth == updated_fxhealth 200 | 201 | 202 | def test_update_history_webusage_with_history(spark, main_summary_data): 203 | main_summary = spark.createDataFrame(*main_summary_data) 204 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 205 | period=1, sample_factor=100.0 / 1, 206 | country_list=['DE']) 207 | usage_df = usage.toPandas() 208 | locales_df = locales.toPandas() 209 | top10addon_df = top10addon.toPandas() 210 | 211 | fxhealth, webusage = all_metrics_per_day(['DE'], 212 | usage_pd_df=usage_df, 213 | locales_pd_df=locales_df, 214 | topaddons_pd_df=top10addon_df) 215 | 216 | old_webusage = { 217 | 'DE': [ 218 | {"date": "2018-01-01", 219 | "metrics": {"pct_TP": 50.0, 220 | "pct_addon": 100.0, 221 | "locale": {u"en-US": 50.0, 222 | u"DE": 50.0}, 223 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 224 | ], 225 | 'All': [ 226 | {"date": "2018-01-01", 227 | "metrics": {"pct_TP": 50.0, 228 | "pct_addon": 100.0, 229 | "locale": {u"en-US": 50.0, 230 | u"DE": 50.0}, 231 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 232 | ] 233 | } 234 | 235 | updated_webusage = update_history(webusage, old_webusage) 236 | 237 | expected_webusage = { 238 | 'DE': [ 239 | {"date": "2018-01-01", 240 | "metrics": {"pct_TP": 50.0, 241 | "pct_addon": 100.0, 242 | "locale": {u"en-US": 50.0, 243 | u"DE": 50.0}, 244 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}}, 245 | {"date": "2018-02-01", 246 | "metrics": {"pct_TP": 50.0, 247 | "pct_addon": 100.0, 248 | "locale": {u"en-US": 50.0, 249 | u"DE": 50.0}, 250 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 251 | ], 252 | 'All': [ 253 | {"date": "2018-01-01", 254 | "metrics": {"pct_TP": 50.0, 255 | "pct_addon": 100.0, 256 | "locale": {u"en-US": 50.0, 257 | u"DE": 50.0}, 258 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}}, 259 | {"date": "2018-02-01", 260 | "metrics": {"pct_TP": 50.0, 261 | "pct_addon": 100.0, 262 | "locale": {u"en-US": 50.0, 263 | u"DE": 50.0}, 264 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 265 | 266 | ] 267 | } 268 | 269 | assert expected_webusage == updated_webusage 270 | 271 | 272 | def test_update_history_webusage_without_history(spark, main_summary_data): 273 | main_summary = spark.createDataFrame(*main_summary_data) 274 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 275 | period=1, sample_factor=100.0 / 1, 276 | country_list=['DE']) 277 | usage_df = usage.toPandas() 278 | locales_df = locales.toPandas() 279 | top10addon_df = top10addon.toPandas() 280 | 281 | fxhealth, webusage = all_metrics_per_day(['DE'], 282 | usage_pd_df=usage_df, 283 | locales_pd_df=locales_df, 284 | topaddons_pd_df=top10addon_df) 285 | 286 | updated_webusage = update_history(webusage, None) 
287 | 288 | expected_webusage = { 289 | 'DE': [ 290 | {"date": "2018-02-01", 291 | "metrics": {"pct_TP": 50.0, 292 | "pct_addon": 100.0, 293 | "locale": {u"en-US": 50.0, 294 | u"DE": 50.0}, 295 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 296 | ], 297 | 'All': [ 298 | {"date": "2018-02-01", 299 | "metrics": {"pct_TP": 50.0, 300 | "pct_addon": 100.0, 301 | "locale": {u"en-US": 50.0, 302 | u"DE": 50.0}, 303 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 304 | ] 305 | } 306 | 307 | assert expected_webusage == updated_webusage 308 | -------------------------------------------------------------------------------- /tests/test_usage_report.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | from helpers.utils import is_same 4 | from usage_report.utils.avg_intensity import get_avg_intensity 5 | from usage_report.utils.avg_daily_usage import get_daily_avg_session 6 | from usage_report.utils.pct_latest_version import pct_new_version 7 | from usage_report.utils.activeuser import getMAU, getYAU 8 | from usage_report.utils.newuser import new_users 9 | from usage_report.utils.osdistribution import os_on_date 10 | from usage_report.utils.top10addons import top_10_addons_on_date 11 | from usage_report.utils.pct_addon import get_addon 12 | from usage_report.utils.localedistribution import locale_on_date 13 | from usage_report.usage_report import agg_usage, get_spark 14 | from pyspark.sql import Row 15 | from usage_report.utils.trackingprotection import pct_tracking_protection 16 | 17 | 18 | # Makes utils available 19 | pytest.register_assert_rewrite('tests.helpers.utils') 20 | 21 | 22 | @pytest.fixture 23 | def spark(): 24 | return get_spark() 25 | 26 | 27 | @pytest.fixture 28 | def main_summary_data(): 29 | a1 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 30 | foreign_install=False, is_system=False), 31 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 32 | foreign_install=False, is_system=True)] 33 | 34 | a2 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 35 | foreign_install=False, is_system=False), 36 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 37 | foreign_install=False, is_system=True)] 38 | 39 | return ( 40 | (("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 41 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, 'en-US'), 42 | ("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 43 | "Windows_NT", 10.0, a1, {}, "en-US"), 44 | ("20180201", 100, 20, "DE", "client2", "58.0", 17563, 45 | "Darwin", 10.0, a2, None, "DE")), # 17563 -> 20180201 46 | ["submission_date_s3", "subsession_length", "active_ticks", 47 | "country", "client_id", "app_version", "profile_creation_date", 48 | "os", "os_version", "active_addons", "histogram_parent_tracking_protection_enabled", 49 | "locale"] 50 | ) 51 | 52 | 53 | def test_get_avg_intensity_no_country_list(spark, main_summary_data): 54 | main_summary = spark.createDataFrame(*main_summary_data) 55 | without_country_list = get_avg_intensity(main_summary, "20180201") 56 | 57 | expected = [ 58 | { 59 | "country": "All", 60 | "submission_date_s3": "20180201", 61 | "avg_intensity": 1.0 62 | } 63 | ] 64 | 65 | is_same(spark, without_country_list, expected) 66 | 67 | 68 | def test_get_avg_intensity_country_list(spark, main_summary_data): 69 | main_summary = spark.createDataFrame(*main_summary_data) 70 | with_country_list = get_avg_intensity(main_summary, 
"20180201", country_list=["DE"]) 71 | 72 | expected = [ 73 | { 74 | "country": "All", 75 | "submission_date_s3": "20180201", 76 | "avg_intensity": 1.0 77 | }, 78 | { 79 | "country": "DE", 80 | "submission_date_s3": "20180201", 81 | "avg_intensity": 1.0 82 | } 83 | ] 84 | 85 | is_same(spark, with_country_list, expected) 86 | 87 | 88 | def test_get_avg_daily_usage_no_country_list(spark, main_summary_data): 89 | main_summary = spark.createDataFrame(*main_summary_data) 90 | without_country_list = get_daily_avg_session(main_summary, "20180201") 91 | 92 | expected = [ 93 | { 94 | "country": "All", 95 | "submission_date_s3": "20180201", 96 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0 97 | } 98 | ] 99 | 100 | is_same(spark, without_country_list, expected) 101 | 102 | 103 | def test_get_avg_daily_usage_country_list(spark, main_summary_data): 104 | main_summary = spark.createDataFrame(*main_summary_data) 105 | with_country_list = get_daily_avg_session(main_summary, "20180201", country_list=["DE"]) 106 | 107 | expected = [ 108 | { 109 | "country": "All", 110 | "submission_date_s3": "20180201", 111 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0 112 | }, 113 | { 114 | "country": "DE", 115 | "submission_date_s3": "20180201", 116 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0 117 | } 118 | ] 119 | 120 | is_same(spark, with_country_list, expected) 121 | 122 | 123 | def test_pct_latest_version_no_country_list(spark, main_summary_data): 124 | main_summary = spark.createDataFrame(*main_summary_data) 125 | without_country_list = pct_new_version(main_summary, "20180201") 126 | 127 | expected = [ 128 | { 129 | "country": "All", 130 | "submission_date_s3": "20180201", 131 | "pct_latest_version": 50.0 132 | } 133 | ] 134 | 135 | is_same(spark, without_country_list, expected) 136 | 137 | 138 | def test_pct_latest_version_country_list(spark, main_summary_data): 139 | main_summary = spark.createDataFrame(*main_summary_data) 140 | with_country_list = pct_new_version(main_summary, "20180201", 141 | country_list=['DE']) 142 | 143 | expected = [ 144 | { 145 | "country": "All", 146 | "submission_date_s3": "20180201", 147 | "pct_latest_version": 50.0 148 | }, 149 | { 150 | "country": "DE", 151 | "submission_date_s3": "20180201", 152 | "pct_latest_version": 50.0 153 | } 154 | ] 155 | 156 | is_same(spark, with_country_list, expected) 157 | 158 | 159 | def test_MAU_no_country_list(spark, main_summary_data): 160 | main_summary = spark.createDataFrame(*main_summary_data) 161 | without_country_list = getMAU(main_summary, 162 | '20180201', 163 | sample_factor=100.0 / 1) 164 | 165 | expected = [ 166 | { 167 | "country": "All", 168 | "active_users": 200, 169 | "submission_date_s3": "20180201" 170 | } 171 | ] 172 | 173 | is_same(spark, without_country_list, expected, verbose=True) 174 | 175 | 176 | def test_MAU_country_list(spark, main_summary_data): 177 | main_summary = spark.createDataFrame(*main_summary_data) 178 | with_country_list = getMAU(main_summary, 179 | '20180201', 180 | sample_factor=100.0 / 1, 181 | country_list=["DE"]) 182 | 183 | expected = [ 184 | { 185 | "country": "All", 186 | "MAU": 200, 187 | "submission_date_s3": "20180201" 188 | }, 189 | { 190 | "country": "DE", 191 | "MAU": 200, 192 | "submission_date_s3": "20180201" 193 | } 194 | ] 195 | 196 | is_same(spark, with_country_list, expected) 197 | 198 | 199 | def test_YAU_no_country_list(spark, main_summary_data): 200 | main_summary = spark.createDataFrame(*main_summary_data) 201 | without_country_list = getYAU(main_summary, 202 | '20180201', 203 | 
sample_factor=100.0 / 1) 204 | 205 | expected = [ 206 | { 207 | "country": "All", 208 | "MAU": 200, 209 | "submission_date_s3": "20180201" 210 | } 211 | ] 212 | 213 | is_same(spark, without_country_list, expected) 214 | 215 | 216 | def test_YAU_country_list(spark, main_summary_data): 217 | main_summary = spark.createDataFrame(*main_summary_data) 218 | with_country_list = getYAU(main_summary, 219 | '20180201', 220 | sample_factor=100.0 / 1, 221 | country_list=["DE"]) 222 | 223 | expected = [ 224 | { 225 | "country": "All", 226 | "YAU": 200, 227 | "submission_date_s3": "20180201" 228 | }, 229 | { 230 | "country": "DE", 231 | "YAU": 200, 232 | "submission_date_s3": "20180201" 233 | } 234 | ] 235 | 236 | is_same(spark, with_country_list, expected) 237 | 238 | 239 | def test_new_users_no_country_list(spark, main_summary_data): 240 | main_summary = spark.createDataFrame(*main_summary_data) 241 | without_country_list = new_users(main_summary, 242 | '20180201') 243 | 244 | expected = [ 245 | { 246 | "country": "All", 247 | "submission_date_S3": "20180201", 248 | "pct_new_user": 50.0 249 | } 250 | ] 251 | 252 | is_same(spark, without_country_list, expected) 253 | 254 | 255 | def test_new_users_country_list(spark, main_summary_data): 256 | main_summary = spark.createDataFrame(*main_summary_data) 257 | with_country_list = new_users(main_summary, 258 | '20180201', 259 | country_list=["DE"]) 260 | 261 | expected = [ 262 | { 263 | "country": "All", 264 | "submission_date_S3": "20180201", 265 | "pct_new_user": 50.0 266 | }, 267 | { 268 | "country": "DE", 269 | "submission_date_S3": "20180201", 270 | "pct_new_user": 50.0 271 | } 272 | ] 273 | 274 | is_same(spark, with_country_list, expected) 275 | 276 | 277 | def test_os_distribution_no_country_list(spark, main_summary_data): 278 | main_summary = spark.createDataFrame(*main_summary_data) 279 | without_country_list = os_on_date(main_summary, 280 | '20180201') 281 | 282 | expected = [ 283 | { 284 | "country": "All", 285 | "submission_date_s3": "20180201", 286 | "os": "Windows 10", 287 | "pct_on_os": 50.0 288 | }, 289 | { 290 | "country": "All", 291 | "submission_date_s3": "20180201", 292 | "os": "Mac OS X", 293 | "pct_on_os": 50.0 294 | } 295 | ] 296 | 297 | is_same(spark, without_country_list, expected) 298 | 299 | 300 | def test_os_distribution_country_list(spark, main_summary_data): 301 | main_summary = spark.createDataFrame(*main_summary_data) 302 | with_country_list = os_on_date(main_summary, 303 | '20180201', 304 | country_list=['DE']) 305 | 306 | expected = [ 307 | { 308 | "country": "All", 309 | "submission_date_s3": "20180201", 310 | "os": "Windows 10", 311 | "pct_on_os": 50.0 312 | }, 313 | { 314 | "country": "All", 315 | "submission_date_s3": "20180201", 316 | "os": "Mac OS X", 317 | "pct_on_os": 50.0 318 | }, 319 | { 320 | "country": "DE", 321 | "submission_date_s3": "20180201", 322 | "os": "Mac OS X", 323 | "pct_on_os": 50.0 324 | }, 325 | { 326 | "country": "DE", 327 | "submission_date_s3": "20180201", 328 | "os": "Windows 10", 329 | "pct_on_os": 50.0 330 | } 331 | ] 332 | 333 | is_same(spark, with_country_list, expected) 334 | 335 | 336 | def test_top_10_addons_no_country_list(spark, main_summary_data): 337 | main_summary = spark.createDataFrame(*main_summary_data) 338 | 339 | without_country_list = top_10_addons_on_date(main_summary, '20180201', 5) 340 | expected = [ 341 | { 342 | "country": "All", 343 | "submission_date_s3": "20180201", 344 | "addon_id": u'disableSHA1rollout', 345 | "addon_name": u'SHA-1 deprecation staged rollout', 346 | 
"pct_with_addon": 100.0 347 | } 348 | ] 349 | 350 | is_same(spark, without_country_list, expected) 351 | 352 | 353 | def test_top_10_addons_country_list(spark, main_summary_data): 354 | main_summary = spark.createDataFrame(*main_summary_data) 355 | 356 | with_country_list = top_10_addons_on_date(main_summary, '20180201', 5, country_list=['DE']) 357 | 358 | expected = [ 359 | { 360 | "country": "All", 361 | "submission_date_s3": "20180201", 362 | "addon_id": u'disableSHA1rollout', 363 | "addon_name": u'SHA-1 deprecation staged rollout', 364 | "pct_with_addon": 100.0 365 | }, 366 | { 367 | "country": "DE", 368 | "submission_date_s3": "20180201", 369 | "addon_id": u'disableSHA1rollout', 370 | "addon_name": u'SHA-1 deprecation staged rollout', 371 | "pct_with_addon": 100.0 372 | } 373 | ] 374 | 375 | is_same(spark, with_country_list, expected) 376 | 377 | 378 | def test_has_addons_no_country_list(spark, main_summary_data): 379 | main_summary = spark.createDataFrame(*main_summary_data) 380 | 381 | without_country_list = get_addon(main_summary, '20180201') 382 | expected = [ 383 | { 384 | "country": "All", 385 | "submission_date_s3": "20180201", 386 | "pct_addon": 100.0 387 | } 388 | ] 389 | 390 | is_same(spark, without_country_list, expected) 391 | 392 | 393 | def test_has_addons_country_list(spark, main_summary_data): 394 | main_summary = spark.createDataFrame(*main_summary_data) 395 | 396 | with_country_list = get_addon(main_summary, '20180201', country_list=['DE']) 397 | expected = [ 398 | { 399 | "country": "All", 400 | "submission_date_s3": "20180201", 401 | "pct_addon": 100.0 402 | }, 403 | { 404 | "country": "DE", 405 | "submission_date_s3": "20180201", 406 | "pct_addon": 100.0 407 | } 408 | ] 409 | 410 | is_same(spark, with_country_list, expected) 411 | 412 | 413 | def test_pct_tracking_protection_no_country_list(spark, main_summary_data): 414 | main_summary = spark.createDataFrame(*main_summary_data) 415 | without_country_list = pct_tracking_protection(main_summary, '20180201') 416 | 417 | expected = [ 418 | { 419 | "submission_date_s3": "20180201", 420 | "country": "All", 421 | "pct_TP": 50.0 422 | } 423 | ] 424 | 425 | is_same(spark, without_country_list, expected) 426 | 427 | 428 | def test_pct_tracking_protection_country_list(spark, main_summary_data): 429 | main_summary = spark.createDataFrame(*main_summary_data) 430 | with_country_list = pct_tracking_protection(main_summary, 431 | '20180201', 432 | country_list=["DE"]) 433 | expected = [ 434 | { 435 | "submission_date_s3": "20180201", 436 | "country": "All", 437 | "pct_TP": 50.0 438 | }, 439 | { 440 | "submission_date_s3": "20180201", 441 | "country": "DE", 442 | "pct_TP": 50.0 443 | } 444 | ] 445 | 446 | is_same(spark, with_country_list, expected) 447 | 448 | 449 | def test_locale_no_country_list(spark, main_summary_data): 450 | main_summary = spark.createDataFrame(*main_summary_data) 451 | without_country_list = locale_on_date(main_summary, '20180201', 4) 452 | expected = [ 453 | { 454 | "country": "All", 455 | "submission_date_s3": "20180201", 456 | "locale": "en-US", 457 | "pct_on_locale": 50.0 458 | }, 459 | { 460 | "country": "All", 461 | "submission_date_s3": "20180201", 462 | "locale": "DE", 463 | "pct_on_locale": 50.0 464 | } 465 | ] 466 | 467 | is_same(spark, without_country_list, expected) 468 | 469 | 470 | def test_locale_country_list(spark, main_summary_data): 471 | main_summary = spark.createDataFrame(*main_summary_data) 472 | with_country_list = locale_on_date(main_summary, '20180201', 4, country_list=['DE']) 473 | 
474 | expected = [ 475 | { 476 | "country": "All", 477 | "submission_date_s3": "20180201", 478 | "locale": "en-US", 479 | "pct_on_locale": 50.0 480 | }, 481 | { 482 | "country": "All", 483 | "submission_date_s3": "20180201", 484 | "locale": "DE", 485 | "pct_on_locale": 50.0 486 | }, 487 | { 488 | "country": "DE", 489 | "submission_date_s3": "20180201", 490 | "locale": "en-US", 491 | "pct_on_locale": 50.0 492 | }, 493 | { 494 | "country": "DE", 495 | "submission_date_s3": "20180201", 496 | "locale": "DE", 497 | "pct_on_locale": 50.0 498 | } 499 | ] 500 | 501 | is_same(spark, with_country_list, expected) 502 | 503 | 504 | def test_integration_no_country_list(spark, main_summary_data): 505 | main_summary = spark.createDataFrame(*main_summary_data) 506 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 507 | period=1, sample_factor=100.0 / 1, 508 | country_list=None) 509 | 510 | expected_usage = [ 511 | { 512 | "submission_date_s3": "20180201", 513 | "country": "All", 514 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 515 | "avg_intensity": 1.0, 516 | "pct_latest_version": 50.0, 517 | "pct_TP": 50.0, 518 | "MAU": 200, 519 | "YAU": 200, 520 | "pct_new_user": 50.0, 521 | "pct_addon": 100.0 522 | } 523 | ] 524 | 525 | expected_locales = [ 526 | { 527 | "country": "All", 528 | "submission_date_s3": "20180201", 529 | "locale": "en-US", 530 | "pct_on_locale": 50.0 531 | }, 532 | { 533 | "country": "All", 534 | "submission_date_s3": "20180201", 535 | "locale": "DE", 536 | "pct_on_locale": 50.0 537 | } 538 | ] 539 | 540 | expected_addons = [ 541 | { 542 | "country": "All", 543 | "submission_date_s3": "20180201", 544 | "addon_id": u'disableSHA1rollout', 545 | "addon_name": u'SHA-1 deprecation staged rollout', 546 | "pct_with_addon": 100.0 547 | } 548 | ] 549 | 550 | is_same(spark, usage, expected_usage) 551 | is_same(spark, locales, expected_locales) 552 | is_same(spark, top10addon, expected_addons) 553 | 554 | 555 | def test_integration_country_list(spark, main_summary_data): 556 | main_summary = spark.createDataFrame(*main_summary_data) 557 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 558 | period=1, sample_factor=100.0 / 1, 559 | country_list=['DE']) 560 | 561 | expected_usage = [ 562 | { 563 | "submission_date_s3": "20180201", 564 | "country": "All", 565 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 566 | "avg_intensity": 1.0, 567 | "pct_latest_version": 50.0, 568 | "pct_TP": 50.0, 569 | "MAU": 200, 570 | "YAU": 200, 571 | "pct_new_user": 50.0, 572 | "pct_addon": 100.0 573 | }, 574 | { 575 | "submission_date_s3": "20180201", 576 | "country": "DE", 577 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 578 | "avg_intensity": 1.0, 579 | "pct_latest_version": 50.0, 580 | "pct_TP": 50.0, 581 | "MAU": 200, 582 | "YAU": 200, 583 | "pct_new_user": 50.0, 584 | "pct_addon": 100.0 585 | } 586 | ] 587 | 588 | expected_locales = [ 589 | { 590 | "country": "All", 591 | "submission_date_s3": "20180201", 592 | "locale": "en-US", 593 | "pct_on_locale": 50.0 594 | }, 595 | { 596 | "country": "All", 597 | "submission_date_s3": "20180201", 598 | "locale": "DE", 599 | "pct_on_locale": 50.0 600 | }, 601 | { 602 | "country": "DE", 603 | "submission_date_s3": "20180201", 604 | "locale": "en-US", 605 | "pct_on_locale": 50.0 606 | }, 607 | { 608 | "country": "DE", 609 | "submission_date_s3": "20180201", 610 | "locale": "DE", 611 | "pct_on_locale": 50.0 612 | } 613 | ] 614 | 615 | expected_addons = [ 616 | { 617 | "country": "All", 618 | "submission_date_s3": "20180201", 619 | 
"addon_id": u'disableSHA1rollout', 620 | "addon_name": u'SHA-1 deprecation staged rollout', 621 | "pct_with_addon": 100.0 622 | }, 623 | { 624 | "country": "DE", 625 | "submission_date_s3": "20180201", 626 | "addon_id": u'disableSHA1rollout', 627 | "addon_name": u'SHA-1 deprecation staged rollout', 628 | "pct_with_addon": 100.0 629 | } 630 | ] 631 | 632 | is_same(spark, usage, expected_usage) 633 | is_same(spark, locales, expected_locales) 634 | is_same(spark, top10addon, expected_addons) 635 | -------------------------------------------------------------------------------- /usage_report/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/Fx_Usage_Report/489ca258b14776c01f3021080b2dd686d239dea3/usage_report/__init__.py -------------------------------------------------------------------------------- /usage_report/annotations/annotations_fxhealth.json: -------------------------------------------------------------------------------- 1 | { 2 | "Brazil": [ 3 | { 4 | "annotation": { 5 | "pct_latest_version": "FF53" 6 | }, 7 | "date": "2017-04-19" 8 | }, 9 | { 10 | "annotation": { 11 | "pct_latest_version": "FF54" 12 | }, 13 | "date": "2017-06-13" 14 | }, 15 | { 16 | "annotation": { 17 | "pct_latest_version": "FF55" 18 | }, 19 | "date": "2017-08-08" 20 | }, 21 | { 22 | "annotation": { 23 | "pct_latest_version": "FF56" 24 | }, 25 | "date": "2017-09-28" 26 | }, 27 | { 28 | "annotation": { 29 | "pct_latest_version": "FF57" 30 | }, 31 | "date": "2017-11-14" 32 | }, 33 | { 34 | "annotation": { 35 | "MAU": "Summer Slump" 36 | }, 37 | "date": "2018-01-21" 38 | }, 39 | { 40 | "annotation": { 41 | "pct_latest_version": "FF58" 42 | }, 43 | "date": "2018-01-23" 44 | }, 45 | { 46 | "annotation": { 47 | "pct_latest_version": "FF59" 48 | }, 49 | "date": "2018-03-13" 50 | }, 51 | { 52 | "annotation": { 53 | "pct_latest_version": "FF60" 54 | }, 55 | "date": "2018-05-09" 56 | }, 57 | { 58 | "annotation": { 59 | "pct_latest_version": "FF61" 60 | }, 61 | "date": "2018-06-26" 62 | }, 63 | { 64 | "annotation": { 65 | "pct_latest_version": "FF62" 66 | }, 67 | "date": "2018-09-05" 68 | }, 69 | { 70 | "annotation": { 71 | "pct_latest_version": "FF63" 72 | }, 73 | "date": "2018-10-23" 74 | }, 75 | { 76 | "annotation": { 77 | "pct_latest_version": "FF64" 78 | }, 79 | "date": "2018-12-11" 80 | }, 81 | { 82 | "annotation": { 83 | "MAU": "Summer Slump" 84 | }, 85 | "date": "2019-01-01" 86 | }, 87 | { 88 | "annotation": { 89 | "pct_latest_version": "FF65" 90 | }, 91 | "date": "2019-01-29" 92 | }, 93 | { 94 | "annotation": { 95 | "pct_latest_version": "FF66" 96 | }, 97 | "date": "2019-03-19" 98 | }, 99 | { 100 | "annotation": { 101 | "MAU": "data deleted (addons outage)" 102 | }, 103 | "date": "2019-05-05" 104 | }, 105 | { 106 | "annotation": { 107 | "YAU": "data deleted (addons outage)" 108 | }, 109 | "date": "2019-05-05" 110 | }, 111 | { 112 | "annotation": { 113 | "avg_daily_usage(hours)": "data deleted (addons outage)" 114 | }, 115 | "date": "2019-05-05" 116 | }, 117 | { 118 | "annotation": { 119 | "pct_latest_version": "FF67" 120 | }, 121 | "date": "2019-05-21" 122 | }, 123 | { 124 | "annotation": { 125 | "pct_latest_version": "FF68" 126 | }, 127 | "date": "2019-07-09" 128 | }, 129 | { 130 | "annotation": { 131 | "pct_latest_version": "FF69" 132 | }, 133 | "date": "2019-09-03" 134 | }, 135 | { 136 | "annotation": { 137 | "pct_latest_version": "FF70" 138 | }, 139 | "date": "2019-10-22" 140 | }, 141 | { 142 | "annotation": { 143 | 
"pct_latest_version": "FF71" 144 | }, 145 | "date": "2019-12-03" 146 | }, 147 | { 148 | "annotation": { 149 | "pct_latest_version": "FF72" 150 | }, 151 | "date": "2020-01-07" 152 | }, 153 | { 154 | "annotation": { 155 | "pct_latest_version": "FF73" 156 | }, 157 | "date": "2020-02-11" 158 | }, 159 | { 160 | "annotation": { 161 | "pct_latest_version": "FF74" 162 | }, 163 | "date": "2020-03-10" 164 | } 165 | ], 166 | "China": [ 167 | { 168 | "annotation": { 169 | "pct_latest_version": "FF53" 170 | }, 171 | "date": "2017-04-19" 172 | }, 173 | { 174 | "annotation": { 175 | "pct_latest_version": "FF54" 176 | }, 177 | "date": "2017-06-13" 178 | }, 179 | { 180 | "annotation": { 181 | "pct_latest_version": "FF55" 182 | }, 183 | "date": "2017-08-08" 184 | }, 185 | { 186 | "annotation": { 187 | "MAU": "Summer Slump" 188 | }, 189 | "date": "2017-08-20" 190 | }, 191 | { 192 | "annotation": { 193 | "pct_latest_version": "FF56" 194 | }, 195 | "date": "2017-09-28" 196 | }, 197 | { 198 | "annotation": { 199 | "pct_latest_version": "FF57" 200 | }, 201 | "date": "2017-11-14" 202 | }, 203 | { 204 | "annotation": { 205 | "pct_latest_version": "FF58" 206 | }, 207 | "date": "2018-01-23" 208 | }, 209 | { 210 | "annotation": { 211 | "MAU": "Spring Festival" 212 | }, 213 | "date": "2018-02-25" 214 | }, 215 | { 216 | "annotation": { 217 | "pct_latest_version": "FF59" 218 | }, 219 | "date": "2018-03-13" 220 | }, 221 | { 222 | "annotation": { 223 | "pct_latest_version": "FF60" 224 | }, 225 | "date": "2018-05-09" 226 | }, 227 | { 228 | "annotation": { 229 | "pct_latest_version": "FF61" 230 | }, 231 | "date": "2018-06-26" 232 | }, 233 | { 234 | "annotation": { 235 | "MAU": "Summer Slump" 236 | }, 237 | "date": "2018-08-20" 238 | }, 239 | { 240 | "annotation": { 241 | "pct_latest_version": "FF62" 242 | }, 243 | "date": "2018-09-05" 244 | }, 245 | { 246 | "annotation": { 247 | "pct_latest_version": "FF63" 248 | }, 249 | "date": "2018-10-23" 250 | }, 251 | { 252 | "annotation": { 253 | "pct_latest_version": "FF64" 254 | }, 255 | "date": "2018-12-11" 256 | }, 257 | { 258 | "annotation": { 259 | "pct_latest_version": "FF65" 260 | }, 261 | "date": "2019-01-29" 262 | }, 263 | { 264 | "annotation": { 265 | "MAU": "Spring Festival" 266 | }, 267 | "date": "2019-02-05" 268 | }, 269 | { 270 | "annotation": { 271 | "pct_latest_version": "FF66" 272 | }, 273 | "date": "2019-03-19" 274 | }, 275 | { 276 | "annotation": { 277 | "MAU": "data deleted (addons outage)" 278 | }, 279 | "date": "2019-05-05" 280 | }, 281 | { 282 | "annotation": { 283 | "YAU": "data deleted (addons outage)" 284 | }, 285 | "date": "2019-05-05" 286 | }, 287 | { 288 | "annotation": { 289 | "avg_daily_usage(hours)": "data deleted (addons outage)" 290 | }, 291 | "date": "2019-05-05" 292 | }, 293 | { 294 | "annotation": { 295 | "pct_latest_version": "FF67" 296 | }, 297 | "date": "2019-05-21" 298 | }, 299 | { 300 | "annotation": { 301 | "pct_latest_version": "FF68" 302 | }, 303 | "date": "2019-07-09" 304 | }, 305 | { 306 | "annotation": { 307 | "pct_latest_version": "FF69" 308 | }, 309 | "date": "2019-09-03" 310 | }, 311 | { 312 | "annotation": { 313 | "pct_latest_version": "FF70" 314 | }, 315 | "date": "2019-10-22" 316 | }, 317 | { 318 | "annotation": { 319 | "pct_latest_version": "FF71" 320 | }, 321 | "date": "2019-12-03" 322 | }, 323 | { 324 | "annotation": { 325 | "pct_latest_version": "FF72" 326 | }, 327 | "date": "2020-01-07" 328 | }, 329 | { 330 | "annotation": { 331 | "pct_latest_version": "FF73" 332 | }, 333 | "date": "2020-02-11" 334 | }, 335 | { 336 | 
"annotation": { 337 | "pct_latest_version": "FF74" 338 | }, 339 | "date": "2020-03-10" 340 | } 341 | ], 342 | "France": [ 343 | { 344 | "annotation": { 345 | "pct_latest_version": "FF53" 346 | }, 347 | "date": "2017-04-19" 348 | }, 349 | { 350 | "annotation": { 351 | "pct_latest_version": "FF54" 352 | }, 353 | "date": "2017-06-13" 354 | }, 355 | { 356 | "annotation": { 357 | "pct_latest_version": "FF55" 358 | }, 359 | "date": "2017-08-08" 360 | }, 361 | { 362 | "annotation": { 363 | "MAU": "Summer Slump" 364 | }, 365 | "date": "2017-08-27" 366 | }, 367 | { 368 | "annotation": { 369 | "pct_latest_version": "FF56" 370 | }, 371 | "date": "2017-09-28" 372 | }, 373 | { 374 | "annotation": { 375 | "MAU": "Autumn Holidays" 376 | }, 377 | "date": "2017-11-05" 378 | }, 379 | { 380 | "annotation": { 381 | "pct_latest_version": "FF57" 382 | }, 383 | "date": "2017-11-14" 384 | }, 385 | { 386 | "annotation": { 387 | "MAU": "Winter Holidays" 388 | }, 389 | "date": "2018-01-14" 390 | }, 391 | { 392 | "annotation": { 393 | "pct_latest_version": "FF58" 394 | }, 395 | "date": "2018-01-23" 396 | }, 397 | { 398 | "annotation": { 399 | "pct_latest_version": "FF59" 400 | }, 401 | "date": "2018-03-13" 402 | }, 403 | { 404 | "annotation": { 405 | "pct_latest_version": "FF60" 406 | }, 407 | "date": "2018-05-09" 408 | }, 409 | { 410 | "annotation": { 411 | "pct_latest_version": "FF61" 412 | }, 413 | "date": "2018-06-26" 414 | }, 415 | { 416 | "annotation": { 417 | "MAU": "Summer Slump" 418 | }, 419 | "date": "2018-08-27" 420 | }, 421 | { 422 | "annotation": { 423 | "pct_latest_version": "FF62" 424 | }, 425 | "date": "2018-09-05" 426 | }, 427 | { 428 | "annotation": { 429 | "pct_latest_version": "FF63" 430 | }, 431 | "date": "2018-10-23" 432 | }, 433 | { 434 | "annotation": { 435 | "pct_latest_version": "FF64" 436 | }, 437 | "date": "2018-12-11" 438 | }, 439 | { 440 | "annotation": { 441 | "MAU": "Autumn Holidays" 442 | }, 443 | "date": "2018-11-03" 444 | }, 445 | { 446 | "annotation": { 447 | "MAU": "Winter Holidays" 448 | }, 449 | "date": "2019-01-01" 450 | }, 451 | { 452 | "annotation": { 453 | "pct_latest_version": "FF65" 454 | }, 455 | "date": "2019-01-29" 456 | }, 457 | { 458 | "annotation": { 459 | "pct_latest_version": "FF66" 460 | }, 461 | "date": "2019-03-19" 462 | }, 463 | { 464 | "annotation": { 465 | "MAU": "data deleted (addons outage)" 466 | }, 467 | "date": "2019-05-05" 468 | }, 469 | { 470 | "annotation": { 471 | "YAU": "data deleted (addons outage)" 472 | }, 473 | "date": "2019-05-05" 474 | }, 475 | { 476 | "annotation": { 477 | "avg_daily_usage(hours)": "data deleted (addons outage)" 478 | }, 479 | "date": "2019-05-05" 480 | }, 481 | { 482 | "annotation": { 483 | "pct_latest_version": "FF67" 484 | }, 485 | "date": "2019-05-21" 486 | }, 487 | { 488 | "annotation": { 489 | "pct_latest_version": "FF68" 490 | }, 491 | "date": "2019-07-09" 492 | }, 493 | { 494 | "annotation": { 495 | "pct_latest_version": "FF69" 496 | }, 497 | "date": "2019-09-03" 498 | }, 499 | { 500 | "annotation": { 501 | "pct_latest_version": "FF70" 502 | }, 503 | "date": "2019-10-22" 504 | }, 505 | { 506 | "annotation": { 507 | "pct_latest_version": "FF71" 508 | }, 509 | "date": "2019-12-03" 510 | }, 511 | { 512 | "annotation": { 513 | "pct_latest_version": "FF72" 514 | }, 515 | "date": "2020-01-07" 516 | }, 517 | { 518 | "annotation": { 519 | "pct_latest_version": "FF73" 520 | }, 521 | "date": "2020-02-11" 522 | }, 523 | { 524 | "annotation": { 525 | "pct_latest_version": "FF74" 526 | }, 527 | "date": "2020-03-10" 528 | } 529 | 
], 530 | "Germany": [ 531 | { 532 | "annotation": { 533 | "pct_latest_version": "FF53" 534 | }, 535 | "date": "2017-04-19" 536 | }, 537 | { 538 | "annotation": { 539 | "pct_latest_version": "FF54" 540 | }, 541 | "date": "2017-06-13" 542 | }, 543 | { 544 | "annotation": { 545 | "pct_latest_version": "FF55" 546 | }, 547 | "date": "2017-08-08" 548 | }, 549 | { 550 | "annotation": { 551 | "MAU": "Summer Slump" 552 | }, 553 | "date": "2017-08-27" 554 | }, 555 | { 556 | "annotation": { 557 | "pct_latest_version": "FF56" 558 | }, 559 | "date": "2017-09-28" 560 | }, 561 | { 562 | "annotation": { 563 | "pct_latest_version": "FF57" 564 | }, 565 | "date": "2017-11-14" 566 | }, 567 | { 568 | "annotation": { 569 | "MAU": "Winter Holidays" 570 | }, 571 | "date": "2018-01-07" 572 | }, 573 | { 574 | "annotation": { 575 | "pct_latest_version": "FF58" 576 | }, 577 | "date": "2018-01-23" 578 | }, 579 | { 580 | "annotation": { 581 | "pct_latest_version": "FF59" 582 | }, 583 | "date": "2018-03-13" 584 | }, 585 | { 586 | "annotation": { 587 | "pct_latest_version": "FF60" 588 | }, 589 | "date": "2018-05-09" 590 | }, 591 | { 592 | "annotation": { 593 | "pct_latest_version": "FF61" 594 | }, 595 | "date": "2018-06-26" 596 | }, 597 | { 598 | "annotation": { 599 | "MAU": "Summer Slump" 600 | }, 601 | "date": "2018-08-27" 602 | }, 603 | { 604 | "annotation": { 605 | "pct_latest_version": "FF62" 606 | }, 607 | "date": "2018-09-05" 608 | }, 609 | { 610 | "annotation": { 611 | "pct_latest_version": "FF63" 612 | }, 613 | "date": "2018-10-23" 614 | }, 615 | { 616 | "annotation": { 617 | "pct_latest_version": "FF64" 618 | }, 619 | "date": "2018-12-11" 620 | }, 621 | { 622 | "annotation": { 623 | "MAU": "Winter Holidays" 624 | }, 625 | "date": "2019-01-01" 626 | }, 627 | { 628 | "annotation": { 629 | "pct_latest_version": "FF65" 630 | }, 631 | "date": "2019-01-29" 632 | }, 633 | { 634 | "annotation": { 635 | "pct_latest_version": "FF66" 636 | }, 637 | "date": "2019-03-19" 638 | }, 639 | { 640 | "annotation": { 641 | "MAU": "data deleted (addons outage)" 642 | }, 643 | "date": "2019-05-05" 644 | }, 645 | { 646 | "annotation": { 647 | "YAU": "data deleted (addons outage)" 648 | }, 649 | "date": "2019-05-05" 650 | }, 651 | { 652 | "annotation": { 653 | "avg_daily_usage(hours)": "data deleted (addons outage)" 654 | }, 655 | "date": "2019-05-05" 656 | }, 657 | { 658 | "annotation": { 659 | "pct_latest_version": "FF67" 660 | }, 661 | "date": "2019-05-21" 662 | }, 663 | { 664 | "annotation": { 665 | "pct_latest_version": "FF68" 666 | }, 667 | "date": "2019-07-09" 668 | }, 669 | { 670 | "annotation": { 671 | "pct_latest_version": "FF69" 672 | }, 673 | "date": "2019-09-03" 674 | }, 675 | { 676 | "annotation": { 677 | "pct_latest_version": "FF70" 678 | }, 679 | "date": "2019-10-22" 680 | }, 681 | { 682 | "annotation": { 683 | "pct_latest_version": "FF71" 684 | }, 685 | "date": "2019-12-03" 686 | }, 687 | { 688 | "annotation": { 689 | "pct_latest_version": "FF72" 690 | }, 691 | "date": "2020-01-07" 692 | }, 693 | { 694 | "annotation": { 695 | "pct_latest_version": "FF73" 696 | }, 697 | "date": "2020-02-11" 698 | }, 699 | { 700 | "annotation": { 701 | "pct_latest_version": "FF74" 702 | }, 703 | "date": "2020-03-10" 704 | } 705 | ], 706 | "India": [ 707 | { 708 | "annotation": { 709 | "pct_latest_version": "FF53" 710 | }, 711 | "date": "2017-04-19" 712 | }, 713 | { 714 | "annotation": { 715 | "MAU": "Summer Slump" 716 | }, 717 | "date": "2017-05-28" 718 | }, 719 | { 720 | "annotation": { 721 | "pct_latest_version": "FF54" 722 | }, 723 | 
"date": "2017-06-13" 724 | }, 725 | { 726 | "annotation": { 727 | "pct_latest_version": "FF55" 728 | }, 729 | "date": "2017-08-08" 730 | }, 731 | { 732 | "annotation": { 733 | "pct_latest_version": "FF56" 734 | }, 735 | "date": "2017-09-28" 736 | }, 737 | { 738 | "annotation": { 739 | "pct_latest_version": "FF57" 740 | }, 741 | "date": "2017-11-14" 742 | }, 743 | { 744 | "annotation": { 745 | "pct_latest_version": "FF58" 746 | }, 747 | "date": "2018-01-23" 748 | }, 749 | { 750 | "annotation": { 751 | "pct_latest_version": "FF59" 752 | }, 753 | "date": "2018-03-13" 754 | }, 755 | { 756 | "annotation": { 757 | "pct_latest_version": "FF60" 758 | }, 759 | "date": "2018-05-09" 760 | }, 761 | { 762 | "annotation": { 763 | "MAU": "Summer Slump" 764 | }, 765 | "date": "2018-05-28" 766 | }, 767 | { 768 | "annotation": { 769 | "pct_latest_version": "FF61" 770 | }, 771 | "date": "2018-06-26" 772 | }, 773 | { 774 | "annotation": { 775 | "pct_latest_version": "FF62" 776 | }, 777 | "date": "2018-09-05" 778 | }, 779 | { 780 | "annotation": { 781 | "pct_latest_version": "FF63" 782 | }, 783 | "date": "2018-10-23" 784 | }, 785 | { 786 | "annotation": { 787 | "pct_latest_version": "FF64" 788 | }, 789 | "date": "2018-12-11" 790 | }, 791 | { 792 | "annotation": { 793 | "MAU": "Winter Holidays" 794 | }, 795 | "date": "2019-01-01" 796 | }, 797 | { 798 | "annotation": { 799 | "pct_latest_version": "FF65" 800 | }, 801 | "date": "2019-01-29" 802 | }, 803 | { 804 | "annotation": { 805 | "pct_latest_version": "FF66" 806 | }, 807 | "date": "2019-03-19" 808 | }, 809 | { 810 | "annotation": { 811 | "MAU": "data deleted (addons outage)" 812 | }, 813 | "date": "2019-05-05" 814 | }, 815 | { 816 | "annotation": { 817 | "YAU": "data deleted (addons outage)" 818 | }, 819 | "date": "2019-05-05" 820 | }, 821 | { 822 | "annotation": { 823 | "avg_daily_usage(hours)": "data deleted (addons outage)" 824 | }, 825 | "date": "2019-05-05" 826 | }, 827 | { 828 | "annotation": { 829 | "pct_latest_version": "FF67" 830 | }, 831 | "date": "2019-05-21" 832 | }, 833 | { 834 | "annotation": { 835 | "pct_latest_version": "FF68" 836 | }, 837 | "date": "2019-07-09" 838 | }, 839 | { 840 | "annotation": { 841 | "pct_latest_version": "FF69" 842 | }, 843 | "date": "2019-09-03" 844 | }, 845 | { 846 | "annotation": { 847 | "pct_latest_version": "FF70" 848 | }, 849 | "date": "2019-10-22" 850 | }, 851 | { 852 | "annotation": { 853 | "pct_latest_version": "FF71" 854 | }, 855 | "date": "2019-12-03" 856 | }, 857 | { 858 | "annotation": { 859 | "pct_latest_version": "FF72" 860 | }, 861 | "date": "2020-01-07" 862 | }, 863 | { 864 | "annotation": { 865 | "pct_latest_version": "FF73" 866 | }, 867 | "date": "2020-02-11" 868 | }, 869 | { 870 | "annotation": { 871 | "pct_latest_version": "FF74" 872 | }, 873 | "date": "2020-03-10" 874 | } 875 | ], 876 | "Indonesia": [ 877 | { 878 | "annotation": { 879 | "pct_latest_version": "FF53" 880 | }, 881 | "date": "2017-04-19" 882 | }, 883 | { 884 | "annotation": { 885 | "pct_latest_version": "FF54" 886 | }, 887 | "date": "2017-06-13" 888 | }, 889 | { 890 | "annotation": { 891 | "MAU": "Hari Raya Idul Fitri (Ramadan Ends)" 892 | }, 893 | "date": "2017-06-25" 894 | }, 895 | { 896 | "annotation": { 897 | "pct_latest_version": "FF55" 898 | }, 899 | "date": "2017-08-08" 900 | }, 901 | { 902 | "annotation": { 903 | "pct_latest_version": "FF56" 904 | }, 905 | "date": "2017-09-28" 906 | }, 907 | { 908 | "annotation": { 909 | "pct_latest_version": "FF57" 910 | }, 911 | "date": "2017-11-14" 912 | }, 913 | { 914 | "annotation": { 915 
| "MAU": "Winter Holidays" 916 | }, 917 | "date": "2018-01-07" 918 | }, 919 | { 920 | "annotation": { 921 | "pct_latest_version": "FF58" 922 | }, 923 | "date": "2018-01-23" 924 | }, 925 | { 926 | "annotation": { 927 | "pct_latest_version": "FF59" 928 | }, 929 | "date": "2018-03-13" 930 | }, 931 | { 932 | "annotation": { 933 | "pct_latest_version": "FF60" 934 | }, 935 | "date": "2018-05-09" 936 | }, 937 | { 938 | "annotation": { 939 | "pct_latest_version": "FF61" 940 | }, 941 | "date": "2018-06-26" 942 | }, 943 | { 944 | "annotation": { 945 | "MAU": "Hari Raya Idul Fitri (Ramadan Ends)" 946 | }, 947 | "date": "2018-07-15" 948 | }, 949 | { 950 | "annotation": { 951 | "pct_latest_version": "FF62" 952 | }, 953 | "date": "2018-09-05" 954 | }, 955 | { 956 | "annotation": { 957 | "pct_latest_version": "FF63" 958 | }, 959 | "date": "2018-10-23" 960 | }, 961 | { 962 | "annotation": { 963 | "pct_latest_version": "FF64" 964 | }, 965 | "date": "2018-12-11" 966 | }, 967 | { 968 | "annotation": { 969 | "MAU": "Winter Holidays" 970 | }, 971 | "date": "2019-01-01" 972 | }, 973 | { 974 | "annotation": { 975 | "pct_latest_version": "FF65" 976 | }, 977 | "date": "2019-01-29" 978 | }, 979 | { 980 | "annotation": { 981 | "pct_latest_version": "FF66" 982 | }, 983 | "date": "2019-03-19" 984 | }, 985 | { 986 | "annotation": { 987 | "MAU": "data deleted (addons outage)" 988 | }, 989 | "date": "2019-05-05" 990 | }, 991 | { 992 | "annotation": { 993 | "YAU": "data deleted (addons outage)" 994 | }, 995 | "date": "2019-05-05" 996 | }, 997 | { 998 | "annotation": { 999 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1000 | }, 1001 | "date": "2019-05-05" 1002 | }, 1003 | { 1004 | "annotation": { 1005 | "pct_latest_version": "FF67" 1006 | }, 1007 | "date": "2019-05-21" 1008 | }, 1009 | { 1010 | "annotation": { 1011 | "pct_latest_version": "FF68" 1012 | }, 1013 | "date": "2019-07-09" 1014 | }, 1015 | { 1016 | "annotation": { 1017 | "pct_latest_version": "FF69" 1018 | }, 1019 | "date": "2019-09-03" 1020 | }, 1021 | { 1022 | "annotation": { 1023 | "pct_latest_version": "FF70" 1024 | }, 1025 | "date": "2019-10-22" 1026 | }, 1027 | { 1028 | "annotation": { 1029 | "pct_latest_version": "FF71" 1030 | }, 1031 | "date": "2019-12-03" 1032 | }, 1033 | { 1034 | "annotation": { 1035 | "pct_latest_version": "FF72" 1036 | }, 1037 | "date": "2020-01-07" 1038 | }, 1039 | { 1040 | "annotation": { 1041 | "pct_latest_version": "FF73" 1042 | }, 1043 | "date": "2020-02-11" 1044 | }, 1045 | { 1046 | "annotation": { 1047 | "pct_latest_version": "FF74" 1048 | }, 1049 | "date": "2020-03-10" 1050 | } 1051 | ], 1052 | "Italy": [ 1053 | { 1054 | "annotation": { 1055 | "pct_latest_version": "FF53" 1056 | }, 1057 | "date": "2017-04-19" 1058 | }, 1059 | { 1060 | "annotation": { 1061 | "pct_latest_version": "FF54" 1062 | }, 1063 | "date": "2017-06-13" 1064 | }, 1065 | { 1066 | "annotation": { 1067 | "pct_latest_version": "FF55" 1068 | }, 1069 | "date": "2017-08-08" 1070 | }, 1071 | { 1072 | "annotation": { 1073 | "MAU": "Summer Slump" 1074 | }, 1075 | "date": "2017-08-27" 1076 | }, 1077 | { 1078 | "annotation": { 1079 | "pct_latest_version": "FF56" 1080 | }, 1081 | "date": "2017-09-28" 1082 | }, 1083 | { 1084 | "annotation": { 1085 | "pct_latest_version": "FF57" 1086 | }, 1087 | "date": "2017-11-14" 1088 | }, 1089 | { 1090 | "annotation": { 1091 | "MAU": "Winter Holidays" 1092 | }, 1093 | "date": "2018-01-14" 1094 | }, 1095 | { 1096 | "annotation": { 1097 | "pct_latest_version": "FF58" 1098 | }, 1099 | "date": "2018-01-23" 1100 | }, 1101 | { 
1102 | "annotation": { 1103 | "pct_latest_version": "FF59" 1104 | }, 1105 | "date": "2018-03-13" 1106 | }, 1107 | { 1108 | "annotation": { 1109 | "pct_latest_version": "FF60" 1110 | }, 1111 | "date": "2018-05-09" 1112 | }, 1113 | { 1114 | "annotation": { 1115 | "pct_latest_version": "FF61" 1116 | }, 1117 | "date": "2018-06-26" 1118 | }, 1119 | { 1120 | "annotation": { 1121 | "MAU": "Summer Slump" 1122 | }, 1123 | "date": "2018-08-27" 1124 | }, 1125 | { 1126 | "annotation": { 1127 | "pct_latest_version": "FF62" 1128 | }, 1129 | "date": "2018-09-05" 1130 | }, 1131 | { 1132 | "annotation": { 1133 | "pct_latest_version": "FF63" 1134 | }, 1135 | "date": "2018-10-23" 1136 | }, 1137 | { 1138 | "annotation": { 1139 | "pct_latest_version": "FF64" 1140 | }, 1141 | "date": "2018-12-11" 1142 | }, 1143 | { 1144 | "annotation": { 1145 | "MAU": "Winter Holidays" 1146 | }, 1147 | "date": "2019-01-01" 1148 | }, 1149 | { 1150 | "annotation": { 1151 | "pct_latest_version": "FF65" 1152 | }, 1153 | "date": "2019-01-29" 1154 | }, 1155 | { 1156 | "annotation": { 1157 | "pct_latest_version": "FF66" 1158 | }, 1159 | "date": "2019-03-19" 1160 | }, 1161 | { 1162 | "annotation": { 1163 | "MAU": "data deleted (addons outage)" 1164 | }, 1165 | "date": "2019-05-05" 1166 | }, 1167 | { 1168 | "annotation": { 1169 | "YAU": "data deleted (addons outage)" 1170 | }, 1171 | "date": "2019-05-05" 1172 | }, 1173 | { 1174 | "annotation": { 1175 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1176 | }, 1177 | "date": "2019-05-05" 1178 | }, 1179 | { 1180 | "annotation": { 1181 | "pct_latest_version": "FF67" 1182 | }, 1183 | "date": "2019-05-21" 1184 | }, 1185 | { 1186 | "annotation": { 1187 | "pct_latest_version": "FF68" 1188 | }, 1189 | "date": "2019-07-09" 1190 | }, 1191 | { 1192 | "annotation": { 1193 | "pct_latest_version": "FF69" 1194 | }, 1195 | "date": "2019-09-03" 1196 | }, 1197 | { 1198 | "annotation": { 1199 | "pct_latest_version": "FF70" 1200 | }, 1201 | "date": "2019-10-22" 1202 | }, 1203 | { 1204 | "annotation": { 1205 | "pct_latest_version": "FF71" 1206 | }, 1207 | "date": "2019-12-03" 1208 | }, 1209 | { 1210 | "annotation": { 1211 | "pct_latest_version": "FF72" 1212 | }, 1213 | "date": "2020-01-07" 1214 | }, 1215 | { 1216 | "annotation": { 1217 | "pct_latest_version": "FF73" 1218 | }, 1219 | "date": "2020-02-11" 1220 | }, 1221 | { 1222 | "annotation": { 1223 | "pct_latest_version": "FF74" 1224 | }, 1225 | "date": "2020-03-10" 1226 | } 1227 | ], 1228 | "Poland": [ 1229 | { 1230 | "annotation": { 1231 | "pct_latest_version": "FF53" 1232 | }, 1233 | "date": "2017-04-19" 1234 | }, 1235 | { 1236 | "annotation": { 1237 | "pct_latest_version": "FF54" 1238 | }, 1239 | "date": "2017-06-13" 1240 | }, 1241 | { 1242 | "annotation": { 1243 | "pct_latest_version": "FF55" 1244 | }, 1245 | "date": "2017-08-08" 1246 | }, 1247 | { 1248 | "annotation": { 1249 | "MAU": "Summer Slump" 1250 | }, 1251 | "date": "2017-08-27" 1252 | }, 1253 | { 1254 | "annotation": { 1255 | "pct_latest_version": "FF56" 1256 | }, 1257 | "date": "2017-09-28" 1258 | }, 1259 | { 1260 | "annotation": { 1261 | "pct_latest_version": "FF57" 1262 | }, 1263 | "date": "2017-11-14" 1264 | }, 1265 | { 1266 | "annotation": { 1267 | "pct_latest_version": "FF58" 1268 | }, 1269 | "date": "2018-01-23" 1270 | }, 1271 | { 1272 | "annotation": { 1273 | "pct_latest_version": "FF59" 1274 | }, 1275 | "date": "2018-03-13" 1276 | }, 1277 | { 1278 | "annotation": { 1279 | "pct_latest_version": "FF60" 1280 | }, 1281 | "date": "2018-05-09" 1282 | }, 1283 | { 1284 | 
"annotation": { 1285 | "pct_latest_version": "FF61" 1286 | }, 1287 | "date": "2018-06-26" 1288 | }, 1289 | { 1290 | "annotation": { 1291 | "MAU": "Summer Slump" 1292 | }, 1293 | "date": "2018-08-27" 1294 | }, 1295 | { 1296 | "annotation": { 1297 | "pct_latest_version": "FF62" 1298 | }, 1299 | "date": "2018-09-05" 1300 | }, 1301 | { 1302 | "annotation": { 1303 | "pct_latest_version": "FF63" 1304 | }, 1305 | "date": "2018-10-23" 1306 | }, 1307 | { 1308 | "annotation": { 1309 | "pct_latest_version": "FF64" 1310 | }, 1311 | "date": "2018-12-11" 1312 | }, 1313 | { 1314 | "annotation": { 1315 | "MAU": "Winter Holidays" 1316 | }, 1317 | "date": "2019-01-01" 1318 | }, 1319 | { 1320 | "annotation": { 1321 | "pct_latest_version": "FF65" 1322 | }, 1323 | "date": "2019-01-29" 1324 | }, 1325 | { 1326 | "annotation": { 1327 | "pct_latest_version": "FF66" 1328 | }, 1329 | "date": "2019-03-19" 1330 | }, 1331 | { 1332 | "annotation": { 1333 | "MAU": "data deleted (addons outage)" 1334 | }, 1335 | "date": "2019-05-05" 1336 | }, 1337 | { 1338 | "annotation": { 1339 | "YAU": "data deleted (addons outage)" 1340 | }, 1341 | "date": "2019-05-05" 1342 | }, 1343 | { 1344 | "annotation": { 1345 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1346 | }, 1347 | "date": "2019-05-05" 1348 | }, 1349 | { 1350 | "annotation": { 1351 | "pct_latest_version": "FF67" 1352 | }, 1353 | "date": "2019-05-21" 1354 | }, 1355 | { 1356 | "annotation": { 1357 | "pct_latest_version": "FF68" 1358 | }, 1359 | "date": "2019-07-09" 1360 | }, 1361 | { 1362 | "annotation": { 1363 | "pct_latest_version": "FF69" 1364 | }, 1365 | "date": "2019-09-03" 1366 | }, 1367 | { 1368 | "annotation": { 1369 | "pct_latest_version": "FF70" 1370 | }, 1371 | "date": "2019-10-22" 1372 | }, 1373 | { 1374 | "annotation": { 1375 | "pct_latest_version": "FF71" 1376 | }, 1377 | "date": "2019-12-03" 1378 | }, 1379 | { 1380 | "annotation": { 1381 | "pct_latest_version": "FF72" 1382 | }, 1383 | "date": "2020-01-07" 1384 | }, 1385 | { 1386 | "annotation": { 1387 | "pct_latest_version": "FF73" 1388 | }, 1389 | "date": "2020-02-11" 1390 | }, 1391 | { 1392 | "annotation": { 1393 | "pct_latest_version": "FF74" 1394 | }, 1395 | "date": "2020-03-10" 1396 | } 1397 | ], 1398 | "Russia": [ 1399 | { 1400 | "annotation": { 1401 | "pct_latest_version": "FF53" 1402 | }, 1403 | "date": "2017-04-19" 1404 | }, 1405 | { 1406 | "annotation": { 1407 | "pct_latest_version": "FF54" 1408 | }, 1409 | "date": "2017-06-13" 1410 | }, 1411 | { 1412 | "annotation": { 1413 | "pct_latest_version": "FF55" 1414 | }, 1415 | "date": "2017-08-08" 1416 | }, 1417 | { 1418 | "annotation": { 1419 | "MAU": "Summer Slump" 1420 | }, 1421 | "date": "2017-08-13" 1422 | }, 1423 | { 1424 | "annotation": { 1425 | "pct_latest_version": "FF56" 1426 | }, 1427 | "date": "2017-09-28" 1428 | }, 1429 | { 1430 | "annotation": { 1431 | "pct_latest_version": "FF57" 1432 | }, 1433 | "date": "2017-11-14" 1434 | }, 1435 | { 1436 | "annotation": { 1437 | "MAU": "Winter Holidays" 1438 | }, 1439 | "date": "2018-01-21" 1440 | }, 1441 | { 1442 | "annotation": { 1443 | "pct_latest_version": "FF58" 1444 | }, 1445 | "date": "2018-01-23" 1446 | }, 1447 | { 1448 | "annotation": { 1449 | "pct_latest_version": "FF59" 1450 | }, 1451 | "date": "2018-03-13" 1452 | }, 1453 | { 1454 | "annotation": { 1455 | "pct_latest_version": "FF60" 1456 | }, 1457 | "date": "2018-05-09" 1458 | }, 1459 | { 1460 | "annotation": { 1461 | "pct_latest_version": "FF61" 1462 | }, 1463 | "date": "2018-06-26" 1464 | }, 1465 | { 1466 | "annotation": { 1467 | 
"MAU": "Summer Slump" 1468 | }, 1469 | "date": "2018-08-13" 1470 | }, 1471 | { 1472 | "annotation": { 1473 | "pct_latest_version": "FF62" 1474 | }, 1475 | "date": "2018-09-05" 1476 | }, 1477 | { 1478 | "annotation": { 1479 | "pct_latest_version": "FF63" 1480 | }, 1481 | "date": "2018-10-23" 1482 | }, 1483 | { 1484 | "annotation": { 1485 | "pct_latest_version": "FF64" 1486 | }, 1487 | "date": "2018-12-11" 1488 | }, 1489 | { 1490 | "annotation": { 1491 | "MAU": "Winter Holidays" 1492 | }, 1493 | "date": "2019-01-01" 1494 | }, 1495 | { 1496 | "annotation": { 1497 | "pct_latest_version": "FF65" 1498 | }, 1499 | "date": "2019-01-29" 1500 | }, 1501 | { 1502 | "annotation": { 1503 | "pct_latest_version": "FF66" 1504 | }, 1505 | "date": "2019-03-19" 1506 | }, 1507 | { 1508 | "annotation": { 1509 | "MAU": "data deleted (addons outage)" 1510 | }, 1511 | "date": "2019-05-05" 1512 | }, 1513 | { 1514 | "annotation": { 1515 | "YAU": "data deleted (addons outage)" 1516 | }, 1517 | "date": "2019-05-05" 1518 | }, 1519 | { 1520 | "annotation": { 1521 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1522 | }, 1523 | "date": "2019-05-05" 1524 | }, 1525 | { 1526 | "annotation": { 1527 | "pct_latest_version": "FF67" 1528 | }, 1529 | "date": "2019-05-21" 1530 | }, 1531 | { 1532 | "annotation": { 1533 | "pct_latest_version": "FF68" 1534 | }, 1535 | "date": "2019-07-09" 1536 | }, 1537 | { 1538 | "annotation": { 1539 | "pct_latest_version": "FF69" 1540 | }, 1541 | "date": "2019-09-03" 1542 | }, 1543 | { 1544 | "annotation": { 1545 | "pct_latest_version": "FF70" 1546 | }, 1547 | "date": "2019-10-22" 1548 | }, 1549 | { 1550 | "annotation": { 1551 | "pct_latest_version": "FF71" 1552 | }, 1553 | "date": "2019-12-03" 1554 | }, 1555 | { 1556 | "annotation": { 1557 | "pct_latest_version": "FF72" 1558 | }, 1559 | "date": "2020-01-07" 1560 | }, 1561 | { 1562 | "annotation": { 1563 | "pct_latest_version": "FF73" 1564 | }, 1565 | "date": "2020-02-11" 1566 | }, 1567 | { 1568 | "annotation": { 1569 | "pct_latest_version": "FF74" 1570 | }, 1571 | "date": "2020-03-10" 1572 | } 1573 | ], 1574 | "United States": [ 1575 | { 1576 | "annotation": { 1577 | "pct_latest_version": "FF53" 1578 | }, 1579 | "date": "2017-04-19" 1580 | }, 1581 | { 1582 | "annotation": { 1583 | "pct_latest_version": "FF54" 1584 | }, 1585 | "date": "2017-06-13" 1586 | }, 1587 | { 1588 | "annotation": { 1589 | "MAU": "Summer Slump" 1590 | }, 1591 | "date": "2017-07-30" 1592 | }, 1593 | { 1594 | "annotation": { 1595 | "pct_latest_version": "FF55" 1596 | }, 1597 | "date": "2017-08-08" 1598 | }, 1599 | { 1600 | "annotation": { 1601 | "pct_latest_version": "FF56" 1602 | }, 1603 | "date": "2017-09-28" 1604 | }, 1605 | { 1606 | "annotation": { 1607 | "pct_latest_version": "FF57" 1608 | }, 1609 | "date": "2017-11-14" 1610 | }, 1611 | { 1612 | "annotation": { 1613 | "MAU": "Winter Holidays" 1614 | }, 1615 | "date": "2018-01-14" 1616 | }, 1617 | { 1618 | "annotation": { 1619 | "pct_latest_version": "FF58" 1620 | }, 1621 | "date": "2018-01-23" 1622 | }, 1623 | { 1624 | "annotation": { 1625 | "pct_latest_version": "FF59" 1626 | }, 1627 | "date": "2018-03-13" 1628 | }, 1629 | { 1630 | "annotation": { 1631 | "pct_latest_version": "FF60" 1632 | }, 1633 | "date": "2018-05-09" 1634 | }, 1635 | { 1636 | "annotation": { 1637 | "pct_latest_version": "FF61" 1638 | }, 1639 | "date": "2018-06-26" 1640 | }, 1641 | { 1642 | "annotation": { 1643 | "MAU": "Summer Slump" 1644 | }, 1645 | "date": "2018-07-30" 1646 | }, 1647 | { 1648 | "annotation": { 1649 | "pct_latest_version": 
"FF62" 1650 | }, 1651 | "date": "2018-09-05" 1652 | }, 1653 | { 1654 | "annotation": { 1655 | "pct_latest_version": "FF63" 1656 | }, 1657 | "date": "2018-10-23" 1658 | }, 1659 | { 1660 | "annotation": { 1661 | "pct_latest_version": "FF64" 1662 | }, 1663 | "date": "2018-12-11" 1664 | }, 1665 | { 1666 | "annotation": { 1667 | "MAU": "Winter Holidays" 1668 | }, 1669 | "date": "2019-01-01" 1670 | }, 1671 | { 1672 | "annotation": { 1673 | "pct_latest_version": "FF65" 1674 | }, 1675 | "date": "2019-01-29" 1676 | }, 1677 | { 1678 | "annotation": { 1679 | "pct_latest_version": "FF66" 1680 | }, 1681 | "date": "2019-03-19" 1682 | }, 1683 | { 1684 | "annotation": { 1685 | "MAU": "data deleted (addons outage)" 1686 | }, 1687 | "date": "2019-05-05" 1688 | }, 1689 | { 1690 | "annotation": { 1691 | "YAU": "data deleted (addons outage)" 1692 | }, 1693 | "date": "2019-05-05" 1694 | }, 1695 | { 1696 | "annotation": { 1697 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1698 | }, 1699 | "date": "2019-05-05" 1700 | }, 1701 | { 1702 | "annotation": { 1703 | "pct_latest_version": "FF67" 1704 | }, 1705 | "date": "2019-05-21" 1706 | }, 1707 | { 1708 | "annotation": { 1709 | "pct_latest_version": "FF68" 1710 | }, 1711 | "date": "2019-07-09" 1712 | }, 1713 | { 1714 | "annotation": { 1715 | "pct_latest_version": "FF69" 1716 | }, 1717 | "date": "2019-09-03" 1718 | }, 1719 | { 1720 | "annotation": { 1721 | "pct_latest_version": "FF70" 1722 | }, 1723 | "date": "2019-10-22" 1724 | }, 1725 | { 1726 | "annotation": { 1727 | "pct_latest_version": "FF71" 1728 | }, 1729 | "date": "2019-12-03" 1730 | }, 1731 | { 1732 | "annotation": { 1733 | "pct_latest_version": "FF72" 1734 | }, 1735 | "date": "2020-01-07" 1736 | }, 1737 | { 1738 | "annotation": { 1739 | "pct_latest_version": "FF73" 1740 | }, 1741 | "date": "2020-02-11" 1742 | }, 1743 | { 1744 | "annotation": { 1745 | "pct_latest_version": "FF74" 1746 | }, 1747 | "date": "2020-03-10" 1748 | } 1749 | ], 1750 | "Worldwide": [ 1751 | { 1752 | "annotation": { 1753 | "pct_latest_version": "FF53" 1754 | }, 1755 | "date": "2017-04-19" 1756 | }, 1757 | { 1758 | "annotation": { 1759 | "pct_latest_version": "FF54" 1760 | }, 1761 | "date": "2017-06-13" 1762 | }, 1763 | { 1764 | "annotation": { 1765 | "pct_latest_version": "FF55" 1766 | }, 1767 | "date": "2017-08-08" 1768 | }, 1769 | { 1770 | "annotation": { 1771 | "MAU": "Summer Slump" 1772 | }, 1773 | "date": "2017-08-20" 1774 | }, 1775 | { 1776 | "annotation": { 1777 | "pct_latest_version": "FF56" 1778 | }, 1779 | "date": "2017-09-28" 1780 | }, 1781 | { 1782 | "annotation": { 1783 | "pct_latest_version": "FF57" 1784 | }, 1785 | "date": "2017-11-14" 1786 | }, 1787 | { 1788 | "annotation": { 1789 | "MAU": "Winter Holidays" 1790 | }, 1791 | "date": "2018-01-14" 1792 | }, 1793 | { 1794 | "annotation": { 1795 | "pct_latest_version": "FF58" 1796 | }, 1797 | "date": "2018-01-23" 1798 | }, 1799 | { 1800 | "annotation": { 1801 | "pct_latest_version": "FF59" 1802 | }, 1803 | "date": "2018-03-13" 1804 | }, 1805 | { 1806 | "annotation": { 1807 | "pct_latest_version": "FF60" 1808 | }, 1809 | "date": "2018-05-09" 1810 | }, 1811 | { 1812 | "annotation": { 1813 | "pct_latest_version": "FF61" 1814 | }, 1815 | "date": "2018-06-26" 1816 | }, 1817 | { 1818 | "annotation": { 1819 | "MAU": "Summer Slump" 1820 | }, 1821 | "date": "2018-08-20" 1822 | }, 1823 | { 1824 | "annotation": { 1825 | "pct_latest_version": "FF62" 1826 | }, 1827 | "date": "2018-09-05" 1828 | }, 1829 | { 1830 | "annotation": { 1831 | "pct_latest_version": "FF63" 1832 | }, 
1833 | "date": "2018-10-23" 1834 | }, 1835 | { 1836 | "annotation": { 1837 | "pct_latest_version": "FF64" 1838 | }, 1839 | "date": "2018-12-11" 1840 | }, 1841 | { 1842 | "annotation": { 1843 | "MAU": "Winter Holidays" 1844 | }, 1845 | "date": "2019-01-01" 1846 | }, 1847 | { 1848 | "annotation": { 1849 | "pct_latest_version": "FF65" 1850 | }, 1851 | "date": "2019-01-29" 1852 | }, 1853 | { 1854 | "annotation": { 1855 | "pct_latest_version": "FF66" 1856 | }, 1857 | "date": "2019-03-19" 1858 | }, 1859 | { 1860 | "annotation": { 1861 | "MAU": "data deleted (addons outage)" 1862 | }, 1863 | "date": "2019-05-05" 1864 | }, 1865 | { 1866 | "annotation": { 1867 | "YAU": "data deleted (addons outage)" 1868 | }, 1869 | "date": "2019-05-05" 1870 | }, 1871 | { 1872 | "annotation": { 1873 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1874 | }, 1875 | "date": "2019-05-05" 1876 | }, 1877 | { 1878 | "annotation": { 1879 | "pct_latest_version": "FF67" 1880 | }, 1881 | "date": "2019-05-21" 1882 | }, 1883 | { 1884 | "annotation": { 1885 | "pct_latest_version": "FF68" 1886 | }, 1887 | "date": "2019-07-09" 1888 | }, 1889 | { 1890 | "annotation": { 1891 | "pct_latest_version": "FF69" 1892 | }, 1893 | "date": "2019-09-03" 1894 | }, 1895 | { 1896 | "annotation": { 1897 | "pct_latest_version": "FF70" 1898 | }, 1899 | "date": "2019-10-22" 1900 | }, 1901 | { 1902 | "annotation": { 1903 | "pct_latest_version": "FF71" 1904 | }, 1905 | "date": "2019-12-03" 1906 | }, 1907 | { 1908 | "annotation": { 1909 | "pct_latest_version": "FF72" 1910 | }, 1911 | "date": "2020-01-07" 1912 | }, 1913 | { 1914 | "annotation": { 1915 | "pct_latest_version": "FF73" 1916 | }, 1917 | "date": "2020-02-11" 1918 | }, 1919 | { 1920 | "annotation": { 1921 | "pct_latest_version": "FF74" 1922 | }, 1923 | "date": "2020-03-10" 1924 | } 1925 | ] 1926 | } 1927 | -------------------------------------------------------------------------------- /usage_report/annotations/annotations_hardware.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": [ 3 | { 4 | "annotation": { 5 | "cpuCores": "XP and Vista leave dataset", 6 | "cpuSpeed": "XP and Vista leave dataset", 7 | "cpuVendor": "XP and Vista leave dataset", 8 | "gpuModel": "XP and Vista leave dataset", 9 | "gpuVendor": "XP and Vista leave dataset", 10 | "hasFlash": "XP and Vista leave dataset", 11 | "osArch": "XP and Vista leave dataset", 12 | "osName": "XP and Vista leave dataset", 13 | "ram": "XP and Vista leave dataset", 14 | "resolution": "XP and Vista leave dataset" 15 | }, 16 | "date": "2017-03-05" 17 | }, 18 | { 19 | "annotation": { 20 | "browserArch": "64-bit updates unthrottled on Win7+ for 2GB+ users" 21 | }, 22 | "date": "2017-10-22" 23 | } 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /usage_report/annotations/annotations_webusage.json: -------------------------------------------------------------------------------- 1 | { 2 | "Brazil": [ 3 | { 4 | "annotation": { 5 | "pct_TP": "FF57", 6 | "pct_addon": "legacy addons disabled" 7 | }, 8 | "date": "2017-11-14" 9 | }, 10 | { 11 | "annotation": { 12 | "pct_addon": "data deleted (addons outage)" 13 | }, 14 | "date": "2019-05-05" 15 | } 16 | ], 17 | "China": [ 18 | { 19 | "annotation": { 20 | "pct_TP": "FF57", 21 | "pct_addon": "legacy addons disabled" 22 | }, 23 | "date": "2017-11-14" 24 | }, 25 | { 26 | "annotation": { 27 | "pct_addon": "data deleted (addons outage)" 28 | }, 29 | "date": "2019-05-05" 30 | } 31 | ], 32 | "France": 
[ 33 | { 34 | "annotation": { 35 | "pct_TP": "FF57", 36 | "pct_addon": "legacy addons disabled" 37 | }, 38 | "date": "2017-11-14" 39 | }, 40 | { 41 | "annotation": { 42 | "pct_addon": "data deleted (addons outage)" 43 | }, 44 | "date": "2019-05-05" 45 | } 46 | ], 47 | "Germany": [ 48 | { 49 | "annotation": { 50 | "pct_TP": "FF57", 51 | "pct_addon": "legacy addons disabled" 52 | }, 53 | "date": "2017-11-14" 54 | }, 55 | { 56 | "annotation": { 57 | "pct_addon": "data deleted (addons outage)" 58 | }, 59 | "date": "2019-05-05" 60 | } 61 | ], 62 | "India": [ 63 | { 64 | "annotation": { 65 | "pct_TP": "FF57", 66 | "pct_addon": "legacy addons disabled" 67 | }, 68 | "date": "2017-11-14" 69 | }, 70 | { 71 | "annotation": { 72 | "pct_addon": "data deleted (addons outage)" 73 | }, 74 | "date": "2019-05-05" 75 | } 76 | ], 77 | "Indonesia": [ 78 | { 79 | "annotation": { 80 | "pct_TP": "FF57", 81 | "pct_addon": "legacy addons disabled" 82 | }, 83 | "date": "2017-11-14" 84 | }, 85 | { 86 | "annotation": { 87 | "pct_addon": "data deleted (addons outage)" 88 | }, 89 | "date": "2019-05-05" 90 | } 91 | ], 92 | "Italy": [ 93 | { 94 | "annotation": { 95 | "pct_TP": "FF57", 96 | "pct_addon": "legacy addons disabled" 97 | }, 98 | "date": "2017-11-14" 99 | }, 100 | { 101 | "annotation": { 102 | "pct_addon": "data deleted (addons outage)" 103 | }, 104 | "date": "2019-05-05" 105 | } 106 | ], 107 | "Poland": [ 108 | { 109 | "annotation": { 110 | "pct_TP": "FF57", 111 | "pct_addon": "legacy addons disabled" 112 | }, 113 | "date": "2017-11-14" 114 | }, 115 | { 116 | "annotation": { 117 | "pct_addon": "data deleted (addons outage)" 118 | }, 119 | "date": "2019-05-05" 120 | } 121 | ], 122 | "Russia": [ 123 | { 124 | "annotation": { 125 | "pct_TP": "FF57", 126 | "pct_addon": "legacy addons disabled" 127 | }, 128 | "date": "2017-11-14" 129 | }, 130 | { 131 | "annotation": { 132 | "pct_addon": "data deleted (addons outage)" 133 | }, 134 | "date": "2019-05-05" 135 | } 136 | ], 137 | "United States": [ 138 | { 139 | "annotation": { 140 | "pct_TP": "FF57", 141 | "pct_addon": "legacy addons disabled" 142 | }, 143 | "date": "2017-11-14" 144 | }, 145 | { 146 | "annotation": { 147 | "pct_addon": "data deleted (addons outage)" 148 | }, 149 | "date": "2019-05-05" 150 | } 151 | ], 152 | "Worldwide": [ 153 | { 154 | "annotation": { 155 | "pct_TP": "FF57", 156 | "pct_addon": "legacy addons disabled" 157 | }, 158 | "date": "2017-11-14" 159 | }, 160 | { 161 | "annotation": { 162 | "pct_addon": "data deleted (addons outage)" 163 | }, 164 | "date": "2019-05-05" 165 | } 166 | ] 167 | } 168 | -------------------------------------------------------------------------------- /usage_report/annotations/readme.md: -------------------------------------------------------------------------------- 1 | # Annotations Structure/Format 2 | 3 | Annotations should be json files in the following structure for [ensemble transposer](https://github.com/mozilla/ensemble-transposer) to read: 4 | 5 | ```json 6 | { 7 | "country1": [ 8 | { 9 | "annotation": { 10 | "plot/metric1 name": "annotation text", 11 | "plot/metric2 name": "etc" 12 | }, 13 | "date": "some_date" 14 | }, 15 | { 16 | "annotation": { 17 | "etc": "etc" 18 | }, 19 | "date": "etc" 20 | } 21 | ], 22 | "country2": [ 23 | "etc" 24 | ] 25 | } 26 | ``` 27 | 28 | The keys "date" and "annotation" should always be named as such. 
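Because ensemble-transposer expects this exact structure, it can help to sanity-check a new or edited annotations file before uploading it. A minimal check, sketched in Python (the `check_annotations` helper below is hypothetical and not part of this repo):

```python
# Hypothetical helper (not part of this repo): sanity-check an annotations file
# against the structure described above before uploading it.
import json


def check_annotations(path):
    with open(path) as f:
        annotations = json.load(f)

    problems = []
    for country, entries in annotations.items():
        for i, entry in enumerate(entries):
            if set(entry.keys()) != {"annotation", "date"}:
                problems.append(
                    "%s[%d]: keys must be exactly 'annotation' and 'date'" % (country, i))
            elif not isinstance(entry["annotation"], dict):
                problems.append(
                    "%s[%d]: 'annotation' must map plot/metric names to text" % (country, i))
    return problems


# Example (file name from this directory):
# check_annotations("annotations_fxhealth.json")
```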
29 | 30 | Note on formatting for human readability: json files can be human-readable formatted using [jq](https://stedolan.github.io/jq/), with the following command: 31 | 32 | ``` 33 | jq --sort-keys . original.json > formatted.json 34 | ``` -------------------------------------------------------------------------------- /usage_report/usage_report.py: -------------------------------------------------------------------------------- 1 | import click 2 | import os 3 | from pyspark.sql import SparkSession 4 | from pyspark.sql.functions import col 5 | from utils.activeuser import getMAU, getYAU 6 | from utils.avg_daily_usage import get_daily_avg_session 7 | from utils.avg_intensity import get_avg_intensity 8 | from utils.helpers import load_main_summary 9 | from utils.localedistribution import locale_on_date 10 | from utils.newuser import new_users 11 | from utils.pct_addon import get_addon 12 | from utils.pct_latest_version import pct_new_version 13 | from utils.process_output import all_metrics_per_day, rename_keys, update_history 14 | from utils.s3_utils import read_from_s3, write_to_s3 15 | from utils.top10addons import top_10_addons_on_date 16 | from utils.trackingprotection import pct_tracking_protection 17 | 18 | # country names and mappings 19 | # this list is formulated from 20 | # https://sql.telemetry.mozilla.org/queries/51430/source 21 | # may want to change 22 | COUNTRY_NAME_MAPPINGS = { 23 | 'All': 'Worldwide', 24 | 'US': 'United States', 25 | 'DE': 'Germany', 26 | 'FR': 'France', 27 | 'IN': 'India', 28 | 'BR': 'Brazil', 29 | 'CN': 'China', 30 | 'ID': 'Indonesia', 31 | 'RU': 'Russia', 32 | 'IT': 'Italy', 33 | 'PL': 'Poland' 34 | } 35 | 36 | TOP_TEN_COUNTRIES = list(COUNTRY_NAME_MAPPINGS.keys()) 37 | TOP_TEN_COUNTRIES.remove('All') 38 | 39 | MASTER_VERSION = 'master' 40 | ALLOWED_CHANNELS = [ 41 | 'release', 42 | 'beta', 43 | 'esr', 44 | 'Other' 45 | ] 46 | 47 | DEFAULT_TZ = 'UTC' 48 | 49 | ANNOTATIONS_DIR = os.path.join('usage_report', 'annotations') 50 | ANNOTATIONS_SUFFIX = '.json' 51 | 52 | 53 | def get_spark(): 54 | spark = (SparkSession 55 | .builder 56 | .appName("usage_report") 57 | .getOrCreate()) 58 | 59 | spark.conf.set('spark.sql.session.timeZone', DEFAULT_TZ) 60 | 61 | return spark 62 | 63 | 64 | def agg_usage(data, **kwargs): 65 | date = kwargs['date'] 66 | period = kwargs['period'] 67 | country_list = kwargs['country_list'] 68 | sample_factor = kwargs['sample_factor'] 69 | 70 | avg_daily_session_length = get_daily_avg_session(data, 71 | date, 72 | period=period, 73 | country_list=country_list) 74 | 75 | avg_daily_intensity = get_avg_intensity(data, 76 | date, 77 | period=period, 78 | country_list=country_list) 79 | 80 | pct_last_version = pct_new_version(data, 81 | date, 82 | period=period, 83 | country_list=country_list) 84 | 85 | # for mau and yau, start_date = date 86 | # since we only want ONE number for each week 87 | mau = getMAU(data, 88 | date, 89 | sample_factor=sample_factor, 90 | country_list=country_list) 91 | 92 | yau = getYAU(data, 93 | date, 94 | sample_factor=sample_factor, 95 | country_list=country_list) 96 | 97 | new_user_counts = new_users(data, 98 | date, 99 | period=period, 100 | country_list=country_list) 101 | 102 | top10addon = top_10_addons_on_date(data, 103 | date, 104 | topN=10, 105 | period=period, 106 | country_list=country_list) 107 | 108 | has_addon = get_addon(data, 109 | date, 110 | period=period, 111 | country_list=country_list) 112 | 113 | locales = locale_on_date(data, 114 | date, 115 | topN=5, 116 | period=period, 117 | 
country_list=country_list) 118 | 119 | tracking_pro = pct_tracking_protection(data, 120 | date, 121 | period=period, 122 | country_list=country_list) 123 | 124 | on = ['submission_date_s3', 'country'] 125 | usage = (avg_daily_session_length 126 | .join(avg_daily_intensity, on=on) 127 | .join(pct_last_version, on=on) 128 | .join(mau, on=on) 129 | .join(yau, on=on) 130 | .join(new_user_counts, on=on) 131 | .join(has_addon, on=on) 132 | .join(tracking_pro, on=on)) 133 | 134 | return usage, locales, top10addon 135 | 136 | 137 | @click.command() 138 | @click.option('--date', required=True) 139 | @click.option('--lag-days', default=7) 140 | @click.option('--sample', default=1, help='percent sample as int [1, 100]') 141 | @click.option('--no-output', default=False, is_flag=True) 142 | @click.option('--input-bucket', default='telemetry-parquet') 143 | @click.option('--input-prefix', default='main_summary') 144 | @click.option('--input-version', default='v4') 145 | @click.option('--output-bucket', default='telemetry-test-bucket') 146 | @click.option('--output-prefix', default='usage_report_data') # TBD, this is a placeholder 147 | @click.option('--output-version', default='v1') # TBD, this is a placeholder 148 | @click.option('--spark-provider', type=click.Choice(['emr', 'dataproc']), default='emr') 149 | def main(date, lag_days, sample, no_output, input_bucket, input_prefix, input_version, 150 | output_bucket, output_prefix, output_version, spark_provider): 151 | 152 | spark = get_spark() 153 | 154 | # all counts will be multipled by sample_factor 155 | sample_factor = 100.0 / sample 156 | 157 | # load main_summary with unbounded history, since YAU 158 | # looks at past 365 days 159 | ms = ( 160 | load_main_summary(spark, input_bucket, input_prefix, input_version, spark_provider) 161 | .filter("submission_date_s3 <= '{}'".format(date)) 162 | .filter("sample_id < {}".format(sample)) 163 | .filter(col("normalized_channel").isin(ALLOWED_CHANNELS)) 164 | .filter("app_name = 'Firefox'")) 165 | 166 | usage, locales, top10addon = agg_usage(ms, date=date, period=lag_days, 167 | sample_factor=sample_factor, 168 | country_list=TOP_TEN_COUNTRIES) 169 | usage.printSchema() 170 | usage_df = usage.toPandas() 171 | 172 | locales.printSchema() 173 | locales_df = locales.toPandas() 174 | 175 | top10addon.printSchema() 176 | top10addon_df = top10addon.toPandas() 177 | 178 | print "Converting data to JSON" 179 | fxhealth, webusage = all_metrics_per_day(TOP_TEN_COUNTRIES, 180 | usage_df, 181 | locales_df, 182 | top10addon_df) 183 | 184 | # rename countries for presentation 185 | fxhealth = rename_keys(fxhealth, COUNTRY_NAME_MAPPINGS) 186 | webusage = rename_keys(webusage, COUNTRY_NAME_MAPPINGS) 187 | print fxhealth 188 | print webusage 189 | 190 | # get previous data 191 | s3_key_prefix = output_prefix + '/' + output_version + '/{}/' 192 | s3_key_fxhealth = s3_key_prefix + 'fxhealth.json' 193 | s3_key_webusage = s3_key_prefix + 'webusage.json' 194 | 195 | old_fxhealth = read_from_s3(output_bucket, s3_key_fxhealth.format(MASTER_VERSION)) 196 | old_webusage = read_from_s3(output_bucket, s3_key_webusage.format(MASTER_VERSION)) 197 | 198 | # update previous data 199 | fxhealth_data_full = update_history(fxhealth, old_fxhealth) 200 | webusage_data_full = update_history(webusage, old_webusage) 201 | 202 | if no_output: 203 | print "no output generated due to user request" 204 | else: 205 | print "Writing new data to:", output_bucket 206 | print s3_key_fxhealth.format(MASTER_VERSION) 207 | print 
s3_key_webusage.format(MASTER_VERSION) 208 | print "Writing old data to:", output_bucket 209 | print s3_key_fxhealth.format(date) 210 | print s3_key_webusage.format(date) 211 | 212 | # write historical data, indexed by date 213 | write_to_s3(output_bucket, s3_key_fxhealth.format(date), old_fxhealth) 214 | write_to_s3(output_bucket, s3_key_webusage.format(date), old_webusage) 215 | 216 | # write updated data 217 | write_to_s3(output_bucket, s3_key_fxhealth.format(MASTER_VERSION), fxhealth_data_full) 218 | write_to_s3(output_bucket, s3_key_webusage.format(MASTER_VERSION), webusage_data_full) 219 | 220 | # write annotations 221 | annote_files = [ 222 | (f, os.path.join(ANNOTATIONS_DIR, f)) 223 | for f in os.listdir(ANNOTATIONS_DIR) 224 | if f.endswith(ANNOTATIONS_SUFFIX) 225 | ] 226 | 227 | for filename, full_path in annote_files: 228 | with open(full_path, 'r') as f: 229 | data = f.read() 230 | s3_path = s3_key_prefix.format(MASTER_VERSION) + filename 231 | write_to_s3(output_bucket, s3_path, data) 232 | 233 | 234 | if __name__ == '__main__': 235 | main() 236 | -------------------------------------------------------------------------------- /usage_report/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/Fx_Usage_Report/489ca258b14776c01f3021080b2dd686d239dea3/usage_report/utils/__init__.py -------------------------------------------------------------------------------- /usage_report/utils/activeuser.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql.functions import lit, col, countDistinct 2 | from helpers import date_plus_x_days 3 | 4 | # to name columns based on period 5 | PERIOD_DESC = { 6 | 28: "MAU", 7 | 365: "YAU", 8 | 7: 'WAU' 9 | } 10 | 11 | 12 | def getPAU(data, date, period, sample_factor=1, country_list=None): 13 | """ Calculates the PAU (period active users) for a given period ending on the given date. 14 | 15 | This function is fast for finding the PAU for a small number of dates. 16 | 17 | Parameters: 18 | 19 | data - This should be a sample of the main server ping data frame. 20 | date - The day to calculate PAU for, given as a 'yyyyMMdd' string. 21 | period - The number of days over which we count distinct users. 22 | For example, MAU has period = 28 and YAU has period = 365. 23 | sample_factor - the factor to multiply counts by, pre-calculated based 24 | on sample 25 | country_list - A list of countries that we want to calculate the 26 | PAU for. 27 | 28 | Output: 29 | 30 | A data frame with 3 columns: 31 | submission_date_s3, country, PAU (WAU/MAU/YAU). 
32 | """ 33 | def process_data(data, begin, date): 34 | return ( 35 | data.filter(col('submission_date_s3') > begin) 36 | .filter(col('submission_date_s3') <= date) 37 | .groupBy('country') 38 | .agg((sample_factor * countDistinct('client_id')).alias(active_users_col)) 39 | .select('*', 40 | lit(begin).alias(start_date_col), 41 | lit(date).alias('submission_date_s3'))) 42 | 43 | data_all = data.drop('country').select('*', lit('All').alias('country')) 44 | if country_list is not None: 45 | data_country = data.filter(col('country').isin(country_list)) 46 | # define column names based on period 47 | active_users_col = PERIOD_DESC.get(period, "other") 48 | start_date_col = 'start_date_' + PERIOD_DESC.get(period, "other") 49 | 50 | begin = date_plus_x_days(date, -period) 51 | 52 | current_count = process_data(data_all, begin, date) 53 | if country_list is not None: 54 | df_country = process_data(data_country, begin, date) 55 | current_count = current_count.union(df_country) 56 | 57 | return current_count.select('submission_date_s3', 'country', active_users_col) 58 | 59 | 60 | def getMAU(data, date, sample_factor=1, country_list=None): 61 | """ Helper function for getPAU with period 28. 62 | """ 63 | return getPAU(data, date, 28, sample_factor, country_list) 64 | 65 | 66 | def getYAU(data, date, sample_factor=1, country_list=None): 67 | """ Helper function for getPAU with period 365. 68 | """ 69 | return getPAU(data, date, 365, sample_factor, country_list) 70 | -------------------------------------------------------------------------------- /usage_report/utils/avg_daily_usage.py: -------------------------------------------------------------------------------- 1 | from helpers import date_plus_x_days, keep_countries_and_all 2 | 3 | from pyspark.sql.functions import lit 4 | import pyspark.sql.functions as F 5 | 6 | 7 | def get_daily_avg_session( 8 | data, 9 | date, 10 | period=7, 11 | country_list=None): 12 | """ Calculate Average Daily usage of the last 7 days for a particular date 13 | 14 | Parameters: 15 | data: sample of the main server ping data frame 16 | date: string, with the format of 'yyyyMMdd' 17 | country_list: a list of country names in string 18 | 19 | Returns: 20 | a dataframe with four columns: 21 | 'submission_date_s3', 22 | 'country', 23 | 'avg_daily_usage(hours)' 24 | """ 25 | 26 | data_all = keep_countries_and_all(data, country_list) 27 | begin = date_plus_x_days(date, -period) 28 | 29 | data_agg = data_all\ 30 | .filter("submission_date_s3 <= '{}' and submission_date_s3 > '{}'" 31 | .format(date, begin))\ 32 | .filter("subsession_length <= 86400") .filter("subsession_length > 0")\ 33 | .groupBy('country', 34 | 'client_id', 35 | 'submission_date_s3')\ 36 | .agg(F.sum('subsession_length').alias('total_daily_time'))\ 37 | .select('country', 38 | 'client_id', 39 | 'submission_date_s3', 40 | F.when(F.col('total_daily_time') > 86400, 86400) 41 | .otherwise(F.col('total_daily_time')) 42 | .alias('total_daily_time')) 43 | 44 | country_avg_session = data_agg\ 45 | .groupBy('country', 'client_id')\ 46 | .agg(F.avg('total_daily_time').alias('client_7d_avg'))\ 47 | .groupBy('country')\ 48 | .agg(F.avg('client_7d_avg').alias('avg_daily_subsession_length'))\ 49 | .select(lit(date).alias('submission_date_s3'), '*') 50 | 51 | df = country_avg_session.orderBy( 52 | 'submission_date_s3', 'country') 53 | 54 | df = df.withColumn( 55 | 'avg_daily_usage(hours)', 56 | df.avg_daily_subsession_length / 3600) 57 | 58 | return df.select('submission_date_s3', 'country', 'avg_daily_usage(hours)') 59 | 
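To make the two-step averaging above concrete (per-client mean of the capped daily totals over the window, then a mean across clients), here is a small local sketch. The SparkSession setup and sample rows are invented for illustration, and it assumes `usage_report/` is on `PYTHONPATH` as in the Dockerfile:

```python
# Illustrative only: tiny in-memory dataset, values invented for the example.
from pyspark.sql import SparkSession
from utils.avg_daily_usage import get_daily_avg_session

spark = SparkSession.builder.appName("avg_daily_usage_example").getOrCreate()

rows = [
    # country, client_id, submission_date_s3, subsession_length (seconds)
    ("US", "a", "20180201", 3600),   # client "a", 1 hour on day 1
    ("US", "a", "20180131", 7200),   # client "a", 2 hours on day 2
    ("US", "b", "20180201", 1800),   # client "b", 0.5 hours on day 1
]
df = spark.createDataFrame(
    rows, ["country", "client_id", "submission_date_s3", "subsession_length"])

# Client "a" averages 1.5 h/day and client "b" 0.5 h/day over the 7-day window,
# so avg_daily_usage(hours) for "US" (and for "All") comes out to 1.0.
get_daily_avg_session(df, "20180201", period=7, country_list=["US"]).show()
```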
-------------------------------------------------------------------------------- /usage_report/utils/avg_intensity.py: -------------------------------------------------------------------------------- 1 | from helpers import date_plus_x_days, keep_countries_and_all 2 | 3 | from pyspark.sql.functions import col, lit 4 | import pyspark.sql.functions as F 5 | 6 | 7 | def get_avg_intensity(data, date, period=7, country_list=None): 8 | """ Calculate Average Intensity of the last 7 days for a particular date 9 | 10 | Parameters: 11 | data: sample of the main server ping data frame 12 | date: string, with the format of 'yyyyMMdd' 13 | period: The number of days before to run the analysis on. 14 | country_list: a list of country names in string 15 | 16 | Returns: 17 | a dataframe with three columns: 'submission_date_s3', 'country', 'avg_intensity' 18 | """ 19 | data_all = keep_countries_and_all(data, country_list) 20 | begin = date_plus_x_days(date, -period) 21 | 22 | data_agg = data_all\ 23 | .filter("submission_date_s3 <= '{0}' and submission_date_s3 > '{1}'".format(date, begin))\ 24 | .filter("subsession_length <= 86400")\ 25 | .filter("subsession_length > 0")\ 26 | .filter('active_ticks <= 17280')\ 27 | .groupBy('country', 'client_id', 'submission_date_s3')\ 28 | .agg(F.sum('subsession_length').alias('total_daily_time'), 29 | F.sum('active_ticks').alias('total_daily_ticks'))\ 30 | .select('country', 31 | 'client_id', 32 | 'submission_date_s3', 33 | F.when(F.col('total_daily_time') > 86400, 86400) 34 | .otherwise(F.col('total_daily_time')) 35 | .alias('total_daily_time'), 36 | F.when(F.col('total_daily_ticks') > 17280, 17280) 37 | .otherwise(F.col('total_daily_ticks')) 38 | .alias('total_daily_ticks'))\ 39 | .select('*', 40 | (col('total_daily_ticks') * 5 / col('total_daily_time')) 41 | .alias('daily_intensity'))\ 42 | .select('country', 43 | 'client_id', 44 | 'submission_date_s3', 45 | F.when(F.col('daily_intensity') > 1, 1) 46 | .otherwise(F.col('daily_intensity')) 47 | .alias('daily_intensity')) 48 | 49 | country_avg_intensity = data_agg\ 50 | .groupBy('country', 'client_id')\ 51 | .agg(F.avg('daily_intensity').alias('avg_7d_intensity'))\ 52 | .groupBy('country')\ 53 | .agg(F.avg('avg_7d_intensity').alias('avg_intensity'))\ 54 | .select(lit(date).alias('submission_date_s3'), '*') 55 | 56 | df = country_avg_intensity.orderBy('submission_date_s3', 'country') 57 | 58 | return df 59 | -------------------------------------------------------------------------------- /usage_report/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import pyspark.sql.functions as F 3 | 4 | 5 | def date_plus_x_days(date, x): 6 | ''' 7 | ''' 8 | 9 | new_date = dt.datetime.strptime(date, '%Y%m%d') + dt.timedelta(days=x) 10 | return new_date.strftime('%Y%m%d') 11 | 12 | 13 | def keep_countries_and_all(data, country_list): 14 | """ Takes the main ping server and makes a country `All` and keeps only countries 15 | in country_list and All. 16 | 17 | Parameters: 18 | data: The main ping server. 19 | country_list: The list of countries to keep. 20 | """ 21 | data_all = data.withColumn('country', F.lit('All')) 22 | 23 | if country_list is not None: 24 | data_countries = data.filter(F.col('country').isin(country_list)) 25 | data_all = data_all.union(data_countries) 26 | 27 | return data_all 28 | 29 | 30 | def get_dest(bucket, prefix, version, spark_provider='emr', date=None, sample_id=None): 31 | ''' 32 | Stiches together an s3 or gcs destination. 
33 | :param bucket: s3 or gcs bucket 34 | :param prefix: s3 or gcs prefix (within bucket) 35 | :param version: dataset version 36 | :param spark_provider: either 'emr' or 'dataproc' 37 | :return str -> 38 | s3|gs://bucket/prefix/version/submission_date_s3=[date]/sample_id=[sid] 39 | ''' 40 | 41 | if spark_provider == 'dataproc': 42 | storage_prefix = 'gs://' 43 | else: 44 | storage_prefix = 's3://' 45 | 46 | suffix = '' 47 | if date is not None: 48 | suffix += "/submission_date_s3={}".format(date) 49 | if sample_id is not None: 50 | suffix += "/sample_id={}".format(sample_id) 51 | full_dest = storage_prefix + '/'.join([bucket, prefix, version]) + suffix + '/' 52 | return full_dest 53 | 54 | 55 | def load_main_summary(spark, input_bucket, input_prefix, input_version, spark_provider='emr'): 56 | ''' 57 | Loads main_summary from the bucket constructed from 58 | input_bucket, input_prefix, input_version 59 | :param spark: SparkSession object 60 | :param input_bucket: s3 bucket (telemetry-parquet) 61 | :param input_prefix: s3 prefix (main_summary) 62 | :param input_version: dataset version (v4) 63 | :param spark_provider: either 'emr' or 'dataproc' 64 | :return SparkDF 65 | ''' 66 | 67 | dest = get_dest(input_bucket, input_prefix, input_version, spark_provider) 68 | return (spark 69 | .read 70 | .option("mergeSchema", True) 71 | .parquet(dest)) 72 | -------------------------------------------------------------------------------- /usage_report/utils/localedistribution.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql.functions import lit, col, desc, countDistinct 2 | from pyspark.sql import Window 3 | import pyspark.sql.functions as F 4 | from helpers import date_plus_x_days, keep_countries_and_all 5 | 6 | 7 | def locale_on_date(data, date, topN, period=7, country_list=None): 8 | """ Gets the ratio of the top locales in each country over the last week. 9 | 10 | parameters: 11 | data: The main ping server 12 | date: The date to find the locale distribution 13 | topN: The number of locales to get for each country. Only does the top N. 
14 | period: The number of days before the date to include in the analysis 15 | country_list: The list of countries to look at in the analysis 16 | 17 | output: 18 | dataframe with columns: 19 | ['country', 'submission_date_s3', 'locale', 'pct_on_locale'] 20 | """ 21 | data_all = keep_countries_and_all(data, country_list) 22 | begin = date_plus_x_days(date, -period) 23 | 24 | wau = data_all\ 25 | .filter((col('submission_date_s3') <= date) & (col('submission_date_s3') > begin))\ 26 | .groupBy('country')\ 27 | .agg(countDistinct('client_id').alias('WAU')) 28 | 29 | locale_wau = data_all\ 30 | .filter((col('submission_date_s3') <= date) & (col('submission_date_s3') > begin))\ 31 | .groupBy('country', 'locale')\ 32 | .agg(countDistinct('client_id').alias('WAU_on_locale'))\ 33 | .select(lit(begin).alias('start_date'), lit(date).alias('submission_date_s3'), 34 | 'country', 'WAU_on_locale', 'locale') 35 | 36 | res = locale_wau.join(wau, 'country', how='left')\ 37 | .select('start_date', 'submission_date_s3', 38 | 'country', 'WAU_on_locale', 'locale', 'WAU') 39 | 40 | rank_window = Window.partitionBy('country', 'submission_date_s3').orderBy(desc('WAU_on_locale')) 41 | 42 | return res.select('*', F.row_number().over(rank_window).alias('rank'))\ 43 | .filter(col('rank') <= topN)\ 44 | .select('submission_date_s3', 'country', 'locale', 45 | (100.0 * col('WAU_on_locale') / col('WAU')).alias('pct_on_locale')) 46 | -------------------------------------------------------------------------------- /usage_report/utils/newuser.py: -------------------------------------------------------------------------------- 1 | from activeuser import getPAU 2 | from pyspark.sql.functions import col, lit, countDistinct, from_unixtime 3 | from helpers import date_plus_x_days 4 | 5 | 6 | def getWAU(data, date, country_list=None): 7 | """ Helper function for getPAU with period 7 days. 8 | """ 9 | return getPAU(data, date, period=7, country_list=country_list) 10 | 11 | 12 | def new_users(data, date, period=7, country_list=None): 13 | """Gets the percentage of WAU that are new users. 14 | 15 | Parameters: 16 | 17 | data - This should be the entire main server ping data frame. 18 | date - The date to calculate for, given as a 'yyyyMMdd' string. 19 | period - The number of days before the date to include in the analysis 20 | country_list - A list of countries that we want to calculate the 21 | percentage of new users for. 
22 | 23 | Returns: 24 | A dataframe with columns 25 | submission_date_s3, country, pct_new_users 26 | """ 27 | 28 | cols = ['submission_date_s3', 'client_id', 'profile_creation_date', 29 | 'country'] 30 | 31 | wau = getWAU(data, date, country_list=country_list) 32 | df = data.drop('country').select('*', lit('All').alias('country')) 33 | 34 | if country_list is not None: 35 | df = ( 36 | df.select(cols).union(data.select(cols) 37 | .filter(col('country').isin(country_list)))) 38 | begin = date_plus_x_days(date, -period) 39 | new_profiles = (df.filter(df.submission_date_s3 <= date) 40 | .filter(df.submission_date_s3 > begin) 41 | .withColumn('pcd_str', 42 | from_unixtime(col('profile_creation_date') * 24 * 60 * 60, 43 | format='yyyyMMdd')) 44 | .filter(col('pcd_str') <= date) 45 | .filter(col('pcd_str') > begin)) 46 | 47 | new_user_counts = ( 48 | new_profiles 49 | .groupBy('country') 50 | .agg((countDistinct('client_id')).alias('new_users'))) 51 | 52 | return wau.join(new_user_counts, on=['country'], how='left')\ 53 | .select('submission_date_s3', 54 | 'country', 55 | (100.0 * col('new_users') / col('WAU')).alias('pct_new_user')) 56 | -------------------------------------------------------------------------------- /usage_report/utils/osdistribution.py: -------------------------------------------------------------------------------- 1 | # deprecated for now to avoid overlap with FHR os 2 | from pyspark.sql.functions import col, countDistinct, lit, when 3 | from helpers import date_plus_x_days, keep_countries_and_all 4 | 5 | 6 | def window_version(os_version): 7 | """ 8 | Takes the Windows Kernel version number and 9 | produces the associated consumer windows version. 10 | """ 11 | return when(os_version == '10.0', 'Windows 10')\ 12 | .when(os_version == '6.1', 'Windows 7')\ 13 | .when(os_version == '6.2', 'Windows 8')\ 14 | .when(os_version == '6.3', 'Windows 8')\ 15 | .when(os_version == '5.1', 'Windows XP')\ 16 | .when(os_version == '5.2', 'Windows XP')\ 17 | .when(os_version == '6.0', 'Windows Vista')\ 18 | .otherwise('Other Windows') 19 | 20 | 21 | def nice_os(os, os_version): 22 | """ Splits the major windows versions up and keeps mac os x and linux combined.""" 23 | return when(os == 'Windows_NT', window_version(os_version))\ 24 | .when(os == 'Windows_95', 'Other Windows')\ 25 | .when(os == 'Windows_98', 'Other Windows')\ 26 | .when(os == "Darwin", "Mac OS X")\ 27 | .otherwise('Other') 28 | 29 | 30 | def os_on_date(data, date, period=7, country_list=None): 31 | """ Gets the distribution of OS usage calculated on the WAU on 1 day. 32 | 33 | Parameters: 34 | data: Usually the main summary data frame. 35 | date: day to get the os distribution for the past week. 36 | period: The number of days to calculate the distibution. By default it finds os 37 | distribution over a week. 38 | country_list: The countries to do the analysis. If None then it does it for the whole 39 | world. 
40 | 41 | Returns: 42 | submission_date_s3, country, os, pct_on_os 43 | """ 44 | 45 | data_all = keep_countries_and_all(data, country_list) 46 | begin = date_plus_x_days(date, -period) 47 | data_all = data_all.select('client_id', 'submission_date_s3', 'country', 48 | nice_os(col('os'), col('os_version')).alias('nice_os')) 49 | 50 | # Calculate the WAU 51 | wau = data_all\ 52 | .filter((col('submission_date_s3') <= date) & (col('submission_date_s3') > begin))\ 53 | .groupBy('country')\ 54 | .agg(countDistinct('client_id').alias('WAU')) 55 | 56 | os_wau = data_all\ 57 | .filter((col('submission_date_s3') <= date) & 58 | (col('submission_date_s3') > begin))\ 59 | .groupBy('country', 'nice_os')\ 60 | .agg(countDistinct('client_id').alias('WAU_on_OS'))\ 61 | .select(lit(begin).alias('start_date'), lit(date).alias('submission_date_s3'), 62 | 'country', 'WAU_on_OS', 'nice_os') 63 | 64 | res = os_wau.join(wau, 'country', how='left')\ 65 | .select('start_date', 'submission_date_s3', 66 | 'country', 'WAU_on_OS', 'nice_os', 'WAU') 67 | 68 | return res.select('submission_date_s3', 'country', col('nice_os').alias('os'), 69 | (100.0 * col('WAU_on_OS') / col('WAU')).alias('pct_on_os')) 70 | -------------------------------------------------------------------------------- /usage_report/utils/pct_addon.py: -------------------------------------------------------------------------------- 1 | import json 2 | from helpers import date_plus_x_days, keep_countries_and_all 3 | 4 | # from pyspark.sql.functions import col, lit, mean, split 5 | import pyspark.sql.functions as F 6 | 7 | 8 | def get_test_pilot_addons(): 9 | ''' 10 | Fetches all the live test pilot experiments listed in 11 | the experiments.json file. 12 | returns a list of addon_ids 13 | ''' 14 | file_path = "usage_report/resources/experiments.json" 15 | with open(file_path) as f: 16 | data = json.load(f) 17 | all_tp_addons = ["@testpilot-addon"] + [i.get("addon_id") 18 | for i in data['results'] 19 | if i.get("addon_id")] 20 | return all_tp_addons 21 | 22 | 23 | # grab all tp addons without a mozilla suffix 24 | NON_MOZ_TP = [i for i in get_test_pilot_addons() if "@mozilla" not in i] 25 | 26 | # this study is everywhere 27 | UNIFIED_SEARCH_STR = '@unified-urlbar-shield-study-' 28 | 29 | 30 | def get_addon(data, 31 | date, 32 | period=7, 33 | country_list=None): 34 | """ Calculate the proportion of WAU that have a "self installed" addon for a specific date 35 | 36 | Parameters: 37 | data: sample of the main server ping data frame 38 | date: string, with the format of 'yyyyMMdd' 39 | period: The number of days to include in the analysis 40 | country_list: a list of country names in string 41 | 42 | Returns: 43 | a dataframe showing the information for the given date 44 | - three columns: 'submission_date_s3', 'country', 'pct_addon' 45 | """ 46 | 47 | data_all = keep_countries_and_all(data, country_list) 48 | begin = date_plus_x_days(date, -period) 49 | 50 | addon_filter = (~F.col('addon.is_system')) & (~F.col('addon.foreign_install')) &\ 51 | (~F.col('addon.addon_id').isin(NON_MOZ_TP)) &\ 52 | (~F.col('addon.addon_id').like('%@mozilla%')) &\ 53 | (~F.col('addon.addon_id').like('%@shield.mozilla%')) &\ 54 | (~F.col('addon.addon_id').like('%' + UNIFIED_SEARCH_STR + '%')) 55 | 56 | WAU = data_all\ 57 | .filter("submission_date_s3 <= '{0}' and submission_date_s3 > '{1}'".format(date, begin))\ 58 | .groupBy('country')\ 59 | .agg(F.countDistinct('client_id').alias('WAU')) 60 | 61 | addon_count = data_all\ 62 | .filter("submission_date_s3 <= 
'{0}' and submission_date_s3 > '{1}'".format(date, begin))\ 63 | .select('submission_date_s3', 'country', 'client_id', 64 | F.explode('active_addons').alias('addon'))\ 65 | .filter(addon_filter)\ 66 | .groupBy('country')\ 67 | .agg(F.countDistinct('client_id').alias('add_on_count')) 68 | 69 | join_df = WAU.join(addon_count, 'country', how='left')\ 70 | .withColumn("pct_addon", (100.0 * F.col("add_on_count") / F.col("WAU")))\ 71 | .select(F.lit(date).alias('submission_date_s3'), '*') 72 | 73 | return join_df.select('submission_date_s3', 'country', 'pct_addon') 74 | -------------------------------------------------------------------------------- /usage_report/utils/pct_latest_version.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import urllib 4 | 5 | from pyspark.sql.functions import split 6 | import pyspark.sql.functions as F 7 | from helpers import date_plus_x_days, keep_countries_and_all 8 | 9 | RELEASE_VERSIONS_URL = "https://product-details.mozilla.org/1.0/firefox_history_major_releases.json" 10 | 11 | 12 | def get_latest_version(date, url): 13 | """ check a url and get the latest release given a date 14 | Param: 15 | date: date in question. should be YYYYMMDD format (str) 16 | url: url where the Firefox release history json lives 17 | Return: release major version for that date (50, not 50.0) 18 | """ 19 | date = datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d') 20 | response = urllib.urlopen(url) 21 | jrelease = json.loads(response.read()) 22 | jrelease = dict((v, k) for k, v in jrelease.iteritems()) 23 | last_update = max([release_date for release_date in jrelease.keys() if release_date <= date]) 24 | return jrelease[last_update].split('.')[0] 25 | 26 | 27 | def pct_new_version(data, 28 | date, 29 | period=7, 30 | country_list=None, 31 | url=RELEASE_VERSIONS_URL): 32 | """ Calculate the proportion of active users on the latest release version every day. 
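A hypothetical call (not part of the original code, assuming `main_summary` is already loaded) would be:

    pct_new_version(main_summary, '20180401', country_list=['DE', 'US'])

which looks up the newest major release as of that date via get_latest_version and reports the share of each country's WAU on that version or newer.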
33 | Parameters: 34 | data: sample of the main server ping data frame 35 | date: The day to calculate the metric 36 | period: number of days to use to calculate metric 37 | country_list: a list of country names in string 38 | url: the url of the json file containing all the firefox release information to date 39 | Returns: 40 | a dataframe with three columns - 'country', 'submission_date_s3', 41 | 'pct_latest_version' 42 | """ 43 | 44 | data_all = keep_countries_and_all(data, country_list) 45 | begin = date_plus_x_days(date, -period) 46 | 47 | latest_version = get_latest_version(date, url) 48 | data_filtered = data_all.filter(""" 49 | {0} >= '{1}' and {0} <= '{2}' 50 | """.format("submission_date_s3", begin, date))\ 51 | .withColumn('app_major_version', 52 | split('app_version', 53 | r'\.').getItem(0))\ 54 | .select('submission_date_s3', 55 | 'client_id', 56 | 'app_major_version', 57 | 'country') 58 | 59 | WAU = data_filtered.groupBy('country')\ 60 | .agg(F.countDistinct('client_id').alias('WAU')) 61 | WAU_latest = data_filtered.filter(F.col('app_major_version') >= F.lit(latest_version))\ 62 | .groupBy('country')\ 63 | .agg(F.countDistinct('client_id').alias('WAU_is_latest')) 64 | join_df = WAU.join(WAU_latest, 'country', 'left')\ 65 | .withColumn("pct_latest_version", (100.0 * F.col("WAU_is_latest") / F.col("WAU")))\ 66 | .select(F.lit(date).alias('submission_date_s3'), 67 | 'country', 68 | F.coalesce('pct_latest_version', F.lit(0)).alias('pct_latest_version')) 69 | return join_df 70 | -------------------------------------------------------------------------------- /usage_report/utils/process_output.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | # Note: the code makes the following assumptions, otherwise 4 | # it will throw an error by design: 5 | # dataframes are for a single submission_date_s3 6 | # historical data and new data have same countries and metrics 7 | # single metric per country (for single metrics) 8 | # it will not throw error for: 9 | # faceted metrics, unique facets (addons have repeat names) 10 | # countries differ from previous (print warning) 11 | # metrics differ from last entry (print warning) 12 | 13 | FXHEALTH_METRICS = ['YAU', 14 | 'MAU', 15 | 'pct_new_user', 16 | 'pct_latest_version', 17 | 'avg_daily_usage(hours)', 18 | 'avg_intensity'] 19 | 20 | WEBUSAGE_METRICS_1DIM = ['pct_addon', 21 | 'pct_TP'] 22 | 23 | WEBUSAGE_METRICS_2DIM = {'locale': ('locale', 'pct_on_locale'), 24 | 'top10addons': ('addon_name', 'pct_with_addon')} 25 | 26 | 27 | def check_unique(df, metric_col): 28 | """ make sure df has 1 row for single value metrics 29 | params: df, pandas df, metric_col, str 30 | return: nothing, but raise error if assumptions not met 31 | """ 32 | if len(df[metric_col]) != 1: 33 | raise ValueError('there should be 1 metric') 34 | 35 | 36 | def check_dataframes(*dfs): 37 | """ check dataframes for assumptions we make for processing 38 | which are same country and same date 39 | params: dfs, at least 2 dataframes 40 | return: nothing, but raise error if assumptions not met 41 | """ 42 | country_sets = map(lambda x: set(x['country']), dfs) 43 | date_sets = map(lambda x: set(x['submission_date_s3']), dfs) 44 | if not all([a == b for a, b 45 | in zip(country_sets[:-1], 46 | country_sets[1:])]): 47 | raise ValueError('countries are different') 48 | if not all([a == b for a, b 49 | in zip(date_sets[:-1], 50 | date_sets[1:])]): 51 | raise ValueError('dates are different') 52 | if len(date_sets[0]) != 1: 53 | 
raise ValueError('wrong number of dates') 54 | 55 | 56 | def one_dim_extract(pd_df, 57 | country, 58 | metric_col): 59 | """get a metric for a country from a pandas df 60 | params: pd_df, pandas dataframe, (ex. usage) 61 | country: str 62 | metric_col: str 63 | return: a single numeric of some kind 64 | """ 65 | pd_df_filtered = pd_df[pd_df['country'] == country].reset_index(drop=True) 66 | check_unique(pd_df_filtered, metric_col) 67 | return pd_df_filtered.iloc[0][metric_col] 68 | 69 | 70 | def two_dim_extract(pd_df, 71 | country, 72 | facet_col, 73 | metric_col): 74 | """ extract {facet: metric} dict from a pandas dataframe 75 | params: pd_df, faceted pandas dataframe, (ex, locales, top10addon) 76 | country, str 77 | facet_col, str (ex. 'locale') 78 | metric_col, str (ex. 'pct_on_locale') 79 | return: 80 | nested_dict, dict, keys are facets, values are metrics 81 | """ 82 | nested_dict = {} 83 | pd_df_filtered = pd_df[pd_df['country'] == country].reset_index(drop=True) 84 | for i in pd_df_filtered.index: 85 | i_row = pd_df_filtered.iloc[i] 86 | nested_dict[i_row[facet_col]] = i_row[metric_col] 87 | return nested_dict 88 | 89 | 90 | def fxhealth_per_day_country(usage_pd_df, 91 | country): 92 | """ get fxhealth metrics 93 | params: usage_pd_df, pandas df 94 | country: country, str 95 | return: day_dict, {date: dict's date, 96 | metrics: 97 | {metric: value, 98 | ...}} 99 | """ 100 | day_dict = {} 101 | date = usage_pd_df['submission_date_s3'][0] 102 | date = datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d') 103 | 104 | day_dict['date'] = date 105 | day_dict['metrics'] = {} 106 | 107 | for metric in FXHEALTH_METRICS: 108 | day_dict['metrics'][metric] = one_dim_extract(usage_pd_df, 109 | country, 110 | metric) 111 | return day_dict 112 | 113 | 114 | def webusage_per_day_country(usage_pd_df, 115 | locales_pd_df, 116 | topaddons_pd_df, 117 | country): 118 | """ get webusage metrics 119 | params: usage_pd_df, pandas df, 1dim metrics 120 | locales_pd_df, topaddons_pd_df, pandas df, 2dim metrics 121 | country, str 122 | return: day_dict, {date: dict's date, 123 | metrics: 124 | {metric: value, 125 | metric: 126 | {facet: value, 127 | ...}, ...}} 128 | """ 129 | day_dict = {} 130 | date = usage_pd_df['submission_date_s3'][0] 131 | date = datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d') 132 | 133 | day_dict['date'] = date 134 | day_dict['metrics'] = {} 135 | 136 | for metric in WEBUSAGE_METRICS_1DIM: 137 | day_dict['metrics'][metric] = one_dim_extract(usage_pd_df, 138 | country, 139 | metric) 140 | 141 | for df, metric in [(locales_pd_df, 'locale'), 142 | (topaddons_pd_df, 'top10addons')]: 143 | day_dict['metrics'][metric] = two_dim_extract(df, 144 | country, 145 | WEBUSAGE_METRICS_2DIM[metric][0], 146 | WEBUSAGE_METRICS_2DIM[metric][1]) 147 | return day_dict 148 | 149 | 150 | def all_metrics_per_day(country_list, usage_pd_df, locales_pd_df, topaddons_pd_df): 151 | """ get fxhealth and webusage metrics, all countries 152 | params: country_list, list of strings 153 | various dfs, pandas dfs 154 | return: tuple of dicts 155 | """ 156 | check_dataframes(usage_pd_df, 157 | locales_pd_df, 158 | topaddons_pd_df) 159 | fxhealth, webusage = {}, {} 160 | country_list = country_list + ['All'] 161 | for country in country_list: 162 | fxhealth[country] = fxhealth_per_day_country(usage_pd_df, 163 | country) 164 | webusage[country] = webusage_per_day_country(usage_pd_df, 165 | locales_pd_df, 166 | topaddons_pd_df, 167 | country) 168 | return (fxhealth, webusage) 169 | 170 | 171 | def 
rename_keys(input_dict, country_name_mappings): 172 | """ copy dict with country keys renamed with full names 173 | params: input_dict, a metric dict 174 | country_name_mappings, dict, {abbr. country: full country} 175 | return: output_dict, same as input with renamed keys 176 | """ 177 | return {country_name_mappings[k]: v 178 | for k, v in input_dict.iteritems()} 179 | 180 | 181 | def check_dict_keys(dict1, dict2, message): 182 | """ check if keys are the same 183 | params: dict1, dict2, comparison dicts 184 | message, what to print 185 | return: nothing 186 | """ 187 | if set(dict1.keys()) != set(dict2.keys()): 188 | print message 189 | 190 | 191 | def update_history(day_dict, history_dict=None): 192 | """ updates history dict, 193 | also checks country, metric, and date compat 194 | params: 195 | history_dict, dict, {'country': [{data1}, {data2}, ...], ...} 196 | day_dict, dict, {'country': {data}, ...} 197 | return: copy of history_dict updated w/new day's data 198 | """ 199 | if history_dict is None: 200 | history_dict = {} 201 | for country in day_dict.keys(): 202 | history_dict[country] = [day_dict[country]] 203 | history_dict = history_dict.copy() 204 | check_dict_keys(history_dict, 205 | day_dict, 206 | "warning: countries don't match") 207 | for country in day_dict.keys(): 208 | if country in history_dict.keys(): 209 | check_dict_keys(history_dict[country][-1], 210 | day_dict[country], 211 | "warning: metrics don't match last entry ({})".format(country)) 212 | previous_dates = [entry['date'] for entry in history_dict[country]] 213 | if day_dict[country]['date'] in previous_dates: 214 | replace_position = previous_dates.index(day_dict[country]['date']) 215 | history_dict[country][replace_position] = day_dict[country] 216 | else: 217 | history_dict[country].append(day_dict[country]) 218 | else: 219 | history_dict[country] = [day_dict[country]] 220 | return history_dict 221 | -------------------------------------------------------------------------------- /usage_report/utils/s3_utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | 4 | 5 | def file_exists(bucket_name, filename, aws_access_key_id=None, aws_secret_access_key=None): 6 | """ check if a file exists in S3 7 | params: bucket_name, str, name of bucket 8 | filename, str, name of file (prefix + file name) 9 | aws_access_key_id, aws_secret_access_key, if None it should check env 10 | return: True if file exists 11 | """ 12 | s3 = boto3.Session(aws_access_key_id=aws_access_key_id, 13 | aws_secret_access_key=aws_secret_access_key).resource('s3') 14 | bucket = s3.Bucket(bucket_name) 15 | objs = list(bucket.objects.filter(Prefix=filename)) 16 | if len(objs) > 0 and objs[0].key == filename: 17 | return True 18 | else: 19 | return False 20 | 21 | 22 | def read_from_s3(bucket_name, filename, aws_access_key_id=None, aws_secret_access_key=None): 23 | """ read JSON from s3 24 | params: bucket_name, str, name of bucket 25 | filename, str, name of file (prefix + file name) 26 | return: JSON as dict, None if file doesn't exist in S3 27 | """ 28 | if file_exists(bucket_name, filename, aws_access_key_id, aws_secret_access_key): 29 | s3 = boto3.Session(aws_access_key_id=aws_access_key_id, 30 | aws_secret_access_key=aws_secret_access_key).resource('s3') 31 | content_object = s3.Object(bucket_name, filename) 32 | file_content = content_object.get()['Body'].read().decode('utf-8') 33 | return json.loads(file_content) 34 | 35 | 36 | def write_to_s3(bucket_name, filename, 
data, aws_access_key_id=None, aws_secret_access_key=None, 37 | acl='public-read'): 38 | """ write dict as JSON to s3 39 | params: bucket_name, str, name of bucket 40 | filename, str, name of file (prefix + file name) 41 | return: nothing 42 | """ 43 | if isinstance(data, str): 44 | body = data 45 | else: 46 | body = json.dumps(data, ensure_ascii=False) 47 | 48 | print "Uploading {}b of data to s3://{}/{}".format(len(body), bucket_name, filename) 49 | 50 | s3 = boto3.Session(aws_access_key_id=aws_access_key_id, 51 | aws_secret_access_key=aws_secret_access_key).resource('s3') 52 | obj = s3.Object(bucket_name, filename) 53 | obj.put(Body=body.encode('utf8'), ACL=acl) 54 | -------------------------------------------------------------------------------- /usage_report/utils/top10addons.py: -------------------------------------------------------------------------------- 1 | import pyspark.sql.functions as F 2 | from pyspark.sql.functions import lit, col, desc 3 | from pyspark.sql import Window 4 | import json 5 | from helpers import date_plus_x_days, keep_countries_and_all 6 | 7 | 8 | def get_test_pilot_addons(): 9 | ''' 10 | Fetches all the live test pilot experiments listed in 11 | the experiments.json file. 12 | returns a list of addon_ids 13 | ''' 14 | file_path = "usage_report/resources/experiments.json" 15 | with open(file_path) as f: 16 | data = json.load(f) 17 | all_tp_addons = ["@testpilot-addon"] + [i.get("addon_id") 18 | for i in data['results'] 19 | if i.get("addon_id")] 20 | return all_tp_addons 21 | 22 | 23 | # grab all tp addons without a mozilla suffix 24 | NON_MOZ_TP = [i for i in get_test_pilot_addons() if "@mozilla" not in i] 25 | 26 | # this study is everywhere 27 | UNIFIED_SEARCH_STR = '@unified-urlbar-shield-study-' 28 | 29 | 30 | def top_10_addons_on_date(data, date, topN, period=7, country_list=None): 31 | """ Gets the top N addons by number of users in the past week, 32 | broken down by country. 33 | 34 | Parameters: 35 | data - The main server ping data frame. 36 | date - The day for which you want to get the top N addons. 37 | topN - the number of addons to get. 38 | period - number of days to use to calculate metric 39 | country_list - a list of country names in string 40 | 41 | Returns: 42 | Dataframe containing the percentage of active users with each of the top addons, with columns 43 | submission_date_s3, country, addon_id, addon_name, pct_with_addon
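Example (illustrative only, not part of the original module; assumes `main_summary` has already been loaded):

    top_10_addons_on_date(main_summary, '20180401', 10, period=7, country_list=['DE', 'US'])

This would rank self-installed add-ons by distinct users over the week ending 20180401 and keep the top 10 per country (plus 'All').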
44 | """ 45 | addon_filter = (~col('addon.is_system')) & (~col('addon.foreign_install')) & \ 46 | (~col('addon.addon_id').isin(NON_MOZ_TP)) & (~col('addon.addon_id').like('%@mozilla%')) &\ 47 | (~col('addon.addon_id').like('%@shield.mozilla%')) &\ 48 | (~col('addon.addon_id').like('%' + UNIFIED_SEARCH_STR + '%')) 49 | 50 | data_all = keep_countries_and_all(data, country_list) 51 | begin = date_plus_x_days(date, -period) 52 | 53 | wau = data_all.filter((col('submission_date_s3') > begin) & 54 | (col('submission_date_s3') <= date))\ 55 | .groupBy('country')\ 56 | .agg(lit(date).alias('submission_date_s3'), 57 | F.countDistinct('client_id').alias('wau')) 58 | 59 | counts = data_all.select('submission_date_s3', 'country', 'client_id', 60 | F.explode('active_addons').alias('addon'))\ 61 | .filter((col('submission_date_s3') > begin) & 62 | (col('submission_date_s3') <= date))\ 63 | .filter(addon_filter)\ 64 | .select('country', 'client_id', 'addon.addon_id', 'addon.name')\ 65 | .distinct()\ 66 | .groupBy('country', 'addon_id')\ 67 | .agg(F.count('*').alias('number_of_users'), F.last('name').alias('name'))\ 68 | .select('*', lit(date).alias('submission_date_s3'), 69 | lit(begin).alias('start_date'), 70 | F.row_number().over(Window.partitionBy('country') 71 | .orderBy(desc('number_of_users')) 72 | .rowsBetween(Window.unboundedPreceding, Window.currentRow)) 73 | .alias('rank'))\ 74 | .filter(col('rank') <= topN) 75 | 76 | return counts.join(F.broadcast(wau), on=['country'], how='left')\ 77 | .select(lit(date).alias('submission_date_s3'), 'country', 78 | 'addon_id', col('name').alias('addon_name'), 79 | (100.0 * col('number_of_users') / col('wau')).alias('pct_with_addon')) 80 | -------------------------------------------------------------------------------- /usage_report/utils/trackingprotection.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | # from pyspark.sql.functions import col, lit, mean, split 4 | import pyspark.sql.functions as F 5 | 6 | 7 | def pct_tracking_protection(data, 8 | date, 9 | period=7, 10 | country_list=None): 11 | """ Calculate the proportion of WAU that have had Tracking Protection 12 | turned on in at least one session/window 13 | Parameters: 14 | data: spark df, main summary 15 | date: string, with the format 'yyyyMMdd' 16 | period: int, period to check proportion for, 7 for WAU 17 | country_list: a list of country names in string 18 | 19 | Returns: 20 | a spark df with the following columns 21 | - columns: | submission_date_s3 | country | pct_TP | 22 | """ 23 | enddate = datetime.datetime.strptime(date, '%Y%m%d') 24 | begin = enddate - datetime.timedelta(days=period) 25 | begin = begin.strftime('%Y%m%d') 26 | 27 | data_all = data.drop('country')\ 28 | .select('submission_date_s3', 29 | 'client_id', 30 | F.col('histogram_parent_tracking_protection_enabled.1').alias('TP_on'), 31 | F.lit('All').alias('country')) 32 | 33 | if country_list: 34 | data_countries = ( 35 | data.filter(F.col('country').isin(country_list)) 36 | .select('submission_date_s3', 37 | 'client_id', 38 | F.col('histogram_parent_tracking_protection_enabled.1').alias('TP_on'), 39 | 'country')) 40 | data_all = data_all.union(data_countries) 41 | 42 | def get_number_of_users(df, count_name): 43 | return df.groupBy('country')\ 44 | .agg(F.countDistinct('client_id').alias(count_name)) 45 | 46 | WAU = get_number_of_users( 47 | data_all.filter("""submission_date_s3 <= '{}' 48 | and 
submission_date_s3 > '{}' 49 | """.format(date, begin)), 50 | 'WAU') 51 | WAU_TP = get_number_of_users( 52 | data_all.filter("""submission_date_s3 <= '{}' 53 | and submission_date_s3 > '{}' 54 | """.format(date, begin)) 55 | .filter(F.col('TP_on') > 0), 56 | 'WAU_TP') 57 | 58 | join_df = WAU.join(WAU_TP, 'country', 'left')\ 59 | .withColumn("pct_TP", (100.0 * F.col("WAU_TP") / F.col("WAU")))\ 60 | .select(F.lit(date).alias('submission_date_s3'), 61 | 'country', 62 | F.coalesce('pct_TP', F.lit(0)).alias('pct_TP')) 63 | return join_df 64 | --------------------------------------------------------------------------------
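A hedged sketch of how these per-metric helpers might be wired together for one day's run (illustrative only; the function names and signatures come from the modules above, while `main_summary`, the date, and the country list are assumptions, and the repo's actual driver may differ):

    date = '20180401'
    countries = ['DE', 'GB', 'US']
    # each helper returns a small per-country Spark DataFrame for the week ending on `date`
    tp = pct_tracking_protection(main_summary, date, period=7, country_list=countries)
    top_addons = top_10_addons_on_date(main_summary, date, 10, period=7, country_list=countries)
    # the per-metric frames are collected (e.g. with .toPandas()), reshaped into the
    # per-country JSON structure by the process_output helpers, merged into the existing
    # history with update_history, and finally uploaded with write_to_s3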