├── .circleci └── config.yml ├── .gitignore ├── DATAFORMAT.md ├── Dockerfile ├── METRICS.md ├── Makefile ├── README.md ├── docker-compose.yaml ├── requirements.txt ├── setup.py ├── tests ├── helpers │ ├── __init__.py │ └── utils.py ├── test_integration_function.py ├── test_process_output.py └── test_usage_report.py └── usage_report ├── __init__.py ├── annotations ├── annotations_fxhealth.json ├── annotations_hardware.json ├── annotations_webusage.json └── readme.md ├── resources └── experiments.json ├── usage_report.py └── utils ├── __init__.py ├── activeuser.py ├── avg_daily_usage.py ├── avg_intensity.py ├── helpers.py ├── localedistribution.py ├── newuser.py ├── osdistribution.py ├── pct_addon.py ├── pct_latest_version.py ├── process_output.py ├── s3_utils.py ├── top10addons.py └── trackingprotection.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | #################### 2 | # CircleCI configuration reference: 3 | # https://circleci.com/docs/2.0/configuration-reference 4 | #################### 5 | 6 | version: 2 7 | 8 | ##################################################### 9 | # Jobs: see https://circleci.com/docs/2.0/jobs-steps/ 10 | ##################################################### 11 | 12 | jobs: 13 | test: 14 | docker: 15 | - image: mozilla/cidockerbases:docker-latest 16 | steps: 17 | - checkout 18 | - setup_remote_docker 19 | - run: 20 | name: Build image 21 | command: make build 22 | - run: 23 | name: Test Code 24 | command: make test 25 | - run: 26 | name: Lint 27 | command: make lint 28 | 29 | workflows: 30 | version: 2 31 | build-test-deploy: 32 | jobs: 33 | - test: 34 | filters: 35 | tags: 36 | only: /.*/ 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .tox 2 | .pytest_cache 3 | .coverage* 4 | Fx_Usage_Report.egg-info 5 | *.pyc 6 | -------------------------------------------------------------------------------- /DATAFORMAT.md: -------------------------------------------------------------------------------- 1 | # Data Format 2 | 3 | This job will first process the metrics into pandas dataframes with the following fields: 4 | 5 | `submission_date_s3|country| metric1| metric2| etc.|` 6 | 7 | or 8 | 9 | `submission_date_s3|country| metric| dimension| value|` 10 | 11 | However, because Ensemble requires the data to be in a specific Ensemble JSON format, the data is kept in a different reshaped form: 12 | 13 | ``` 14 | { 15 | "Germany": [ 16 | { 17 | "date": "2017-01-01", 18 | "metrics": { 19 | "YAU": 999, 20 | "etc": "etc", 21 | "locale": { 22 | "DE": 0.99, 23 | "etc": "etc" 24 | } 25 | } 26 | }, 27 | { 28 | "date": "etc", 29 | "metrics": { 30 | "etc": "etc" 31 | } 32 | } 33 | ], 34 | "United States": [ 35 | { 36 | "date": "etc", 37 | "metrics": { 38 | "etc": "etc" 39 | } 40 | } 41 | ] 42 | } 43 | ``` 44 | 45 | The job will use the processed pandas tables to update the Ensemble JSON kept in the S3 bucket. 
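For orientation, here is a minimal sketch of that reshaping and history-update step. The production logic lives in `usage_report/utils/process_output.py` (`all_metrics_per_day` builds the per-day entries and `update_history` merges them into the existing JSON); the function names and pandas manipulation below are illustrative only. Dimensional tables (locale distribution, top add-ons) are folded into the same per-day `metrics` object as nested dictionaries, which is why `locale` appears as a sub-object in the example above.

```python
# Minimal sketch of the reshape + history update; the production logic lives in
# usage_report/utils/process_output.py (all_metrics_per_day / update_history).
# Function names and the exact pandas manipulation here are illustrative only.
import pandas as pd


def reshape_week(usage_df):
    """Fold `submission_date_s3|country|metric1|metric2|...` rows into
    {country: [{"date": ..., "metrics": {...}}]} as described above."""
    output = {}
    for _, row in usage_df.iterrows():
        # e.g. "20180201" -> "2018-02-01", the date format used in the JSON
        date = pd.to_datetime(row["submission_date_s3"]).strftime("%Y-%m-%d")
        metrics = row.drop(["submission_date_s3", "country"]).to_dict()
        output.setdefault(row["country"], []).append({"date": date, "metrics": metrics})
    return output


def append_to_history(new_week, history):
    """Append the new week's entries to the historical JSON pulled from S3
    (or start a fresh history when none exists yet)."""
    history = history or {}
    for country, entries in new_week.items():
        history.setdefault(country, []).extend(entries)
    return history
```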
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8 2 | 3 | # add a non-privileged user for running the application 4 | RUN groupadd --gid 10001 app && \ 5 | useradd -g app --uid 10001 --shell /usr/sbin/nologin --create-home --home-dir /app app 6 | 7 | WORKDIR /app 8 | 9 | # Install python 10 | RUN apt-get update && \ 11 | apt-get -y --no-install-recommends install python2.7 python-pip python-setuptools 12 | 13 | ENV PYTHONPATH $PYTHONPATH:/app/usage_report:/app/tests 14 | 15 | COPY requirements.txt /app 16 | RUN pip install --upgrade pip 17 | RUN pip install -r requirements.txt 18 | 19 | COPY . /app 20 | 21 | USER app 22 | -------------------------------------------------------------------------------- /METRICS.md: -------------------------------------------------------------------------------- 1 | # Metric Descriptions 2 | 3 | #### User Activity 4 | 5 | | Metric name / Code Ref | Description | 6 | |------------------------------|-------------| 7 | | Yearly Active User / `YAU` | The number of clients who used Firefox in the past 365 days. | 8 | | Monthly Active Users / `MAU` | The number of clients who used Firefox in the past 28 days. | 9 | | Daily Usage / `avg_daily_usage(hours)` | Average daily use of a typical client from the past 7 days. Calculated by getting the average daily use for each client from the last week (on days they used), and then averaging across all clients. | 10 | | Average Intensity / `avg_intensity` | Average daily intensity of use of a typical client from the past 7 days. Intensity of use is defined as the proportion of the time a client is interacting with the browser when the browser is open. Calculated by getting the average daily intensity for each client from the last week (on days they used), and then averaging across all clients. | 11 | | New Profile Rate / `pct_new_user` | Percentage of WAU (clients who used Firefox in the past 7 days) that are new clients (created profile that week). | 12 | | Latest Version / `pct_latest_version` | Percentage of WAU on the newest version (or newer) of Firefox (for that week). Note, Firefox updates are often released with different throttling rates (i.e. 10% of population in week 1, etc.). | 13 | 14 | #### Usage Behavior 15 | 16 | | Metric name / Code Ref | Description | 17 | |------------------------------|-------------| 18 | | Top Languages / `locale, locale, pct_on_locale` | Percentage of WAU on each language setting (locale). Top 5 per week only. | 19 | | Always On Tracking Protection / `pct_TP` | Percentage of WAU with Always On Tracking Protection enabled for default browsing. Note, this pref was not exposed to users until Firefox 57 (2017-11-14) and does not include Private Browsing Mode. | 20 | | Has Add-on / `pct_addon` | Percentage of WAU with at least 1 user installed addon. | 21 | | Top Add-ons / `top10addons, addon_name, pct_with_addon` | The top 10 most common user installed addons from the last 7 days. 
| 22 | 23 | 24 | 25 |
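#### Calculation Sketch

The Daily Usage and Average Intensity metrics above are both computed as a two-step average: a per-client daily mean (over the days each client was active), followed by a mean across clients. The real implementations live in `usage_report/utils/avg_daily_usage.py` (`get_daily_avg_session`) and `usage_report/utils/avg_intensity.py` (`get_avg_intensity`); the sketch below only illustrates that averaging. It assumes `subsession_length` is recorded in seconds and that each `active_tick` covers about 5 seconds (consistent with the unit-test fixtures, where 20 ticks against a 100-second subsession gives an intensity of 1.0), and it omits the date filtering and country breakdown that the real code performs.

```python
# Illustrative sketch only -- see usage_report/utils/avg_daily_usage.py and
# usage_report/utils/avg_intensity.py for the production implementations.
from pyspark.sql import functions as F


def sketch_avg_daily_usage(week_df):
    # week_df: main_summary rows already filtered to the 7-day window.
    per_client_day = (week_df
                      .groupBy("client_id", "submission_date_s3")
                      .agg(F.sum("subsession_length").alias("seconds")))
    # Average over the days each client actually used the browser...
    per_client = per_client_day.groupBy("client_id").agg(F.avg("seconds").alias("avg_seconds"))
    # ...then average across clients and convert seconds to hours.
    return per_client.agg((F.avg("avg_seconds") / 3600).alias("avg_daily_usage(hours)"))


def sketch_avg_intensity(week_df):
    # Intensity: fraction of open-browser time spent interacting,
    # assuming one active tick is roughly 5 seconds of activity.
    per_client_day = (week_df
                      .groupBy("client_id", "submission_date_s3")
                      .agg((F.sum("active_ticks") * 5 / F.sum("subsession_length"))
                           .alias("intensity")))
    per_client = per_client_day.groupBy("client_id").agg(F.avg("intensity").alias("avg_intensity"))
    return per_client.agg(F.avg("avg_intensity").alias("avg_intensity"))
```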
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help clean test coverage release build 2 | 3 | help: 4 | @echo " lint - check style with flake8" 5 | @echo " test - run tests quickly with the default Python" 6 | @echo " build - Builds the docker images for the docker-compose setup" 7 | @echo " clean - Stops and removes all docker containers" 8 | @echo " run - Run a command" 9 | @echo " shell - Opens a Bash shell" 10 | 11 | lint: 12 | docker-compose run app flake8 usage_report tests --max-line-length 100 13 | 14 | test: 15 | docker-compose run app py.test 16 | 17 | build: 18 | docker-compose build 19 | 20 | clean: stop 21 | docker-compose rm -f 22 | 23 | shell: 24 | docker-compose run app bash 25 | 26 | run: 27 | docker-compose run app $(COMMAND) 28 | 29 | stop: 30 | docker-compose down 31 | docker-compose stop 32 | 33 | up: 34 | docker-compose up 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Firefox Public Data 2 | 3 | The [Firefox Public Data](https://metrics.mozilla.com/protected/usage-report-demo/dashboard/user-activity) (FxPD) project is a public-facing website which tracks various metrics over time and helps the general public understand what kind of data is being tracked by Mozilla and how it is used. It is modeled after and evolved out of the [Firefox Hardware Report](https://hardware.metrics.mozilla.com/), which is now included as a part of FxPD. 4 | 5 | This repository contains the code used to pull and process the data for the **User Activity** and **Usage Behavior** subsections of the **Desktop** section of the report. 6 | 7 | The website itself is generated by the [Ensemble](https://github.com/mozilla/ensemble) and [Ensemble Transposer](https://github.com/mozilla/ensemble-transposer) repos. 8 | 9 | # Data 10 | 11 | The data is pulled from Firefox desktop [telemetry](https://wiki.mozilla.org/Telemetry), specifically the [main summary](https://docs.telemetry.mozilla.org/datasets/batch_view/main_summary/reference.html) view of the data. 12 | 13 | The data is on a weekly resolution (one datapoint per week), and includes the metrics below. The metrics are estimated from a 10% sample of the Release, Beta, ESR, and Other channels, and broken down by the top 10 countries and a worldwide overall aggregate. The historical data is kept in an S3 bucket as a JSON file. 14 | 15 | This job (the repo) is designed to be run once a week and will produce the data for a single week. It will then update the historical data in the S3 bucket. 16 | 17 | For backfills, this job needs to be run for each week of the backfill. 18 | 19 | 20 | #### Metrics 21 | 22 | For the list of metrics, see [METRICS.md](METRICS.md). 23 | 24 | #### Data Structure 25 | 26 | For a description of the structure of the data output, see [DATAFORMAT.md](DATAFORMAT.md). 27 | 28 | # Developing 29 | 30 | #### Run the Job 31 | 32 | To initiate a test run of this job, you can clone this repo onto an ATMO cluster. First run 33 | 34 | $ pip install py4j --upgrade 35 | 36 | from your cluster console to get the latest version of `py4j`. 37 | 38 | 39 | Next, from the repo's top-level directory, run: 40 | 41 | $ python usage_report/usage_report.py --date [some date, e.g. 20180201] --no-output 42 | 43 | which will aggregate usage statistics from the last 7 days by default. When testing, it is recommended to set the `--lag-days` flag to `1` for quicker iterations, e.g. 44 | 45 | $ python usage_report/usage_report.py --date 20180201 --lag-days 1 --no-output 46 | 47 | *Note: there is currently no output to S3, so testing like this is not a problem. However, when testing runs in this way, always make sure to include the flag* `--no-output` 48 | 49 | #### Testing 50 | 51 | Each metric has its own set of unit tests. Code to extract a particular metric is found in `.py` files in `usage_report/utils/`, which are integrated in `usage_report/usage_report.py`. 52 | 53 | To run these tests, first ensure you have Docker installed. Then build the container using 54 | 55 | $ make build 56 | 57 | then run the tests with 58 | 59 | $ make test 60 | 61 | finally, 62 | 63 | $ make lint 64 | 65 | runs the linter. 66 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | app: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | restart: "no" 9 | command: "true" 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | arrow==0.10.0 2 | boto3==1.9.199 3 | click==6.7 4 | click_datetime==0.2 5 | flake8==3.7.8 6 | numpy==1.13.3 7 | pandas==0.24.2 8 | pyspark==2.2.2 9 | pytest==4.6.4 10 | scipy==1.0.0rc1 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages 3 | 4 | test_deps = [ 5 | 'coverage', 6 | 'pytest', 7 | 'pytest-cov', 8 | 'pytest-timeout', 9 | 'moto', 10 | 'mock', 11 | ] 12 | 13 | extras = { 14 | 'testing': test_deps, 15 | } 16 | 17 | setup( 18 | name='fx_usage_report', 19 | version='0.1', 20 | description='Python ETL job for the Firefox Usage Report', 21 | author='Firefox Public Data Platform', 22 | author_email='fx-public-data@mozilla.com', 23 | url='https://github.com/mozilla/Fx_Usage_Report.git', 24 | packages=find_packages(exclude=['tests']), 25 | include_package_data=True, 26 | install_requires=[ 27 | 'arrow==0.10.0', 28 | 'click==6.7', 29 | 'click_datetime==0.2', 30 | 'numpy==1.13.3', 31 | 'pyspark==2.2.0.post0', 32 | 'scipy==1.0.0rc1', 33 | ], 34 | tests_require=test_deps, 35 | extras_require=extras, 36 | ) 37 | -------------------------------------------------------------------------------- /tests/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/Fx_Usage_Report/489ca258b14776c01f3021080b2dd686d239dea3/tests/helpers/__init__.py -------------------------------------------------------------------------------- /tests/helpers/utils.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import Row 2 | from collections import OrderedDict 3 | 4 | 5 | def is_same(spark, df, expected, verbose=False): 6 | expected_df = spark.sparkContext \ 7 | .parallelize(expected) \ 8 | .map(lambda r: Row(**OrderedDict(sorted(r.items())))) \ 9 | .toDF() 10 | 11 | cols = sorted(df.columns) 12 | intersection = df.select(*cols).intersect(expected_df) 13 
| df_len, expected_len, actual_len = df.count(), expected_df.count(), intersection.count() 14 | 15 | if verbose: 16 | print "\nInput Dataframe\n" 17 | print df.select(*cols).collect() 18 | print "\nExpected Dataframe\n" 19 | print expected_df.collect() 20 | 21 | assert df_len == expected_len 22 | assert actual_len == expected_len, "Missing {} Rows".format(expected_len - actual_len) 23 | -------------------------------------------------------------------------------- /tests/test_integration_function.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | from helpers.utils import is_same 4 | from usage_report.usage_report import agg_usage, get_spark 5 | from pyspark.sql import Row 6 | 7 | # Makes utils available 8 | pytest.register_assert_rewrite('tests.helpers.utils') 9 | 10 | 11 | @pytest.fixture 12 | def spark(): 13 | return get_spark() 14 | 15 | 16 | @pytest.fixture 17 | def main_summary_data_multiple(): 18 | ''' data with multiple counties and days including the following cases: 19 | - multiple countries 20 | a)include countries that are not in country list 21 | b)include countries into country_list that are not in data 22 | - clients with only pings from outside date range 23 | - clients with some pings from outside date range 24 | ''' 25 | a1 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 26 | foreign_install=False, is_system=False), 27 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 28 | foreign_install=False, is_system=True)] 29 | 30 | return ( 31 | (("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 32 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, 'en-US'), 33 | ("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 34 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, "en-US"), 35 | ("20180201", 100, 20, "DE", "client2", "58.0", 17563, 36 | "Darwin", 10.0, a1, None, "DE"), # 17563 -> 20180201 37 | ("20180201", 100, 20, "MX", "client3", "58.0", 17563, 38 | "Darwin", 10.0, a1, None, "en-US"), 39 | ("20180201", 100, 20, "DE", "client4", "58.0", 17554, 40 | "Darwin", 10.0, a1, None, "en-US"), 41 | ("20180131", 100, 20, "DE", "client5", "58.0", 17363, 42 | "Darwin", 10.0, a1, None, "DE"), 43 | ("20180101", 100, 20, "DE", "client5", "57.0", 17364, 44 | "Darwin", 10.0, a1, None, "DE"), 45 | ("20180101", 100, 20, "DE", "client6", "57.0", 17364, 46 | "Darwin", 10.0, a1, None, "DE")), 47 | ["submission_date_s3", "subsession_length", "active_ticks", 48 | "country", "client_id", "app_version", "profile_creation_date", 49 | "os", "os_version", "active_addons", "histogram_parent_tracking_protection_enabled", 50 | "locale"] 51 | ) 52 | 53 | 54 | @pytest.fixture 55 | def main_summary_data_null_value(): 56 | ''' data with all/some of a given field are null, '', or zero 57 | - 'app_version' is all showing '' 58 | - 'profile_creation_date' has None 59 | - 'active_ticks' has zero 60 | - 'client2' has multiple fields missing 61 | ''' 62 | a1 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 63 | foreign_install=False, is_system=False), 64 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 65 | foreign_install=False, is_system=True)] 66 | 67 | return ( 68 | (("20180201", 100, 20, "DE", "client1", "", 17060, 69 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, "en-US"), 70 | ("20180201", 100, 20, "DE", "client1", "", 17060, 71 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, "en-US"), 72 | ("20180201", 100, 0, "DE", "client2", "", None, 73 | "Darwin", 
10.0, a1, None, "DE"), # 17564 -> 20180201 74 | ("20180201", 100, 20, "DE", "client4", "", 17554, 75 | "Darwin", 10.0, a1, None, "en-US"), 76 | ("20180131", 100, 20, "DE", "client5", "", 17563, 77 | "Darwin", 10.0, a1, None, "DE")), 78 | ["submission_date_s3", "subsession_length", "active_ticks", 79 | "country", "client_id", "app_version", "profile_creation_date", 80 | "os", "os_version", "active_addons", "histogram_parent_tracking_protection_enabled", 81 | "locale"] 82 | ) 83 | 84 | 85 | def test_integration_multiple_countries_and_days_no_country_list(spark, main_summary_data_multiple): 86 | ''' tests without country list for data including the following cases: 87 | - multiple countries 88 | a)include countries that are not in country list 89 | b)include countries into country_list that are not in data 90 | - clients with only pings from outside date range 91 | - clients with some pings from outside date range 92 | ''' 93 | main_summary = spark.createDataFrame(*main_summary_data_multiple) 94 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 95 | period=7, sample_factor=100.0 / 1, 96 | country_list=None) 97 | 98 | expected_usage = [ 99 | { 100 | "submission_date_s3": "20180201", 101 | "country": "All", 102 | "avg_daily_usage(hours)": 600.0 / 3600 / 5.0, 103 | "avg_intensity": 1.0, 104 | "pct_latest_version": 80.0, 105 | "pct_TP": 20.0, 106 | "MAU": 500, 107 | "YAU": 600, 108 | "pct_new_user": 40.0, 109 | "pct_addon": 100.0 110 | } 111 | ] 112 | 113 | expected_locales = [ 114 | { 115 | "country": "All", 116 | "submission_date_s3": "20180201", 117 | "locale": "en-US", 118 | "pct_on_locale": 60.0 119 | }, 120 | { 121 | "country": "All", 122 | "submission_date_s3": "20180201", 123 | "locale": "DE", 124 | "pct_on_locale": 40.0 125 | } 126 | ] 127 | 128 | expected_addons = [ 129 | { 130 | "country": "All", 131 | "submission_date_s3": "20180201", 132 | "addon_id": u'disableSHA1rollout', 133 | "addon_name": u'SHA-1 deprecation staged rollout', 134 | "pct_with_addon": 100.0 135 | } 136 | ] 137 | 138 | is_same(spark, usage, expected_usage) 139 | is_same(spark, locales, expected_locales) 140 | is_same(spark, top10addon, expected_addons) 141 | 142 | 143 | def test_integration_multiple_countries_and_days_country_list(spark, main_summary_data_multiple): 144 | ''' tests with country list for data including the following cases: 145 | - multiple countries 146 | a)include countries that are not in country list 147 | b)include countries into country_list that are not in data 148 | - clients with only pings from outside date range 149 | - clients with some pings from outside date range 150 | ''' 151 | main_summary = spark.createDataFrame(*main_summary_data_multiple) 152 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 153 | period=7, sample_factor=100.0 / 1, 154 | country_list=['DE', 'CN']) 155 | 156 | expected_usage = [ 157 | { 158 | "submission_date_s3": "20180201", 159 | "country": "All", 160 | "avg_daily_usage(hours)": 600.0 / 3600 / 5.0, 161 | "avg_intensity": 1.0, 162 | "pct_latest_version": 80.0, 163 | "pct_TP": 20.0, 164 | "MAU": 500, 165 | "YAU": 600, 166 | "pct_new_user": 40.0, 167 | "pct_addon": 100.0 168 | }, 169 | { 170 | "submission_date_s3": "20180201", 171 | "country": "DE", 172 | "avg_daily_usage(hours)": 500.0 / 3600 / 4.0, 173 | "avg_intensity": 1.0, 174 | "pct_latest_version": 75.0, 175 | "pct_TP": 25.0, 176 | "MAU": 400, 177 | "YAU": 500, 178 | "pct_new_user": 25.0, 179 | "pct_addon": 100.0 180 | }, 181 | 182 | ] 183 | 184 | expected_locales = 
[ 185 | { 186 | "country": "All", 187 | "submission_date_s3": "20180201", 188 | "locale": "en-US", 189 | "pct_on_locale": 60.0 190 | }, 191 | { 192 | "country": "All", 193 | "submission_date_s3": "20180201", 194 | "locale": "DE", 195 | "pct_on_locale": 40.0 196 | }, 197 | { 198 | "country": "DE", 199 | "submission_date_s3": "20180201", 200 | "locale": "en-US", 201 | "pct_on_locale": 50.0 202 | }, 203 | { 204 | "country": "DE", 205 | "submission_date_s3": "20180201", 206 | "locale": "DE", 207 | "pct_on_locale": 50.0 208 | } 209 | ] 210 | 211 | expected_addons = [ 212 | { 213 | "country": "All", 214 | "submission_date_s3": "20180201", 215 | "addon_id": u'disableSHA1rollout', 216 | "addon_name": u'SHA-1 deprecation staged rollout', 217 | "pct_with_addon": 100.0 218 | }, 219 | { 220 | "country": "DE", 221 | "submission_date_s3": "20180201", 222 | "addon_id": u'disableSHA1rollout', 223 | "addon_name": u'SHA-1 deprecation staged rollout', 224 | "pct_with_addon": 100.0 225 | } 226 | ] 227 | 228 | is_same(spark, usage, expected_usage) 229 | is_same(spark, locales, expected_locales) 230 | is_same(spark, top10addon, expected_addons) 231 | 232 | 233 | def test_integration_missing_fields_no_country_list(spark, main_summary_data_null_value): 234 | ''' tests without country list for data with all/some of a given field are null, '', or zero 235 | ''' 236 | main_summary = spark.createDataFrame(*main_summary_data_null_value) 237 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 238 | period=7, sample_factor=100.0 / 1, 239 | country_list=None) 240 | 241 | expected_usage = [ 242 | { 243 | "submission_date_s3": "20180201", 244 | "country": "All", 245 | "avg_daily_usage(hours)": 500.0 / 3600 / 4.0, 246 | "avg_intensity": 0.75, 247 | "pct_latest_version": 0.0, 248 | "pct_TP": 25.0, 249 | "MAU": 400, 250 | "YAU": 400, 251 | "pct_new_user": 25.0, 252 | "pct_addon": 100.0 253 | } 254 | ] 255 | 256 | expected_locales = [ 257 | { 258 | "country": "All", 259 | "submission_date_s3": "20180201", 260 | "locale": "en-US", 261 | "pct_on_locale": 50.0 262 | }, 263 | { 264 | "country": "All", 265 | "submission_date_s3": "20180201", 266 | "locale": "DE", 267 | "pct_on_locale": 50.0 268 | } 269 | ] 270 | 271 | expected_addons = [ 272 | { 273 | "country": "All", 274 | "submission_date_s3": "20180201", 275 | "addon_id": u'disableSHA1rollout', 276 | "addon_name": u'SHA-1 deprecation staged rollout', 277 | "pct_with_addon": 100.0 278 | } 279 | ] 280 | 281 | is_same(spark, usage, expected_usage) 282 | is_same(spark, locales, expected_locales) 283 | is_same(spark, top10addon, expected_addons) 284 | 285 | 286 | def test_integration_missing_fields_country_list(spark, main_summary_data_null_value): 287 | ''' tests with country list for data with all/some of a given field are null, '', or zero 288 | ''' 289 | main_summary = spark.createDataFrame(*main_summary_data_null_value) 290 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 291 | period=7, sample_factor=100.0 / 1, 292 | country_list=['DE']) 293 | 294 | expected_usage = [ 295 | { 296 | "submission_date_s3": "20180201", 297 | "country": "All", 298 | "avg_daily_usage(hours)": 500.0 / 3600 / 4.0, 299 | "avg_intensity": 0.75, 300 | "pct_latest_version": 0.0, 301 | "pct_TP": 25.0, 302 | "MAU": 400, 303 | "YAU": 400, 304 | "pct_new_user": 25.0, 305 | "pct_addon": 100.0 306 | }, 307 | { 308 | "submission_date_s3": "20180201", 309 | "country": "DE", 310 | "avg_daily_usage(hours)": 500.0 / 3600 / 4.0, 311 | "avg_intensity": 0.75, 312 | 
"pct_latest_version": 0.0, 313 | "pct_TP": 25.0, 314 | "MAU": 400, 315 | "YAU": 400, 316 | "pct_new_user": 25.0, 317 | "pct_addon": 100.0 318 | } 319 | ] 320 | 321 | expected_locales = [ 322 | { 323 | "country": "All", 324 | "submission_date_s3": "20180201", 325 | "locale": "en-US", 326 | "pct_on_locale": 50.0 327 | }, 328 | { 329 | "country": "All", 330 | "submission_date_s3": "20180201", 331 | "locale": "DE", 332 | "pct_on_locale": 50.0 333 | }, 334 | { 335 | "country": "DE", 336 | "submission_date_s3": "20180201", 337 | "locale": "en-US", 338 | "pct_on_locale": 50.0 339 | }, 340 | { 341 | "country": "DE", 342 | "submission_date_s3": "20180201", 343 | "locale": "DE", 344 | "pct_on_locale": 50.0 345 | } 346 | ] 347 | 348 | expected_addons = [ 349 | { 350 | "country": "All", 351 | "submission_date_s3": "20180201", 352 | "addon_id": u'disableSHA1rollout', 353 | "addon_name": u'SHA-1 deprecation staged rollout', 354 | "pct_with_addon": 100.0 355 | }, 356 | { 357 | "country": "DE", 358 | "submission_date_s3": "20180201", 359 | "addon_id": u'disableSHA1rollout', 360 | "addon_name": u'SHA-1 deprecation staged rollout', 361 | "pct_with_addon": 100.0 362 | } 363 | ] 364 | 365 | is_same(spark, usage, expected_usage) 366 | is_same(spark, locales, expected_locales) 367 | is_same(spark, top10addon, expected_addons) 368 | -------------------------------------------------------------------------------- /tests/test_process_output.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from usage_report.usage_report import agg_usage, get_spark 3 | from pyspark.sql import Row 4 | from usage_report.utils.process_output import all_metrics_per_day 5 | from usage_report.utils.process_output import update_history 6 | 7 | 8 | # Makes utils available 9 | pytest.register_assert_rewrite('tests.helpers.utils') 10 | 11 | 12 | @pytest.fixture 13 | def spark(): 14 | return get_spark() 15 | 16 | 17 | @pytest.fixture 18 | def main_summary_data(): 19 | a1 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 20 | foreign_install=False, is_system=False), 21 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 22 | foreign_install=False, is_system=True)] 23 | 24 | return ( 25 | (("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 26 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, 'en-US'), 27 | ("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 28 | "Windows_NT", 10.0, a1, {}, "en-US"), 29 | ("20180201", 100, 20, "DE", "client2", "58.0", 17563, 30 | "Darwin", 10.0, a1, None, "DE")), # 17563 -> 20180201 31 | ["submission_date_s3", "subsession_length", "active_ticks", 32 | "country", "client_id", "app_version", "profile_creation_date", 33 | "os", "os_version", "active_addons", "histogram_parent_tracking_protection_enabled", 34 | "locale"] 35 | ) 36 | 37 | 38 | def test_processing_one_day(spark, main_summary_data): 39 | main_summary = spark.createDataFrame(*main_summary_data) 40 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 41 | period=1, sample_factor=100.0 / 1, 42 | country_list=['DE']) 43 | usage_df = usage.toPandas() 44 | locales_df = locales.toPandas() 45 | top10addon_df = top10addon.toPandas() 46 | 47 | fxhealth, webusage = all_metrics_per_day(['DE'], 48 | usage_pd_df=usage_df, 49 | locales_pd_df=locales_df, 50 | topaddons_pd_df=top10addon_df) 51 | 52 | expected_fxhealth = { 53 | 'DE': {"date": "2018-02-01", 54 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 55 | 
"avg_intensity": 1.0, 56 | "pct_latest_version": 50.0, 57 | "MAU": 200.0, 58 | "YAU": 200.0, 59 | "pct_new_user": 50.0}}, 60 | 'All': {"date": "2018-02-01", 61 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 62 | "avg_intensity": 1.0, 63 | "pct_latest_version": 50.0, 64 | "MAU": 200.0, 65 | "YAU": 200.0, 66 | "pct_new_user": 50.0}} 67 | } 68 | 69 | expected_webusage = { 70 | 'DE': {"date": "2018-02-01", 71 | "metrics": {"pct_TP": 50.0, 72 | "pct_addon": 100.0, 73 | "locale": {u"en-US": 50.0, 74 | u"DE": 50.0}, 75 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}}, 76 | 'All': {"date": "2018-02-01", 77 | "metrics": {"pct_TP": 50.0, 78 | "pct_addon": 100.0, 79 | "locale": {u"en-US": 50.0, 80 | u"DE": 50.0}, 81 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 82 | } 83 | 84 | assert expected_fxhealth == fxhealth 85 | assert expected_webusage == webusage 86 | 87 | 88 | def test_update_history_fxhealth_with_history(spark, main_summary_data): 89 | main_summary = spark.createDataFrame(*main_summary_data) 90 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 91 | period=1, sample_factor=100.0 / 1, 92 | country_list=['DE']) 93 | usage_df = usage.toPandas() 94 | locales_df = locales.toPandas() 95 | top10addon_df = top10addon.toPandas() 96 | 97 | fxhealth, webusage = all_metrics_per_day(['DE'], 98 | usage_pd_df=usage_df, 99 | locales_pd_df=locales_df, 100 | topaddons_pd_df=top10addon_df) 101 | 102 | old_fxhealth = { 103 | 'DE': [ 104 | {"date": "2018-01-01", 105 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 106 | "avg_intensity": 1.0, 107 | "pct_latest_version": 50.0, 108 | "MAU": 200.0, 109 | "YAU": 200.0, 110 | "pct_new_user": 50.0}} 111 | ], 112 | 'All': [ 113 | {"date": "2018-01-01", 114 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 115 | "avg_intensity": 1.0, 116 | "pct_latest_version": 50.0, 117 | "MAU": 200.0, 118 | "YAU": 200.0, 119 | "pct_new_user": 50.0}} 120 | ] 121 | } 122 | updated_fxhealth = update_history(fxhealth, old_fxhealth) 123 | 124 | expected_fxhealth = { 125 | 'DE': [ 126 | {"date": "2018-01-01", 127 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 128 | "avg_intensity": 1.0, 129 | "pct_latest_version": 50.0, 130 | "MAU": 200.0, 131 | "YAU": 200.0, 132 | "pct_new_user": 50.0}}, 133 | {"date": "2018-02-01", 134 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 135 | "avg_intensity": 1.0, 136 | "pct_latest_version": 50.0, 137 | "MAU": 200.0, 138 | "YAU": 200.0, 139 | "pct_new_user": 50.0}} 140 | ], 141 | 'All': [ 142 | {"date": "2018-01-01", 143 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 144 | "avg_intensity": 1.0, 145 | "pct_latest_version": 50.0, 146 | "MAU": 200.0, 147 | "YAU": 200.0, 148 | "pct_new_user": 50.0}}, 149 | {"date": "2018-02-01", 150 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 151 | "avg_intensity": 1.0, 152 | "pct_latest_version": 50.0, 153 | "MAU": 200.0, 154 | "YAU": 200.0, 155 | "pct_new_user": 50.0}} 156 | ] 157 | } 158 | 159 | assert expected_fxhealth == updated_fxhealth 160 | 161 | 162 | def test_update_history_fxhealth_without_history(spark, main_summary_data): 163 | main_summary = spark.createDataFrame(*main_summary_data) 164 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 165 | period=1, sample_factor=100.0 / 1, 166 | country_list=['DE']) 167 | usage_df = usage.toPandas() 168 | locales_df = locales.toPandas() 169 | top10addon_df = top10addon.toPandas() 170 | 171 | fxhealth, webusage = 
all_metrics_per_day(['DE'], 172 | usage_pd_df=usage_df, 173 | locales_pd_df=locales_df, 174 | topaddons_pd_df=top10addon_df) 175 | 176 | updated_fxhealth = update_history(fxhealth, None) 177 | 178 | expected_fxhealth = { 179 | 'DE': [ 180 | {"date": "2018-02-01", 181 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 182 | "avg_intensity": 1.0, 183 | "pct_latest_version": 50.0, 184 | "MAU": 200.0, 185 | "YAU": 200.0, 186 | "pct_new_user": 50.0}} 187 | ], 188 | 'All': [ 189 | {"date": "2018-02-01", 190 | "metrics": {"avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 191 | "avg_intensity": 1.0, 192 | "pct_latest_version": 50.0, 193 | "MAU": 200.0, 194 | "YAU": 200.0, 195 | "pct_new_user": 50.0}} 196 | ] 197 | } 198 | 199 | assert expected_fxhealth == updated_fxhealth 200 | 201 | 202 | def test_update_history_webusage_with_history(spark, main_summary_data): 203 | main_summary = spark.createDataFrame(*main_summary_data) 204 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 205 | period=1, sample_factor=100.0 / 1, 206 | country_list=['DE']) 207 | usage_df = usage.toPandas() 208 | locales_df = locales.toPandas() 209 | top10addon_df = top10addon.toPandas() 210 | 211 | fxhealth, webusage = all_metrics_per_day(['DE'], 212 | usage_pd_df=usage_df, 213 | locales_pd_df=locales_df, 214 | topaddons_pd_df=top10addon_df) 215 | 216 | old_webusage = { 217 | 'DE': [ 218 | {"date": "2018-01-01", 219 | "metrics": {"pct_TP": 50.0, 220 | "pct_addon": 100.0, 221 | "locale": {u"en-US": 50.0, 222 | u"DE": 50.0}, 223 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 224 | ], 225 | 'All': [ 226 | {"date": "2018-01-01", 227 | "metrics": {"pct_TP": 50.0, 228 | "pct_addon": 100.0, 229 | "locale": {u"en-US": 50.0, 230 | u"DE": 50.0}, 231 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 232 | ] 233 | } 234 | 235 | updated_webusage = update_history(webusage, old_webusage) 236 | 237 | expected_webusage = { 238 | 'DE': [ 239 | {"date": "2018-01-01", 240 | "metrics": {"pct_TP": 50.0, 241 | "pct_addon": 100.0, 242 | "locale": {u"en-US": 50.0, 243 | u"DE": 50.0}, 244 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}}, 245 | {"date": "2018-02-01", 246 | "metrics": {"pct_TP": 50.0, 247 | "pct_addon": 100.0, 248 | "locale": {u"en-US": 50.0, 249 | u"DE": 50.0}, 250 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 251 | ], 252 | 'All': [ 253 | {"date": "2018-01-01", 254 | "metrics": {"pct_TP": 50.0, 255 | "pct_addon": 100.0, 256 | "locale": {u"en-US": 50.0, 257 | u"DE": 50.0}, 258 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}}, 259 | {"date": "2018-02-01", 260 | "metrics": {"pct_TP": 50.0, 261 | "pct_addon": 100.0, 262 | "locale": {u"en-US": 50.0, 263 | u"DE": 50.0}, 264 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 265 | 266 | ] 267 | } 268 | 269 | assert expected_webusage == updated_webusage 270 | 271 | 272 | def test_update_history_webusage_without_history(spark, main_summary_data): 273 | main_summary = spark.createDataFrame(*main_summary_data) 274 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 275 | period=1, sample_factor=100.0 / 1, 276 | country_list=['DE']) 277 | usage_df = usage.toPandas() 278 | locales_df = locales.toPandas() 279 | top10addon_df = top10addon.toPandas() 280 | 281 | fxhealth, webusage = all_metrics_per_day(['DE'], 282 | usage_pd_df=usage_df, 283 | locales_pd_df=locales_df, 284 | topaddons_pd_df=top10addon_df) 285 | 286 | updated_webusage = update_history(webusage, None) 
287 | 288 | expected_webusage = { 289 | 'DE': [ 290 | {"date": "2018-02-01", 291 | "metrics": {"pct_TP": 50.0, 292 | "pct_addon": 100.0, 293 | "locale": {u"en-US": 50.0, 294 | u"DE": 50.0}, 295 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 296 | ], 297 | 'All': [ 298 | {"date": "2018-02-01", 299 | "metrics": {"pct_TP": 50.0, 300 | "pct_addon": 100.0, 301 | "locale": {u"en-US": 50.0, 302 | u"DE": 50.0}, 303 | "top10addons": {u'SHA-1 deprecation staged rollout': 100.0}}} 304 | ] 305 | } 306 | 307 | assert expected_webusage == updated_webusage 308 | -------------------------------------------------------------------------------- /tests/test_usage_report.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | from helpers.utils import is_same 4 | from usage_report.utils.avg_intensity import get_avg_intensity 5 | from usage_report.utils.avg_daily_usage import get_daily_avg_session 6 | from usage_report.utils.pct_latest_version import pct_new_version 7 | from usage_report.utils.activeuser import getMAU, getYAU 8 | from usage_report.utils.newuser import new_users 9 | from usage_report.utils.osdistribution import os_on_date 10 | from usage_report.utils.top10addons import top_10_addons_on_date 11 | from usage_report.utils.pct_addon import get_addon 12 | from usage_report.utils.localedistribution import locale_on_date 13 | from usage_report.usage_report import agg_usage, get_spark 14 | from pyspark.sql import Row 15 | from usage_report.utils.trackingprotection import pct_tracking_protection 16 | 17 | 18 | # Makes utils available 19 | pytest.register_assert_rewrite('tests.helpers.utils') 20 | 21 | 22 | @pytest.fixture 23 | def spark(): 24 | return get_spark() 25 | 26 | 27 | @pytest.fixture 28 | def main_summary_data(): 29 | a1 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 30 | foreign_install=False, is_system=False), 31 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 32 | foreign_install=False, is_system=True)] 33 | 34 | a2 = [Row(addon_id=u'disableSHA1rollout', name=u'SHA-1 deprecation staged rollout', 35 | foreign_install=False, is_system=False), 36 | Row(addon_id=u'e10srollout@mozilla.org', name=u'Multi-process staged rollout', 37 | foreign_install=False, is_system=True)] 38 | 39 | return ( 40 | (("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 41 | "Windows_NT", 10.0, a1, {0: 0, 1: 1}, 'en-US'), 42 | ("20180201", 100, 20, "DE", "client1", "57.0.1", 17060, 43 | "Windows_NT", 10.0, a1, {}, "en-US"), 44 | ("20180201", 100, 20, "DE", "client2", "58.0", 17563, 45 | "Darwin", 10.0, a2, None, "DE")), # 17563 -> 20180201 46 | ["submission_date_s3", "subsession_length", "active_ticks", 47 | "country", "client_id", "app_version", "profile_creation_date", 48 | "os", "os_version", "active_addons", "histogram_parent_tracking_protection_enabled", 49 | "locale"] 50 | ) 51 | 52 | 53 | def test_get_avg_intensity_no_country_list(spark, main_summary_data): 54 | main_summary = spark.createDataFrame(*main_summary_data) 55 | without_country_list = get_avg_intensity(main_summary, "20180201") 56 | 57 | expected = [ 58 | { 59 | "country": "All", 60 | "submission_date_s3": "20180201", 61 | "avg_intensity": 1.0 62 | } 63 | ] 64 | 65 | is_same(spark, without_country_list, expected) 66 | 67 | 68 | def test_get_avg_intensity_country_list(spark, main_summary_data): 69 | main_summary = spark.createDataFrame(*main_summary_data) 70 | with_country_list = get_avg_intensity(main_summary, 
"20180201", country_list=["DE"]) 71 | 72 | expected = [ 73 | { 74 | "country": "All", 75 | "submission_date_s3": "20180201", 76 | "avg_intensity": 1.0 77 | }, 78 | { 79 | "country": "DE", 80 | "submission_date_s3": "20180201", 81 | "avg_intensity": 1.0 82 | } 83 | ] 84 | 85 | is_same(spark, with_country_list, expected) 86 | 87 | 88 | def test_get_avg_daily_usage_no_country_list(spark, main_summary_data): 89 | main_summary = spark.createDataFrame(*main_summary_data) 90 | without_country_list = get_daily_avg_session(main_summary, "20180201") 91 | 92 | expected = [ 93 | { 94 | "country": "All", 95 | "submission_date_s3": "20180201", 96 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0 97 | } 98 | ] 99 | 100 | is_same(spark, without_country_list, expected) 101 | 102 | 103 | def test_get_avg_daily_usage_country_list(spark, main_summary_data): 104 | main_summary = spark.createDataFrame(*main_summary_data) 105 | with_country_list = get_daily_avg_session(main_summary, "20180201", country_list=["DE"]) 106 | 107 | expected = [ 108 | { 109 | "country": "All", 110 | "submission_date_s3": "20180201", 111 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0 112 | }, 113 | { 114 | "country": "DE", 115 | "submission_date_s3": "20180201", 116 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0 117 | } 118 | ] 119 | 120 | is_same(spark, with_country_list, expected) 121 | 122 | 123 | def test_pct_latest_version_no_country_list(spark, main_summary_data): 124 | main_summary = spark.createDataFrame(*main_summary_data) 125 | without_country_list = pct_new_version(main_summary, "20180201") 126 | 127 | expected = [ 128 | { 129 | "country": "All", 130 | "submission_date_s3": "20180201", 131 | "pct_latest_version": 50.0 132 | } 133 | ] 134 | 135 | is_same(spark, without_country_list, expected) 136 | 137 | 138 | def test_pct_latest_version_country_list(spark, main_summary_data): 139 | main_summary = spark.createDataFrame(*main_summary_data) 140 | with_country_list = pct_new_version(main_summary, "20180201", 141 | country_list=['DE']) 142 | 143 | expected = [ 144 | { 145 | "country": "All", 146 | "submission_date_s3": "20180201", 147 | "pct_latest_version": 50.0 148 | }, 149 | { 150 | "country": "DE", 151 | "submission_date_s3": "20180201", 152 | "pct_latest_version": 50.0 153 | } 154 | ] 155 | 156 | is_same(spark, with_country_list, expected) 157 | 158 | 159 | def test_MAU_no_country_list(spark, main_summary_data): 160 | main_summary = spark.createDataFrame(*main_summary_data) 161 | without_country_list = getMAU(main_summary, 162 | '20180201', 163 | sample_factor=100.0 / 1) 164 | 165 | expected = [ 166 | { 167 | "country": "All", 168 | "active_users": 200, 169 | "submission_date_s3": "20180201" 170 | } 171 | ] 172 | 173 | is_same(spark, without_country_list, expected, verbose=True) 174 | 175 | 176 | def test_MAU_country_list(spark, main_summary_data): 177 | main_summary = spark.createDataFrame(*main_summary_data) 178 | with_country_list = getMAU(main_summary, 179 | '20180201', 180 | sample_factor=100.0 / 1, 181 | country_list=["DE"]) 182 | 183 | expected = [ 184 | { 185 | "country": "All", 186 | "MAU": 200, 187 | "submission_date_s3": "20180201" 188 | }, 189 | { 190 | "country": "DE", 191 | "MAU": 200, 192 | "submission_date_s3": "20180201" 193 | } 194 | ] 195 | 196 | is_same(spark, with_country_list, expected) 197 | 198 | 199 | def test_YAU_no_country_list(spark, main_summary_data): 200 | main_summary = spark.createDataFrame(*main_summary_data) 201 | without_country_list = getYAU(main_summary, 202 | '20180201', 203 | 
sample_factor=100.0 / 1) 204 | 205 | expected = [ 206 | { 207 | "country": "All", 208 | "MAU": 200, 209 | "submission_date_s3": "20180201" 210 | } 211 | ] 212 | 213 | is_same(spark, without_country_list, expected) 214 | 215 | 216 | def test_YAU_country_list(spark, main_summary_data): 217 | main_summary = spark.createDataFrame(*main_summary_data) 218 | with_country_list = getYAU(main_summary, 219 | '20180201', 220 | sample_factor=100.0 / 1, 221 | country_list=["DE"]) 222 | 223 | expected = [ 224 | { 225 | "country": "All", 226 | "YAU": 200, 227 | "submission_date_s3": "20180201" 228 | }, 229 | { 230 | "country": "DE", 231 | "YAU": 200, 232 | "submission_date_s3": "20180201" 233 | } 234 | ] 235 | 236 | is_same(spark, with_country_list, expected) 237 | 238 | 239 | def test_new_users_no_country_list(spark, main_summary_data): 240 | main_summary = spark.createDataFrame(*main_summary_data) 241 | without_country_list = new_users(main_summary, 242 | '20180201') 243 | 244 | expected = [ 245 | { 246 | "country": "All", 247 | "submission_date_S3": "20180201", 248 | "pct_new_user": 50.0 249 | } 250 | ] 251 | 252 | is_same(spark, without_country_list, expected) 253 | 254 | 255 | def test_new_users_country_list(spark, main_summary_data): 256 | main_summary = spark.createDataFrame(*main_summary_data) 257 | with_country_list = new_users(main_summary, 258 | '20180201', 259 | country_list=["DE"]) 260 | 261 | expected = [ 262 | { 263 | "country": "All", 264 | "submission_date_S3": "20180201", 265 | "pct_new_user": 50.0 266 | }, 267 | { 268 | "country": "DE", 269 | "submission_date_S3": "20180201", 270 | "pct_new_user": 50.0 271 | } 272 | ] 273 | 274 | is_same(spark, with_country_list, expected) 275 | 276 | 277 | def test_os_distribution_no_country_list(spark, main_summary_data): 278 | main_summary = spark.createDataFrame(*main_summary_data) 279 | without_country_list = os_on_date(main_summary, 280 | '20180201') 281 | 282 | expected = [ 283 | { 284 | "country": "All", 285 | "submission_date_s3": "20180201", 286 | "os": "Windows 10", 287 | "pct_on_os": 50.0 288 | }, 289 | { 290 | "country": "All", 291 | "submission_date_s3": "20180201", 292 | "os": "Mac OS X", 293 | "pct_on_os": 50.0 294 | } 295 | ] 296 | 297 | is_same(spark, without_country_list, expected) 298 | 299 | 300 | def test_os_distribution_country_list(spark, main_summary_data): 301 | main_summary = spark.createDataFrame(*main_summary_data) 302 | with_country_list = os_on_date(main_summary, 303 | '20180201', 304 | country_list=['DE']) 305 | 306 | expected = [ 307 | { 308 | "country": "All", 309 | "submission_date_s3": "20180201", 310 | "os": "Windows 10", 311 | "pct_on_os": 50.0 312 | }, 313 | { 314 | "country": "All", 315 | "submission_date_s3": "20180201", 316 | "os": "Mac OS X", 317 | "pct_on_os": 50.0 318 | }, 319 | { 320 | "country": "DE", 321 | "submission_date_s3": "20180201", 322 | "os": "Mac OS X", 323 | "pct_on_os": 50.0 324 | }, 325 | { 326 | "country": "DE", 327 | "submission_date_s3": "20180201", 328 | "os": "Windows 10", 329 | "pct_on_os": 50.0 330 | } 331 | ] 332 | 333 | is_same(spark, with_country_list, expected) 334 | 335 | 336 | def test_top_10_addons_no_country_list(spark, main_summary_data): 337 | main_summary = spark.createDataFrame(*main_summary_data) 338 | 339 | without_country_list = top_10_addons_on_date(main_summary, '20180201', 5) 340 | expected = [ 341 | { 342 | "country": "All", 343 | "submission_date_s3": "20180201", 344 | "addon_id": u'disableSHA1rollout', 345 | "addon_name": u'SHA-1 deprecation staged rollout', 346 | 
"pct_with_addon": 100.0 347 | } 348 | ] 349 | 350 | is_same(spark, without_country_list, expected) 351 | 352 | 353 | def test_top_10_addons_country_list(spark, main_summary_data): 354 | main_summary = spark.createDataFrame(*main_summary_data) 355 | 356 | with_country_list = top_10_addons_on_date(main_summary, '20180201', 5, country_list=['DE']) 357 | 358 | expected = [ 359 | { 360 | "country": "All", 361 | "submission_date_s3": "20180201", 362 | "addon_id": u'disableSHA1rollout', 363 | "addon_name": u'SHA-1 deprecation staged rollout', 364 | "pct_with_addon": 100.0 365 | }, 366 | { 367 | "country": "DE", 368 | "submission_date_s3": "20180201", 369 | "addon_id": u'disableSHA1rollout', 370 | "addon_name": u'SHA-1 deprecation staged rollout', 371 | "pct_with_addon": 100.0 372 | } 373 | ] 374 | 375 | is_same(spark, with_country_list, expected) 376 | 377 | 378 | def test_has_addons_no_country_list(spark, main_summary_data): 379 | main_summary = spark.createDataFrame(*main_summary_data) 380 | 381 | without_country_list = get_addon(main_summary, '20180201') 382 | expected = [ 383 | { 384 | "country": "All", 385 | "submission_date_s3": "20180201", 386 | "pct_addon": 100.0 387 | } 388 | ] 389 | 390 | is_same(spark, without_country_list, expected) 391 | 392 | 393 | def test_has_addons_country_list(spark, main_summary_data): 394 | main_summary = spark.createDataFrame(*main_summary_data) 395 | 396 | with_country_list = get_addon(main_summary, '20180201', country_list=['DE']) 397 | expected = [ 398 | { 399 | "country": "All", 400 | "submission_date_s3": "20180201", 401 | "pct_addon": 100.0 402 | }, 403 | { 404 | "country": "DE", 405 | "submission_date_s3": "20180201", 406 | "pct_addon": 100.0 407 | } 408 | ] 409 | 410 | is_same(spark, with_country_list, expected) 411 | 412 | 413 | def test_pct_tracking_protection_no_country_list(spark, main_summary_data): 414 | main_summary = spark.createDataFrame(*main_summary_data) 415 | without_country_list = pct_tracking_protection(main_summary, '20180201') 416 | 417 | expected = [ 418 | { 419 | "submission_date_s3": "20180201", 420 | "country": "All", 421 | "pct_TP": 50.0 422 | } 423 | ] 424 | 425 | is_same(spark, without_country_list, expected) 426 | 427 | 428 | def test_pct_tracking_protection_country_list(spark, main_summary_data): 429 | main_summary = spark.createDataFrame(*main_summary_data) 430 | with_country_list = pct_tracking_protection(main_summary, 431 | '20180201', 432 | country_list=["DE"]) 433 | expected = [ 434 | { 435 | "submission_date_s3": "20180201", 436 | "country": "All", 437 | "pct_TP": 50.0 438 | }, 439 | { 440 | "submission_date_s3": "20180201", 441 | "country": "DE", 442 | "pct_TP": 50.0 443 | } 444 | ] 445 | 446 | is_same(spark, with_country_list, expected) 447 | 448 | 449 | def test_locale_no_country_list(spark, main_summary_data): 450 | main_summary = spark.createDataFrame(*main_summary_data) 451 | without_country_list = locale_on_date(main_summary, '20180201', 4) 452 | expected = [ 453 | { 454 | "country": "All", 455 | "submission_date_s3": "20180201", 456 | "locale": "en-US", 457 | "pct_on_locale": 50.0 458 | }, 459 | { 460 | "country": "All", 461 | "submission_date_s3": "20180201", 462 | "locale": "DE", 463 | "pct_on_locale": 50.0 464 | } 465 | ] 466 | 467 | is_same(spark, without_country_list, expected) 468 | 469 | 470 | def test_locale_country_list(spark, main_summary_data): 471 | main_summary = spark.createDataFrame(*main_summary_data) 472 | with_country_list = locale_on_date(main_summary, '20180201', 4, country_list=['DE']) 473 | 
474 | expected = [ 475 | { 476 | "country": "All", 477 | "submission_date_s3": "20180201", 478 | "locale": "en-US", 479 | "pct_on_locale": 50.0 480 | }, 481 | { 482 | "country": "All", 483 | "submission_date_s3": "20180201", 484 | "locale": "DE", 485 | "pct_on_locale": 50.0 486 | }, 487 | { 488 | "country": "DE", 489 | "submission_date_s3": "20180201", 490 | "locale": "en-US", 491 | "pct_on_locale": 50.0 492 | }, 493 | { 494 | "country": "DE", 495 | "submission_date_s3": "20180201", 496 | "locale": "DE", 497 | "pct_on_locale": 50.0 498 | } 499 | ] 500 | 501 | is_same(spark, with_country_list, expected) 502 | 503 | 504 | def test_integration_no_country_list(spark, main_summary_data): 505 | main_summary = spark.createDataFrame(*main_summary_data) 506 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 507 | period=1, sample_factor=100.0 / 1, 508 | country_list=None) 509 | 510 | expected_usage = [ 511 | { 512 | "submission_date_s3": "20180201", 513 | "country": "All", 514 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 515 | "avg_intensity": 1.0, 516 | "pct_latest_version": 50.0, 517 | "pct_TP": 50.0, 518 | "MAU": 200, 519 | "YAU": 200, 520 | "pct_new_user": 50.0, 521 | "pct_addon": 100.0 522 | } 523 | ] 524 | 525 | expected_locales = [ 526 | { 527 | "country": "All", 528 | "submission_date_s3": "20180201", 529 | "locale": "en-US", 530 | "pct_on_locale": 50.0 531 | }, 532 | { 533 | "country": "All", 534 | "submission_date_s3": "20180201", 535 | "locale": "DE", 536 | "pct_on_locale": 50.0 537 | } 538 | ] 539 | 540 | expected_addons = [ 541 | { 542 | "country": "All", 543 | "submission_date_s3": "20180201", 544 | "addon_id": u'disableSHA1rollout', 545 | "addon_name": u'SHA-1 deprecation staged rollout', 546 | "pct_with_addon": 100.0 547 | } 548 | ] 549 | 550 | is_same(spark, usage, expected_usage) 551 | is_same(spark, locales, expected_locales) 552 | is_same(spark, top10addon, expected_addons) 553 | 554 | 555 | def test_integration_country_list(spark, main_summary_data): 556 | main_summary = spark.createDataFrame(*main_summary_data) 557 | usage, locales, top10addon = agg_usage(main_summary, date='20180201', 558 | period=1, sample_factor=100.0 / 1, 559 | country_list=['DE']) 560 | 561 | expected_usage = [ 562 | { 563 | "submission_date_s3": "20180201", 564 | "country": "All", 565 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 566 | "avg_intensity": 1.0, 567 | "pct_latest_version": 50.0, 568 | "pct_TP": 50.0, 569 | "MAU": 200, 570 | "YAU": 200, 571 | "pct_new_user": 50.0, 572 | "pct_addon": 100.0 573 | }, 574 | { 575 | "submission_date_s3": "20180201", 576 | "country": "DE", 577 | "avg_daily_usage(hours)": 300.0 / 3600 / 2.0, 578 | "avg_intensity": 1.0, 579 | "pct_latest_version": 50.0, 580 | "pct_TP": 50.0, 581 | "MAU": 200, 582 | "YAU": 200, 583 | "pct_new_user": 50.0, 584 | "pct_addon": 100.0 585 | } 586 | ] 587 | 588 | expected_locales = [ 589 | { 590 | "country": "All", 591 | "submission_date_s3": "20180201", 592 | "locale": "en-US", 593 | "pct_on_locale": 50.0 594 | }, 595 | { 596 | "country": "All", 597 | "submission_date_s3": "20180201", 598 | "locale": "DE", 599 | "pct_on_locale": 50.0 600 | }, 601 | { 602 | "country": "DE", 603 | "submission_date_s3": "20180201", 604 | "locale": "en-US", 605 | "pct_on_locale": 50.0 606 | }, 607 | { 608 | "country": "DE", 609 | "submission_date_s3": "20180201", 610 | "locale": "DE", 611 | "pct_on_locale": 50.0 612 | } 613 | ] 614 | 615 | expected_addons = [ 616 | { 617 | "country": "All", 618 | "submission_date_s3": "20180201", 619 | 
"addon_id": u'disableSHA1rollout', 620 | "addon_name": u'SHA-1 deprecation staged rollout', 621 | "pct_with_addon": 100.0 622 | }, 623 | { 624 | "country": "DE", 625 | "submission_date_s3": "20180201", 626 | "addon_id": u'disableSHA1rollout', 627 | "addon_name": u'SHA-1 deprecation staged rollout', 628 | "pct_with_addon": 100.0 629 | } 630 | ] 631 | 632 | is_same(spark, usage, expected_usage) 633 | is_same(spark, locales, expected_locales) 634 | is_same(spark, top10addon, expected_addons) 635 | -------------------------------------------------------------------------------- /usage_report/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/Fx_Usage_Report/489ca258b14776c01f3021080b2dd686d239dea3/usage_report/__init__.py -------------------------------------------------------------------------------- /usage_report/annotations/annotations_fxhealth.json: -------------------------------------------------------------------------------- 1 | { 2 | "Brazil": [ 3 | { 4 | "annotation": { 5 | "pct_latest_version": "FF53" 6 | }, 7 | "date": "2017-04-19" 8 | }, 9 | { 10 | "annotation": { 11 | "pct_latest_version": "FF54" 12 | }, 13 | "date": "2017-06-13" 14 | }, 15 | { 16 | "annotation": { 17 | "pct_latest_version": "FF55" 18 | }, 19 | "date": "2017-08-08" 20 | }, 21 | { 22 | "annotation": { 23 | "pct_latest_version": "FF56" 24 | }, 25 | "date": "2017-09-28" 26 | }, 27 | { 28 | "annotation": { 29 | "pct_latest_version": "FF57" 30 | }, 31 | "date": "2017-11-14" 32 | }, 33 | { 34 | "annotation": { 35 | "MAU": "Summer Slump" 36 | }, 37 | "date": "2018-01-21" 38 | }, 39 | { 40 | "annotation": { 41 | "pct_latest_version": "FF58" 42 | }, 43 | "date": "2018-01-23" 44 | }, 45 | { 46 | "annotation": { 47 | "pct_latest_version": "FF59" 48 | }, 49 | "date": "2018-03-13" 50 | }, 51 | { 52 | "annotation": { 53 | "pct_latest_version": "FF60" 54 | }, 55 | "date": "2018-05-09" 56 | }, 57 | { 58 | "annotation": { 59 | "pct_latest_version": "FF61" 60 | }, 61 | "date": "2018-06-26" 62 | }, 63 | { 64 | "annotation": { 65 | "pct_latest_version": "FF62" 66 | }, 67 | "date": "2018-09-05" 68 | }, 69 | { 70 | "annotation": { 71 | "pct_latest_version": "FF63" 72 | }, 73 | "date": "2018-10-23" 74 | }, 75 | { 76 | "annotation": { 77 | "pct_latest_version": "FF64" 78 | }, 79 | "date": "2018-12-11" 80 | }, 81 | { 82 | "annotation": { 83 | "MAU": "Summer Slump" 84 | }, 85 | "date": "2019-01-01" 86 | }, 87 | { 88 | "annotation": { 89 | "pct_latest_version": "FF65" 90 | }, 91 | "date": "2019-01-29" 92 | }, 93 | { 94 | "annotation": { 95 | "pct_latest_version": "FF66" 96 | }, 97 | "date": "2019-03-19" 98 | }, 99 | { 100 | "annotation": { 101 | "MAU": "data deleted (addons outage)" 102 | }, 103 | "date": "2019-05-05" 104 | }, 105 | { 106 | "annotation": { 107 | "YAU": "data deleted (addons outage)" 108 | }, 109 | "date": "2019-05-05" 110 | }, 111 | { 112 | "annotation": { 113 | "avg_daily_usage(hours)": "data deleted (addons outage)" 114 | }, 115 | "date": "2019-05-05" 116 | }, 117 | { 118 | "annotation": { 119 | "pct_latest_version": "FF67" 120 | }, 121 | "date": "2019-05-21" 122 | }, 123 | { 124 | "annotation": { 125 | "pct_latest_version": "FF68" 126 | }, 127 | "date": "2019-07-09" 128 | }, 129 | { 130 | "annotation": { 131 | "pct_latest_version": "FF69" 132 | }, 133 | "date": "2019-09-03" 134 | }, 135 | { 136 | "annotation": { 137 | "pct_latest_version": "FF70" 138 | }, 139 | "date": "2019-10-22" 140 | }, 141 | { 142 | "annotation": { 143 | 
"pct_latest_version": "FF71" 144 | }, 145 | "date": "2019-12-03" 146 | }, 147 | { 148 | "annotation": { 149 | "pct_latest_version": "FF72" 150 | }, 151 | "date": "2020-01-07" 152 | }, 153 | { 154 | "annotation": { 155 | "pct_latest_version": "FF73" 156 | }, 157 | "date": "2020-02-11" 158 | }, 159 | { 160 | "annotation": { 161 | "pct_latest_version": "FF74" 162 | }, 163 | "date": "2020-03-10" 164 | } 165 | ], 166 | "China": [ 167 | { 168 | "annotation": { 169 | "pct_latest_version": "FF53" 170 | }, 171 | "date": "2017-04-19" 172 | }, 173 | { 174 | "annotation": { 175 | "pct_latest_version": "FF54" 176 | }, 177 | "date": "2017-06-13" 178 | }, 179 | { 180 | "annotation": { 181 | "pct_latest_version": "FF55" 182 | }, 183 | "date": "2017-08-08" 184 | }, 185 | { 186 | "annotation": { 187 | "MAU": "Summer Slump" 188 | }, 189 | "date": "2017-08-20" 190 | }, 191 | { 192 | "annotation": { 193 | "pct_latest_version": "FF56" 194 | }, 195 | "date": "2017-09-28" 196 | }, 197 | { 198 | "annotation": { 199 | "pct_latest_version": "FF57" 200 | }, 201 | "date": "2017-11-14" 202 | }, 203 | { 204 | "annotation": { 205 | "pct_latest_version": "FF58" 206 | }, 207 | "date": "2018-01-23" 208 | }, 209 | { 210 | "annotation": { 211 | "MAU": "Spring Festival" 212 | }, 213 | "date": "2018-02-25" 214 | }, 215 | { 216 | "annotation": { 217 | "pct_latest_version": "FF59" 218 | }, 219 | "date": "2018-03-13" 220 | }, 221 | { 222 | "annotation": { 223 | "pct_latest_version": "FF60" 224 | }, 225 | "date": "2018-05-09" 226 | }, 227 | { 228 | "annotation": { 229 | "pct_latest_version": "FF61" 230 | }, 231 | "date": "2018-06-26" 232 | }, 233 | { 234 | "annotation": { 235 | "MAU": "Summer Slump" 236 | }, 237 | "date": "2018-08-20" 238 | }, 239 | { 240 | "annotation": { 241 | "pct_latest_version": "FF62" 242 | }, 243 | "date": "2018-09-05" 244 | }, 245 | { 246 | "annotation": { 247 | "pct_latest_version": "FF63" 248 | }, 249 | "date": "2018-10-23" 250 | }, 251 | { 252 | "annotation": { 253 | "pct_latest_version": "FF64" 254 | }, 255 | "date": "2018-12-11" 256 | }, 257 | { 258 | "annotation": { 259 | "pct_latest_version": "FF65" 260 | }, 261 | "date": "2019-01-29" 262 | }, 263 | { 264 | "annotation": { 265 | "MAU": "Spring Festival" 266 | }, 267 | "date": "2019-02-05" 268 | }, 269 | { 270 | "annotation": { 271 | "pct_latest_version": "FF66" 272 | }, 273 | "date": "2019-03-19" 274 | }, 275 | { 276 | "annotation": { 277 | "MAU": "data deleted (addons outage)" 278 | }, 279 | "date": "2019-05-05" 280 | }, 281 | { 282 | "annotation": { 283 | "YAU": "data deleted (addons outage)" 284 | }, 285 | "date": "2019-05-05" 286 | }, 287 | { 288 | "annotation": { 289 | "avg_daily_usage(hours)": "data deleted (addons outage)" 290 | }, 291 | "date": "2019-05-05" 292 | }, 293 | { 294 | "annotation": { 295 | "pct_latest_version": "FF67" 296 | }, 297 | "date": "2019-05-21" 298 | }, 299 | { 300 | "annotation": { 301 | "pct_latest_version": "FF68" 302 | }, 303 | "date": "2019-07-09" 304 | }, 305 | { 306 | "annotation": { 307 | "pct_latest_version": "FF69" 308 | }, 309 | "date": "2019-09-03" 310 | }, 311 | { 312 | "annotation": { 313 | "pct_latest_version": "FF70" 314 | }, 315 | "date": "2019-10-22" 316 | }, 317 | { 318 | "annotation": { 319 | "pct_latest_version": "FF71" 320 | }, 321 | "date": "2019-12-03" 322 | }, 323 | { 324 | "annotation": { 325 | "pct_latest_version": "FF72" 326 | }, 327 | "date": "2020-01-07" 328 | }, 329 | { 330 | "annotation": { 331 | "pct_latest_version": "FF73" 332 | }, 333 | "date": "2020-02-11" 334 | }, 335 | { 336 | 
"annotation": { 337 | "pct_latest_version": "FF74" 338 | }, 339 | "date": "2020-03-10" 340 | } 341 | ], 342 | "France": [ 343 | { 344 | "annotation": { 345 | "pct_latest_version": "FF53" 346 | }, 347 | "date": "2017-04-19" 348 | }, 349 | { 350 | "annotation": { 351 | "pct_latest_version": "FF54" 352 | }, 353 | "date": "2017-06-13" 354 | }, 355 | { 356 | "annotation": { 357 | "pct_latest_version": "FF55" 358 | }, 359 | "date": "2017-08-08" 360 | }, 361 | { 362 | "annotation": { 363 | "MAU": "Summer Slump" 364 | }, 365 | "date": "2017-08-27" 366 | }, 367 | { 368 | "annotation": { 369 | "pct_latest_version": "FF56" 370 | }, 371 | "date": "2017-09-28" 372 | }, 373 | { 374 | "annotation": { 375 | "MAU": "Autumn Holidays" 376 | }, 377 | "date": "2017-11-05" 378 | }, 379 | { 380 | "annotation": { 381 | "pct_latest_version": "FF57" 382 | }, 383 | "date": "2017-11-14" 384 | }, 385 | { 386 | "annotation": { 387 | "MAU": "Winter Holidays" 388 | }, 389 | "date": "2018-01-14" 390 | }, 391 | { 392 | "annotation": { 393 | "pct_latest_version": "FF58" 394 | }, 395 | "date": "2018-01-23" 396 | }, 397 | { 398 | "annotation": { 399 | "pct_latest_version": "FF59" 400 | }, 401 | "date": "2018-03-13" 402 | }, 403 | { 404 | "annotation": { 405 | "pct_latest_version": "FF60" 406 | }, 407 | "date": "2018-05-09" 408 | }, 409 | { 410 | "annotation": { 411 | "pct_latest_version": "FF61" 412 | }, 413 | "date": "2018-06-26" 414 | }, 415 | { 416 | "annotation": { 417 | "MAU": "Summer Slump" 418 | }, 419 | "date": "2018-08-27" 420 | }, 421 | { 422 | "annotation": { 423 | "pct_latest_version": "FF62" 424 | }, 425 | "date": "2018-09-05" 426 | }, 427 | { 428 | "annotation": { 429 | "pct_latest_version": "FF63" 430 | }, 431 | "date": "2018-10-23" 432 | }, 433 | { 434 | "annotation": { 435 | "pct_latest_version": "FF64" 436 | }, 437 | "date": "2018-12-11" 438 | }, 439 | { 440 | "annotation": { 441 | "MAU": "Autumn Holidays" 442 | }, 443 | "date": "2018-11-03" 444 | }, 445 | { 446 | "annotation": { 447 | "MAU": "Winter Holidays" 448 | }, 449 | "date": "2019-01-01" 450 | }, 451 | { 452 | "annotation": { 453 | "pct_latest_version": "FF65" 454 | }, 455 | "date": "2019-01-29" 456 | }, 457 | { 458 | "annotation": { 459 | "pct_latest_version": "FF66" 460 | }, 461 | "date": "2019-03-19" 462 | }, 463 | { 464 | "annotation": { 465 | "MAU": "data deleted (addons outage)" 466 | }, 467 | "date": "2019-05-05" 468 | }, 469 | { 470 | "annotation": { 471 | "YAU": "data deleted (addons outage)" 472 | }, 473 | "date": "2019-05-05" 474 | }, 475 | { 476 | "annotation": { 477 | "avg_daily_usage(hours)": "data deleted (addons outage)" 478 | }, 479 | "date": "2019-05-05" 480 | }, 481 | { 482 | "annotation": { 483 | "pct_latest_version": "FF67" 484 | }, 485 | "date": "2019-05-21" 486 | }, 487 | { 488 | "annotation": { 489 | "pct_latest_version": "FF68" 490 | }, 491 | "date": "2019-07-09" 492 | }, 493 | { 494 | "annotation": { 495 | "pct_latest_version": "FF69" 496 | }, 497 | "date": "2019-09-03" 498 | }, 499 | { 500 | "annotation": { 501 | "pct_latest_version": "FF70" 502 | }, 503 | "date": "2019-10-22" 504 | }, 505 | { 506 | "annotation": { 507 | "pct_latest_version": "FF71" 508 | }, 509 | "date": "2019-12-03" 510 | }, 511 | { 512 | "annotation": { 513 | "pct_latest_version": "FF72" 514 | }, 515 | "date": "2020-01-07" 516 | }, 517 | { 518 | "annotation": { 519 | "pct_latest_version": "FF73" 520 | }, 521 | "date": "2020-02-11" 522 | }, 523 | { 524 | "annotation": { 525 | "pct_latest_version": "FF74" 526 | }, 527 | "date": "2020-03-10" 528 | } 529 | 
], 530 | "Germany": [ 531 | { 532 | "annotation": { 533 | "pct_latest_version": "FF53" 534 | }, 535 | "date": "2017-04-19" 536 | }, 537 | { 538 | "annotation": { 539 | "pct_latest_version": "FF54" 540 | }, 541 | "date": "2017-06-13" 542 | }, 543 | { 544 | "annotation": { 545 | "pct_latest_version": "FF55" 546 | }, 547 | "date": "2017-08-08" 548 | }, 549 | { 550 | "annotation": { 551 | "MAU": "Summer Slump" 552 | }, 553 | "date": "2017-08-27" 554 | }, 555 | { 556 | "annotation": { 557 | "pct_latest_version": "FF56" 558 | }, 559 | "date": "2017-09-28" 560 | }, 561 | { 562 | "annotation": { 563 | "pct_latest_version": "FF57" 564 | }, 565 | "date": "2017-11-14" 566 | }, 567 | { 568 | "annotation": { 569 | "MAU": "Winter Holidays" 570 | }, 571 | "date": "2018-01-07" 572 | }, 573 | { 574 | "annotation": { 575 | "pct_latest_version": "FF58" 576 | }, 577 | "date": "2018-01-23" 578 | }, 579 | { 580 | "annotation": { 581 | "pct_latest_version": "FF59" 582 | }, 583 | "date": "2018-03-13" 584 | }, 585 | { 586 | "annotation": { 587 | "pct_latest_version": "FF60" 588 | }, 589 | "date": "2018-05-09" 590 | }, 591 | { 592 | "annotation": { 593 | "pct_latest_version": "FF61" 594 | }, 595 | "date": "2018-06-26" 596 | }, 597 | { 598 | "annotation": { 599 | "MAU": "Summer Slump" 600 | }, 601 | "date": "2018-08-27" 602 | }, 603 | { 604 | "annotation": { 605 | "pct_latest_version": "FF62" 606 | }, 607 | "date": "2018-09-05" 608 | }, 609 | { 610 | "annotation": { 611 | "pct_latest_version": "FF63" 612 | }, 613 | "date": "2018-10-23" 614 | }, 615 | { 616 | "annotation": { 617 | "pct_latest_version": "FF64" 618 | }, 619 | "date": "2018-12-11" 620 | }, 621 | { 622 | "annotation": { 623 | "MAU": "Winter Holidays" 624 | }, 625 | "date": "2019-01-01" 626 | }, 627 | { 628 | "annotation": { 629 | "pct_latest_version": "FF65" 630 | }, 631 | "date": "2019-01-29" 632 | }, 633 | { 634 | "annotation": { 635 | "pct_latest_version": "FF66" 636 | }, 637 | "date": "2019-03-19" 638 | }, 639 | { 640 | "annotation": { 641 | "MAU": "data deleted (addons outage)" 642 | }, 643 | "date": "2019-05-05" 644 | }, 645 | { 646 | "annotation": { 647 | "YAU": "data deleted (addons outage)" 648 | }, 649 | "date": "2019-05-05" 650 | }, 651 | { 652 | "annotation": { 653 | "avg_daily_usage(hours)": "data deleted (addons outage)" 654 | }, 655 | "date": "2019-05-05" 656 | }, 657 | { 658 | "annotation": { 659 | "pct_latest_version": "FF67" 660 | }, 661 | "date": "2019-05-21" 662 | }, 663 | { 664 | "annotation": { 665 | "pct_latest_version": "FF68" 666 | }, 667 | "date": "2019-07-09" 668 | }, 669 | { 670 | "annotation": { 671 | "pct_latest_version": "FF69" 672 | }, 673 | "date": "2019-09-03" 674 | }, 675 | { 676 | "annotation": { 677 | "pct_latest_version": "FF70" 678 | }, 679 | "date": "2019-10-22" 680 | }, 681 | { 682 | "annotation": { 683 | "pct_latest_version": "FF71" 684 | }, 685 | "date": "2019-12-03" 686 | }, 687 | { 688 | "annotation": { 689 | "pct_latest_version": "FF72" 690 | }, 691 | "date": "2020-01-07" 692 | }, 693 | { 694 | "annotation": { 695 | "pct_latest_version": "FF73" 696 | }, 697 | "date": "2020-02-11" 698 | }, 699 | { 700 | "annotation": { 701 | "pct_latest_version": "FF74" 702 | }, 703 | "date": "2020-03-10" 704 | } 705 | ], 706 | "India": [ 707 | { 708 | "annotation": { 709 | "pct_latest_version": "FF53" 710 | }, 711 | "date": "2017-04-19" 712 | }, 713 | { 714 | "annotation": { 715 | "MAU": "Summer Slump" 716 | }, 717 | "date": "2017-05-28" 718 | }, 719 | { 720 | "annotation": { 721 | "pct_latest_version": "FF54" 722 | }, 723 | 
"date": "2017-06-13" 724 | }, 725 | { 726 | "annotation": { 727 | "pct_latest_version": "FF55" 728 | }, 729 | "date": "2017-08-08" 730 | }, 731 | { 732 | "annotation": { 733 | "pct_latest_version": "FF56" 734 | }, 735 | "date": "2017-09-28" 736 | }, 737 | { 738 | "annotation": { 739 | "pct_latest_version": "FF57" 740 | }, 741 | "date": "2017-11-14" 742 | }, 743 | { 744 | "annotation": { 745 | "pct_latest_version": "FF58" 746 | }, 747 | "date": "2018-01-23" 748 | }, 749 | { 750 | "annotation": { 751 | "pct_latest_version": "FF59" 752 | }, 753 | "date": "2018-03-13" 754 | }, 755 | { 756 | "annotation": { 757 | "pct_latest_version": "FF60" 758 | }, 759 | "date": "2018-05-09" 760 | }, 761 | { 762 | "annotation": { 763 | "MAU": "Summer Slump" 764 | }, 765 | "date": "2018-05-28" 766 | }, 767 | { 768 | "annotation": { 769 | "pct_latest_version": "FF61" 770 | }, 771 | "date": "2018-06-26" 772 | }, 773 | { 774 | "annotation": { 775 | "pct_latest_version": "FF62" 776 | }, 777 | "date": "2018-09-05" 778 | }, 779 | { 780 | "annotation": { 781 | "pct_latest_version": "FF63" 782 | }, 783 | "date": "2018-10-23" 784 | }, 785 | { 786 | "annotation": { 787 | "pct_latest_version": "FF64" 788 | }, 789 | "date": "2018-12-11" 790 | }, 791 | { 792 | "annotation": { 793 | "MAU": "Winter Holidays" 794 | }, 795 | "date": "2019-01-01" 796 | }, 797 | { 798 | "annotation": { 799 | "pct_latest_version": "FF65" 800 | }, 801 | "date": "2019-01-29" 802 | }, 803 | { 804 | "annotation": { 805 | "pct_latest_version": "FF66" 806 | }, 807 | "date": "2019-03-19" 808 | }, 809 | { 810 | "annotation": { 811 | "MAU": "data deleted (addons outage)" 812 | }, 813 | "date": "2019-05-05" 814 | }, 815 | { 816 | "annotation": { 817 | "YAU": "data deleted (addons outage)" 818 | }, 819 | "date": "2019-05-05" 820 | }, 821 | { 822 | "annotation": { 823 | "avg_daily_usage(hours)": "data deleted (addons outage)" 824 | }, 825 | "date": "2019-05-05" 826 | }, 827 | { 828 | "annotation": { 829 | "pct_latest_version": "FF67" 830 | }, 831 | "date": "2019-05-21" 832 | }, 833 | { 834 | "annotation": { 835 | "pct_latest_version": "FF68" 836 | }, 837 | "date": "2019-07-09" 838 | }, 839 | { 840 | "annotation": { 841 | "pct_latest_version": "FF69" 842 | }, 843 | "date": "2019-09-03" 844 | }, 845 | { 846 | "annotation": { 847 | "pct_latest_version": "FF70" 848 | }, 849 | "date": "2019-10-22" 850 | }, 851 | { 852 | "annotation": { 853 | "pct_latest_version": "FF71" 854 | }, 855 | "date": "2019-12-03" 856 | }, 857 | { 858 | "annotation": { 859 | "pct_latest_version": "FF72" 860 | }, 861 | "date": "2020-01-07" 862 | }, 863 | { 864 | "annotation": { 865 | "pct_latest_version": "FF73" 866 | }, 867 | "date": "2020-02-11" 868 | }, 869 | { 870 | "annotation": { 871 | "pct_latest_version": "FF74" 872 | }, 873 | "date": "2020-03-10" 874 | } 875 | ], 876 | "Indonesia": [ 877 | { 878 | "annotation": { 879 | "pct_latest_version": "FF53" 880 | }, 881 | "date": "2017-04-19" 882 | }, 883 | { 884 | "annotation": { 885 | "pct_latest_version": "FF54" 886 | }, 887 | "date": "2017-06-13" 888 | }, 889 | { 890 | "annotation": { 891 | "MAU": "Hari Raya Idul Fitri (Ramadan Ends)" 892 | }, 893 | "date": "2017-06-25" 894 | }, 895 | { 896 | "annotation": { 897 | "pct_latest_version": "FF55" 898 | }, 899 | "date": "2017-08-08" 900 | }, 901 | { 902 | "annotation": { 903 | "pct_latest_version": "FF56" 904 | }, 905 | "date": "2017-09-28" 906 | }, 907 | { 908 | "annotation": { 909 | "pct_latest_version": "FF57" 910 | }, 911 | "date": "2017-11-14" 912 | }, 913 | { 914 | "annotation": { 915 
| "MAU": "Winter Holidays" 916 | }, 917 | "date": "2018-01-07" 918 | }, 919 | { 920 | "annotation": { 921 | "pct_latest_version": "FF58" 922 | }, 923 | "date": "2018-01-23" 924 | }, 925 | { 926 | "annotation": { 927 | "pct_latest_version": "FF59" 928 | }, 929 | "date": "2018-03-13" 930 | }, 931 | { 932 | "annotation": { 933 | "pct_latest_version": "FF60" 934 | }, 935 | "date": "2018-05-09" 936 | }, 937 | { 938 | "annotation": { 939 | "pct_latest_version": "FF61" 940 | }, 941 | "date": "2018-06-26" 942 | }, 943 | { 944 | "annotation": { 945 | "MAU": "Hari Raya Idul Fitri (Ramadan Ends)" 946 | }, 947 | "date": "2018-07-15" 948 | }, 949 | { 950 | "annotation": { 951 | "pct_latest_version": "FF62" 952 | }, 953 | "date": "2018-09-05" 954 | }, 955 | { 956 | "annotation": { 957 | "pct_latest_version": "FF63" 958 | }, 959 | "date": "2018-10-23" 960 | }, 961 | { 962 | "annotation": { 963 | "pct_latest_version": "FF64" 964 | }, 965 | "date": "2018-12-11" 966 | }, 967 | { 968 | "annotation": { 969 | "MAU": "Winter Holidays" 970 | }, 971 | "date": "2019-01-01" 972 | }, 973 | { 974 | "annotation": { 975 | "pct_latest_version": "FF65" 976 | }, 977 | "date": "2019-01-29" 978 | }, 979 | { 980 | "annotation": { 981 | "pct_latest_version": "FF66" 982 | }, 983 | "date": "2019-03-19" 984 | }, 985 | { 986 | "annotation": { 987 | "MAU": "data deleted (addons outage)" 988 | }, 989 | "date": "2019-05-05" 990 | }, 991 | { 992 | "annotation": { 993 | "YAU": "data deleted (addons outage)" 994 | }, 995 | "date": "2019-05-05" 996 | }, 997 | { 998 | "annotation": { 999 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1000 | }, 1001 | "date": "2019-05-05" 1002 | }, 1003 | { 1004 | "annotation": { 1005 | "pct_latest_version": "FF67" 1006 | }, 1007 | "date": "2019-05-21" 1008 | }, 1009 | { 1010 | "annotation": { 1011 | "pct_latest_version": "FF68" 1012 | }, 1013 | "date": "2019-07-09" 1014 | }, 1015 | { 1016 | "annotation": { 1017 | "pct_latest_version": "FF69" 1018 | }, 1019 | "date": "2019-09-03" 1020 | }, 1021 | { 1022 | "annotation": { 1023 | "pct_latest_version": "FF70" 1024 | }, 1025 | "date": "2019-10-22" 1026 | }, 1027 | { 1028 | "annotation": { 1029 | "pct_latest_version": "FF71" 1030 | }, 1031 | "date": "2019-12-03" 1032 | }, 1033 | { 1034 | "annotation": { 1035 | "pct_latest_version": "FF72" 1036 | }, 1037 | "date": "2020-01-07" 1038 | }, 1039 | { 1040 | "annotation": { 1041 | "pct_latest_version": "FF73" 1042 | }, 1043 | "date": "2020-02-11" 1044 | }, 1045 | { 1046 | "annotation": { 1047 | "pct_latest_version": "FF74" 1048 | }, 1049 | "date": "2020-03-10" 1050 | } 1051 | ], 1052 | "Italy": [ 1053 | { 1054 | "annotation": { 1055 | "pct_latest_version": "FF53" 1056 | }, 1057 | "date": "2017-04-19" 1058 | }, 1059 | { 1060 | "annotation": { 1061 | "pct_latest_version": "FF54" 1062 | }, 1063 | "date": "2017-06-13" 1064 | }, 1065 | { 1066 | "annotation": { 1067 | "pct_latest_version": "FF55" 1068 | }, 1069 | "date": "2017-08-08" 1070 | }, 1071 | { 1072 | "annotation": { 1073 | "MAU": "Summer Slump" 1074 | }, 1075 | "date": "2017-08-27" 1076 | }, 1077 | { 1078 | "annotation": { 1079 | "pct_latest_version": "FF56" 1080 | }, 1081 | "date": "2017-09-28" 1082 | }, 1083 | { 1084 | "annotation": { 1085 | "pct_latest_version": "FF57" 1086 | }, 1087 | "date": "2017-11-14" 1088 | }, 1089 | { 1090 | "annotation": { 1091 | "MAU": "Winter Holidays" 1092 | }, 1093 | "date": "2018-01-14" 1094 | }, 1095 | { 1096 | "annotation": { 1097 | "pct_latest_version": "FF58" 1098 | }, 1099 | "date": "2018-01-23" 1100 | }, 1101 | { 
1102 | "annotation": { 1103 | "pct_latest_version": "FF59" 1104 | }, 1105 | "date": "2018-03-13" 1106 | }, 1107 | { 1108 | "annotation": { 1109 | "pct_latest_version": "FF60" 1110 | }, 1111 | "date": "2018-05-09" 1112 | }, 1113 | { 1114 | "annotation": { 1115 | "pct_latest_version": "FF61" 1116 | }, 1117 | "date": "2018-06-26" 1118 | }, 1119 | { 1120 | "annotation": { 1121 | "MAU": "Summer Slump" 1122 | }, 1123 | "date": "2018-08-27" 1124 | }, 1125 | { 1126 | "annotation": { 1127 | "pct_latest_version": "FF62" 1128 | }, 1129 | "date": "2018-09-05" 1130 | }, 1131 | { 1132 | "annotation": { 1133 | "pct_latest_version": "FF63" 1134 | }, 1135 | "date": "2018-10-23" 1136 | }, 1137 | { 1138 | "annotation": { 1139 | "pct_latest_version": "FF64" 1140 | }, 1141 | "date": "2018-12-11" 1142 | }, 1143 | { 1144 | "annotation": { 1145 | "MAU": "Winter Holidays" 1146 | }, 1147 | "date": "2019-01-01" 1148 | }, 1149 | { 1150 | "annotation": { 1151 | "pct_latest_version": "FF65" 1152 | }, 1153 | "date": "2019-01-29" 1154 | }, 1155 | { 1156 | "annotation": { 1157 | "pct_latest_version": "FF66" 1158 | }, 1159 | "date": "2019-03-19" 1160 | }, 1161 | { 1162 | "annotation": { 1163 | "MAU": "data deleted (addons outage)" 1164 | }, 1165 | "date": "2019-05-05" 1166 | }, 1167 | { 1168 | "annotation": { 1169 | "YAU": "data deleted (addons outage)" 1170 | }, 1171 | "date": "2019-05-05" 1172 | }, 1173 | { 1174 | "annotation": { 1175 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1176 | }, 1177 | "date": "2019-05-05" 1178 | }, 1179 | { 1180 | "annotation": { 1181 | "pct_latest_version": "FF67" 1182 | }, 1183 | "date": "2019-05-21" 1184 | }, 1185 | { 1186 | "annotation": { 1187 | "pct_latest_version": "FF68" 1188 | }, 1189 | "date": "2019-07-09" 1190 | }, 1191 | { 1192 | "annotation": { 1193 | "pct_latest_version": "FF69" 1194 | }, 1195 | "date": "2019-09-03" 1196 | }, 1197 | { 1198 | "annotation": { 1199 | "pct_latest_version": "FF70" 1200 | }, 1201 | "date": "2019-10-22" 1202 | }, 1203 | { 1204 | "annotation": { 1205 | "pct_latest_version": "FF71" 1206 | }, 1207 | "date": "2019-12-03" 1208 | }, 1209 | { 1210 | "annotation": { 1211 | "pct_latest_version": "FF72" 1212 | }, 1213 | "date": "2020-01-07" 1214 | }, 1215 | { 1216 | "annotation": { 1217 | "pct_latest_version": "FF73" 1218 | }, 1219 | "date": "2020-02-11" 1220 | }, 1221 | { 1222 | "annotation": { 1223 | "pct_latest_version": "FF74" 1224 | }, 1225 | "date": "2020-03-10" 1226 | } 1227 | ], 1228 | "Poland": [ 1229 | { 1230 | "annotation": { 1231 | "pct_latest_version": "FF53" 1232 | }, 1233 | "date": "2017-04-19" 1234 | }, 1235 | { 1236 | "annotation": { 1237 | "pct_latest_version": "FF54" 1238 | }, 1239 | "date": "2017-06-13" 1240 | }, 1241 | { 1242 | "annotation": { 1243 | "pct_latest_version": "FF55" 1244 | }, 1245 | "date": "2017-08-08" 1246 | }, 1247 | { 1248 | "annotation": { 1249 | "MAU": "Summer Slump" 1250 | }, 1251 | "date": "2017-08-27" 1252 | }, 1253 | { 1254 | "annotation": { 1255 | "pct_latest_version": "FF56" 1256 | }, 1257 | "date": "2017-09-28" 1258 | }, 1259 | { 1260 | "annotation": { 1261 | "pct_latest_version": "FF57" 1262 | }, 1263 | "date": "2017-11-14" 1264 | }, 1265 | { 1266 | "annotation": { 1267 | "pct_latest_version": "FF58" 1268 | }, 1269 | "date": "2018-01-23" 1270 | }, 1271 | { 1272 | "annotation": { 1273 | "pct_latest_version": "FF59" 1274 | }, 1275 | "date": "2018-03-13" 1276 | }, 1277 | { 1278 | "annotation": { 1279 | "pct_latest_version": "FF60" 1280 | }, 1281 | "date": "2018-05-09" 1282 | }, 1283 | { 1284 | 
"annotation": { 1285 | "pct_latest_version": "FF61" 1286 | }, 1287 | "date": "2018-06-26" 1288 | }, 1289 | { 1290 | "annotation": { 1291 | "MAU": "Summer Slump" 1292 | }, 1293 | "date": "2018-08-27" 1294 | }, 1295 | { 1296 | "annotation": { 1297 | "pct_latest_version": "FF62" 1298 | }, 1299 | "date": "2018-09-05" 1300 | }, 1301 | { 1302 | "annotation": { 1303 | "pct_latest_version": "FF63" 1304 | }, 1305 | "date": "2018-10-23" 1306 | }, 1307 | { 1308 | "annotation": { 1309 | "pct_latest_version": "FF64" 1310 | }, 1311 | "date": "2018-12-11" 1312 | }, 1313 | { 1314 | "annotation": { 1315 | "MAU": "Winter Holidays" 1316 | }, 1317 | "date": "2019-01-01" 1318 | }, 1319 | { 1320 | "annotation": { 1321 | "pct_latest_version": "FF65" 1322 | }, 1323 | "date": "2019-01-29" 1324 | }, 1325 | { 1326 | "annotation": { 1327 | "pct_latest_version": "FF66" 1328 | }, 1329 | "date": "2019-03-19" 1330 | }, 1331 | { 1332 | "annotation": { 1333 | "MAU": "data deleted (addons outage)" 1334 | }, 1335 | "date": "2019-05-05" 1336 | }, 1337 | { 1338 | "annotation": { 1339 | "YAU": "data deleted (addons outage)" 1340 | }, 1341 | "date": "2019-05-05" 1342 | }, 1343 | { 1344 | "annotation": { 1345 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1346 | }, 1347 | "date": "2019-05-05" 1348 | }, 1349 | { 1350 | "annotation": { 1351 | "pct_latest_version": "FF67" 1352 | }, 1353 | "date": "2019-05-21" 1354 | }, 1355 | { 1356 | "annotation": { 1357 | "pct_latest_version": "FF68" 1358 | }, 1359 | "date": "2019-07-09" 1360 | }, 1361 | { 1362 | "annotation": { 1363 | "pct_latest_version": "FF69" 1364 | }, 1365 | "date": "2019-09-03" 1366 | }, 1367 | { 1368 | "annotation": { 1369 | "pct_latest_version": "FF70" 1370 | }, 1371 | "date": "2019-10-22" 1372 | }, 1373 | { 1374 | "annotation": { 1375 | "pct_latest_version": "FF71" 1376 | }, 1377 | "date": "2019-12-03" 1378 | }, 1379 | { 1380 | "annotation": { 1381 | "pct_latest_version": "FF72" 1382 | }, 1383 | "date": "2020-01-07" 1384 | }, 1385 | { 1386 | "annotation": { 1387 | "pct_latest_version": "FF73" 1388 | }, 1389 | "date": "2020-02-11" 1390 | }, 1391 | { 1392 | "annotation": { 1393 | "pct_latest_version": "FF74" 1394 | }, 1395 | "date": "2020-03-10" 1396 | } 1397 | ], 1398 | "Russia": [ 1399 | { 1400 | "annotation": { 1401 | "pct_latest_version": "FF53" 1402 | }, 1403 | "date": "2017-04-19" 1404 | }, 1405 | { 1406 | "annotation": { 1407 | "pct_latest_version": "FF54" 1408 | }, 1409 | "date": "2017-06-13" 1410 | }, 1411 | { 1412 | "annotation": { 1413 | "pct_latest_version": "FF55" 1414 | }, 1415 | "date": "2017-08-08" 1416 | }, 1417 | { 1418 | "annotation": { 1419 | "MAU": "Summer Slump" 1420 | }, 1421 | "date": "2017-08-13" 1422 | }, 1423 | { 1424 | "annotation": { 1425 | "pct_latest_version": "FF56" 1426 | }, 1427 | "date": "2017-09-28" 1428 | }, 1429 | { 1430 | "annotation": { 1431 | "pct_latest_version": "FF57" 1432 | }, 1433 | "date": "2017-11-14" 1434 | }, 1435 | { 1436 | "annotation": { 1437 | "MAU": "Winter Holidays" 1438 | }, 1439 | "date": "2018-01-21" 1440 | }, 1441 | { 1442 | "annotation": { 1443 | "pct_latest_version": "FF58" 1444 | }, 1445 | "date": "2018-01-23" 1446 | }, 1447 | { 1448 | "annotation": { 1449 | "pct_latest_version": "FF59" 1450 | }, 1451 | "date": "2018-03-13" 1452 | }, 1453 | { 1454 | "annotation": { 1455 | "pct_latest_version": "FF60" 1456 | }, 1457 | "date": "2018-05-09" 1458 | }, 1459 | { 1460 | "annotation": { 1461 | "pct_latest_version": "FF61" 1462 | }, 1463 | "date": "2018-06-26" 1464 | }, 1465 | { 1466 | "annotation": { 1467 | 
"MAU": "Summer Slump" 1468 | }, 1469 | "date": "2018-08-13" 1470 | }, 1471 | { 1472 | "annotation": { 1473 | "pct_latest_version": "FF62" 1474 | }, 1475 | "date": "2018-09-05" 1476 | }, 1477 | { 1478 | "annotation": { 1479 | "pct_latest_version": "FF63" 1480 | }, 1481 | "date": "2018-10-23" 1482 | }, 1483 | { 1484 | "annotation": { 1485 | "pct_latest_version": "FF64" 1486 | }, 1487 | "date": "2018-12-11" 1488 | }, 1489 | { 1490 | "annotation": { 1491 | "MAU": "Winter Holidays" 1492 | }, 1493 | "date": "2019-01-01" 1494 | }, 1495 | { 1496 | "annotation": { 1497 | "pct_latest_version": "FF65" 1498 | }, 1499 | "date": "2019-01-29" 1500 | }, 1501 | { 1502 | "annotation": { 1503 | "pct_latest_version": "FF66" 1504 | }, 1505 | "date": "2019-03-19" 1506 | }, 1507 | { 1508 | "annotation": { 1509 | "MAU": "data deleted (addons outage)" 1510 | }, 1511 | "date": "2019-05-05" 1512 | }, 1513 | { 1514 | "annotation": { 1515 | "YAU": "data deleted (addons outage)" 1516 | }, 1517 | "date": "2019-05-05" 1518 | }, 1519 | { 1520 | "annotation": { 1521 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1522 | }, 1523 | "date": "2019-05-05" 1524 | }, 1525 | { 1526 | "annotation": { 1527 | "pct_latest_version": "FF67" 1528 | }, 1529 | "date": "2019-05-21" 1530 | }, 1531 | { 1532 | "annotation": { 1533 | "pct_latest_version": "FF68" 1534 | }, 1535 | "date": "2019-07-09" 1536 | }, 1537 | { 1538 | "annotation": { 1539 | "pct_latest_version": "FF69" 1540 | }, 1541 | "date": "2019-09-03" 1542 | }, 1543 | { 1544 | "annotation": { 1545 | "pct_latest_version": "FF70" 1546 | }, 1547 | "date": "2019-10-22" 1548 | }, 1549 | { 1550 | "annotation": { 1551 | "pct_latest_version": "FF71" 1552 | }, 1553 | "date": "2019-12-03" 1554 | }, 1555 | { 1556 | "annotation": { 1557 | "pct_latest_version": "FF72" 1558 | }, 1559 | "date": "2020-01-07" 1560 | }, 1561 | { 1562 | "annotation": { 1563 | "pct_latest_version": "FF73" 1564 | }, 1565 | "date": "2020-02-11" 1566 | }, 1567 | { 1568 | "annotation": { 1569 | "pct_latest_version": "FF74" 1570 | }, 1571 | "date": "2020-03-10" 1572 | } 1573 | ], 1574 | "United States": [ 1575 | { 1576 | "annotation": { 1577 | "pct_latest_version": "FF53" 1578 | }, 1579 | "date": "2017-04-19" 1580 | }, 1581 | { 1582 | "annotation": { 1583 | "pct_latest_version": "FF54" 1584 | }, 1585 | "date": "2017-06-13" 1586 | }, 1587 | { 1588 | "annotation": { 1589 | "MAU": "Summer Slump" 1590 | }, 1591 | "date": "2017-07-30" 1592 | }, 1593 | { 1594 | "annotation": { 1595 | "pct_latest_version": "FF55" 1596 | }, 1597 | "date": "2017-08-08" 1598 | }, 1599 | { 1600 | "annotation": { 1601 | "pct_latest_version": "FF56" 1602 | }, 1603 | "date": "2017-09-28" 1604 | }, 1605 | { 1606 | "annotation": { 1607 | "pct_latest_version": "FF57" 1608 | }, 1609 | "date": "2017-11-14" 1610 | }, 1611 | { 1612 | "annotation": { 1613 | "MAU": "Winter Holidays" 1614 | }, 1615 | "date": "2018-01-14" 1616 | }, 1617 | { 1618 | "annotation": { 1619 | "pct_latest_version": "FF58" 1620 | }, 1621 | "date": "2018-01-23" 1622 | }, 1623 | { 1624 | "annotation": { 1625 | "pct_latest_version": "FF59" 1626 | }, 1627 | "date": "2018-03-13" 1628 | }, 1629 | { 1630 | "annotation": { 1631 | "pct_latest_version": "FF60" 1632 | }, 1633 | "date": "2018-05-09" 1634 | }, 1635 | { 1636 | "annotation": { 1637 | "pct_latest_version": "FF61" 1638 | }, 1639 | "date": "2018-06-26" 1640 | }, 1641 | { 1642 | "annotation": { 1643 | "MAU": "Summer Slump" 1644 | }, 1645 | "date": "2018-07-30" 1646 | }, 1647 | { 1648 | "annotation": { 1649 | "pct_latest_version": 
"FF62" 1650 | }, 1651 | "date": "2018-09-05" 1652 | }, 1653 | { 1654 | "annotation": { 1655 | "pct_latest_version": "FF63" 1656 | }, 1657 | "date": "2018-10-23" 1658 | }, 1659 | { 1660 | "annotation": { 1661 | "pct_latest_version": "FF64" 1662 | }, 1663 | "date": "2018-12-11" 1664 | }, 1665 | { 1666 | "annotation": { 1667 | "MAU": "Winter Holidays" 1668 | }, 1669 | "date": "2019-01-01" 1670 | }, 1671 | { 1672 | "annotation": { 1673 | "pct_latest_version": "FF65" 1674 | }, 1675 | "date": "2019-01-29" 1676 | }, 1677 | { 1678 | "annotation": { 1679 | "pct_latest_version": "FF66" 1680 | }, 1681 | "date": "2019-03-19" 1682 | }, 1683 | { 1684 | "annotation": { 1685 | "MAU": "data deleted (addons outage)" 1686 | }, 1687 | "date": "2019-05-05" 1688 | }, 1689 | { 1690 | "annotation": { 1691 | "YAU": "data deleted (addons outage)" 1692 | }, 1693 | "date": "2019-05-05" 1694 | }, 1695 | { 1696 | "annotation": { 1697 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1698 | }, 1699 | "date": "2019-05-05" 1700 | }, 1701 | { 1702 | "annotation": { 1703 | "pct_latest_version": "FF67" 1704 | }, 1705 | "date": "2019-05-21" 1706 | }, 1707 | { 1708 | "annotation": { 1709 | "pct_latest_version": "FF68" 1710 | }, 1711 | "date": "2019-07-09" 1712 | }, 1713 | { 1714 | "annotation": { 1715 | "pct_latest_version": "FF69" 1716 | }, 1717 | "date": "2019-09-03" 1718 | }, 1719 | { 1720 | "annotation": { 1721 | "pct_latest_version": "FF70" 1722 | }, 1723 | "date": "2019-10-22" 1724 | }, 1725 | { 1726 | "annotation": { 1727 | "pct_latest_version": "FF71" 1728 | }, 1729 | "date": "2019-12-03" 1730 | }, 1731 | { 1732 | "annotation": { 1733 | "pct_latest_version": "FF72" 1734 | }, 1735 | "date": "2020-01-07" 1736 | }, 1737 | { 1738 | "annotation": { 1739 | "pct_latest_version": "FF73" 1740 | }, 1741 | "date": "2020-02-11" 1742 | }, 1743 | { 1744 | "annotation": { 1745 | "pct_latest_version": "FF74" 1746 | }, 1747 | "date": "2020-03-10" 1748 | } 1749 | ], 1750 | "Worldwide": [ 1751 | { 1752 | "annotation": { 1753 | "pct_latest_version": "FF53" 1754 | }, 1755 | "date": "2017-04-19" 1756 | }, 1757 | { 1758 | "annotation": { 1759 | "pct_latest_version": "FF54" 1760 | }, 1761 | "date": "2017-06-13" 1762 | }, 1763 | { 1764 | "annotation": { 1765 | "pct_latest_version": "FF55" 1766 | }, 1767 | "date": "2017-08-08" 1768 | }, 1769 | { 1770 | "annotation": { 1771 | "MAU": "Summer Slump" 1772 | }, 1773 | "date": "2017-08-20" 1774 | }, 1775 | { 1776 | "annotation": { 1777 | "pct_latest_version": "FF56" 1778 | }, 1779 | "date": "2017-09-28" 1780 | }, 1781 | { 1782 | "annotation": { 1783 | "pct_latest_version": "FF57" 1784 | }, 1785 | "date": "2017-11-14" 1786 | }, 1787 | { 1788 | "annotation": { 1789 | "MAU": "Winter Holidays" 1790 | }, 1791 | "date": "2018-01-14" 1792 | }, 1793 | { 1794 | "annotation": { 1795 | "pct_latest_version": "FF58" 1796 | }, 1797 | "date": "2018-01-23" 1798 | }, 1799 | { 1800 | "annotation": { 1801 | "pct_latest_version": "FF59" 1802 | }, 1803 | "date": "2018-03-13" 1804 | }, 1805 | { 1806 | "annotation": { 1807 | "pct_latest_version": "FF60" 1808 | }, 1809 | "date": "2018-05-09" 1810 | }, 1811 | { 1812 | "annotation": { 1813 | "pct_latest_version": "FF61" 1814 | }, 1815 | "date": "2018-06-26" 1816 | }, 1817 | { 1818 | "annotation": { 1819 | "MAU": "Summer Slump" 1820 | }, 1821 | "date": "2018-08-20" 1822 | }, 1823 | { 1824 | "annotation": { 1825 | "pct_latest_version": "FF62" 1826 | }, 1827 | "date": "2018-09-05" 1828 | }, 1829 | { 1830 | "annotation": { 1831 | "pct_latest_version": "FF63" 1832 | }, 
1833 | "date": "2018-10-23" 1834 | }, 1835 | { 1836 | "annotation": { 1837 | "pct_latest_version": "FF64" 1838 | }, 1839 | "date": "2018-12-11" 1840 | }, 1841 | { 1842 | "annotation": { 1843 | "MAU": "Winter Holidays" 1844 | }, 1845 | "date": "2019-01-01" 1846 | }, 1847 | { 1848 | "annotation": { 1849 | "pct_latest_version": "FF65" 1850 | }, 1851 | "date": "2019-01-29" 1852 | }, 1853 | { 1854 | "annotation": { 1855 | "pct_latest_version": "FF66" 1856 | }, 1857 | "date": "2019-03-19" 1858 | }, 1859 | { 1860 | "annotation": { 1861 | "MAU": "data deleted (addons outage)" 1862 | }, 1863 | "date": "2019-05-05" 1864 | }, 1865 | { 1866 | "annotation": { 1867 | "YAU": "data deleted (addons outage)" 1868 | }, 1869 | "date": "2019-05-05" 1870 | }, 1871 | { 1872 | "annotation": { 1873 | "avg_daily_usage(hours)": "data deleted (addons outage)" 1874 | }, 1875 | "date": "2019-05-05" 1876 | }, 1877 | { 1878 | "annotation": { 1879 | "pct_latest_version": "FF67" 1880 | }, 1881 | "date": "2019-05-21" 1882 | }, 1883 | { 1884 | "annotation": { 1885 | "pct_latest_version": "FF68" 1886 | }, 1887 | "date": "2019-07-09" 1888 | }, 1889 | { 1890 | "annotation": { 1891 | "pct_latest_version": "FF69" 1892 | }, 1893 | "date": "2019-09-03" 1894 | }, 1895 | { 1896 | "annotation": { 1897 | "pct_latest_version": "FF70" 1898 | }, 1899 | "date": "2019-10-22" 1900 | }, 1901 | { 1902 | "annotation": { 1903 | "pct_latest_version": "FF71" 1904 | }, 1905 | "date": "2019-12-03" 1906 | }, 1907 | { 1908 | "annotation": { 1909 | "pct_latest_version": "FF72" 1910 | }, 1911 | "date": "2020-01-07" 1912 | }, 1913 | { 1914 | "annotation": { 1915 | "pct_latest_version": "FF73" 1916 | }, 1917 | "date": "2020-02-11" 1918 | }, 1919 | { 1920 | "annotation": { 1921 | "pct_latest_version": "FF74" 1922 | }, 1923 | "date": "2020-03-10" 1924 | } 1925 | ] 1926 | } 1927 | -------------------------------------------------------------------------------- /usage_report/annotations/annotations_hardware.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": [ 3 | { 4 | "annotation": { 5 | "cpuCores": "XP and Vista leave dataset", 6 | "cpuSpeed": "XP and Vista leave dataset", 7 | "cpuVendor": "XP and Vista leave dataset", 8 | "gpuModel": "XP and Vista leave dataset", 9 | "gpuVendor": "XP and Vista leave dataset", 10 | "hasFlash": "XP and Vista leave dataset", 11 | "osArch": "XP and Vista leave dataset", 12 | "osName": "XP and Vista leave dataset", 13 | "ram": "XP and Vista leave dataset", 14 | "resolution": "XP and Vista leave dataset" 15 | }, 16 | "date": "2017-03-05" 17 | }, 18 | { 19 | "annotation": { 20 | "browserArch": "64-bit updates unthrottled on Win7+ for 2GB+ users" 21 | }, 22 | "date": "2017-10-22" 23 | } 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /usage_report/annotations/annotations_webusage.json: -------------------------------------------------------------------------------- 1 | { 2 | "Brazil": [ 3 | { 4 | "annotation": { 5 | "pct_TP": "FF57", 6 | "pct_addon": "legacy addons disabled" 7 | }, 8 | "date": "2017-11-14" 9 | }, 10 | { 11 | "annotation": { 12 | "pct_addon": "data deleted (addons outage)" 13 | }, 14 | "date": "2019-05-05" 15 | } 16 | ], 17 | "China": [ 18 | { 19 | "annotation": { 20 | "pct_TP": "FF57", 21 | "pct_addon": "legacy addons disabled" 22 | }, 23 | "date": "2017-11-14" 24 | }, 25 | { 26 | "annotation": { 27 | "pct_addon": "data deleted (addons outage)" 28 | }, 29 | "date": "2019-05-05" 30 | } 31 | ], 32 | "France": 
[ 33 | { 34 | "annotation": { 35 | "pct_TP": "FF57", 36 | "pct_addon": "legacy addons disabled" 37 | }, 38 | "date": "2017-11-14" 39 | }, 40 | { 41 | "annotation": { 42 | "pct_addon": "data deleted (addons outage)" 43 | }, 44 | "date": "2019-05-05" 45 | } 46 | ], 47 | "Germany": [ 48 | { 49 | "annotation": { 50 | "pct_TP": "FF57", 51 | "pct_addon": "legacy addons disabled" 52 | }, 53 | "date": "2017-11-14" 54 | }, 55 | { 56 | "annotation": { 57 | "pct_addon": "data deleted (addons outage)" 58 | }, 59 | "date": "2019-05-05" 60 | } 61 | ], 62 | "India": [ 63 | { 64 | "annotation": { 65 | "pct_TP": "FF57", 66 | "pct_addon": "legacy addons disabled" 67 | }, 68 | "date": "2017-11-14" 69 | }, 70 | { 71 | "annotation": { 72 | "pct_addon": "data deleted (addons outage)" 73 | }, 74 | "date": "2019-05-05" 75 | } 76 | ], 77 | "Indonesia": [ 78 | { 79 | "annotation": { 80 | "pct_TP": "FF57", 81 | "pct_addon": "legacy addons disabled" 82 | }, 83 | "date": "2017-11-14" 84 | }, 85 | { 86 | "annotation": { 87 | "pct_addon": "data deleted (addons outage)" 88 | }, 89 | "date": "2019-05-05" 90 | } 91 | ], 92 | "Italy": [ 93 | { 94 | "annotation": { 95 | "pct_TP": "FF57", 96 | "pct_addon": "legacy addons disabled" 97 | }, 98 | "date": "2017-11-14" 99 | }, 100 | { 101 | "annotation": { 102 | "pct_addon": "data deleted (addons outage)" 103 | }, 104 | "date": "2019-05-05" 105 | } 106 | ], 107 | "Poland": [ 108 | { 109 | "annotation": { 110 | "pct_TP": "FF57", 111 | "pct_addon": "legacy addons disabled" 112 | }, 113 | "date": "2017-11-14" 114 | }, 115 | { 116 | "annotation": { 117 | "pct_addon": "data deleted (addons outage)" 118 | }, 119 | "date": "2019-05-05" 120 | } 121 | ], 122 | "Russia": [ 123 | { 124 | "annotation": { 125 | "pct_TP": "FF57", 126 | "pct_addon": "legacy addons disabled" 127 | }, 128 | "date": "2017-11-14" 129 | }, 130 | { 131 | "annotation": { 132 | "pct_addon": "data deleted (addons outage)" 133 | }, 134 | "date": "2019-05-05" 135 | } 136 | ], 137 | "United States": [ 138 | { 139 | "annotation": { 140 | "pct_TP": "FF57", 141 | "pct_addon": "legacy addons disabled" 142 | }, 143 | "date": "2017-11-14" 144 | }, 145 | { 146 | "annotation": { 147 | "pct_addon": "data deleted (addons outage)" 148 | }, 149 | "date": "2019-05-05" 150 | } 151 | ], 152 | "Worldwide": [ 153 | { 154 | "annotation": { 155 | "pct_TP": "FF57", 156 | "pct_addon": "legacy addons disabled" 157 | }, 158 | "date": "2017-11-14" 159 | }, 160 | { 161 | "annotation": { 162 | "pct_addon": "data deleted (addons outage)" 163 | }, 164 | "date": "2019-05-05" 165 | } 166 | ] 167 | } 168 | -------------------------------------------------------------------------------- /usage_report/annotations/readme.md: -------------------------------------------------------------------------------- 1 | # Annotations Structure/Format 2 | 3 | Annotations should be json files in the following structure for [ensemble transposer](https://github.com/mozilla/ensemble-transposer) to read: 4 | 5 | ```json 6 | { 7 | "country1": [ 8 | { 9 | "annotation": { 10 | "plot/metric1 name": "annotation text", 11 | "plot/metric2 name": "etc" 12 | }, 13 | "date": "some_date" 14 | }, 15 | { 16 | "annotation": { 17 | "etc": "etc" 18 | }, 19 | "date": "etc" 20 | } 21 | ], 22 | "country2": [ 23 | "etc" 24 | ] 25 | } 26 | ``` 27 | 28 | The keys "date" and "annotation" should always be named as such. 
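Because ensemble-transposer expects this exact structure, it can help to sanity-check a new or edited annotations file before uploading it. A minimal check, sketched in Python (the `check_annotations` helper below is hypothetical and not part of this repo):

```python
# Hypothetical helper (not part of this repo): sanity-check an annotations file
# against the structure described above before uploading it.
import json


def check_annotations(path):
    with open(path) as f:
        annotations = json.load(f)

    problems = []
    for country, entries in annotations.items():
        for i, entry in enumerate(entries):
            if set(entry.keys()) != {"annotation", "date"}:
                problems.append(
                    "%s[%d]: keys must be exactly 'annotation' and 'date'" % (country, i))
            elif not isinstance(entry["annotation"], dict):
                problems.append(
                    "%s[%d]: 'annotation' must map plot/metric names to text" % (country, i))
    return problems


# Example (file name from this directory):
# check_annotations("annotations_fxhealth.json")
```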
29 | 30 | Note on formatting for human readability: json files can be human-readable formatted using [jq](https://stedolan.github.io/jq/), with the following command: 31 | 32 | ``` 33 | jq --sort-keys . original.json > formatted.json 34 | ``` -------------------------------------------------------------------------------- /usage_report/usage_report.py: -------------------------------------------------------------------------------- 1 | import click 2 | import os 3 | from pyspark.sql import SparkSession 4 | from pyspark.sql.functions import col 5 | from utils.activeuser import getMAU, getYAU 6 | from utils.avg_daily_usage import get_daily_avg_session 7 | from utils.avg_intensity import get_avg_intensity 8 | from utils.helpers import load_main_summary 9 | from utils.localedistribution import locale_on_date 10 | from utils.newuser import new_users 11 | from utils.pct_addon import get_addon 12 | from utils.pct_latest_version import pct_new_version 13 | from utils.process_output import all_metrics_per_day, rename_keys, update_history 14 | from utils.s3_utils import read_from_s3, write_to_s3 15 | from utils.top10addons import top_10_addons_on_date 16 | from utils.trackingprotection import pct_tracking_protection 17 | 18 | # country names and mappings 19 | # this list is formulated from 20 | # https://sql.telemetry.mozilla.org/queries/51430/source 21 | # may want to change 22 | COUNTRY_NAME_MAPPINGS = { 23 | 'All': 'Worldwide', 24 | 'US': 'United States', 25 | 'DE': 'Germany', 26 | 'FR': 'France', 27 | 'IN': 'India', 28 | 'BR': 'Brazil', 29 | 'CN': 'China', 30 | 'ID': 'Indonesia', 31 | 'RU': 'Russia', 32 | 'IT': 'Italy', 33 | 'PL': 'Poland' 34 | } 35 | 36 | TOP_TEN_COUNTRIES = list(COUNTRY_NAME_MAPPINGS.keys()) 37 | TOP_TEN_COUNTRIES.remove('All') 38 | 39 | MASTER_VERSION = 'master' 40 | ALLOWED_CHANNELS = [ 41 | 'release', 42 | 'beta', 43 | 'esr', 44 | 'Other' 45 | ] 46 | 47 | DEFAULT_TZ = 'UTC' 48 | 49 | ANNOTATIONS_DIR = os.path.join('usage_report', 'annotations') 50 | ANNOTATIONS_SUFFIX = '.json' 51 | 52 | 53 | def get_spark(): 54 | spark = (SparkSession 55 | .builder 56 | .appName("usage_report") 57 | .getOrCreate()) 58 | 59 | spark.conf.set('spark.sql.session.timeZone', DEFAULT_TZ) 60 | 61 | return spark 62 | 63 | 64 | def agg_usage(data, **kwargs): 65 | date = kwargs['date'] 66 | period = kwargs['period'] 67 | country_list = kwargs['country_list'] 68 | sample_factor = kwargs['sample_factor'] 69 | 70 | avg_daily_session_length = get_daily_avg_session(data, 71 | date, 72 | period=period, 73 | country_list=country_list) 74 | 75 | avg_daily_intensity = get_avg_intensity(data, 76 | date, 77 | period=period, 78 | country_list=country_list) 79 | 80 | pct_last_version = pct_new_version(data, 81 | date, 82 | period=period, 83 | country_list=country_list) 84 | 85 | # for mau and yau, start_date = date 86 | # since we only want ONE number for each week 87 | mau = getMAU(data, 88 | date, 89 | sample_factor=sample_factor, 90 | country_list=country_list) 91 | 92 | yau = getYAU(data, 93 | date, 94 | sample_factor=sample_factor, 95 | country_list=country_list) 96 | 97 | new_user_counts = new_users(data, 98 | date, 99 | period=period, 100 | country_list=country_list) 101 | 102 | top10addon = top_10_addons_on_date(data, 103 | date, 104 | topN=10, 105 | period=period, 106 | country_list=country_list) 107 | 108 | has_addon = get_addon(data, 109 | date, 110 | period=period, 111 | country_list=country_list) 112 | 113 | locales = locale_on_date(data, 114 | date, 115 | topN=5, 116 | period=period, 117 | 
country_list=country_list) 118 | 119 | tracking_pro = pct_tracking_protection(data, 120 | date, 121 | period=period, 122 | country_list=country_list) 123 | 124 | on = ['submission_date_s3', 'country'] 125 | usage = (avg_daily_session_length 126 | .join(avg_daily_intensity, on=on) 127 | .join(pct_last_version, on=on) 128 | .join(mau, on=on) 129 | .join(yau, on=on) 130 | .join(new_user_counts, on=on) 131 | .join(has_addon, on=on) 132 | .join(tracking_pro, on=on)) 133 | 134 | return usage, locales, top10addon 135 | 136 | 137 | @click.command() 138 | @click.option('--date', required=True) 139 | @click.option('--lag-days', default=7) 140 | @click.option('--sample', default=1, help='percent sample as int [1, 100]') 141 | @click.option('--no-output', default=False, is_flag=True) 142 | @click.option('--input-bucket', default='telemetry-parquet') 143 | @click.option('--input-prefix', default='main_summary') 144 | @click.option('--input-version', default='v4') 145 | @click.option('--output-bucket', default='telemetry-test-bucket') 146 | @click.option('--output-prefix', default='usage_report_data') # TBD, this is a placeholder 147 | @click.option('--output-version', default='v1') # TBD, this is a placeholder 148 | @click.option('--spark-provider', type=click.Choice(['emr', 'dataproc']), default='emr') 149 | def main(date, lag_days, sample, no_output, input_bucket, input_prefix, input_version, 150 | output_bucket, output_prefix, output_version, spark_provider): 151 | 152 | spark = get_spark() 153 | 154 | # all counts will be multipled by sample_factor 155 | sample_factor = 100.0 / sample 156 | 157 | # load main_summary with unbounded history, since YAU 158 | # looks at past 365 days 159 | ms = ( 160 | load_main_summary(spark, input_bucket, input_prefix, input_version, spark_provider) 161 | .filter("submission_date_s3 <= '{}'".format(date)) 162 | .filter("sample_id < {}".format(sample)) 163 | .filter(col("normalized_channel").isin(ALLOWED_CHANNELS)) 164 | .filter("app_name = 'Firefox'")) 165 | 166 | usage, locales, top10addon = agg_usage(ms, date=date, period=lag_days, 167 | sample_factor=sample_factor, 168 | country_list=TOP_TEN_COUNTRIES) 169 | usage.printSchema() 170 | usage_df = usage.toPandas() 171 | 172 | locales.printSchema() 173 | locales_df = locales.toPandas() 174 | 175 | top10addon.printSchema() 176 | top10addon_df = top10addon.toPandas() 177 | 178 | print "Converting data to JSON" 179 | fxhealth, webusage = all_metrics_per_day(TOP_TEN_COUNTRIES, 180 | usage_df, 181 | locales_df, 182 | top10addon_df) 183 | 184 | # rename countries for presentation 185 | fxhealth = rename_keys(fxhealth, COUNTRY_NAME_MAPPINGS) 186 | webusage = rename_keys(webusage, COUNTRY_NAME_MAPPINGS) 187 | print fxhealth 188 | print webusage 189 | 190 | # get previous data 191 | s3_key_prefix = output_prefix + '/' + output_version + '/{}/' 192 | s3_key_fxhealth = s3_key_prefix + 'fxhealth.json' 193 | s3_key_webusage = s3_key_prefix + 'webusage.json' 194 | 195 | old_fxhealth = read_from_s3(output_bucket, s3_key_fxhealth.format(MASTER_VERSION)) 196 | old_webusage = read_from_s3(output_bucket, s3_key_webusage.format(MASTER_VERSION)) 197 | 198 | # update previous data 199 | fxhealth_data_full = update_history(fxhealth, old_fxhealth) 200 | webusage_data_full = update_history(webusage, old_webusage) 201 | 202 | if no_output: 203 | print "no output generated due to user request" 204 | else: 205 | print "Writing new data to:", output_bucket 206 | print s3_key_fxhealth.format(MASTER_VERSION) 207 | print 
s3_key_webusage.format(MASTER_VERSION) 208 | print "Writing old data to:", output_bucket 209 | print s3_key_fxhealth.format(date) 210 | print s3_key_webusage.format(date) 211 | 212 | # write historical data, indexed by date 213 | write_to_s3(output_bucket, s3_key_fxhealth.format(date), old_fxhealth) 214 | write_to_s3(output_bucket, s3_key_webusage.format(date), old_webusage) 215 | 216 | # write updated data 217 | write_to_s3(output_bucket, s3_key_fxhealth.format(MASTER_VERSION), fxhealth_data_full) 218 | write_to_s3(output_bucket, s3_key_webusage.format(MASTER_VERSION), webusage_data_full) 219 | 220 | # write annotations 221 | annote_files = [ 222 | (f, os.path.join(ANNOTATIONS_DIR, f)) 223 | for f in os.listdir(ANNOTATIONS_DIR) 224 | if f.endswith(ANNOTATIONS_SUFFIX) 225 | ] 226 | 227 | for filename, full_path in annote_files: 228 | with open(full_path, 'r') as f: 229 | data = f.read() 230 | s3_path = s3_key_prefix.format(MASTER_VERSION) + filename 231 | write_to_s3(output_bucket, s3_path, data) 232 | 233 | 234 | if __name__ == '__main__': 235 | main() 236 | -------------------------------------------------------------------------------- /usage_report/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/Fx_Usage_Report/489ca258b14776c01f3021080b2dd686d239dea3/usage_report/utils/__init__.py -------------------------------------------------------------------------------- /usage_report/utils/activeuser.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql.functions import lit, col, countDistinct 2 | from helpers import date_plus_x_days 3 | 4 | # to name columns based on period 5 | PERIOD_DESC = { 6 | 28: "MAU", 7 | 365: "YAU", 8 | 7: 'WAU' 9 | } 10 | 11 | 12 | def getPAU(data, date, period, sample_factor=1, country_list=None): 13 | """ Calculates the PAU (period active users) for a given period ending on the given date. 14 | 15 | This function is fast for finding the PAU for a small number of dates. 16 | 17 | Parameters: 18 | 19 | data - This should be a sample of the main server ping data frame. 20 | date - The day to calculate PAU for, given as a 'yyyyMMdd' string. 21 | period - The number of days over which we count distinct users. 22 | For example, MAU has period = 28 and YAU has period = 365. 23 | sample_factor - the factor to multiply counts by, pre-calculated based 24 | on sample 25 | country_list - A list of countries that we want to calculate the 26 | PAU for. 27 | 28 | Output: 29 | 30 | A data frame with 3 columns: 31 | submission_date_s3, country, PAU (WAU/MAU/YAU). 
32 | """ 33 | def process_data(data, begin, date): 34 | return ( 35 | data.filter(col('submission_date_s3') > begin) 36 | .filter(col('submission_date_s3') <= date) 37 | .groupBy('country') 38 | .agg((sample_factor * countDistinct('client_id')).alias(active_users_col)) 39 | .select('*', 40 | lit(begin).alias(start_date_col), 41 | lit(date).alias('submission_date_s3'))) 42 | 43 | data_all = data.drop('country').select('*', lit('All').alias('country')) 44 | if country_list is not None: 45 | data_country = data.filter(col('country').isin(country_list)) 46 | # define column names based on period 47 | active_users_col = PERIOD_DESC.get(period, "other") 48 | start_date_col = 'start_date_' + PERIOD_DESC.get(period, "other") 49 | 50 | begin = date_plus_x_days(date, -period) 51 | 52 | current_count = process_data(data_all, begin, date) 53 | if country_list is not None: 54 | df_country = process_data(data_country, begin, date) 55 | current_count = current_count.union(df_country) 56 | 57 | return current_count.select('submission_date_s3', 'country', active_users_col) 58 | 59 | 60 | def getMAU(data, date, sample_factor=1, country_list=None): 61 | """ Helper function for getPAU with period 28. 62 | """ 63 | return getPAU(data, date, 28, sample_factor, country_list) 64 | 65 | 66 | def getYAU(data, date, sample_factor=1, country_list=None): 67 | """ Helper function for getPAU with period 365. 68 | """ 69 | return getPAU(data, date, 365, sample_factor, country_list) 70 | -------------------------------------------------------------------------------- /usage_report/utils/avg_daily_usage.py: -------------------------------------------------------------------------------- 1 | from helpers import date_plus_x_days, keep_countries_and_all 2 | 3 | from pyspark.sql.functions import lit 4 | import pyspark.sql.functions as F 5 | 6 | 7 | def get_daily_avg_session( 8 | data, 9 | date, 10 | period=7, 11 | country_list=None): 12 | """ Calculate Average Daily usage of the last 7 days for a particular date 13 | 14 | Parameters: 15 | data: sample of the main server ping data frame 16 | date: string, with the format of 'yyyyMMdd' 17 | country_list: a list of country names in string 18 | 19 | Returns: 20 | a dataframe with four columns: 21 | 'submission_date_s3', 22 | 'country', 23 | 'avg_daily_usage(hours)' 24 | """ 25 | 26 | data_all = keep_countries_and_all(data, country_list) 27 | begin = date_plus_x_days(date, -period) 28 | 29 | data_agg = data_all\ 30 | .filter("submission_date_s3 <= '{}' and submission_date_s3 > '{}'" 31 | .format(date, begin))\ 32 | .filter("subsession_length <= 86400") .filter("subsession_length > 0")\ 33 | .groupBy('country', 34 | 'client_id', 35 | 'submission_date_s3')\ 36 | .agg(F.sum('subsession_length').alias('total_daily_time'))\ 37 | .select('country', 38 | 'client_id', 39 | 'submission_date_s3', 40 | F.when(F.col('total_daily_time') > 86400, 86400) 41 | .otherwise(F.col('total_daily_time')) 42 | .alias('total_daily_time')) 43 | 44 | country_avg_session = data_agg\ 45 | .groupBy('country', 'client_id')\ 46 | .agg(F.avg('total_daily_time').alias('client_7d_avg'))\ 47 | .groupBy('country')\ 48 | .agg(F.avg('client_7d_avg').alias('avg_daily_subsession_length'))\ 49 | .select(lit(date).alias('submission_date_s3'), '*') 50 | 51 | df = country_avg_session.orderBy( 52 | 'submission_date_s3', 'country') 53 | 54 | df = df.withColumn( 55 | 'avg_daily_usage(hours)', 56 | df.avg_daily_subsession_length / 3600) 57 | 58 | return df.select('submission_date_s3', 'country', 'avg_daily_usage(hours)') 59 | 
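To make the two-step averaging above concrete (per-client mean of the capped daily totals over the window, then a mean across clients), here is a small local sketch. The SparkSession setup and sample rows are invented for illustration, and it assumes `usage_report/` is on `PYTHONPATH` as in the Dockerfile:

```python
# Illustrative only: tiny in-memory dataset, values invented for the example.
from pyspark.sql import SparkSession
from utils.avg_daily_usage import get_daily_avg_session

spark = SparkSession.builder.appName("avg_daily_usage_example").getOrCreate()

rows = [
    # country, client_id, submission_date_s3, subsession_length (seconds)
    ("US", "a", "20180201", 3600),   # client "a", 1 hour on day 1
    ("US", "a", "20180131", 7200),   # client "a", 2 hours on day 2
    ("US", "b", "20180201", 1800),   # client "b", 0.5 hours on day 1
]
df = spark.createDataFrame(
    rows, ["country", "client_id", "submission_date_s3", "subsession_length"])

# Client "a" averages 1.5 h/day and client "b" 0.5 h/day over the 7-day window,
# so avg_daily_usage(hours) for "US" (and for "All") comes out to 1.0.
get_daily_avg_session(df, "20180201", period=7, country_list=["US"]).show()
```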
-------------------------------------------------------------------------------- /usage_report/utils/avg_intensity.py: -------------------------------------------------------------------------------- 1 | from helpers import date_plus_x_days, keep_countries_and_all 2 | 3 | from pyspark.sql.functions import col, lit 4 | import pyspark.sql.functions as F 5 | 6 | 7 | def get_avg_intensity(data, date, period=7, country_list=None): 8 | """ Calculate Average Intensity of the last 7 days for a particular date 9 | 10 | Parameters: 11 | data: sample of the main server ping data frame 12 | date: string, with the format of 'yyyyMMdd' 13 | period: The number of days before to run the analysis on. 14 | country_list: a list of country names in string 15 | 16 | Returns: 17 | a dataframe with three columns: 'submission_date_s3', 'country', 'avg_intensity' 18 | """ 19 | data_all = keep_countries_and_all(data, country_list) 20 | begin = date_plus_x_days(date, -period) 21 | 22 | data_agg = data_all\ 23 | .filter("submission_date_s3 <= '{0}' and submission_date_s3 > '{1}'".format(date, begin))\ 24 | .filter("subsession_length <= 86400")\ 25 | .filter("subsession_length > 0")\ 26 | .filter('active_ticks <= 17280')\ 27 | .groupBy('country', 'client_id', 'submission_date_s3')\ 28 | .agg(F.sum('subsession_length').alias('total_daily_time'), 29 | F.sum('active_ticks').alias('total_daily_ticks'))\ 30 | .select('country', 31 | 'client_id', 32 | 'submission_date_s3', 33 | F.when(F.col('total_daily_time') > 86400, 86400) 34 | .otherwise(F.col('total_daily_time')) 35 | .alias('total_daily_time'), 36 | F.when(F.col('total_daily_ticks') > 17280, 17280) 37 | .otherwise(F.col('total_daily_ticks')) 38 | .alias('total_daily_ticks'))\ 39 | .select('*', 40 | (col('total_daily_ticks') * 5 / col('total_daily_time')) 41 | .alias('daily_intensity'))\ 42 | .select('country', 43 | 'client_id', 44 | 'submission_date_s3', 45 | F.when(F.col('daily_intensity') > 1, 1) 46 | .otherwise(F.col('daily_intensity')) 47 | .alias('daily_intensity')) 48 | 49 | country_avg_intensity = data_agg\ 50 | .groupBy('country', 'client_id')\ 51 | .agg(F.avg('daily_intensity').alias('avg_7d_intensity'))\ 52 | .groupBy('country')\ 53 | .agg(F.avg('avg_7d_intensity').alias('avg_intensity'))\ 54 | .select(lit(date).alias('submission_date_s3'), '*') 55 | 56 | df = country_avg_intensity.orderBy('submission_date_s3', 'country') 57 | 58 | return df 59 | -------------------------------------------------------------------------------- /usage_report/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import pyspark.sql.functions as F 3 | 4 | 5 | def date_plus_x_days(date, x): 6 | ''' 7 | ''' 8 | 9 | new_date = dt.datetime.strptime(date, '%Y%m%d') + dt.timedelta(days=x) 10 | return new_date.strftime('%Y%m%d') 11 | 12 | 13 | def keep_countries_and_all(data, country_list): 14 | """ Takes the main ping server and makes a country `All` and keeps only countries 15 | in country_list and All. 16 | 17 | Parameters: 18 | data: The main ping server. 19 | country_list: The list of countries to keep. 20 | """ 21 | data_all = data.withColumn('country', F.lit('All')) 22 | 23 | if country_list is not None: 24 | data_countries = data.filter(F.col('country').isin(country_list)) 25 | data_all = data_all.union(data_countries) 26 | 27 | return data_all 28 | 29 | 30 | def get_dest(bucket, prefix, version, spark_provider='emr', date=None, sample_id=None): 31 | ''' 32 | Stiches together an s3 or gcs destination. 
33 | :param bucket: s3 or gcs bucket 34 | :param prefix: s3 or gcs prefix (within bucket) 35 | :param version: dataset version 36 | :param spark_provider: either 'emr' or 'dataproc' 37 | :return str -> 38 | s3|gs://bucket/prefix/version/submission_date_s3=[date]/sample_id=[sid] 39 | ''' 40 | 41 | if spark_provider == 'dataproc': 42 | storage_prefix = 'gs://' 43 | else: 44 | storage_prefix = 's3://' 45 | 46 | suffix = '' 47 | if date is not None: 48 | suffix += "/submission_date_s3={}".format(date) 49 | if sample_id is not None: 50 | suffix += "/sample_id={}".format(sample_id) 51 | full_dest = storage_prefix + '/'.join([bucket, prefix, version]) + suffix + '/' 52 | return full_dest 53 | 54 | 55 | def load_main_summary(spark, input_bucket, input_prefix, input_version, spark_provider='emr'): 56 | ''' 57 | Loads main_summary from the bucket constructed from 58 | input_bucket, input_prefix, input_version 59 | :param spark: SparkSession object 60 | :param input_bucket: s3 bucket (telemetry-parquet) 61 | :param input_prefix: s3 prefix (main_summary) 62 | :param input_version: dataset version (v4) 63 | :param spark_provider: either 'emr' or 'dataproc' 64 | :return SparkDF 65 | ''' 66 | 67 | dest = get_dest(input_bucket, input_prefix, input_version, spark_provider) 68 | return (spark 69 | .read 70 | .option("mergeSchema", True) 71 | .parquet(dest)) 72 | -------------------------------------------------------------------------------- /usage_report/utils/localedistribution.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql.functions import lit, col, desc, countDistinct 2 | from pyspark.sql import Window 3 | import pyspark.sql.functions as F 4 | from helpers import date_plus_x_days, keep_countries_and_all 5 | 6 | 7 | def locale_on_date(data, date, topN, period=7, country_list=None): 8 | """ Gets the ratio of the top locales in each country over the last week. 9 | 10 | parameters: 11 | data: The main ping server 12 | date: The date to find the locale distribution 13 | topN: The number of locales to get for each country. Only does the top N. 
14 | period: The number of days before the date to include in the analysis 15 | country_list: The list of countries to look at in the analysis 16 | 17 | output: 18 | dataframe with columns: 19 | ['country', 'submission_date_s3', 'locale', 'pct_on_locale'] 20 | """ 21 | data_all = keep_countries_and_all(data, country_list) 22 | begin = date_plus_x_days(date, -period) 23 | 24 | wau = data_all\ 25 | .filter((col('submission_date_s3') <= date) & (col('submission_date_s3') > begin))\ 26 | .groupBy('country')\ 27 | .agg(countDistinct('client_id').alias('WAU')) 28 | 29 | locale_wau = data_all\ 30 | .filter((col('submission_date_s3') <= date) & (col('submission_date_s3') > begin))\ 31 | .groupBy('country', 'locale')\ 32 | .agg(countDistinct('client_id').alias('WAU_on_locale'))\ 33 | .select(lit(begin).alias('start_date'), lit(date).alias('submission_date_s3'), 34 | 'country', 'WAU_on_locale', 'locale') 35 | 36 | res = locale_wau.join(wau, 'country', how='left')\ 37 | .select('start_date', 'submission_date_s3', 38 | 'country', 'WAU_on_locale', 'locale', 'WAU') 39 | 40 | rank_window = Window.partitionBy('country', 'submission_date_s3').orderBy(desc('WAU_on_locale')) 41 | 42 | return res.select('*', F.row_number().over(rank_window).alias('rank'))\ 43 | .filter(col('rank') <= topN)\ 44 | .select('submission_date_s3', 'country', 'locale', 45 | (100.0 * col('WAU_on_locale') / col('WAU')).alias('pct_on_locale')) 46 | -------------------------------------------------------------------------------- /usage_report/utils/newuser.py: -------------------------------------------------------------------------------- 1 | from activeuser import getPAU 2 | from pyspark.sql.functions import col, lit, countDistinct, from_unixtime 3 | from helpers import date_plus_x_days 4 | 5 | 6 | def getWAU(data, date, country_list=None): 7 | """ Helper function for getPAU with period 7 days. 8 | """ 9 | return getPAU(data, date, period=7, country_list=country_list) 10 | 11 | 12 | def new_users(data, date, period=7, country_list=None): 13 | """Gets the percentage of WAU that are new users. 14 | 15 | Parameters: 16 | 17 | data - This should be the entire main server ping data frame. 18 | date - The date to calculate for, given as a 'yyyyMMdd' string. 19 | period - The number of days before the date to include in the analysis 20 | country_list - A list of countries that we want to calculate the 21 | percentage of new users for. 
22 | 23 | Returns: 24 | A dataframe with columns 25 | submission_date_s3, country, pct_new_users 26 | """ 27 | 28 | cols = ['submission_date_s3', 'client_id', 'profile_creation_date', 29 | 'country'] 30 | 31 | wau = getWAU(data, date, country_list=country_list) 32 | df = data.drop('country').select('*', lit('All').alias('country')) 33 | 34 | if country_list is not None: 35 | df = ( 36 | df.select(cols).union(data.select(cols) 37 | .filter(col('country').isin(country_list)))) 38 | begin = date_plus_x_days(date, -period) 39 | new_profiles = (df.filter(df.submission_date_s3 <= date) 40 | .filter(df.submission_date_s3 > begin) 41 | .withColumn('pcd_str', 42 | from_unixtime(col('profile_creation_date') * 24 * 60 * 60, 43 | format='yyyyMMdd')) 44 | .filter(col('pcd_str') <= date) 45 | .filter(col('pcd_str') > begin)) 46 | 47 | new_user_counts = ( 48 | new_profiles 49 | .groupBy('country') 50 | .agg((countDistinct('client_id')).alias('new_users'))) 51 | 52 | return wau.join(new_user_counts, on=['country'], how='left')\ 53 | .select('submission_date_s3', 54 | 'country', 55 | (100.0 * col('new_users') / col('WAU')).alias('pct_new_user')) 56 | -------------------------------------------------------------------------------- /usage_report/utils/osdistribution.py: -------------------------------------------------------------------------------- 1 | # deprecated for now to avoid overlap with FHR os 2 | from pyspark.sql.functions import col, countDistinct, lit, when 3 | from helpers import date_plus_x_days, keep_countries_and_all 4 | 5 | 6 | def window_version(os_version): 7 | """ 8 | Takes the Windows Kernel version number and 9 | produces the associated consumer windows version. 10 | """ 11 | return when(os_version == '10.0', 'Windows 10')\ 12 | .when(os_version == '6.1', 'Windows 7')\ 13 | .when(os_version == '6.2', 'Windows 8')\ 14 | .when(os_version == '6.3', 'Windows 8')\ 15 | .when(os_version == '5.1', 'Windows XP')\ 16 | .when(os_version == '5.2', 'Windows XP')\ 17 | .when(os_version == '6.0', 'Windows Vista')\ 18 | .otherwise('Other Windows') 19 | 20 | 21 | def nice_os(os, os_version): 22 | """ Splits the major windows versions up and keeps mac os x and linux combined.""" 23 | return when(os == 'Windows_NT', window_version(os_version))\ 24 | .when(os == 'Windows_95', 'Other Windows')\ 25 | .when(os == 'Windows_98', 'Other Windows')\ 26 | .when(os == "Darwin", "Mac OS X")\ 27 | .otherwise('Other') 28 | 29 | 30 | def os_on_date(data, date, period=7, country_list=None): 31 | """ Gets the distribution of OS usage calculated on the WAU on 1 day. 32 | 33 | Parameters: 34 | data: Usually the main summary data frame. 35 | date: day to get the os distribution for the past week. 36 | period: The number of days to calculate the distibution. By default it finds os 37 | distribution over a week. 38 | country_list: The countries to do the analysis. If None then it does it for the whole 39 | world. 
40 | 41 | Returns: 42 | submission_date_s3, country, os, pct_on_os 43 | """ 44 | 45 | data_all = keep_countries_and_all(data, country_list) 46 | begin = date_plus_x_days(date, -period) 47 | data_all = data_all.select('client_id', 'submission_date_s3', 'country', 48 | nice_os(col('os'), col('os_version')).alias('nice_os')) 49 | 50 | # Calculate the WAU 51 | wau = data_all\ 52 | .filter((col('submission_date_s3') <= date) & (col('submission_date_s3') > begin))\ 53 | .groupBy('country')\ 54 | .agg(countDistinct('client_id').alias('WAU')) 55 | 56 | os_wau = data_all\ 57 | .filter((col('submission_date_s3') <= date) & 58 | (col('submission_date_s3') > begin))\ 59 | .groupBy('country', 'nice_os')\ 60 | .agg(countDistinct('client_id').alias('WAU_on_OS'))\ 61 | .select(lit(begin).alias('start_date'), lit(date).alias('submission_date_s3'), 62 | 'country', 'WAU_on_OS', 'nice_os') 63 | 64 | res = os_wau.join(wau, 'country', how='left')\ 65 | .select('start_date', 'submission_date_s3', 66 | 'country', 'WAU_on_OS', 'nice_os', 'WAU') 67 | 68 | return res.select('submission_date_s3', 'country', col('nice_os').alias('os'), 69 | (100.0 * col('WAU_on_OS') / col('WAU')).alias('pct_on_os')) 70 | -------------------------------------------------------------------------------- /usage_report/utils/pct_addon.py: -------------------------------------------------------------------------------- 1 | import json 2 | from helpers import date_plus_x_days, keep_countries_and_all 3 | 4 | # from pyspark.sql.functions import col, lit, mean, split 5 | import pyspark.sql.functions as F 6 | 7 | 8 | def get_test_pilot_addons(): 9 | ''' 10 | Fetches all the live test pilot experiments listed in 11 | the experiments.json file. 12 | returns a list of addon_ids 13 | ''' 14 | file_path = "usage_report/resources/experiments.json" 15 | with open(file_path) as f: 16 | data = json.load(f) 17 | all_tp_addons = ["@testpilot-addon"] + [i.get("addon_id") 18 | for i in data['results'] 19 | if i.get("addon_id")] 20 | return all_tp_addons 21 | 22 | 23 | # grab all tp addons without a mozilla suffix 24 | NON_MOZ_TP = [i for i in get_test_pilot_addons() if "@mozilla" not in i] 25 | 26 | # this study is everywhere 27 | UNIFIED_SEARCH_STR = '@unified-urlbar-shield-study-' 28 | 29 | 30 | def get_addon(data, 31 | date, 32 | period=7, 33 | country_list=None): 34 | """ Calculate the proportion of WAU that have a "self installed" addon for a specific date 35 | 36 | Parameters: 37 | data: sample of the main server ping data frame 38 | date: string, with the format of 'yyyyMMdd' 39 | period: The number of days to include in the analysis 40 | country_list: a list of country names in string 41 | 42 | Returns: 43 | a dataframe showing the information for the given date 44 | - three columns: 'submission_date_s3', 'country', 'pct_addon' 45 | """ 46 | 47 | data_all = keep_countries_and_all(data, country_list) 48 | begin = date_plus_x_days(date, -period) 49 | 50 | addon_filter = (~F.col('addon.is_system')) & (~F.col('addon.foreign_install')) &\ 51 | (~F.col('addon.addon_id').isin(NON_MOZ_TP)) &\ 52 | (~F.col('addon.addon_id').like('%@mozilla%')) &\ 53 | (~F.col('addon.addon_id').like('%@shield.mozilla%')) &\ 54 | (~F.col('addon.addon_id').like('%' + UNIFIED_SEARCH_STR + '%')) 55 | 56 | WAU = data_all\ 57 | .filter("submission_date_s3 <= '{0}' and submission_date_s3 > '{1}'".format(date, begin))\ 58 | .groupBy('country')\ 59 | .agg(F.countDistinct('client_id').alias('WAU')) 60 | 61 | addon_count = data_all\ 62 | .filter("submission_date_s3 <= 
'{0}' and submission_date_s3 > '{1}'".format(date, begin))\ 63 | .select('submission_date_s3', 'country', 'client_id', 64 | F.explode('active_addons').alias('addon'))\ 65 | .filter(addon_filter)\ 66 | .groupBy('country')\ 67 | .agg(F.countDistinct('client_id').alias('add_on_count')) 68 | 69 | join_df = WAU.join(addon_count, 'country', how='left')\ 70 | .withColumn("pct_addon", (100.0 * F.col("add_on_count") / F.col("WAU")))\ 71 | .select(F.lit(date).alias('submission_date_s3'), '*') 72 | 73 | return join_df.select('submission_date_s3', 'country', 'pct_addon') 74 | -------------------------------------------------------------------------------- /usage_report/utils/pct_latest_version.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import urllib 4 | 5 | from pyspark.sql.functions import split 6 | import pyspark.sql.functions as F 7 | from helpers import date_plus_x_days, keep_countries_and_all 8 | 9 | RELEASE_VERSIONS_URL = "https://product-details.mozilla.org/1.0/firefox_history_major_releases.json" 10 | 11 | 12 | def get_latest_version(date, url): 13 | """ check a url and get the latest release given a date 14 | Param: 15 | date: date in question. should be YYYYMMDD format (str) 16 | url: url where the Firefox release history json lives 17 | Return: release major version for that date (50, not 50.0) 18 | """ 19 | date = datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d') 20 | response = urllib.urlopen(url) 21 | jrelease = json.loads(response.read()) 22 | jrelease = dict((v, k) for k, v in jrelease.iteritems()) 23 | last_update = max([release_date for release_date in jrelease.keys() if release_date <= date]) 24 | return jrelease[last_update].split('.')[0] 25 | 26 | 27 | def pct_new_version(data, 28 | date, 29 | period=7, 30 | country_list=None, 31 | url=RELEASE_VERSIONS_URL): 32 | """ Calculate the proportion of active users on the latest release version every day. 
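A hypothetical call (not part of the original code, assuming `main_summary` is already loaded) would be:

    pct_new_version(main_summary, '20180401', country_list=['DE', 'US'])

which looks up the newest major release as of that date via get_latest_version and reports the share of each country's WAU on that version or newer.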
33 | Parameters: 34 | data: sample of the main server ping data frame 35 | date: The day to calculate the metric 36 | period: number of days to use to calculate metric 37 | country_list: a list of country names in string 38 | url: the url of the json file containing all the firefox release information to date 39 | Returns: 40 | a dataframe with three columns - 'country', 'submission_date_s3', 41 | 'pct_latest_version' 42 | """ 43 | 44 | data_all = keep_countries_and_all(data, country_list) 45 | begin = date_plus_x_days(date, -period) 46 | 47 | latest_version = get_latest_version(date, url) 48 | data_filtered = data_all.filter(""" 49 | {0} >= '{1}' and {0} <= '{2}' 50 | """.format("submission_date_s3", begin, date))\ 51 | .withColumn('app_major_version', 52 | split('app_version', 53 | r'\.').getItem(0))\ 54 | .select('submission_date_s3', 55 | 'client_id', 56 | 'app_major_version', 57 | 'country') 58 | 59 | WAU = data_filtered.groupBy('country')\ 60 | .agg(F.countDistinct('client_id').alias('WAU')) 61 | WAU_latest = data_filtered.filter(F.col('app_major_version') >= F.lit(latest_version))\ 62 | .groupBy('country')\ 63 | .agg(F.countDistinct('client_id').alias('WAU_is_latest')) 64 | join_df = WAU.join(WAU_latest, 'country', 'left')\ 65 | .withColumn("pct_latest_version", (100.0 * F.col("WAU_is_latest") / F.col("WAU")))\ 66 | .select(F.lit(date).alias('submission_date_s3'), 67 | 'country', 68 | F.coalesce('pct_latest_version', F.lit(0)).alias('pct_latest_version')) 69 | return join_df 70 | -------------------------------------------------------------------------------- /usage_report/utils/process_output.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | # Note: the code makes the following assumptions, otherwise 4 | # it will throw an error by design: 5 | # dataframes are for a single submission_date_s3 6 | # historical data and new data have same countries and metrics 7 | # single metric per country (for single metrics) 8 | # it will not throw error for: 9 | # faceted metrics, unique facets (addons have repeat names) 10 | # countries differ from previous (print warning) 11 | # metrics differ from last entry (print warning) 12 | 13 | FXHEALTH_METRICS = ['YAU', 14 | 'MAU', 15 | 'pct_new_user', 16 | 'pct_latest_version', 17 | 'avg_daily_usage(hours)', 18 | 'avg_intensity'] 19 | 20 | WEBUSAGE_METRICS_1DIM = ['pct_addon', 21 | 'pct_TP'] 22 | 23 | WEBUSAGE_METRICS_2DIM = {'locale': ('locale', 'pct_on_locale'), 24 | 'top10addons': ('addon_name', 'pct_with_addon')} 25 | 26 | 27 | def check_unique(df, metric_col): 28 | """ make sure df has 1 row for single value metrics 29 | params: df, pandas df, metric_col, str 30 | return: nothing, but raise error if assumptions not met 31 | """ 32 | if len(df[metric_col]) != 1: 33 | raise ValueError('there should be 1 metric') 34 | 35 | 36 | def check_dataframes(*dfs): 37 | """ check dataframes for assumptions we make for processing 38 | which are same country and same date 39 | params: dfs, at least 2 dataframes 40 | return: nothing, but raise error if assumptions not met 41 | """ 42 | country_sets = map(lambda x: set(x['country']), dfs) 43 | date_sets = map(lambda x: set(x['submission_date_s3']), dfs) 44 | if not all([a == b for a, b 45 | in zip(country_sets[:-1], 46 | country_sets[1:])]): 47 | raise ValueError('countries are different') 48 | if not all([a == b for a, b 49 | in zip(date_sets[:-1], 50 | date_sets[1:])]): 51 | raise ValueError('dates are different') 52 | if len(date_sets[0]) != 1: 53 | 
raise ValueError('wrong number of dates') 54 | 55 | 56 | def one_dim_extract(pd_df, 57 | country, 58 | metric_col): 59 | """get a metric for a country from a pandas df 60 | params: pd_df, pandas dataframe, (ex. usage) 61 | country: str 62 | metric_col: str 63 | return: a single numeric of some kind 64 | """ 65 | pd_df_filtered = pd_df[pd_df['country'] == country].reset_index(drop=True) 66 | check_unique(pd_df_filtered, metric_col) 67 | return pd_df_filtered.iloc[0][metric_col] 68 | 69 | 70 | def two_dim_extract(pd_df, 71 | country, 72 | facet_col, 73 | metric_col): 74 | """ extract {facet: metric} dict from a pandas dataframe 75 | params: pd_df, faceted pandas dataframe, (ex, locales, top10addon) 76 | country, str 77 | facet_col, str (ex. 'locale') 78 | metric_col, str (ex. 'pct_on_locale') 79 | return: 80 | nested_dict, dict, keys are facets, values are metrics 81 | """ 82 | nested_dict = {} 83 | pd_df_filtered = pd_df[pd_df['country'] == country].reset_index(drop=True) 84 | for i in pd_df_filtered.index: 85 | i_row = pd_df_filtered.iloc[i] 86 | nested_dict[i_row[facet_col]] = i_row[metric_col] 87 | return nested_dict 88 | 89 | 90 | def fxhealth_per_day_country(usage_pd_df, 91 | country): 92 | """ get fxhealth metrics 93 | params: usage_pd_df, pandas df 94 | country: country, str 95 | return: day_dict, {date: dict's date, 96 | metrics: 97 | {metric: value, 98 | ...}} 99 | """ 100 | day_dict = {} 101 | date = usage_pd_df['submission_date_s3'][0] 102 | date = datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d') 103 | 104 | day_dict['date'] = date 105 | day_dict['metrics'] = {} 106 | 107 | for metric in FXHEALTH_METRICS: 108 | day_dict['metrics'][metric] = one_dim_extract(usage_pd_df, 109 | country, 110 | metric) 111 | return day_dict 112 | 113 | 114 | def webusage_per_day_country(usage_pd_df, 115 | locales_pd_df, 116 | topaddons_pd_df, 117 | country): 118 | """ get webusage metrics 119 | params: usage_pd_df, pandas df, 1dim metrics 120 | locales_pd_df, topaddons_pd_df, pandas df, 2dim metrics 121 | country, str 122 | return: day_dict, {date: dict's date, 123 | metrics: 124 | {metric: value, 125 | metric: 126 | {facet: value, 127 | ...}, ...}} 128 | """ 129 | day_dict = {} 130 | date = usage_pd_df['submission_date_s3'][0] 131 | date = datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d') 132 | 133 | day_dict['date'] = date 134 | day_dict['metrics'] = {} 135 | 136 | for metric in WEBUSAGE_METRICS_1DIM: 137 | day_dict['metrics'][metric] = one_dim_extract(usage_pd_df, 138 | country, 139 | metric) 140 | 141 | for df, metric in [(locales_pd_df, 'locale'), 142 | (topaddons_pd_df, 'top10addons')]: 143 | day_dict['metrics'][metric] = two_dim_extract(df, 144 | country, 145 | WEBUSAGE_METRICS_2DIM[metric][0], 146 | WEBUSAGE_METRICS_2DIM[metric][1]) 147 | return day_dict 148 | 149 | 150 | def all_metrics_per_day(country_list, usage_pd_df, locales_pd_df, topaddons_pd_df): 151 | """ get fxhealth and webusage metrics, all countries 152 | params: country_list, list of strings 153 | various dfs, pandas dfs 154 | return: tuple of dicts 155 | """ 156 | check_dataframes(usage_pd_df, 157 | locales_pd_df, 158 | topaddons_pd_df) 159 | fxhealth, webusage = {}, {} 160 | country_list = country_list + ['All'] 161 | for country in country_list: 162 | fxhealth[country] = fxhealth_per_day_country(usage_pd_df, 163 | country) 164 | webusage[country] = webusage_per_day_country(usage_pd_df, 165 | locales_pd_df, 166 | topaddons_pd_df, 167 | country) 168 | return (fxhealth, webusage) 169 | 170 | 171 | def 
rename_keys(input_dict, country_name_mappings): 172 | """ copy dict with country keys renamed with full names 173 | params: input_dict, a metric dict 174 | country_name_mappings, dict, {abbr. country: full country} 175 | return: output_dict, same as input with renamed keys 176 | """ 177 | return {country_name_mappings[k]: v 178 | for k, v in input_dict.iteritems()} 179 | 180 | 181 | def check_dict_keys(dict1, dict2, message): 182 | """ check if keys are the same 183 | params: dict1, dict2, comparison dicts 184 | message, what to print 185 | return: nothing 186 | """ 187 | if set(dict1.keys()) != set(dict2.keys()): 188 | print message 189 | 190 | 191 | def update_history(day_dict, history_dict=None): 192 | """ updates history dict, 193 | also checks country, metric, and date compat 194 | params: 195 | history_dict, dict, {'country': [{data1}, {data2}, ...], ...} 196 | day_dict, dict, {'country': {data}, ...} 197 | return: copy of history_dict updated w/new day's data 198 | """ 199 | if history_dict is None: 200 | history_dict = {} 201 | for country in day_dict.keys(): 202 | history_dict[country] = [day_dict[country]] 203 | history_dict = history_dict.copy() 204 | check_dict_keys(history_dict, 205 | day_dict, 206 | "warning: countries don't match") 207 | for country in day_dict.keys(): 208 | if country in history_dict.keys(): 209 | check_dict_keys(history_dict[country][-1], 210 | day_dict[country], 211 | "warning: metrics don't match last entry ({})".format(country)) 212 | previous_dates = [entry['date'] for entry in history_dict[country]] 213 | if day_dict[country]['date'] in previous_dates: 214 | replace_position = previous_dates.index(day_dict[country]['date']) 215 | history_dict[country][replace_position] = day_dict[country] 216 | else: 217 | history_dict[country].append(day_dict[country]) 218 | else: 219 | history_dict[country] = [day_dict[country]] 220 | return history_dict 221 | -------------------------------------------------------------------------------- /usage_report/utils/s3_utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | 4 | 5 | def file_exists(bucket_name, filename, aws_access_key_id=None, aws_secret_access_key=None): 6 | """ check if a file exists in S3 7 | params: bucket_name, str, name of bucket 8 | filename, str, name of file (prefix + file name) 9 | aws_access_key_id, aws_secret_access_key, if None it should check env 10 | return: True if file exists 11 | """ 12 | s3 = boto3.Session(aws_access_key_id=aws_access_key_id, 13 | aws_secret_access_key=aws_secret_access_key).resource('s3') 14 | bucket = s3.Bucket(bucket_name) 15 | objs = list(bucket.objects.filter(Prefix=filename)) 16 | if len(objs) > 0 and objs[0].key == filename: 17 | return True 18 | else: 19 | return False 20 | 21 | 22 | def read_from_s3(bucket_name, filename, aws_access_key_id=None, aws_secret_access_key=None): 23 | """ read JSON from s3 24 | params: bucket_name, str, name of bucket 25 | filename, str, name of file (prefix + file name) 26 | return: JSON as dict, None if file doesn't exist in S3 27 | """ 28 | if file_exists(bucket_name, filename, aws_access_key_id, aws_secret_access_key): 29 | s3 = boto3.Session(aws_access_key_id=aws_access_key_id, 30 | aws_secret_access_key=aws_secret_access_key).resource('s3') 31 | content_object = s3.Object(bucket_name, filename) 32 | file_content = content_object.get()['Body'].read().decode('utf-8') 33 | return json.loads(file_content) 34 | 35 | 36 | def write_to_s3(bucket_name, filename, 
data, aws_access_key_id=None, aws_secret_access_key=None, 37 | acl='public-read'): 38 | """ write dict as JSON to s3 39 | params: bucket_name, str, name of bucket 40 | filename, str, name of file (prefix + file name) 41 | return: nothing 42 | """ 43 | if isinstance(data, str): 44 | body = data 45 | else: 46 | body = json.dumps(data, ensure_ascii=False) 47 | 48 | print "Uploading {}b of data to s3://{}/{}".format(len(body), bucket_name, filename) 49 | 50 | s3 = boto3.Session(aws_access_key_id=aws_access_key_id, 51 | aws_secret_access_key=aws_secret_access_key).resource('s3') 52 | obj = s3.Object(bucket_name, filename) 53 | obj.put(Body=body.encode('utf8'), ACL=acl) 54 | -------------------------------------------------------------------------------- /usage_report/utils/top10addons.py: -------------------------------------------------------------------------------- 1 | import pyspark.sql.functions as F 2 | from pyspark.sql.functions import lit, col, desc 3 | from pyspark.sql import Window 4 | import json 5 | from helpers import date_plus_x_days, keep_countries_and_all 6 | 7 | 8 | def get_test_pilot_addons(): 9 | ''' 10 | Fetches all the live test pilot experiments listed in 11 | the experiments.json file. 12 | returns a list of addon_ids 13 | ''' 14 | file_path = "usage_report/resources/experiments.json" 15 | with open(file_path) as f: 16 | data = json.load(f) 17 | all_tp_addons = ["@testpilot-addon"] + [i.get("addon_id") 18 | for i in data['results'] 19 | if i.get("addon_id")] 20 | return all_tp_addons 21 | 22 | 23 | # grab all tp addons without a mozilla suffix 24 | NON_MOZ_TP = [i for i in get_test_pilot_addons() if "@mozilla" not in i] 25 | 26 | # this study is everywhere 27 | UNIFIED_SEARCH_STR = '@unified-urlbar-shield-study-' 28 | 29 | 30 | def top_10_addons_on_date(data, date, topN, period=7, country_list=None): 31 | """ Gets the top N addons by number of users in the past week, 32 | broken down by country. 33 | 34 | Parameters: 35 | data - The main server ping data frame. 36 | date - The day for which you want to get the top N addons. 37 | topN - the number of addons to get. 38 | period - number of days to use to calculate metric 39 | country_list - a list of country names in string 40 | 41 | Returns: 42 | Dataframe containing the percentage of active users with each of the top addons, with columns 43 | submission_date_s3, country, addon_id, addon_name, pct_with_addon
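Example (illustrative only, not part of the original module; assumes `main_summary` has already been loaded):

    top_10_addons_on_date(main_summary, '20180401', 10, period=7, country_list=['DE', 'US'])

This would rank self-installed add-ons by distinct users over the week ending 20180401 and keep the top 10 per country (plus 'All').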
44 | """ 45 | addon_filter = (~col('addon.is_system')) & (~col('addon.foreign_install')) & \ 46 | (~col('addon.addon_id').isin(NON_MOZ_TP)) & (~col('addon.addon_id').like('%@mozilla%')) &\ 47 | (~col('addon.addon_id').like('%@shield.mozilla%')) &\ 48 | (~col('addon.addon_id').like('%' + UNIFIED_SEARCH_STR + '%')) 49 | 50 | data_all = keep_countries_and_all(data, country_list) 51 | begin = date_plus_x_days(date, -period) 52 | 53 | wau = data_all.filter((col('submission_date_s3') > begin) & 54 | (col('submission_date_s3') <= date))\ 55 | .groupBy('country')\ 56 | .agg(lit(date).alias('submission_date_s3'), 57 | F.countDistinct('client_id').alias('wau')) 58 | 59 | counts = data_all.select('submission_date_s3', 'country', 'client_id', 60 | F.explode('active_addons').alias('addon'))\ 61 | .filter((col('submission_date_s3') > begin) & 62 | (col('submission_date_s3') <= date))\ 63 | .filter(addon_filter)\ 64 | .select('country', 'client_id', 'addon.addon_id', 'addon.name')\ 65 | .distinct()\ 66 | .groupBy('country', 'addon_id')\ 67 | .agg(F.count('*').alias('number_of_users'), F.last('name').alias('name'))\ 68 | .select('*', lit(date).alias('submission_date_s3'), 69 | lit(begin).alias('start_date'), 70 | F.row_number().over(Window.partitionBy('country') 71 | .orderBy(desc('number_of_users')) 72 | .rowsBetween(Window.unboundedPreceding, Window.currentRow)) 73 | .alias('rank'))\ 74 | .filter(col('rank') <= topN) 75 | 76 | return counts.join(F.broadcast(wau), on=['country'], how='left')\ 77 | .select(lit(date).alias('submission_date_s3'), 'country', 78 | 'addon_id', col('name').alias('addon_name'), 79 | (100.0 * col('number_of_users') / col('wau')).alias('pct_with_addon')) 80 | -------------------------------------------------------------------------------- /usage_report/utils/trackingprotection.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | # from pyspark.sql.functions import col, lit, mean, split 4 | import pyspark.sql.functions as F 5 | 6 | 7 | def pct_tracking_protection(data, 8 | date, 9 | period=7, 10 | country_list=None): 11 | """ Calculate the proportion of WAU that have had Tracking Protection 12 | turned on in at least one session/window 13 | Parameters: 14 | data: spark df, main summary 15 | date: string, with the format 'yyyyMMdd' 16 | period: int, period to check proportion for, 7 for WAU 17 | country_list: a list of country names in string 18 | 19 | Returns: 20 | a spark df with the following columns 21 | - columns: | submission_date_s3 | country | pct_TP | 22 | """ 23 | enddate = datetime.datetime.strptime(date, '%Y%m%d') 24 | begin = enddate - datetime.timedelta(days=period) 25 | begin = begin.strftime('%Y%m%d') 26 | 27 | data_all = data.drop('country')\ 28 | .select('submission_date_s3', 29 | 'client_id', 30 | F.col('histogram_parent_tracking_protection_enabled.1').alias('TP_on'), 31 | F.lit('All').alias('country')) 32 | 33 | if country_list: 34 | data_countries = ( 35 | data.filter(F.col('country').isin(country_list)) 36 | .select('submission_date_s3', 37 | 'client_id', 38 | F.col('histogram_parent_tracking_protection_enabled.1').alias('TP_on'), 39 | 'country')) 40 | data_all = data_all.union(data_countries) 41 | 42 | def get_number_of_users(df, count_name): 43 | return df.groupBy('country')\ 44 | .agg(F.countDistinct('client_id').alias(count_name)) 45 | 46 | WAU = get_number_of_users( 47 | data_all.filter("""submission_date_s3 <= '{}' 48 | and 
submission_date_s3 > '{}' 49 | """.format(date, begin)), 50 | 'WAU') 51 | WAU_TP = get_number_of_users( 52 | data_all.filter("""submission_date_s3 <= '{}' 53 | and submission_date_s3 > '{}' 54 | """.format(date, begin)) 55 | .filter(F.col('TP_on') > 0), 56 | 'WAU_TP') 57 | 58 | join_df = WAU.join(WAU_TP, 'country', 'left')\ 59 | .withColumn("pct_TP", (100.0 * F.col("WAU_TP") / F.col("WAU")))\ 60 | .select(F.lit(date).alias('submission_date_s3'), 61 | 'country', 62 | F.coalesce('pct_TP', F.lit(0)).alias('pct_TP')) 63 | return join_df 64 | --------------------------------------------------------------------------------
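A hedged sketch of how these per-metric helpers might be wired together for one day's run (illustrative only; the function names and signatures come from the modules above, while `main_summary`, the date, and the country list are assumptions, and the repo's actual driver may differ):

    date = '20180401'
    countries = ['DE', 'GB', 'US']
    # each helper returns a small per-country Spark DataFrame for the week ending on `date`
    tp = pct_tracking_protection(main_summary, date, period=7, country_list=countries)
    top_addons = top_10_addons_on_date(main_summary, date, 10, period=7, country_list=countries)
    # the per-metric frames are collected (e.g. with .toPandas()), reshaped into the
    # per-country JSON structure by the process_output helpers, merged into the existing
    # history with update_history, and finally uploaded with write_to_s3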