├── .github └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE ├── README.md ├── _config.yml ├── _layouts ├── default.html └── table.html ├── assets ├── img │ ├── asc.gif │ ├── bg.gif │ └── desc.gif └── js │ ├── jquery-3.7.1.min.js │ └── jquery.tablesorter.min.js ├── crawlplot.py ├── crawlstats.py ├── get_stats.sh ├── get_stats_and_plot.sh ├── index.md ├── plot.sh ├── plot ├── charset.py ├── crawl_size.py ├── crawler_metrics.py ├── domain.py ├── histogram.py ├── language.py ├── mimetype.py ├── mimetype_detected.py ├── overlap.py ├── table.py ├── tld.py └── tld_by_continent.py ├── plots ├── README.md ├── charsets-top-100.html ├── charsets.csv ├── charsets.md ├── crawl_coverage.png ├── crawler │ ├── crawldb_status.png │ ├── fetch_status_percentage.png │ ├── metrics.png │ ├── url_protocols.png │ └── url_protocols_percentage.png ├── crawlermetrics.md ├── crawloverlap.md ├── crawloverlap │ ├── crawlsimilarity_matrix_digest.png │ └── crawlsimilarity_matrix_url.png ├── crawlsize.md ├── crawlsize │ ├── cumulative.csv │ ├── cumulative.png │ ├── digest_last_n_crawls.png │ ├── domain.csv │ ├── domain.png │ ├── monthly.csv │ ├── monthly.png │ ├── monthly_new.csv │ ├── monthly_new.png │ ├── registered-domains.png │ ├── url_last_n_crawls.csv │ ├── url_last_n_crawls.png │ ├── url_page_ratio_last_n_crawls.csv │ ├── url_page_ratio_last_n_crawls.png │ └── url_status_by_year.png ├── domains-top-500.csv ├── domains-top-500.html ├── domains.md ├── languages-top-200.html ├── languages.csv ├── languages.md ├── mimetypes-top-100.html ├── mimetypes.csv ├── mimetypes.md ├── mimetypes_detected-top-100.html ├── mimetypes_detected.csv ├── throughput │ ├── fetch_throughput_2017_dezember.png │ ├── fetch_throughput_april.png │ ├── fetch_throughput_december.png │ ├── fetch_throughput_february.png │ ├── fetch_throughput_fetch_throughput_oct_dec_feb.png │ ├── fetch_throughput_fetch_throughput_oct_dec_feb_mar.png │ ├── fetch_throughput_fetch_throughput_oct_dec_mar.png │ └── fetch_throughput_october.png ├── tld │ ├── by-year-and-continent.md │ ├── comparison.md │ ├── groups-percentage.html │ ├── groups.md │ ├── groups.png │ ├── latest-crawl-groups.html │ ├── latest-crawl-tlds.html │ ├── latestcrawl.md │ ├── percentage.md │ ├── selected-crawl-comparison-spearman-all-tlds.html │ ├── selected-crawl-comparison-spearman-frequent-tlds.html │ ├── selected-crawl-comparison.html │ ├── selected-crawls-percentage.html │ ├── selected-tlds-by-year.csv │ ├── selected-tlds-by-year.html │ ├── tlds-by-year-and-continent.csv │ ├── tlds-by-year-and-continent.html │ └── tlds-by-year-and-continent.png └── tlds.md ├── requirements.txt ├── requirements_plot.txt ├── run_stats_hadoop.sh ├── setup.py ├── site.Dockerfile ├── stats.Dockerfile ├── stats ├── crawler │ ├── CC-MAIN-2016-18.json │ ├── CC-MAIN-2016-22.json │ ├── CC-MAIN-2016-26.json │ ├── CC-MAIN-2016-30.json │ ├── CC-MAIN-2016-36.json │ ├── CC-MAIN-2016-40.json │ ├── CC-MAIN-2016-44.json │ ├── CC-MAIN-2016-50.json │ ├── CC-MAIN-2017-04.json │ ├── CC-MAIN-2017-09.json │ ├── CC-MAIN-2017-13.json │ ├── CC-MAIN-2017-17.json │ ├── CC-MAIN-2017-22.json │ ├── CC-MAIN-2017-26.json │ ├── CC-MAIN-2017-30.json │ ├── CC-MAIN-2017-34.json │ ├── CC-MAIN-2017-39.json │ ├── CC-MAIN-2017-43.json │ ├── CC-MAIN-2017-47.json │ ├── CC-MAIN-2017-51.json │ ├── CC-MAIN-2018-05.json │ ├── CC-MAIN-2018-09.json │ ├── CC-MAIN-2018-13.json │ ├── CC-MAIN-2018-17.json │ ├── CC-MAIN-2018-22.json │ ├── CC-MAIN-2018-26.json │ ├── CC-MAIN-2018-30.json │ ├── CC-MAIN-2018-34.json │ ├── CC-MAIN-2018-39.json │ ├── CC-MAIN-2018-43.json │ ├── CC-MAIN-2018-47.json │ ├── CC-MAIN-2018-51.json │ ├── CC-MAIN-2019-04.json │ ├── CC-MAIN-2019-09.json │ ├── CC-MAIN-2019-13.json │ ├── CC-MAIN-2019-18.json │ ├── CC-MAIN-2019-22.json │ ├── CC-MAIN-2019-26.json │ ├── CC-MAIN-2019-30.json │ ├── CC-MAIN-2019-35.json │ ├── CC-MAIN-2019-39.json │ ├── CC-MAIN-2019-43.json │ ├── CC-MAIN-2019-47.json │ ├── CC-MAIN-2019-51.json │ ├── CC-MAIN-2020-05.json │ ├── CC-MAIN-2020-10.json │ ├── CC-MAIN-2020-16.json │ ├── CC-MAIN-2020-24.json │ ├── CC-MAIN-2020-29.json │ ├── CC-MAIN-2020-34.json │ ├── CC-MAIN-2020-40.json │ ├── CC-MAIN-2020-45.json │ ├── CC-MAIN-2020-50.json │ ├── CC-MAIN-2021-04.json │ ├── CC-MAIN-2021-10.json │ ├── CC-MAIN-2021-17.json │ ├── CC-MAIN-2021-21.json │ ├── CC-MAIN-2021-25.json │ ├── CC-MAIN-2021-31.json │ ├── CC-MAIN-2021-39.json │ ├── CC-MAIN-2021-43.json │ ├── CC-MAIN-2021-49.json │ ├── CC-MAIN-2022-05.json │ ├── CC-MAIN-2022-21.json │ ├── CC-MAIN-2022-27.json │ ├── CC-MAIN-2022-33.json │ ├── CC-MAIN-2022-40.json │ ├── CC-MAIN-2022-49.json │ ├── CC-MAIN-2023-06.json │ ├── CC-MAIN-2023-14.json │ ├── CC-MAIN-2023-23.json │ ├── CC-MAIN-2023-40.json │ ├── CC-MAIN-2023-50.json │ ├── CC-MAIN-2024-10.json │ ├── CC-MAIN-2024-18.json │ ├── CC-MAIN-2024-22.json │ ├── CC-MAIN-2024-26.json │ ├── CC-MAIN-2024-30.json │ ├── CC-MAIN-2024-33.json │ ├── CC-MAIN-2024-38.json │ ├── CC-MAIN-2024-42.json │ ├── CC-MAIN-2024-46.json │ ├── CC-MAIN-2024-51.json │ ├── CC-MAIN-2025-05.json │ ├── CC-MAIN-2025-08.json │ ├── CC-MAIN-2025-13.json │ ├── CC-MAIN-2025-18.json │ ├── CC-MAIN-2025-21.json │ ├── CC-MAIN-2025-26.json │ ├── CC-MAIN-2025-30.json │ ├── CC-MAIN-2025-33.json │ ├── CC-MAIN-2025-38.json │ ├── CC-MAIN-2025-43.json │ ├── CC-MAIN-2025-47.json │ └── README.md ├── tld_alexa_top_1m.py ├── tld_cisco_umbrella_top_1m.py └── tld_majestic_top_1m.py ├── tests └── test_crawlstat.py └── top_level_domain.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/README.md -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/_config.yml -------------------------------------------------------------------------------- /_layouts/default.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/_layouts/default.html -------------------------------------------------------------------------------- /_layouts/table.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/_layouts/table.html -------------------------------------------------------------------------------- /assets/img/asc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/assets/img/asc.gif -------------------------------------------------------------------------------- /assets/img/bg.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/assets/img/bg.gif -------------------------------------------------------------------------------- /assets/img/desc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/assets/img/desc.gif -------------------------------------------------------------------------------- /assets/js/jquery-3.7.1.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/assets/js/jquery-3.7.1.min.js -------------------------------------------------------------------------------- /assets/js/jquery.tablesorter.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/assets/js/jquery.tablesorter.min.js -------------------------------------------------------------------------------- /crawlplot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/crawlplot.py -------------------------------------------------------------------------------- /crawlstats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/crawlstats.py -------------------------------------------------------------------------------- /get_stats.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/get_stats.sh -------------------------------------------------------------------------------- /get_stats_and_plot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/get_stats_and_plot.sh -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/index.md -------------------------------------------------------------------------------- /plot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot.sh -------------------------------------------------------------------------------- /plot/charset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/charset.py -------------------------------------------------------------------------------- /plot/crawl_size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/crawl_size.py -------------------------------------------------------------------------------- /plot/crawler_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/crawler_metrics.py -------------------------------------------------------------------------------- /plot/domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/domain.py -------------------------------------------------------------------------------- /plot/histogram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/histogram.py -------------------------------------------------------------------------------- /plot/language.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/language.py -------------------------------------------------------------------------------- /plot/mimetype.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/mimetype.py -------------------------------------------------------------------------------- /plot/mimetype_detected.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/mimetype_detected.py -------------------------------------------------------------------------------- /plot/overlap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/overlap.py -------------------------------------------------------------------------------- /plot/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/table.py -------------------------------------------------------------------------------- /plot/tld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/tld.py -------------------------------------------------------------------------------- /plot/tld_by_continent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plot/tld_by_continent.py -------------------------------------------------------------------------------- /plots/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/README.md -------------------------------------------------------------------------------- /plots/charsets-top-100.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/charsets-top-100.html -------------------------------------------------------------------------------- /plots/charsets.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/charsets.csv -------------------------------------------------------------------------------- /plots/charsets.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/charsets.md -------------------------------------------------------------------------------- /plots/crawl_coverage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawl_coverage.png -------------------------------------------------------------------------------- /plots/crawler/crawldb_status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawler/crawldb_status.png -------------------------------------------------------------------------------- /plots/crawler/fetch_status_percentage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawler/fetch_status_percentage.png -------------------------------------------------------------------------------- /plots/crawler/metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawler/metrics.png -------------------------------------------------------------------------------- /plots/crawler/url_protocols.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawler/url_protocols.png -------------------------------------------------------------------------------- /plots/crawler/url_protocols_percentage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawler/url_protocols_percentage.png -------------------------------------------------------------------------------- /plots/crawlermetrics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlermetrics.md -------------------------------------------------------------------------------- /plots/crawloverlap.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawloverlap.md -------------------------------------------------------------------------------- /plots/crawloverlap/crawlsimilarity_matrix_digest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawloverlap/crawlsimilarity_matrix_digest.png -------------------------------------------------------------------------------- /plots/crawloverlap/crawlsimilarity_matrix_url.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawloverlap/crawlsimilarity_matrix_url.png -------------------------------------------------------------------------------- /plots/crawlsize.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize.md -------------------------------------------------------------------------------- /plots/crawlsize/cumulative.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/cumulative.csv -------------------------------------------------------------------------------- /plots/crawlsize/cumulative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/cumulative.png -------------------------------------------------------------------------------- /plots/crawlsize/digest_last_n_crawls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/digest_last_n_crawls.png -------------------------------------------------------------------------------- /plots/crawlsize/domain.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/domain.csv -------------------------------------------------------------------------------- /plots/crawlsize/domain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/domain.png -------------------------------------------------------------------------------- /plots/crawlsize/monthly.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/monthly.csv -------------------------------------------------------------------------------- /plots/crawlsize/monthly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/monthly.png -------------------------------------------------------------------------------- /plots/crawlsize/monthly_new.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/monthly_new.csv -------------------------------------------------------------------------------- /plots/crawlsize/monthly_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/monthly_new.png -------------------------------------------------------------------------------- /plots/crawlsize/registered-domains.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/registered-domains.png -------------------------------------------------------------------------------- /plots/crawlsize/url_last_n_crawls.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/url_last_n_crawls.csv -------------------------------------------------------------------------------- /plots/crawlsize/url_last_n_crawls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/url_last_n_crawls.png -------------------------------------------------------------------------------- /plots/crawlsize/url_page_ratio_last_n_crawls.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/url_page_ratio_last_n_crawls.csv -------------------------------------------------------------------------------- /plots/crawlsize/url_page_ratio_last_n_crawls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/url_page_ratio_last_n_crawls.png -------------------------------------------------------------------------------- /plots/crawlsize/url_status_by_year.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/crawlsize/url_status_by_year.png -------------------------------------------------------------------------------- /plots/domains-top-500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/domains-top-500.csv -------------------------------------------------------------------------------- /plots/domains-top-500.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/domains-top-500.html -------------------------------------------------------------------------------- /plots/domains.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/domains.md -------------------------------------------------------------------------------- /plots/languages-top-200.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/languages-top-200.html -------------------------------------------------------------------------------- /plots/languages.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/languages.csv -------------------------------------------------------------------------------- /plots/languages.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/languages.md -------------------------------------------------------------------------------- /plots/mimetypes-top-100.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/mimetypes-top-100.html -------------------------------------------------------------------------------- /plots/mimetypes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/mimetypes.csv -------------------------------------------------------------------------------- /plots/mimetypes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/mimetypes.md -------------------------------------------------------------------------------- /plots/mimetypes_detected-top-100.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/mimetypes_detected-top-100.html -------------------------------------------------------------------------------- /plots/mimetypes_detected.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/mimetypes_detected.csv -------------------------------------------------------------------------------- /plots/throughput/fetch_throughput_2017_dezember.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/throughput/fetch_throughput_2017_dezember.png -------------------------------------------------------------------------------- /plots/throughput/fetch_throughput_april.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/throughput/fetch_throughput_april.png -------------------------------------------------------------------------------- /plots/throughput/fetch_throughput_december.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/throughput/fetch_throughput_december.png -------------------------------------------------------------------------------- /plots/throughput/fetch_throughput_february.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/throughput/fetch_throughput_february.png -------------------------------------------------------------------------------- /plots/throughput/fetch_throughput_fetch_throughput_oct_dec_feb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/throughput/fetch_throughput_fetch_throughput_oct_dec_feb.png -------------------------------------------------------------------------------- /plots/throughput/fetch_throughput_fetch_throughput_oct_dec_feb_mar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/throughput/fetch_throughput_fetch_throughput_oct_dec_feb_mar.png -------------------------------------------------------------------------------- /plots/throughput/fetch_throughput_fetch_throughput_oct_dec_mar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/throughput/fetch_throughput_fetch_throughput_oct_dec_mar.png -------------------------------------------------------------------------------- /plots/throughput/fetch_throughput_october.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/throughput/fetch_throughput_october.png -------------------------------------------------------------------------------- /plots/tld/by-year-and-continent.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/by-year-and-continent.md -------------------------------------------------------------------------------- /plots/tld/comparison.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/comparison.md -------------------------------------------------------------------------------- /plots/tld/groups-percentage.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/groups-percentage.html -------------------------------------------------------------------------------- /plots/tld/groups.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/groups.md -------------------------------------------------------------------------------- /plots/tld/groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/groups.png -------------------------------------------------------------------------------- /plots/tld/latest-crawl-groups.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/latest-crawl-groups.html -------------------------------------------------------------------------------- /plots/tld/latest-crawl-tlds.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/latest-crawl-tlds.html -------------------------------------------------------------------------------- /plots/tld/latestcrawl.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/latestcrawl.md -------------------------------------------------------------------------------- /plots/tld/percentage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/percentage.md -------------------------------------------------------------------------------- /plots/tld/selected-crawl-comparison-spearman-all-tlds.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/selected-crawl-comparison-spearman-all-tlds.html -------------------------------------------------------------------------------- /plots/tld/selected-crawl-comparison-spearman-frequent-tlds.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/selected-crawl-comparison-spearman-frequent-tlds.html -------------------------------------------------------------------------------- /plots/tld/selected-crawl-comparison.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/selected-crawl-comparison.html -------------------------------------------------------------------------------- /plots/tld/selected-crawls-percentage.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/selected-crawls-percentage.html -------------------------------------------------------------------------------- /plots/tld/selected-tlds-by-year.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/selected-tlds-by-year.csv -------------------------------------------------------------------------------- /plots/tld/selected-tlds-by-year.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/selected-tlds-by-year.html -------------------------------------------------------------------------------- /plots/tld/tlds-by-year-and-continent.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/tlds-by-year-and-continent.csv -------------------------------------------------------------------------------- /plots/tld/tlds-by-year-and-continent.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/tlds-by-year-and-continent.html -------------------------------------------------------------------------------- /plots/tld/tlds-by-year-and-continent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tld/tlds-by-year-and-continent.png -------------------------------------------------------------------------------- /plots/tlds.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/plots/tlds.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirements_plot.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/requirements_plot.txt -------------------------------------------------------------------------------- /run_stats_hadoop.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/run_stats_hadoop.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/setup.py -------------------------------------------------------------------------------- /site.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/site.Dockerfile -------------------------------------------------------------------------------- /stats.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats.Dockerfile -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2016-18.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2016-18.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2016-22.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2016-22.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2016-26.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2016-26.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2016-30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2016-30.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2016-36.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2016-36.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2016-40.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2016-40.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2016-44.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2016-44.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2016-50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2016-50.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-04.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-04.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-09.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-09.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-13.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-13.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-17.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-17.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-22.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-22.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-26.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-26.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-30.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-34.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-34.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-39.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-39.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-43.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-43.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-47.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-47.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2017-51.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2017-51.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-05.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-05.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-09.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-09.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-13.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-13.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-17.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-17.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-22.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-22.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-26.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-26.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-30.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-34.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-34.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-39.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-39.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-43.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-43.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-47.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-47.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2018-51.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2018-51.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-04.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-04.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-09.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-09.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-13.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-13.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-18.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-18.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-22.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-22.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-26.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-26.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-30.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-35.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-35.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-39.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-39.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-43.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-43.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-47.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-47.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2019-51.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2019-51.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-05.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-05.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-10.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-10.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-16.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-24.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-24.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-29.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-29.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-34.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-34.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-40.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-40.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-45.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-45.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2020-50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2020-50.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-04.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-04.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-10.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-10.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-17.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-17.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-21.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-21.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-25.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-25.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-31.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-31.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-39.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-39.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-43.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-43.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2021-49.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2021-49.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2022-05.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2022-05.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2022-21.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2022-21.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2022-27.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2022-27.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2022-33.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2022-33.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2022-40.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2022-40.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2022-49.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2022-49.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2023-06.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2023-06.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2023-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2023-14.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2023-23.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2023-23.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2023-40.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2023-40.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2023-50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2023-50.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-10.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-10.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-18.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-18.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-22.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-22.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-26.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-26.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-30.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-33.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-33.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-38.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-38.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-42.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-42.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-46.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-46.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2024-51.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2024-51.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-05.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-05.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-08.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-08.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-13.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-13.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-18.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-18.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-21.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-21.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-26.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-26.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-30.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-33.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-33.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-38.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-38.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-43.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-43.json -------------------------------------------------------------------------------- /stats/crawler/CC-MAIN-2025-47.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/CC-MAIN-2025-47.json -------------------------------------------------------------------------------- /stats/crawler/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/crawler/README.md -------------------------------------------------------------------------------- /stats/tld_alexa_top_1m.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/tld_alexa_top_1m.py -------------------------------------------------------------------------------- /stats/tld_cisco_umbrella_top_1m.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/tld_cisco_umbrella_top_1m.py -------------------------------------------------------------------------------- /stats/tld_majestic_top_1m.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/stats/tld_majestic_top_1m.py -------------------------------------------------------------------------------- /tests/test_crawlstat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/tests/test_crawlstat.py -------------------------------------------------------------------------------- /top_level_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commoncrawl/cc-crawl-statistics/HEAD/top_level_domain.py --------------------------------------------------------------------------------