├── .editorconfig
├── .env.development
├── .env.production
├── .eslintrc.json
├── .github
├── dependabot.yml
└── workflows
│ └── dashboard.yml
├── .gitignore
├── .prettierignore
├── .prettierrc.json
├── LICENSE
├── README.md
├── next-sitemap.config.js
├── next.config.js
├── package.json
├── postcss.config.js
├── public
├── download.sh
├── next.svg
└── vercel.svg
├── scripts
└── build-index.ts
├── src
├── app
│ ├── datasets
│ │ ├── clickhouse.tsx
│ │ ├── dataset_table.tsx
│ │ ├── page.tsx
│ │ ├── sql
│ │ │ ├── files_by_extension.sql
│ │ │ ├── largest_version.sql
│ │ │ ├── longest_files.sql
│ │ │ ├── most_unique.sql
│ │ │ ├── python_files_over_time.sql
│ │ │ └── sql.ts
│ │ └── syntax.tsx
│ ├── download
│ │ ├── example.sh
│ │ ├── example_2.sh
│ │ └── page.tsx
│ ├── favicon.ico
│ ├── globals.css
│ ├── layout.tsx
│ ├── layouts
│ │ └── markdown.tsx
│ ├── navbar.tsx
│ ├── page.tsx
│ ├── projects
│ │ ├── page.tsx
│ │ └── view
│ │ │ ├── layout.tsx
│ │ │ ├── page.tsx
│ │ │ └── project_info.tsx
│ ├── repositories
│ │ ├── [name]
│ │ │ └── page.tsx
│ │ ├── page.tsx
│ │ └── repo-stats.tsx
│ ├── stats
│ │ ├── chart-scroll.tsx
│ │ ├── chart.tsx
│ │ ├── colours.ts
│ │ ├── language-stats.tsx
│ │ ├── page.tsx
│ │ ├── shitpost-chart.tsx
│ │ ├── shitpost-model.tsx
│ │ ├── sql.tsx
│ │ ├── stats.tsx
│ │ ├── total_stats.tsx
│ │ └── utils.tsx
│ ├── swr-provider.tsx
│ └── table.tsx
├── data
│ └── .gitkeep
└── utils.ts
├── tailwind.config.js
└── tsconfig.json
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | end_of_line = lf
6 | indent_size = 2
7 | indent_style = space
8 | insert_final_newline = true
9 | max_line_length = 120
10 | trim_trailing_whitespace = true
11 |
12 | [*.tsx]
13 | indent_size = 2
14 |
--------------------------------------------------------------------------------
/.env.development:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.env.production:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "next/core-web-vitals"
3 | }
4 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions" # See documentation for possible values
4 | directory: "/" # Location of package manifests
5 | schedule:
6 | interval: "weekly"
7 | groups:
8 | dependencies:
9 | patterns:
10 | - "*"
11 | - package-ecosystem: "npm"
12 | directory: "/" # Location of package manifests
13 | schedule:
14 | interval: "weekly"
15 | groups:
16 | dependencies:
17 | patterns:
18 | - "*"
19 |
--------------------------------------------------------------------------------
/.github/workflows/dashboard.yml:
--------------------------------------------------------------------------------
1 | # Runs on every push to main, once a day at 10:00 UTC (cron below), and when triggered manually
2 | on:
3 | workflow_dispatch:
4 | push:
5 | branches:
6 | - main
7 | schedule:
8 | - cron: "0 10 * * *"
9 |
10 | concurrency: build
11 |
12 | name: Build and deploy GH Pages
13 | jobs:
14 | build:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - name: checkout
18 | uses: actions/checkout@v4
19 |
20 | - uses: actions/setup-node@v4
21 | with:
22 | node-version: "lts/*"
23 | cache: "npm"
24 |
25 | - name: Setup Pages
26 | id: setup-pages
27 | uses: actions/configure-pages@v5
28 |
29 | - name: Install deps
30 | run: |
31 | npm install
32 |
33 | - name: Build site
34 | run: |
35 | npm run build -- --no-lint
36 | cp public/sitemap*.xml out/
37 | cp public/robots.txt out/
38 | cp -r public/data/ out/data/
39 | env:
40 | NEXT_PUBLIC_BASE_PATH: ${{ steps.setup-pages.outputs.base_path }}
41 | NEXT_PUBLIC_ASSET_PATH: ${{ steps.setup-pages.outputs.base_url }}
42 |
43 | - name: Upload artifact
44 | uses: actions/upload-pages-artifact@v3
45 | with:
46 | path: ${{ github.workspace }}/out/
47 |
48 | deploy:
49 | needs: build
50 | concurrency: dashboard-publish
51 |
52 | permissions:
53 | pages: write
54 | id-token: write
55 | actions: read
56 |
57 | # Deploy to the github-pages environment
58 | environment:
59 | name: github-pages
60 | url: ${{ steps.deployment.outputs.page_url }}
61 |
62 | runs-on: ubuntu-latest
63 | steps:
64 | - name: Deploy to GitHub Pages
65 | id: deployment
66 | uses: actions/deploy-pages@v4
67 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 | src/data/repositories_with_releases.json
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 |
8 | # testing
9 | /coverage
10 |
11 | # next.js
12 | /.next/
13 | /out/
14 |
15 | # production
16 | /build
17 |
18 | # misc
19 | .DS_Store
20 | *.pem
21 |
22 | # debug
23 | npm-debug.log*
24 | yarn-debug.log*
25 | yarn-error.log*
26 |
27 | # local env files
28 | .env*.local
29 |
30 | # vercel
31 | .vercel
32 |
33 | # typescript
34 | *.tsbuildinfo
35 | next-env.d.ts
36 |
37 | .idea/
38 | public/data/
39 | public/sitemap*.xml
40 | public/robots.txt
41 |
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | .next/
2 | public/
3 |
--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {}
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Tom Forbes
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app).
2 |
3 | ## Getting Started
4 |
5 | First, run the development server:
6 |
7 | ```bash
8 | npm run dev
9 | # or
10 | yarn dev
11 | # or
12 | pnpm dev
13 | ```
14 |
15 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
16 |
17 | You can start editing the page by modifying `src/app/page.tsx`. The page auto-updates as you edit the file.
18 |
19 | This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font.
20 |
21 | ## Learn More
22 |
23 | To learn more about Next.js, take a look at the following resources:
24 |
25 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
26 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
27 |
28 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js/) - your feedback and contributions are welcome!
29 |
30 | ## Deploy on Vercel
31 |
32 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
33 |
34 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details.
35 |
--------------------------------------------------------------------------------
/next-sitemap.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('next-sitemap').IConfig} */
2 | module.exports = {
3 | siteUrl: 'https://py-code.org',
4 | sitemapSize: 40000,
5 | generateIndexSitemap: true,
6 | generateRobotsTxt: true,
7 | additionalPaths: async (config) => {
8 | const response = await fetch("https://data.py-code.org/data/pages.json")
9 | const packageList = await response.json();
10 | const packages = packageList.packages.toSorted((a, b) => a.toLowerCase().localeCompare(b.toLowerCase()));
11 |
12 | return packages.map((pkg) => ({
13 | loc: `https://py-code.org/projects/view?name=${pkg.toLowerCase()}`,
14 | }));
15 | },
16 | }
17 |
--------------------------------------------------------------------------------
/next.config.js:
--------------------------------------------------------------------------------
1 | const { join } = require("path");
2 | const { symlink, access, mkdir } = require("fs/promises");
3 | const basePath = (process.env.NEXT_PUBLIC_BASE_PATH || "").replace("http://", "https://");
4 | const assetPrefix = (process.env.NEXT_PUBLIC_ASSET_PATH || "").replace("http://", "https://");
5 |
6 | const nextConfig = {
7 | pageExtensions: ["ts", "tsx", "js", "jsx", "md", "mdx"],
8 | basePath,
9 | assetPrefix,
10 | // experimental: {
11 | // mdxRs: true,
12 | // },
13 | output: "export",
14 | webpack: (config, { isServer, dev }) => {
15 | config.experiments = Object.assign(config.experiments || {}, {
16 | asyncWebAssembly: true,
17 | layers: true,
18 | });
19 | if (!dev && isServer) {
20 | config.output.webassemblyModuleFilename = "chunks/[id].wasm";
21 | config.plugins.push(new WasmChunksFixPlugin());
22 | }
23 | return config;
24 | },
25 | };
26 | module.exports = nextConfig;
27 |
28 | class WasmChunksFixPlugin {
29 | apply(compiler) {
30 | compiler.hooks.thisCompilation.tap("WasmChunksFixPlugin", (compilation) => {
31 | compilation.hooks.processAssets.tap({ name: "WasmChunksFixPlugin" }, (assets) =>
32 | Object.entries(assets).forEach(([pathname, source]) => {
33 | if (!pathname.match(/\.wasm$/)) return;
34 | compilation.deleteAsset(pathname);
35 |
36 | const name = pathname.split("/")[1];
37 | const info = compilation.assetsInfo.get(pathname);
38 | compilation.emitAsset(name, source, info);
39 | }),
40 | );
41 | });
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "pypi-data-site",
3 | "version": "0.1.0",
4 | "private": true,
5 | "engines": {
6 | "node": ">=18.13.0"
7 | },
8 | "scripts": {
9 | "dev": "next dev",
10 | "build": "next build",
11 | "start": "next start",
12 | "lint": "next lint",
13 | "postbuild": "next-sitemap && npm run build-index",
14 | "build-index": "mkdir -p public/data/ && ts-node scripts/build-index.ts public/data/fuse-index.json",
15 | "postinstall": "wget https://raw.githubusercontent.com/pypi-data/data/main/stats/repositories_with_releases.json -O src/data/repositories_with_releases.json"
16 | },
17 | "dependencies": {
18 | "@heroicons/react": "^2.1.4",
19 | "@next/bundle-analyzer": "~14.2.4",
20 | "@tailwindcss/typography": "^0.5.13",
21 | "@tanstack/react-table": "^8.17.3",
22 | "byte-size": "^8.1.1",
23 | "chroma-js": "^2.4.2",
24 | "daisyui": "^4.12.7",
25 | "date-fns": "^3.6.0",
26 | "eslint": "^9.5.0",
27 | "eslint-config-next": "~14.2.4",
28 | "fuse.js": "^7.0.0",
29 | "human-format": "^1.2.0",
30 | "lodash.samplesize": "^4.2.0",
31 | "next": "~14.2.4",
32 | "next-sitemap": "^4.2.3",
33 | "prql-js": "==0.12.1",
34 | "react": "18.3.1",
35 | "react-dom": "18.3.1",
36 | "react-syntax-highlighter": "^15.5.0",
37 | "react-timestamp": "^6.0.0",
38 | "recharts": "^2.12.7",
39 | "sql-formatter": "^15.3.2",
40 | "swr": "^2.2.5",
41 | "ts-node": "^10.9.2",
42 | "typescript": "<5.6.0",
43 | "use-debounce": "^10.0.1"
44 | },
45 | "devDependencies": {
46 | "@types/byte-size": "^8.1.2",
47 | "@types/chroma-js": "^2.4.4",
48 | "@types/lodash.samplesize": "^4.2.9",
49 | "@types/node": "^20.14.8",
50 | "@types/react": "^18.3.3",
51 | "@types/react-dom": "^18.3.0",
52 | "@types/react-syntax-highlighter": "^15.5.13",
53 | "autoprefixer": "^10.4.19",
54 | "postcss": "^8.4.38",
55 | "prettier": "^3.3.2",
56 | "raw-loader": "^4.0.2",
57 | "tailwindcss": "^3.4.4"
58 | },
59 | "browserslist": [
60 | ">0.3%",
61 | "not dead",
62 | "not op_mini all"
63 | ]
64 | }
65 |
--------------------------------------------------------------------------------
/postcss.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: {
3 | tailwindcss: {},
4 | autoprefixer: {},
5 | },
6 | };
7 |
--------------------------------------------------------------------------------
/public/download.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Clones every repository listed in the pypi-data repository index into the directory
4 | # given as the first argument. Each clone is shallow (--depth=1), skips the working
5 | # tree checkout (--no-checkout) and uses the "code" branch.
6 |
7 | REPOSITORIES_URL="https://raw.githubusercontent.com/pypi-data/data/main/links/repositories.txt"
8 |
9 | if [[ $# -eq 0 ]] ; then
10 |   echo 'Usage: [path]'
11 |   exit 1
12 | fi
13 |
14 | mkdir -p "$1"
15 |
16 | # -f makes curl fail on HTTP errors instead of printing the error page, whose words
17 | # would otherwise be handed to "git clone" as if they were repository URLs.
18 | if ! urls=$(curl -fsSL "$REPOSITORIES_URL"); then
19 |   echo "Failed to download the repository list from $REPOSITORIES_URL" >&2
20 |   exit 1
21 | fi
22 |
23 | # $urls is intentionally unquoted: the list is split on whitespace, one URL per word.
24 | for url in $urls; do
25 |   git -C "$1" clone "$url" --depth=1 --no-checkout --branch=code
26 | done
27 |
--------------------------------------------------------------------------------
/public/next.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/public/vercel.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/build-index.ts:
--------------------------------------------------------------------------------
1 | import Fuse from "fuse.js";
2 | import * as fs from "fs";
3 |
4 | const PACKAGE_LIST_URL = "https://data.py-code.org/data/pages.json";
5 |
6 | /**
7 |  * Downloads the package list and writes a search index for it to disk.
8 |  *
9 |  * The output file (path taken from the first CLI argument) is a JSON object with
10 |  * two keys: `json`, the serialised Fuse.js index, and `packages`, the package names.
11 |  *
12 |  * @throws {Error} when no output path was given or the package list cannot be downloaded.
13 |  */
14 | async function fetchData() {
15 |   const outputPath = process.argv[2];
16 |   if (!outputPath) {
17 |     throw new Error("Usage: ts-node scripts/build-index.ts <output-path>");
18 |   }
19 |
20 |   const response = await fetch(PACKAGE_LIST_URL);
21 |   if (!response.ok) {
22 |     // Fail with a clear message instead of an opaque JSON parse error on an error page.
23 |     throw new Error(`Failed to fetch ${PACKAGE_LIST_URL}: ${response.status} ${response.statusText}`);
24 |   }
25 |   const packageList = (await response.json()) as { packages: string[] };
26 |   const packages = packageList.packages;
27 |
28 |   const index = {
29 |     json: Fuse.createIndex([], packages).toJSON(),
30 |     packages: packages,
31 |   };
32 |   // Serialise once: the index is large and the string is needed for both the log line and the file.
33 |   const serialized = JSON.stringify(index);
34 |   console.log(outputPath, serialized.length);
35 |   fs.writeFileSync(outputPath, serialized);
36 | }
37 |
38 | // Report the failure and exit non-zero so the build stops if the index cannot be produced.
39 | fetchData().catch((error) => {
40 |   console.error(error);
41 |   process.exit(1);
42 | });
43 |
--------------------------------------------------------------------------------
/src/app/datasets/clickhouse.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import { useState } from "react";
4 |
5 | export default function ClickhouseView({ queries }: { queries: { name: string; query: string }[] }) {
6 | const [index, setIndex] = useState(0);
7 | const query = queries[index].query;
8 | const url = `https://play.clickhouse.com/play?user=play#${query}`;
9 | return (
10 | <>
11 |
12 | {queries.map(({ name }, i) => (
13 |
14 | {
17 | setIndex(i);
18 | }}
19 | >
20 | {name}
21 |
22 |
23 | ))}
24 |
25 |
26 |
31 | >
32 | );
33 | }
34 |
--------------------------------------------------------------------------------
/src/app/datasets/dataset_table.tsx:
--------------------------------------------------------------------------------
1 | import byteSize from "byte-size";
2 |
3 | export default function LinksTable({ data }: { data: { url: string; size: number }[] }) {
4 | return (
5 |
6 |
7 |
8 | URL
9 | Size
10 |
11 |
12 |
13 | {data
14 | .sort((a, b) => (a.size < b.size ? 1 : -1))
15 | .map((e) => (
16 |
17 |
18 | {e.url}
19 |
20 | {byteSize(e.size, { units: "iec", precision: 1 }).toString()}
21 |
22 | ))}
23 |
24 |
25 |
26 | {data.length.toLocaleString()} links
27 |
28 | {byteSize(
29 | data.reduce((acc, cur) => acc + cur.size, 0),
30 | { units: "iec", precision: 1 },
31 | ).toString()}
32 |
33 |
34 |
35 |
36 | );
37 | }
38 |
--------------------------------------------------------------------------------
/src/app/datasets/page.tsx:
--------------------------------------------------------------------------------
1 | import LinksTable from "@/app/datasets/dataset_table";
2 | import SyntaxHighlight from "@/app/datasets/syntax";
3 | import getStats from "@/app/stats/stats";
4 | import { ChevronDownIcon } from "@heroicons/react/24/solid";
5 | import Table from "@/app/table";
6 | import ClickhouseView from "@/app/datasets/clickhouse";
7 | import queries from "@/app/datasets/sql/sql";
8 |
9 | const SQLITE_URL = "https://github.com/pypi-data/pypi-json-data/releases/download/latest/pypi-data.sqlite.gz";
10 | const DATASET_URL = "https://github.com/pypi-data/data/raw/main/links/dataset.txt";
11 | const PYTHON_DATASET_URL = "https://github.com/pypi-data/data/raw/main/links/only_python_files.txt";
12 | const REPOSITORIES_DATASET_URL = "https://github.com/pypi-data/data/raw/main/stats/repositories.json";
13 | const REPOSITORIES_WITH_RELEASES_DATASET_URL =
14 | "https://github.com/pypi-data/data/raw/main/stats/repositories_with_releases.json";
15 | const CURL_EXAMPLE = `$ curl -L --remote-name-all $(curl -L "${DATASET_URL}")`;
16 | const PYTHON_CURL_EXAMPLE = `$ curl -L --remote-name-all $(curl -L "${PYTHON_DATASET_URL}")`;
17 | const SQLITE_CURL_EXAMPLE = `$ curl -L ${SQLITE_URL} | gzip -d > pypi-data.sqlite`;
18 | const DUCK_DB_EXAMPLE = `${CURL_EXAMPLE}
19 | $ duckdb -json -s "select * from '*.parquet' order by lines DESC limit 1"
20 | [
21 | {
22 | "project_name": "EvenOrOdd",
23 | "project_version": "0.1.10",
24 | "project_release": "EvenOrOdd-0.1.10-py3-none-any.whl",
25 | "uploaded_on": "2021-02-21 02:25:57.832",
26 | "path": "EvenOrOdd/EvenOrOdd.py",
27 | "size": "514133366",
28 | "hash": "ff7f863ad0bb4413c939fb5e9aa178a5a8855774262e1171b876d1d2b51e6998",
29 | "skip_reason": "too-large",
30 | "lines": "20010001"
31 | }
32 | ]
33 | `;
34 |
35 | const EVEN_OR_ODD_EXAMPLE = `$ wget https://files.pythonhosted.org/packages/b2/82/c4265814ed9e68880ba0892eddf1664c48bb490f37113d74d32fe4757192/EvenOrOdd-0.1.10-py3-none-any.whl
36 | $ unzip EvenOrOdd-0.1.10-py3-none-any.whl
37 | $ wc -l EvenOrOdd/EvenOrOdd.py
38 | 20010000 EvenOrOdd/EvenOrOdd.py
39 |
40 | $ tail -n6 EvenOrOdd/EvenOrOdd.py
41 | elif num == 9999996:
42 | return True
43 | elif num == 9999997:
44 | return False
45 | elif num == 9999998:
46 | return True
47 | elif num == 9999999:
48 | return False
49 | else:
50 | raise Exception("Number is not within bounds")
51 | `;
52 |
53 | export default async function Page() {
54 | const stats = await getStats();
55 |
56 | const sampleresp = await fetch("https://raw.githubusercontent.com/pypi-data/data/main/stats/random_sample.json");
57 | const sampledata = await sampleresp.json();
58 |
59 | const linkresp = await fetch(DATASET_URL);
60 | const links = (await linkresp.text()).split("\n").filter((e) => e.length > 0);
61 |
62 | const sqliteResponse = await fetch(SQLITE_URL, { method: "HEAD" });
63 | const sqliteSize = Number(sqliteResponse.headers.get("content-length"));
64 |
65 | const sqliteSchemaResponse = await fetch(
66 | "https://raw.githubusercontent.com/pypi-data/pypi-json-data/main/scripts/schema.sql",
67 | );
68 | const sqliteSchema = await sqliteSchemaResponse.text();
69 |
70 | const sqliteSizes = [{ url: SQLITE_URL, size: sqliteSize }];
71 |
72 | const sizes = await Promise.all(
73 | links.map(async (link) => {
74 | let resp = await fetch(link, {
75 | method: "HEAD",
76 | headers: {
77 | "accept-encoding": "",
78 | },
79 | });
80 | return {
81 | url: link,
82 | size: Number(resp.headers.get("content-length")),
83 | };
84 | }),
85 | );
86 |
87 | const pythonlinkresp = await fetch(PYTHON_DATASET_URL);
88 | const pythonlinks = (await pythonlinkresp.text()).split("\n").filter((e) => e.length > 0);
89 |
90 | const pythonsampleresp = await fetch(
91 | "https://raw.githubusercontent.com/pypi-data/data/main/stats/random_sample_python_only.json",
92 | );
93 | const pythonsampledata = await pythonsampleresp.json();
94 |
95 | const pythonsizes = await Promise.all(
96 | pythonlinks.map(async (link) => {
97 | let resp = await fetch(link, {
98 | method: "HEAD",
99 | headers: {
100 | "accept-encoding": "",
101 | },
102 | });
103 | return {
104 | url: link,
105 | size: Number(resp.headers.get("content-length")),
106 | };
107 | }),
108 | );
109 |
110 | const repo_metadata_example = await (await fetch(REPOSITORIES_DATASET_URL)).json();
111 | const repo_metadata_example_element = repo_metadata_example[0];
112 |
113 | const repo_metadata_sizes = await Promise.all(
114 | [REPOSITORIES_WITH_RELEASES_DATASET_URL, REPOSITORIES_DATASET_URL].map(async (link) => {
115 | let resp = await fetch(link, {
116 | method: "HEAD",
117 | headers: {
118 | "accept-encoding": "",
119 | },
120 | });
121 | return {
122 | url: link,
123 | size: Number(resp.headers.get("content-length")),
124 | };
125 | }),
126 | );
127 |
128 | return (
129 | <>
130 | Datasets
131 |
132 | Explore the data in your browser
133 |
134 |
135 |
136 |
137 | Download datasets locally
138 |
139 | There are several datasets available for use:
140 |
141 |
142 | Metadata about every file uploaded to PyPI
143 |
144 |
145 | SQLite dump of all PyPI metadata
146 |
147 |
148 | Repository metadata
149 |
150 |
151 | Unique Python files within every release
152 |
153 |
154 |
155 | These datasets allow you to analyse the contents of PyPI without having to download and process every package
156 | yourself. All of the statistics within the stats page are periodically generated using
157 | the datasets below.
158 |
159 |
160 |
161 |
162 |
163 |
166 |
About
167 |
168 | This dataset contains information about every file within every release uploaded to PyPi, including:
169 |
170 | Project name, version and release upload date
171 | File path, size and line count
172 | SHA256 hash
173 |
174 |
175 | The dataset should be accessed by downloading the files specified within{" "}
176 |
177 | {DATASET_URL}
178 | {" "}
179 | . The following command downloads the dataset from this URL:
180 |
181 | {CURL_EXAMPLE}
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 | Using DuckDB to process the dataset
190 |
191 |
192 |
193 |
194 | DuckDB is a great tool for processing the dataset. It is very fast
195 | and supports SQL queries over Parquet files. The following command uses DuckDB to find the largest
196 | file ever uploaded to PyPI:
197 |
198 |
{DUCK_DB_EXAMPLE}
199 |
Woah, a whopping 20 million lines of code! Lets confirm it:
200 |
{EVEN_OR_ODD_EXAMPLE}
201 |
Very funny, I hope this module is a joke 😅
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 | About skipped files
212 |
213 |
214 |
215 |
216 |
217 | The dataset contains a skip_reason column. If a file is not present in the git
218 | repositories then the reason for skipping is recorded here. On the right is a list of the current
219 | skip reasons and the number of files excluded from the git repositories for each reason.
220 |
221 |
222 | The exact reasons for skipping a file are not fully documented here, but ignored files
223 | include virtual environments accidentally uploaded to PyPI. text-long-lines means the
224 | file had very few lines, but the total size was large.
225 |
226 |
227 |
228 |
Skipped reasons:
229 |
skip_reason != "")}
231 | columns={[
232 | { name: "skip_reason" },
233 | { name: "count", type: "number" },
234 | {
235 | name: "total_size",
236 | type: "bytes",
237 | },
238 | ]}
239 | />
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
Current Links
248 |
249 |
250 |
251 |
Schema
252 | {JSON.stringify(sampledata, null, 2)}
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 | SQLite dump of all PyPI metadata
264 |
265 |
About
266 |
267 | This is a SQLite dump of all PyPI metadata fetched from the PyPI API. It is updated daily. It can be
268 | accessed directly from the following url:
269 |
270 | {SQLITE_URL}
271 |
272 | :
273 |
274 |
{SQLITE_CURL_EXAMPLE}
275 |
276 |
277 |
Links
278 |
279 |
280 |
281 |
Schema
282 | {sqliteSchema}
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 | Repository Metadata
294 |
295 |
296 | This dataset contains information about the pypi-data git repositories. The{" "}
297 | repositories_with_releases.json
298 | file contains a list of project names contained within each git repository.
299 |
300 |
301 |
302 |
303 |
About
304 |
305 | Current Links
306 |
307 |
308 |
309 |
Schema
310 |
311 | {JSON.stringify([repo_metadata_example_element], null, 2)}
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 | Unique Python files
324 |
325 |
326 | This dataset contains one row per unique Python file within every release uploaded to PyPI.
327 | Only the sha256 hash and a random path to the file is provided. This dataset is useful if you want to parse
328 | the Python files yourself, but want to avoid parsing the same file multiple times.
329 |
330 |
331 | Like the main dataset, the unique files dataset should be accessed by downloading the links
332 |
333 | from the following file
334 | {" "}
335 | :
336 |
337 |
{PYTHON_CURL_EXAMPLE}
338 |
339 |
340 |
About
341 |
342 | Current Links
343 |
344 |
345 |
346 |
Schema
347 | {JSON.stringify(pythonsampledata, null, 2)}
348 |
349 |
350 |
351 |
352 | >
353 | );
354 | }
355 |
--------------------------------------------------------------------------------
/src/app/datasets/sql/files_by_extension.sql:
--------------------------------------------------------------------------------
1 | select arrayElement(splitByChar('.', arrayElement(splitByChar('/', path), -1)), -1) as extension,
2 | count(*) as total_files,
3 | formatReadableSize(sum(size)) as total_size
4 | from pypi
5 | where skip_reason = ''
6 | group by 1
7 | order by sum(size) desc
8 | limit 10;
9 |
--------------------------------------------------------------------------------
/src/app/datasets/sql/largest_version.sql:
--------------------------------------------------------------------------------
1 | -- Ten largest (project, version) pairs by the summed size of their files.
2 | SELECT project_name,
3 |        project_version,
4 |        formatReadableSize(sum(size)) as total_size,
5 |        count(*) as files
6 | FROM pypi
7 | group by 1, 2
8 | order by sum(size) desc
9 | limit 10;
10 |
--------------------------------------------------------------------------------
/src/app/datasets/sql/longest_files.sql:
--------------------------------------------------------------------------------
1 | SELECT project_release,
2 | path,
3 | lines
4 | FROM pypi
5 | order by lines desc
6 | limit 10;
7 |
--------------------------------------------------------------------------------
/src/app/datasets/sql/most_unique.sql:
--------------------------------------------------------------------------------
1 | with project_files as (SELECT project_name,
2 | count(*) as files,
3 | count(distinct hash) as unique,
4 | round((unique / files) * 100) as percent_unique,
5 | formatReadableSize(sum(size)) as total_size
6 | FROM pypi
7 | where endsWith(path, '.py')
8 | group by 1)
9 | select *
10 | from project_files
11 | where files > (select quantile(0.995)(files) from project_files)
12 | order by 4 desc
13 | limit 10
14 |
--------------------------------------------------------------------------------
/src/app/datasets/sql/python_files_over_time.sql:
--------------------------------------------------------------------------------
1 | SELECT toYear(uploaded_on) as year,
2 | count(*) as python_files,
3 | count(distinct hash) as unique_files,
4 | sum(lines) as total_lines,
5 | round((unique_files/python_files) * 100) as unique_percent
6 |
7 | FROM pypi
8 | where endsWith(path, '.py')
9 | group by 1
10 | order by 1 desc
11 | limit 25;
12 |
--------------------------------------------------------------------------------
/src/app/datasets/sql/sql.ts:
--------------------------------------------------------------------------------
1 | import { format } from "sql-formatter";
2 |
3 | // @ts-ignore
4 | import mostUnique from "raw-loader!./most_unique.sql";
5 | // @ts-ignore
6 | import overTime from "raw-loader!./python_files_over_time.sql";
7 | // @ts-ignore
8 | import filesByExt from "raw-loader!./files_by_extension.sql";
9 | // @ts-ignore
10 | import longestFiles from "raw-loader!./longest_files.sql";
11 | // @ts-ignore
12 | import largestVersion from "raw-loader!./largest_version.sql";
13 |
14 | const queries = [
15 | createExample("Largest versions", largestVersion),
16 | createExample("Longest files", longestFiles),
17 | createExample("Largest projects by unique files", mostUnique),
18 | createExample("Unique files over time", overTime),
19 | createExample("Sizes by extension", filesByExt),
20 | ];
21 |
22 | export default queries;
23 |
24 | /**
25 |  * Builds one example entry for the ClickHouse view.
26 |  *
27 |  * @param name - label of the example.
28 |  * @param sql - raw SQL text of the query.
29 |  * @returns the name together with the formatted query, base64-encoded.
30 |  */
31 | function createExample(name: string, sql: string) {
32 |   const prettySql = format(sql, { language: "sql", indentStyle: "tabularLeft" });
33 |   return {
34 |     name,
35 |     query: Buffer.from(prettySql).toString("base64"),
36 |   };
37 | }
38 |
--------------------------------------------------------------------------------
/src/app/datasets/syntax.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import { PrismLight as SyntaxHighlighter } from "react-syntax-highlighter";
4 | import shell from "react-syntax-highlighter/dist/esm/languages/prism/shell-session";
5 | import json from "react-syntax-highlighter/dist/esm/languages/prism/json";
6 | import sql from "react-syntax-highlighter/dist/esm/languages/prism/sql";
7 | import { tomorrow } from "react-syntax-highlighter/dist/esm/styles/prism";
8 |
9 | SyntaxHighlighter.registerLanguage("shell", shell);
10 | SyntaxHighlighter.registerLanguage("json", json);
11 | SyntaxHighlighter.registerLanguage("sql", sql);
12 |
13 | export default function SyntaxHighlight({ language, children }: { language: string; children: string | string[] }) {
14 | return (
15 |
26 | {children}
27 |
28 | );
29 | }
30 |
--------------------------------------------------------------------------------
/src/app/download/example.sh:
--------------------------------------------------------------------------------
1 | wget https://py-code.org/download.sh
2 | chmod +x download.sh
3 | ./download.sh pypi_code
4 |
--------------------------------------------------------------------------------
/src/app/download/example_2.sh:
--------------------------------------------------------------------------------
1 | git rev-list --no-object-names --all --objects --filter=object:type=blob --all -- 'packages/4suite-xml/' | git cat-file --batch
2 |
--------------------------------------------------------------------------------
/src/app/download/page.tsx:
--------------------------------------------------------------------------------
1 | import byteSize from "byte-size";
2 | import SyntaxHighlight from "@/app/datasets/syntax";
3 | // @ts-ignore
4 | import contents from "raw-loader!@public/download.sh";
5 | // @ts-ignore
6 | import example from "raw-loader!./example.sh";
7 |
8 | // @ts-ignore
9 | import example_2 from "raw-loader!./example_2.sh";
10 | import { getData as getRepoData } from "@/utils";
11 |
// Async page component for the download guide: totals the size of every repository and renders the
// three steps, embedding the raw text of download.sh and of the two example scripts.
// NOTE(review): JSX tags are missing from this listing; only text nodes and expressions remain.
12 | export default async function Download() {
13 | const repoData = await getRepoData();
14 | const total_size = repoData.reduce((acc, repo) => acc + repo.size, 0); // summed `size` of all repositories
15 |
16 | return (
17 | <>
18 | Download PyPI
19 |
20 | Step 1: Ensure you have space
21 |
22 | The current size of all the repositories is{" "}
23 | {byteSize(total_size, {
24 | precision: 1,
25 | }).toString()}
26 | . Make sure you have enough space on your machine before continuing.
27 |
28 | Step 2: Clone the repositories
29 |
30 |
31 |
Clone the repositories using the following command:
32 |
{example}
33 |
34 | This will create a new directory called pypi_code and begin fetching all the data from
35 | GitHub. This will take several hours.
36 |
37 |
38 |
39 |
40 | download.sh contents:
41 |
42 |
{contents}
43 |
44 |
45 | Step 3: Use the data!
46 |
47 | The data is available by standard git tooling. To list all the files within the{" "}
48 | 4suite-xml package you could run:
49 |
50 | {example_2}
51 | And listing all files can be done with:
52 | git rev-list --objects --all
53 |
54 | There is also a dataset of all the unique Python files available for download.{" "}
55 | See here for more information .
56 |
57 |
58 | >
59 | );
60 | }
61 |
--------------------------------------------------------------------------------
/src/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pypi-data/website/HEAD/src/app/favicon.ico
--------------------------------------------------------------------------------
/src/app/globals.css:
--------------------------------------------------------------------------------
/* Global stylesheet: pulls in Tailwind's base, components and utilities layers, then adds default
   sizes, weights and bottom margins for headings, paragraphs and articles inside the base layer. */
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
5 | @layer base {
6 | h1 {
7 | @apply text-4xl;
8 | @apply font-extrabold;
9 | @apply mb-3;
10 | }
11 |
/* h1 elements carrying the card-title class get text-2xl instead of the text-4xl set above. */
12 | h1.card-title {
13 | @apply text-2xl;
14 | }
15 |
/* h3 elements nested under .card-body additionally get a top margin. */
16 | .card-body h3 {
17 | @apply mt-4;
18 | }
19 |
20 | h2 {
21 | @apply text-2xl;
22 | @apply font-bold;
23 | @apply mb-3;
24 | }
25 |
26 | h3 {
27 | @apply text-xl;
28 | @apply mb-3;
29 | @apply font-bold;
30 | }
31 |
32 | p {
33 | @apply mb-1;
34 | }
35 |
36 | article {
37 | @apply mb-3;
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/app/layout.tsx:
--------------------------------------------------------------------------------
1 | import "./globals.css";
2 | import type { Metadata } from "next";
3 | import { Inter } from "next/font/google";
4 | import { SWRProvider } from "./swr-provider";
5 | import NavBar from "./navbar";
6 | import React, { Suspense } from "react";
7 |
// Inter font loaded through next/font/google, limited to the "latin" subset.
8 | const inter = Inter({ subsets: ["latin"] });
9 |
// Exported object typed as Next's `Metadata`, carrying the title and description strings.
10 | export const metadata: Metadata = {
11 | title: "PyPI Data",
12 | description: "PyPI code explorable on Github",
13 | };
14 |
// Root layout component: receives the page tree as `children` and renders it inside the site shell.
// NOTE(review): the markup is missing from this listing (lines 17-36 are mostly empty), so how
// `inter`, `SWRProvider`, `NavBar` and `Suspense` from the imports are used cannot be confirmed here.
15 | export default function RootLayout({ children }: { children: React.ReactNode }) {
16 | return (
17 |
18 |
19 |
20 |
21 |
22 | >}>{children}
23 |
24 |
25 |
34 |
35 |
36 |
37 | );
38 | }
39 |
--------------------------------------------------------------------------------
/src/app/layouts/markdown.tsx:
--------------------------------------------------------------------------------
1 | // @ts-ignore
2 | import React from "react";
3 |
// Layout wrapper that returns its `children`.
// NOTE(review): the wrapping element is missing from this listing; confirm what it renders around them.
4 | export default function Markdown({ children }: { children: React.ReactNode }) {
5 | return {children} ;
6 | }
7 |
--------------------------------------------------------------------------------
/src/app/navbar.tsx:
--------------------------------------------------------------------------------
// Navigation bar component; takes no props.
// NOTE(review): listing lines 3-25 (the markup) are missing, so the links it renders are not visible here.
1 | export default function NavBar() {
2 | return (
3 |
26 | );
27 | }
28 |
--------------------------------------------------------------------------------
/src/app/page.tsx:
--------------------------------------------------------------------------------
1 | import TotalStats from "@/app/stats/total_stats";
2 | import getStats from "@/app/stats/stats";
3 | import byteSize from "byte-size";
4 | import { getData as getRepoData } from "@/utils";
5 |
// Landing page: loads the aggregate stats and the repository list, totals the repository sizes and
// renders the project introduction together with the "download" and "datasets" call-to-action texts.
// NOTE(review): JSX tags are missing from this listing; only text nodes and expressions are visible.
// Fix: corrected the intro copy ("providing all the code", "contents of PyPI take up").
6 | export default async function Home() {
7 | const data = await getStats();
8 | const repoData = await getRepoData();
9 | const total_size = repoData.reduce((acc, repo) => acc + repo.size, 0); // summed `size` of all repositories
10 | const chartData = data.stats_over_time.sort((a, b) => (a.month < b.month ? -1 : 1)); // ascending by month; sorts in place
11 | const lastMonth = chartData[chartData.length - 2]; // second-to-last entry — presumably skips the still-incomplete current month; confirm
12 |
13 | return (
14 | <>
15 |
16 |
17 |
18 |
What is this?
19 |
20 |
21 | This project makes it easy to analyze the Python ecosystem by providing all the code ever published to
22 | PyPI via git, parquet datasets with file metadata, and a set of tools to help analyze the data.
23 |
24 |
25 | Thanks to the power of git the contents of PyPI take up only{" "}
26 | {byteSize(total_size, {
27 | precision: 1,
28 | }).toString()}{" "}
29 | on disk, and thanks to tools like libcst every Python
30 | file can be analysed on a consumer-grade laptop in a few hours.
31 |
32 |
33 |
34 | Download all the code
35 |
36 |
37 | Explore the datasets
38 |
39 |
40 |
41 |
42 |
43 |
56 | >
57 | );
58 | }
59 |
--------------------------------------------------------------------------------
/src/app/projects/page.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { useRouter, useSearchParams } from "next/navigation";
3 | import { useEffect, useMemo, useState } from "react";
4 | import {default as Fuse, FuseIndex} from "fuse.js";
5 | import { useDebounce } from "use-debounce";
6 | import sampleSize from "lodash.samplesize";
7 | import useSWRImmutable from "swr/immutable";
8 |
9 | // const ASSET_PATH = "https://data.py-code.org" //(process.env.NEXT_PUBLIC_ASSET_PATH || "").replace("http://", "https://");
10 |
// Client-side project search page. Loads /data/fuse-index.json through SWR, builds a Fuse index from
// it, debounces the search term by 500 ms, mirrors the debounced term into the `search` query
// parameter, and lists either the fuzzy matches or a random sample of package names.
// NOTE(review): JSX tags (including the search input, listing lines 111-142) are missing from this listing.
11 | export default function ProjectsList() {
12 | const router = useRouter();
13 | const searchParams = useSearchParams()!;
14 | const searchParam = searchParams.get("search") || ""; // initial term comes from ?search=
15 | let [search, setSearch] = useState(searchParam);
16 | const [debouncedSearch] = useDebounce(search, 500);
17 | const [isClient, setIsClient] = useState(false); // set to true by the mount effect below; gates the random sampling
18 |
19 | const { data, error, isLoading } = useSWRImmutable(`/data/fuse-index.json`);
20 |
// Fuse instance over data.packages using the pre-built index in data.json; null until the data is available.
21 | const fuse = useMemo(() => {
22 | if (error || isLoading || data == null) {
23 | return null;
24 | }
25 | // @ts-ignore
26 | const idx: FuseIndex = Fuse.parseIndex(data.json);
27 | // @ts-ignore
28 | return new Fuse(
29 | data.packages,
30 | {
31 | includeScore: false,
32 | threshold: 0.3,
33 | distance: 10,
34 | // ignoreLocation: true,
35 | useExtendedSearch: false,
36 | },
37 | idx,
38 | );
39 | }, [data, error, isLoading]);
40 |
41 | useEffect(() => {
42 | setIsClient(true);
43 | }, []);
44 |
// Writes the debounced term into the URL with router.replace.
// NOTE(review): an empty term returns early, so clearing the box leaves the previous ?search= value in the URL.
45 | useEffect(() => {
46 | if (!debouncedSearch) {
47 | return;
48 | }
49 | // @ts-ignore
50 | const params = new URLSearchParams(searchParams);
51 | params.set("search", debouncedSearch);
52 | router.replace(`/projects/?${params}`);
53 | }, [debouncedSearch, router, searchParams]);
54 |
// Up to 50 fuzzy matches for terms longer than 3 characters, 10 random packages for an empty term.
// NOTE(review): terms of 1-3 characters fall through to the empty array — confirm this is intended.
55 | const searchResults = useMemo(() => {
56 | if (debouncedSearch.length > 3 && fuse) {
57 | console.time(`search ${debouncedSearch}`);
58 | let result = fuse.search(debouncedSearch, { limit: 50 });
59 | console.timeEnd(`search ${debouncedSearch}`);
60 | // @ts-ignore
61 | return result.map(({ item }) => item);
62 | } else if (debouncedSearch.length == 0 && isClient) {
63 | // Select 10 random packages
64 | // @ts-ignore
65 | return sampleSize((data && data.packages) || [], 10);
66 | }
67 | return [];
68 | }, [fuse, data, debouncedSearch, isClient]);
69 |
// One randomly sampled package name once the data is loaded on the client, otherwise null.
70 | const randomName = useMemo(() => {
71 | if (isClient && data) {
72 | // @ts-ignore
73 | return sampleSize(data.packages, 1)[0];
74 | } else {
75 | return null;
76 | }
77 | }, [data, isClient]);
78 |
79 | if (isLoading) {
80 | return ;
81 | } else if (error) {
82 | return (
83 |
84 |
90 |
96 |
97 |
Error loading search index! {error.toString()}
98 |
99 | );
100 | }
101 |
102 | return (
103 | <>
104 |
105 | Projects List
106 |
107 | This is a list of all the projects that have been uploaded to PyPI. You can fuzzy-search for a project by
108 | name. All searching is done client-side.
109 |
110 |
111 |
142 |
143 |
144 |
145 |
146 | Name
147 | Link
148 |
149 |
150 |
151 | {
152 | // @ts-ignore
153 | searchResults.map((p) => {
154 | return (
155 |
156 |
157 |
158 | {p}
159 |
160 |
161 |
162 |
163 | View
164 |
165 |
166 |
167 | );
168 | })}
169 |
170 |
171 |
172 | >
173 | );
174 | }
175 |
--------------------------------------------------------------------------------
/src/app/projects/view/layout.tsx:
--------------------------------------------------------------------------------
1 | import React, { Suspense } from "react";
2 |
// Placeholder component that renders the text "Loading".
3 | function ProjectFallback() {
4 | return <>Loading>;
5 | }
6 |
// Layout for /projects/view that renders its `children`.
// NOTE(review): the wrapper element is missing from this listing — presumably a `Suspense` boundary
// (imported above) whose fallback is `ProjectFallback`; confirm against the real file.
7 | export default function RootLayout({ children }: { children: React.ReactNode }) {
8 | return }>{children};
9 | }
10 |
--------------------------------------------------------------------------------
/src/app/projects/view/page.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { useSearchParams } from "next/navigation";
3 | import ProjectInfo from "./project_info";
4 |
// Client page for /projects/view: reads the `name` query parameter and shows an error message when it
// is absent.
// NOTE(review): the markup is missing from this listing; the final return presumably renders the
// imported `ProjectInfo` with `name` — confirm.
5 | export default function Page() {
6 | const searchParams = useSearchParams();
7 | const name = searchParams.get("name");
8 | if (name == null) {
9 | return (
10 |
11 |
17 |
23 |
24 |
Error! No project given
25 |
26 | );
27 | }
28 | return ;
29 | }
30 |
--------------------------------------------------------------------------------
/src/app/projects/view/project_info.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import useSWRImmutable from "swr/immutable";
3 | import Timestamp from "react-timestamp";
4 | import { useEffect } from "react";
5 |
// Shapes that the ProjectInfo component in this file casts its fetched per-project JSON to.
// One entry of `packages_with_indexes`.
6 | type PackageWithIndex = {
7 | index: number;
8 | package_filename: string;
9 | package: {
10 | project_name: string;
11 | project_version: string;
12 | url: string; // parsed with `new URL()` by getInspectorLink, so it has to be an absolute URL
13 | upload_time: string;
14 | processed: boolean;
15 | };
16 | };
17 |
// Top-level document for one project: its name and the list of entries rendered as table rows.
18 | type ProjectInfo = {
19 | name: string;
20 | packages_with_indexes: PackageWithIndex[];
21 | };
22 |
// Builds the inspector.pypi.io URL for one release file: project name, then version immediately
// followed by the path component of the file's download URL.
23 | function getInspectorLink(p: PackageWithIndex): string {
24 | const { project_name, project_version, url } = p.package;
25 | return ["https://inspector.pypi.io/project", project_name, `${project_version}${new URL(url).pathname}`].join("/");
26 | }
27 |
// Base URL that ProjectInfo prefixes to the per-project JSON path. The environment-driven variant
// on the next line is disabled in favour of the hard-coded host.
28 | //const ASSET_PATH = (process.env.NEXT_PUBLIC_ASSET_PATH || "").replace("http://", "https://");
29 | const ASSET_PATH = "https://data.py-code.org";
30 |
// Client component listing every package of one PyPI project. Fetches
// `${ASSET_PATH}/data/packages/<first character>/<name>.json` through SWR, sets the document title and
// a canonical link once loading has finished, and renders one table row per entry.
// `error` from SWR is not inspected: a failed request leaves `data` undefined and is shown as "not found".
// NOTE(review): JSX tags are missing from this listing; only text nodes and expressions are visible.
// Fix: the effect used to append a new canonical <link> on every run and never removed it, so name
// changes and unmounts left stale duplicates in <head>; it now removes its tag on cleanup and
// URL-encodes `name` in the href.
31 | export default function ProjectInfo({ name }: { name: string }) {
32 | const first_char = Array.from(name)[0]; // first code point of the name, used as the directory segment
33 | const { data, error, isLoading } = useSWRImmutable(`${ASSET_PATH}/data/packages/${first_char}/${name}.json`);
34 | useEffect(() => {
35 | if (isLoading) return;
36 | document.title = `PyPI code for ${name}`;
37 | const canonical = document.createElement("link");
38 | canonical.rel = "canonical";
39 | canonical.href = `https://py-code.org/projects/view?name=${encodeURIComponent(name)}`;
40 | document.head.appendChild(canonical);
41 | return () => canonical.remove();
42 | }, [isLoading, name]);
43 | if (isLoading) {
44 | return Loading
;
45 | }
46 | const project_info: ProjectInfo = data;
47 | if (data === undefined) {
48 | return
49 | Project not found: {name}
50 | The project with the name {name} cannot be found.
51 | ;
52 | }
53 | return (
54 | <>
55 |
56 | Source code for {project_info.name}
57 |
58 | The PyPI project {project_info.name} has {project_info.packages_with_indexes.length} packages.
59 | Click the links below to view the source code for these packages on GitHub.
60 |
61 |
62 |
63 |
64 |
65 |
66 | Version
67 | Release
68 | Github
69 | Published on
70 | PyPi
71 |
72 |
73 |
74 | {project_info.packages_with_indexes.map((p, idx) => {
75 | return (
76 |
77 | {p.package.project_version}
78 | {p.package_filename}
79 |
80 |
85 | View Code
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | Download
94 |
95 |
96 | Inspector
97 |
98 |
99 |
100 | );
101 | })}
102 |
103 |
104 |
105 | >
106 | );
107 | }
108 |
--------------------------------------------------------------------------------
/src/app/repositories/[name]/page.tsx:
--------------------------------------------------------------------------------
1 | import {parseISO, format} from "date-fns";
2 | import byteSize from "byte-size";
3 | import { getData } from "@/utils";
4 |
// Detail page for one repository, selected by the `name` route parameter. Looks the repository up in
// the list returned by getData(), shows its package count, date range and size, and lists its
// projects ordered by descending count.
// NOTE(review): JSX tags are missing from this listing; only text nodes and expressions are visible.
5 | export default async function RepositoryDetail({ params }: { params: { name: string } }) {
6 | const data = await getData();
7 | const repo = data.find((repo) => repo.name === params.name);
8 | if (repo == undefined) {
9 | return Unknown repo ;
10 | }
11 | const earliest = parseISO(repo.stats.earliest_package);
12 | const latest = parseISO(repo.stats.latest_package);
13 | return (
14 | <>
15 |
16 | {repo.name}
17 |
18 | This repository contains {repo.stats.total_packages} packages published between{" "}
19 | {format(earliest, "dd/MM/yyyy")}{" "}
20 | and {format(latest, "dd/MM/yyyy")}. The compressed size of this repository is{" "}
21 | {byteSize(repo.size, { units: "iec", precision: 1 }).toString()}
22 |
23 |
24 | Link: {repo.url}
25 |
26 |
27 |
28 |
29 |
30 | Package
31 | Github
32 | Project
33 | Count
34 |
35 |
36 |
37 | {Object.entries(repo.projects)
38 | .sort(([, a], [, b]) => b - a)
39 | .map(([name, count]) => (
40 |
41 |
42 | {name}
43 |
44 |
45 |
46 | Browse Code on GitHub
47 |
48 |
49 |
50 |
51 | View all releases
52 |
53 |
54 | {count}
55 |
56 | ))}
57 |
58 |
59 | >
60 | );
61 | }
62 |
// Produces one `{ name }` params object per repository returned by getData().
63 | export async function generateStaticParams() {
64 | const repositories = await getData();
65 | return Array.from(repositories, ({ name }) => {
66 | return { name };
67 | });
68 | }
69 |
--------------------------------------------------------------------------------
/src/app/repositories/page.tsx:
--------------------------------------------------------------------------------
1 | import byteSize from "byte-size";
2 | import { parseISO, format, differenceInDays } from "date-fns";
3 | import RepoStats from "@/app/repositories/repo-stats";
4 | import { getData } from "@/utils";
5 |
// Overview page listing every repository returned by getData(), ordered by descending `index`, with
// its date range, the number of days that range spans, size, package count and percent done.
// NOTE(review): JSX tags are missing from this listing; only text nodes and expressions are visible.
6 | export default async function RepositoriesList() {
7 | const data = await getData();
8 |
9 | return (
10 | <>
11 |
12 | Repositories
13 |
14 |
15 |
16 |
17 | Repositories are the top level of the PyPI data. Each repository contains one or more projects published to
18 | PyPI. This page shows the list of repositories with the size and completion percent. Click on a repository to
19 | view a list of packages contained within.
20 |
21 |
22 |
23 |
24 |
25 |
26 | Name
27 | Range
28 | Days
29 | Size
30 | Packages
31 | Progress
32 |
33 |
34 |
35 | {data
36 | .sort((a, b) => b.index - a.index)
37 | .map((p) => {
38 | const earliest = parseISO(p.stats.earliest_package);
39 | const latest = parseISO(p.stats.latest_package);
40 | return (
41 |
42 |
43 |
44 | {p.name}
45 |
46 |
47 |
48 | {format(earliest, "dd/MM/yyyy")} to {format(latest, "dd/MM/yyyy")}
49 |
50 | {differenceInDays(latest, earliest)}
51 | {byteSize(p.size, { units: "iec", precision: 1 }).toString()}
52 | {p.stats.total_packages}
53 | {p.percent_done}
54 |
55 | );
56 | })}
57 |
58 |
59 | >
60 | );
61 | }
62 |
// One repository record as consumed by the repository pages and by RepoStats.
63 | export type RepoData = {
64 | name: string;
65 | index: number; // RepositoriesList sorts by this, highest first
66 | percent_done: number;
67 | size: number; // passed to byteSize() wherever it is displayed
68 | url: string;
69 | packages_url: string;
70 | stats: {
71 | earliest_package: string; // parsed with parseISO by the pages
72 | latest_package: string; // parsed with parseISO by the pages
73 | total_packages: number;
74 | done_packages: number;
75 | };
76 | projects: Map; // NOTE(review): type arguments are missing in this listing; RepositoryDetail reads it via Object.entries as name -> numeric count — confirm
77 | };
78 |
79 |
--------------------------------------------------------------------------------
/src/app/repositories/repo-stats.tsx:
--------------------------------------------------------------------------------
1 | import { Bars3BottomRightIcon, CircleStackIcon, CodeBracketIcon } from "@heroicons/react/24/solid";
2 | import byteSize from "byte-size";
3 | import { RepoData } from "@/app/repositories/page";
4 |
// Summary tiles for a list of repositories: how many there are, the locale-formatted sum of their
// `stats.total_packages`, and the byteSize-formatted sum of their `size`.
// NOTE(review): JSX tags are missing from this listing; only text nodes and expressions are visible.
// NOTE(review): the last caption says "uncompressed" while RepositoryDetail describes `repo.size`
// as the compressed size — confirm which wording is correct.
5 | export default function RepoStats({ data }: { data: RepoData[] }) {
6 | const repo_count = data.length;
7 | const total_releases = data
8 | .reduce((acc, repo) => acc + repo.stats.total_packages, 0)
9 | .toLocaleString(undefined, { minimumFractionDigits: 0 }); // already a formatted string from here on
10 | const total_size = data.reduce((acc, repo) => acc + repo.size, 0);
11 | return (
12 |
13 |
14 |
15 |
16 |
17 |
Repositories
18 |
{repo_count}
19 |
20 |
21 |
22 |
23 |
24 |
25 |
Total Releases
26 |
{total_releases.toLocaleString()}
27 |
28 |
29 |
30 |
31 |
32 |
33 |
Total uncompressed size
34 |
35 | {byteSize(total_size, {
36 | units: "iec",
37 | precision: 1,
38 | }).toString()}
39 |
40 |
41 |
42 | );
43 | }
44 |
--------------------------------------------------------------------------------
/src/app/stats/chart-scroll.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import { Chart } from "@/app/stats/chart";
4 | import { useEffect, useState } from "react";
5 | import ShowSQL from "@/app/stats/sql";
6 |
// Props accepted by ChartScroll.
7 | interface ChartScrollProps {
8 | chartData: any[];
9 | charts: { name: string; valueNames: string[] }[]; // one selectable entry per chart; `valueNames` picks the series
10 | formats?: { [key: string]: "bytes" }; // defaults to {} in ChartScroll
11 | sqlData?: string; // when truthy, ChartScroll renders the SQL section
12 | cumulative?: boolean; // defaults to false in ChartScroll
13 | showValueHeader?: (value: { [key: string]: string | number }) => string;
14 | }
15 |
// Chart switcher: renders one selectable entry per item in `charts` and tracks the selected index.
// NOTE(review): the chart element itself (listing lines 44-50) and the other JSX tags are missing from
// this listing, so how `selectedValueNames` and the remaining props are passed on cannot be confirmed.
16 | export default function ChartScroll({
17 | chartData,
18 | charts,
19 | sqlData,
20 | cumulative = false,
21 | formats = {},
22 | showValueHeader,
23 | }: ChartScrollProps) {
24 | const [chartIndex, setChartIndex] = useState(0);
// Go back to the first chart whenever the `charts` prop changes identity.
25 | useEffect(() => {
26 | setChartIndex(0);
27 | }, [charts]);
28 | const selectedValueNames = chartIndex < charts.length ? charts[chartIndex].valueNames : charts[0].valueNames; // falls back to the first chart when the index is out of range
29 |
30 | return (
31 | <>
32 |
33 | {charts.map((chart, index) => {
34 | const isSelected = index === chartIndex;
35 | return (
36 |
37 | setChartIndex(index)}>
38 | {chart.name}
39 |
40 |
41 | );
42 | })}
43 |
44 |
51 | {sqlData && }
52 | >
53 | );
54 | }
55 |
--------------------------------------------------------------------------------
/src/app/stats/chart.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import {
3 | CartesianGrid,
4 | Cell,
5 | Customized,
6 | LabelList,
7 | Legend,
8 | Line,
9 | LineChart,
10 | Pie,
11 | PieChart as RechartPieChart,
12 | ResponsiveContainer,
13 | Tooltip,
14 | XAxis,
15 | YAxis,
16 | } from "recharts";
17 | import byteSize from "byte-size";
18 | import { cumulative_sum } from "@/app/stats/utils";
19 | import { genColours } from "./colours";
20 |
21 | // const COLORS = ["#0088FE", "#00C49F", "#FFBB28", "#ff5100", "#FF8042"];
22 |
// Line chart over `chartData` with one series per entry in `valueNames`. Optionally converts the
// series to running totals (`cumulative`), formats values flagged as "bytes" in `formats` with
// byteSize, and shows a header computed by `showValueHeader` from the last row.
// NOTE(review): most recharts elements are missing from this listing; only fragments remain.
23 | export function Chart({
24 | chartData,
25 | valueNames,
26 | cumulative = false,
27 | formats = {},
28 | showValueHeader,
29 | }: {
30 | chartData: { [key: string]: string | number }[];
31 | valueNames: string[];
32 | cumulative?: boolean;
33 | formats?: { [key: string]: "bytes" };
34 | showValueHeader?: (value: { [key: string]: string | number }) => string;
35 | }) {
36 | // drop the leading rows in which every selected value is exactly 0; everything from the first other row onward is kept
37 | const firstNonNullIndex = chartData.findIndex((value) => {
38 | for (const valueName of valueNames) {
39 | if (value[valueName] !== 0) {
40 | return true;
41 | }
42 | }
43 | return false;
44 | });
45 | if (firstNonNullIndex > 0) {
46 | chartData = chartData.slice(firstNonNullIndex);
47 | }
48 |
49 | if (cumulative) {
50 | chartData = cumulative_sum(chartData, valueNames);
51 | }
52 |
53 | const colours = genColours(valueNames.length); // one colour per series
54 |
55 | return (
56 | <>
57 | {showValueHeader && {showValueHeader(chartData[chartData.length - 1])} }
58 |
59 |
60 | {valueNames.map((valueName, index) => {
61 | const name = valueName.replaceAll("_", " ");
62 | return (
63 |
72 | );
73 | })}
74 |
75 |
76 | {
79 | // @ts-ignore
80 | return new Intl.NumberFormat("en").format(value);
81 | }}
82 | />
83 | {
85 | // @ts-ignore
86 | if (formats[item.dataKey] == "bytes" && typeof value === "number") {
87 | return byteSize(value, { precision: 2, units: "iec" }).toString();
88 | }
89 | // @ts-ignore
90 | return new Intl.NumberFormat("en").format(value);
91 | }}
92 | />
93 |
94 |
95 |
96 | >
97 | );
98 | }
99 |
// Pie chart over `chartData`, reading slice values from `dataKey` and labels from `nameKey`.
// When `limit` is truthy, only the first `limit` rows are kept and the `dataKey` values of all
// remaining rows are summed into one extra row labelled "Other".
// NOTE(review): the recharts elements are missing from this listing; only fragments remain.
100 | export function PieChart({
101 | chartData,
102 | dataKey,
103 | nameKey,
104 | limit,
105 | }: {
106 | chartData: any[];
107 | dataKey: string;
108 | nameKey: string;
109 | limit?: number;
110 | }) {
111 | if (limit) {
112 | let rest_total = chartData.slice(limit, chartData.length).reduce((acc, value) => acc + value[dataKey], 0);
113 | let rest_item = {
114 | [nameKey]: "Other",
115 | [dataKey]: rest_total,
116 | };
117 | chartData = [...chartData.slice(0, limit), rest_item]; // the "Other" row is appended even when nothing was cut off
118 | }
119 |
120 | const colours = genColours(chartData.length); // one colour per slice
121 |
122 | return (
123 |
124 |
125 |
126 |
134 | {chartData.map((entry, index) => (
135 | |
136 | ))}
137 | {/* */}
138 |
139 |
140 |
141 |
142 |
143 |
144 | );
145 | }
146 |
--------------------------------------------------------------------------------
/src/app/stats/colours.ts:
--------------------------------------------------------------------------------
1 | import chroma from "chroma-js";
2 |
3 | // chroma.scale('RdYlBu').domain(myValues, 7, 'quantiles');
// Colour stops that the generated scale runs through, in order.
4 | const GRADIENT_STOPS = ["orange", "skyblue", "red"];
5 |
// Returns `value` colours taken from a chroma scale built over the stops above.
6 | export function genColours(value: number): string[] {
7 | return chroma.scale(GRADIENT_STOPS).colors(value);
8 | }
9 |
--------------------------------------------------------------------------------
/src/app/stats/language-stats.tsx:
--------------------------------------------------------------------------------
// Downloads language_stats.json and parses it as newline-delimited JSON: every non-empty line is
// parsed on its own and the resulting rows are returned as LanguageStats[].
// Throws Error("Failed to fetch data") when the response is not ok.
// NOTE(review): the type argument of the `Promise` return annotation is missing in this listing.
1 | export default async function getLanguageStats(): Promise {
2 | const response = await fetch("https://raw.githubusercontent.com/pypi-data/data/main/stats/language_stats.json");
3 | if (!response.ok) {
4 | throw new Error("Failed to fetch data");
5 | }
6 |
7 | const body = await response.text();
8 | const rows = body
9 | .split("\n")
10 | .filter((line) => line !== "")
11 | .map((line) => {
12 | return JSON.parse(line);
13 | });
14 |
15 | return rows as LanguageStats[];
16 | }
17 |
// Row type that getLanguageStats casts each parsed line to: a `month` plus numeric counters.
// NOTE(review): the has_* fields are presumably per-month counts of projects using that language
// feature and `total` the number of projects considered — confirm against the data producer.
18 | export type LanguageStats = {
19 | month: string; // the stats page orders rows by comparing this string
20 | total: number;
21 | has_async: number;
22 | has_async_comp: number;
23 |
24 | has_fstring: number;
25 | has_annotations: number;
26 |
27 | has_try_star: number;
28 | has_match: number;
29 | has_walrus: number;
30 |
31 | has_dataclasses: number;
32 |
33 | has_generator_expression: number;
34 | has_list_comp: number;
35 | has_dict_comp: number;
36 | has_set_comp: number;
37 | };
38 |
// Type that getTotalLanguageStats casts its JSON response to: the same counters as LanguageStats
// but without a `month` field.
39 | export type TotalLanguageStats = {
40 | total: number; // the stats page reads this as the denominator for its percentages
41 | has_async: number;
42 | has_async_comp: number;
43 |
44 | has_fstring: number;
45 | has_annotations: number;
46 |
47 | has_try_star: number;
48 | has_match: number;
49 | has_walrus: number;
50 |
51 | has_dataclasses: number;
52 |
53 | has_generator_expression: number;
54 | has_list_comp: number;
55 | has_dict_comp: number;
56 | has_set_comp: number;
57 | };
58 |
// Downloads language_stats_totals.json and returns its parsed JSON body as TotalLanguageStats.
// Throws Error("Failed to fetch data") when the response is not ok.
// NOTE(review): the type argument of the `Promise` return annotation is missing in this listing.
59 | export async function getTotalLanguageStats(): Promise {
60 | const totalsUrl = "https://raw.githubusercontent.com/pypi-data/data/main/stats/language_stats_totals.json";
61 | const response = await fetch(totalsUrl);
62 | if (response.ok) {
63 | return (await response.json()) as TotalLanguageStats;
64 | }
65 |
66 | throw new Error("Failed to fetch data");
67 | }
68 |
--------------------------------------------------------------------------------
/src/app/stats/page.tsx:
--------------------------------------------------------------------------------
1 | import getStats from "@/app/stats/stats";
2 | import TotalStats from "@/app/stats/total_stats";
3 | import { PieChart } from "@/app/stats/chart";
4 | import Table from "@/app/table";
5 | import { InformationCircleIcon } from "@heroicons/react/24/solid";
6 | import byteSize from "byte-size";
7 | import ChartScroll from "@/app/stats/chart-scroll";
8 | import ShowSQL from "@/app/stats/sql";
9 | import extrapolate from "@/app/stats/shitpost-model";
10 | import ShitpostChart from "@/app/stats/shitpost-chart";
11 | import { cumulative_sum } from "@/app/stats/utils";
12 | import getLanguageStats, { getTotalLanguageStats } from "@/app/stats/language-stats";
13 |
// Small helper component that displays `text`.
// NOTE(review): the surrounding markup is missing from this listing — presumably it pairs the text
// with the `InformationCircleIcon` imported above; confirm.
14 | function InfoBubble({ text }: { text: string }) {
15 | return (
16 |
17 |
18 | {text}
19 |
20 | );
21 | }
22 |
// Statistics page. Loads the aggregate stats, the secret-type totals and the language statistics,
// derives the per-month combined series, the binary/text size breakdown, the TensorFlow share and
// the extrapolated future values, and renders them as charts and tables.
// NOTE(review): JSX tags and most chart/table elements are missing from this listing; only text
// nodes and expressions are visible.
// NOTE(review): `combined_over_time_stats` indexes three other series by position, so it relies on
// all four arrays having the same months in the same order — confirm the data guarantees that.
// NOTE(review): unlike the language-stats fetchers, the secret-totals response is not checked with
// `res.ok` before `.json()` is called.
// Fix: copy corrections only ("This month's stats", "e.g. if", "files were skipped").
23 | export default async function Page() {
24 | const data = await getStats();
25 | const new_projects_over_time = data.new_projects_over_time.sort((a, b) => (a.month < b.month ? -1 : 1));
26 | const new_project_versions_over_time = data.new_project_versions_over_time.sort((a, b) =>
27 | a.month < b.month ? -1 : 1,
28 | );
29 | const new_releases_over_time = data.new_releases_over_time.sort((a, b) => (a.month < b.month ? -1 : 1));
30 | const chartData = data.stats_over_time.sort((a, b) => (a.month < b.month ? -1 : 1));
31 |
32 | const combined_over_time_stats = new_projects_over_time.map((el, i) => ({
33 | month: el.month,
34 | new_projects: el.count,
35 | new_project_versions: new_project_versions_over_time[i].count,
36 | new_releases: new_releases_over_time[i].count,
37 | total_files: chartData[i].total_files,
38 | total_lines: chartData[i].total_lines,
39 | total_size: chartData[i].total_size,
40 | }));
41 | console.log("This month's stats:", combined_over_time_stats[combined_over_time_stats.length - 1]);
42 |
43 | const projectStats = data.project_level_breakdowns.sort((a, b) => (a.month < b.month ? -1 : 1));
44 |
45 | const secretTypesResponse = await fetch(
46 | "https://raw.githubusercontent.com/pypi-data/data/main/stats/github_secret_totals.json",
47 | );
48 | const secretTypes: Map = await secretTypesResponse.json();
// Rows of { type, count }, largest count first.
49 | const secretTypesTable = Object.entries(secretTypes)
50 | .sort((a, b) => (a[1] < b[1] ? 1 : -1))
51 | .map(([type, count]) => ({
52 | type,
53 | count,
54 | }));
55 |
56 | const lastMonth = chartData[chartData.length - 1];
57 |
// Adds a display label such as "Binary: <size>" or "Text: <size>" to every binary_sizes row.
58 | const binarySizes = data.binary_sizes.map((el) => {
59 | const is_binary = el.is_binary ? "Binary" : "Text";
60 | const text = `${is_binary}: ${byteSize(el.total_size, { precision: 1, units: "iec" })}`;
61 | return {
62 | ...el,
63 | text,
64 | };
65 | });
66 |
// Share of the total size taken by projects whose name starts with "tf-" or "tensorflow-".
67 | const tensorflow_total_size = data.projects_by_files
68 | .filter((el) => el.project_name.startsWith("tf-") || el.project_name.startsWith("tensorflow-"))
69 | .reduce((acc, el) => acc + el.total_size, 0);
70 | const total_size = binarySizes.reduce((acc, el) => acc + el.total_size, 0);
71 | const tensorflow_percentage = Math.round((tensorflow_total_size / total_size) * 100);
72 | const tensorflow_human_size = byteSize(tensorflow_total_size, { precision: 1, units: "iec" });
73 |
74 | const skip_reason_stats = data.skip_reason_stats.filter(({ skip_reason }) => skip_reason != "");
75 |
76 | const years = 8;
77 | const extrapolated = extrapolate(years, combined_over_time_stats);
78 | // const extrapolated_cumulative_slice = cumulative_sum(extrapolated, ["new_releases"]);
79 | const in_future_years = cumulative_sum(extrapolated, ["new_releases"])[extrapolated.length - 2];
80 |
81 | const languageStats = await getLanguageStats();
82 | const languageStatsByMonth = languageStats.sort((a, b) => (a.month < b.month ? -1 : 1));
83 |
84 | const totalLanguageStats = await getTotalLanguageStats();
85 | let totalLanguageCount = totalLanguageStats.total;
86 |
87 | return (
88 | <>
89 | The contents of PyPI, in numbers
90 |
91 |
92 |
93 |
94 |
95 | This page contains a breakdown of the contents of PyPI from parsing the contents of packages. You can{" "}
96 |
97 | download PyPI locally
98 | {" "}
99 | to do your own analysis or run{" "}
100 |
101 | SQL queries on the data in your browser
102 |
103 |
104 |
105 | Project Contents
106 |
107 | This data only counts unique projects , not versions. e.g. if a project has published 10 versions
108 | in a month, each with a setup.py file, it will only be counted once.
109 |
110 |
127 |
128 | Language Features
129 |
130 | This data only counts unique projects , not versions. e.g. if a project has published 10 versions
131 | in a month, each containing an async function, it will only be counted once.
132 |
133 |
134 |
135 |
159 |
160 |
161 |
Breakdown
162 |
 key != "total")
166 | .map(([key, value]) => {
167 | const percent = Math.round((value / totalLanguageCount) * 100);
168 | return {
169 | Name: key.replace("has_", "").replace("_", " "),
170 | Projects: value,
171 | Percent: percent,
172 | };
173 | })
174 | .sort((a, b) => (a.Projects < b.Projects ? 1 : -1))}
175 | columns={[{ name: "Name" }, { name: "Projects", type: "number" }, { name: "Percent", type: "number" }]}
176 | />
177 |
178 |
179 |
180 | Secrets Detected
181 |
182 |
183 | PyPI contains a lot of secrets.
184 |
185 |
186 |
189 |
190 |
191 | Growth
192 |
193 | {/**/}
194 | {/*
*/}
195 |
201 | {/**/}
202 | {/*
*/}
203 | {/*
*/}
204 |
205 | Binary files
206 |
207 |
208 |
209 | This shows a breakdown of the binary files on PyPI, by extension. Binary files are the vast majority of the
210 | content on PyPI, accounting for nearly 75% of the uncompressed size.
211 |
212 |
213 |
214 |
215 |
227 |
228 |
229 |
230 |
231 |
Largest Projects by size
232 |
235 |
245 |
246 |
247 |
248 |
Stats By Extensions
249 |
250 |
260 |
261 |
262 |
263 |
264 |
265 | Files not committed to Github
266 |
267 |
268 | Not all files can be committed to GitHub due to size limits. Some have a few very, very long lines whilst
269 | others are junk like mistakenly added virtualenvs or VCS directories. This table shows a breakdown of the
270 | reasons why files were skipped.
271 |
272 |
289 |
290 |
291 | >
292 | );
293 | }
294 |
--------------------------------------------------------------------------------
/src/app/stats/shitpost-chart.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import ChartScroll from "@/app/stats/chart-scroll";
4 | import React, { useEffect, useState } from "react";
5 |
// Exponential ease-in: returns 0 for an input of 0, otherwise 2 raised to the power (10 * x - 10).
6 | function easeInExpo(x: number) {
7 | return x !== 0 ? 2 ** (10 * x - 10) : 0;
8 | }
9 |
// Animated "extrapolation" charts. While `seeExtrapolate` is on, a timer advances
// `extrapolatedIndex` one step at a time through `extrapolated`, with the delay shrinking as the
// remaining fraction falls (easeInExpo(percentLeft) * 750 ms); switching it off resets the index to
// `chartData.length`. Three ChartScroll-style charts (releases, size, files) show the values.
// NOTE(review): JSX tags and the chart elements' data props are missing from this listing.
// Fix: corrected the copy "If this dangerous expansion is not stopped".
10 | export default function ShitpostChart({
11 | chartData,
12 | extrapolated,
13 | years,
14 | future_value,
15 | }: {
16 | chartData: any[];
17 | extrapolated: { month: string; new_releases: number; total_size: number; total_files: number }[];
18 | years: number;
19 | future_value: number;
20 | }) {
21 | let [seeExtrapolate, setSeeExtrapolate] = useState(false);
22 | let [extrapolatedIndex, setExtrapolatedIndex] = useState(chartData.length);
23 | const extrapolatedLength = extrapolated.length - chartData.length; // number of rows beyond the real data
24 | const extrapolatedOffset = extrapolatedIndex - chartData.length; // how many of those rows are currently revealed
25 |
26 | const percentDone = extrapolatedOffset / extrapolatedLength;
27 | const percentLeft = 1 - percentDone;
28 |
29 | const humans = 8_000_000_000;
30 | const packages_per_human = future_value / humans;
31 |
// Reset to the end of the real data whenever extrapolation is switched off.
// NOTE(review): the index starts at chartData.length and is only incremented or reset to it, so the
// `!== 0` test only fails for empty chartData — possibly meant `!== chartData.length`; confirm.
32 | useEffect(() => {
33 | if (!seeExtrapolate && extrapolatedIndex !== 0) {
34 | setExtrapolatedIndex(chartData.length);
35 | }
36 | }, [seeExtrapolate, chartData, extrapolatedIndex]);
37 |
// Advance one row per timer tick until the second-to-last index; the timer is cleared on re-run/unmount.
38 | useEffect(() => {
39 | if (seeExtrapolate && extrapolatedIndex < extrapolated.length - 1) {
40 | const timer = setTimeout(() => setExtrapolatedIndex(extrapolatedIndex + 1), easeInExpo(percentLeft) * 750);
41 | return () => clearTimeout(timer);
42 | }
43 | }, [percentLeft, chartData, extrapolated, seeExtrapolate, extrapolatedIndex]);
44 |
45 | const time = extrapolated[extrapolatedIndex].month;
46 |
47 | return (
48 | <>
49 |
50 |
51 |
55 | Intl.NumberFormat("en-US", {
56 | notation: "compact",
57 | compactDisplay: "long",
58 | maximumFractionDigits: 2,
59 | // @ts-ignore
60 | }).format(new_releases)
61 | }
62 | formats={{ total_size: "bytes" }}
63 | charts={[{ name: "Releases", valueNames: ["new_releases"] }]}
64 | />
65 |
66 |
67 |
71 | Intl.NumberFormat("en", {
72 | notation: "compact",
73 | style: "unit",
74 | unit: "byte",
75 | unitDisplay: "narrow",
76 | // @ts-ignore
77 | }).format(total_size)
78 | }
79 | formats={{ total_size: "bytes" }}
80 | charts={[{ name: "Size", valueNames: ["total_size"] }]}
81 | />
82 |
83 |
84 |
88 | Intl.NumberFormat("en-US", {
89 | notation: "compact",
90 | compactDisplay: "long",
91 | maximumFractionDigits: 1,
92 | // @ts-ignore
93 | }).format(total_files)
94 | }
95 | formats={{ total_size: "bytes" }}
96 | charts={[{ name: "Files", valueNames: ["total_files"] }]}
97 | />
98 |
99 |
100 |
101 | PyPI is growing fast. If this dangerous expansion is not stopped, our advanced machine learning models predict that
102 | in only {years} years the number of packages will outnumber human beings.
103 |
104 |
109 | >
110 | );
111 | }
112 |
--------------------------------------------------------------------------------
/src/app/stats/shitpost-model.tsx:
--------------------------------------------------------------------------------
1 | import { cumulative_sum } from "@/app/stats/utils";
2 |
/**
 * Appends projected monthly rows to a series of observed monthly rows.
 *
 * Each numeric series (`new_releases`, `total_files`, `total_size`) is projected independently by
 * `extrapolateSeries`. Projected rows are labelled `YYYY-MM-01`, beginning with the month AFTER the
 * last observed row so that no month label appears twice in the result.
 *
 * @param years  Number of years to project; `years * 12` rows are appended.
 * @param values Observed monthly rows, oldest first.
 * @returns A new array holding the observed rows followed by the projected rows. When `values` is
 *          empty there is nothing to project from and an empty array is returned.
 */
export default function extrapolate(
  years: number,
  values: {
    month: string;
    new_releases: number;
    total_files: number;
    total_size: number;
  }[],
) {
  if (values.length === 0) {
    return [...values];
  }

  const releasesExtrapolated = extrapolateSeries(
    years,
    values.map((el) => el.new_releases),
  );
  const filesExtrapolated = extrapolateSeries(
    years,
    values.map((el) => el.total_files),
  );
  const sizeExtrapolated = extrapolateSeries(
    years,
    values.map((el) => el.total_size),
  );

  // Read year and month straight from a `YYYY-MM…` label when possible. `new Date("YYYY-MM-DD")`
  // is parsed as UTC midnight, so reading it back with local getters yields the previous month in
  // UTC-negative time zones (and `setMonth` can then skip a month entirely).
  const lastMonth = values[values.length - 1].month;
  const isoMatch = /^(\d{4})-(\d{2})/.exec(lastMonth);
  let year: number;
  let monthIndex: number; // 0-based, like Date#getMonth
  if (isoMatch) {
    year = Number(isoMatch[1]);
    monthIndex = Number(isoMatch[2]) - 1;
  } else {
    // Any other label format keeps the previous Date-based interpretation.
    const parsed = new Date(lastMonth);
    year = parsed.getFullYear();
    monthIndex = parsed.getMonth();
  }

  const extrapolated = [];
  for (let i = 0; i < years * 12; i++) {
    // Advance first: the last observed month already has a row of its own.
    monthIndex += 1;
    if (monthIndex > 11) {
      monthIndex = 0;
      year += 1;
    }

    extrapolated.push({
      month: `${year}-${String(monthIndex + 1).padStart(2, "0")}-01`,
      new_releases: releasesExtrapolated[i],
      total_files: filesExtrapolated[i],
      total_size: sizeExtrapolated[i],
    });
  }

  return [...values, ...extrapolated];
}
44 |
/**
 * Projects a numeric monthly series `years * 12` steps into the future.
 *
 * A per-step factor is derived from the running totals of the last (up to) twelve values:
 * (last running total - first running total) / first running total / number of totals.
 * Starting from the final observed value, every step multiplies the previous value by that factor.
 *
 * @param years  Number of years to project.
 * @param values Observed values, oldest first.
 * @returns The projected values only (the observed values are not included).
 */
function extrapolateSeries(years: number, values: number[]) {
  const recent = values.slice(-12).map((x) => ({ x }));
  const runningTotals = cumulative_sum(recent, ["x"]).map(({ x }) => x);

  const firstTotal = runningTotals[0];
  const lastTotal = runningTotals[runningTotals.length - 1];
  const factorPerMonth = (lastTotal - firstTotal) / firstTotal / runningTotals.length;

  const projected = [];
  const steps = years * 12;
  let current = values[values.length - 1];
  for (let step = 0; step < steps; step += 1) {
    current *= factorPerMonth;
    projected.push(current);
  }
  return projected;
}
64 |
--------------------------------------------------------------------------------
/src/app/stats/sql.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import SyntaxHighlight from "@/app/datasets/syntax";
4 | import { useState } from "react";
5 |
/**
 * Collapsible SQL viewer.
 *
 * Starts collapsed. The toggle control flips `expandSQL`, and its label switches between
 * "Show SQL" and "Hide SQL". While expanded, `sqlData` is rendered prefixed with a comment line
 * linking to https://github.com/pypi-data/data/.
 */
6 | export default function ShowSQL({ sqlData }: { sqlData: string }) {
// false = collapsed (initial state), true = SQL visible.
7 | const [expandSQL, setExpandSQL] = useState(false);
8 | return (
9 |
10 |
11 | setExpandSQL(!expandSQL)}>
12 | {expandSQL ? "Hide SQL" : "Show SQL"}
13 |
14 | {expandSQL && (
15 | {`-- https://github.com/pypi-data/data/\n\n${sqlData}`}
16 | )}
17 |
18 |
19 | );
20 | }
21 |
--------------------------------------------------------------------------------
/src/app/stats/stats.tsx:
--------------------------------------------------------------------------------
1 | import { compile, CompileOptions } from "prql-js/dist/bundler";
2 |
/**
 * Downloads the published statistics and builds, for every statistic, the SQL that produces it.
 *
 * Two files are fetched from the `pypi-data/data` repository: `stats/totals.json` (a list of
 * `{ name, stat }` entries) and `sql/_stats.prql` (the PRQL source). For each entry the PRQL source
 * plus `relation_to_json(<name>)` is compiled to DuckDB SQL, and the first `$1` placeholder is
 * replaced with a parquet glob so the query can be run against the downloadable datasets.
 *
 * @returns An object keyed by statistic name, plus a `sql` map holding the compiled SQL per name.
 * @throws Error when either download fails or a PRQL snippet does not compile.
 */
export default async function getStats(): Promise<RepoStats> {
  // The two downloads are independent, so issue them together.
  const [statsRes, prqlRes] = await Promise.all([
    fetch("https://raw.githubusercontent.com/pypi-data/data/main/stats/totals.json"),
    fetch("https://raw.githubusercontent.com/pypi-data/data/main/sql/_stats.prql"),
  ]);

  if (!statsRes.ok) {
    throw new Error("Failed to fetch data");
  }
  // Without this check an error page body would be handed to the PRQL compiler as source text.
  if (!prqlRes.ok) {
    throw new Error("Failed to fetch PRQL source");
  }

  const [items, rawPrql] = await Promise.all([
    statsRes.json() as Promise<{ name: string; stat: any[] }[]>,
    prqlRes.text(),
  ]);

  const opts = new CompileOptions();
  opts.target = "sql.duckdb";
  opts.format = true;
  opts.signature_comment = false;

  const stats: Record<string, any[]> = {};
  const sql: Record<string, string> = {};
  for (const item of items) {
    stats[item.name] = item.stat;
    const compiled = compile(`${rawPrql}\nrelation_to_json(${item.name})`, opts);
    if (compiled === undefined) {
      throw new Error(`Failed to compile PRQL for ${item.name}`);
    }
    sql[item.name] = compiled.replace("$1", "'data_from_the_datasets_page/*.parquet'");
  }

  // The keys come from the downloaded JSON, so the shape can only be asserted, not checked.
  return { ...stats, sql } as unknown as RepoStats;
}
32 |
/**
 * Shape of the object returned by `getStats`: one property per statistic name found in the
 * downloaded JSON, plus `sql` with the compiled SQL text for statistics of the same name.
 *
 * NOTE(review): several fields are typed as one-element tuples (`[T]`). For the `*_over_time`
 * series an array (`T[]`) looks more likely to be intended — confirm against the data.
 */
33 | export type RepoStats = {
34 | total_stats: [TotalStat];
35 | stats_over_time: StatsOverTime[];
36 | skipped_files_stats: InnerStat[];
37 | binary_extension_stats: InnerStat[];
38 | extension_stats: InnerStat[];
39 | projects_by_files: ProjectStat[];
40 | skip_reason_stats: SkipReasonStat[];
41 | binary_sizes: [{ is_binary: boolean; total_files: number; total_size: number }];
42 | project_level_breakdowns: ProjectLevelBreakdown[];
43 | new_projects_over_time: [{ month: string; count: number }];
44 | new_project_versions_over_time: [{ month: string; count: number }];
45 | new_releases_over_time: [{ month: string; count: number }];
46 |
// Compiled SQL per statistic name. getStats fills this for every downloaded entry; only the
// names listed here are declared.
47 | sql: {
48 | stats_over_time: string;
49 | project_level_breakdowns: string;
50 | binary_sizes: string;
51 | binary_extension_stats: string;
52 | projects_by_files: string;
53 | extension_stats: string;
54 | };
55 | };
56 |
/** Overall totals; the single element of `RepoStats.total_stats`. */
57 | export type TotalStat = {
58 | total_files: number;
59 | total_lines: number;
// presumably bytes (formatted with byte-size by the stats page) — confirm against the data.
60 | total_size: number;
61 | unique_files: number;
62 | };
63 |
/**
 * One row of `RepoStats.project_level_breakdowns`.
 *
 * Apart from `month`, every field is a number; the `has_*` names suggest per-month counts of
 * uploads containing a given kind of file — confirm against the query that produces them.
 */
export type ProjectLevelBreakdown = {
  // Primitive `string` (not the `String` wrapper type), matching the other `month` fields in this file.
  month: string;
  total_project_uploads: number;
  project_version_releases: number;

  has_pyproject: number;
  has_setup_py: number;
  has_setup_py_and_pyproject: number;
  has_requirements_txt: number;

  init_py_files: number;

  has_markdown: number;
  has_rst: number;

  has_tests: number;
  has_tox: number;
  has_pytest: number;

  has_ini: number;
  has_json: number;
  has_xml: number;
  has_toml: number;
  has_yaml: number;
  has_rust: number;
  has_c_or_cpp: number;

  has_pyi: number;
  has_py_typed: number;
};
94 |
/**
 * Totals for one file extension. Row type of `skipped_files_stats`, `binary_extension_stats`
 * and `extension_stats` in `RepoStats`.
 */
95 | export type InnerStat = {
96 | extension: string;
97 | total_files: number;
98 | total_lines: number;
99 | total_size: number;
100 | unique_files: number;
101 | };
102 |
/** Totals for one project. Row type of `RepoStats.projects_by_files`. */
103 | export type ProjectStat = {
104 | project_name: string;
105 | unique_files: number;
106 | total_files: number;
107 | total_lines: number;
108 | total_size: number;
109 | };
110 |
/** Totals for one month. Row type of `RepoStats.stats_over_time`. */
111 | export type StatsOverTime = {
112 | month: string;
113 | total_files: number;
114 | total_size: number;
115 | total_lines: number;
116 | };
117 |
/** Totals for one skip reason. Row type of `RepoStats.skip_reason_stats`. */
118 | export type SkipReasonStat = {
119 | skip_reason: string;
120 | total_projects: number;
121 | count: number;
122 | unique_files: number;
123 | total_size: number;
124 | total_lines: number;
125 | max_size: number;
126 | max_lines: number;
127 | };
128 |
--------------------------------------------------------------------------------
/src/app/stats/total_stats.tsx:
--------------------------------------------------------------------------------
1 | import byteSize from "byte-size";
2 | import { Bars3BottomRightIcon, BoltIcon, CircleStackIcon, CodeBracketIcon } from "@heroicons/react/24/solid";
3 | import { StatsOverTime, TotalStat } from "@/app/stats/stats";
4 |
/**
 * Summary panel with four headline figures:
 *  - total files, in billions (value / 1000^3), with the unique-file count underneath;
 *  - total lines of text, in billions, with the exact count underneath;
 *  - total uncompressed size formatted in IEC units, plus the equivalent number of floppy disks
 *    (size / 1,468,006 bytes, i.e. about 1.4 MiB per disk);
 *  - lines added per second during `lastMonth`.
 *
 * Props:
 *  - stats: the overall totals.
 *  - lastMonth: the month whose `total_lines` feeds the per-second rate and whose `month` label
 *    is shown with it.
 */
5 | export default function TotalStats({ stats, lastMonth }: { stats: TotalStat; lastMonth: StatsOverTime }) {
// Every month is approximated as 30 days.
6 | const total_hours_in_a_month = 24 * 30;
// hours * 60 * 60 converts the month length to seconds.
7 | const lines_per_second = lastMonth.total_lines / (total_hours_in_a_month * 60 * 60);
8 | return (
9 | <>
10 |
11 |
12 |
13 |
14 |
15 |
Total files
16 |
17 | {(stats.total_files / 1000 / 1000 / 1000).toLocaleString(undefined, { maximumFractionDigits: 2 })} Billion
18 |
19 |
{stats.unique_files.toLocaleString()} unique
20 |
21 |
22 |
23 |
24 |
25 |
26 |
Total lines of text
27 |
28 | {(stats.total_lines / 1000 / 1000 / 1000).toLocaleString(undefined, { maximumFractionDigits: 1 })} Billion
29 |
30 |
{stats.total_lines.toLocaleString()} to be precise
31 |
32 |
33 |
34 |
35 |
36 |
37 |
Total uncompressed size
38 |
39 | {byteSize(stats.total_size, {
40 | units: "iec",
41 | precision: 1,
42 | }).toString()}
43 |
44 |
That is ~{(stats.total_size / 1468006).toLocaleString()} floppy disks
45 |
46 |
47 |
48 |
49 |
50 |
51 |
Lines of code added per second
52 |
53 | {lines_per_second.toLocaleString(undefined, { maximumFractionDigits: 0 })}
54 |
55 |
In the month {lastMonth.month}
56 |
57 |
58 | >
59 | );
60 | }
61 |
--------------------------------------------------------------------------------
/src/app/stats/utils.tsx:
--------------------------------------------------------------------------------
/**
 * Returns a copy of `data` in which each named field holds the running total of that field
 * over the rows so far.
 *
 * The input rows are not modified: the copy is made with a JSON round-trip, so only
 * JSON-representable content survives.
 *
 * @param data       Rows to accumulate over, in order.
 * @param valueNames Names of the numeric fields to turn into running totals.
 * @returns The copied rows with the named fields replaced by their cumulative sums.
 */
export function cumulative_sum(data: any[], valueNames: string[]): any[] {
  const rows = JSON.parse(JSON.stringify(data));
  valueNames.forEach((field) => {
    let runningTotal = 0;
    rows.forEach((row: any) => {
      runningTotal += row[field] as number;
      row[field] = runningTotal;
    });
  });
  return rows;
}
12 |
--------------------------------------------------------------------------------
/src/app/swr-provider.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { SWRConfig } from "swr";
// Client-side wrapper around its children. The fetcher visible below requests the given URL with
// fetch() and resolves to the parsed JSON body; presumably it is passed to SWRConfig as the
// default fetcher — confirm.
3 | // @ts-ignore
4 | export const SWRProvider = ({ children }) => {
5 | return fetch(url).then((res) => res.json()) }}>{children} ;
6 | };
7 |
--------------------------------------------------------------------------------
/src/app/table.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import { flexRender, getCoreRowModel, useReactTable } from "@tanstack/react-table";
4 | import byteSize from "byte-size";
5 | import { useMemo, useState } from "react";
6 |
/** Describes one column of `Table`. */
7 | type Column = {
// Key read from each data row; also used as the header text with the first "_" replaced by a space.
8 | name: string;
// Controls cell/footer formatting. When omitted, values are shown as-is and "" becomes "No <name>".
9 | type?: "string" | "number" | "bytes";
10 | };
11 |
/** Props accepted by `Table`. */
12 | interface TableProps {
// Rows to display; each row is read by column name.
13 | data: Record[];
14 | columns: Column[];
// Number of rows shown while the table is collapsed (Table defaults this to 15).
15 | initialLimit?: number;
// Whether a totals footer is rendered (Table defaults this to true).
16 | addFooter?: boolean;
17 | }
18 |
/**
 * Generic data table with an optional totals footer and a "show all" toggle.
 *
 * While collapsed only the first `initialLimit` rows are shown; the toggle is rendered only when
 * `data` has more rows than that. The first column's footer reads "Total"; "number" and "bytes"
 * columns show the sum of that column over ALL rows of `data`, not just the visible ones.
 */
19 | export default function Table({ data, columns, initialLimit = 15, addFooter = true }: TableProps) {
20 | const [expanded, setExpanded] = useState(false);
21 | // This is needed to stop a re-render loop? No idea why.
// NOTE(review): likely because useReactTable expects a referentially stable `data` array;
// memoising keeps the same array between renders until data/expanded/initialLimit change — confirm.
22 | const limitedData = useMemo(() => {
// JSON round-trip: the table works on a deep copy of the caller's rows.
23 | const numbersCopy = JSON.parse(JSON.stringify(data));
24 | if (!expanded) {
25 | return numbersCopy.slice(0, initialLimit);
26 | } else {
27 | return numbersCopy;
28 | }
29 | }, [data, expanded, initialLimit]);
30 | const hasMore = data.length > initialLimit;
31 |
32 | const table = useReactTable({
33 | data: limitedData,
34 | columns: columns.map((column) => ({
35 | id: column.name,
// NOTE(review): String#replace with a string pattern replaces only the first "_", so a name
// with several underscores keeps the rest in its header.
36 | header: column.name.replace("_", " "),
37 | footer: !addFooter
38 | ? undefined
39 | : ({ table }) => {
// The first column carries the row label for the footer.
40 | if (column.name == columns[0].name) {
41 | return "Total";
42 | }
43 | if (column.type === "bytes" || column.type === "number") {
44 | // @ts-ignore
45 | const total = data.reduce((total, row) => total + row[column.name], 0);
46 | if (column.type == "number") {
47 | return total.toLocaleString();
48 | }
49 | return byteSize(total, { units: "iec", precision: 1 }).toString();
50 | }
51 | },
52 | cell: (props) => {
53 | const row = props.getValue();
// Untyped columns: show the raw value, with a placeholder for empty strings.
54 | if (column.type === undefined) {
55 | if (row == "") {
56 | return `No ${column.name}`;
57 | } else {
58 | return row;
59 | }
60 | }
// String values are passed through unformatted, whatever the declared column type.
61 | if (column.type === "string" || typeof row === "string") {
62 | return row;
63 | } else if (column.type === "number") {
64 | return row.toLocaleString();
65 | } else if (column.type === "bytes") {
66 | return byteSize(row, { units: "iec", precision: 1 }).toString();
67 | }
68 | },
69 | accessorKey: column.name,
70 | })),
71 | getCoreRowModel: getCoreRowModel(),
72 | });
73 |
74 | return (
75 |
76 |
77 | {table.getHeaderGroups().map((headerGroup) => (
78 |
79 | {headerGroup.headers.map((header) => (
80 |
81 | {header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
82 |
83 | ))}
84 |
85 | ))}
86 |
87 |
88 | {table.getRowModel().rows.map((row) => (
89 |
90 | {row.getVisibleCells().map((cell) => (
91 | {flexRender(cell.column.columnDef.cell, cell.getContext())}
92 | ))}
93 |
94 | ))}
95 |
96 |
97 | {table.getFooterGroups().map((footerGroup) => (
98 |
99 | {footerGroup.headers.map((header) => (
100 | {flexRender(header.column.columnDef.footer, header.getContext())}
101 | ))}
102 |
103 | ))}
104 | {hasMore && (
105 |
106 |
107 | {
109 | e.preventDefault();
110 | setExpanded(!expanded);
111 | }}
112 | >
113 | {expanded ? "Show less" : `Show All ${data.length} Rows`}
114 |
115 |
116 |
117 | )}
118 |
119 |
120 | );
121 | }
122 |
--------------------------------------------------------------------------------
/src/data/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/utils.ts:
--------------------------------------------------------------------------------
1 | import fs from "fs";
2 | import path from "path";
3 | import { RepoData } from "@/app/repositories/page";
4 |
// Repository metadata, read synchronously from disk and parsed once when this module is loaded.
// The path is resolved against the process working directory.
const repoDataPath = path.join(process.cwd(), "src/data/repositories_with_releases.json");
const repoDataJson = fs.readFileSync(repoDataPath, "utf-8");
const allRepoData = JSON.parse(repoDataJson) as RepoData[];
8 |
/**
 * Returns the repository metadata bundled with the application.
 *
 * The data is loaded from `src/data/repositories_with_releases.json` when this module is first
 * imported, so the call itself performs no I/O. Every call resolves to the same array instance;
 * callers should treat it as read-only.
 *
 * The same file is published at
 * https://raw.githubusercontent.com/pypi-data/data/main/stats/repositories_with_releases.json
 * should it ever need to be fetched at runtime instead.
 */
export async function getData(): Promise<RepoData[]> {
  return allRepoData;
}
20 |
--------------------------------------------------------------------------------
/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | module.exports = {
// Glob patterns for the source files Tailwind scans for class names.
3 | content: [
4 | "./src/pages/**/*.{js,ts,jsx,tsx,mdx}",
5 | "./src/components/**/*.{js,ts,jsx,tsx,mdx}",
6 | "./src/app/**/*.{js,ts,jsx,tsx,mdx}",
7 | ],
8 | plugins: [require("daisyui"), require("@tailwindcss/typography")],
// daisyUI options: "dark" is the only theme listed.
9 | daisyui: {
10 | themes: ["dark"],
11 | },
12 | theme: {
13 | extend: {
// Overrides the typography plugin's default styles.
14 | typography: {
15 | DEFAULT: {
16 | css: {
17 | maxWidth: null, // full width
18 | },
19 | },
20 | },
21 | },
22 | },
23 | };
24 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "es6",
4 | "lib": ["dom", "dom.iterable", "esnext"],
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "strict": true,
8 | "forceConsistentCasingInFileNames": true,
9 | "noEmit": true,
10 | "esModuleInterop": true,
11 | "module": "esnext",
12 | "moduleResolution": "node",
13 | "resolveJsonModule": true,
14 | "isolatedModules": true,
15 | "jsx": "preserve",
16 | "incremental": true,
17 | "plugins": [
18 | {
19 | "name": "next"
20 | }
21 | ],
22 | "paths": {
23 | "@/*": ["./src/*"],
24 | "@public/*": ["./public/*"]
25 | }
26 | },
27 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
28 | "exclude": ["node_modules"],
29 | "ts-node": {
30 | // these options are overrides used only by ts-node
31 | // same as the --compilerOptions flag and the TS_NODE_COMPILER_OPTIONS environment variable
32 | "compilerOptions": {
33 | "module": "commonjs"
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------