├── frontend
├── .gitignore
├── postcss.config.js
├── src
│ ├── assets
│ │ ├── laion-social-graph.png
│ │ ├── benchmark-dark-theme.webp
│ │ ├── benchmark-light-theme.webp
│ │ └── logos
│ │ │ ├── Laion-dark.svg
│ │ │ └── Laion-light.svg
│ ├── ui
│ │ ├── lib
│ │ │ └── utils.ts
│ │ ├── components
│ │ │ ├── custom
│ │ │ │ ├── GradientCard.tsx
│ │ │ │ ├── LoadingScreen.tsx
│ │ │ │ ├── Col.tsx
│ │ │ │ ├── Grid.tsx
│ │ │ │ ├── Row.tsx
│ │ │ │ ├── Centered.tsx
│ │ │ │ ├── JsonComponent.tsx
│ │ │ │ ├── ResponsiveRow.tsx
│ │ │ │ ├── FakeH1.tsx
│ │ │ │ ├── HomePageBackdrop.tsx
│ │ │ │ ├── TooltipContentComponent.tsx
│ │ │ │ ├── AlertInfo.tsx
│ │ │ │ ├── AlertWarning.tsx
│ │ │ │ ├── HeaderComponents.tsx
│ │ │ │ ├── SearchInput.tsx
│ │ │ │ ├── FeatureCard.tsx
│ │ │ │ ├── CommandBlock.tsx
│ │ │ │ ├── ThemeToggle.tsx
│ │ │ │ ├── StakingDashboardBanner.tsx
│ │ │ │ ├── ScoreBadge.tsx
│ │ │ │ ├── SelectableCard.tsx
│ │ │ │ ├── Code.tsx
│ │ │ │ ├── InferenceIcon.tsx
│ │ │ │ ├── WorkerLogsTerminal.tsx
│ │ │ │ └── CodeBlock.tsx
│ │ │ └── ui
│ │ │ │ ├── Skeleton.tsx
│ │ │ │ ├── Spinner.tsx
│ │ │ │ ├── SeparatorBorder.tsx
│ │ │ │ ├── Separator.tsx
│ │ │ │ ├── Label.tsx
│ │ │ │ ├── Slider.tsx
│ │ │ │ ├── Popover.tsx
│ │ │ │ ├── ScaleLoader.tsx
│ │ │ │ ├── Avatar.tsx
│ │ │ │ ├── Badge.tsx
│ │ │ │ ├── Textarea.tsx
│ │ │ │ ├── Tooltip.tsx
│ │ │ │ ├── Input.tsx
│ │ │ │ ├── Checkbox.tsx
│ │ │ │ ├── Toaster.tsx
│ │ │ │ ├── Switch.tsx
│ │ │ │ ├── Tabs.tsx
│ │ │ │ ├── Accordion.tsx
│ │ │ │ ├── Calendar.tsx
│ │ │ │ ├── Alert.tsx
│ │ │ │ ├── Breadcrumb.tsx
│ │ │ │ ├── Table.tsx
│ │ │ │ ├── Card.tsx
│ │ │ │ ├── Button.tsx
│ │ │ │ ├── AlertDialog.tsx
│ │ │ │ ├── Sheet.tsx
│ │ │ │ └── Toast.tsx
│ │ ├── hooks
│ │ │ ├── useHasMounted.hook.ts
│ │ │ ├── useBreakpoints.hook.ts
│ │ │ └── useToast.hook.ts
│ │ ├── utils
│ │ │ └── getThemeColor.ts
│ │ ├── index.tsx
│ │ └── providers
│ │ │ └── ThemeProvider.tsx
│ ├── vite-env.d.ts
│ ├── lib
│ │ ├── models.ts
│ │ ├── ui-shared.tsx
│ │ └── ui-client-utils.ts
│ ├── state
│ │ └── chartDataCache.ts
│ ├── types
│ │ ├── assets.d.ts
│ │ └── index.ts
│ ├── components
│ │ ├── LearnMoreContent.tsx
│ │ ├── LaionHotKeys.tsx
│ │ ├── DistributionChart.tsx
│ │ └── LearnMoreSheet.tsx
│ ├── main.tsx
│ ├── index.css
│ ├── utils
│ │ ├── routeMapping.ts
│ │ └── api.ts
│ └── styles
│ │ └── fonts
│ │ └── inter.css
├── .prettierignore
├── prettier.config.cjs
├── vite.config.ts
├── tsconfig.json
├── index.html
├── eslint.config.js
└── package.json
├── backend
├── .gitignore
├── .dev.vars.example
├── download_db.sh
├── wrangler.toml
├── pyproject.toml
├── import-to-local-d1.sh
└── src
│ ├── models.py
│ └── cache_generator.py
├── .github
└── workflows
│ └── pr-checks.yml
├── LICENSE
├── .vscode
└── settings.json
├── .gitignore
├── README.md
└── Taskfile.yml
/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | api/cache/
3 |
4 | .env.production
--------------------------------------------------------------------------------
/backend/.gitignore:
--------------------------------------------------------------------------------
1 | .venv
2 | .venv-workers
3 | node_modules
4 | python_modules
5 |
6 |
7 | data/
8 | .wrangler/
--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default { // PostCSS configuration object, exported as the module default
2 | plugins: { // plugin map keyed by plugin name
3 | tailwindcss: {}, // Tailwind CSS plugin, no options passed
4 | autoprefixer: {}, // Autoprefixer plugin, no options passed
5 | },
6 | };
7 |
--------------------------------------------------------------------------------
/frontend/src/assets/laion-social-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/context-labs/aella-data-explorer/HEAD/frontend/src/assets/laion-social-graph.png
--------------------------------------------------------------------------------
/frontend/src/assets/benchmark-dark-theme.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/context-labs/aella-data-explorer/HEAD/frontend/src/assets/benchmark-dark-theme.webp
--------------------------------------------------------------------------------
/frontend/src/assets/benchmark-light-theme.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/context-labs/aella-data-explorer/HEAD/frontend/src/assets/benchmark-light-theme.webp
--------------------------------------------------------------------------------
/frontend/.prettierignore:
--------------------------------------------------------------------------------
1 | **/build
2 | **/public
3 | **/dist
4 | **/.output
5 | **/.vercel
6 | **/.vinxi
7 | .DS_Store
8 | **/inter.css
9 | **/jetbrains.css
10 | **/helmfile.yaml
11 | .claude
--------------------------------------------------------------------------------
/frontend/src/ui/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import type { ClassValue } from "clsx";
2 | import { clsx } from "clsx";
3 | import { twMerge } from "tailwind-merge";
4 |
5 | export function cn(...inputs: ClassValue[]) { // variadic: accepts any number of clsx-compatible class values
6 | return twMerge(clsx(inputs)); // clsx builds one class string from the inputs array; twMerge then post-processes that string
7 | }
8 |
--------------------------------------------------------------------------------
/frontend/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
No data present.
; 14 | } 15 | return ( 16 |{title}
19 | {Array.isArray(content) ? ( 20 | content.map((item, index) => ( 21 |22 | {item} 23 |
24 | )) 25 | ) : typeof content === "string" ? ( 26 |{content}
27 | ) : ( 28 | content 29 | )} 30 | 31 | ); 32 | } 33 | -------------------------------------------------------------------------------- /frontend/src/ui/components/custom/AlertInfo.tsx: -------------------------------------------------------------------------------- 1 | import { Alert } from "~/ui/components/ui/Alert"; 2 | import { InfoIcon } from "lucide-react"; 3 | 4 | type AlertInfoProps = { 5 | title: string; 6 | content?: string | React.ReactNode; 7 | className?: string; 8 | }; 9 | 10 | export function AlertInfo({ className, content, title }: AlertInfoProps) { 11 | return ( 12 |{content}
23 | ) : ( 24 | content 25 | )} 26 |{content}
23 | ) : ( 24 | content 25 | )} 26 |4 | This dataset was built using a specialized small model, fine-tuned by{" "} 5 | 15 | Inference.net 16 | 17 | , in collaboration with{" "} 18 | 28 | LAION 29 | 30 | . 31 |
32 | ); 33 | }; 34 | 35 | export function LearnMoreContent() { 36 | return ( 37 |40 | This is a small 100,000 sample preview of the full ~50m sample dataset. 41 | Our fine-tuned model extracts structured summaries from original, 42 | arbitrary text data. 43 |
44 |{error}
} 46 | {hint &&{hint}
} 47 |{card.description}
47 |
41 | {cmd}
42 |
43 | 54 | Or press 't' to toggle 55 |
56 |
22 |
{description}
70 |{error}
} 56 | {hint &&{hint}
} 57 |
24 | {value}
25 |
26 | );
27 |
28 | export const Code = forwardRef
79 | {showCopyIcon && }
80 | {children}
81 |
82 | );
83 | },
84 | );
85 |
86 | Code.displayName = "Code";
87 | export default Code;
88 |
--------------------------------------------------------------------------------
/backend/src/models.py:
--------------------------------------------------------------------------------
1 | """Pydantic models for API responses."""
2 |
3 | from pydantic import BaseModel
4 |
5 |
6 | class PaperSummary(BaseModel):
7 | """Summary view of a paper for list/visualization."""
8 |
9 | id: int  # the only non-nullable field on this model
10 | title: str | None  # NOTE(review): nullable fields here declare no default; under Pydantic v2 they stay required — confirm version
11 | x: float | None  # NOTE(review): x/y/z are presumably the paper's 3D layout coordinates — confirm
12 | y: float | None
13 | z: float | None
14 | cluster_id: int | None
15 | cluster_label: str | None
16 | field_subfield: str | None
17 | publication_year: int | None
18 | classification: str | None
19 |
20 |
21 | class PaperDetail(BaseModel):
22 | """Detailed view of a paper."""
23 |
24 | id: int
25 | title: str | None
26 | sample: str | None  # nullable here; PaperSample.sample is a plain str
27 | summarization: str | None
28 | x: float | None  # x/y/z, cluster_* and the remaining metadata repeat the PaperSummary field set
29 | y: float | None
30 | z: float | None
31 | cluster_id: int | None
32 | cluster_label: str | None
33 | field_subfield: str | None
34 | publication_year: int | None
35 | classification: str | None
36 | nearest_papers: list["PaperSummary"]  # quoted forward reference; PaperSummary is defined earlier in this module
37 |
38 |
39 | class PaperSample(BaseModel):
40 | """Paper sample with extracted data and cluster info."""
41 |
42 | paper_id: int  # identifier is named paper_id here, whereas PaperSummary/PaperDetail use id
43 | sample: str  # non-nullable, unlike PaperDetail.sample
44 | title: str | None
45 | summarization: str | None
46 | cluster_id: int | None  # no x/y/z coordinates on this model, only cluster and metadata fields
47 | cluster_label: str | None
48 | field_subfield: str | None
49 | publication_year: int | None
50 | classification: str | None
51 |
52 |
53 | class PaperSampleList(BaseModel):
54 | """List of paper IDs that have samples."""
55 |
56 | paper_ids: list[int]  # NOTE(review): presumably the same identifiers as PaperSample.paper_id — confirm
57 |
58 |
59 | class ClusterInfo(BaseModel):
60 | """Information about a cluster."""
61 |
62 | cluster_id: int  # all four fields are non-nullable, unlike cluster_id/cluster_label on the paper models
63 | cluster_label: str
64 | count: int  # NOTE(review): presumably the number of papers in the cluster — confirm
65 | color: str  # NOTE(review): colour format (hex, CSS name, ...) is not specified here — confirm against the producer
66 |
67 |
68 | class PapersResponse(BaseModel):
69 | """Response containing list of papers."""
70 |
71 | papers: list[PaperSummary]  # elements are PaperSummary, not PaperDetail
72 |
73 |
74 | class ClustersResponse(BaseModel):
75 | """Response containing cluster information."""
76 |
77 | clusters: list[ClusterInfo]  # same key name as TemporalDataResponse.clusters, different element type
78 |
79 |
80 | class TemporalDataPoint(BaseModel):
81 | """Data point for a specific year in temporal analysis."""
82 |
83 | year: int
84 | count: int  # NOTE(review): presumably the number of papers for that year — confirm
85 |
86 |
87 | class ClusterTemporalData(BaseModel):
88 | """Temporal evolution data for a single cluster."""
89 |
90 | cluster_id: int  # cluster_id, cluster_label and color repeat the ClusterInfo fields of the same names
91 | cluster_label: str
92 | color: str
93 | temporal_data: list[TemporalDataPoint]  # the model declares no validator, so ordering by year is not enforced here
94 |
95 |
96 | class TemporalDataResponse(BaseModel):
97 | """Response containing temporal evolution data for all clusters."""
98 |
99 | clusters: list[ClusterTemporalData]  # same key name as ClustersResponse.clusters, different element type
100 |
--------------------------------------------------------------------------------
/frontend/src/ui/components/ui/Accordion.tsx:
--------------------------------------------------------------------------------
1 | import * as AccordionPrimitive from "@radix-ui/react-accordion";
2 | import { cn } from "~/ui/lib/utils";
3 | import { ChevronDown } from "lucide-react";
4 | import * as React from "react";
5 |
6 | const Accordion = AccordionPrimitive.Root; // local alias for the Radix accordion Root component
7 |
8 | const AccordionItem = React.forwardRef<
9 | React.ComponentRefLoading distribution data...
122 |Error loading data: {error}
130 |73 | We fine-tuned a 14B Qwen model to specialize in the task of 74 | extracting structured summaries from scientific papers. We 75 | carefully benchmarked this model across a variety of closed source 76 | models. 77 |
78 |79 | We evaluated the model's performance on 1,000 samples withheld 80 | from the training set using an LLM-as-a-Judge methodology, on a 81 | qualitative 5-point rubric. 82 |
83 |91 | The fine-tuned model extracts structured summaries from papers 92 | following this TypeScript schema: 93 |
94 |107 | Paper embeddings were generated using{" "} 108 | 118 | SPECTER2 119 | 120 | , a transformer model from AllenAI specifically designed for 121 | scientific documents. The model processes each paper's title, 122 | executive summary, and research context to generate 123 | 768-dimensional embeddings optimized for semantic search over 124 | scientific literature. 125 |
126 | Cluster Algorithm 127 |128 | The visualization uses UMAP (Uniform Manifold Approximation and 129 | Projection) to reduce the 768D embeddings to 3D coordinates, 130 | preserving local and global structure. K-Means clustering groups 131 | papers into ~100 clusters based on semantic similarity in the 132 | embedding space. Cluster labels are automatically generated using 133 | TF-IDF analysis of paper fields and key takeaways, identifying the 134 | most distinctive terms for each cluster. 135 |
136 | 137 |