├── .gitignore ├── LICENSE ├── README.md ├── app ├── abs │ └── [id] │ │ └── page.js ├── components │ ├── ArxivInput.tsx │ ├── Layout.jsx │ ├── LoadingState.js │ ├── Metadata.js │ └── PaperView.js ├── favicon.ico ├── globals.css ├── hooks │ ├── usePaperContent.js │ └── usePaperMetadata.js ├── html │ └── [id] │ │ └── page.js ├── layout.tsx ├── lib │ └── arxiv.js ├── page.tsx ├── pdf │ └── [id] │ │ └── page.js └── raw │ ├── abs │ └── [id] │ │ └── route.js │ └── pdf │ └── [id] │ └── route.js ├── eslint.config.mjs ├── examples └── python │ └── lit_review.ipynb ├── next.config.ts ├── package-lock.json ├── package.json ├── postcss.config.mjs ├── public ├── file.svg ├── globe.svg ├── next.svg ├── vercel.svg └── window.svg ├── tailwind.config.ts └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jeremy Pinto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the 
rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # arXiv-txt.org 2 | 3 | A simple tool that makes arXiv papers instantly available in LLM-friendly formats by just changing the URL. 4 | 5 | ## How It Works 6 | 7 | 1. Find a paper on arXiv: `https://arxiv.org/abs/2502.10248` 8 | 2. Change the URL to: `https://arxiv-txt.org/abs/2502.10248` 9 | 3. Copy the LLM-optimized text version of the paper 10 | 11 | ## API Usage 12 | 13 | See example use-cases in the [/examples/](./examples/) directory. 14 | 15 | ## Development 16 | 17 | This project is built with Next.js and Tailwind CSS. 18 | 19 | ### Prerequisites 20 | 21 | - Node.js (16.x or later) 22 | - npm or yarn 23 | 24 | ### Installation 25 | 26 | ```bash 27 | # Clone the repository 28 | git clone https://github.com/jerpint/arxiv-txt.git 29 | cd arxiv-txt 30 | 31 | # Install dependencies 32 | npm install 33 | ``` 34 | 35 | ### Development Server 36 | 37 | ```bash 38 | npm run dev 39 | ``` 40 | 41 | This runs the app in development mode. 
Open [http://localhost:3000](http://localhost:3000) to view it in your browser. 42 | 43 | ### Building for Production 44 | 45 | ```bash 46 | npm run build 47 | ``` 48 | 49 | ## Deployment 50 | 51 | The site is configured for easy deployment on Vercel with the included `vercel.json` configuration. 52 | 53 | ```bash 54 | # Install Vercel CLI 55 | npm i -g vercel 56 | 57 | # Deploy 58 | vercel 59 | ``` 60 | 61 | ## Technical Implementation 62 | 63 | - **Frontend**: Next.js with App Router, React, and Tailwind CSS 64 | - **API Proxy**: Next.js API routes handle CORS and rate limiting 65 | - **Caching**: Response caching via Next.js and HTTP headers 66 | - **Deployment**: Vercel with custom configuration 67 | 68 | This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). 69 | 70 | 71 | ## Getting Started 72 | 73 | First, run the development server: 74 | 75 | ```bash 76 | npm run dev 77 | # or 78 | yarn dev 79 | # or 80 | pnpm dev 81 | # or 82 | bun dev 83 | ``` 84 | 85 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 86 | 87 | You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. 88 | 89 | This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. 90 | 91 | ## Learn More 92 | 93 | To learn more about Next.js, take a look at the following resources: 94 | 95 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 96 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 97 | 98 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! 
99 | 100 | ## Deploy on Vercel 101 | 102 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. 103 | 104 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. 105 | -------------------------------------------------------------------------------- /app/abs/[id]/page.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { use } from 'react'; 4 | import LoadingState from '@/app/components/LoadingState'; 5 | import Layout from '@/app/components/Layout'; 6 | import { usePaperMetadata } from '@/app/hooks/usePaperMetadata'; 7 | import { usePaperContent } from '@/app/hooks/usePaperContent'; 8 | import PaperView from '@/app/components/PaperView'; 9 | 10 | export default function AbstractPage({ params }) { 11 | const unwrappedParams = use(params); 12 | const { id } = unwrappedParams; 13 | const { paper, loading: paperLoading, error: paperError, plainTextMetadata } = usePaperMetadata(id); 14 | const { paperContent, loading: paperContentLoading, error: paperContentError } = usePaperContent(id); 15 | 16 | if (paperLoading || paperContentLoading) { 17 | return ; 18 | } 19 | 20 | if (paperError && paperContentError) { 21 | return ( 22 |
23 |
24 |

Error

25 |

Failed to fetch paper information and content.

26 |

27 | Please check that you have entered a valid arXiv paper ID. 28 |

29 |
30 |
31 | ); 32 | } 33 | 34 | if (!paper && !paperError) { 35 | return null; 36 | } 37 | 38 | return ( 39 | <> 40 | 48 | 49 | 50 | ); 51 | } 52 | -------------------------------------------------------------------------------- /app/components/ArxivInput.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useState } from 'react'; 4 | 5 | export default function ArxivInput() { 6 | const [arxivId, setArxivId] = useState(''); 7 | const [result, setResult] = useState(''); 8 | 9 | const handleSubmit = (e: React.FormEvent) => { 10 | e.preventDefault(); 11 | 12 | // Validate input (basic validation) 13 | if (!arxivId.trim()) { 14 | setResult('Please enter an arXiv ID'); 15 | return; 16 | } 17 | 18 | // Extract arXiv ID from different possible formats 19 | let cleanId = arxivId.trim(); 20 | 21 | // Handle URLs like arxiv.org/abs/[id] or arxiv.org/pdf/[id] 22 | if (cleanId.includes('arxiv.org/')) { 23 | // Extract ID from URL 24 | const matches = cleanId.match(/arxiv\.org\/(abs|pdf)\/([^\/\s]+)/); 25 | if (matches && matches[2]) { 26 | cleanId = matches[2]; 27 | } 28 | } 29 | 30 | // Remove version suffix if present (e.g., 1706.03762v1 -> 1706.03762) 31 | cleanId = cleanId.replace(/v\d+$/, ''); 32 | 33 | // Create the URL 34 | const arxivTxtUrl = `https://arxiv-txt.org/abs/${cleanId}`; 35 | 36 | // Open the URL in a new tab 37 | window.open(arxivTxtUrl, '_blank'); 38 | }; 39 | 40 | return ( 41 |
42 |
43 |

How it works

44 |

45 | Replace arxiv.org to 46 | arxiv-txt.org 47 |

48 |
49 |
50 | 53 |
54 | setArxivId(e.target.value)} 59 | placeholder="e.g., 1706.03762 or https://arxiv.org/abs/1706.03762" 60 | className="input input-bordered flex-grow" 61 | /> 62 | 65 |
66 |
67 | 78 |
79 | 80 | {result && ( 81 |
82 |

{result}

83 | {result.startsWith('Generated URL:') && ( 84 | 90 | Open URL 91 | 92 | )} 93 |
94 | )} 95 |
96 |
97 | ); 98 | } -------------------------------------------------------------------------------- /app/components/Layout.jsx: -------------------------------------------------------------------------------- 1 | export default function Layout({ children = null }) { 2 | return ( 3 |
4 | {children} 5 | 37 |
38 | ); 39 | } -------------------------------------------------------------------------------- /app/components/LoadingState.js: -------------------------------------------------------------------------------- 1 | export default function LoadingState() { 2 | return ( 3 |
4 |
5 |

Loading paper information...

6 |
7 | ); 8 | } -------------------------------------------------------------------------------- /app/components/Metadata.js: -------------------------------------------------------------------------------- 1 | export default function Metadata({ paper }) { 2 | if (!paper) return null; 3 | 4 | return ( 5 |
6 |
7 |

Authors

8 |

{paper.authors.join(', ')}

9 |
10 | 11 |
12 |
13 |

Categories

14 |
15 | {paper.categories.map((category, index) => ( 16 | 20 | {category} 21 | 22 | ))} 23 |
24 |
25 | 26 |
27 |

Dates

28 |

29 | Published: {paper.publishedDate} 30 | {paper.updatedDate !== paper.publishedDate && ( 31 | <> 32 |
33 | {/* Last Updated: {paper.updatedDate} */} 34 | 35 | )} 36 |

37 |
38 | 39 | {paper.journalRef && ( 40 |
41 |

Journal Reference

42 |

{paper.journalRef}

43 |
44 | )} 45 | 46 |
47 |

arXiv ID

48 |

{paper.id}

49 |
50 |
51 |
52 | ); 53 | } -------------------------------------------------------------------------------- /app/components/PaperView.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Toaster, toast } from 'react-hot-toast'; 4 | 5 | export default function PaperView({ paper, plainText, paperContent }) { 6 | const copyToClipboard = async (text) => { 7 | try { 8 | await navigator.clipboard.writeText(text); 9 | toast.success('Copied to clipboard!', { 10 | duration: 2000, 11 | style: { 12 | background: '#4ade80', 13 | color: '#fff', 14 | }, 15 | iconTheme: { 16 | primary: '#fff', 17 | secondary: '#10b981', 18 | } 19 | }); 20 | } catch (err) { 21 | console.error('Failed to copy: ', err); 22 | toast.error('Failed to copy text', { 23 | duration: 2000, 24 | }); 25 | } 26 | }; 27 | 28 | const copyAllContent = () => { 29 | const combinedText = `Summary:\n${plainText}\n\nPaper Content:\n${paperContent}`; 30 | copyToClipboard(combinedText); 31 | }; 32 | 33 | return ( 34 | <> 35 | 36 |
37 |
38 |
39 |
40 |

{paper.title}

41 |

42 | arXiv: 48 | {paper.id} 49 | 50 |

51 |
52 | 61 |
62 | 63 |
64 | 65 |
66 | The PDF content is experimental. Please report any issues on GitHub. 67 |
68 | 69 |
70 |
71 |
72 |

Summary

73 | 82 |
83 |
84 |
 85 |                   {plainText}
 86 |                 
87 |
88 |
89 | 90 |
91 |
92 |

PDF Content

93 | 102 |
103 |
104 |
105 |                   {paperContent}
106 |                 
107 |
108 |
109 |
110 |
111 |
112 | 113 | ); 114 | } 115 | -------------------------------------------------------------------------------- /app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerpint/arxiv-txt/e725f3f1eddc48b7ab70c0bb84d58d630667f645/app/favicon.ico -------------------------------------------------------------------------------- /app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | /* These will be overridden by daisyUI themes */ 7 | --foreground-rgb: 0, 0, 0; 8 | --background-rgb: 248, 250, 252; 9 | } 10 | 11 | body { 12 | min-height: 100vh; 13 | /* Let daisyUI handle the colors through themes */ 14 | } 15 | 16 | /* Add some fun animations and transitions */ 17 | @layer base { 18 | h1, h2, h3, h4, h5, h6 { 19 | @apply font-semibold; 20 | } 21 | 22 | code { 23 | @apply font-mono py-1 px-2 rounded transition-all duration-300; 24 | } 25 | 26 | pre { 27 | @apply font-mono; 28 | } 29 | 30 | a { 31 | @apply transition-all duration-300; 32 | } 33 | } 34 | 35 | @layer components { 36 | .glow-effect { 37 | @apply transition-all duration-300; 38 | filter: drop-shadow(0 0 8px theme('colors.primary')); 39 | } 40 | 41 | .glow-effect:hover { 42 | filter: drop-shadow(0 0 12px theme('colors.secondary')); 43 | } 44 | 45 | .bounce-hover { 46 | @apply transition-transform duration-200; 47 | } 48 | 49 | .bounce-hover:hover { 50 | transform: translateY(-4px); 51 | } 52 | } 53 | 54 | /* Custom theme tweaks for light/dark mode */ 55 | [data-theme="cyberpunk"] { 56 | background-image: linear-gradient(to bottom right, 57 | hsl(var(--b1)), 58 | hsl(var(--b2)) 59 | ); 60 | } 61 | 62 | [data-theme="synthwave"] { 63 | background-image: linear-gradient(to bottom right, 64 | hsl(var(--b1)), 65 | hsl(var(--p)) 66 | ); 67 | } 68 | 69 | /* Add to your globals.css */ 70 | .mockup-code::before { 71 
// Section headers emitted by the `/raw/abs/[id]` endpoint (each appears as "# <name>").
const METADATA_SECTION_NAMES = [
  'Title',
  'Authors',
  'Abstract',
  'Categories',
  'Publication Details',
  'BibTeX',
];

/**
 * Split the plain-text metadata document into its named sections.
 *
 * Sections are located by their header line rather than by position, so the
 * result does not depend on the order in which the endpoint writes them or on
 * blank lines inside a section.
 *
 * @param {string} content - Raw text returned by `/raw/abs/[id]`
 * @returns {(name: string) => string} Accessor returning the trimmed body of a
 *   section, or '' when the section is absent
 */
function splitMetadataSections(content) {
  const sections = new Map();
  let current = null;

  for (const line of content.split('\n')) {
    const heading = line.startsWith('# ') ? line.slice(2).trim() : '';
    if (METADATA_SECTION_NAMES.includes(heading)) {
      current = heading;
      sections.set(current, []);
    } else if (current !== null) {
      sections.get(current).push(line);
    }
  }

  return (name) => (sections.get(name) || []).join('\n').trim();
}

/**
 * Read one "- Label: value" bullet from the Publication Details section.
 *
 * @param {string} details - Body of the Publication Details section
 * @param {string} label - Bullet label, e.g. "Published" or "DOI"
 * @returns {string} The value, or '' when the bullet is missing
 */
function readPublicationDetail(details, label) {
  const prefix = `- ${label}: `;
  const line = details.split('\n').find((candidate) => candidate.startsWith(prefix));
  return line ? line.slice(prefix.length).trim() : '';
}

/**
 * Turn the plain-text metadata document into the structured `paper` object.
 *
 * @param {string} content - Raw text returned by `/raw/abs/[id]`
 * @param {string} id - The paper ID the content was requested for
 * @returns {object} `{ title, authors, categories, abstract, publishedDate, id }`
 *   plus `doi` / `journalRef` when the document lists them
 * @throws {Error} When the document has no Title section
 */
function parsePaperMetadata(content, id) {
  const section = splitMetadataSections(content);
  const title = section('Title');
  if (!title) {
    throw new Error('Unexpected paper metadata format');
  }

  const toList = (text) =>
    text ? text.split(', ').map((item) => item.trim()).filter(Boolean) : [];

  // DOI and journal reference are read from the details bullets only, so a
  // "DOI: ..." substring in the abstract or BibTeX cannot be picked up by mistake.
  const details = section('Publication Details');
  const doi = readPublicationDetail(details, 'DOI');
  const journalRef = readPublicationDetail(details, 'Journal Reference');

  return {
    title,
    authors: toList(section('Authors')),
    categories: toList(section('Categories')),
    abstract: section('Abstract'),
    publishedDate: readPublicationDetail(details, 'Published'),
    id,
    ...(doi ? { doi } : {}),
    ...(journalRef ? { journalRef } : {}),
  };
}

/**
 * React hook that loads a paper's metadata from `/raw/abs/[id]`.
 *
 * @param {string} id - arXiv paper ID; nothing is fetched while it is falsy
 * @returns {{ paper: object|null, loading: boolean, error: string|null, plainTextMetadata: string }}
 *   `paper` is the parsed metadata, `plainTextMetadata` the untouched response text.
 */
export function usePaperMetadata(id) {
  const [paper, setPaper] = useState(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState(null);
  const [plainTextMetadata, setPlainTextMetadata] = useState('');

  useEffect(() => {
    if (!id) {
      return undefined;
    }

    // Set by the cleanup below so that a response for a previous `id` (or one
    // arriving after unmount) can no longer overwrite the current state.
    let cancelled = false;

    async function fetchPaper() {
      setLoading(true);
      setError(null);

      try {
        const response = await fetch(`/raw/abs/${id}`);

        if (!response.ok) {
          throw new Error(`Failed to fetch paper: ${response.status} ${response.statusText}`);
        }

        const content = await response.text();
        if (cancelled) return;

        setPlainTextMetadata(content);
        setPaper(parsePaperMetadata(content, id));
      } catch (err) {
        if (cancelled) return;
        console.error('Error loading paper:', err);
        setError(err.message || 'Failed to load paper information');
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    }

    fetchPaper();

    return () => {
      cancelled = true;
    };
  }, [id]);

  return { paper, loading, error, plainTextMetadata };
}
8 | */ 9 | 10 | export default function PDFPage(props) { 11 | return ; 12 | } 13 | -------------------------------------------------------------------------------- /app/layout.tsx: -------------------------------------------------------------------------------- 1 | import { Inter } from 'next/font/google'; 2 | import Link from 'next/link'; 3 | import { ReactNode } from 'react'; 4 | import './globals.css'; 5 | import { Analytics } from '@vercel/analytics/next'; 6 | 7 | const inter = Inter({ subsets: ['latin'] }); 8 | 9 | export const metadata = { 10 | title: 'arXiv-txt.org - LLM-friendly arXiv papers', 11 | description: 'Convert arXiv papers into LLM-friendly formats', 12 | }; 13 | 14 | export default function RootLayout({ children }: { children: ReactNode }) { 15 | return ( 16 | 17 | 18 |
19 | 20 |
21 | {/* Navbar */} 22 |
23 |
24 | 29 |
30 |
31 | arXiv-txt.org 32 | LLM-friendly arXiv papers 33 | 34 | GitHub 35 | 36 | 37 | 38 | 39 |
40 |
41 | {/* Page content */} 42 |
43 | {children} 44 | 45 |
46 |
47 |
48 | 49 |
    50 |
  • Home
  • 51 | {/* Add more menu items here */} 52 |
53 |
54 |
// arXiv API base URL
const ARXIV_API_BASE = 'http://export.arxiv.org/api/query';

// IDs that are returned untouched: "2502.10248" and "math/0211159v1" shapes,
// each with an optional "vN" version suffix.
const CANONICAL_ID_PATTERNS = [/^\d{4}\.\d{5}(v\d+)?$/, /^[a-z-]+\/\d{7}(v\d+)?$/];

// Captures everything that follows an "abs/" or "pdf/" path segment.
const PREFIXED_ID_PATTERN = /(?:abs|pdf)\/(.+)/;

/**
 * Reduce the different ways a paper can be referenced to a bare arXiv ID.
 *
 * An ID already in one of the two canonical shapes is returned as is. Otherwise
 * the text after the first "abs/" or "pdf/" segment is returned, and an input
 * with neither is passed through unchanged.
 *
 * @param {string} id - The arXiv paper ID in various formats
 * @returns {string|null} - Normalized ID, or null when `id` is empty/missing
 */
export const normalizePaperId = (id) => {
  if (!id) return null;

  const isCanonical = CANONICAL_ID_PATTERNS.some((pattern) => pattern.test(id));
  if (isCanonical) return id;

  const prefixed = id.match(PREFIXED_ID_PATTERN);
  return prefixed ? prefixed[1] : id;
};

/**
 * Build the arXiv API query URL for a single paper.
 *
 * @param {string} paperId - The arXiv paper ID (normalized before use)
 * @returns {string} - Full arXiv API URL using the `id_list` query parameter
 */
export const getArxivApiUrl = (paperId) =>
  `${ARXIV_API_BASE}?id_list=${normalizePaperId(paperId)}`;
60 | } 61 | 62 | // Extract entry data 63 | const entry = xmlDoc.querySelector('entry'); 64 | if (!entry) { 65 | throw new Error('Paper not found'); 66 | } 67 | 68 | // Helper function to get text content from nodes 69 | const getNodeText = (parent, selector) => { 70 | const node = parent.querySelector(selector); 71 | return node ? node.textContent.trim() : ''; 72 | }; 73 | 74 | // Extract categories 75 | const getCategories = (entry) => { 76 | const primaryCategory = entry.querySelector('arxiv\\:primary_category')?.getAttribute('term'); 77 | const categories = Array.from(entry.querySelectorAll('category')) 78 | .map(cat => cat.getAttribute('term')); 79 | 80 | // Ensure primary category is included and listed first 81 | const uniqueCategories = [primaryCategory, ...categories] 82 | .filter((cat, index, self) => cat && self.indexOf(cat) === index); 83 | 84 | return uniqueCategories; 85 | }; 86 | 87 | // Format date to a more readable format 88 | const formatDate = (dateString) => { 89 | if (!dateString) return ''; 90 | 91 | try { 92 | const date = new Date(dateString); 93 | return date.toLocaleDateString('en-US', { 94 | year: 'numeric', 95 | month: 'long', 96 | day: 'numeric' 97 | }); 98 | } catch (e) { 99 | console.error('Error formatting date:', e); 100 | return dateString; 101 | } 102 | }; 103 | 104 | // Extract and structure the data 105 | const normalizedId = normalizePaperId(getNodeText(entry, 'id').split('/').pop()); 106 | 107 | const metadata = { 108 | id: normalizedId, 109 | title: getNodeText(entry, 'title'), 110 | abstract: getNodeText(entry, 'summary'), 111 | authors: Array.from(entry.querySelectorAll('author')).map(author => 112 | getNodeText(author, 'name') 113 | ), 114 | categories: getCategories(entry), 115 | publishedDate: formatDate(getNodeText(entry, 'published')), 116 | updatedDate: formatDate(getNodeText(entry, 'updated')), 117 | pdfLink: entry.querySelector('link[title="pdf"]')?.getAttribute('href') || '', 118 | doi: getNodeText(entry, 
/**
 * Convert an arXiv HTML rendering of a paper to plain text, keeping math as LaTeX.
 *
 * Every `<math>` element is swapped for a `__MATH_n__` placeholder before the
 * HTML is handed to `htmlToText`, and the placeholders are substituted back
 * afterwards: `$...$` for inline math, `$$...$$` on its own paragraph for
 * `display="block"` math. The LaTeX comes from the element's x-tex annotation,
 * falling back to its `alttext` attribute; a `<math>` element with neither is
 * left for `htmlToText` to render.
 *
 * @param {string} html - HTML source of the paper
 * @returns {string} - Plain-text rendering with whitespace normalised
 * @throws {ArxivError} - With status 500 when the conversion fails
 */
export function convertHtmlToText(html) {
  const mathRegex = /<math[^>]*>([\s\S]*?)<\/math>/g;
  // NOTE(review): the opening-tag part of this pattern was missing from the copy
  // under review; it is reconstructed here to match an annotation whose
  // encoding is "application/x-tex" — confirm against the repository.
  const texAnnotationRegex =
    /<annotation[^>]*encoding="application\/x-tex"[^>]*>([\s\S]*?)<\/annotation>/;
  const latexMap = new Map();

  // The LaTeX is cut out of raw markup, so characters such as "<" and "&" are
  // still entity-encoded. "&amp;" is decoded last so "&amp;lt;" stays "&lt;".
  const decodeEntities = (value) =>
    value
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#0*39;|&apos;/g, "'")
      .replace(/&amp;/g, '&');

  // Register a LaTeX snippet and return the placeholder that stands in for it.
  const stash = (latex) => {
    const placeholder = `__MATH_${latexMap.size}__`;
    latexMap.set(placeholder, latex);
    return placeholder;
  };

  const htmlWithPlaceholders = html.replace(mathRegex, (match, content) => {
    // Preferred source: the LaTeX annotation inside the element
    const texMatch = content.match(texAnnotationRegex);
    if (texMatch) {
      const isDisplay = match.includes('display="block"');
      const tex = decodeEntities(texMatch[1].trim());
      return stash(isDisplay ? `\n\n$$${tex}$$\n\n` : `$${tex}$`);
    }

    // Fallback to alttext
    const altMatch = match.match(/alttext="([^"]*)"/);
    if (altMatch) {
      return stash(`$${decodeEntities(altMatch[1])}$`);
    }

    return match;
  });

  const options = {
    wordwrap: false,
    preserveNewlines: true,
    singleNewLineParagraphs: true,
    selectors: [
      {
        selector: 'p',
        format: 'block',
        transform: (content) => `${content}\n\n`
      }
    ]
  };

  try {
    let text = htmlToText(htmlWithPlaceholders, options);

    // Replace math placeholders with LaTeX. A replacer *function* is required:
    // in a replacement string "$$" means a single "$" (and "$&", "$'", "$`"
    // are patterns too), which turned display math "$$...$$" into "$...$".
    latexMap.forEach((latex, placeholder) => {
      text = text.replace(placeholder, () => latex);
    });

    return text
      .replace(/\n{3,}/g, '\n\n')
      .replace(/\s+$/gm, '')
      .replace(/([^.])\n\n([^\n])/g, '$1 $2')
      .replace(/\s*\[\s*(\d+(?:,\s*\d+)*)\s*\]/g, ' [$1]')
      .trim();
  } catch (error) {
    throw new ArxivError(`Error converting HTML to text: ${error.message}`, 500);
  }
}
} 260 | }; -------------------------------------------------------------------------------- /app/page.tsx: -------------------------------------------------------------------------------- 1 | import { Highlight, themes } from "prism-react-renderer"; 2 | import Layout from '@/app/components/Layout'; 3 | import ArxivInput from '@/app/components/ArxivInput'; 4 | 5 | export default function Home() { 6 | const pythonCode = `import requests 7 | 8 | arxiv_url = "https://arxiv.org/abs/1706.03762" 9 | arxiv_txt_url = arxiv_url.replace("arxiv.org", "arxiv-txt.org/raw/") 10 | summary: str = requests.get(arxiv_txt_url).text 11 | print(summary) 12 | 13 | # Pass this to your favorite agent`; 14 | 15 | const bashCode = `# Save the raw text to a file 16 | curl -o paper.txt https://arxiv-txt.org/raw/abs/1706.03762 17 | 18 | # or pipe directly to CLI apps: 19 | # This example uses the 'llm' library 20 | # https://github.com/simonw/llm 21 | 22 | curl -L https://arxiv-txt.org/raw/abs/1706.03762 | \\ 23 | llm -s "Explain this paper like I'm 5" 24 | `; 25 | 26 | 27 | return ( 28 |
29 |

30 | arXiv-txt.org 31 |

32 |

33 | LLM-friendly arXiv papers | 34 | GitHub 35 | 36 | 37 | 38 | 39 |

40 | 41 | 42 | 43 |
44 |
45 |

46 | API Usage Guide 47 |

48 | 49 |
50 |

51 | arXiv-txt is designed to be API-friendly 52 |

53 | 54 |
55 |
56 |

Fetch a text summary:

57 |
58 | arxiv-txt.org/raw/abs/[id] 59 |
60 |
61 | 62 |
63 |

Fetch the full paper content:

64 |
65 | arxiv-txt.org/raw/pdf/[id] 66 |
67 |
68 |
69 | 70 |
71 |
Try it out:
72 | 77 |
78 |
79 | 80 |
81 | 82 |
83 |

Python

84 |

Checkout this example Jupyter Notebook or use the arxiv-txt API directly

85 | 86 |
87 | 88 | {({ className, style, tokens, getLineProps, getTokenProps }) => ( 89 |
 90 |                     {tokens.map((line, i) => (
 91 |                       
92 | {line.map((token, key) => ( 93 | 94 | ))} 95 |
96 | ))} 97 |
98 | )} 99 |
100 |
101 |
102 | 103 |
104 |

Command Line

105 |
106 | 107 | {({ className, style, tokens, getLineProps, getTokenProps }) => ( 108 |
109 |                     {tokens.map((line, i) => (
110 |                       
111 | {line.map((token, key) => ( 112 | 113 | ))} 114 |
115 | ))} 116 |
117 | )} 118 |
119 |
120 |
121 |
122 |
123 | 124 |
const PLAIN_TEXT_HEADERS = { 'Content-Type': 'text/plain; charset=utf-8' };
const ARXIV_USER_AGENT =
  'arXiv-txt.org (https://arxiv-txt.org; mailto:contact@arxiv-txt.org)';

/** Build a plain-text response with the given status and optional extra headers. */
function plainTextResponse(body, status, extraHeaders = {}) {
  return new NextResponse(body, {
    status,
    headers: { ...PLAIN_TEXT_HEADERS, ...extraHeaders }
  });
}

/** The XML parser yields an object for one child and an array for several; always return an array. */
function toArray(value) {
  if (value === undefined || value === null) return [];
  return Array.isArray(value) ? value : [value];
}

/**
 * Fetch the BibTeX entry for a paper. Best effort: any failure is logged and
 * yields '' so the metadata response is still served.
 */
async function fetchBibtex(normalizedId) {
  try {
    const bibtexResponse = await fetch(`https://arxiv.org/bibtex/${normalizedId}`, {
      headers: { 'User-Agent': ARXIV_USER_AGENT },
    });
    if (!bibtexResponse.ok) {
      console.error(`Failed to fetch BibTeX: ${bibtexResponse.status} ${bibtexResponse.statusText}`);
      return '';
    }
    return (await bibtexResponse.text()).trim();
  } catch (error) {
    // A network error here previously escaped to the outer catch and turned
    // the whole request into a 500.
    console.error('Failed to fetch BibTeX:', error);
    return '';
  }
}

/**
 * GET /raw/abs/[id] — plain-text metadata for one arXiv paper.
 *
 * Queries the arXiv API for the paper and responds with a text document made
 * of "# Title", "# Authors", "# Abstract", "# Categories",
 * "# Publication Details" and, when available, "# BibTeX" sections separated
 * by blank lines.
 *
 * Responses: 200 with the document; 400 when the ID is missing or rejected by
 * arXiv; 404 when the feed contains no entry; the upstream status when the
 * arXiv API request fails; 500 on unexpected errors.
 *
 * @param {Request} request - Incoming request (unused)
 * @param {{ params: object | Promise<object> }} context - Route context carrying `id`
 * @returns {Promise<NextResponse>}
 */
export async function GET(request, { params }) {
  try {
    // The pages in this app unwrap `params` with `use()`, i.e. it is delivered
    // as a Promise. Awaiting also works if a plain object is passed.
    const { id } = await params;

    if (!id) {
      return plainTextResponse('Paper ID is required', 400);
    }

    const normalizedId = normalizePaperId(id);
    const arxivUrl = getArxivApiUrl(normalizedId);
    const apiResponse = await fetch(arxivUrl, {
      headers: {
        'Accept': 'application/xml',
        'User-Agent': ARXIV_USER_AGENT
      },
    });

    if (!apiResponse.ok) {
      return plainTextResponse(
        `Error fetching paper data: ${apiResponse.status} ${apiResponse.statusText}`,
        apiResponse.status
      );
    }

    // Get the XML data
    const xmlData = await apiResponse.text();

    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_'
    });
    const result = parser.parse(xmlData);

    // An unknown ID produces a feed without <entry>; reading fields off
    // `undefined` used to surface as a 500.
    const entry = toArray(result?.feed?.entry)[0];
    if (!entry || !entry.id) {
      return plainTextResponse(`Paper not found: ${normalizedId}`, 404);
    }

    // NOTE(review): arXiv appears to report malformed IDs as an entry whose id
    // points at ".../api/errors#..." — confirm against the arXiv API manual.
    if (String(entry.id).includes('/api/errors')) {
      return plainTextResponse(`Invalid arXiv ID: ${normalizedId}`, 400);
    }

    // Titles may be wrapped over several lines in the feed; keep the header
    // section to a single line.
    const title = String(entry.title ?? '').replace(/\s+/g, ' ').trim();
    const abstract = String(entry.summary ?? '').trim();
    const authors = toArray(entry.author).map(a => a.name);
    const categories = toArray(entry.category).map(c => c['@_term']);
    // Take everything after "/abs/" so old-style IDs such as "math/0211159v1"
    // keep their archive prefix.
    const arxivId = String(entry.id).split('/abs/').pop();
    const doi = entry['arxiv:doi'];
    const journalRef = entry['arxiv:journal_ref'];
    const publishedDate = formatDate(entry.published);

    const bibtexContent = await fetchBibtex(normalizedId);

    // Optional bullets are dropped rather than rendered as empty lines, so the
    // sections stay separated by exactly one blank line.
    const details = [
      `- Published: ${publishedDate}`,
      `- arXiv ID: ${arxivId}`,
      doi ? `- DOI: ${doi}` : null,
      journalRef ? `- Journal Reference: ${journalRef}` : null,
    ].filter(Boolean);

    const sections = [
      `# Title\n${title}`,
      `# Authors\n${authors.join(', ')}`,
      `# Abstract\n${abstract}`,
      `# Categories\n${categories.join(', ')}`,
      `# Publication Details\n${details.join('\n')}`,
    ];
    if (bibtexContent) {
      sections.push(`# BibTeX\n${bibtexContent}`);
    }

    return plainTextResponse(`${sections.join('\n\n')}\n`, 200, {
      'Cache-Control': 'public, max-age=86400' // Cache for 1 day
    });

  } catch (error) {
    console.error('Error processing paper data:', error);
    return plainTextResponse(`Failed to process paper data: ${error.message}`, 500);
  }
}

/**
 * Format an ISO timestamp as e.g. "June 12, 2017".
 *
 * @param {string} dateString - Timestamp from the arXiv feed
 * @returns {string} '' for an empty input; the input itself when it is not a
 *   parseable date; otherwise the en-US long date in UTC
 */
function formatDate(dateString) {
  if (!dateString) return '';

  const date = new Date(dateString);
  // An unparseable date does not throw, it formats as "Invalid Date", so it
  // has to be detected explicitly.
  if (Number.isNaN(date.getTime())) {
    return String(dateString);
  }

  return date.toLocaleDateString('en-US', {
    year: 'numeric',
    month: 'long',
    day: 'numeric',
    timeZone: 'UTC' // same output regardless of the server's local timezone
  });
}

export const revalidate = 86400; // 24 hours in seconds
/**
 * GET /raw/pdf/[id]
 *
 * Fetches the paper's HTML through `fetchArxivHtml`, converts it with
 * `convertHtmlToText` and returns the result as a plain-text response.
 *
 * @param {Request} request - Incoming request (not used).
 * @param {{ params: Promise<{ id?: string }> | { id?: string } }} context -
 *   Route context carrying the dynamic `id` segment.
 * @returns {Promise<Response>} 200 with the text; 400 when the id is missing;
 *   the `ArxivError` status for known failures; 500 otherwise.
 */
export async function GET(request, { params }) {
  try {
    // Next.js 15 hands `params` over as a Promise. Awaiting a plain object is
    // a no-op, so this stays compatible with callers passing one directly.
    const { id } = await params;

    if (!id) {
      return new Response('Paper ID is required', { status: 400 });
    }

    const html = await fetchArxivHtml(id);
    const text = convertHtmlToText(html);

    return new Response(text, {
      headers: {
        // Explicit charset (as the /raw/abs route does) so clients do not
        // have to guess the encoding of non-ASCII text.
        'Content-Type': 'text/plain; charset=utf-8',
        'Cache-Control': 'public, s-maxage=86400', // Cache for 24 hours
      }
    });

  } catch (error) {
    console.error(`Error processing paper:`, error);

    if (error instanceof ArxivError) {
      // NOTE(review): ArxivError is defined elsewhere; presumably statusCode
      // is always an HTTP error code. Guard anyway: an undefined status would
      // silently become 200 and an out-of-range one makes `Response` throw.
      const code = error.statusCode;
      const status = Number.isInteger(code) && code >= 400 && code <= 599 ? code : 500;
      return new Response(error.message, { status });
    }

    return new Response('Internal server error', {
      status: 500
    });
  }
}
35 | "Let's define some helper functions to get the summaries of the papers for the lit review." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "Getting summary for https://arxiv.org/abs/2205.09113:\n", 48 | "Getting summary for https://arxiv.org/abs/2304.00571:\n", 49 | "Getting summary for https://arxiv.org/abs/2211.09120:\n", 50 | "Getting summary for https://arxiv.org/abs/2212.05922:\n", 51 | "Getting summary for https://arxiv.org/abs/2301.06018:\n", 52 | "Summaries:\n", 53 | "---\n", 54 | "\n", 55 | "# Masked Autoencoders As Spatiotemporal Learners\n", 56 | "\n", 57 | "## Authors\n", 58 | "Christoph Feichtenhofer, Haoqi Fan, Yanghao Li, Kaiming He\n", 59 | "\n", 60 | "## Categories\n", 61 | "cs.CV, cs.LG\n", 62 | "\n", 63 | "## Publication Details\n", 64 | "- Published: May 18, 2022\n", 65 | "- arXiv ID: 2205.09113v2\n", 66 | "\n", 67 | "\n", 68 | "\n", 69 | "## Abstract\n", 70 | "This paper studies a conceptually simple extension of Masked Autoencoders\n", 71 | "(MAE) to spatiotemporal representation learning from videos. We randomly mask\n", 72 | "out spacetime patches in videos and learn an autoencoder to reconstruct them in\n", 73 | "pixels. Interestingly, we show that our MAE method can learn strong\n", 74 | "representations with almost no inductive bias on spacetime (only except for\n", 75 | "patch and positional embeddings), and spacetime-agnostic random masking\n", 76 | "performs the best. We observe that the optimal masking ratio is as high as 90%\n", 77 | "(vs. 75% on images), supporting the hypothesis that this ratio is related to\n", 78 | "information redundancy of the data. A high masking ratio leads to a large\n", 79 | "speedup, e.g., > 4x in wall-clock time or even more. We report competitive\n", 80 | "results on several challenging video datasets using vanilla Vision\n", 81 | "Transformers. 
We observe that MAE can outperform supervised pre-training by\n", 82 | "large margins. We further report encouraging results of training on real-world,\n", 83 | "uncurated Instagram data. Our study suggests that the general framework of\n", 84 | "masked autoencoding (BERT, MAE, etc.) can be a unified methodology for\n", 85 | "representation learning with minimal domain knowledge.\n", 86 | "\n", 87 | "---\n", 88 | "# DropMAE: Masked Autoencoders with Spatial-Attention Dropout for Tracking\n", 89 | " Tasks\n", 90 | "\n", 91 | "## Authors\n", 92 | "Qiangqiang Wu, Tianyu Yang, Ziquan Liu, Baoyuan Wu, Ying Shan, Antoni B. Chan\n", 93 | "\n", 94 | "## Categories\n", 95 | "cs.CV\n", 96 | "\n", 97 | "## Publication Details\n", 98 | "- Published: April 2, 2023\n", 99 | "- arXiv ID: 2304.00571v2\n", 100 | "\n", 101 | "\n", 102 | "\n", 103 | "## Abstract\n", 104 | "In this paper, we study masked autoencoder (MAE) pretraining on videos for\n", 105 | "matching-based downstream tasks, including visual object tracking (VOT) and\n", 106 | "video object segmentation (VOS). A simple extension of MAE is to randomly mask\n", 107 | "out frame patches in videos and reconstruct the frame pixels. However, we find\n", 108 | "that this simple baseline heavily relies on spatial cues while ignoring\n", 109 | "temporal relations for frame reconstruction, thus leading to sub-optimal\n", 110 | "temporal matching representations for VOT and VOS. To alleviate this problem,\n", 111 | "we propose DropMAE, which adaptively performs spatial-attention dropout in the\n", 112 | "frame reconstruction to facilitate temporal correspondence learning in videos.\n", 113 | "We show that our DropMAE is a strong and efficient temporal matching learner,\n", 114 | "which achieves better finetuning results on matching-based tasks than the\n", 115 | "ImageNetbased MAE with 2X faster pre-training speed. 
Moreover, we also find\n", 116 | "that motion diversity in pre-training videos is more important than scene\n", 117 | "diversity for improving the performance on VOT and VOS. Our pre-trained DropMAE\n", 118 | "model can be directly loaded in existing ViT-based trackers for fine-tuning\n", 119 | "without further modifications. Notably, DropMAE sets new state-of-the-art\n", 120 | "performance on 8 out of 9 highly competitive video tracking and segmentation\n", 121 | "datasets. Our code and pre-trained models are available at\n", 122 | "https://github.com/jimmy-dq/DropMAE.git.\n", 123 | "\n", 124 | "---\n", 125 | "# AdaMAE: Adaptive Masking for Efficient Spatiotemporal Learning with\n", 126 | " Masked Autoencoders\n", 127 | "\n", 128 | "## Authors\n", 129 | "Wele Gedara Chaminda Bandara, Naman Patel, Ali Gholami, Mehdi Nikkhah, Motilal Agrawal, Vishal M. Patel\n", 130 | "\n", 131 | "## Categories\n", 132 | "cs.CV, cs.AI\n", 133 | "\n", 134 | "## Publication Details\n", 135 | "- Published: November 16, 2022\n", 136 | "- arXiv ID: 2211.09120v1\n", 137 | "\n", 138 | "\n", 139 | "\n", 140 | "## Abstract\n", 141 | "Masked Autoencoders (MAEs) learn generalizable representations for image,\n", 142 | "text, audio, video, etc., by reconstructing masked input data from tokens of\n", 143 | "the visible data. Current MAE approaches for videos rely on random patch, tube,\n", 144 | "or frame-based masking strategies to select these tokens. This paper proposes\n", 145 | "AdaMAE, an adaptive masking strategy for MAEs that is end-to-end trainable. Our\n", 146 | "adaptive masking strategy samples visible tokens based on the semantic context\n", 147 | "using an auxiliary sampling network. This network estimates a categorical\n", 148 | "distribution over spacetime-patch tokens. The tokens that increase the expected\n", 149 | "reconstruction error are rewarded and selected as visible tokens, motivated by\n", 150 | "the policy gradient algorithm in reinforcement learning. 
We show that AdaMAE\n", 151 | "samples more tokens from the high spatiotemporal information regions, thereby\n", 152 | "allowing us to mask 95% of tokens, resulting in lower memory requirements and\n", 153 | "faster pre-training. We conduct ablation studies on the Something-Something v2\n", 154 | "(SSv2) dataset to demonstrate the efficacy of our adaptive sampling approach\n", 155 | "and report state-of-the-art results of 70.0% and 81.7% in top-1 accuracy on\n", 156 | "SSv2 and Kinetics-400 action classification datasets with a ViT-Base backbone\n", 157 | "and 800 pre-training epochs.\n", 158 | "\n", 159 | "---\n", 160 | "# Audiovisual Masked Autoencoders\n", 161 | "\n", 162 | "## Authors\n", 163 | "Mariana-Iuliana Georgescu, Eduardo Fonseca, Radu Tudor Ionescu, Mario Lucic, Cordelia Schmid, Anurag Arnab\n", 164 | "\n", 165 | "## Categories\n", 166 | "cs.CV, cs.SD\n", 167 | "\n", 168 | "## Publication Details\n", 169 | "- Published: December 9, 2022\n", 170 | "- arXiv ID: 2212.05922v3\n", 171 | "\n", 172 | "\n", 173 | "\n", 174 | "## Abstract\n", 175 | "Can we leverage the audiovisual information already present in video to\n", 176 | "improve self-supervised representation learning? To answer this question, we\n", 177 | "study various pretraining architectures and objectives within the masked\n", 178 | "autoencoding framework, motivated by the success of similar methods in natural\n", 179 | "language and image understanding. We show that we can achieve significant\n", 180 | "improvements on audiovisual downstream classification tasks, surpassing the\n", 181 | "state-of-the-art on VGGSound and AudioSet. Furthermore, we can leverage our\n", 182 | "audiovisual pretraining scheme for multiple unimodal downstream tasks using a\n", 183 | "single audiovisual pretrained model. 
We additionally demonstrate the\n", 184 | "transferability of our representations, achieving state-of-the-art audiovisual\n", 185 | "results on Epic Kitchens without pretraining specifically for this dataset.\n", 186 | "\n", 187 | "---\n", 188 | "# CMAE-V: Contrastive Masked Autoencoders for Video Action Recognition\n", 189 | "\n", 190 | "## Authors\n", 191 | "Cheng-Ze Lu, Xiaojie Jin, Zhicheng Huang, Qibin Hou, Ming-Ming Cheng, Jiashi Feng\n", 192 | "\n", 193 | "## Categories\n", 194 | "cs.CV\n", 195 | "\n", 196 | "## Publication Details\n", 197 | "- Published: January 15, 2023\n", 198 | "- arXiv ID: 2301.06018v1\n", 199 | "\n", 200 | "\n", 201 | "\n", 202 | "## Abstract\n", 203 | "Contrastive Masked Autoencoder (CMAE), as a new self-supervised framework,\n", 204 | "has shown its potential of learning expressive feature representations in\n", 205 | "visual image recognition. This work shows that CMAE also trivially generalizes\n", 206 | "well on video action recognition without modifying the architecture and the\n", 207 | "loss criterion. By directly replacing the original pixel shift with the\n", 208 | "temporal shift, our CMAE for visual action recognition, CMAE-V for short, can\n", 209 | "generate stronger feature representations than its counterpart based on pure\n", 210 | "masked autoencoders. Notably, CMAE-V, with a hybrid architecture, can achieve\n", 211 | "82.2% and 71.6% top-1 accuracy on the Kinetics-400 and Something-something V2\n", 212 | "datasets, respectively. 
We hope this report could provide some informative\n", 213 | "inspiration for future works.\n", 214 | "\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "import requests\n", 220 | "\n", 221 | "def get_paper_summary(arxiv_url) -> str:\n", 222 | " \"\"\"\n", 223 | " Get the summary of a paper from arxiv-txt.org\n", 224 | " \"\"\"\n", 225 | " assert arxiv_url.startswith(\"https://arxiv.org/\"), f\"Invalid arxiv url: {arxiv_url}, must start with https://arxiv.org/\"\n", 226 | " arxiv_txt_url = arxiv_url.replace(\"arxiv.org/\", \"arxiv-txt.org/raw/\")\n", 227 | " response = requests.get(arxiv_txt_url)\n", 228 | " return response.text\n", 229 | "\n", 230 | "\n", 231 | "def get_summaries(paper_list) -> str:\n", 232 | " \"\"\"\n", 233 | " Get the summaries of a list of papers\n", 234 | " \"\"\"\n", 235 | " summary_list = []\n", 236 | " for paper in paper_list:\n", 237 | " print(f\"Getting summary for {paper}:\")\n", 238 | " summary = get_paper_summary(paper)\n", 239 | " summary_list.append(summary)\n", 240 | " return \"\\n---\\n\".join(summary_list)\n", 241 | "\n", 242 | "paper_summaries = get_summaries(paper_list)\n", 243 | "print(\"Summaries:\\n---\\n\")\n", 244 | "print(paper_summaries)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "Now let's get the lit review from the LLM." 
252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 3, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "# Install litellm for LLM calls\n", 261 | "!pip -q install litellm\n", 262 | "\n", 263 | "import os\n", 264 | "import litellm\n", 265 | "\n", 266 | "def get_completion(messages, model: str = \"gpt-4o-mini\") -> str:\n", 267 | "\n", 268 | " response = litellm.completion(model=model, messages=messages)\n", 269 | " return response.choices[0].message.content\n", 270 | "\n", 271 | "\n", 272 | "model = \"gpt-4o-mini\" # Replace with any litellm supported model, make sure to set OPENAI_API_KEY\n", 273 | "\n", 274 | "system_prompt = f\"\"\"\n", 275 | "You are a helpful assistant that reviews papers.\n", 276 | "You are given a list of papers and their abstracts.\n", 277 | "Your goal is to review the papers for a research paper on the given topic.\n", 278 | "\n", 279 | "\"\"\"\n", 280 | "\n", 281 | "user_prompt = f\"\"\"This will be a paragraph in a scientific paper.\n", 282 | "Explain what Masked Autoencoders are, why they are important, and the different innovations listed in the follow-up papers.\n", 283 | "Cite the papers and their different contributions to the field of Masked Autoencoders.\n", 284 | "\n", 285 | "Here are the relevant papers and their abstracts:\n", 286 | "{paper_summaries}\n", 287 | "\"\"\"\n", 288 | "\n", 289 | "\n", 290 | "messages = [\n", 291 | " {\"role\": \"system\", \"content\": system_prompt},\n", 292 | " {\"role\": \"user\", \"content\": user_prompt},\n", 293 | "]\n" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 4, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "Masked Autoencoders (MAEs) are a class of self-supervised learning models that have gained prominence for their ability to learn robust representations by reconstructing masked portions of input data. 
Initially popularized in the context of images, these models have been extended to spatiotemporal domains, particularly for video data. The significance of MAEs lies in their capacity to efficiently utilize unlabeled data, enabling the extraction of meaningful patterns without costly annotation efforts. These approaches have shown remarkable performance in various computer vision tasks, establishing a framework for learning that parallels successful techniques in natural language processing, such as BERT.\n", 306 | "\n", 307 | "Recent innovations in the field of MAEs have built upon this foundational architecture to address specific limitations and enhance representation learning. For instance, **Feichtenhofer et al. (2022)** introduced a spatiotemporal extension of MAEs, demonstrating that high masking ratios (up to 90%) lead to significant improvements in representation quality and computational efficiency for video datasets, outperforming traditional supervised methods (Feichtenhofer et al., 2022). Subsequently, **Wu et al. (2023)** proposed DropMAE, which incorporates spatial-attention dropout to improve temporal matching capabilities, thereby setting new state-of-the-art results in visual object tracking and segmentation tasks (Wu et al., 2023). \n", 308 | "\n", 309 | "**Bandara et al. (2022)** introduced AdaMAE, which presented an end-to-end adaptive masking strategy that samples tokens based on their semantic relevance, achieving superior performance while efficiently managing computational resources (Bandara et al., 2022). Meanwhile, **Georgescu et al. (2022)** explored the integration of audiovisual information with MAE architectures, showing enhanced performance on audiovisual classification tasks, thus exemplifying the transferability of learned representations across modalities (Georgescu et al., 2022). Lastly, **Lu et al. 
(2023)** developed the Contrastive Masked Autoencoder for Video Action Recognition (CMAE-V), which enhances representation strength through a hybrid architecture, achieving state-of-the-art results on benchmark action recognition datasets (Lu et al., 2023). \n", 310 | "\n", 311 | "These advancements illuminate the versatility and robustness of MAE frameworks, pushing the boundaries of self-supervised learning and establishing new benchmarks in representation learning for video and audiovisual data.\n" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "lit_review = get_completion(messages)\n", 317 | "print(lit_review)" 318 | ] 319 | } 320 | ], 321 | "metadata": { 322 | "kernelspec": { 323 | "display_name": "py11", 324 | "language": "python", 325 | "name": "python3" 326 | }, 327 | "language_info": { 328 | "codemirror_mode": { 329 | "name": "ipython", 330 | "version": 3 331 | }, 332 | "file_extension": ".py", 333 | "mimetype": "text/x-python", 334 | "name": "python", 335 | "nbconvert_exporter": "python", 336 | "pygments_lexer": "ipython3", 337 | "version": "3.11.9" 338 | } 339 | }, 340 | "nbformat": 4, 341 | "nbformat_minor": 2 342 | } 343 | -------------------------------------------------------------------------------- /next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | async redirects() { 5 | return []; 6 | }, 7 | }; 8 | 9 | export default nextConfig; -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "arxiv-txt", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@vercel/analytics": "^1.5.0", 13 | "daisyui": "^4.12.23", 14 | 
"fast-xml-parser": "^5.0.6", 15 | "html-to-text": "^9.0.5", 16 | "next": "15.1.7", 17 | "prism-react-renderer": "^2.4.1", 18 | "react": "^19.0.0", 19 | "react-dom": "^19.0.0", 20 | "react-hot-toast": "^2.5.2" 21 | }, 22 | "devDependencies": { 23 | "@eslint/eslintrc": "^3", 24 | "@types/node": "^20", 25 | "@types/react": "^19", 26 | "@types/react-dom": "^19", 27 | "eslint": "^9.20.1", 28 | "eslint-config-next": "^15.1.7", 29 | "postcss": "^8", 30 | "tailwindcss": "^3.4.1", 31 | "typescript": "^5" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } 
// Glob patterns for the files Tailwind scans.
const contentGlobs = [
  "./pages/**/*.{js,ts,jsx,tsx,mdx}",
  "./components/**/*.{js,ts,jsx,tsx,mdx}",
  "./app/**/*.{js,ts,jsx,tsx,mdx}",
];

// Colour names mapped onto CSS custom properties.
const themeColors = {
  background: "var(--background)",
  foreground: "var(--foreground)",
};

/** Tailwind configuration: content globs, colour tokens and the daisyUI plugin. */
const config: Config = {
  content: contentGlobs,
  theme: {
    extend: { colors: themeColors },
  },
  plugins: [daisyui],
  daisyui: {
    // Alternative pairing previously tried: ["lemonade", "dark"].
    themes: ["retro", "dark"],
  },
};

export default config;