├── .gitignore
├── LICENSE
├── README.md
├── app
    ├── abs
    │   └── [id]
    │   │   └── page.js
    ├── components
    │   ├── ArxivInput.tsx
    │   ├── Layout.jsx
    │   ├── LoadingState.js
    │   ├── Metadata.js
    │   └── PaperView.js
    ├── favicon.ico
    ├── globals.css
    ├── hooks
    │   ├── usePaperContent.js
    │   └── usePaperMetadata.js
    ├── html
    │   └── [id]
    │   │   └── page.js
    ├── layout.tsx
    ├── lib
    │   └── arxiv.js
    ├── page.tsx
    ├── pdf
    │   └── [id]
    │   │   └── page.js
    └── raw
    │   ├── abs
    │       └── [id]
    │       │   └── route.js
    │   └── pdf
    │       └── [id]
    │           └── route.js
├── eslint.config.mjs
├── examples
    └── python
    │   └── lit_review.ipynb
├── next.config.ts
├── package-lock.json
├── package.json
├── postcss.config.mjs
├── public
    ├── file.svg
    ├── globe.svg
    ├── next.svg
    ├── vercel.svg
    └── window.svg
├── tailwind.config.ts
└── tsconfig.json


/.gitignore:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 2 | 
 3 | # dependencies
 4 | /node_modules
 5 | /.pnp
 6 | .pnp.*
 7 | .yarn/*
 8 | !.yarn/patches
 9 | !.yarn/plugins
10 | !.yarn/releases
11 | !.yarn/versions
12 | 
13 | # testing
14 | /coverage
15 | 
16 | # next.js
17 | /.next/
18 | /out/
19 | 
20 | # production
21 | /build
22 | 
23 | # misc
24 | .DS_Store
25 | *.pem
26 | 
27 | # debug
28 | npm-debug.log*
29 | yarn-debug.log*
30 | yarn-error.log*
31 | .pnpm-debug.log*
32 | 
33 | # env files (can opt-in for committing if needed)
34 | .env*
35 | 
36 | # vercel
37 | .vercel
38 | 
39 | # typescript
40 | *.tsbuildinfo
41 | next-env.d.ts
42 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Jeremy Pinto
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # arXiv-txt.org
  2 | 
  3 | A simple tool that makes arXiv papers instantly available in LLM-friendly formats by just changing the URL.
  4 | 
  5 | ## How It Works
  6 | 
  7 | 1. Find a paper on arXiv: `https://arxiv.org/abs/2502.10248`
  8 | 2. Change the URL to: `https://arxiv-txt.org/abs/2502.10248`
  9 | 3. Copy the LLM-optimized version of the paper
 10 | 
 11 | ## API Usage
 12 | 
 13 | See example use cases in the [examples/](examples/) directory.
 14 | 
 15 | ## Development
 16 | 
 17 | This project is built with Next.js and Tailwind CSS.
 18 | 
 19 | ### Prerequisites
 20 | 
 21 | - Node.js (18.18 or later)
 22 | - npm or yarn
 23 | 
 24 | ### Installation
 25 | 
 26 | ```bash
 27 | # Clone the repository
 28 | git clone https://github.com/jerpint/arxiv-txt.git
 29 | cd arxiv-txt
 30 | 
 31 | # Install dependencies
 32 | npm install
 33 | ```
 34 | 
 35 | ### Development Server
 36 | 
 37 | ```bash
 38 | npm run dev
 39 | ```
 40 | 
 41 | This runs the app in development mode. Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
 42 | 
 43 | ### Building for Production
 44 | 
 45 | ```bash
 46 | npm run build
 47 | ```
 48 | 
 49 | ## Deployment
 50 | 
 51 | The site can be deployed on Vercel without any additional configuration.
 52 | 
 53 | ```bash
 54 | # Install Vercel CLI
 55 | npm i -g vercel
 56 | 
 57 | # Deploy
 58 | vercel
 59 | ```
 60 | 
 61 | ## Technical Implementation
 62 | 
 63 | - **Frontend**: Next.js with App Router, React, and Tailwind CSS
 64 | - **API Proxy**: Next.js API routes handle CORS and rate limiting
 65 | - **Caching**: Response caching via Next.js and HTTP headers
 66 | - **Deployment**: Vercel with custom configuration
 67 | 
 68 | This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
 69 | 
 70 | 
 71 | ## Getting Started
 72 | 
 73 | First, run the development server:
 74 | 
 75 | ```bash
 76 | npm run dev
 77 | # or
 78 | yarn dev
 79 | # or
 80 | pnpm dev
 81 | # or
 82 | bun dev
 83 | ```
 84 | 
 85 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
 86 | 
 87 | You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
 88 | 
 89 | This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
 90 | 
 91 | ## Learn More
 92 | 
 93 | To learn more about Next.js, take a look at the following resources:
 94 | 
 95 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
 96 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
 97 | 
 98 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
 99 | 
100 | ## Deploy on Vercel
101 | 
102 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
103 | 
104 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
105 | 


--------------------------------------------------------------------------------
/app/abs/[id]/page.js:
--------------------------------------------------------------------------------
 1 | 'use client';
 2 | 
 3 | import { use } from 'react';
 4 | import LoadingState from '@/app/components/LoadingState';
 5 | import Layout from '@/app/components/Layout';
 6 | import { usePaperMetadata } from '@/app/hooks/usePaperMetadata';
 7 | import { usePaperContent } from '@/app/hooks/usePaperContent';
 8 | import PaperView from '@/app/components/PaperView';
 9 | 
10 | export default function AbstractPage({ params }) {
11 |   const unwrappedParams = use(params);
12 |   const { id } = unwrappedParams;
13 |   const { paper, loading: paperLoading, error: paperError, plainTextMetadata } = usePaperMetadata(id);
14 |   const { paperContent, loading: paperContentLoading, error: paperContentError } = usePaperContent(id);
15 | 
16 |   if (paperLoading || paperContentLoading) {
17 |     return <LoadingState />;
18 |   }
19 | 
20 |   if (paperError && paperContentError) {
21 |     return (
22 |       <div className="card bg-base-100 shadow-xl">
23 |         <div className="card-body text-center">
24 |           <h2 className="card-title text-error justify-center">Error</h2>
25 |           <p>Failed to fetch paper information and content.</p>
26 |           <p className="text-base-content/70">
27 |             Please check that you have entered a valid arXiv paper ID.
28 |           </p>
29 |         </div>
30 |       </div>
31 |     );
32 |   }
33 | 
34 |   if (!paper && !paperError) {
35 |     return null;
36 |   }
37 | 
38 |   return (
39 |     <>
40 |       <PaperView
41 |         paper={paper || { title: "Error fetching paper metadata", error: paperError }}
42 |         plainText={plainTextMetadata || ""}
43 |         paperContent={paperContent?.text || "Error fetching the paper content."}
44 |         paperContentError={paperContent?.error}
45 |         buttonText="Abstract"
46 |         type="abs"
47 |       />
48 |       <Layout />
49 |     </>
50 |   );
51 | }
52 | 


--------------------------------------------------------------------------------
/app/components/ArxivInput.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import { useState } from 'react';
 4 | 
 5 | /**
 6 |  * Form that accepts an arXiv ID or an arxiv.org URL and opens the matching
 7 |  * arxiv-txt.org abstract page in a new tab.
 8 |  */
 9 | export default function ArxivInput() {
10 |   const [arxivId, setArxivId] = useState('');
11 |   const [result, setResult] = useState('');
12 | 
13 |   const handleSubmit = (e: React.FormEvent) => {
14 |     e.preventDefault();
15 | 
16 |     let cleanId = arxivId.trim();
17 |     if (!cleanId) {
18 |       setResult('Please enter an arXiv ID');
19 |       return;
20 |     }
21 |     // Clear a validation message left over from an earlier empty submit.
22 |     setResult('');
23 | 
24 |     // Pull the ID out of URLs like arxiv.org/abs/[id] or arxiv.org/pdf/[id].
25 |     // Slashes are allowed so old-style IDs (e.g. math/0211159) stay intact;
26 |     // a query string or fragment ends the match.
27 |     const matches = cleanId.match(/arxiv\.org\/(?:abs|pdf)\/([^\s?#]+)/);
28 |     if (matches) {
29 |       cleanId = matches[1];
30 |     }
31 | 
32 |     // Drop a trailing slash, a ".pdf" extension, then a version suffix (e.g., 1706.03762v1 -> 1706.03762)
33 |     cleanId = cleanId.replace(/\/+$/, '').replace(/\.pdf$/i, '').replace(/v\d+$/, '');
34 | 
35 |     const arxivTxtUrl = `https://arxiv-txt.org/abs/${cleanId}`;
36 | 
37 |     // noopener/noreferrer keeps the new tab from reaching back through window.opener.
38 |     window.open(arxivTxtUrl, '_blank', 'noopener,noreferrer');
39 |   };
40 | 
41 |   return (
42 |     <div className="card bg-base-100 w-full max-w-2xl shadow-xl mb-6">
43 |       <div className="card-body py-6">
44 |         <h2 className="card-title mb-2">How it works</h2>
45 |         <p className="mb-4">
46 |         Replace <code className="badge badge-ghost">arxiv.org</code> to
47 |         <code className="badge badge-ghost ml-2">arxiv-txt.org</code>
48 |         </p>
49 |         <form onSubmit={handleSubmit} className="space-y-4">
50 |           <div>
51 |             <label htmlFor="arxiv-id" className="block text-sm font-medium mb-1">
52 |               Enter arXiv ID or URL:
53 |             </label>
54 |             <div className="flex gap-2">
55 |               <input
56 |                 id="arxiv-id"
57 |                 type="text"
58 |                 value={arxivId}
59 |                 onChange={(e) => setArxivId(e.target.value)}
60 |                 placeholder="e.g., 1706.03762 or https://arxiv.org/abs/1706.03762"
61 |                 className="input input-bordered flex-grow"
62 |               />
63 |               <button type="submit" className="btn btn-primary">
64 |                 Open
65 |               </button>
66 |             </div>
67 |           </div>
68 |           <div className="text-sm mt-1">
69 |             Example:&nbsp;
70 |             <a
71 |               href="/abs/1706.03762"
72 |               target="_blank"
73 |               rel="noopener noreferrer"
74 |               className="link link-primary"
75 |             >
76 |               https://arxiv-txt.org/abs/1706.03762
77 |             </a>
78 |           </div>
79 |         </form>
80 | 
81 |         {result && (
82 |           <div className="mt-4 p-4 bg-base-200 rounded-lg">
83 |             <p>{result}</p>
84 |           </div>
85 |         )}
86 |       </div>
87 |     </div>
88 |   );
89 | }


--------------------------------------------------------------------------------
/app/components/Layout.jsx:
--------------------------------------------------------------------------------
 1 | export default function Layout({ children = null }) {
 2 |   return (
 3 |     <div>
 4 |       {children}
 5 |       <footer className="text-sm opacity-75">
 6 |         <p className="flex items-center justify-center gap-1">
 7 |           Made with <span className="text-error">❤</span> by{" "}
 8 |           <a
 9 |             href="https://twitter.com/jerpint"
10 |             className="link link-primary"
11 |             target="_blank"
12 |             rel="noopener noreferrer"
13 |           >
14 |             @jerpint
15 |           </a>
16 |         </p>
17 |         <div className="mt-2 flex items-center justify-center gap-4">
18 |           <a
19 |             href="https://github.com/jerpint/arxiv-txt"
20 |             className="link link-primary"
21 |             target="_blank"
22 |             rel="noopener noreferrer"
23 |           >
24 |             GitHub
25 |           </a>
26 |           <span>•</span>
27 |           <a
28 |             href="https://jerpint.io"
29 |             className="link link-primary"
30 |             target="_blank"
31 |             rel="noopener noreferrer"
32 |           >
33 |             Blog
34 |           </a>
35 |         </div>
36 |       </footer>
37 |     </div>
38 |   );
39 | }


--------------------------------------------------------------------------------
/app/components/LoadingState.js:
--------------------------------------------------------------------------------
1 | export default function LoadingState() {
2 |   return (
3 |     <div className="flex flex-col items-center justify-center py-16">
4 |       <div className="w-12 h-12 border-4 border-blue-200 border-t-blue-600 rounded-full animate-spin mb-4"></div>
5 |       <p className="text-gray-600">Loading paper information...</p>
6 |     </div>
7 |   );
8 | }


--------------------------------------------------------------------------------
/app/components/Metadata.js:
--------------------------------------------------------------------------------
 1 | export default function Metadata({ paper }) {
 2 |   if (!paper) return null;
 3 | 
 4 |   return (
 5 |     <section className="mb-8">
 6 |       <div className="mb-6">
 7 |         <h2 className="text-lg font-semibold mb-2">Authors</h2>
 8 |         <p className="text-gray-800">{paper.authors.join(', ')}</p>
 9 |       </div>
10 | 
11 |       <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
12 |         <div>
13 |           <h3 className="text-sm font-medium text-gray-500 mb-2">Categories</h3>
14 |           <div className="flex flex-wrap gap-2">
15 |             {paper.categories.map((category, index) => (
16 |               <span
17 |                 key={index}
18 |                 className="inline-block bg-blue-50 text-blue-700 px-2 py-1 rounded-md text-sm"
19 |               >
20 |                 {category}
21 |               </span>
22 |             ))}
23 |           </div>
24 |         </div>
25 | 
26 |         <div>
27 |           <h3 className="text-sm font-medium text-gray-500 mb-2">Dates</h3>
28 |           <p className="text-gray-800">
29 |             <span className="font-medium">Published:</span> {paper.publishedDate}
30 |             {paper.updatedDate !== paper.publishedDate && (
31 |               <>
32 |                 <br />
33 |                 {/* <span className="font-medium">Last Updated:</span> {paper.updatedDate} */}
34 |               </>
35 |             )}
36 |           </p>
37 |         </div>
38 | 
39 |         {paper.journalRef && (
40 |           <div>
41 |             <h3 className="text-sm font-medium text-gray-500 mb-2">Journal Reference</h3>
42 |             <p className="text-gray-800">{paper.journalRef}</p>
43 |           </div>
44 |         )}
45 | 
46 |         <div>
47 |           <h3 className="text-sm font-medium text-gray-500 mb-2">arXiv ID</h3>
48 |           <p className="text-gray-800 font-mono">{paper.id}</p>
49 |         </div>
50 |       </div>
51 |     </section>
52 |   );
53 | }


--------------------------------------------------------------------------------
/app/components/PaperView.js:
--------------------------------------------------------------------------------
  1 | 'use client';
  2 | 
  3 | import { Toaster, toast } from 'react-hot-toast';
  4 | 
  5 | export default function PaperView({ paper, plainText, paperContent }) {
  6 |   const copyToClipboard = async (text) => {
  7 |     try {
  8 |       await navigator.clipboard.writeText(text);
  9 |       toast.success('Copied to clipboard!', {
 10 |         duration: 2000,
 11 |         style: {
 12 |           background: '#4ade80',
 13 |           color: '#fff',
 14 |         },
 15 |         iconTheme: {
 16 |           primary: '#fff',
 17 |           secondary: '#10b981',
 18 |         }
 19 |       });
 20 |     } catch (err) {
 21 |       console.error('Failed to copy: ', err);
 22 |       toast.error('Failed to copy text', {
 23 |         duration: 2000,
 24 |       });
 25 |     }
 26 |   };
 27 | 
 28 |   const copyAllContent = () => {
 29 |     const combinedText = `Summary:\n${plainText}\n\nPaper Content:\n${paperContent}`;
 30 |     copyToClipboard(combinedText);
 31 |   };
 32 | 
 33 |   return (
 34 |     <>
 35 |       <Toaster position="top-right" />
 36 |       <div className="card bg-base-100 shadow-xl">
 37 |         <div className="card-body">
 38 |           <div className="flex justify-between items-start">
 39 |             <div className="flex flex-col gap-2">
 40 |               <h1 className="card-title text-2xl">{paper.title}</h1>
 41 |               <p className="text-sm text-gray-500">
 42 |                 arXiv: <a
 43 |                   href={`https://arxiv.org/abs/${paper.id}`}
 44 |                   target="_blank"
 45 |                   rel="noopener noreferrer"
 46 |                   className="hover:underline text-blue-500"
 47 |                 >
 48 |                   {paper.id}
 49 |                 </a>
 50 |               </p>
 51 |             </div>
 52 |             <button
 53 |               onClick={copyAllContent}
 54 |               className="btn btn-primary btn-sm gap-2"
 55 |             >
 56 |               <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
 57 |                 <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" />
 58 |               </svg>
 59 |               Copy All
 60 |             </button>
 61 |           </div>
 62 | 
 63 |           <div className="divider"></div>
 64 | 
 65 |           <div className="text-sm border-l-4 border-base-content/20 pl-4 py-1 mb-4 text-base-content/80">
 66 |             The PDF content is experimental. Please report any issues on <a href="https://github.com/jerpint/arxiv-txt/issues" className="underline">GitHub</a>.
 67 |           </div>
 68 | 
 69 |           <div className="flex flex-col md:flex-row gap-4">
 70 |             <div className="flex-1">
 71 |               <div className="flex justify-between items-center mb-2">
 72 |                 <h3 className="text-lg font-medium">Summary</h3>
 73 |                 <button
 74 |                   onClick={() => copyToClipboard(plainText)}
 75 |                   className="btn btn-primary btn-sm gap-2"
 76 |                 >
 77 |                   <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
 78 |                     <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" />
 79 |                   </svg>
 80 |                   Copy
 81 |                 </button>
 82 |               </div>
 83 |               <div className="relative">
 84 |                 <pre className="bg-base-200 p-4 rounded-box overflow-auto text-sm font-mono whitespace-pre-wrap h-[400px] break-all overflow-x-hidden">
 85 |                   {plainText}
 86 |                 </pre>
 87 |               </div>
 88 |             </div>
 89 | 
 90 |             <div className="flex-1">
 91 |               <div className="flex justify-between items-center mb-2">
 92 |                 <h3 className="text-lg font-medium">PDF Content</h3>
 93 |                 <button
 94 |                   onClick={() => copyToClipboard(paperContent)}
 95 |                   className="btn btn-primary btn-sm gap-2"
 96 |                 >
 97 |                   <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
 98 |                     <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3" />
 99 |                   </svg>
100 |                   Copy
101 |                 </button>
102 |               </div>
103 |               <div className="relative h-[400px]">
104 |                 <pre className="bg-base-200 p-4 rounded-box overflow-y-auto text-sm font-mono whitespace-pre-wrap absolute inset-0 break-all overflow-x-hidden">
105 |                   {paperContent}
106 |                 </pre>
107 |               </div>
108 |             </div>
109 |           </div>
110 |         </div>
111 |       </div>
112 |     </>
113 |   );
114 | }
115 | 


--------------------------------------------------------------------------------
/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jerpint/arxiv-txt/e725f3f1eddc48b7ab70c0bb84d58d630667f645/app/favicon.ico


--------------------------------------------------------------------------------
/app/globals.css:
--------------------------------------------------------------------------------
 1 | @tailwind base;
 2 | @tailwind components;
 3 | @tailwind utilities;
 4 | 
 5 | :root {
 6 |   /* These will be overridden by daisyUI themes */
 7 |   --foreground-rgb: 0, 0, 0;
 8 |   --background-rgb: 248, 250, 252;
 9 | }
10 | 
11 | body {
12 |   min-height: 100vh;
13 |   /* Let daisyUI handle the colors through themes */
14 | }
15 | 
16 | /* Add some fun animations and transitions */
17 | @layer base {
18 |   h1, h2, h3, h4, h5, h6 {
19 |     @apply font-semibold;
20 |   }
21 | 
22 |   code {
23 |     @apply font-mono py-1 px-2 rounded transition-all duration-300;
24 |   }
25 | 
26 |   pre {
27 |     @apply font-mono;
28 |   }
29 | 
30 |   a {
31 |     @apply transition-all duration-300;
32 |   }
33 | }
34 | 
35 | @layer components {
36 |   .glow-effect {
37 |     @apply transition-all duration-300;
38 |     filter: drop-shadow(0 0 8px theme('colors.primary'));
39 |   }
40 | 
41 |   .glow-effect:hover {
42 |     filter: drop-shadow(0 0 12px theme('colors.secondary'));
43 |   }
44 | 
45 |   .bounce-hover {
46 |     @apply transition-transform duration-200;
47 |   }
48 | 
49 |   .bounce-hover:hover {
50 |     transform: translateY(-4px);
51 |   }
52 | }
53 | 
54 | /* Background gradients for the "cyberpunk" and "synthwave" themes */
55 | [data-theme="cyberpunk"] {
56 |   background-image: linear-gradient(to bottom right,
57 |     hsl(var(--b1)),
58 |     hsl(var(--b2))
59 |   );
60 | }
61 | 
62 | [data-theme="synthwave"] {
63 |   background-image: linear-gradient(to bottom right,
64 |     hsl(var(--b1)),
65 |     hsl(var(--p))
66 |   );
67 | }
68 | 
69 | /* Hide the ::before pseudo-element of .mockup-code */
70 | .mockup-code::before {
71 |   display: none !important;
72 | }


--------------------------------------------------------------------------------
/app/hooks/usePaperContent.js:
--------------------------------------------------------------------------------
 1 | import { useState, useEffect } from 'react';
 2 | 
 3 | export function usePaperContent(id) {
 4 |   const [loading, setLoading] = useState(true);
 5 |   const [error, setError] = useState(null);
 6 |   const [paperContent, setPaperContent] = useState({ text: '', error: null });
 7 | 
 8 |   useEffect(() => {
 9 |     console.log('0. Starting fetch for ID:', id);
10 | 
11 |     async function fetchPaper() {
12 |       setLoading(true);
13 |       setError(null);
14 | 
15 |       try {
16 |         const response = await fetch(`/raw/pdf/${id}`);
17 | 
18 |         if (!response.ok) {
19 |           setPaperContent({
20 |             error: `Failed to fetch paper: ${response.status} ${response.statusText}`,
21 |             text: null
22 |           });
23 |           setError(`Failed to fetch paper: ${response.status} ${response.statusText}`);
24 |           setLoading(false);
25 |           return;
26 |         }
27 | 
28 |         const content = await response.text();
29 | 
30 |         console.log('Fetched content:', content);
31 |         setPaperContent({ text: content, error: null });
32 |       } catch (err) {
33 |         console.error('Error loading paper content:', err);
34 |         setError(err.message || 'Failed to load paper content');
35 |         setPaperContent({ text: null, error: err.message || 'Failed to load paper content' });
36 |       } finally {
37 |         setLoading(false);
38 |       }
39 |     }
40 | 
41 |     if (id) {
42 |       fetchPaper();
43 |     }
44 |   }, [id]);
45 | 
46 |   return { paperContent, loading, error };
47 | }


--------------------------------------------------------------------------------
/app/hooks/usePaperMetadata.js:
--------------------------------------------------------------------------------
 1 | import { useState, useEffect } from 'react';
 2 | 
 3 | export function usePaperMetadata(id) {
 4 |   const [paper, setPaper] = useState(null);
 5 |   const [loading, setLoading] = useState(true);
 6 |   const [error, setError] = useState(null);
 7 |   const [plainTextMetadata, setPlainTextMetadata] = useState('');
 8 | 
 9 |   useEffect(() => {
10 | 
11 |     async function fetchPaper() {
12 |       setLoading(true);
13 |       setError(null);
14 | 
15 | 
16 |       try {
17 |         const response = await fetch(`/raw/abs/${id}`);
18 | 
19 |         if (!response.ok) {
20 |           throw new Error(`Failed to fetch paper: ${response.status} ${response.statusText}`);
21 |         }
22 | 
23 |         const content = await response.text();
24 | 
25 |         console.log('Fetched content:', content);
26 |         setPlainTextMetadata(content);
27 |         // Parse the plain text content to extract structured data
28 |         const sections = content.split('\n\n');
29 |         const paperData = {
30 |           title: sections[0].replace('# Title\n', ''),
31 |           authors: sections[1].replace('# Authors\n', '').split(', '),
32 |           categories: sections[2].replace('# Categories\n', '').split(', '),
33 |           abstract: sections[sections.length - 1].replace('# Abstract\n', ''),
34 |           publishedDate: sections[3].replace('# Publication Details\n', '').split('\n')[0].replace('- Published: ', ''),
35 |           id: id
36 |         };
37 | 
38 |         // Extract DOI if present
39 |         const doiMatch = content.match(/DOI: (.*)/);
40 |         if (doiMatch) {
41 |           paperData.doi = doiMatch[1];
42 |         }
43 | 
44 |         setPaper(paperData);
45 |       } catch (err) {
46 |         console.error('Error loading paper:', err);
47 |         setError(err.message || 'Failed to load paper information');
48 |       } finally {
49 |         setLoading(false);
50 |       }
51 |     }
52 | 
53 |     if (id) {
54 |       fetchPaper();
55 |     }
56 |   }, [id]);
57 | 
58 |   return { paper, loading, error, plainTextMetadata };
59 | }


--------------------------------------------------------------------------------
/app/html/[id]/page.js:
--------------------------------------------------------------------------------
 1 | 'use client';
 2 | 
 3 | import AbstractPage from '@/app/abs/[id]/page';
 4 | 
 5 | /**
 6 |  * This page is a duplicate of /abs/[id] endpoint.
 7 |  * The functionality is identical to the /abs/[id] endpoint.
 8 |  */
 9 | 
10 | export default function PDFPage(props) {
11 |   return <AbstractPage {...props} />;
12 | }
13 | 


--------------------------------------------------------------------------------
/app/layout.tsx:
--------------------------------------------------------------------------------
 1 | import { Inter } from 'next/font/google';
 2 | import Link from 'next/link';
 3 | import { ReactNode } from 'react';
 4 | import './globals.css';
 5 | import { Analytics } from '@vercel/analytics/next';
 6 | 
 7 | const inter = Inter({ subsets: ['latin'] });
 8 | 
 9 | export const metadata = {
10 |   title: 'arXiv-txt.org - LLM-friendly arXiv papers',
11 |   description: 'Convert arXiv papers into LLM-friendly formats',
12 | };
13 | 
14 | /**
15 |  * Root layout: wraps every page in the drawer shell made of the top navbar,
16 |  * the main content column and the side menu.
17 |  */
18 | export default function RootLayout({ children }: { children: ReactNode }) {
19 |   // Id of the drawer checkbox; both labels reference it through htmlFor.
20 |   const drawerId = "my-drawer-3";
21 | 
22 |   const menuIcon = (
23 |     <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" className="inline-block w-6 h-6 stroke-current">
24 |       <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M4 6h16M4 12h16M4 18h16"></path>
25 |     </svg>
26 |   );
27 | 
28 |   const externalLinkIcon = (
29 |     <svg className="w-3 h-3" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor">
30 |       <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" />
31 |     </svg>
32 |   );
33 | 
34 |   return (
35 |     <html lang="en">
36 |       <body className={inter.className}>
37 |         <div className="drawer">
38 |           <input id={drawerId} type="checkbox" className="drawer-toggle" />
39 |           <div className="drawer-content flex flex-col">
40 |             {/* Top navigation bar */}
41 |             <div className="navbar bg-base-300">
42 |               <div className="flex-none lg:hidden">
43 |                 <label htmlFor={drawerId} className="btn btn-square btn-ghost">
44 |                   {menuIcon}
45 |                 </label>
46 |               </div>
47 |               <div className="flex-1 px-2 mx-2">
48 |                 <Link href="/" className="text-xl font-bold">arXiv-txt.org</Link>
49 |                 <span className="ml-4 text-sm opacity-70">LLM-friendly arXiv papers</span>
50 |                 <a href="https://github.com/jerpint/arxiv-txt" className="ml-4 link inline-flex items-center gap-1" target="_blank" rel="noopener noreferrer">
51 |                   GitHub
52 |                   {externalLinkIcon}
53 |                 </a>
54 |               </div>
55 |             </div>
56 |             {/* Routed page content, followed by the analytics component */}
57 |             <main className="container mx-auto px-4 py-8 max-w-3xl">
58 |               {children}
59 |               <Analytics />
60 |             </main>
61 |           </div>
62 |           <div className="drawer-side">
63 |             <label htmlFor={drawerId} className="drawer-overlay"></label>
64 |             <ul className="menu p-4 w-80 min-h-full bg-base-200">
65 |               <li><Link href="/">Home</Link></li>
66 |               {/* Further menu entries go here */}
67 |             </ul>
68 |           </div>
69 |         </div>
70 |       </body>
71 |     </html>
72 |   );
73 | }


--------------------------------------------------------------------------------
/app/lib/arxiv.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Helper functions for working with the arXiv API
  3 |  */
  4 | import { htmlToText } from 'html-to-text';
  5 | 
  6 | // arXiv API base URL
  7 | const ARXIV_API_BASE = 'http://export.arxiv.org/api/query';
  8 | 
  9 | /**
 10 |  * Normalize different arXiv ID formats to a standard format
 11 |  * @param {string} id - The arXiv paper ID in various formats
 12 |  * @returns {string} - Normalized ID
 13 |  */
 14 | export const normalizePaperId = (id) => {
 15 |   if (!id) return null;
 16 | 
 17 |   // Handle IDs like "2502.10248"
 18 |   if (/^\d{4}\.\d{5}(v\d+)?$/.test(id)) {
 19 |     return id;
 20 |   }
 21 | 
 22 |   // Handle IDs like "math/0211159v1"
 23 |   if (/^[a-z-]+\/\d{7}(v\d+)?$/.test(id)) {
 24 |     return id;
 25 |   }
 26 | 
 27 |   // Handle IDs that include "abs/" or "pdf/" prefix
 28 |   const match = id.match(/(?:abs|pdf)\/(.+)/);
 29 |   if (match) {
 30 |     return match[1];
 31 |   }
 32 | 
 33 |   return id;
 34 | };
 35 | 
 36 | /**
 37 |  * Get the arXiv API URL for a paper
 38 |  * @param {string} paperId - The normalized arXiv paper ID
 39 |  * @returns {string} - Full arXiv API URL
 40 |  */
 41 | export const getArxivApiUrl = (paperId) => {
 42 |   const normalizedId = normalizePaperId(paperId);
 43 |   return `${ARXIV_API_BASE}?id_list=${normalizedId}`;
 44 | };
 45 | 
 46 | /**
 47 |  * Parse XML response from arXiv API
 48 |  * @param {string} xmlData - XML response text from arXiv API
 49 |  * @returns {object} - Structured paper metadata
 50 |  */
 51 | export const parseArxivResponse = (xmlData) => {
 52 |   // We'll parse the XML in the browser
 53 |   const parser = new DOMParser();
 54 |   const xmlDoc = parser.parseFromString(xmlData, "text/xml");
 55 | 
 56 |   // Check for parsing errors
 57 |   const parseError = xmlDoc.querySelector('parsererror');
 58 |   if (parseError) {
 59 |     throw new Error('Failed to parse arXiv API response');
 60 |   }
 61 | 
 62 |   // Extract entry data
 63 |   const entry = xmlDoc.querySelector('entry');
 64 |   if (!entry) {
 65 |     throw new Error('Paper not found');
 66 |   }
 67 | 
 68 |   // Helper function to get text content from nodes
 69 |   const getNodeText = (parent, selector) => {
 70 |     const node = parent.querySelector(selector);
 71 |     return node ? node.textContent.trim() : '';
 72 |   };
 73 | 
 74 |   // Extract categories
 75 |   const getCategories = (entry) => {
 76 |     const primaryCategory = entry.querySelector('arxiv\\:primary_category')?.getAttribute('term');
 77 |     const categories = Array.from(entry.querySelectorAll('category'))
 78 |       .map(cat => cat.getAttribute('term'));
 79 | 
 80 |     // Ensure primary category is included and listed first
 81 |     const uniqueCategories = [primaryCategory, ...categories]
 82 |       .filter((cat, index, self) => cat && self.indexOf(cat) === index);
 83 | 
 84 |     return uniqueCategories;
 85 |   };
 86 | 
 87 |   // Format date to a more readable format
 88 |   const formatDate = (dateString) => {
 89 |     if (!dateString) return '';
 90 | 
 91 |     try {
 92 |       const date = new Date(dateString);
 93 |       return date.toLocaleDateString('en-US', {
 94 |         year: 'numeric',
 95 |         month: 'long',
 96 |         day: 'numeric'
 97 |       });
 98 |     } catch (e) {
 99 |         console.error('Error formatting date:', e);
100 |         return dateString;
101 |     }
102 |   };
103 | 
104 |   // Extract and structure the data
105 |   const normalizedId = normalizePaperId(getNodeText(entry, 'id').split('/').pop());
106 | 
107 |   const metadata = {
108 |     id: normalizedId,
109 |     title: getNodeText(entry, 'title'),
110 |     abstract: getNodeText(entry, 'summary'),
111 |     authors: Array.from(entry.querySelectorAll('author')).map(author =>
112 |       getNodeText(author, 'name')
113 |     ),
114 |     categories: getCategories(entry),
115 |     publishedDate: formatDate(getNodeText(entry, 'published')),
116 |     updatedDate: formatDate(getNodeText(entry, 'updated')),
117 |     pdfLink: entry.querySelector('link[title="pdf"]')?.getAttribute('href') || '',
118 |     doi: getNodeText(entry, 'arxiv\\:doi') || null,
119 |     journalRef: getNodeText(entry, 'arxiv\\:journal_ref') || null,
120 |   };
121 | 
122 |   return metadata;
123 | };
124 | 
125 | /**
126 |  * Generate a plain text representation of paper metadata
127 |  * @param {object} paper - Paper metadata object
128 |  * @returns {string} - Plain text representation
129 |  */
130 | export const generatePlainTextFormat = (paper) => {
131 |   return `# ${paper.title}
132 | 
133 | ## Authors
134 | ${paper.authors.join(', ')}
135 | 
136 | ## Categories
137 | ${paper.categories.join(', ')}
138 | 
139 | ## Publication Details
140 | - Published: ${paper.publishedDate}
141 | - arXiv ID: ${paper.id}
142 | 
143 | ## Abstract
144 | ${paper.abstract}
145 | `;
146 | };
147 | 
148 | 
149 | export class ArxivError extends Error {
150 |   constructor(message, statusCode) {
151 |     super(message);
152 |     this.name = 'ArxivError';
153 |     this.statusCode = statusCode;
154 |   }
155 | }
156 | 
/**
 * Fetch the HTML rendering of an arXiv paper from https://arxiv.org/html/.
 *
 * @param {string} paperId - The arXiv paper ID; normalized before use, the
 *   same way fetchArxivBibTeX does
 * @returns {Promise<string>} - The response body as text
 * @throws {ArxivError} With the upstream HTTP status when the response is not
 *   OK, or with status 500 for any other failure (e.g. a network error)
 */
export async function fetchArxivHtml(paperId) {
  try {
    // Normalize so "abs/…", "pdf/…" and full-URL forms resolve to the bare ID,
    // consistent with the other fetch helper in this module.
    const normalizedId = normalizePaperId(paperId);
    const response = await fetch(`https://arxiv.org/html/${normalizedId}`);

    if (!response.ok) {
      throw new ArxivError(
        `Failed to fetch arXiv paper ${paperId}`,
        response.status
      );
    }

    return await response.text();
  } catch (error) {
    // Errors raised above already carry the upstream status; pass them through.
    if (error instanceof ArxivError) {
      throw error;
    }
    throw new ArxivError(`Error fetching arXiv paper: ${error.message}`, 500);
  }
}
176 | 
/**
 * Convert paper HTML into plain text, keeping math as LaTeX.
 *
 * Steps:
 *  1. Each <math> element is swapped for a placeholder. Its LaTeX comes from
 *     the `application/x-tex` annotation, or from the `alttext` attribute
 *     when no annotation exists; elements with neither are left untouched.
 *  2. The remaining HTML is converted with htmlToText.
 *  3. Placeholders are replaced by `$…$` (inline) or `$$…$$` (display) LaTeX.
 *  4. Whitespace and bracketed numeric citations are tidied up.
 *
 * @param {string} html - HTML document to convert
 * @returns {string} - Plain text with embedded LaTeX
 * @throws {ArxivError} Status 500 when the conversion step fails
 */
export function convertHtmlToText(html) {
  // LaTeX lifted straight out of the markup never passes through htmlToText,
  // so character references (&lt;, &amp;, &#x2026;, …) are decoded here.
  // A single pass avoids double-decoding sequences such as "&amp;lt;".
  const namedEntities = { lt: '<', gt: '>', amp: '&', quot: '"', apos: "'" };
  const decodeEntities = (value) =>
    value.replace(/&(#x[0-9a-f]+|#\d+|lt|gt|amp|quot|apos);/gi, (whole, body) => {
      if (body[0] !== '#') {
        return namedEntities[body.toLowerCase()];
      }
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      // Leave out-of-range references untouched rather than throwing.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });

  // First, extract all math nodes and their LaTeX content
  const mathRegex = /<math[^>]*>([\s\S]*?)<\/math>/g;
  const latexMap = new Map();

  html = html.replace(mathRegex, (match, content) => {
    // Extract LaTeX annotation
    const texMatch = content.match(/<annotation encoding="application\/x-tex">([\s\S]*?)<\/annotation>/);
    if (texMatch) {
      const isDisplay = match.includes('display="block"');
      const tex = decodeEntities(texMatch[1].trim());
      const placeholder = `__MATH_${latexMap.size}__`;
      latexMap.set(placeholder, isDisplay ? `\n\n$$${tex}$$\n\n` : `$${tex}$`);
      return placeholder;
    }
    // Fallback to alttext
    const altMatch = match.match(/alttext="([^"]*)"/);
    if (altMatch) {
      const placeholder = `__MATH_${latexMap.size}__`;
      latexMap.set(placeholder, `$${decodeEntities(altMatch[1])}$`);
      return placeholder;
    }
    return match;
  });

  const options = {
    wordwrap: false,
    preserveNewlines: true,
    singleNewLineParagraphs: true,
    selectors: [
      {
        selector: 'p',
        format: 'block',
        transform: (content) => `${content}\n\n`
      }
    ]
  };

  try {
    let text = htmlToText(html, options);

    // Replace math placeholders with LaTeX. A replacer function is required:
    // with a string replacement, "$$", "$&", "$`" and "$'" are treated as
    // special patterns, which turned every "$$…$$" delimiter into "$…$".
    latexMap.forEach((latex, placeholder) => {
      text = text.replace(placeholder, () => latex);
    });

    // NOTE(review): `\s` also matches "\n", so the /\s+$/gm step removes
    // newlines as well as trailing spaces. Left unchanged because it shapes
    // the current output — confirm whether only spaces/tabs were intended.
    return text
      .replace(/\n{3,}/g, '\n\n')
      .replace(/\s+$/gm, '')
      .replace(/([^.])\n\n([^\n])/g, '$1 $2')
      .replace(/\s*\[\s*(\d+(?:,\s*\d+)*)\s*\]/g, ' [$1]')
      .trim();
  } catch (error) {
    throw new ArxivError(`Error converting HTML to text: ${error.message}`, 500);
  }
}
233 | 
/**
 * Download the BibTeX entry that arxiv.org publishes for a paper.
 *
 * @param {string} paperId - The arXiv paper ID (normalized before the request)
 * @returns {Promise<string>} - BibTeX citation text
 * @throws {ArxivError} With the upstream HTTP status when the response is not
 *   OK, or with status 500 for any other failure
 */
export const fetchArxivBibTeX = async (paperId) => {
  // ArxivErrors pass through untouched; everything else is wrapped as a 500.
  const toArxivError = (cause) =>
    cause instanceof ArxivError
      ? cause
      : new ArxivError(`Error fetching BibTeX: ${cause.message}`, 500);

  try {
    const response = await fetch(`https://arxiv.org/bibtex/${normalizePaperId(paperId)}`);

    if (response.ok) {
      return await response.text();
    }

    throw new ArxivError(
      `Failed to fetch BibTeX for paper ${paperId}`,
      response.status
    );
  } catch (cause) {
    throw toArxivError(cause);
  }
};


--------------------------------------------------------------------------------
/app/page.tsx:
--------------------------------------------------------------------------------
  1 | import { Highlight, themes } from "prism-react-renderer";
  2 | import Layout from '@/app/components/Layout';
  3 | import ArxivInput from '@/app/components/ArxivInput';
  4 | 
  5 | export default function Home() {
  6 |   const pythonCode = `import requests
  7 | 
  8 | arxiv_url = "https://arxiv.org/abs/1706.03762"
  9 | arxiv_txt_url = arxiv_url.replace("arxiv.org", "arxiv-txt.org/raw/")
 10 | summary: str = requests.get(arxiv_txt_url).text
 11 | print(summary)
 12 | 
 13 | # Pass this to your favorite agent`;
 14 | 
 15 |   const bashCode = `# Save the raw text to a file
 16 | curl -o paper.txt https://arxiv-txt.org/raw/abs/1706.03762
 17 | 
 18 | # or pipe directly to CLI apps:
 19 | # This example uses the 'llm' library
 20 | # https://github.com/simonw/llm
 21 | 
 22 | curl -L https://arxiv-txt.org/raw/abs/1706.03762 | \\
 23 | llm -s "Explain this paper like I'm 5"
 24 | `;
 25 | 
 26 | 
 27 |   return (
 28 |     <div className="container mx-auto px-4 py-8 flex flex-col items-center">
 29 |       <h1 className="text-4xl font-bold mb-4">
 30 |         arXiv-txt.org
 31 |       </h1>
 32 |       <p className="text-lg opacity-90 mb-6">
 33 |         LLM-friendly arXiv papers | <a href="https://github.com/jerpint/arxiv-txt" className="link inline-flex items-center gap-1" target="_blank" rel="noopener noreferrer">
 34 |           GitHub
 35 |           <svg className="w-3 h-3" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor">
 36 |             <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" />
 37 |           </svg>
 38 |         </a>
 39 |       </p>
 40 | 
 41 |       <ArxivInput />
 42 | 
 43 |       <div className="card bg-base-100 w-full max-w-2xl shadow-xl my-8">
 44 |         <div className="card-body py-6">
 45 |           <h1 className="text-2xl font-bold mb-3 text-center">
 46 |             API Usage Guide
 47 |           </h1>
 48 | 
 49 |           <div className="space-y-3">
 50 |             <p className="text-center mb-4">
 51 |               arXiv-txt is designed to be API-friendly
 52 |             </p>
 53 | 
 54 |             <div className="grid grid-cols-1 md:grid-cols-2 gap-6 mb-6">
 55 |               <div>
 56 |                 <p className="mb-2">Fetch a text summary:</p>
 57 |                 <div className="bg-base-200 p-4 rounded-lg">
 58 |                   <code className="badge badge-ghost p-3 w-full flex justify-center">arxiv-txt.org/raw/abs/[id]</code>
 59 |                 </div>
 60 |               </div>
 61 | 
 62 |               <div>
 63 |                 <p className="mb-2">Fetch the full paper content:</p>
 64 |                 <div className="bg-base-200 p-4 rounded-lg">
 65 |                   <code className="badge badge-ghost p-3 w-full flex justify-center">arxiv-txt.org/raw/pdf/[id]</code>
 66 |                 </div>
 67 |               </div>
 68 |             </div>
 69 | 
 70 |             <div className="mt-6">
 71 |               <div className="text-sm opacity-75 mb-2">Try it out:</div>
 72 |               <div className="p-4 bg-base-300 rounded-lg hover:bg-base-200 transition-colors">
 73 |                 <a href="https://arxiv-txt.org/raw/abs/1706.03762" className="link link-primary block text-center">
 74 |                   https://arxiv-txt.org/raw/abs/1706.03762
 75 |                 </a>
 76 |               </div>
 77 |             </div>
 78 |           </div>
 79 | 
 80 |           <div className="divider my-6"></div>
 81 | 
 82 |           <div className="mb-6">
 83 |             <h3 id="python" className="text-xl font-semibold mb-3">Python</h3>
 84 |             <p className="mb-3"> Checkout this example <a href="https://github.com/jerpint/arxiv-txt/blob/main/examples/python/lit_review.ipynb" target="_blank" rel="noopener noreferrer" className="link link-primary">Jupyter Notebook</a> or use the arxiv-txt API directly </p>
 85 | 
 86 |             <div className="mockup-code relative overflow-x-auto rounded-lg shadow-md">
 87 |               <Highlight theme={themes.gruvboxMaterialDark} code={pythonCode} language="python">
 88 |                 {({ className, style, tokens, getLineProps, getTokenProps }) => (
 89 |                   <pre className={`${className} p-6 whitespace-pre`} style={{...style, lineHeight: 1.5}}>
 90 |                     {tokens.map((line, i) => (
 91 |                       <div key={i} {...getLineProps({ line })} className="pl-4">
 92 |                         {line.map((token, key) => (
 93 |                           <span key={key} {...getTokenProps({ token })} />
 94 |                         ))}
 95 |                       </div>
 96 |                     ))}
 97 |                   </pre>
 98 |                 )}
 99 |               </Highlight>
100 |             </div>
101 |           </div>
102 | 
103 |           <div>
104 |             <h3 id="cli" className="text-xl font-semibold mb-4">Command Line</h3>
105 |             <div className="mockup-code relative overflow-x-auto rounded-lg shadow-md">
106 |               <Highlight theme={themes.gruvboxMaterialDark} code={bashCode} language="python">
107 |                 {({ className, style, tokens, getLineProps, getTokenProps }) => (
108 |                   <pre className={`${className} p-6 whitespace-pre`} style={{...style, lineHeight: 1.5}}>
109 |                     {tokens.map((line, i) => (
110 |                       <div key={i} {...getLineProps({ line })}>
111 |                         {line.map((token, key) => (
112 |                           <span key={key} {...getTokenProps({ token })} />
113 |                         ))}
114 |                       </div>
115 |                     ))}
116 |                   </pre>
117 |                 )}
118 |               </Highlight>
119 |             </div>
120 |           </div>
121 |         </div>
122 |       </div>
123 |     <Layout></Layout>
124 |     </div>
125 |   );
126 | }


--------------------------------------------------------------------------------
/app/pdf/[id]/page.js:
--------------------------------------------------------------------------------
 1 | 'use client';
 2 | 
 3 | import AbstractPage from '@/app/abs/[id]/page';
 4 | 
/**
 * Page component for the /pdf/[id] route.
 *
 * It renders the /abs/[id] page component with the props it receives, so
 * both routes show the same page.
 */

export default function PDFPage(props) {
  // Forward every prop unchanged to the abstract page.
  return <AbstractPage {...props} />;
}
13 | 


--------------------------------------------------------------------------------
/app/raw/abs/[id]/route.js:
--------------------------------------------------------------------------------
  1 | import { NextResponse } from 'next/server';
  2 | import { XMLParser } from 'fast-xml-parser';
  3 | import { getArxivApiUrl, normalizePaperId } from '@/app/lib/arxiv';
  4 | 
  5 | export async function GET(request, { params }) {
  6 |   try {
  7 |     const { id } = params;
  8 | 
  9 |     if (!id) {
 10 |       return new NextResponse('Paper ID is required', {
 11 |         status: 400,
 12 |         headers: { 'Content-Type': 'text/plain; charset=utf-8' }
 13 |       });
 14 |     }
 15 | 
 16 |     const normalizedId = normalizePaperId(id);
 17 |     const arxivUrl = getArxivApiUrl(normalizedId);
 18 |     const apiResponse = await fetch(arxivUrl, {
 19 |       headers: {
 20 |         'Accept': 'application/xml',
 21 |         'User-Agent': 'arXiv-txt.org (https://arxiv-txt.org; mailto:contact@arxiv-txt.org)'
 22 |       },
 23 |     });
 24 | 
 25 |     if (!apiResponse.ok) {
 26 |       return new NextResponse(
 27 |         `Error fetching paper data: ${apiResponse.status} ${apiResponse.statusText}`,
 28 |         {
 29 |           status: apiResponse.status,
 30 |           headers: { 'Content-Type': 'text/plain; charset=utf-8' }
 31 |         }
 32 |       );
 33 |     }
 34 | 
 35 |     // Get the XML data
 36 |     const xmlData = await apiResponse.text();
 37 | 
 38 |     const parser = new XMLParser({
 39 |       ignoreAttributes: false,
 40 |       attributeNamePrefix: '@_'
 41 |     });
 42 |     const result = parser.parse(xmlData);
 43 |     const entry = result.feed.entry;
 44 | 
 45 |     // Extract data more reliably
 46 |     const title = entry.title;
 47 |     const abstract = entry.summary;
 48 |     const authors = Array.isArray(entry.author)
 49 |       ? entry.author.map(a => a.name)
 50 |       : [entry.author.name];
 51 |     const categories = Array.isArray(entry.category)
 52 |       ? entry.category.map(c => c['@_term'])
 53 |       : [entry.category['@_term']];
 54 |     const published = entry.published;
 55 |     const arxivId = entry.id.split('/').pop();
 56 |     const doi = entry['arxiv:doi'];
 57 |     const journalRef = entry['arxiv:journal_ref'];
 58 | 
 59 |     // Format dates if present
 60 |     const publishedDate = formatDate(published);
 61 | 
 62 |     // Fetch BibTeX from arXiv
 63 |     const bibtexUrl = `https://arxiv.org/bibtex/${normalizedId}`;
 64 |     const bibtexResponse = await fetch(bibtexUrl, {
 65 |       headers: {
 66 |         'User-Agent': 'arXiv-txt.org (https://arxiv-txt.org; mailto:contact@arxiv-txt.org)'
 67 |       },
 68 |     });
 69 | 
 70 |     let bibtexContent = '';
 71 |     if (bibtexResponse.ok) {
 72 |       bibtexContent = await bibtexResponse.text();
 73 |     } else {
 74 |       console.error(`Failed to fetch BibTeX: ${bibtexResponse.status} ${bibtexResponse.statusText}`);
 75 |       // Continue even if BibTeX fetch fails
 76 |     }
 77 | 
 78 |     // Generate plain text format
 79 |     const plainTextContent = `# Title
 80 | ${title}
 81 | 
 82 | # Authors
 83 | ${authors.join(', ')}
 84 | 
 85 | # Abstract
 86 | ${abstract}
 87 | 
 88 | # Categories
 89 | ${categories.join(', ')}
 90 | 
 91 | # Publication Details
 92 | - Published: ${publishedDate}
 93 | - arXiv ID: ${arxivId}
 94 | ${doi ? `- DOI: ${doi}` : ''}
 95 | ${journalRef ? `- Journal Reference: ${journalRef}` : ''}
 96 | 
 97 | ${bibtexContent ? `# BibTeX
 98 | ${bibtexContent}
 99 | ` : ''}
100 | `;
101 | 
102 |     return new NextResponse(plainTextContent, {
103 |       status: 200,
104 |       headers: {
105 |         'Content-Type': 'text/plain; charset=utf-8',
106 |         'Cache-Control': 'public, max-age=86400' // Cache for 1 day
107 |       }
108 |     });
109 | 
110 |   } catch (error) {
111 |     console.error('Error processing paper data:', error);
112 |     return new NextResponse(
113 |       `Failed to process paper data: ${error.message}`,
114 |       {
115 |         status: 500,
116 |         headers: { 'Content-Type': 'text/plain; charset=utf-8' }
117 |       }
118 |     );
119 |   }
120 | }
121 | 
/**
 * Format a date string as e.g. "May 18, 2022" (en-US, long month).
 *
 * @param {string} dateString - Date text accepted by the Date constructor
 * @returns {string} - Formatted date; '' for empty input; the input itself
 *   when it cannot be parsed as a date
 */
function formatDate(dateString) {
  if (!dateString) return '';
  try {
    const date = new Date(dateString);
    // `new Date` does not throw on bad input; it produces an invalid date
    // that would otherwise be formatted as the literal text "Invalid Date".
    if (Number.isNaN(date.getTime())) {
      return dateString;
    }
    return date.toLocaleDateString('en-US', {
      year: 'numeric',
      month: 'long',
      day: 'numeric',
      // Fix the zone so the calendar day does not depend on the host's
      // local time zone setting.
      timeZone: 'UTC'
    });
  } catch {
    return dateString;
  }
}
135 | 
// Exported revalidation interval for this route, in seconds.
// NOTE(review): Next.js appears to read route segment config statically, so
// this should stay a plain numeric literal rather than an expression — confirm
// against the Next.js route segment config documentation before changing it.
export const revalidate = 86400; // 24 hours in seconds


--------------------------------------------------------------------------------
/app/raw/pdf/[id]/route.js:
--------------------------------------------------------------------------------
 1 | // app/raw/pdf/[id]/route.js
 2 | import { fetchArxivHtml, convertHtmlToText, ArxivError } from '@/app/lib/arxiv';
 3 | 
 4 | export async function GET(request, { params }) {
 5 |   try {
 6 |     // Extract paper ID from params
 7 |     const { id } = params;
 8 | 
 9 |     if (!id) {
10 |       return new Response('Paper ID is required', { status: 400 });
11 |     }
12 | 
13 |     // Fetch HTML from arXiv
14 |     const html = await fetchArxivHtml(id);
15 | 
16 |     // Convert to plain text
17 |     const text = convertHtmlToText(html);
18 | 
19 |     // Return plain text response
20 |     return new Response(text, {
21 |       headers: {
22 |         'Content-Type': 'text/plain',
23 |         'Cache-Control': 'public, s-maxage=86400', // Cache for 24 hours
24 |       }
25 |     });
26 | 
27 |   } catch (error) {
28 |     console.error(`Error processing paper:`, error);
29 | 
30 |     if (error instanceof ArxivError) {
31 |       return new Response(error.message, {
32 |         status: error.statusCode
33 |       });
34 |     }
35 | 
36 |     return new Response('Internal server error', {
37 |       status: 500
38 |     });
39 |   }
40 | }


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import { dirname } from "path";
 2 | import { fileURLToPath } from "url";
 3 | import { FlatCompat } from "@eslint/eslintrc";
 4 | 
 5 | const __filename = fileURLToPath(import.meta.url);
 6 | const __dirname = dirname(__filename);
 7 | 
 8 | const compat = new FlatCompat({
 9 |   baseDirectory: __dirname,
10 | });
11 | 
12 | const eslintConfig = [
13 |   ...compat.extends("next/core-web-vitals", "next/typescript"),
14 | ];
15 | 
16 | export default eslintConfig;
17 | 


--------------------------------------------------------------------------------
/examples/python/lit_review.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# arxiv-txt.org for literature reviews\n",
  8 |     "\n",
  9 |     "In this example, we will use [arxiv-txt.org](https://arxiv-txt.org) to generate a literature review for a given topic.\n",
 10 |     "First, let's identify a list of relevant papers on a topic we want to summarize.\n",
 11 |     "Here we will focus on \"Masked Autoencoders\".\n",
 12 |     "\n",
 13 |     "Here is a list of papers we will use:"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 1,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "paper_list = [\n",
 23 |     "    \"https://arxiv.org/abs/2205.09113\",\n",
 24 |     "    \"https://arxiv.org/abs/2304.00571\",\n",
 25 |     "    \"https://arxiv.org/abs/2211.09120\",\n",
 26 |     "    \"https://arxiv.org/abs/2212.05922\",\n",
 27 |     "    \"https://arxiv.org/abs/2301.06018\",\n",
 28 |     "]\n"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "Let's define some helper functions to get the summaries of the papers for the lit review."
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 2,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "Getting summary for https://arxiv.org/abs/2205.09113:\n",
 48 |       "Getting summary for https://arxiv.org/abs/2304.00571:\n",
 49 |       "Getting summary for https://arxiv.org/abs/2211.09120:\n",
 50 |       "Getting summary for https://arxiv.org/abs/2212.05922:\n",
 51 |       "Getting summary for https://arxiv.org/abs/2301.06018:\n",
 52 |       "Summaries:\n",
 53 |       "---\n",
 54 |       "\n",
 55 |       "# Masked Autoencoders As Spatiotemporal Learners\n",
 56 |       "\n",
 57 |       "## Authors\n",
 58 |       "Christoph Feichtenhofer, Haoqi Fan, Yanghao Li, Kaiming He\n",
 59 |       "\n",
 60 |       "## Categories\n",
 61 |       "cs.CV, cs.LG\n",
 62 |       "\n",
 63 |       "## Publication Details\n",
 64 |       "- Published: May 18, 2022\n",
 65 |       "- arXiv ID: 2205.09113v2\n",
 66 |       "\n",
 67 |       "\n",
 68 |       "\n",
 69 |       "## Abstract\n",
 70 |       "This paper studies a conceptually simple extension of Masked Autoencoders\n",
 71 |       "(MAE) to spatiotemporal representation learning from videos. We randomly mask\n",
 72 |       "out spacetime patches in videos and learn an autoencoder to reconstruct them in\n",
 73 |       "pixels. Interestingly, we show that our MAE method can learn strong\n",
 74 |       "representations with almost no inductive bias on spacetime (only except for\n",
 75 |       "patch and positional embeddings), and spacetime-agnostic random masking\n",
 76 |       "performs the best. We observe that the optimal masking ratio is as high as 90%\n",
 77 |       "(vs. 75% on images), supporting the hypothesis that this ratio is related to\n",
 78 |       "information redundancy of the data. A high masking ratio leads to a large\n",
 79 |       "speedup, e.g., > 4x in wall-clock time or even more. We report competitive\n",
 80 |       "results on several challenging video datasets using vanilla Vision\n",
 81 |       "Transformers. We observe that MAE can outperform supervised pre-training by\n",
 82 |       "large margins. We further report encouraging results of training on real-world,\n",
 83 |       "uncurated Instagram data. Our study suggests that the general framework of\n",
 84 |       "masked autoencoding (BERT, MAE, etc.) can be a unified methodology for\n",
 85 |       "representation learning with minimal domain knowledge.\n",
 86 |       "\n",
 87 |       "---\n",
 88 |       "# DropMAE: Masked Autoencoders with Spatial-Attention Dropout for Tracking\n",
 89 |       "  Tasks\n",
 90 |       "\n",
 91 |       "## Authors\n",
 92 |       "Qiangqiang Wu, Tianyu Yang, Ziquan Liu, Baoyuan Wu, Ying Shan, Antoni B. Chan\n",
 93 |       "\n",
 94 |       "## Categories\n",
 95 |       "cs.CV\n",
 96 |       "\n",
 97 |       "## Publication Details\n",
 98 |       "- Published: April 2, 2023\n",
 99 |       "- arXiv ID: 2304.00571v2\n",
100 |       "\n",
101 |       "\n",
102 |       "\n",
103 |       "## Abstract\n",
104 |       "In this paper, we study masked autoencoder (MAE) pretraining on videos for\n",
105 |       "matching-based downstream tasks, including visual object tracking (VOT) and\n",
106 |       "video object segmentation (VOS). A simple extension of MAE is to randomly mask\n",
107 |       "out frame patches in videos and reconstruct the frame pixels. However, we find\n",
108 |       "that this simple baseline heavily relies on spatial cues while ignoring\n",
109 |       "temporal relations for frame reconstruction, thus leading to sub-optimal\n",
110 |       "temporal matching representations for VOT and VOS. To alleviate this problem,\n",
111 |       "we propose DropMAE, which adaptively performs spatial-attention dropout in the\n",
112 |       "frame reconstruction to facilitate temporal correspondence learning in videos.\n",
113 |       "We show that our DropMAE is a strong and efficient temporal matching learner,\n",
114 |       "which achieves better finetuning results on matching-based tasks than the\n",
115 |       "ImageNetbased MAE with 2X faster pre-training speed. Moreover, we also find\n",
116 |       "that motion diversity in pre-training videos is more important than scene\n",
117 |       "diversity for improving the performance on VOT and VOS. Our pre-trained DropMAE\n",
118 |       "model can be directly loaded in existing ViT-based trackers for fine-tuning\n",
119 |       "without further modifications. Notably, DropMAE sets new state-of-the-art\n",
120 |       "performance on 8 out of 9 highly competitive video tracking and segmentation\n",
121 |       "datasets. Our code and pre-trained models are available at\n",
122 |       "https://github.com/jimmy-dq/DropMAE.git.\n",
123 |       "\n",
124 |       "---\n",
125 |       "# AdaMAE: Adaptive Masking for Efficient Spatiotemporal Learning with\n",
126 |       "  Masked Autoencoders\n",
127 |       "\n",
128 |       "## Authors\n",
129 |       "Wele Gedara Chaminda Bandara, Naman Patel, Ali Gholami, Mehdi Nikkhah, Motilal Agrawal, Vishal M. Patel\n",
130 |       "\n",
131 |       "## Categories\n",
132 |       "cs.CV, cs.AI\n",
133 |       "\n",
134 |       "## Publication Details\n",
135 |       "- Published: November 16, 2022\n",
136 |       "- arXiv ID: 2211.09120v1\n",
137 |       "\n",
138 |       "\n",
139 |       "\n",
140 |       "## Abstract\n",
141 |       "Masked Autoencoders (MAEs) learn generalizable representations for image,\n",
142 |       "text, audio, video, etc., by reconstructing masked input data from tokens of\n",
143 |       "the visible data. Current MAE approaches for videos rely on random patch, tube,\n",
144 |       "or frame-based masking strategies to select these tokens. This paper proposes\n",
145 |       "AdaMAE, an adaptive masking strategy for MAEs that is end-to-end trainable. Our\n",
146 |       "adaptive masking strategy samples visible tokens based on the semantic context\n",
147 |       "using an auxiliary sampling network. This network estimates a categorical\n",
148 |       "distribution over spacetime-patch tokens. The tokens that increase the expected\n",
149 |       "reconstruction error are rewarded and selected as visible tokens, motivated by\n",
150 |       "the policy gradient algorithm in reinforcement learning. We show that AdaMAE\n",
151 |       "samples more tokens from the high spatiotemporal information regions, thereby\n",
152 |       "allowing us to mask 95% of tokens, resulting in lower memory requirements and\n",
153 |       "faster pre-training. We conduct ablation studies on the Something-Something v2\n",
154 |       "(SSv2) dataset to demonstrate the efficacy of our adaptive sampling approach\n",
155 |       "and report state-of-the-art results of 70.0% and 81.7% in top-1 accuracy on\n",
156 |       "SSv2 and Kinetics-400 action classification datasets with a ViT-Base backbone\n",
157 |       "and 800 pre-training epochs.\n",
158 |       "\n",
159 |       "---\n",
160 |       "# Audiovisual Masked Autoencoders\n",
161 |       "\n",
162 |       "## Authors\n",
163 |       "Mariana-Iuliana Georgescu, Eduardo Fonseca, Radu Tudor Ionescu, Mario Lucic, Cordelia Schmid, Anurag Arnab\n",
164 |       "\n",
165 |       "## Categories\n",
166 |       "cs.CV, cs.SD\n",
167 |       "\n",
168 |       "## Publication Details\n",
169 |       "- Published: December 9, 2022\n",
170 |       "- arXiv ID: 2212.05922v3\n",
171 |       "\n",
172 |       "\n",
173 |       "\n",
174 |       "## Abstract\n",
175 |       "Can we leverage the audiovisual information already present in video to\n",
176 |       "improve self-supervised representation learning? To answer this question, we\n",
177 |       "study various pretraining architectures and objectives within the masked\n",
178 |       "autoencoding framework, motivated by the success of similar methods in natural\n",
179 |       "language and image understanding. We show that we can achieve significant\n",
180 |       "improvements on audiovisual downstream classification tasks, surpassing the\n",
181 |       "state-of-the-art on VGGSound and AudioSet. Furthermore, we can leverage our\n",
182 |       "audiovisual pretraining scheme for multiple unimodal downstream tasks using a\n",
183 |       "single audiovisual pretrained model. We additionally demonstrate the\n",
184 |       "transferability of our representations, achieving state-of-the-art audiovisual\n",
185 |       "results on Epic Kitchens without pretraining specifically for this dataset.\n",
186 |       "\n",
187 |       "---\n",
188 |       "# CMAE-V: Contrastive Masked Autoencoders for Video Action Recognition\n",
189 |       "\n",
190 |       "## Authors\n",
191 |       "Cheng-Ze Lu, Xiaojie Jin, Zhicheng Huang, Qibin Hou, Ming-Ming Cheng, Jiashi Feng\n",
192 |       "\n",
193 |       "## Categories\n",
194 |       "cs.CV\n",
195 |       "\n",
196 |       "## Publication Details\n",
197 |       "- Published: January 15, 2023\n",
198 |       "- arXiv ID: 2301.06018v1\n",
199 |       "\n",
200 |       "\n",
201 |       "\n",
202 |       "## Abstract\n",
203 |       "Contrastive Masked Autoencoder (CMAE), as a new self-supervised framework,\n",
204 |       "has shown its potential of learning expressive feature representations in\n",
205 |       "visual image recognition. This work shows that CMAE also trivially generalizes\n",
206 |       "well on video action recognition without modifying the architecture and the\n",
207 |       "loss criterion. By directly replacing the original pixel shift with the\n",
208 |       "temporal shift, our CMAE for visual action recognition, CMAE-V for short, can\n",
209 |       "generate stronger feature representations than its counterpart based on pure\n",
210 |       "masked autoencoders. Notably, CMAE-V, with a hybrid architecture, can achieve\n",
211 |       "82.2% and 71.6% top-1 accuracy on the Kinetics-400 and Something-something V2\n",
212 |       "datasets, respectively. We hope this report could provide some informative\n",
213 |       "inspiration for future works.\n",
214 |       "\n"
215 |      ]
216 |     }
217 |    ],
218 |    "source": [
219 |     "import requests\n",
220 |     "\n",
221 |     "def get_paper_summary(arxiv_url, timeout: float = 30.0) -> str:\n",
222 |     "    \"\"\"Fetch a paper's plain-text summary from arxiv-txt.org (raises on timeout or HTTP error).\"\"\"\n",
223 |     "    assert arxiv_url.startswith(\"https://arxiv.org/\"), f\"Invalid arxiv url: {arxiv_url}, must start with https://arxiv.org/\"\n",
224 |     "    # Only the host (first match) is rewritten: https://arxiv.org/abs/<id> -> https://arxiv-txt.org/raw/abs/<id>\n",
225 |     "    arxiv_txt_url = arxiv_url.replace(\"arxiv.org/\", \"arxiv-txt.org/raw/\", 1)\n",
226 |     "    response = requests.get(arxiv_txt_url, timeout=timeout)  # seconds; requests never times out by default\n",
227 |     "    response.raise_for_status()  # surface 4xx/5xx instead of returning an error page as a summary\n",
228 |     "    return response.text\n",
229 |     "\n",
230 |     "\n",
231 |     "def get_summaries(paper_list) -> str:\n",
232 |     "    \"\"\"\n",
233 |     "    Fetch each paper's summary in order and join them with a '---' separator line\n",
234 |     "    \"\"\"\n",
235 |     "    collected = []\n",
236 |     "    for url in paper_list:\n",
237 |     "        print(f\"Getting summary for {url}:\")\n",
238 |     "        collected.append(get_paper_summary(url))\n",
239 |     "    separator = \"\\n---\\n\"\n",
240 |     "    return separator.join(collected)\n",
241 |     "\n",
242 |     "paper_summaries = get_summaries(paper_list)\n",
243 |     "print(\"Summaries:\\n---\\n\")\n",
244 |     "print(paper_summaries)"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "markdown",
249 |    "metadata": {},
250 |    "source": [
251 |     "Now let's get the lit review from the LLM."
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": 3,
257 |    "metadata": {},
258 |    "outputs": [],
259 |    "source": [
260 |     "# Install litellm for LLM calls\n",
261 |     "!pip -q install litellm\n",
262 |     "\n",
263 |     "import os\n",
264 |     "import litellm\n",
265 |     "\n",
266 |     "def get_completion(messages, model: str = \"gpt-4o-mini\") -> str:\n",
267 |     "    \"\"\"Run one litellm chat completion and return the text of the first choice.\"\"\"\n",
268 |     "    first_choice = litellm.completion(model=model, messages=messages).choices[0]\n",
269 |     "    return first_choice.message.content\n",
270 |     "\n",
271 |     "\n",
272 |     "model = \"gpt-4o-mini\"  # Replace with any litellm supported model, make sure to set OPENAI_API_KEY\n",
273 |     "\n",
274 |     "system_prompt = f\"\"\"\n",
275 |     "You are a helpful assistant that reviews papers.\n",
276 |     "You are given a list of papers and their abstracts.\n",
277 |     "Your goal is to review the papers for a research paper on the given topic.\n",
278 |     "\n",
279 |     "\"\"\"\n",
280 |     "\n",
281 |     "user_prompt = f\"\"\"This will be a paragraph in a scientific paper.\n",
282 |     "Explain what Masked Autoencoders are, why they are important, and the different innovations listed in the follow-up papers.\n",
283 |     "Cite the papers and their different contributions to the field of Masked Autoencoders.\n",
284 |     "\n",
285 |     "Here are the relevant papers and their abstracts:\n",
286 |     "{paper_summaries}\n",
287 |     "\"\"\"\n",
288 |     "\n",
289 |     "\n",
290 |     "messages = [\n",
291 |     "    {\"role\": \"system\", \"content\": system_prompt},\n",
292 |     "    {\"role\": \"user\", \"content\": user_prompt},\n",
293 |     "]\n"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": 4,
299 |    "metadata": {},
300 |    "outputs": [
301 |     {
302 |      "name": "stdout",
303 |      "output_type": "stream",
304 |      "text": [
305 |       "Masked Autoencoders (MAEs) are a class of self-supervised learning models that have gained prominence for their ability to learn robust representations by reconstructing masked portions of input data. Initially popularized in the context of images, these models have been extended to spatiotemporal domains, particularly for video data. The significance of MAEs lies in their capacity to efficiently utilize unlabeled data, enabling the extraction of meaningful patterns without costly annotation efforts. These approaches have shown remarkable performance in various computer vision tasks, establishing a framework for learning that parallels successful techniques in natural language processing, such as BERT.\n",
306 |       "\n",
307 |       "Recent innovations in the field of MAEs have built upon this foundational architecture to address specific limitations and enhance representation learning. For instance, **Feichtenhofer et al. (2022)** introduced a spatiotemporal extension of MAEs, demonstrating that high masking ratios (up to 90%) lead to significant improvements in representation quality and computational efficiency for video datasets, outperforming traditional supervised methods (Feichtenhofer et al., 2022). Subsequently, **Wu et al. (2023)** proposed DropMAE, which incorporates spatial-attention dropout to improve temporal matching capabilities, thereby setting new state-of-the-art results in visual object tracking and segmentation tasks (Wu et al., 2023). \n",
308 |       "\n",
309 |       "**Bandara et al. (2022)** introduced AdaMAE, which presented an end-to-end adaptive masking strategy that samples tokens based on their semantic relevance, achieving superior performance while efficiently managing computational resources (Bandara et al., 2022). Meanwhile, **Georgescu et al. (2022)** explored the integration of audiovisual information with MAE architectures, showing enhanced performance on audiovisual classification tasks, thus exemplifying the transferability of learned representations across modalities (Georgescu et al., 2022). Lastly, **Lu et al. (2023)** developed the Contrastive Masked Autoencoder for Video Action Recognition (CMAE-V), which enhances representation strength through a hybrid architecture, achieving state-of-the-art results on benchmark action recognition datasets (Lu et al., 2023). \n",
310 |       "\n",
311 |       "These advancements illuminate the versatility and robustness of MAE frameworks, pushing the boundaries of self-supervised learning and establishing new benchmarks in representation learning for video and audiovisual data.\n"
312 |      ]
313 |     }
314 |    ],
315 |    "source": [
316 |     "lit_review = get_completion(messages, model=model)  # honor the `model` chosen in the setup cell\n",
317 |     "print(lit_review)"
318 |    ]
319 |   }
320 |  ],
321 |  "metadata": {
322 |   "kernelspec": {
323 |    "display_name": "py11",
324 |    "language": "python",
325 |    "name": "python3"
326 |   },
327 |   "language_info": {
328 |    "codemirror_mode": {
329 |     "name": "ipython",
330 |     "version": 3
331 |    },
332 |    "file_extension": ".py",
333 |    "mimetype": "text/x-python",
334 |    "name": "python",
335 |    "nbconvert_exporter": "python",
336 |    "pygments_lexer": "ipython3",
337 |    "version": "3.11.9"
338 |   }
339 |  },
340 |  "nbformat": 4,
341 |  "nbformat_minor": 2
342 | }
343 | 


--------------------------------------------------------------------------------
/next.config.ts:
--------------------------------------------------------------------------------
1 | import type { NextConfig } from "next";
2 | 
3 | const nextConfig: NextConfig = { // Next.js configuration, exported as this module's default
4 |   async redirects() { // Redirects hook: no redirect rules are defined yet
5 |     return [];
6 |   },
7 | };
8 | 
9 | export default nextConfig;


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "arxiv-txt",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "scripts": {
 6 |     "dev": "next dev --turbopack",
 7 |     "build": "next build",
 8 |     "start": "next start",
 9 |     "lint": "next lint"
10 |   },
11 |   "dependencies": {
12 |     "@vercel/analytics": "^1.5.0",
13 |     "daisyui": "^4.12.23",
14 |     "fast-xml-parser": "^5.0.6",
15 |     "html-to-text": "^9.0.5",
16 |     "next": "15.1.7",
17 |     "prism-react-renderer": "^2.4.1",
18 |     "react": "^19.0.0",
19 |     "react-dom": "^19.0.0",
20 |     "react-hot-toast": "^2.5.2"
21 |   },
22 |   "devDependencies": {
23 |     "@eslint/eslintrc": "^3",
24 |     "@types/node": "^20",
25 |     "@types/react": "^19",
26 |     "@types/react-dom": "^19",
27 |     "eslint": "^9.20.1",
28 |     "eslint-config-next": "^15.1.7",
29 |     "postcss": "^8",
30 |     "tailwindcss": "^3.4.1",
31 |     "typescript": "^5"
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('postcss-load-config').Config} */
2 | const config = { // PostCSS configuration, exported as this module's default
3 |   plugins: {
4 |     tailwindcss: {}, // Tailwind CSS plugin, enabled with an empty options object
5 |   },
6 | };
7 | 
8 | export default config;
9 | 


--------------------------------------------------------------------------------
/public/file.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>


--------------------------------------------------------------------------------
/public/globe.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>


--------------------------------------------------------------------------------
/public/next.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>


--------------------------------------------------------------------------------
/public/vercel.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>


--------------------------------------------------------------------------------
/public/window.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>


--------------------------------------------------------------------------------
/tailwind.config.ts:
--------------------------------------------------------------------------------
 1 | import type { Config } from "tailwindcss";
 2 | import daisyui from "daisyui";
 3 | 
 4 | const config: Config = { // Tailwind CSS configuration, exported as this module's default
 5 |   content: [ // Globs of source files listed for Tailwind to scan
 6 |     "./pages/**/*.{js,ts,jsx,tsx,mdx}",
 7 |     "./components/**/*.{js,ts,jsx,tsx,mdx}",
 8 |     "./app/**/*.{js,ts,jsx,tsx,mdx}",
 9 |   ],
10 |   theme: {
11 |     extend: {
12 |       colors: { // Color names bound to the --background / --foreground CSS variables
13 |         background: "var(--background)",
14 |         foreground: "var(--foreground)",
15 |       },
16 |     },
17 |   },
18 |   plugins: [daisyui],
19 |   daisyui: {
20 |     themes: ["retro", "dark"],
21 |     // Alternative theme list kept for quick switching:
22 |     // themes: ["lemonade", "dark"],
23 |   },
24 | }
25 | 
26 | export default config;


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2017",
 4 |     "lib": ["dom", "dom.iterable", "esnext"],
 5 |     "allowJs": true,
 6 |     "skipLibCheck": true,
 7 |     "strict": true,
 8 |     "noEmit": true,
 9 |     "esModuleInterop": true,
10 |     "module": "esnext",
11 |     "moduleResolution": "bundler",
12 |     "resolveJsonModule": true,
13 |     "isolatedModules": true,
14 |     "jsx": "preserve",
15 |     "incremental": true,
16 |     "plugins": [
17 |       {
18 |         "name": "next"
19 |       }
20 |     ],
21 |     "paths": {
22 |       "@/*": ["./*"]
23 |     }
24 |   },
25 |   "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26 |   "exclude": ["node_modules"]
27 | }
28 | 


--------------------------------------------------------------------------------