├── .eslintrc ├── .gitignore ├── .prettierrc ├── README.md ├── bin └── index.js ├── package-lock.json ├── package.json └── src ├── config ├── serviceMappings.json └── typeMappings.json ├── handlers ├── archiveHandler.js ├── audioHandler.js ├── codeHandler.js ├── imageHandler.js ├── jsonYamlHandler.js ├── pdfHandler.js ├── spreadsheetHandler.js └── textHandler.js ├── services ├── gzipService.js ├── tarService.js └── zipService.js └── utils ├── fileHandler.js ├── fileProcessor.js ├── files.js └── rangeFetcher.js /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": ["prettier"], 3 | "rules": { 4 | "prettier/prettier": "off", 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "tabWidth": 2, 3 | "printWidth": 100 4 | } 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🚀 Zip stream CLI 2 | 3 | [![npm](https://img.shields.io/npm/v/zip-stream-cli.svg?style=flat-square)](https://www.npmjs.com/package/zip-stream-cli) 4 | 5 | **Zip stream CLI** is a Node.js library that allows you to extract and display content from various file types inside a zip or tar archive directly in the terminal. The library supports multiple file types such as images, audio files, PDFs, text, spreadsheets, and more, with the option to extend functionality by adding new handlers. 
6 | 7 | ![ezgif-4-c40395bcdb](https://github.com/user-attachments/assets/f12c2b9f-25da-44d0-8526-969c3bbad6a3) 8 | 9 | ## ✨ Features 10 | 11 | - **Supports Multiple File Types**: Automatically detect and display content from various file types inside both zip and tar archives. 12 | - **Modular Handler System**: 13 | - Easily extend support for new file types by adding custom handlers. 14 | - Handlers for existing file types are dynamically loaded based on the file extension. 15 | - **🎵 Stream and Display Audio Waveforms**: Display waveforms for audio files directly in the terminal. 16 | - **🖼️ Display Images**: View images as pixel art directly in the terminal. 17 | - **⚙️ Customizable Output**: Each file type is displayed using appropriate handlers, allowing you to customize the way content is shown for different types of files. 18 | 19 | ## ✨ Simplified Extension Management 20 | 21 | - With the new **fileHandler** system, adding support for additional compressed file types (e.g., `.rar`, `.7z`, `.gzip`) becomes easier. New services can be created for each file type and mapped in the `serviceMappings.json` file. 22 | - Once the service is implemented and mapped, the file type can be automatically recognized and processed by the CLI. 23 | - This allows the system to be more extensible without the need for significant modifications to the core logic, making it easy to add support for new file types as needed. 24 | 25 | ## ⚡ Installation dev 26 | 27 | 1. Clone this repository to your local machine: 28 | 29 | ```bash 30 | git clone https://github.com/agarrec-vivlio/zip-stream-cli 31 | cd zip-stream-cli 32 | ``` 33 | 34 | 2. Install the required dependencies: 35 | 36 | ```bash 37 | npm install 38 | ``` 39 | 40 | 3. 
Link the project globally using npm link: 41 | 42 | ```bash 43 | npm link 44 | ``` 45 | 46 | ## ⚡ Installation global 47 | 48 | You can also install globally using npm: 49 | 50 | ```bash 51 | npm install -g zip-stream-cli 52 | ``` 53 | 54 | ## 🌐 Global Usage 55 | 56 | Once installed globally or linked, you can run the `zip-stream-cli` command from anywhere in your terminal. 57 | 58 | ### Example: 59 | 60 | ```bash 61 | zip-stream-cli https://example.com/myarchive.zip 62 | zip-stream-cli https://example.com/myarchive.tar.gz 63 | ``` 64 | 65 | ## 🛠️ File Type Handlers 66 | 67 | The library dynamically loads file handlers based on the file extension. Handlers for various file types are stored in the `handlers` directory. 68 | 69 | The `typeMappings.json` file maps file extensions to their respective handlers. If a file type is not recognized or doesn't have a dedicated handler, it falls back to the `textHandler` to display the file as plain text. 70 | 71 | ### Supported File Types 72 | 73 | | File Type | Extensions | Handler | 74 | | ----------------- | ---------------------------------- | -------------------- | 75 | | Text Files | `.txt`, `.md`, `.html` | `textHandler` | 76 | | Audio Files | `.mp3`, `.wav`, `.ogg` | `audioHandler` | 77 | | Image Files | `.png`, `.jpg`, `.gif`, `.bmp` | `imageHandler` | 78 | | PDF Files | `.pdf` | `pdfHandler` | 79 | | Spreadsheet Files | `.xls`, `.xlsx`, `.csv` | `spreadsheetHandler` | 80 | | Code Files | `.js`, `.py`, `.java`, `.rb`, etc. | `codeHandler` | 81 | | Archive Files | `.zip`, `.tar`, `.gz` | `archiveHandler` | 82 | | YAML & JSON Files | `.yaml`, `.yml`, `.json` | `jsonYamlHandler` | 83 | 84 | ### Adding a New File Type 85 | 86 | The system is designed to be extensible, making it easy to add new handlers for different file types. Follow the steps below to add support for a new file type. 
87 | 88 | ### Step 1: Create a New Handler 89 | 90 | To add support for a new file type, create a new handler file inside the `handlers` directory. 91 | 92 | Example: Create `customFileHandler.js` to handle a new file type, say `.custom`. 93 | 94 | ```javascript 95 | // handlers/customFileHandler.js 96 | module.exports = async function handleCustomFile(fileStream) { 97 | const chunks = []; 98 | 99 | for await (const chunk of fileStream) { 100 | chunks.push(chunk); 101 | } 102 | 103 | const fileContent = Buffer.concat(chunks).toString("utf-8"); 104 | console.log("Displaying custom file content:"); 105 | console.log(fileContent); // Replace this with your custom logic to handle the file 106 | }; 107 | ``` 108 | 109 | ### Step 2: Update `typeMappings.json` 110 | 111 | Add the new file extension and map it to the newly created handler in `typeMappings.json`. 112 | 113 | ```json 114 | { 115 | "custom": "customFileHandler", 116 | "txt": "textHandler", 117 | "md": "textHandler", 118 | "json": "jsonYamlHandler", 119 | "yaml": "jsonYamlHandler", 120 | "mp3": "audioHandler", 121 | "wav": "audioHandler", 122 | "png": "imageHandler", 123 | "jpg": "imageHandler" 124 | } 125 | ``` 126 | 127 | ### Step 3: Use Your Custom Handler 128 | 129 | Now, when a file with the `.custom` extension is encountered, the library will use your `customFileHandler.js` to process and display the file. 130 | 131 | ## 📄 TAR File Streaming 132 | 133 | In TAR file handling, the **Zip stream CLI** employs a streaming approach to efficiently process large archives without requiring the entire file to be downloaded and stored in memory. 134 | 135 | ### How TAR File Streaming Works: 136 | 137 | 1. **Partial Fetching**: For uncompressed TAR files, the CLI fetches small chunks of the file (e.g., a few megabytes at a time). For compressed `.tar.gz` files, compressed chunks are fetched and decompressed on the fly. This allows the CLI to start listing or extracting files without needing the entire archive. 
138 | 139 | 2. **Entry-by-Entry Processing**: The TAR archive is processed entry by entry, reading file headers and skipping over data unless it is necessary for the current operation. This keeps memory usage low. 140 | 141 | 3. **File Extraction**: When extracting a specific file, the CLI fetches the portion of the TAR file where the file is located and decompresses only that part (if necessary). The rest of the archive is skipped. 142 | 143 | 4. **Efficient for Large Archives**: The CLI uses the `tar-stream` library to process entries without buffering the whole file. Compressed archives use `zlib` to decompress data in chunks. 144 | 145 | ### Advantages: 146 | 147 | - **Memory Efficiency**: Only the needed parts of the archive are processed, avoiding the need to load the entire archive into memory. 148 | - **Streaming**: Files are processed as they are streamed in, improving performance on large files. 149 | - **Optimized for Compressed Archives**: Compressed TAR files (`.tar.gz`) are streamed and decompressed incrementally. 150 | 151 | ## 📸 Screenshots 152 | 153 | - **File Listing**: 154 | 155 | Screenshot 2024-09-14 at 17 48 14 156 | 157 | - **🖼️ Image file output**: 158 | 159 | Screenshot 2024-09-14 at 17 48 48 160 | 161 | - **📄 Text file output**: 162 | 163 | Screenshot 2024-09-14 at 17 48 25 164 | 165 | ## 🤝 Contributing 166 | 167 | Contributions are welcome! Feel free to fork the repository, create new handlers, fix bugs, or add new features. 168 | 169 | To contribute: 170 | 171 | 1. Fork this repository. 172 | 2. Create a new branch (`git checkout -b feature-new-handler`). 173 | 3. Add your feature or fix. 174 | 4. Push your branch and submit a pull request. 175 | 176 | ## 📜 License 177 | 178 | This project is licensed under the MIT License. 
179 | -------------------------------------------------------------------------------- /bin/index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const inquirer = require("inquirer"); 4 | const validUrl = require("valid-url"); 5 | const getFileService = require("../src/utils/fileProcessor"); 6 | const { getFileType } = require("../src/utils/files"); 7 | 8 | const listAndSelectFile = async (files) => { 9 | const choices = files.map((file) => ({ 10 | name: `${file.filename} (${file.fileSize || "unknown"} bytes)`, 11 | value: file, 12 | })); 13 | 14 | if (!choices.length) { 15 | console.log("No files found in the archive."); 16 | process.exit(0); 17 | } 18 | 19 | const { selectedFile } = await inquirer.prompt([ 20 | { 21 | type: "list", 22 | name: "selectedFile", 23 | message: "Select a file to display or process:", 24 | choices, 25 | }, 26 | ]); 27 | 28 | return selectedFile; 29 | }; 30 | 31 | const main = async () => { 32 | try { 33 | const [url] = process.argv.slice(2); 34 | 35 | if (!url || !validUrl.isWebUri(url)) { 36 | console.error("Usage: "); 37 | process.exit(1); 38 | } 39 | 40 | const fileType = getFileType(url); 41 | 42 | const fileService = await getFileService(fileType); 43 | 44 | const files = await fileService.listFiles(url); 45 | 46 | const selectedFile = await listAndSelectFile(files); 47 | 48 | await fileService.processFile(selectedFile, url); 49 | // 50 | } catch (error) { 51 | console.error("An error occurred during processing:", error); 52 | process.exit(1); 53 | } 54 | }; 55 | 56 | main(); 57 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zip-stream-cli", 3 | "version": "2.0.1", 4 | "description": "A tool to extract files from remote zip archives over HTTP.", 5 | "main": "src/RemoteZipFile.js", 6 | "repository": { 7 | "type": 
"git", 8 | "url": "git+https://github.com/agarrec-vivlio/zip-stream-cli.git" 9 | }, 10 | "author": "Alexandre-g (https://github.com/agarrec-vivlio)", 11 | "bin": { 12 | "zip-stream-cli": "./bin/index.js" 13 | }, 14 | "scripts": { 15 | "start": "node bin/index.js", 16 | "release": "release-it" 17 | }, 18 | "license": "MIT", 19 | "dependencies": { 20 | "asciify-image": "^0.1.10", 21 | "audio-decode": "^2.2.2", 22 | "audio-loader": "^1.0.3", 23 | "chalk": "^5.3.0", 24 | "cli-chart": "^0.3.1", 25 | "cli-progress": "^3.12.0", 26 | "inquirer": "^8.2.0", 27 | "js-yaml": "^4.1.0", 28 | "keypress": "^0.2.1", 29 | "node-fetch": "^2.6.1", 30 | "ora": "^8.1.0", 31 | "pdf-parse": "^1.1.1", 32 | "readline": "^1.3.0", 33 | "speaker": "^0.5.5", 34 | "stream": "^0.0.3", 35 | "tar-stream": "^3.1.7", 36 | "terminal-image": "^3.0.0", 37 | "unzipper": "^0.12.3", 38 | "valid-url": "^1.0.9", 39 | "xlsx": "^0.18.5" 40 | }, 41 | "release-it": { 42 | "$schema": "https://unpkg.com/release-it/schema/release-it.json", 43 | "github": { 44 | "release": true 45 | } 46 | }, 47 | "devDependencies": { 48 | "prettier": "^3.3.3", 49 | "release-it": "^17.6.0" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/config/serviceMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "zip": "zipService", 3 | "tar": "tarService", 4 | "rar": "rarService", 5 | "gz": "gzipService", 6 | "t7": "t7Service" 7 | } 8 | -------------------------------------------------------------------------------- /src/config/typeMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "txt": "textHandler", 3 | "md": "textHandler", 4 | "json": "jsonYamlHandler", 5 | "yaml": "jsonYamlHandler", 6 | "yml": "jsonYamlHandler", 7 | "mp3": "audioHandler", 8 | "wav": "audioHandler", 9 | "ogg": "audioHandler", 10 | "png": "imageHandler", 11 | "jpg": "imageHandler", 12 | "jpeg": 
"imageHandler", 13 | "gif": "imageHandler", 14 | "bmp": "imageHandler", 15 | "pdf": "pdfHandler", 16 | "xls": "spreadsheetHandler", 17 | "xlsx": "spreadsheetHandler", 18 | "csv": "spreadsheetHandler", 19 | "zip": "archiveHandler", 20 | "tar": "archiveHandler", 21 | "gz": "archiveHandler", 22 | "js": "codeHandler", 23 | "py": "codeHandler", 24 | "java": "codeHandler", 25 | "rb": "codeHandler" 26 | } 27 | -------------------------------------------------------------------------------- /src/handlers/archiveHandler.js: -------------------------------------------------------------------------------- 1 | const unzipper = require("unzipper"); 2 | 3 | module.exports = async function handleArchiveFile(fileStream) { 4 | fileStream.pipe(unzipper.Parse()).on("entry", (entry) => { 5 | console.log(`File: ${entry.path}, Type: ${entry.type}`); 6 | entry.autodrain(); 7 | }); 8 | }; 9 | -------------------------------------------------------------------------------- /src/handlers/audioHandler.js: -------------------------------------------------------------------------------- 1 | const load = require("audio-loader"); 2 | const Speaker = require("speaker"); 3 | const readline = require("readline"); 4 | const cliProgress = require("cli-progress"); 5 | 6 | /** 7 | * Processes and plays an audio stream with real-time playback progress shown in the CLI. 8 | * The function buffers the audio stream, plays it through the speaker, and displays 9 | * a progress bar in the terminal to track playback. The process can be stopped with `Ctrl+C`. 10 | * 11 | * @param {stream.Readable} audioStream - The audio stream to process and play. 12 | * 13 | * @returns {Promise} A promise that resolves when the audio playback is completed. 
14 | */ 15 | async function handleAudioStreamWithPlayPause(audioStream) { 16 | const chunks = []; 17 | 18 | for await (const chunk of audioStream) { 19 | chunks.push(chunk); 20 | } 21 | 22 | const audioBuffer = Buffer.concat(chunks); 23 | const audioData = await load(audioBuffer); 24 | 25 | const speaker = new Speaker({ 26 | channels: audioData.numberOfChannels, 27 | bitDepth: 16, 28 | sampleRate: audioData.sampleRate, 29 | }); 30 | 31 | let currentSampleIndex = 0; 32 | const totalSamples = audioData.length * audioData.numberOfChannels; 33 | 34 | const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic); 35 | progressBar.start(totalSamples, 0); 36 | 37 | const writeAudioToSpeaker = () => { 38 | const sampleData = audioData.getChannelData(0); 39 | 40 | const writeData = () => { 41 | if (currentSampleIndex < sampleData.length) { 42 | const chunk = Buffer.alloc(2); 43 | const sample = Math.max(-1, Math.min(1, sampleData[currentSampleIndex++])); 44 | chunk.writeInt16LE(sample * 32767, 0); 45 | 46 | if (!speaker.write(chunk)) { 47 | speaker.once("drain", writeData); 48 | } else { 49 | setImmediate(writeData); 50 | } 51 | 52 | progressBar.update(currentSampleIndex); 53 | } else { 54 | speaker.end(); 55 | progressBar.stop(); 56 | process.stdin.setRawMode(false); 57 | process.stdin.pause(); 58 | } 59 | }; 60 | 61 | writeData(); 62 | }; 63 | 64 | const rl = readline.createInterface({ 65 | input: process.stdin, 66 | output: process.stdout, 67 | }); 68 | 69 | readline.emitKeypressEvents(process.stdin); 70 | process.stdin.setRawMode(true); 71 | 72 | process.stdin.on("keypress", (key) => { 73 | if (key.ctrl && key.name === "c") { 74 | progressBar.stop(); 75 | speaker.end(); 76 | process.stdin.setRawMode(false); 77 | process.exit(); 78 | } 79 | }); 80 | 81 | writeAudioToSpeaker(); 82 | } 83 | 84 | module.exports = handleAudioStreamWithPlayPause; 85 | -------------------------------------------------------------------------------- 
/src/handlers/codeHandler.js: -------------------------------------------------------------------------------- 1 | const readline = require("readline"); 2 | const chalk = require("chalk"); 3 | 4 | // Function to handle code files and syntax-highlight them in the terminal 5 | module.exports = async function handleCodeFile(fileStream) { 6 | const rl = readline.createInterface({ 7 | input: fileStream, 8 | output: process.stdout, 9 | terminal: false, 10 | }); 11 | 12 | rl.on("line", (line) => { 13 | console.log(chalk.green(line)); // Syntax highlight lines (simple green) 14 | }); 15 | }; 16 | -------------------------------------------------------------------------------- /src/handlers/imageHandler.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const fs = require("fs"); 3 | const { pipeline } = require("stream/promises"); 4 | 5 | // Function to dynamically import terminal-image 6 | async function getTerminalImage() { 7 | const terminalImage = await import("terminal-image"); 8 | return terminalImage.default; 9 | } 10 | 11 | // Function to handle image files and display them in the terminal 12 | module.exports = async function handleImageFile(fileStream) { 13 | const imagePath = path.join(__dirname, "temp_image.jpg"); 14 | const writeStream = fs.createWriteStream(imagePath); 15 | 16 | try { 17 | await pipeline(fileStream, writeStream); 18 | const terminalImage = await getTerminalImage(); 19 | const image = await terminalImage.file(imagePath, { 20 | width: "50%", 21 | height: "50%", 22 | }); 23 | console.log(image); 24 | } catch (error) { 25 | console.error("Error processing image file:", error.message); 26 | } finally { 27 | if (fs.existsSync(imagePath)) { 28 | fs.unlinkSync(imagePath); 29 | } 30 | } 31 | }; 32 | -------------------------------------------------------------------------------- /src/handlers/jsonYamlHandler.js: 
-------------------------------------------------------------------------------- 1 | const yaml = require("js-yaml"); 2 | 3 | module.exports = async function handleJsonYamlFile(fileStream, extension) { 4 | const chunks = []; 5 | for await (const chunk of fileStream) { 6 | chunks.push(chunk); 7 | } 8 | const data = Buffer.concat(chunks).toString(); 9 | 10 | if (extension === "json") { 11 | console.log(JSON.stringify(JSON.parse(data), null, 2)); // Pretty-print JSON 12 | } else if (extension === "yaml" || extension === "yml") { 13 | const yamlData = yaml.load(data); 14 | console.log(yaml.dump(yamlData)); // Pretty-print YAML 15 | } 16 | }; 17 | -------------------------------------------------------------------------------- /src/handlers/pdfHandler.js: -------------------------------------------------------------------------------- 1 | const pdf = require("pdf-parse"); 2 | 3 | module.exports = async function handlePdfFile(fileStream) { 4 | const chunks = []; 5 | 6 | for await (const chunk of fileStream) { 7 | chunks.push(chunk); 8 | } 9 | 10 | const buffer = Buffer.concat(chunks); 11 | 12 | try { 13 | const data = await pdf(buffer); 14 | console.log(data.text); // Display extracted text from PDF 15 | } catch (err) { 16 | console.error("Error parsing PDF:", err.message); 17 | } 18 | }; 19 | -------------------------------------------------------------------------------- /src/handlers/spreadsheetHandler.js: -------------------------------------------------------------------------------- 1 | const xlsx = require("xlsx"); 2 | 3 | // Function to handle and display spreadsheet files (xls, xlsx, csv) 4 | module.exports = async function handleSpreadsheetFile(fileStream) { 5 | const chunks = []; 6 | 7 | for await (const chunk of fileStream) { 8 | chunks.push(chunk); 9 | } 10 | 11 | const buffer = Buffer.concat(chunks); 12 | 13 | try { 14 | const workbook = xlsx.read(buffer, { type: "buffer" }); 15 | const sheetName = workbook.SheetNames[0]; 16 | const sheet = 
workbook.Sheets[sheetName]; 17 | const data = xlsx.utils.sheet_to_csv(sheet); 18 | console.log(data); // Display CSV representation of the sheet 19 | } catch (err) { 20 | console.error("Error processing spreadsheet:", err.message); 21 | } 22 | }; 23 | -------------------------------------------------------------------------------- /src/handlers/textHandler.js: -------------------------------------------------------------------------------- 1 | const readline = require("readline"); 2 | 3 | // Function to handle and display text files 4 | module.exports = async function handleTextFile(fileStream) { 5 | const rl = readline.createInterface({ 6 | input: fileStream, 7 | output: process.stdout, 8 | terminal: false, 9 | }); 10 | 11 | rl.on("line", (line) => { 12 | console.log(line); // Print each line of the file 13 | }); 14 | }; 15 | -------------------------------------------------------------------------------- /src/services/gzipService.js: -------------------------------------------------------------------------------- 1 | // WIP 2 | const zlib = require("zlib"); 3 | const stream = require("stream"); 4 | const path = require("path"); 5 | const fetch = require("node-fetch"); 6 | const { fetchByteRange } = require("../utils/rangeFetcher"); 7 | const getFileHandler = require("../utils/fileHandler"); 8 | 9 | /** 10 | * Extracts the original filename from the GZIP header if it exists. 11 | * @param {Buffer} gzipHeader - The GZIP header buffer. 12 | * @returns {string} - The original filename or 'unknown' if not present. 
/**
 * Extracts the original filename from the start of a GZIP member (RFC 1952).
 *
 * Layout: a fixed 10-byte header whose 4th byte (FLG) holds the flags, then —
 * only if FEXTRA is set — a 2-byte little-endian length followed by that many
 * extra bytes, then — only if FNAME is set — the NUL-terminated name.
 *
 * @param {Buffer} gzipHeader - The first bytes of the GZIP file.
 * @returns {string} The stored filename (decoded as Latin-1), or "unknown"
 *   when the header is too short, carries no name, or the name is empty. A
 *   name that is cut off by the end of the buffer is returned as far as read.
 */
const extractGzipFilename = (gzipHeader) => {
  const FIXED_HEADER_LENGTH = 10;
  const FEXTRA_FLAG = 0x04; // an "extra field" precedes the filename
  const FNAME_FLAG = 0x08; // the original filename is present

  if (!gzipHeader || !(gzipHeader.length > FIXED_HEADER_LENGTH)) {
    return "unknown";
  }

  const flags = gzipHeader[3];
  if (!(flags & FNAME_FLAG)) {
    return "unknown";
  }

  let nameStart = FIXED_HEADER_LENGTH;
  if (flags & FEXTRA_FLAG) {
    if (nameStart + 2 > gzipHeader.length) {
      return "unknown";
    }
    // Skip XLEN (2 bytes, little-endian) and the extra field itself.
    const extraLength = gzipHeader[nameStart] | (gzipHeader[nameStart + 1] << 8);
    nameStart += 2 + extraLength;
  }

  let nameEnd = nameStart;
  while (nameEnd < gzipHeader.length && gzipHeader[nameEnd] !== 0x00) {
    nameEnd++;
  }

  if (nameEnd === nameStart) {
    return "unknown";
  }

  return Buffer.from(gzipHeader.subarray(nameStart, nameEnd)).toString("latin1");
};

/**
 * Lists files from a GZIP file. GZIP is typically used to compress a single
 * file, so the result has exactly one entry.
 *
 * @param {string} url - The URL of the GZIP file.
 * @returns {Promise<Array<Object>>} A one-element array with the file info;
 *   `fileSize` is the compressed size in bytes, or undefined when the server
 *   sends no usable Content-Length.
 */
const listFiles = async (url) => {
  try {
    const headResponse = await fetch(url, { method: "HEAD" });
    // The header value is a string (or null); expose it as a number.
    const parsedLength = Number.parseInt(headResponse.headers.get("content-length") ?? "", 10);
    const fileSize = Number.isNaN(parsedLength) ? undefined : parsedLength;

    const gzipHeader = await fetchByteRange(url, 0, 100);
    const filename = extractGzipFilename(gzipHeader);

    return [createGzipFileInfo(filename, fileSize)];
  } catch (error) {
    console.error("Error listing files from GZIP:", error);
    throw error;
  }
};

/**
 * Creates an object containing information about a GZIP file entry.
 * @param {string} filename - The name of the file in the GZIP archive.
 * @param {number} fileSize - The compressed size of the file.
 * @returns {Object} - An object containing file information and metadata.
 */
60 | */ 61 | const createGzipFileInfo = (filename, fileSize) => { 62 | return { 63 | filename, 64 | fileSize, 65 | isDir: () => false, // GZIP typically compresses a single file, not directories 66 | }; 67 | }; 68 | 69 | /** 70 | * Opens a GZIP file, decompresses it, and processes it using the appropriate handler. 71 | * @param {Object} file - The file information object. 72 | * @param {string} url - The URL of the GZIP file. 73 | * @returns {Promise} - A promise that resolves when the file is processed. 74 | */ 75 | const processFile = async (file, url) => { 76 | try { 77 | const compressedStream = await fetchByteRange(url, 0, 2000000); 78 | const decompressedStream = compressedStream.pipe(zlib.createGunzip()); // Decompress the stream 79 | 80 | const handler = await getFileHandler(path.extname(file.filename).substring(1)); // Get the handler based on the file extension 81 | 82 | if (handler) { 83 | await handler(decompressedStream); // Pass the decompressed stream to the handler 84 | } else { 85 | console.error(`No handler found for file: ${file.filename}`); 86 | } 87 | } catch (error) { 88 | console.error(`Error processing GZIP file entry: ${file.filename}`, error); 89 | throw error; 90 | } 91 | }; 92 | 93 | module.exports = { listFiles, processFile }; 94 | -------------------------------------------------------------------------------- /src/services/tarService.js: -------------------------------------------------------------------------------- 1 | const zlib = require("zlib"); 2 | const tar = require("tar-stream"); 3 | const stream = require("stream"); 4 | const path = require("path"); 5 | const getFileHandler = require("../utils/fileHandler"); 6 | const { fetchByteRange } = require("../utils/rangeFetcher"); 7 | const { getIsGzipped } = require("../utils/files"); 8 | 9 | /** 10 | * Handles a .tar or .tar.gz (gzip) file by extracting and processing the specified file. 11 | * @param {Object} selectedFile - The file to be extracted. 
12 | * @param {boolean} isGzipped - Indicates if the file is gzipped. 13 | * @param {string} url - The URL of the archive. 14 | * @returns {Promise} - A promise that resolves when the file is processed. 15 | */ 16 | const processFile = async (selectedFile, url) => { 17 | return new Promise(async (resolve, reject) => { 18 | try { 19 | const isGzipped = getIsGzipped(url); 20 | 21 | const extract = tar.extract(); 22 | let fileFound = false; 23 | 24 | extract.on("entry", (header, entryStream, next) => { 25 | if (fileFound) { 26 | entryStream.resume(); 27 | return next(); 28 | } 29 | 30 | const isDirectory = header.type === "directory"; 31 | if (header.name !== selectedFile.filename || isDirectory) { 32 | entryStream.resume(); 33 | return next(); 34 | } 35 | 36 | fileFound = true; 37 | const chunks = []; 38 | entryStream.on("data", (chunk) => chunks.push(chunk)); 39 | 40 | entryStream.on("end", async () => { 41 | try { 42 | const fileContent = Buffer.concat(chunks); 43 | const handler = await getFileHandler(path.extname(header.name).substring(1)); 44 | 45 | const contentStream = stream.Readable.from(fileContent); 46 | await handler(contentStream); 47 | next(); 48 | } catch (error) { 49 | console.error(`Error processing ${header.name}:`, error); 50 | next(); 51 | } 52 | }); 53 | 54 | entryStream.resume(); 55 | }); 56 | 57 | extract.on("error", (err) => { 58 | console.error("Error in TAR extraction:", err); 59 | reject(err); 60 | }); 61 | 62 | extract.on("finish", () => { 63 | if (!fileFound) { 64 | reject(new Error(`File ${selectedFile} not found in the archive.`)); 65 | } else { 66 | resolve(); 67 | } 68 | }); 69 | 70 | let tarStream; 71 | if (isGzipped) { 72 | const gzStream = await fetchByteRange(url, 0, 2000000); 73 | tarStream = gzStream.pipe(zlib.createGunzip()); 74 | } else { 75 | tarStream = await fetchByteRange(url, 0, 2000000); 76 | } 77 | 78 | tarStream.pipe(extract); 79 | } catch (err) { 80 | console.error("Error handling tar file:", err); 81 | reject(err); 
82 | } 83 | }); 84 | }; 85 | 86 | /** 87 | * Lists the files in a .tar or .tar.gz archive without fetching the entire stream. 88 | * @param {string} url - The URL of the archive. 89 | * @returns {Promise} - A promise that resolves with an array of file information. 90 | */ 91 | const listFiles = async (url) => { 92 | return new Promise(async (resolve, reject) => { 93 | const isGzipped = getIsGzipped(url); 94 | 95 | const files = []; 96 | const extract = tar.extract(); 97 | 98 | extract.on("entry", (header, entryStream, next) => { 99 | const isDirectory = header.type === "directory"; 100 | const fileInfo = { 101 | filename: header.name, 102 | fileSize: header.size, 103 | isDir: isDirectory, 104 | }; 105 | files.push(fileInfo); 106 | 107 | entryStream.on("end", next); 108 | entryStream.resume(); 109 | }); 110 | 111 | extract.on("finish", () => { 112 | resolve(files); 113 | }); 114 | 115 | extract.on("error", (err) => { 116 | reject(err); 117 | }); 118 | 119 | let tarStream; 120 | if (isGzipped) { 121 | const gzStream = await fetchByteRange(url, 0, 2000000); 122 | tarStream = gzStream.pipe(zlib.createGunzip()); 123 | } else { 124 | tarStream = await fetchByteRange(url, 0, 2000000); 125 | } 126 | 127 | tarStream.pipe(extract); 128 | }); 129 | }; 130 | 131 | module.exports = { processFile, listFiles }; 132 | -------------------------------------------------------------------------------- /src/services/zipService.js: -------------------------------------------------------------------------------- 1 | const zlib = require("zlib"); 2 | const stream = require("stream"); 3 | const path = require("path"); 4 | const { getRange } = require("../utils/rangeFetcher"); 5 | const getFileHandler = require("../utils/fileHandler"); 6 | const fetch = require("node-fetch"); 7 | 8 | /** 9 | * Creates an object containing information about a ZIP file entry. 10 | * @param {string} filename - The name of the file in the ZIP archive. 
/**
 * Creates an object containing information about a ZIP file entry.
 *
 * @param {string} filename - The name of the file in the ZIP archive.
 * @param {number} date_time - The entry's MS-DOS timestamp as one unsigned
 *   32-bit value: time in the low 16 bits, date in the high 16 bits.
 * @param {number} headerOffset - The offset of the file header in the ZIP archive.
 * @param {number} compressType - The compression method used for the file.
 * @param {number} compressSize - The compressed size of the file.
 * @param {number} fileSize - The uncompressed size of the file.
 * @returns {Object} An object containing the values above, with `date_time`
 *   decoded to `[year, month, day, hour, minute, second]`, plus an `isDir()`
 *   method that is true when the name ends with "/".
 */
const createZipFileInfo = (
  filename,
  date_time,
  headerOffset,
  compressType,
  compressSize,
  fileSize,
) => {
  // Unsigned shifts keep years >= 2044 (top bit of the 32-bit value set) correct.
  const dosTime = date_time & 0xffff;
  const dosDate = (date_time >>> 16) & 0xffff;

  const date_timeArray = [
    ((dosDate >>> 9) & 0x7f) + 1980, // 7 bits: years since 1980
    (dosDate >>> 5) & 0x0f, // 4 bits: month
    dosDate & 0x1f, // 5 bits: day
    (dosTime >>> 11) & 0x1f, // 5 bits: hour
    (dosTime >>> 5) & 0x3f, // 6 bits: minute
    (dosTime & 0x1f) * 2, // 5 bits: seconds, stored in 2-second units
  ];

  return {
    filename,
    headerOffset,
    compressType,
    compressSize,
    fileSize,
    date_time: date_timeArray,
    isDir: () => filename.endsWith("/"),
  };
};

/**
 * Fetches the Central Directory of a ZIP file.
 * @param {number} zipSize - The total size of the ZIP file.
 * @param {string} url - The URL of the ZIP file.
 * @returns {Promise} - A promise that resolves with the Central Directory data.
 */
52 | */ 53 | const getCentralDirectory = async (zipSize, url) => { 54 | try { 55 | const eocdData = await getRange(url, Math.max(zipSize - 65536, 0), 65536); 56 | const eocdOffset = eocdData.lastIndexOf(Buffer.from("504b0506", "hex")); 57 | 58 | if (eocdOffset === -1) 59 | throw new Error("Cannot find the End of Central Directory (EOCD) in the ZIP file."); 60 | 61 | const cdirOffset = eocdData.readUInt32LE(eocdOffset + 16); 62 | const cdirSize = eocdData.readUInt32LE(eocdOffset + 12); 63 | 64 | return getRange(url, cdirOffset, cdirSize); 65 | } catch (error) { 66 | console.error("Error fetching Central Directory:", error); 67 | throw error; 68 | } 69 | }; 70 | 71 | /** 72 | * Lists files from the Central Directory of a ZIP file. 73 | * @param {number} zipSize - The total size of the ZIP file. 74 | * @param {string} url - The URL of the ZIP file. 75 | * @returns {Promise} - A promise that resolves with an array of file information. 76 | */ 77 | const listFiles = async (url) => { 78 | try { 79 | const headResponse = await fetch(url, { method: "HEAD" }); 80 | const contentLength = headResponse.headers.get("content-length"); 81 | 82 | const cdirData = await getCentralDirectory(contentLength, url); 83 | const files = []; 84 | 85 | let offset = 0; 86 | while (offset < cdirData.length) { 87 | const fileNameLength = cdirData.readUInt16LE(offset + 28); 88 | const extraFieldLength = cdirData.readUInt16LE(offset + 30); 89 | const fileName = cdirData.slice(offset + 46, offset + 46 + fileNameLength).toString("utf-8"); 90 | 91 | const compressSize = cdirData.readUInt32LE(offset + 20); 92 | const uncompressSize = cdirData.readUInt32LE(offset + 24); 93 | 94 | const fileInfo = createZipFileInfo( 95 | fileName, 96 | cdirData.readUInt32LE(offset + 12), 97 | cdirData.readUInt32LE(offset + 42), 98 | cdirData.readUInt16LE(offset + 10), 99 | compressSize, 100 | uncompressSize, 101 | ); 102 | 103 | files.push(fileInfo); 104 | offset += 46 + fileNameLength + extraFieldLength + 
cdirData.readUInt16LE(offset + 32); 105 | } 106 | 107 | return files; 108 | } catch (error) { 109 | console.error("Error listing files from ZIP:", error); 110 | throw error; 111 | } 112 | }; 113 | 114 | /** 115 | * Opens a ZIP file and processes it using the appropriate handler based on the file extension. 116 | * @param {Object} file - The file information object. 117 | * @param {string} url - The URL of the ZIP file. 118 | * @returns {Promise} - A promise that resolves when the file is processed. 119 | */ 120 | const processFile = async (file, url) => { 121 | try { 122 | const localHeaderData = await getRange(url, file.headerOffset, 30); 123 | const fileNameLength = localHeaderData.readUInt16LE(26); 124 | const extraFieldLength = localHeaderData.readUInt16LE(28); 125 | const fileDataOffset = file.headerOffset + 30 + fileNameLength + extraFieldLength; 126 | const fileData = await getRange(url, fileDataOffset, file.compressSize); 127 | 128 | if (file.compressSize > 0 && fileData.length !== file.compressSize) { 129 | throw new Error("File data size mismatch."); 130 | } 131 | 132 | let fileStream; 133 | if (file.compressType === 0) { 134 | fileStream = stream.Readable.from(fileData); 135 | } else if (file.compressType === 8) { 136 | fileStream = stream.Readable.from(fileData).pipe(zlib.createInflateRaw()); 137 | } else { 138 | throw new Error(`Unsupported compression method: ${file.compressType}`); 139 | } 140 | 141 | const extension = path.extname(file.filename).substring(1); 142 | const handler = await getFileHandler(extension); 143 | 144 | if (handler) { 145 | await handler(fileStream); 146 | } else { 147 | console.error(`No handler found for file: ${file.filename}`); 148 | } 149 | } catch (error) { 150 | console.error(`Error opening ZIP file entry: ${file.filename}`, error); 151 | throw error; 152 | } 153 | }; 154 | 155 | module.exports = { listFiles, processFile }; 156 | -------------------------------------------------------------------------------- 
/src/utils/fileHandler.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const fs = require("fs").promises; 3 | const typeMappingsPath = path.join(__dirname, "/../config/typeMappings.json"); 4 | 5 | /** 6 | * Loads the handler module without caching. 7 | * @param {string} handlerName - The name of the handler file (without extension). 8 | * @returns {Promise} - The loaded handler module. 9 | */ 10 | async function loadHandler(handlerName) { 11 | try { 12 | const handler = require(`../handlers/${handlerName}.js`); 13 | return handler; 14 | } catch (err) { 15 | console.error(`Error loading handler "${handlerName}": ${err.message}`); 16 | throw err; 17 | } 18 | } 19 | 20 | /** 21 | * Loads the file type mappings from the JSON file without caching. 22 | * @returns {Promise} - The parsed JSON object containing the type mappings. 23 | */ 24 | async function loadTypeMappings() { 25 | try { 26 | const typeMappingsContent = await fs.readFile(typeMappingsPath, "utf-8"); 27 | return JSON.parse(typeMappingsContent); 28 | } catch (err) { 29 | console.error(`Error loading type mappings: ${err.message}`); 30 | throw err; 31 | } 32 | } 33 | 34 | /** 35 | * Dynamically loads the file handler based on the file extension. 36 | * This system is extensible to support new file types. 37 | * 38 | * Steps to Add a New File Type: 39 | * - Create a handler for the new file type in the "handlers" folder. 40 | * - Map the file extension to the handler in "typeMappings.json". 41 | * 42 | * @param {string} extension - The file extension (e.g., "txt", "custom"). 43 | * @returns {Promise} - The loaded handler module. 
44 | */ 45 | async function getFileHandler(extension) { 46 | if (!extension || typeof extension !== "string") { 47 | throw new Error("Invalid file extension."); 48 | } 49 | 50 | const normalizedExtension = extension.toLowerCase().trim(); 51 | 52 | try { 53 | const typeMappings = await loadTypeMappings(); 54 | 55 | const handlerName = typeMappings[normalizedExtension] || "textHandler"; 56 | return await loadHandler(handlerName); 57 | } catch (err) { 58 | console.error(`Error finding handler for extension "${normalizedExtension}": ${err.message}`); 59 | return await loadHandler("textHandler"); 60 | } 61 | } 62 | 63 | module.exports = getFileHandler; 64 | -------------------------------------------------------------------------------- /src/utils/fileProcessor.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const fs = require("fs").promises; 3 | const serviceMappingsPath = path.join(__dirname, "../config/serviceMappings.json"); 4 | 5 | /** 6 | * Load the appropriate service based on the file extension. 7 | * @param {string} extension - The file extension (e.g., "zip", "rar"). 8 | * @returns {Promise} - The loaded service module. 
9 | */ 10 | const getFileService = async (extension) => { 11 | if (!extension || typeof extension !== "string") { 12 | throw new Error("Invalid file extension."); 13 | } 14 | 15 | const normalizedExtension = extension.toLowerCase().trim(); 16 | 17 | try { 18 | // Load the service mappings from the config file without caching 19 | const serviceMappingsContent = await fs.readFile(serviceMappingsPath, "utf-8"); 20 | const serviceMappings = JSON.parse(serviceMappingsContent); 21 | 22 | // Dynamically require the service based on the file extension 23 | const serviceName = serviceMappings[normalizedExtension]; 24 | if (!serviceName) { 25 | throw new Error(`No service found for the file extension: ${normalizedExtension}`); 26 | } 27 | 28 | return require(`../services/${serviceName}.js`); 29 | } catch (err) { 30 | console.error(`Error loading service for extension "${normalizedExtension}": ${err.message}`); 31 | throw err; 32 | } 33 | }; 34 | 35 | module.exports = getFileService; 36 | -------------------------------------------------------------------------------- /src/utils/files.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | 3 | /** 4 | * Extracts and returns the file extension (type) from a given URL. 5 | * @param {string} url - The URL or file path to extract the file type from. 6 | * @returns {string} The file extension (without the dot) in lowercase. 7 | */ 8 | const getFileType = (url) => { 9 | return path.extname(url).toLowerCase().substring(1); 10 | }; 11 | 12 | /** 13 | * Determines if the file is gzipped by checking if the file extension is ".gz". 14 | * @param {string} url - The URL or file path to check. 15 | * @returns {boolean} True if the file has a ".gz" extension, otherwise false. 
16 | */ 17 | const getIsGzipped = (url) => { 18 | return path.extname(url).toLowerCase() === ".gz"; 19 | }; 20 | 21 | module.exports = { getFileType, getIsGzipped }; 22 | -------------------------------------------------------------------------------- /src/utils/rangeFetcher.js: -------------------------------------------------------------------------------- 1 | const fetch = require("node-fetch"); 2 | 3 | /** 4 | * Fetches a specific byte range from a given URL. 5 | * @param {string} url - The URL to fetch the byte range from. 6 | * @param {number} start - The start byte position of the range. 7 | * @param {number} end - The end byte position of the range. 8 | * @returns {Promise} - A promise that resolves with the fetched byte range as a stream. 9 | * @throws {Error} - Throws an error if the fetch request fails. 10 | */ 11 | const fetchByteRange = async (url, start, end) => { 12 | const response = await fetch(url, { 13 | headers: { Range: `bytes=${start}-${end}` }, 14 | }); 15 | 16 | if (!response.ok) { 17 | throw new Error(`Failed to fetch range ${start}-${end}`); 18 | } 19 | 20 | return response.body; 21 | }; 22 | 23 | /** 24 | * Fetches a specific byte range as a Buffer. 25 | * @param {string} url - The URL to fetch the byte range from. 26 | * @param {number} start - The start byte position of the range. 27 | * @param {number} length - The length of the byte range to fetch. 28 | * @returns {Promise} - A promise that resolves with the fetched byte range as a Buffer. 29 | */ 30 | const getRange = async (url, start, length) => { 31 | const response = await fetch(url, { 32 | headers: { Range: `bytes=${start}-${start + length - 1}` }, 33 | }); 34 | 35 | if (!response.ok) { 36 | throw new Error(`Failed to fetch range ${start}-${start + length - 1}`); 37 | } 38 | 39 | return Buffer.from(await response.arrayBuffer()); 40 | }; 41 | 42 | module.exports = { fetchByteRange, getRange }; 43 | --------------------------------------------------------------------------------