├── .gitignore
├── package.json
├── index.mjs
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "dependencies": {
3 |     "website-scraper": "^5.3.1"
4 |   }
5 | }
6 |
--------------------------------------------------------------------------------
/index.mjs:
--------------------------------------------------------------------------------
1 | import scrape from 'website-scraper';
2 | const websiteUrl = 'https://bahrul-rozak.vercel.app/';
3 |
4 | scrape({
5 |   urls: [websiteUrl],
6 |   urlFilter: function (url) {
7 |     return url.startsWith(websiteUrl);
8 |   },
9 |   recursive: true,
10 |   maxDepth: 50,
11 |   prettifyUrls: true,
12 |   filenameGenerator: 'bySiteStructure',
13 |   directory: './result'
14 | }).then(() => {
15 |   console.log("Entire website successfully downloaded");
16 | }).catch((err) => {
17 |   console.error("An error occurred", err);
18 | });
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # URL to Code
4 |
5 | [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/Bahrul-Rozak/url-to-code/blob/main/LICENSE)
6 | [![GitHub issues](https://img.shields.io/github/issues/Bahrul-Rozak/url-to-code)](https://github.com/Bahrul-Rozak/url-to-code/issues)
7 | [![GitHub stars](https://img.shields.io/github/stars/Bahrul-Rozak/url-to-code)](https://github.com/Bahrul-Rozak/url-to-code/stargazers)
8 | [![GitHub forks](https://img.shields.io/github/forks/Bahrul-Rozak/url-to-code)](https://github.com/Bahrul-Rozak/url-to-code/network)
9 |
10 | A simple Node.js web scraper using [website-scraper](https://www.npmjs.com/package/website-scraper) to
download an entire website.
11 |
12 | ## Getting Started
13 |
14 | ### Prerequisites
15 |
16 | Make sure you have [Node.js](https://nodejs.org/) installed on your machine.
17 |
18 | ### Installation
19 |
20 | 1. Clone the repository:
21 |
22 |    ```bash
23 |    git clone https://github.com/Bahrul-Rozak/url-to-code.git
24 |    ```
25 |
26 | 2. Navigate to the project directory:
27 |
28 |    ```bash
29 |    cd url-to-code
30 |    ```
31 |
32 | 3. Install dependencies:
33 |
34 |    ```bash
35 |    npm install
36 |    ```
37 |
38 | ### Usage
39 |
40 | 1. Open `index.mjs` in your preferred code editor.
41 |
42 | 2. Set the `websiteUrl` variable to the URL of the website you want to scrape.
43 |
44 |    ```javascript
45 |    const websiteUrl = 'https://example.com';
46 |    ```
47 |
48 | 3. Customize the other options if needed (e.g., `maxDepth`, `directory`).
49 |
50 | 4. Run the scraper:
51 |
52 |    ```bash
53 |    node index.mjs
54 |    ```
55 |
56 | 5. Check the `./result` directory for the downloaded website.
57 |
58 | ## Configuration
59 |
60 | - `urls`: An array of URLs to scrape.
61 | - `urlFilter`: A function that decides which discovered URLs are downloaded. The example keeps only URLs that start with the specified `websiteUrl`, so the crawl stays on the same site.
62 | - `recursive`: If `true`, the scraper follows links recursively.
63 | - `maxDepth`: Maximum recursion depth when following links.
64 | - `prettifyUrls`: If `true`, URLs are prettified before saving.
65 | - `filenameGenerator`: File-naming strategy; set to `'bySiteStructure'` in the example.
66 | - `directory`: Output directory for the downloaded website.
67 |
68 |
69 | ## Acknowledgments
70 |
71 | - [website-scraper](https://www.npmjs.com/package/website-scraper) for providing an easy-to-use web scraping library.
72 |
73 | Happy downloading! 🕸️
74 |
--------------------------------------------------------------------------------
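
The configuration options listed in the README can be combined differently. As a minimal sketch (not part of the repo; it assumes the same `website-scraper` v5 options, and the URL, depth, and directory values are illustrative), an alternative options object could group saved files by resource type instead of mirroring the site structure:

```javascript
// Sketch: alternative website-scraper options (hypothetical values).
const options = {
  urls: ['https://example.com/'],
  recursive: true,
  maxDepth: 2,                 // a much shallower crawl than the example's 50
  filenameGenerator: 'byType', // group files by resource type instead of 'bySiteStructure'
  subdirectories: [            // where each resource type is saved under `directory`
    { directory: 'img', extensions: ['.jpg', '.png', '.svg'] },
    { directory: 'js', extensions: ['.js'] },
    { directory: 'css', extensions: ['.css'] }
  ],
  directory: './result-by-type'
};
```

Passing such an object to `scrape(options)` instead of the inline literal in `index.mjs` keeps the call site unchanged while making the configuration easy to swap.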