├── assets └── demonSlayer.png ├── src ├── constant │ ├── query │ │ ├── EXTENSIONS.ts │ │ ├── GOOGLE_PARAMS.ts │ │ ├── GOOGLE_COLORS.ts │ │ └── GOOGLE_QUERY.ts │ └── GOOGLE_CONSTANT.ts ├── types │ ├── results.ts │ ├── imageResultItem.ts │ ├── config.ts │ └── googleQuery.ts ├── core │ ├── getSearchFromGoogleUrl.ts │ ├── limitResultSize.ts │ ├── constructGoogleUrl.ts │ ├── verifyGoogleQuery.ts │ ├── inverseGetGoogleImageURL.ts │ ├── parseGoogleImages.ts │ └── buildGoogleDork.ts ├── utils │ └── utils.ts └── index.ts ├── .prettierrc ├── jest.config.ts ├── example ├── index.js ├── inverseUrl.js └── inverseUpload.js ├── test ├── inverseUrl.test.ts ├── proxy.test.ts ├── excludeWords.test.ts ├── domains.test.ts ├── excludeDomains.test.ts ├── inverseUpload.test.ts ├── limit.test.ts ├── urlMatch.test.ts ├── filterByTitles.test.ts └── query.test.ts ├── tsconfig.json ├── .eslintrc.js ├── .github ├── ISSUE_TEMPLATE │ ├── HELP_REQUEST.md │ ├── FEATURE_REQUEST.md │ └── BUG_REPORT.md ├── pull_request_template.md └── workflows │ └── test-prettier-lint.yml ├── LICENSE ├── CHANGELOG.md ├── package.json ├── .gitignore └── README.md /assets/demonSlayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yoannchb-pro/google-img-scrap/HEAD/assets/demonSlayer.png -------------------------------------------------------------------------------- /src/constant/query/EXTENSIONS.ts: -------------------------------------------------------------------------------- 1 | export const EXTENSIONS = ['jpg', 'gif', 'bmp', 'png', 'svg', 'webp', 'ico', 'raw']; 2 | -------------------------------------------------------------------------------- /src/types/results.ts: -------------------------------------------------------------------------------- 1 | import ImageResultItem from './imageResultItem'; 2 | 3 | type Results = { 4 | url: string; 5 | search: string; 6 | result: ImageResultItem[]; 7 | }; 8 | 9 | export default Results; 10 | 
-------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "singleQuote": true, 4 | "tabWidth": 2, 5 | "useTabs": false, 6 | "printWidth": 100, 7 | "trailingComma": "none", 8 | "arrowParens": "avoid", 9 | "endOfLine": "auto" 10 | } 11 | -------------------------------------------------------------------------------- /src/types/imageResultItem.ts: -------------------------------------------------------------------------------- 1 | type ImageResultItem = { 2 | id: string; 3 | title: string; 4 | originalUrl: string; 5 | url: string; 6 | height: number; 7 | width: number; 8 | }; 9 | 10 | export default ImageResultItem; 11 | -------------------------------------------------------------------------------- /jest.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from '@jest/types'; 2 | 3 | const config: Config.InitialOptions = { 4 | preset: 'ts-jest', 5 | rootDir: 'test', 6 | testEnvironment: 'node', 7 | verbose: true 8 | }; 9 | 10 | export default config; 11 | -------------------------------------------------------------------------------- /example/index.js: -------------------------------------------------------------------------------- 1 | const { GOOGLE_IMG_SCRAP } = require('../dist'); 2 | 3 | (async function () { 4 | const test = await GOOGLE_IMG_SCRAP({ 5 | search: 'cats', 6 | limit: 5 7 | }); 8 | 9 | console.log(test, test.result.length); 10 | })(); 11 | -------------------------------------------------------------------------------- /src/constant/query/GOOGLE_PARAMS.ts: -------------------------------------------------------------------------------- 1 | const GOOGLE_PARAMS = { 2 | SIZE: 'isz', 3 | COLOR: 'ic', 4 | SPECIFIC_COLOR: 'Cisc', 5 | TYPE: 'itp', 6 | DATE: 'qdr', 7 | LICENCE: 'il', 8 | IMAGE_EXTENSION: 'ift' 9 | }; 10 | 11 | export default 
/**
 * Extract the human-readable search terms from a google search url.
 *
 * The value of the `q` parameter that directly follows `search?` is taken,
 * its `+` separators are turned into spaces and percent-escapes are decoded.
 *
 * @param url Google search url, e.g. `https://www.google.com/search?q=black+cats&tbm=isch`
 * @returns The decoded search terms (`black cats` for the example above)
 * @throws {Error} When `url` does not contain a `search?q=` parameter
 * @throws {URIError} When the query contains a malformed percent-escape
 */
function getSearchFromGoogleUrl(url: string): string {
  // Only the first occurrence is needed, so the regex carries no `g` flag.
  const match = /search\?q=([^&]+)/i.exec(url);
  if (match === null) {
    throw new Error(`Unable to find the search query in the google url '${url}'`);
  }

  // Spaces are written as '+' in the url; a literal '+' is written as %2B
  // and is restored by decodeURIComponent afterwards.
  const rawSearch = match[1].split('+').join(' ');
  return decodeURIComponent(rawSearch);
}
console.log(test, test.result.length); 10 | })(); 11 | -------------------------------------------------------------------------------- /test/inverseUrl.test.ts: -------------------------------------------------------------------------------- 1 | import { GOOGLE_IMG_INVERSE_ENGINE_URL } from '../dist'; 2 | 3 | describe('Inverse URL test', function () { 4 | it('Should return 5 results', async function () { 5 | const { result } = await GOOGLE_IMG_INVERSE_ENGINE_URL( 6 | 'https://i.natgeofe.com/n/548467d8-c5f1-4551-9f58-6817a8d2c45e/NationalGeographic_2572187_3x4.jpg', 7 | { limit: 5 } 8 | ); 9 | expect(result.length).toBe(5); 10 | }); 11 | }); 12 | -------------------------------------------------------------------------------- /test/proxy.test.ts: -------------------------------------------------------------------------------- 1 | import { GOOGLE_IMG_SCRAP } from '../dist'; 2 | 3 | describe('Proxy test', function () { 4 | it('Proxy for axios', async function () { 5 | const { result } = await GOOGLE_IMG_SCRAP({ 6 | search: 'cats', 7 | //change proxy if not working 8 | proxy: { 9 | protocol: 'https', 10 | host: '201.229.250.19', 11 | port: 80 12 | } 13 | }); 14 | expect(result.length).toBeGreaterThan(0); 15 | }); 16 | }); 17 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "CommonJS", 4 | "esModuleInterop": true, 5 | "allowSyntheticDefaultImports": true, 6 | "target": "ES6", 7 | "noImplicitAny": true, 8 | "moduleResolution": "node", 9 | "sourceMap": true, 10 | "outDir": "dist", 11 | "resolveJsonModule": true, 12 | "types": ["node", "jest"], 13 | "declaration": true, 14 | "typeRoots": ["node_modules/@types"] 15 | }, 16 | "include": ["src/**/*"] 17 | } 18 | -------------------------------------------------------------------------------- /test/excludeWords.test.ts: 
-------------------------------------------------------------------------------- 1 | import { GOOGLE_IMG_SCRAP } from '../dist'; 2 | 3 | describe('Exclude words test', function () { 4 | it("All title shouldn't have a specific word", async function () { 5 | const excludeWords = ['white', 'black']; 6 | const { result } = await GOOGLE_IMG_SCRAP({ 7 | search: 'cats', 8 | excludeWords 9 | }); 10 | for (const img of result) { 11 | expect(excludeWords.some(word => img.title.includes(word))).toBeFalsy(); 12 | } 13 | }); 14 | }); 15 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | root: true, 3 | parser: '@typescript-eslint/parser', 4 | plugins: ['@typescript-eslint'], 5 | extends: [ 6 | 'eslint:recommended', 7 | 'plugin:@typescript-eslint/eslint-recommended', 8 | 'plugin:@typescript-eslint/recommended', 9 | 'prettier' 10 | ], 11 | ignorePatterns: ['node_modules/', 'dist'], 12 | overrides: [ 13 | { 14 | files: ['src/**/*'], 15 | rules: { 16 | '@typescript-eslint/no-explicit-any': 'off' 17 | } 18 | } 19 | ] 20 | }; 21 | -------------------------------------------------------------------------------- /test/domains.test.ts: -------------------------------------------------------------------------------- 1 | import { GOOGLE_IMG_SCRAP } from '../dist'; 2 | 3 | describe('Domains test', function () { 4 | it('All result should be one of those specific domains', async function () { 5 | const domains = ['https://fr.wikipedia.org/', 'https://wall.alphacoders.com/']; 6 | const { result } = await GOOGLE_IMG_SCRAP({ 7 | search: 'cats', 8 | domains 9 | }); 10 | for (const img of result) { 11 | expect(domains.some(domain => img.url.includes(domain))).toBeTruthy(); 12 | } 13 | }); 14 | }); 15 | -------------------------------------------------------------------------------- /test/excludeDomains.test.ts: 
-------------------------------------------------------------------------------- 1 | import { GOOGLE_IMG_SCRAP } from '../dist'; 2 | 3 | describe('Exclude domains test', function () { 4 | it("All result shouldn't includes those domains", async function () { 5 | const excludeDomains = ['https://fr.wikipedia.org/', 'https://wall.alphacoders.com/']; 6 | const { result } = await GOOGLE_IMG_SCRAP({ 7 | search: 'cats', 8 | excludeDomains 9 | }); 10 | for (const img of result) { 11 | expect(excludeDomains.some(domain => img.url.includes(domain))).toBeFalsy(); 12 | } 13 | }); 14 | }); 15 | -------------------------------------------------------------------------------- /test/inverseUpload.test.ts: -------------------------------------------------------------------------------- 1 | import { GOOGLE_IMG_INVERSE_ENGINE_UPLOAD } from '../dist'; 2 | import fs from 'fs'; 3 | import path from 'path'; 4 | 5 | // Not working anymore for now :( 6 | describe.skip('Inverse Upload test', function () { 7 | it('Should return 5 results', async function () { 8 | const imageBuffer = fs.readFileSync(path.resolve(__dirname, '../assets/demonSlayer.png')); 9 | const { result } = await GOOGLE_IMG_INVERSE_ENGINE_UPLOAD(imageBuffer, { 10 | limit: 5 11 | }); 12 | expect(result.length).toBe(5); 13 | }); 14 | }); 15 | -------------------------------------------------------------------------------- /test/limit.test.ts: -------------------------------------------------------------------------------- 1 | import { GOOGLE_IMG_SCRAP } from '../dist'; 2 | 3 | describe('Limit test', function () { 4 | it('Should return only 5 elements', async function () { 5 | const { result } = await GOOGLE_IMG_SCRAP({ 6 | search: 'cats', 7 | limit: 5 8 | }); 9 | expect(result.length).toBe(5); 10 | }); 11 | it('Should return all elements', async function () { 12 | const { result } = await GOOGLE_IMG_SCRAP({ 13 | search: 'cats', 14 | limit: 0 15 | }); 16 | expect(result.length).toBeGreaterThan(0); 17 | }); 18 | }); 19 | 
/**
 * Cap a list of image results to at most `limit` entries.
 *
 * @param limit Maximum number of items to keep; a falsy or non-positive value disables the cap
 * @param imagesItems Parsed image results
 * @returns The first `limit` items when the list is longer than a positive `limit`,
 * otherwise the `imagesItems` array itself (same reference)
 */
function limitResultSize(limit: number, imagesItems: ImageResultItem[]): ImageResultItem[] {
  const capApplies = Boolean(limit) && limit > 0 && imagesItems.length > limit;
  if (!capApplies) {
    return imagesItems;
  }

  const firstItems = imagesItems.slice(0, limit);
  // An empty slice (e.g. a fractional limit below 1) falls back to the full list.
  return firstItems.length === 0 ? imagesItems : firstItems;
}
this "(draw and white) or (albino and white)" 6 | const filterByTitles = [ 7 | ['draw', 'white'], 8 | ['albino', 'white'] 9 | ]; 10 | const { result } = await GOOGLE_IMG_SCRAP({ 11 | search: 'cats', 12 | filterByTitles 13 | }); 14 | for (const img of result) { 15 | expect(filterByTitles.flat().some(word => img.title.includes(word))).toBeTruthy(); 16 | } 17 | }); 18 | }); 19 | -------------------------------------------------------------------------------- /test/query.test.ts: -------------------------------------------------------------------------------- 1 | import { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } from '../dist'; 2 | 3 | describe('Query test', function () { 4 | it('Test query params like safeSearch, custom query and query object', async function () { 5 | const { url, result } = await GOOGLE_IMG_SCRAP({ 6 | search: 'cats', 7 | safeSearch: true, 8 | custom: 'test=true', 9 | query: { 10 | EXTENSION: GOOGLE_QUERY.EXTENSION.PNG, 11 | TYPE: GOOGLE_QUERY.TYPE.DRAW 12 | } 13 | }); 14 | expect(result[0].url).toMatch('png'); 15 | expect(url).toMatch('safe=active'); //safe search 16 | expect(url).toMatch('test=true'); //custom query 17 | }); 18 | }); 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/HELP_REQUEST.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Help Request 3 | about: Use this template to request help or support. 4 | title: '[HELP]' 5 | labels: help 6 | assignees: '' 7 | --- 8 | 9 | ## Help Request 10 | 11 | **Issue Summary** 12 | 13 | [Summarize the issue you are facing concisely.] 14 | 15 | **Describe the problem** 16 | 17 | [Describe what is the problem] 18 | 19 | **Environment** 20 | 21 | - Operating System: [Your OS] 22 | - Browser: [Your Browser] 23 | - Version: [Version Number] 24 | - Any other relevant information 25 | 26 | ### Screenshots 27 | 28 | [Include any relevant screenshots if they can help illustrate the issue.] 
29 | 30 | ### Additional Information 31 | 32 | [Any additional information that may be helpful in diagnosing the issue.] 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/FEATURE_REQUEST.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest a new feature or enhancement for the project. 4 | title: '[FEATURE]' 5 | labels: 'enhancement' 6 | assignees: '' 7 | --- 8 | 9 | ## Feature Request 10 | 11 | **Description of the Feature** 12 | A clear and concise description of the feature you'd like to see added. 13 | 14 | **Motivation** 15 | An explanation of the motivation behind this feature. How would it improve the user experience or utility of the software? 16 | 17 | **Screenshot (Optional)** 18 | If possible, add a screenshot or illustration of the feature you're proposing. 19 | 20 | **Additional Context** 21 | Add any other context or information about the feature that you think is relevant. 22 | 23 | **Additional Information** 24 | Add any other information you think is relevant. 25 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Pull Request 2 | 3 | **Description** 4 | A brief description of the changes in this pull request. 5 | 6 | **Related Issue** 7 | 8 | - Fixes # (issue number) 9 | - Addresses # (issue number) 10 | 11 | **Changes Made** 12 | Describe the changes you've made. Provide a high-level overview of what has been done in this pull request. 13 | 14 | **Screenshots (if applicable)** 15 | Add screenshots or GIFs to demonstrate the changes, if applicable. 16 | 17 | **Checklist** 18 | 19 | - [ ] I have tested the changes locally. 20 | - [ ] I have updated the documentation accordingly. 21 | - [ ] I have added necessary comments to the code. 
/**
 * Build the google images url used for scraping.
 *
 * The query string is assembled from:
 * - the `GOOGLE_CONSTANT.forceGoogleImage` parameters,
 * - the `GOOGLE_CONSTANT.queryParam` parameter, holding the comma-joined values of `config.query`,
 * - `q`, the dork string returned by `buildGoogleDorks(config)`,
 *
 * followed by the raw `config.custom` string and `safe=active` when they are requested.
 *
 * @param config Search configuration
 * @returns The full url to request
 */
function constructGoogleUrl(config: Config): string {
  const googleDork = buildGoogleDorks(config);
  const customParam = config.custom ? `&${config.custom}` : '';
  const safeSearch = config.safeSearch ? '&safe=active' : '';

  // Copy the forced parameters into a fresh object. Assigning onto
  // GOOGLE_CONSTANT.forceGoogleImage itself would leave the `tbs` and `q` of this
  // call inside the shared constant, where every other reader would pick them up.
  const query = {
    ...GOOGLE_CONSTANT.forceGoogleImage,
    [GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(','),
    q: googleDork
  };

  return GOOGLE_CONSTANT.url + buildQuery(query) + customParam + safeSearch;
}
/**
 * Serialize a flat parameter map into a url query string.
 *
 * Values are percent-encoded with `encodeURIComponent`; parameter names are emitted as given.
 *
 * @param query Parameter names mapped to their raw values
 * @returns The query string including its leading `?` (just `?` for an empty map)
 */
function buildQuery(query: Record<string, string>): string {
  const pairs = Object.entries(query).map(
    ([name, value]) => `${name}=${encodeURIComponent(value)}`
  );

  return '?' + pairs.join('&');
}

/**
 * Replace textual `\uXXXX` escape sequences with the character they encode,
 * which makes scraped text readable and repairs urls containing such escapes.
 *
 * The `u` prefix is matched case-insensitively and the four hex digits are captured
 * directly, so `\U0041` is decoded the same way as `\u0041`.
 *
 * @param text Text that may contain literal `\uXXXX` sequences
 * @returns The text with every sequence replaced by its character
 */
function unicodeToChar(text: string): string {
  return text.replace(/\\u([\dA-F]{4})/gi, (_escape, hexCode: string) =>
    String.fromCharCode(parseInt(hexCode, 16))
  );
}

/**
 * Tell whether a piece of text mentions one of the known image extensions.
 *
 * NOTE(review): this is a plain substring test against every entry of `EXTENSIONS`,
 * so an extension appearing anywhere in the text counts as a match.
 *
 * @param content Text to inspect, typically a url (defaults to an empty string)
 * @returns `true` when at least one extension is contained in `content`
 */
function isImage(content = ''): boolean {
  return EXTENSIONS.some(extension => content.includes(extension));
}
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/test-prettier-lint.yml: -------------------------------------------------------------------------------- 1 | name: Test, Lint and Format Code 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | test-lint-and-format: 9 | runs-on: ubuntu-latest 10 | 11 | permissions: 12 | contents: write 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up Node.js 19 | uses: actions/setup-node@v4 20 | with: 21 | node-version: '18' 22 | 23 | - name: Clear cache and Install dependencies 24 | run: | 25 | rm -rf node_modules package-lock.json 26 | npm install 27 | 28 | - name: Run Build 29 | run: npm run fastBuild 30 | 31 | - name: Run Tests 32 | run: npm run test 33 | 34 | - name: Run Prettier 35 | run: npm run prettier 36 | 37 | - name: Run Linter 38 | run: npm run lint 39 | 40 | - name: Commit changes 41 | uses: stefanzweifel/git-auto-commit-action@v5 42 | with: 43 | commit_message: 'WORKFLOW: Apply code formatting' 44 | branch: ${{ github.head_ref }} 45 | file_pattern: '.' 
46 | -------------------------------------------------------------------------------- /src/core/verifyGoogleQuery.ts: -------------------------------------------------------------------------------- 1 | import Config from '../types/config'; 2 | import GoogleQuery from '../types/googleQuery'; 3 | import GOOGLE_QUERY from '../constant/query/GOOGLE_QUERY'; 4 | 5 | /** 6 | * Validation of the query passed as argument 7 | * @param config 8 | */ 9 | function verifyGoogleQuery(config: Config) { 10 | if (config.excludeDomains && config.domains) 11 | throw new Error("Can not set 'excludeDomains' and 'domains' as same times"); 12 | 13 | if (!config.search || config.search.trim() == '') throw new Error("'search' can not be empty"); 14 | 15 | if (config.query) { 16 | const queryToVerify = Object.keys(GOOGLE_QUERY); 17 | 18 | for (const key of Object.keys(config.query) as (keyof GoogleQuery)[]) { 19 | if (!queryToVerify.includes(key)) throw new Error(`Invalide query name '${key}'`); 20 | 21 | const VALUES = Object.values(GOOGLE_QUERY[key]); 22 | const ACTUAL_VALUE = config.query[key] ?? 
/**
 * Return the first match of `pattern` inside a google response body.
 *
 * @param body Response payload; it is converted to a string before matching
 * @param pattern Pattern describing the url to find
 * @param description Short name of the expected url, used in the error message
 * @returns The matched text
 * @throws {Error} When the pattern does not match
 */
function extractFirstMatch(body: unknown, pattern: RegExp, description: string): string {
  const match = pattern.exec(String(body));
  if (match === null) {
    throw new Error(`Unable to find the ${description} in the google lens response`);
  }
  return match[0];
}

/**
 * Resolve the google images search url behind a google lens reverse image search.
 *
 * When `formData` is given it is first POSTed to `url`, and the lens url found in the
 * response replaces `url`. That url is then fetched and the first
 * `https://www.google.com/search?q…` link it contains is returned, unescaped with
 * `unicodeToChar` and with the `GOOGLE_CONSTANT.forceGoogleImage` parameters appended.
 *
 * @param url Lens url to scrape, or the upload endpoint when `formData` is provided
 * @param proxy Optional axios proxy configuration applied to every request
 * @param formData Optional multipart body holding the image to upload
 * @returns The google images search url
 * @throws {Error} When an expected url cannot be found in a response
 */
async function inverseGetGoogleImageURL(
  url: string,
  proxy?: AxiosProxyConfig,
  formData?: FormData
): Promise<string> {
  // axios reads proxy settings from the `proxy` key of the request config. Spreading
  // the proxy object at the top level would be ignored, and its `auth` field would be
  // interpreted as credentials for the request itself.
  const proxyConfig = proxy ? { proxy } : {};

  if (formData) {
    // Send the image buffer to google
    const uploadResponse = await axios.post(url, formData, {
      headers: {
        ...GOOGLE_CONSTANT.headers,
        ...formData.getHeaders()
      },
      ...proxyConfig
    });
    // The response contains the google lens url generated for the uploaded image
    url = extractFirstMatch(
      uploadResponse.data,
      /https:\/\/lens\.google\.com\/[^"]+/i,
      'google lens url'
    );
  }

  // Scrape google lens to get the original google image search
  const lensResponse = await axios.get(url, {
    headers: GOOGLE_CONSTANT.headers,
    ...proxyConfig
  });
  const searchUrl = extractFirstMatch(
    lensResponse.data,
    /https:\/\/www\.google\.com\/search\?q[^"]+/i,
    'google search url'
  );

  // One `key=value` pair per forced parameter, joined with `&`
  const forcedParams = Object.entries(GOOGLE_CONSTANT.forceGoogleImage)
    .map(([name, value]) => `${name}=${encodeURIComponent(String(value))}`)
    .join('&');

  return unicodeToChar(searchUrl) + '&' + forcedParams;
}
GOOGLE_QUERY as GoogleQuery; 53 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ### 1.1.4 4 | 5 | - Fixed user agent to avoid bad image quality, errors and captcha (gohoski) 6 | 7 | ### 1.1.3 8 | 9 | - Some fixes 10 | 11 | ### 1.1.2 12 | 13 | - Fixed empty result 14 | - Removed average color 15 | 16 | ### 1.1.1 17 | 18 | - Fixed empty result 19 | 20 | ### 1.1.0 21 | 22 | - Added google image inverse search engine. You can now search images with a local image or with an image url. 23 | 24 | ### 1.0.9 25 | 26 | - Fixed many bugs 27 | - `filterByTitles` is now working 28 | - `urlMatch` added in types 29 | - All the code has been rewritten in TypeScript with a new structure 30 | - Removed `execute` 31 | - Added `proxy` configuration 32 | - Rewrote all tests with jest 33 | 34 | ### 1.0.8 35 | 36 | - Fixed "ERROR: Cannot assign to "queryName" because it is a constant" (by GaspardCulis) 37 | - Removed gstatic url 38 | - Added average color, id, title and originalUrl 39 | 40 | ### 1.0.7 41 | 42 | - Readme update 43 | 44 | ### 1.0.6 45 | 46 | - Fixed types 47 | - Added `limit` to limit the size of the results 48 | 49 | ### 1.0.5 50 | 51 | - Added types (by christophe77) 52 | 53 | ### v1.0.4 54 | 55 | - New option `urlMatch`. You now get images whose url matches a string (example: "cdn") 56 | - New option `filterByTitles`. Filter images by titles 57 | 58 | ### v1.0.3 59 | 60 | - New option `execute`. 
Allows you to execute a function to remove "gstatic.com" domains for example 61 | 62 | ### v1.0.2 63 | 64 | - Cannot set 'domains' and 'excludeDomains' at the same time 65 | - Fixed some bugs 66 | - New option `excludeWords` 67 | 68 | ### v1.0.1 69 | 70 | - Added the missing dependency 71 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "google-img-scrap", 3 | "version": "1.1.4", 4 | "description": "Scrap images from google images with customs pre filled google dork options", 5 | "main": "./dist/index.js", 6 | "types": "./dist/index.d.ts", 7 | "scripts": { 8 | "example": "node ./example/index.js", 9 | "build": "npm run prettier && npm run lint && npm run fastBuild", 10 | "fastBuild": "tsc", 11 | "lint": "eslint . --ext .ts", 12 | "prettier": "prettier --write .", 13 | "test": "jest --colors" 14 | }, 15 | "files": [ 16 | "dist", 17 | "CHANGELOG.md", 18 | "README.md", 19 | "tsconfig.json", 20 | "LICENSE" 21 | ], 22 | "repository": { 23 | "type": "git", 24 | "url": "git+https://github.com/yoannchb-pro/google-img-scrap.git" 25 | }, 26 | "keywords": [ 27 | "google", 28 | "image", 29 | "scrap", 30 | "options", 31 | "google-scrap", 32 | "dork", 33 | "google-dork", 34 | "reverse-search-engine", 35 | "query", 36 | "powerfull", 37 | "easy", 38 | "type", 39 | "color", 40 | "extension", 41 | "filter", 42 | "date", 43 | "licence" 44 | ], 45 | "author": "yoannchb", 46 | "contributors": [ 47 | { 48 | "name": "christophe77", 49 | "url": "https://github.com/christophe77" 50 | }, 51 | { 52 | "name": "GaspardCulis", 53 | "url": "https://github.com/GaspardCulis" 54 | } 55 | ], 56 | "license": "MIT", 57 | "bugs": { 58 | "url": "https://github.com/yoannchb-pro/google-img-scrap/issues" 59 | }, 60 | "homepage": "https://github.com/yoannchb-pro/google-img-scrap#readme", 61 | "dependencies": { 62 | "axios": "^1.2.3", 63 | "form-data": "^4.0.0" 64 | 
}, 65 | "devDependencies": { 66 | "@types/jest": "^29.2.6", 67 | "@types/node": "^18.11.18", 68 | "@typescript-eslint/eslint-plugin": "^8.13.0", 69 | "@typescript-eslint/parser": "^8.13.0", 70 | "eslint": "^8.57.0", 71 | "eslint-config-prettier": "^9.1.0", 72 | "jest": "^29.3.1", 73 | "prettier": "^3.3.3", 74 | "ts-jest": "^29.0.5", 75 | "ts-node": "^10.9.1", 76 | "typescript": "^4.7.4" 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | 3 | # Logs 4 | logs 5 | *.log 6 | npm-debug.log* 7 | yarn-debug.log* 8 | yarn-error.log* 9 | lerna-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 13 | 14 | # Runtime data 15 | pids 16 | *.pid 17 | *.seed 18 | *.pid.lock 19 | 20 | # Directory for instrumented libs generated by jscoverage/JSCover 21 | lib-cov 22 | 23 | # Coverage directory used by tools like istanbul 24 | coverage 25 | *.lcov 26 | 27 | # nyc test coverage 28 | .nyc_output 29 | 30 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 31 | .grunt 32 | 33 | # Bower dependency directory (https://bower.io/) 34 | bower_components 35 | 36 | # node-waf configuration 37 | .lock-wscript 38 | 39 | # Compiled binary addons (https://nodejs.org/api/addons.html) 40 | build/Release 41 | 42 | # Dependency directories 43 | node_modules/ 44 | jspm_packages/ 45 | 46 | # TypeScript v1 declaration files 47 | typings/ 48 | 49 | # TypeScript cache 50 | *.tsbuildinfo 51 | 52 | # Optional npm cache directory 53 | .npm 54 | 55 | # Optional eslint cache 56 | .eslintcache 57 | 58 | # Microbundle cache 59 | .rpt2_cache/ 60 | .rts2_cache_cjs/ 61 | .rts2_cache_es/ 62 | .rts2_cache_umd/ 63 | 64 | # Optional REPL history 65 | .node_repl_history 66 | 67 | # Output of 'npm pack' 68 | *.tgz 69 | 70 | # Yarn Integrity file 71 | 
.yarn-integrity 72 | 73 | # dotenv environment variables file 74 | .env 75 | .env.test 76 | 77 | # parcel-bundler cache (https://parceljs.org/) 78 | .cache 79 | 80 | # Next.js build output 81 | .next 82 | 83 | # Nuxt.js build / generate output 84 | .nuxt 85 | 86 | # Gatsby files 87 | .cache/ 88 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 89 | # https://nextjs.org/blog/next-9-1#public-directory-support 90 | # public 91 | 92 | # vuepress build output 93 | .vuepress/dist 94 | 95 | # Serverless directories 96 | .serverless/ 97 | 98 | # FuseBox cache 99 | .fusebox/ 100 | 101 | # DynamoDB Local files 102 | .dynamodb/ 103 | 104 | # TernJS port file 105 | .tern-port 106 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import FormData from 'form-data'; 2 | 3 | import parseGoogleImages from './core/parseGoogleImages'; 4 | import verifyGoogleQuery from './core/verifyGoogleQuery'; 5 | import constructGoogleUrl from './core/constructGoogleUrl'; 6 | import limitResultSize from './core/limitResultSize'; 7 | 8 | import Config from './types/config'; 9 | import Results from './types/results'; 10 | 11 | import GOOGLE_QUERY from './constant/query/GOOGLE_QUERY'; 12 | import GOOGLE_CONSTANT from './constant/GOOGLE_CONSTANT'; 13 | import inverseGetGoogleImageURL from './core/inverseGetGoogleImageURL'; 14 | import getSearchFromGoogleUrl from './core/getSearchFromGoogleUrl'; 15 | 16 | /** 17 | * Inverse google image search engine with image buffer 18 | * @param imageData 19 | * @param proxy 20 | * @returns 21 | */ 22 | async function GOOGLE_IMG_INVERSE_ENGINE_UPLOAD( 23 | imageData: Buffer, 24 | config?: Omit 25 | ): Promise { 26 | const formData = new FormData(); 27 | formData.append('encoded_image', imageData); 28 | 29 | const URL_LENS = GOOGLE_CONSTANT.inverse.upload; 30 | const GOOGLE_IMG_URL = await 
inverseGetGoogleImageURL(URL_LENS, config?.proxy, formData); 31 | return GOOGLE_IMG_SCRAP({ 32 | search: getSearchFromGoogleUrl(GOOGLE_IMG_URL), 33 | ...config 34 | }); 35 | } 36 | 37 | /** 38 | * Inverse google image search engine with an image url 39 | * @param imageUrl 40 | * @param proxy 41 | * @returns 42 | */ 43 | async function GOOGLE_IMG_INVERSE_ENGINE_URL( 44 | imageUrl: string, 45 | config?: Omit 46 | ): Promise { 47 | const URL_LENS = GOOGLE_CONSTANT.inverse.url + encodeURIComponent(imageUrl); 48 | const GOOGLE_IMG_URL = await inverseGetGoogleImageURL(URL_LENS, config?.proxy); 49 | return GOOGLE_IMG_SCRAP({ 50 | search: getSearchFromGoogleUrl(GOOGLE_IMG_URL), 51 | ...config 52 | }); 53 | } 54 | 55 | /** 56 | * Search images on google image 57 | * @param config 58 | * @returns 59 | */ 60 | async function GOOGLE_IMG_SCRAP(config: Config): Promise { 61 | verifyGoogleQuery(config); 62 | 63 | const URL = constructGoogleUrl(config); 64 | const result = await parseGoogleImages(URL, config?.proxy); 65 | const slicedResult = limitResultSize(config?.limit, result); 66 | 67 | return { 68 | url: URL, 69 | search: config.search, 70 | result: slicedResult 71 | }; 72 | } 73 | 74 | export { 75 | GOOGLE_IMG_SCRAP, 76 | GOOGLE_IMG_INVERSE_ENGINE_URL, 77 | GOOGLE_IMG_INVERSE_ENGINE_UPLOAD, 78 | GOOGLE_QUERY 79 | }; 80 | -------------------------------------------------------------------------------- /src/core/parseGoogleImages.ts: -------------------------------------------------------------------------------- 1 | import { unicodeToChar } from '../utils/utils'; 2 | import GOOGLE_CONSTANT from '../constant/GOOGLE_CONSTANT'; 3 | import axios, { AxiosProxyConfig } from 'axios'; 4 | import ImageResultItem from '../types/imageResultItem'; 5 | 6 | /** 7 | * Scrap google images scripts tag 8 | * @param url 9 | * @returns 10 | */ 11 | async function scrapGoogleImages(url: string, proxy?: AxiosProxyConfig) { 12 | const { data } = await axios(url, { 13 | headers: 
GOOGLE_CONSTANT.headers, 14 | ...(proxy ?? {}) 15 | }); 16 | 17 | return data; 18 | } 19 | 20 | /** 21 | * Construct the final object image from regex match with all informations 22 | * @param informationsMatch 23 | * @param otherInformationsMatch 24 | * @returns 25 | */ 26 | function getGoogleImageObject( 27 | informationsMatch: RegExpExecArray, 28 | otherInformationsMatch: RegExpExecArray 29 | ) { 30 | return { 31 | id: otherInformationsMatch[1], 32 | title: otherInformationsMatch[3], 33 | url: unicodeToChar(informationsMatch[1]), 34 | originalUrl: otherInformationsMatch[2], 35 | height: parseInt(informationsMatch[2], 10), 36 | width: parseInt(informationsMatch[3], 10) 37 | }; 38 | } 39 | 40 | /** 41 | * Parse the html from google image to get the images links 42 | * @param url 43 | * @returns 44 | */ 45 | async function parseGoogleImages( 46 | url: string, 47 | proxy?: AxiosProxyConfig 48 | ): Promise { 49 | const result: ImageResultItem[] = []; 50 | 51 | const body: string = await scrapGoogleImages(url, proxy); 52 | 53 | //getting image url, height, width, color average 54 | const informationsRegex = /\["(http[^"]+?)",(\d+),(\d+)\]/gi; 55 | //getting originalUrl, title, id 56 | const otherInformationsRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi; 57 | 58 | let informationsMatch: RegExpExecArray; 59 | 60 | while ((informationsMatch = informationsRegex.exec(body)) !== null) { 61 | if (informationsMatch[1].startsWith('https://encrypted-tbn0.gstatic.com')) continue; 62 | 63 | const otherInformationsMatch = otherInformationsRegex.exec(body); 64 | 65 | if (otherInformationsMatch === null) return result; 66 | 67 | if (informationsMatch.length < 4 || otherInformationsMatch.length < 4) continue; 68 | if ( 69 | informationsMatch[1].match(/http/gi).length > 2 || 70 | otherInformationsMatch[2].match(/http/gi).length > 2 71 | ) 72 | continue; 73 | 74 | result.push(getGoogleImageObject(informationsMatch, otherInformationsMatch)); 75 | } 76 | 77 | return result; 
78 | } 79 | 80 | export default parseGoogleImages; 81 | -------------------------------------------------------------------------------- /src/core/buildGoogleDork.ts: -------------------------------------------------------------------------------- 1 | import Config from '../types/config'; 2 | 3 | /** 4 | * Show only images with a particular title 5 | * @param config 6 | * @returns 7 | */ 8 | function filterByTitlesBuilder(config: Config): string { 9 | const FILTER_TITLE = []; 10 | if (config.filterByTitles) { 11 | for (const titleFilter of config.filterByTitles) { 12 | const value = titleFilter.map(title => { 13 | return `intitle:"${title}"`; 14 | }); 15 | 16 | FILTER_TITLE.push(`(${value.join(' AND ')})`); 17 | } 18 | } 19 | return FILTER_TITLE.join(' '); 20 | } 21 | 22 | /** 23 | * Show only images without some specific words 24 | * @param config 25 | * @returns 26 | */ 27 | function excludeWordsBuilder(config: Config): string { 28 | const EXCLUDE_WORDS = []; 29 | if (config.excludeWords) { 30 | for (const excludeWord of config.excludeWords) { 31 | EXCLUDE_WORDS.push(`-"${excludeWord}"`); 32 | } 33 | } 34 | return EXCLUDE_WORDS.join(' '); 35 | } 36 | 37 | /** 38 | * Show only images of some particular domains 39 | * @param config 40 | * @returns 41 | */ 42 | function onlyDomainsBuilder(config: Config): string { 43 | const DOMAINS = []; 44 | if (config.domains) { 45 | for (const domain of config.domains) { 46 | DOMAINS.push(`site:"${domain}"`); 47 | } 48 | } 49 | return DOMAINS.join(' OR '); 50 | } 51 | 52 | /** 53 | * Don't show images from particular domains 54 | * @param config 55 | * @returns 56 | */ 57 | function excludeDomainsBuilder(config: Config): string { 58 | const EXCLUDE_DOMAINS = []; 59 | if (config.excludeDomains) { 60 | for (const excludeDomain of config.excludeDomains) { 61 | EXCLUDE_DOMAINS.push(`-site:"${excludeDomain}"`); 62 | } 63 | } 64 | return EXCLUDE_DOMAINS.join(' '); 65 | } 66 | 67 | /** 68 | * Only show images with a domain that match a 
particular regex 69 | * @param config 70 | * @returns 71 | */ 72 | function urlMatchBuilder(config: Config): string { 73 | const URL_MATCH = []; 74 | if (config.urlMatch) { 75 | for (const urlMatch of config.urlMatch) { 76 | const value = urlMatch.map(content => { 77 | return `inurl:${content}`; 78 | }); 79 | URL_MATCH.push(`(${value.join(' AND ')})`); 80 | } 81 | } 82 | return URL_MATCH.join(' OR '); 83 | } 84 | 85 | /** 86 | * Build google dork string based on the config query 87 | * @param config 88 | * @returns 89 | */ 90 | function buildGoogleDorks(config: Config): string { 91 | return [ 92 | config.search, 93 | urlMatchBuilder(config), 94 | excludeWordsBuilder(config), 95 | excludeDomainsBuilder(config), 96 | onlyDomainsBuilder(config), 97 | filterByTitlesBuilder(config) 98 | ] 99 | .join(' ') 100 | .trim(); 101 | } 102 | 103 | export default buildGoogleDorks; 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google-img-scrap 2 | 3 | Scrap images from google images with customs pre filled dorking options 4 | 5 | ## Update 6 | 7 | - See [changelog](CHANGELOG.md) 8 | 9 | ## Found a bug ? 
10 | 11 | - Report it in the GitHub issues, don't be afraid :) 12 | 13 | ## Installation 14 | 15 | ``` 16 | npm i google-img-scrap 17 | ``` 18 | 19 | ## Import 20 | 21 | ```js 22 | const { 23 | GOOGLE_IMG_SCRAP, 24 | GOOGLE_IMG_INVERSE_ENGINE_URL, 25 | GOOGLE_IMG_INVERSE_ENGINE_UPLOAD, 26 | GOOGLE_QUERY 27 | } = require('google-img-scrap'); 28 | // OR 29 | import { 30 | GOOGLE_IMG_SCRAP, 31 | GOOGLE_IMG_INVERSE_ENGINE_URL, 32 | GOOGLE_IMG_INVERSE_ENGINE_UPLOAD, 33 | GOOGLE_QUERY 34 | } from 'google-img-scrap'; 35 | ``` 36 | 37 | ## Options definition 38 | 39 | - "search" `string` what you want to search 40 | - "proxy" `AxiosProxyConfig` configure a proxy with axios proxy 41 | - "excludeWords" `string[]` exclude some words from the search 42 | - "domains" `string[]` filter by domains 43 | - "excludeDomains" `string[]` exclude some domains 44 | - "safeSearch" `boolean` enable or disable safe search (to filter NSFW content, for example) 45 | - "custom" `string` add extra query 46 | - "urlMatch" `string[][]` get images whose url matches a string (example: "cdn") | `example below` 47 | - "filterByTitles" `string[][]` filter images by titles | `example below` 48 | - "query" `GoogleQuery` set a query (can be [TYPE, DATE, COLOR, SIZE, LICENCE, EXTENSION]) (use GOOGLE_QUERY items) | `example below` 49 | - "limit" `number` to limit the size of the results 50 | 51 | ## Result 52 | 53 | ```js 54 | { 55 | url: 'https://images.google.com/search?tbm=isch&tbs=&q=cats', 56 | search: "cats", 57 | result: [ 58 | { 59 | id: 'K6Qd9XWnQFQCoM', 60 | title: 'Domestic cat', 61 | url: 'https://i.natgeofe.com/n/548467d8-c5f1-4551-9f58-6817a8d2c45e/NationalGeographic_2572187_2x1.jpg', 62 | originalUrl: 'https://www.nationalgeographic.com/animals/mammals/facts/domestic-cat', 63 | height: 1536, 64 | width: 3072 65 | }, 66 | { 67 | id: 'HkevFQZ5DYu7oM', 68 | title: 'Cat - Wikipedia', 69 | url: 'https://upload.wikimedia.org/wikipedia/commons/1/15/Cat_August_2010-4.jpg', 70 | originalUrl: 'https://en.wikipedia.org/wiki/Cat', 
71 | height: 2226, 72 | width: 3640 73 | }, 74 | ... 75 | ] 76 | } 77 | ``` 78 | 79 | ## How to use ? 80 | 81 | ## Simple example 82 | 83 | Search for cat images 84 | 85 | ```js 86 | const test = await GOOGLE_IMG_SCRAP({ 87 | search: 'cats' 88 | }); 89 | 90 | console.log(test); 91 | ``` 92 | 93 | ## Reverse search engine 94 | 95 | The second parameter accepts the same options as GOOGLE_IMG_SCRAP, except `search` (`Omit<Config, 'search'>`). 96 | 97 | ### With an url (cost: 2 requests) 98 | 99 | ```js 100 | const test = await GOOGLE_IMG_INVERSE_ENGINE_URL( 101 | 'https://upload.wikimedia.org/wikipedia/commons/1/15/Cat_August_2010-4.jpg', 102 | { limit: 5 } 103 | ); 104 | 105 | console.log(test); 106 | ``` 107 | 108 | ### With a local image (cost: 3 requests) 109 | 110 | ```js 111 | const imageBuffer = fs.readFileSync('demonSlayer.png'); 112 | const test = await GOOGLE_IMG_INVERSE_ENGINE_UPLOAD(imageBuffer, { 113 | limit: 5 114 | }); 115 | 116 | console.log(test); 117 | ``` 118 | 119 | ## Custom query 120 | 121 | All query options are optional (see below for all the options) and need to be in uppercase. You can combine as many as you want. 122 | Find all possible query options below. 
123 | 124 | ```js 125 | const test = await GOOGLE_IMG_SCRAP({ 126 | search: 'cats', 127 | query: { 128 | TYPE: GOOGLE_QUERY.TYPE.CLIPART, 129 | LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER, 130 | EXTENSION: GOOGLE_QUERY.EXTENSION.JPG 131 | } 132 | }); 133 | 134 | console.log(test); 135 | ``` 136 | 137 | ## Limit result size 138 | 139 | ```js 140 | const test = await GOOGLE_IMG_SCRAP({ 141 | search: 'cats', 142 | limit: 5 143 | }); 144 | 145 | console.log(test); 146 | ``` 147 | 148 | ## Proxy 149 | 150 | See the axios documentation to set up the proxy 151 | 152 | ```js 153 | const test = await GOOGLE_IMG_SCRAP({ 154 | search: 'cats', 155 | proxy: { 156 | protocol: 'https', 157 | host: 'example.com', 158 | port: 8080 159 | } 160 | }); 161 | 162 | console.log(test); 163 | ``` 164 | 165 | ## Domains 166 | 167 | Only scrape images from specific domains 168 | 169 | ```js 170 | const test = await GOOGLE_IMG_SCRAP({ 171 | search: 'cats', 172 | domains: ['alamy.com', 'istockphoto.com', 'vecteezy.com'] 173 | }); 174 | 175 | console.log(test); 176 | ``` 177 | 178 | ## Exclude domains 179 | 180 | ```js 181 | const test = await GOOGLE_IMG_SCRAP({ 182 | search: 'cats', 183 | excludeDomains: ['istockphoto.com', 'alamy.com'] 184 | }); 185 | 186 | console.log(test); 187 | ``` 188 | 189 | ## Exclude words 190 | 191 | If you don't like black cats and white cats 192 | 193 | ```js 194 | const test = await GOOGLE_IMG_SCRAP({ 195 | search: 'cats', 196 | excludeWords: ['black', 'white'] //If you don't like black cats and white cats 197 | }); 198 | 199 | console.log(test); 200 | ``` 201 | 202 | ## Safe search (no nsfw) 203 | 204 | ```js 205 | const test = await GOOGLE_IMG_SCRAP({ 206 | search: 'cats', 207 | safeSearch: false 208 | }); 209 | 210 | console.log(test); 211 | ``` 212 | 213 | ## Custom query params 214 | 215 | ```js 216 | const test = await GOOGLE_IMG_SCRAP({ 217 | search: 'cats', 218 | custom: 'name=content&name2=content2' 219 | }); 220 | 221 | console.log(test); 222 | ``` 223 | 224 | 
## How do urlMatch and filterByTitles work? 225 | 226 | ```js 227 | const test = await GOOGLE_IMG_SCRAP({ 228 | search: 'cats', 229 | //will build something like this "(draw and white) or (albino and white)" 230 | filterByTitles: [ 231 | ['draw', 'white'], 232 | ['albino', 'white'] 233 | ], 234 | //will build something like this "(cdn and wikipedia) or (cdn and istockphoto)" 235 | urlMatch: [ 236 | ['cdn', 'wikipedia'], 237 | ['cdn', 'istockphoto'] 238 | ] 239 | }); 240 | 241 | console.log(test); 242 | ``` 243 | 244 | ## Google query 245 | 246 | ```js 247 | { 248 | SIZE: { 249 | LARGE, 250 | MEDIUM, 251 | ICON 252 | }, 253 | COLOR: { 254 | BLACK_AND_WHITE, 255 | TRANSPARENT, 256 | RED, 257 | BLUE, 258 | PURPLE, 259 | ORANGE, 260 | YELLOW, 261 | GREEN, 262 | TEAL, 263 | PINK, 264 | WHITE, 265 | GRAY, 266 | BLACK, 267 | BROWN 268 | }, 269 | TYPE: { 270 | CLIPART, 271 | DRAW, 272 | GIF 273 | }, 274 | EXTENSION: { 275 | JPG, 276 | GIF, 277 | BMP, 278 | PNG, 279 | SVG, 280 | WEBP, 281 | ICO, 282 | RAW 283 | }, 284 | DATE: { 285 | DAY, 286 | WEEK, 287 | MONTH, 288 | YEAR 289 | }, 290 | LICENCE: { 291 | CREATIVE_COMMONS, 292 | COMMERCIAL_AND_OTHER 293 | } 294 | } 295 | ``` 296 | --------------------------------------------------------------------------------