├── .gitignore ├── images ├── demo.gif ├── demo1.gif ├── demo2.gif ├── demo3.gif ├── logo_128x128.png ├── DupChecker-settings.png └── logo.svg ├── .vscode ├── settings.json ├── extensions.json └── launch.json ├── .vscodeignore ├── jsconfig.json ├── .eslintrc.json ├── test ├── extension.test.js └── index.js ├── LICENSE ├── vsc-extension-quickstart.md ├── CHANGELOG.md ├── README.md ├── package.json └── extension.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .vscode-test/ 3 | *.vsix 4 | -------------------------------------------------------------------------------- /images/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianbingfang/vscode-dup-checker/HEAD/images/demo.gif -------------------------------------------------------------------------------- /images/demo1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianbingfang/vscode-dup-checker/HEAD/images/demo1.gif -------------------------------------------------------------------------------- /images/demo2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianbingfang/vscode-dup-checker/HEAD/images/demo2.gif -------------------------------------------------------------------------------- /images/demo3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianbingfang/vscode-dup-checker/HEAD/images/demo3.gif -------------------------------------------------------------------------------- /images/logo_128x128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianbingfang/vscode-dup-checker/HEAD/images/logo_128x128.png -------------------------------------------------------------------------------- 
/images/DupChecker-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianbingfang/vscode-dup-checker/HEAD/images/DupChecker-settings.png -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | // Place your settings in this file to overwrite default and user settings. 2 | { 3 | "standard.enable": false 4 | } -------------------------------------------------------------------------------- /.vscodeignore: -------------------------------------------------------------------------------- 1 | .vscode/** 2 | .vscode-test/** 3 | test/** 4 | .gitignore 5 | jsconfig.json 6 | vsc-extension-quickstart.md 7 | .eslintrc.json 8 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the extensions.json format 4 | "recommendations": [ 5 | "dbaeumer.vscode-eslint" 6 | ] 7 | } -------------------------------------------------------------------------------- /jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es6", 5 | "lib": [ 6 | "es6" 7 | ] 8 | }, 9 | "exclude": [ 10 | "node_modules" 11 | ] 12 | } -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": false, 4 | "commonjs": true, 5 | "es6": true, 6 | "node": true 7 | }, 8 | "parserOptions": { 9 | "ecmaFeatures": { 10 | "jsx": true 11 | }, 12 | "sourceType": "module" 13 | }, 14 | "rules": { 15 | "no-const-assign": "warn", 16 
| "no-this-before-super": "warn", 17 | "no-undef": "warn", 18 | "no-unreachable": "warn", 19 | "no-unused-vars": "warn", 20 | "constructor-super": "warn", 21 | "valid-typeof": "warn" 22 | } 23 | } -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | // A launch configuration that launches the extension inside a new window 2 | { 3 | "version": "0.1.0", 4 | "configurations": [ 5 | { 6 | "name": "Extension", 7 | "type": "extensionHost", 8 | "request": "launch", 9 | "runtimeExecutable": "${execPath}", 10 | "args": ["--extensionDevelopmentPath=${workspaceRoot}" ], 11 | "stopOnEntry": false 12 | }, 13 | { 14 | "name": "Extension Tests", 15 | "type": "extensionHost", 16 | "request": "launch", 17 | "runtimeExecutable": "${execPath}", 18 | "args": ["--extensionDevelopmentPath=${workspaceRoot}", "--extensionTestsPath=${workspaceRoot}/test" ], 19 | "stopOnEntry": false 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /test/extension.test.js: -------------------------------------------------------------------------------- 1 | /* global suite, test */ 2 | 3 | // 4 | // Note: This example test is leveraging the Mocha test framework. 5 | // Please refer to their documentation on https://mochajs.org/ for help. 
6 | // 7 | 8 | // The module 'assert' provides assertion methods from node 9 | const assert = require('assert'); 10 | 11 | // You can import and use all API from the 'vscode' module 12 | // as well as import your extension to test it 13 | const vscode = require('vscode'); 14 | const myExtension = require('../extension'); 15 | 16 | // Defines a Mocha test suite to group tests of similar kind together 17 | suite("Extension Tests", function() { 18 | 19 | // Defines a Mocha unit test 20 | test("Something 1", function() { 21 | assert.equal(-1, [1, 2, 3].indexOf(5)); 22 | assert.equal(-1, [1, 2, 3].indexOf(0)); 23 | }); 24 | }); -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | // 2 | // PLEASE DO NOT MODIFY / DELETE UNLESS YOU KNOW WHAT YOU ARE DOING 3 | // 4 | // This file is providing the test runner to use when running extension tests. 5 | // By default the test runner in use is Mocha based. 6 | // 7 | // You can provide your own test runner if you want to override it by exporting 8 | // a function run(testRoot: string, clb: (error:Error) => void) that the extension 9 | // host can call to run the tests. The test runner is expected to use console.log 10 | // to report the results back to the caller. When the tests are finished, return 11 | // a possible error to the callback or null if none. 12 | 13 | const testRunner = require('vscode/lib/testrunner'); 14 | 15 | // You can directly control Mocha options by uncommenting the following lines 16 | // See https://github.com/mochajs/mocha/wiki/Using-mocha-programmatically#set-options for more info 17 | testRunner.configure({ 18 | ui: 'tdd', // the TDD UI is being used in extension.test.js (suite, test, etc.) 
19 | useColors: true // colored output from test results 20 | }); 21 | 22 | module.exports = testRunner; -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jianbing Fang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /images/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /vsc-extension-quickstart.md: -------------------------------------------------------------------------------- 1 | # Welcome to your VS Code Extension 2 | 3 | ## What's in the folder 4 | * This folder contains all of the files necessary for your extension. 5 | * `package.json` - this is the manifest file in which you declare your extension and command. 6 | The sample plugin registers a command and defines its title and command name. With this information 7 | VS Code can show the command in the command palette. It doesn’t yet need to load the plugin. 8 | * `extension.js` - this is the main file where you will provide the implementation of your command. 9 | The file exports one function, `activate`, which is called the very first time your extension is 10 | activated (in this case by executing the command). Inside the `activate` function we call `registerCommand`. 11 | We pass the function containing the implementation of the command as the second parameter to 12 | `registerCommand`. 13 | 14 | ## Get up and running straight away 15 | * Press `F5` to open a new window with your extension loaded. 16 | * Run your command from the command palette by pressing (`Ctrl+Shift+P` or `Cmd+Shift+P` on Mac) and typing `Hello World`. 17 | * Set breakpoints in your code inside `extension.js` to debug your extension. 18 | * Find output from your extension in the debug console. 19 | 20 | ## Make changes 21 | * You can relaunch the extension from the debug toolbar after changing code in `extension.js`. 
22 | * You can also reload (`Ctrl+R` or `Cmd+R` on Mac) the VS Code window with your extension to load your changes. 23 | 24 | ## Explore the API 25 | * You can open the full set of our API when you open the file `node_modules/vscode/vscode.d.ts`. 26 | 27 | ## Run tests 28 | * Open the debug viewlet (`Ctrl+Shift+D` or `Cmd+Shift+D` on Mac) and from the launch configuration dropdown pick `Launch Tests`. 29 | * Press `F5` to run the tests in a new window with your extension loaded. 30 | * See the output of the test result in the debug console. 31 | * Make changes to `test/extension.test.js` or create new test files inside the `test` folder. 32 | * By convention, the test runner will only consider files matching the name pattern `**.test.js`. 33 | * You can create folders inside the `test` folder to structure your tests any way you want. 34 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## v0.1.7 - 2021/02/19 4 | - Add MIT license. 5 | 6 | ## v0.1.6 - 2019/11/04 7 | - Fix "Assignment to a constant error" bug in regex check mode. 8 | 9 | ## v0.1.5 - 2019/10/21 10 | - Some config default value changes. 11 | 12 | ## v0.1.4 - 2019/10/20 13 | - Exceptions will not interrupt checking progress in `Check Duplicates (For All Files)` mode. 14 | - Skip non-textual files in `Check Duplicates (For All Files)` mode. 15 | 16 | ## v0.1.3 - 2019/10/19 17 | - Change default value of config `checkAllFilesInclude` to `**`. 18 | - Fix bug of "remove duplicates" not working if lose focus on the file. 19 | 20 | ## v0.1.2 - 2019/10/18 21 | - Support new check mode `Check Duplicates (For All Files)`. 22 | 23 | ## v0.1.1 - 2019/09/25 24 | - Optimize memory usage. 25 | - Better information output. 26 | 27 | ## v0.1.0 - 2019/09/24 28 | - Use [cuckoo-filter](https://github.com/vijayee/cuckoo-filter) to speed up checking progress. 
29 | - Support large file(up to hundreds of thousands of lines). 30 | - Enable configuration editing in vscode settings view. 31 | 32 | ## v0.0.9 - 2018/08/13 33 | - Add new configuration `removeAllDuplicates` to support removing duplicates including the first occurrence. 34 | 35 | ## v0.0.8 - 2018/03/12 36 | - Add new configuration `leaveEmptyLine` to support removing line break. 37 | 38 | ## v0.0.7 - 2018/03/06 39 | - Add new configuration `ignoreCase` to support case insensitive comparison. 40 | - Add new configuration `trimStart` and `trimEnd` to support self configuration on leading and trailing whitespace trimming. 41 | 42 | ## v0.0.6 - 2018/02/13 43 | - Support duplicate checking in text selection. 44 | - Fix not working bug in `trim mode`. 45 | 46 | ## v0.0.5 - 2018/01/17 47 | - Fix bug when file eol is `\n`. 48 | 49 | ## v0.0.4 - 2018/01/10 50 | - Fix bug of removing wrong line if there are empty lines in file. 51 | 52 | ## v0.0.3 - 2017/12/19 53 | - Support remove duplicate lines after duplicate checking. 54 | 55 | ## v0.0.2 - 2017/12/18 56 | - Support new check mode `Check Duplicates With Regex Match`. 57 | 58 | ## v0.0.1 - 2017/12/17 59 | - Initial release. 60 | - Support duplicate lines checking in file. 61 | - Support customer input characters trimming when comparing lines. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DupChecker 2 | 3 | ## Features 4 | 5 | **Check duplicate lines** in the file content or selection, and **remove them** if you need to keep the unique lines only. 6 | 7 | 🆕Since v0.1.0, DupChecker uses [cuckoo-filter](https://github.com/vijayee/cuckoo-filter) to speed up checking progress, and be able to handle large files with 100k+ lines. 
8 | 9 | ❗️Note: for huge files with millions of lines, you may get an error like `Extension host terminated unexpectedly`, which is caused by a `JavaScript heap out of memory` exception; in this case you'd better deal with the file outside vscode. 10 | 11 | ### Multiple Check Modes 12 | Commands: 13 | - `Check Duplicates`: check duplicate lines immediately. 14 | - `Check Duplicates With Trim Condition`: trim user-specified characters first (on both start and end). 15 | - `Check Duplicates With Regex Match`: capture matched substrings with a user-specified regex first (DupChecker will use the **last match** if you have multiple groups in regex). 16 | - `Check Duplicates (For All Files)`: check duplicate lines for all files in workspace one by one. 17 | 18 | ### Configurations: 19 | 20 | In **Preferences -> settings**: 21 | 22 | ![configurations](images/DupChecker-settings.png) 23 | 24 | Or in `settings.json`: 25 | 26 | ``` 27 | "dupchecker": { 28 | "trimStart": true, // trim starting whitespaces in each line, default: true 29 | "trimEnd": true, // trim ending whitespaces in each line, default: true 30 | "ignoreCase": false, // ignore case when comparing lines, default: false 31 | "ignoreLines": [], // ignore lines that contain these strings, default: [] 32 | "leaveEmptyLine": true, // leave an empty line after removing duplicates if true, or remove whole line(including line break) if false, default: true 33 | "removeAllDuplicates": false, // remove all duplicate lines including the first occurrence if true, default: false 34 | "checkAllFilesInclude": "**", // GlobPattern for files to include in [Check Duplicates (For All Files)] mode, default: "**" 35 | "checkAllFilesExclude": "", // GlobPattern for files to exclude in [Check Duplicates (For All Files)] mode, default: "" 36 | "checkAllFilesNumLimit": 100 // Maximum number of files to include in [Check Duplicates (For All Files)] mode, default: 100 37 | } 38 | ``` 39 | 40 | #### GlobPattern 41 | Config `checkAllFilesInclude` 
and `checkAllFilesExclude` uses GlobPattern. 42 | e.g. `*` stands for all files under a folder(non-recursive), `**` stands for all files under a folder(recursive). 43 | 44 | Learn more syntax of [GlobPattern](https://code.visualstudio.com/api/references/vscode-api#GlobPattern). 45 | 46 | ## Use Case 47 | 48 | ### Check Duplicates 49 | ![feature X](images/demo1.gif) 50 | 51 | ### Check Duplicates With Trim Condition 52 | ![feature X](images/demo2.gif) 53 | 54 | ### Check Duplicates With Regex Match 55 | ![feature X](images/demo3.gif) 56 | 57 | ## Changelog 58 | 59 | See the [Releases section of our GitHub project](https://github.com/jianbingfang/vscode-dup-checker/releases) for changelogs for each release version. 60 | 61 | ## License 62 | 63 | This software is released under the terms of the [MIT license](https://github.com/jianbingfang/vscode-dup-checker/blob/master/LICENSE). 64 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dupchecker", 3 | "displayName": "DupChecker", 4 | "description": "Check duplicate lines and remove them if you need to keep the unique lines only", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/jianbingfang/vscode-dup-checker.git" 8 | }, 9 | "homepage": "https://github.com/jianbingfang/vscode-dup-checker", 10 | "version": "0.1.8", 11 | "license": "MIT", 12 | "author": { 13 | "name": "Jianbing Fang", 14 | "email": "jianbingfang@gmail.com" 15 | }, 16 | "publisher": "jianbingfang", 17 | "icon": "images/logo_128x128.png", 18 | "engines": { 19 | "vscode": "^1.0.0" 20 | }, 21 | "categories": [ 22 | "Other" 23 | ], 24 | "keywords": [ 25 | "dupchecker", 26 | "dup", 27 | "duplicate", 28 | "dedup", 29 | "unique", 30 | "deduplicate", 31 | "distinct", 32 | "checker" 33 | ], 34 | "activationEvents": [ 35 | "onCommand:extension.checkDup", 36 | "onCommand:extension.checkDupWithTrim", 37 
| "onCommand:extension.checkDupWithRegex", 38 | "onCommand:extension.checkDupForAllFiles" 39 | ], 40 | "main": "./extension", 41 | "contributes": { 42 | "commands": [ 43 | { 44 | "command": "extension.checkDup", 45 | "title": "Check Duplicates" 46 | }, 47 | { 48 | "command": "extension.checkDupWithTrim", 49 | "title": "Check Duplicates With Trim Condition" 50 | }, 51 | { 52 | "command": "extension.checkDupWithRegex", 53 | "title": "Check Duplicates With Regex Match" 54 | }, 55 | { 56 | "command": "extension.checkDupForAllFiles", 57 | "title": "Check Duplicates (For All Files)" 58 | } 59 | ], 60 | "configuration": { 61 | "title": "DupChecker", 62 | "properties": { 63 | "dupchecker.trimStart": { 64 | "type": "boolean", 65 | "default": true, 66 | "description": "trim starting white spaces in each line, default: true" 67 | }, 68 | "dupchecker.trimEnd": { 69 | "type": "boolean", 70 | "default": true, 71 | "description": "trim ending white spaces in each line, default: true" 72 | }, 73 | "dupchecker.ignoreCase": { 74 | "type": "boolean", 75 | "default": false, 76 | "description": "ignore case when comparing lines, default: false" 77 | }, 78 | "dupchecker.leaveEmptyLine": { 79 | "type": "boolean", 80 | "default": false, 81 | "description": "leave an empty line after removing duplicates if true, or remove whole line(including line break) if false, default: true" 82 | }, 83 | "dupchecker.removeAllDuplicates": { 84 | "type": "boolean", 85 | "default": false, 86 | "description": "remove all duplicate lines including the first occurrence if true, default: false" 87 | }, 88 | "dupchecker.ignoreLines": { 89 | "type": "array", 90 | "default": [], 91 | "description": "ignore lines that contain these strings", 92 | "items": { 93 | "type": "string" 94 | }, 95 | "scope": "resource" 96 | }, 97 | "dupchecker.checkAllFilesInclude": { 98 | "type": "string", 99 | "default": "**", 100 | "description": "GlobPattern for files to include in [Check Duplicates (For All Files)] mode" 101 | }, 
102 | "dupchecker.checkAllFilesExclude": { 103 | "type": "string", 104 | "default": "", 105 | "description": "GlobPattern for files to exclude in [Check Duplicates (For All Files)] mode" 106 | }, 107 | "dupchecker.checkAllFilesNumLimit": { 108 | "type": "integer", 109 | "default": 100, 110 | "description": "Maximum number of files to include in [Check Duplicates (For All Files)] mode" 111 | } 112 | } 113 | } 114 | }, 115 | "bugs": { 116 | "url": "https://github.com/jianbingfang/vscode-dup-checker/issues", 117 | "email": "jianbingfang@gmail.com" 118 | }, 119 | "scripts": {}, 120 | "devDependencies": { 121 | "@types/mocha": "^2.2.42", 122 | "@types/node": "^7.0.43", 123 | "@types/vscode": "^1.87.0", 124 | "eslint": "^8.25.0", 125 | "typescript": "^2.6.1", 126 | "vscode-test": "^1.6.1" 127 | }, 128 | "dependencies": { 129 | "cbor-js": "^0.1.0", 130 | "cuckoo-filter": "^1.1.4", 131 | "is-glob": "^4.0.1", 132 | "lodash": "^4.17.21", 133 | "string-hash": "^1.1.3" 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /extension.js: -------------------------------------------------------------------------------- 1 | // The module 'vscode' contains the VS Code extensibility API 2 | // Import the module and reference it with the alias vscode in your code below 3 | const vscode = require('vscode') 4 | const _ = require('lodash') 5 | const isGlob = require("is-glob") 6 | const stringHash = require("string-hash") 7 | const { CuckooFilter } = require('cuckoo-filter') 8 | 9 | // this method is called when your extension is activated 10 | // your extension is activated the very first time the command is executed 11 | function activate(context) { 12 | // Use the console to output diagnostic information (console.log) and errors (console.error) 13 | // This line of code will only be executed once when your extension is activated 14 | console.log('Congratulations, your extension "DupChecker" is now active!') 15 | const output = 
vscode.window.createOutputChannel('DupChecker') 16 | 17 | // The command has been defined in the package.json file 18 | // Now provide the implementation of the command with registerCommand 19 | // The commandId parameter must match the command field in package.json 20 | let disposable = vscode.commands.registerCommand('extension.checkDup', async function () { 21 | // The code you place here will be executed every time your command is executed 22 | // Display a message box to the user 23 | output.clear() 24 | try { 25 | await checkDup() 26 | } catch (err) { 27 | console.error(err) 28 | vscode.window.showErrorMessage(err.message) 29 | output.appendLine(`⛔️Checking error on ${uri.fsPath}: ${err.message}`) 30 | } 31 | }) 32 | 33 | context.subscriptions.push(disposable) 34 | 35 | disposable = vscode.commands.registerCommand('extension.checkDupWithTrim', async function () { 36 | const input = await vscode.window.showInputBox({ 37 | prompt: 'Characters to trim' 38 | }) 39 | if (input === undefined) return 40 | output.clear() 41 | try { 42 | await checkDup({ trimChars: input }) 43 | } catch (err) { 44 | console.error(err) 45 | vscode.window.showErrorMessage(err.message) 46 | output.appendLine(`⛔️Checking error on ${uri.fsPath}: ${err.message}`) 47 | } 48 | }) 49 | 50 | context.subscriptions.push(disposable) 51 | 52 | disposable = vscode.commands.registerCommand('extension.checkDupWithRegex', async function () { 53 | const input = await vscode.window.showInputBox({ 54 | prompt: 'RegExp to match and select for each line' 55 | }) 56 | if (input === undefined) return 57 | const re = new RegExp(_.trim(input.trim(), '/')) 58 | if (!re) return vscode.window.showErrorMessage(`[Invalid Regex]: ${param.regex}`) 59 | output.clear() 60 | try { 61 | await checkDup({ regex: re }) 62 | } catch (err) { 63 | console.error(err) 64 | vscode.window.showErrorMessage(err.message) 65 | output.appendLine(`⛔️Checking error on ${uri.fsPath}: ${err.message}`) 66 | } 67 | }) 68 | 69 | 
context.subscriptions.push(disposable) 70 | 71 | disposable = vscode.commands.registerCommand('extension.checkDupForAllFiles', async function () { 72 | const config = vscode.workspace.getConfiguration('dupchecker') 73 | const includes = config.get('checkAllFilesInclude', '') || '**' 74 | const excludes = config.get('checkAllFilesExclude', '') 75 | const limit = config.get('checkAllFilesNumLimit', 100) 76 | const files = await vscode.workspace.findFiles(includes, excludes, limit) 77 | if (files.length === 0) { 78 | if (includes && !isGlob(includes)) { 79 | return vscode.window.showWarningMessage(`DupChecker: no matched file in workspace, your FilesInclude GlobPattern setting looks invalid: "${includes}"`, 'Got it!'); 80 | } 81 | if (excludes && !isGlob(excludes)) { 82 | return vscode.window.showWarningMessage(`DupChecker: no matched file in workspace, your FilesExclude GlobPattern setting looks invalid: "${excludes}"`, 'Got it!'); 83 | } 84 | if (limit === 0) { 85 | return vscode.window.showWarningMessage(`DupChecker: no matched file in workspace, your FilesNumLimit setting is 0!`, 'Got it!'); 86 | } 87 | return vscode.window.showInformationMessage('DupChecker: no matched file in workspace, please check your FilesInclude and FilesExclude GlobPattern in settings.', 'Sure!'); 88 | } 89 | if (files.length > 10) { 90 | const msg = `Check duplicates for all ${files.length} files in workspace?` + (files.length === limit ? 
`⚠️You have reached max file number limit: ${limit}` : '') 91 | const select = await vscode.window.showInformationMessage(msg, 'Yes', 'No') 92 | if (select !== 'Yes') return 93 | } 94 | output.clear() 95 | const beginTime = Date.now() 96 | let count = 1 97 | for (const uri of files) { 98 | try { 99 | await checkDup({ multipleFiles: true, progressInfo: `${count}/${files.length} ` }, uri) 100 | } catch (err) { 101 | console.error(err) 102 | output.appendLine(`⛔️Checking error on ${uri.fsPath}: ${err.message}`) 103 | return 104 | } 105 | count++ 106 | } 107 | const timeCost = (Date.now() - beginTime) / 1000 108 | return vscode.window.showInformationMessage(`DupChecker: Checking ${files.length} file${files.length > 1 ? 's' : ''} finished in ${timeCost}s, please view the result in OUTPUT 😃`, 'Got it!'); 109 | }) 110 | 111 | context.subscriptions.push(disposable) 112 | 113 | async function checkDup(param, uri) { 114 | param = param || {} 115 | 116 | output.show() 117 | output.appendLine(`------------------ Prepare ${param.progressInfo || ''}------------------`) 118 | 119 | let doc 120 | if (uri) { 121 | try { 122 | doc = await vscode.workspace.openTextDocument(uri) 123 | } catch (err) { 124 | console.error(err) 125 | output.appendLine(`📄${uri.fsPath}`) 126 | output.appendLine(`❌${err.message}`) 127 | if (!param.multipleFiles) { 128 | vscode.window.showErrorMessage(`DupChecker: ${err.message}`); 129 | } 130 | return 131 | } 132 | } else { 133 | if (vscode.window.activeTextEditor) { 134 | doc = vscode.window.activeTextEditor.document 135 | } 136 | } 137 | 138 | if (!doc) { 139 | vscode.window.showErrorMessage('DupChecker: the specified document is unavailable!') 140 | return 141 | } 142 | 143 | let startLineNumber = 0 144 | let endLineNumber = doc.lineCount 145 | if (vscode.window.activeTextEditor) { 146 | const selections = vscode.window.activeTextEditor.selections 147 | if (selections.length > 1) { 148 | vscode.window.showWarningMessage('Oops! 
DupChecker cannot work with multiple selections... Please clear the selections or keep just only one!', 'Got it!') 149 | return 150 | } 151 | if (selections.length === 1 && !selections[0].isEmpty) { 152 | startLineNumber = selections[0].start.line 153 | endLineNumber = selections[0].end.line + 1 154 | } 155 | } 156 | 157 | output.appendLine(`📄${doc.fileName}${startLineNumber !== 0 || endLineNumber !== doc.lineCount ? `:${startLineNumber + 1}-${endLineNumber}` : ''}`) 158 | output.append('🔍checking duplicates...') 159 | const totalLineCount = endLineNumber - startLineNumber 160 | 161 | const largeFileLineCount = 100000 162 | if (totalLineCount >= largeFileLineCount) { 163 | vscode.window.showInformationMessage( 164 | `DupChecker may take a while to deal with the large file(${doc.lineCount.toLocaleString()} lines), please be patient ☕`, 'Sure!') 165 | } 166 | 167 | await new Promise(resolve => { 168 | setTimeout(() => { 169 | resolve() 170 | }, 0) 171 | }) 172 | 173 | const beginTime = Date.now() 174 | 175 | const config = vscode.workspace.getConfiguration('dupchecker') 176 | const needTrimStart = !!config.get('trimStart', true) 177 | const needTrimEnd = !!config.get('trimEnd', true) 178 | const needIgnoreCase = !!config.get('ignoreCase', false) 179 | const needRemoveAllDuplicates = !!config.get('removeAllDuplicates', false) 180 | const ignoreLines = _.filter(config.get('ignoreLines', []), v => _.isString(v) && !_.isEmpty(v)) 181 | 182 | const transformLine = getLineTransformer({ 183 | trimChars: param.trimChars, 184 | regex: param.regex, 185 | needTrimStart: needTrimStart, 186 | needTrimEnd: needTrimEnd, 187 | needIgnoreCase: needIgnoreCase 188 | }) 189 | 190 | // stage1: check duplicates 191 | const cuckooFilterBucketSize = 2 192 | const cuckooFilterBucketNum = Math.ceil((1.2 * totalLineCount) / cuckooFilterBucketSize * 2) 193 | const cuckooFilterFingerprintSize = 3 194 | console.debug(`building cuckoo filter: cfsize=${cuckooFilterBucketNum}, 
bsize=${cuckooFilterBucketSize}, fpsize=${cuckooFilterFingerprintSize}`) 195 | const cuckooFilter = new CuckooFilter(cuckooFilterBucketNum, cuckooFilterBucketSize, cuckooFilterFingerprintSize) 196 | console.debug(`cuckoo filter build finished, time cost: ${(Date.now() - beginTime) / 1000}s`) 197 | const dupLines = new Set() 198 | const dupLineNumbers = [] 199 | const firstOccurrenceMap = new Map() 200 | for (let i = startLineNumber; i < endLineNumber; ++i) { 201 | const text = doc.lineAt(i).text 202 | if (_.some(ignoreLines, li => text.indexOf(li) >= 0)) { 203 | continue 204 | } 205 | const line = transformLine(text) 206 | if (isDuplicate(line)) { 207 | dupLines.add(line) 208 | dupLineNumbers.push(i) 209 | } 210 | if (needRemoveAllDuplicates) { 211 | const hashCode = stringHash(line) 212 | if (!firstOccurrenceMap.has(hashCode)) { 213 | firstOccurrenceMap.set(hashCode, i) 214 | } 215 | } 216 | } 217 | 218 | const timeCost = (Date.now() - beginTime) / 1000 219 | output.appendLine(` done (${timeCost}s)`) 220 | await new Promise(resolve => { 221 | setTimeout(() => { 222 | resolve() 223 | }, 0) 224 | }) 225 | 226 | const configInfoList = [] 227 | if (needTrimStart) configInfoList.push('trimStart') 228 | if (needTrimEnd) configInfoList.push('trimEnd') 229 | if (needIgnoreCase) configInfoList.push('ignoreCase') 230 | if (!_.isEmpty(ignoreLines)) configInfoList.push('ignoreLines') 231 | if (!_.isEmpty(param.trimChars)) configInfoList.push(`trimChars: ${param.trimChars}`) 232 | if (!_.isEmpty(param.regex)) configInfoList.push(`regex: /${param.regex}/`) 233 | 234 | output.appendLine(`------------------ Results ${param.progressInfo || ''}------------------`) 235 | output.appendLine('⚙️' + configInfoList.map(info => `[${info}]`).join(' ')) 236 | if (!cuckooFilter.reliable && dupLines.size > 0) { 237 | output.appendLine('⚠️There might be some unique items which are wrongly detected as duplicates, please double check the results manually!') 238 | 
vscode.window.showWarningMessage('ATTENTION! There might be some unique items which are wrongly detected as duplicates, please double check the results manually!') 239 | } 240 | output.appendLine(`✅${dupLines.size} duplicate value${dupLines.size > 1 ? 's' : ''} found in ${totalLineCount.toLocaleString()} lines:`) 241 | dupLines.forEach(line => output.appendLine(line)) 242 | 243 | // stage2: ask user to remove duplicates 244 | if (param.multipleFiles === true) return 245 | if (dupLines.size > 0) { 246 | const select = await vscode.window.showInformationMessage(`DupChecker: ${dupLines.size} duplicate value${dupLines.size > 1 ? 's' : ''} found in ${timeCost}s, need remove them?`, 'Yes', 'No') 247 | if (select === 'Yes') { 248 | if (needRemoveAllDuplicates) { 249 | for (const dupLine of dupLines) { 250 | const index = firstOccurrenceMap.get(stringHash(dupLine)) 251 | if (index >= 0) { 252 | dupLineNumbers.push(index) 253 | } 254 | } 255 | } 256 | await removeLines(doc, dupLineNumbers) 257 | vscode.window.showInformationMessage(`DupChecker: ${dupLineNumbers.length} duplicate line${dupLineNumbers.length > 1 ? 's' : ''} removed!`) 258 | } 259 | } else { 260 | vscode.window.showInformationMessage(`DupChecker: 0 duplicate value found in ${timeCost}s`, 'Got it!') 261 | } 262 | 263 | function getLineTransformer(cfg) { 264 | cfg = cfg || {} 265 | const funcs = [] 266 | if (cfg.needTrimStart) { funcs.push(_.trimStart) } 267 | if (cfg.needTrimEnd) { funcs.push(_.trimEnd) } 268 | if (!_.isEmpty(cfg.trimChars)) { funcs.push(line => _.trim(line, cfg.trimChars)) } 269 | if (_.isRegExp(cfg.regex)) { 270 | funcs.push(line => { 271 | const match = cfg.regex.exec(line) 272 | return match ? 
match[match.length - 1] : '' 273 | }) 274 | } 275 | if (cfg.needIgnoreCase) { funcs.push(_.toLower) } 276 | return _.flow(funcs) 277 | } 278 | 279 | function isDuplicate(line) { 280 | if (!line) return false 281 | const exist = cuckooFilter.contains(line) 282 | if (!exist) { 283 | cuckooFilter.add(line) 284 | } 285 | return exist 286 | } 287 | 288 | async function removeLines(doc, lineNumbers) { 289 | const leaveEmptyLine = !!config.get('leaveEmptyLine', true) 290 | let editor = await vscode.window.showTextDocument(doc) 291 | editor.edit(edit => { 292 | _.sortedUniq(lineNumbers.sort((a, b) => a < b)).forEach(lineNum => { 293 | const line = doc.lineAt(lineNum) 294 | const range = leaveEmptyLine ? line.range : line.rangeIncludingLineBreak 295 | edit.delete(range) 296 | }) 297 | }) 298 | } 299 | } 300 | } 301 | exports.activate = activate 302 | 303 | // this method is called when your extension is deactivated 304 | function deactivate() { } 305 | exports.deactivate = deactivate 306 | --------------------------------------------------------------------------------