├── example ├── set-4.txt ├── set-1.txt ├── set-2.txt └── set-3.txt ├── package.json ├── LICENSE ├── bin └── setop └── README.md /example/set-4.txt: -------------------------------------------------------------------------------- 1 | mno 2 | pqr 3 | stu 4 | -------------------------------------------------------------------------------- /example/set-1.txt: -------------------------------------------------------------------------------- 1 | abc 2 | def 3 | ghi 4 | def 5 | -------------------------------------------------------------------------------- /example/set-2.txt: -------------------------------------------------------------------------------- 1 | def 2 | ghi 3 | ghi 4 | abc 5 | -------------------------------------------------------------------------------- /example/set-3.txt: -------------------------------------------------------------------------------- 1 | abc 2 | xyz 3 | def 4 | ghi 5 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "setop", 3 | "version": "0.1.0", 4 | "preferGlobal": true, 5 | "description": "A Bash script to perform set operations in the UNIX shell!", 6 | "bin": "bin/setop", 7 | "homepage": "https://github.com/cdax/setop", 8 | "keywords": ["set", "bash", "script"], 9 | "license": "MIT", 10 | "author": "Chitharanjan Das ", 11 | "files": ["bin"] 12 | } 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Chitharanjan Das 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bin/setop: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Perform set operations in the UNIX shell! 4 | 5 | PROGNAME=${0##*/} 6 | VERSION="0.1.0" 7 | 8 | usage() 9 | { 10 | cat < 12 | Perform set operations in the UNIX shell! 13 | Commands: 14 | EO 15 | cat < ... & Tests whether the line set1 is present in the file set2 18 | equals & Tests whether the unique lines in file set1 are the same as the unique lines in set2 19 | count ... & Counts the number of unique lines in files set1, set2, ..., setn combined 20 | is-subset ... & Tests whether all lines in file base are present in files set1, ..., setn combined 21 | union ... & Displays all unique lines that are present in files set1, set2, ..., setn combined 22 | inter & Displays all unique lines that are common to files set1 and set2 23 | minus & Displays all unique lines in file set1 that are not present in file set2 24 | sym-diff ... & Displays all unique lines that are present in either files set1 or set2 ... 
or setn, but not in all of them 25 | product & Displays the cartesian product of the unique lines from files set1 and set2 26 | is-disjoint & Tests whether there are any lines common to both files set1 and set2 27 | is-empty ... & Tests whether there are any lines in files set1, set2, ..., setn combined 28 | min ... & Displays the lexicographic minimum from among the lines in files set1, set2, ..., setn combined 29 | max ... & Displays the lexicographic maximum from among the lines in files set1, set2, ..., setn combined 30 | EO 31 | } 32 | 33 | version() { 34 | cat <&2; exit 1 41 | } 42 | 43 | is_not_empty() { 44 | head -n 1 "$@" | wc -l | awk '{ print $1 }' # prints 1 if the input has at least one line, else 0 45 | } 46 | 47 | is_empty() { 48 | cat -- "$@" | head -n 1 | wc -c | awk '{ print ($1 == 0 ? 1 : 0) }' # cat first so head adds no per-file "==> name <==" headers; count bytes so an unterminated last line still counts 49 | } 50 | 51 | is_member() { 52 | kwd="$1" 53 | shift 54 | grep -Fx -- "$kwd" "$@" | is_not_empty # -F: literal string, -x: whole-line match, --: kwd may start with a dash 55 | } 56 | 57 | equals() { 58 | diff <(sort -u "$1") <(sort -u "$2") | is_empty 59 | } 60 | 61 | count() { 62 | sort -u "$@" | wc -l | awk {'print $1'} 63 | } 64 | 65 | is_subset() { 66 | base="$1" 67 | shift 68 | comm -23 <(sort -u "$base") <(sort -u "$@") | is_empty 69 | } 70 | 71 | union() { 72 | sort -u "$@" 73 | } 74 | 75 | inter() { 76 | comm -12 <(sort -u "$1") <(sort -u "$2") 77 | } 78 | 79 | minus() { 80 | comm -23 <(sort -u "$1") <(sort -u "$2") 81 | } 82 | 83 | sym_diff() { 84 | for f in "$@"; do sort -u "$f"; done | sort | uniq -c | awk -v n="$#" '$1 < n { sub(/^ *[0-9]+ /, ""); print }' # de-duplicate each file first, then keep lines found in fewer than all n files 85 | } 86 | 87 | product() { 88 | awk 'NR==FNR { a[$0]; next } { for (i in a) print i, $0 }' <(sort -u "$1") <(sort -u "$2") 89 | } 90 | 91 | is_disjoint() { 92 | inter "$1" "$2" | is_empty 93 | } 94 | 95 | min() { 96 | sort "$@" | head -n 1 97 | } 98 | 99 | max() { 100 | sort "$@" | tail -n 1 101 | } 102 | 103 | CMD="$1" 104 | case $CMD in 105 | -h|--help|help) 106 | usage; exit 0 107 | ;; 108 | -v|--version|version) 109 | version; exit 0 110 | ;; 111 | is-member) 112 | kwd="$2" 113 | shift 2 114 | is_member "$kwd" "$@" 115 | ;; 116 | equals) 117 | equals "$2" "$3" 118 | ;; 119 | count) 120 | shift; count "$@" 121 | ;; 122 | 
is-subset) 123 | base="$2" 124 | shift 2 125 | is_subset "$base" "$@" 126 | ;; 127 | union) 128 | shift; union "$@" 129 | ;; 130 | inter) 131 | inter "$2" "$3" 132 | ;; 133 | minus) 134 | minus "$2" "$3" 135 | ;; 136 | sym-diff) 137 | shift; sym_diff "$@" 138 | ;; 139 | product) 140 | product "$2" "$3" 141 | ;; 142 | is-disjoint) 143 | is_disjoint "$2" "$3" 144 | ;; 145 | is-empty) 146 | shift; is_empty "$@" 147 | ;; 148 | min) 149 | shift; min "$@" 150 | ;; 151 | max) 152 | shift; max "$@" 153 | ;; 154 | *) 155 | log_error_and_exit "$CMD" "Unknown command" 156 | ;; 157 | esac 158 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | setop 2 | ----- 3 | 4 | **Set** **op**erations in the UNIX shell! 5 | 6 | Resting on the shoulders of giants like `grep`, `cat`, `sort`, `uniq`, `comm`, `diff`, `cut`, `awk`, and more. 7 | 8 | ## Installation 9 | 10 | Using `npm`: 11 | 12 | $ npm install -g setop 13 | 14 | ..or, copy the [script](https://raw.githubusercontent.com/cdax/setop/master/bin/setop) to a file called `setop`, give it the proper permissions, and move it to somewhere in your PATH, like so: 15 | 16 | $ chmod u+x setop 17 | $ mv setop /usr/local/bin 18 | 19 | ## Usage 20 | 21 | * [Membership Test](#is-member) 22 | * [Equality Test](#equals) 23 | * [Cardinality](#count) 24 | * [Subset Test](#is-subset) 25 | * [Union](#union) 26 | * [Intersection](#inter) 27 | * [Complement](#minus) 28 | * [Symmetric Difference](#sym-diff) 29 | * [Cartesian Product](#product) 30 | * [Disjoint Sets Test](#are-disjoint) 31 | * [Empty Set Test](#is-empty) 32 | * [Minimum Element](#min) 33 | * [Maximum Element](#max) 34 | 35 | #### Membership Test 36 | 37 | $ setop is-member ... 38 | 39 | >Tests whether the line `kwd` is present in the files `set1`, `set2`, ..., `setn`. 
40 | 41 | For example: 42 | 43 | $ setop is-member abc set-1.txt 44 | 1 45 | $ setop is-member mno set-1.txt set-2.txt set-3.txt 46 | 0 47 | $ setop is-member xyz set-*.txt 48 | 1 49 | 50 | `set-1.txt` 51 | 52 | abc 53 | def 54 | ghi 55 | def 56 | 57 | `set-2.txt` 58 | 59 | def 60 | ghi 61 | ghi 62 | abc 63 | 64 | `set-3.txt` 65 | 66 | abc 67 | xyz 68 | def 69 | ghi 70 | 71 | #### Equality Test 72 | 73 | $ setop equals <set1> <set2> 74 | 75 | >Tests whether the unique lines in file `set1` are the same as the unique lines in `set2`. 76 | 77 | For example: 78 | 79 | $ setop equals set-1.txt set-2.txt 80 | 1 81 | $ setop equals set-1.txt set-3.txt 82 | 0 83 | 84 | `set-1.txt` 85 | 86 | abc 87 | def 88 | ghi 89 | def 90 | 91 | `set-2.txt` 92 | 93 | def 94 | ghi 95 | ghi 96 | abc 97 | 98 | `set-3.txt` 99 | 100 | abc 101 | xyz 102 | def 103 | ghi 104 | 105 | 106 | #### Cardinality 107 | 108 | $ setop count <set1> <set2> ... <setn> 109 | 110 | >Counts the number of unique lines in files `set1`, `set2`, ..., `setn` combined. 111 | 112 | For example: 113 | 114 | $ setop count set-1.txt 115 | 3 116 | $ setop count set-1.txt set-2.txt set-3.txt 117 | 4 118 | $ setop count set-*.txt 119 | 4 120 | 121 | `set-1.txt` 122 | 123 | abc 124 | def 125 | ghi 126 | def 127 | 128 | `set-2.txt` 129 | 130 | def 131 | ghi 132 | ghi 133 | abc 134 | 135 | `set-3.txt` 136 | 137 | abc 138 | xyz 139 | def 140 | ghi 141 | 142 | #### Subset Test 143 | 144 | $ setop is-subset <base> <set1> <set2> ... <setn> 
145 | 146 | >Tests whether all lines in file `base` are present in files `set1`, `set2`, ..., `setn` combined 147 | 148 | For example: 149 | 150 | $ setop is-subset set-3.txt set-1.txt 151 | 0 152 | $ setop is-subset set-3.txt set-2.txt set-3.txt 153 | 1 154 | $ setop is-subset set-3.txt set-*.txt 155 | 1 156 | 157 | `set-1.txt` 158 | 159 | abc 160 | 161 | def 162 | ghi 163 | def 164 | 165 | `set-2.txt` 166 | 167 | def 168 | ghi 169 | ghi 170 | abc 171 | 172 | `set-3.txt` 173 | 174 | abc 175 | xyz 176 | def 177 | ghi 178 | 179 | #### Union 180 | 181 | $ setop union <set1> <set2> ... <setn> 182 | 183 | >Displays all unique lines that are present in files `set1`, `set2`, ..., `setn` combined 184 | 185 | For example: 186 | 187 | $ setop union set-1.txt set-2.txt 188 | abc 189 | def 190 | ghi 191 | $ setop union set-1.txt set-2.txt set-3.txt 192 | abc 193 | def 194 | ghi 195 | xyz 196 | $ setop union set-*.txt 197 | abc 198 | def 199 | ghi 200 | xyz 201 | 202 | `set-1.txt` 203 | 204 | abc 205 | def 206 | ghi 207 | def 208 | 209 | `set-2.txt` 210 | 211 | def 212 | ghi 213 | ghi 214 | abc 215 | 216 | `set-3.txt` 217 | 218 | abc 219 | xyz 220 | def 221 | ghi 222 | 223 | #### Intersection 224 | 225 | $ setop inter <set1> <set2> 226 | 227 | >Displays all unique lines that are common to files `set1` and `set2` 228 | 229 | For example: 230 | 231 | $ setop inter set-1.txt set-2.txt 232 | abc 233 | def 234 | ghi 235 | $ setop inter set-1.txt set-3.txt 236 | abc 237 | def 238 | ghi 239 | 240 | `set-1.txt` 241 | 242 | abc 243 | def 244 | ghi 245 | def 246 | 247 | `set-2.txt` 248 | 249 | def 250 | ghi 251 | ghi 252 | abc 253 | 254 | `set-3.txt` 255 | 256 | abc 257 | xyz 258 | def 259 | ghi 260 | 261 | #### Complement 262 | 263 | $ setop minus <set1> <set2> 264 | 265 | >Displays all unique lines in file `set1` that are not present in file `set2` 266 | 267 | For example: 268 | 269 | $ setop minus set-1.txt set-2.txt 270 | $ setop minus set-3.txt set-2.txt 271 | xyz 272 | 273 | `set-1.txt` 274 | 275 | abc 276 | def 277 | ghi 278 | 
def 279 | 280 | `set-2.txt` 281 | 282 | def 283 | ghi 284 | ghi 285 | abc 286 | 287 | `set-3.txt` 288 | 289 | abc 290 | xyz 291 | def 292 | ghi 293 | 294 | #### Symmetric Difference 295 | 296 | $ setop sym-diff <set1> <set2> ... <setn> 297 | 298 | >Displays all unique lines that are present in any of the files `set1`, `set2`, ..., `setn`, but not in all of them 299 | 300 | For example: 301 | 302 | $ setop sym-diff set-1.txt set-2.txt 303 | $ setop sym-diff set-1.txt set-2.txt set-3.txt 304 | xyz 305 | $ setop sym-diff set-*.txt 306 | xyz 307 | 308 | `set-1.txt` 309 | 310 | abc 311 | def 312 | ghi 313 | def 314 | 315 | `set-2.txt` 316 | 317 | def 318 | ghi 319 | ghi 320 | abc 321 | 322 | `set-3.txt` 323 | 324 | abc 325 | xyz 326 | def 327 | ghi 328 | 329 | #### Cartesian Product 330 | 331 | $ setop product <set1> <set2> 332 | 333 | >Displays the cartesian product of the unique lines from files `set1` and `set2` 334 | 335 | For example: 336 | 337 | $ setop product set-1.txt set-2.txt 338 | abc abc 339 | ghi abc 340 | def abc 341 | abc def 342 | ghi def 343 | def def 344 | abc ghi 345 | ghi ghi 346 | def ghi 347 | $ setop product set-2.txt set-3.txt 348 | abc abc 349 | ghi abc 350 | def abc 351 | abc def 352 | ghi def 353 | def def 354 | abc ghi 355 | ghi ghi 356 | def ghi 357 | abc xyz 358 | ghi xyz 359 | def xyz 360 | 361 | `set-1.txt` 362 | 363 | abc 364 | def 365 | ghi 366 | def 367 | 368 | `set-2.txt` 369 | 370 | def 371 | ghi 372 | ghi 373 | abc 374 | 375 | `set-3.txt` 376 | 377 | abc 378 | xyz 379 | def 380 | ghi 381 | 382 | #### Disjoint Sets Test 383 | 384 | $ setop is-disjoint <set1> <set2> 385 | 386 | >Tests whether files `set1` and `set2` have no lines in common (prints `1` if they are disjoint, `0` otherwise) 387 | 388 | For example: 389 | 390 | $ setop is-disjoint set-1.txt set-2.txt 391 | 0 392 | $ setop is-disjoint set-2.txt set-3.txt 393 | 0 394 | $ setop is-disjoint set-3.txt set-4.txt 395 | 1 396 | 397 | `set-1.txt` 398 | 399 | abc 400 | def 401 | ghi 402 | def 403 | 404 | `set-2.txt` 405 | 406 | def 407 | ghi 408 | ghi 409 | abc 410 | 
411 | `set-3.txt` 412 | 413 | abc 414 | xyz 415 | def 416 | ghi 417 | 418 | `set-4.txt` 419 | 420 | mno 421 | pqr 422 | stu 423 | 424 | #### Empty Set Test 425 | 426 | $ setop is-empty <set1> <set2> ... <setn> 427 | 428 | >Tests whether files `set1`, `set2`, ..., `setn` combined contain no lines (prints `1` if they are empty, `0` otherwise) 429 | 430 | For example: 431 | 432 | $ setop is-empty set-1.txt 433 | 0 434 | $ setop is-empty <(setop sym-diff set-1.txt set-2.txt) 435 | 1 436 | 437 | `set-1.txt` 438 | 439 | abc 440 | def 441 | ghi 442 | def 443 | 444 | `set-2.txt` 445 | 446 | def 447 | ghi 448 | ghi 449 | abc 450 | 451 | #### Minimum Element 452 | 453 | $ setop min <set1> <set2> ... <setn> 454 | 455 | >Displays the lexicographic minimum from among the lines in files `set1`, `set2`, ..., `setn` combined 456 | 457 | For example: 458 | 459 | $ setop min set-4.txt 460 | mno 461 | $ setop min set-1.txt set-4.txt 462 | abc 463 | 464 | `set-1.txt` 465 | 466 | abc 467 | def 468 | ghi 469 | def 470 | 471 | `set-4.txt` 472 | 473 | mno 474 | pqr 475 | stu 476 | 477 | #### Maximum Element 478 | 479 | $ setop max <set1> <set2> ... <setn> 480 | 481 | >Displays the lexicographic maximum from among the lines in files `set1`, `set2`, ..., `setn` combined 482 | 483 | For example: 484 | 485 | $ setop max set-1.txt 486 | ghi 487 | $ setop max set-1.txt set-3.txt 488 | xyz 489 | 490 | `set-1.txt` 491 | 492 | abc 493 | def 494 | ghi 495 | def 496 | 497 | `set-3.txt` 498 | 499 | abc 500 | xyz 501 | def 502 | ghi 503 | 504 | 505 | ## Credits 506 | 507 | Many of the bash one-liners that are part of this project were found at a [post on Peteris Krumins' blog](https://www.catonmat.net/blog/set-operations-in-unix-shell/). I've been using them for years and I finally decided to put them all together in one script, with easy-to-remember command names. 508 | 509 | ## Support 510 | 511 | Please [open an issue](https://github.com/cdax/setop/issues/new) for support. 512 | --------------------------------------------------------------------------------