# Checks that the example snippets in a Markdown file behave as documented.
#
# Usage: awk -f script.awk Examples.md
#
# Only lines inside ```bash fenced blocks are considered:
#   * "$ cmd" lines are commands: the "$ " prompt is stripped and the
#     command is written to chk_snippets.sh
#   * empty lines and lines starting with "# " are ignored
#   * all other lines are expected output, written to expected_op.txt
# In END, the collected commands are run with bash (stdout and stderr are
# both captured in output.txt) and the result is compared with the expected
# output using diff. The three generated files are removed only when the
# comparison succeeds; otherwise they are kept for inspection and the exit
# status is 1.

BEGIN {
    op_sh = "chk_snippets.sh"
    exp_op = "expected_op.txt"
    actual_op = "output.txt"
}

# fence markers only toggle the state, they are never part of a snippet
$0 == "```bash" { code_block = 1; next }
$0 == "```"     { code_block = 0; next }

# everything outside of a bash code block is plain documentation
!code_block { next }

# command line: drop the leading "$ " prompt
/^\$ / {
    print substr($0, 3) > op_sh
    next
}

# expected output: anything that is neither empty nor a "# " comment
!/^$/ && !/^# / {
    print $0 > exp_op
}

END {
    # close the generated files so that everything written so far is on disk
    # before bash and diff read them; do not rely on system() flushing
    # redirected output files implicitly
    close(op_sh)
    close(exp_op)

    system("bash " op_sh " > " actual_op " 2>&1")
    es = system("diff -q " exp_op " " actual_op)
    if (es == 0) {
        system("rm " op_sh " " exp_op " " actual_op)
        print "All tests passed"
    } else {
        print "Some tests failed: compare " exp_op " with " actual_op > "/dev/stderr"
        exit 1
    }
}
#!/usr/bin/env bash
#
# rcut (regexp-cut): uses awk to provide cut like syntax for field extraction.
# Work under construction!
#
# Usage: rcut [-d IFS] [-o OFS] [-f FIELDS] [-n] [-c] [-s] [-F] [-g] [file...]
#   -d IFS     input field separator, used as the awk -F value
#              (default: single space, i.e. the same default as awk)
#   -o OFS     output field separator (default: single space)
#   -f FIELDS  comma separated fields/ranges (default: '-', all the fields)
#   -n         allow negative indexing; ranges then use ':' instead of '-'
#   -c         complement: print all the fields except those given by -f
#   -s         suppress lines that were not split by the input separator
#   -F         treat the -d value literally (similar to grep's -F option)
#   -g         use gawk instead of the default mawk
# Input is read from the given files; awk reads stdin when none are given.

# Print the given message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed if the -f argument is acceptable.
# Arguments: $1 - value passed to -f
#            $2 - '' normally, ':' if negative indexing (-n) is active
# The value cannot be empty and can have only digits, comma and hyphen
# characters; colon is also allowed when $2 is ':'.
is_valid_field_spec() {
  [[ "$1" =~ ^[${2}0-9,-]+$ ]]
}

# Print a version of a separator that awk -F will match literally.
# Arguments: $1 - awk executable to use for the conversion
#            $2 - separator text
# Every character other than ^ and \ is wrapped in a bracket expression,
# while \ and ^ are written with hex escapes that awk turns into \\ and \^
# when it processes the -F value.
# printf is used instead of echo because echo would swallow separators
# such as -n or -e as its own options.
literal_ifs_regex() {
  printf '%s\n' "$2" | "$1" '{gsub(/[^^\\]/, "[&]");
                              gsub(/\\/, "\\x5c\\x5c");
                              gsub(/\^/, "\\x5c\\x5e")} 1'
}

# use mawk by default
# will be changed to gawk if -g option is used
cmd='mawk'

# similar to grep's -F option
# treats the input field separator(IFS) literally
F=''

# initialize input and output field separators as single space
# which is same default as awk
d=' '
o=' '

# - means print all fields
# this default helps to format input with value of output field separator(OFS)
f='-'

# allow negative indexing if set
n=''

# complement fields if set
c=''

# suppress lines not matching IFS
s=''

# flag variable to check if output field separator was set by the user or not
ofs_flag='false'

# first : makes it silent error reporting mode
# d f o options require arguments if used
# n c g s F options do not accept arguments
while getopts ":d:f:o:ncgsF" opt; do
  case "$opt" in
    d) d="$OPTARG" ;;
    f) f="$OPTARG" ;;
    o)
      o="$OPTARG"
      ofs_flag='true'
      ;;
    n) n=':' ;;
    c) c='1' ;;
    g) cmd='gawk' ;;
    s) s='1' ;;
    F) F='1' ;;
    \?) die "Invalid option: -$OPTARG" ;;
    :) die "Option -$OPTARG requires an argument." ;;
  esac
done
# Discard the options and sentinel --
shift "$((OPTIND - 1))"

is_valid_field_spec "$f" "$n" || die "Field number can only use integer values"

# if -o option isn't used, two cases where OFS will set to same value as IFS
# 1) if -F option is used
# 2) if IFS is a single character (partially mimics cut command behavior)
# \ character needs to be doubled since it is a metacharacter in awk strings
if [[ "$ofs_flag" == 'false' && ( ${#d} == 1 || "$F" == '1' ) ]]; then
  o="${d//\\/\\\\}"
fi

# if -F option is used, change IFS such that it matches literally
if [[ "$F" == '1' ]]; then
  d="$(literal_ifs_regex "$cmd" "$d")"
fi

# corner case: when -o is \ character (needed for gawk)
# since -o is used, this isn't covered by earlier \ doubling
if [[ "$o" == '\' ]]; then
  o='\\'
fi

"$cmd" -F "$d" -v fields="$f" -v OFS="$o" -v neg="$n" -v complement="$c" -v suppress="$s" '
BEGIN{
    # split the -f argument into individual fields/ranges only once
    fn = split(fields, fields_arr, /,/)
}

{
    # with -s, skip lines that were not split by the input field separator
    if(suppress && NF<=1)
        next

    # nothing is printed before the first selected field
    sep = ""

    for(i=1; i<=fn; i++){
        if(neg){
            # negative values count from the end: -1 is the last field
            rn = split(fields_arr[i], range, /:/)
            if(range[1] ~ /^-/) range[1] += NF + 1
            if(range[2] ~ /^-/) range[2] += NF + 1
        }
        else
            rn = split(fields_arr[i], range, /-/)

        # start of the range is forced to be at least 1 and at most NF
        start = range[1] <= 0 ? 1: range[1]
        if(start > NF) start = NF

        # a single field is a range of one; a missing or too large end
        # means the last field and a too small end means the first field
        if(rn == 1)
            end = start
        else if(range[2] == "" || range[2] > NF)
            end = NF
        else if(range[2] <= 0)
            end = 1
        else
            end = range[2]

        # complement mode only records the fields that have to be left out
        for(j=start; j<=end; j++)
            if(complement)
                ignore_fields[j]
            else {
                printf "%s%s", sep, $j
                sep = OFS
            }
    }

    if(complement){
        # print the remaining fields in the same order as the input
        for(k=1; k<=NF; k++)
            if(!(k in ignore_fields)){
                printf "%s%s", sep, $k
                sep = OFS
            }
        delete ignore_fields
    }

    print ""
}' "$@"
8 | 9 | ## Motivation 10 | 11 | `cut`'s syntax is handy for many field extraction problems, but it doesn't allow multi-character or regexp delimiters. This project aims to provide `cut`-like syntax for those cases. It is currently implemented as a `bash` script that uses `mawk` by default. 12 | 13 | :information_source: **Note** that `rcut` is neither feature-compatible with nor a replacement for the `cut` command. `rcut` helps when you need features like a regexp field separator. 14 | 15 |
16 | 17 | ## Features 18 | 19 | * Default field separation is the same as in `awk` 20 | * Both input (`-d`) and output (`-o`) field separators can be multiple characters 21 | * Input field separator can use regular expressions 22 | * this script uses `mawk` by default 23 | * you can change it to `gawk` for better regexp support with the `-g` option 24 | * If the input field separator is a single character and `-o` is *not* used, the output field separator will also be this same character 25 | * Fixed string input field separator can be enabled by using the `-F` option 26 | * if `-o` is *not* used, the value passed to the `-d` option will be set as the output field separator 27 | * Field range can be specified by using the `-` separator (same as `cut`) 28 | * `-` by itself means all the fields (this is also the default if the `-f` option isn't used at all) 29 | * if the start of the range isn't given, the default is `1` 30 | * if the end of the range isn't given, the default is the last field of a line 31 | * Negative indexing is allowed if you use the `-n` option 32 | * `-1` means the last field, `-2` means the second-last field and so on 33 | * you'll have to use `:` to specify field ranges 34 | * Multiple fields and ranges can be separated using the `,` character (same as `cut`) 35 | * Unlike `cut`, order matters with the `-f` option and field/range duplication is also allowed 36 | * this assumes `-c` (complement) is not active 37 | * Using the `-c` option will print all the fields in the same order as input except the fields specified by the `-f` option 38 | * Using the `-s` option will suppress lines not matching the input field separator 39 | * Minimum field number is forced to be `1` 40 | * Maximum field number is forced to be the last field of a line 41 | 42 | :warning: :warning: Work under construction! 43 | 44 |
45 | 46 | ## Examples 47 | 48 | ```bash 49 | $ cat spaces.txt 50 | 1 2 3 51 | x y z 52 | i j k 53 | 54 | # by default, it uses awk's space/tab field separation and trimming 55 | # unlike cut, order matters 56 | $ rcut -f3,1 spaces.txt 57 | 3 1 58 | z x 59 | k i 60 | 61 | # multi-character delimiter 62 | $ echo 'apple:-:fig:-:guava' | rcut -d:-: -f2 63 | fig 64 | 65 | # regexp delimiter 66 | $ echo 'Sample123string42with777numbers' | rcut -d'[0-9]+' -f1,4 67 | Sample numbers 68 | 69 | # fixed string delimiter 70 | $ echo '123)(%)*#^&(*@#.[](\\){1}\xyz' | rcut -Fd')(%)*#^&(*@#.[](\\){1}\' -f1,2 -o, 71 | 123,xyz 72 | 73 | # multiple ranges can be specified, order matters 74 | $ printf '1 2 3 4 5\na b c d e\n' | rcut -f2-3,5,1,2-4 75 | 2 3 5 1 2 3 4 76 | b c e a b c d 77 | 78 | # last field 79 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -nf-1 80 | cat 81 | 5 82 | 83 | # except last two fields 84 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -cnf-2: 85 | apple 86 | 1 2 3 87 | 88 | # suppress lines without input field delimiter 89 | $ printf '1,2,3,4\nhello\na,b,c\n' | rcut -sd, -f2 90 | 2 91 | b 92 | 93 | # -g option will switch to gawk 94 | $ echo '1aa2aa3' | rcut -gd'a{2}' -f2 95 | 2 96 | ``` 97 | 98 | See [Examples.md](examples/Examples.md) for many more examples. 99 | 100 |
101 | 102 | ## Tests 103 | 104 | You can use [script.awk](examples/script.awk) to check if all the example code snippets are working as expected. 105 | 106 | ```bash 107 | $ cd examples/ 108 | $ awk -f script.awk Examples.md 109 | ``` 110 | 111 |
112 | 113 | ## TODO 114 | 115 | * Step value other than `1` for field range 116 | * What to do if start of the range is greater than end? 117 | * And possibly more... 118 | 119 |
120 | 121 | ## Similar tools 122 | 123 | * [hck](https://github.com/sstadick/hck) — close to drop in replacement for `cut` that can use a regex delimiter, works on compressed files, etc 124 | * [choose](https://github.com/theryangeary/choose) — negative indexing, regexp based delimiters, etc 125 | 126 |
127 | 128 | ## Contributing 129 | 130 | * Please open an issue for typos/bugs/suggestions/etc 131 | * **Even for pull requests, open an issue for discussion before submitting PRs** 132 | * In case you need to reach me, mail me at `echo 'bGVhcm5ieWV4YW1wbGUubmV0QGdtYWlsLmNvbQo=' | base64 --decode` or send a DM via [twitter](https://twitter.com/learn_byexample) 133 | 134 |
135 | 136 | ## License 137 | 138 | This project is licensed under MIT, see [LICENSE](./LICENSE) file for details. 139 | 140 | -------------------------------------------------------------------------------- /examples/Examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Examples for various options and combinations are shown below. These will also act as a source for testing the `rcut` command. 4 | 5 | ## Default field separators 6 | 7 | Same as `awk`. Given newline as record separator, space and tab characters will be trimmed from the start/end of input lines. One or more space/tab characters will then act as the field separator. 8 | 9 | Sample input file: 10 | 11 | ```bash 12 | $ cat spaces.txt 13 | 1 2 3 14 | x y z 15 | i j k 16 | $ cat -A spaces.txt 17 | 1 2^I3 $ 18 | x y z$ 19 | i j ^I^Ik^I$ 20 | ``` 21 | 22 | Here's some example operations with default field separators: 23 | 24 | ```bash 25 | # format lines with single space between fields 26 | # leading/trailing space/tab characters will be removed as well 27 | # same as: awk '{$1=$1} 1' spaces.txt 28 | $ rcut spaces.txt 29 | 1 2 3 30 | x y z 31 | i j k 32 | 33 | # specific field 34 | $ rcut -f2 spaces.txt 35 | 2 36 | y 37 | j 38 | 39 | # multiple fields can be specified separated by , character 40 | # unlike cut, order matters 41 | $ rcut -f3,1 spaces.txt 42 | 3 1 43 | z x 44 | k i 45 | ``` 46 | 47 | ## Changing output field separator 48 | 49 | ```bash 50 | $ rcut -o: spaces.txt 51 | 1:2:3 52 | x:y:z 53 | i:j:k 54 | 55 | $ rcut -o'{-}' spaces.txt 56 | 1{-}2{-}3 57 | x{-}y{-}z 58 | i{-}j{-}k 59 | ``` 60 | 61 | ## Changing input field separator 62 | 63 | Here's some examples with `csv` input: 64 | 65 | ```bash 66 | $ cat scores.csv 67 | Name,Maths,Physics,Chemistry 68 | Ith,100,100,100 69 | Cy,97,98,95 70 | Lin,78,83,80 71 | 72 | # single character input field separator 73 | # implies same output field separator as well 74 | $ rcut -d, -f1,4 
scores.csv 75 | Name,Chemistry 76 | Ith,100 77 | Cy,95 78 | Lin,80 79 | 80 | $ rcut -d, -o: -f4,2 scores.csv 81 | Chemistry:Maths 82 | 100:100 83 | 95:97 84 | 80:78 85 | ``` 86 | 87 | Input field separator can be multiple characters as well: 88 | 89 | ```bash 90 | $ echo 'apple:-:fig:-:guava' | rcut -d:-: -f2 91 | fig 92 | 93 | # output field separator won't be same as input in this case 94 | $ echo 'apple:-:fig:-:guava' | rcut -d:-: -f2,1 95 | fig apple 96 | ``` 97 | 98 | ## Selecting range of fields 99 | 100 | Range of fields can be specified separated by a `-` character. If negative indexing option `-n` is enabled (discussed later), the separator changes to `:` character. 101 | 102 | ```bash 103 | $ printf '1 2 3 4 5\na b c d e\n' | rcut -f1-3 104 | 1 2 3 105 | a b c 106 | 107 | # multiple ranges can be specified, order matters 108 | $ printf '1 2 3 4 5\na b c d e\n' | rcut -f2-3,5,1,2-4 109 | 2 3 5 1 2 3 4 110 | b c e a b c d 111 | ``` 112 | 113 | Beginning or ending or both can be ignored for a range. 114 | 115 | ```bash 116 | # if - alone is used, it indicates all the fields 117 | $ printf '1 2 3 4 5\na b c d e\n' | rcut -f-,1 118 | 1 2 3 4 5 1 119 | a b c d e a 120 | 121 | # if beginning of the range is left out, default is 1 122 | $ rcut -f-2 spaces.txt 123 | 1 2 124 | x y 125 | i j 126 | 127 | # if ending of the range is left out, default is last field of that line 128 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f2- -o, 129 | ball,cat 130 | 2,3,4,5 131 | ``` 132 | 133 | ## Regexp based input field separator 134 | 135 | Regular expression syntax will depend on the `awk` version. 
136 | 137 | ```bash 138 | $ echo 'apple : fig : guava' | rcut -d' *: *' -f2 139 | fig 140 | 141 | $ echo 'Sample123string42with777numbers' | rcut -d'[0-9]+' -f1,4 142 | Sample numbers 143 | 144 | # if you change mawk to gawk, you can also use rcut -d'\\W+' 145 | $ echo 'load;err_msg--\ant,r2..not' | rcut -d'[^[:alnum:]_]+' 146 | load err_msg ant r2 not 147 | ``` 148 | 149 | ## Fixed string input field separator 150 | 151 | Using `-F` option will cause the content passed to `-d` option to be matched literally. If `-o` option isn't set, value passed to the `-d` option will be used. 152 | 153 | ```bash 154 | $ echo '1\x5e2' | rcut -Fd'\x5e' -f1,2,1 155 | 1\x5e2\x5e1 156 | $ echo 'a\b' | rcut -Fd'\' -f1,2,1 157 | a\b\a 158 | $ echo 'a\\b' | rcut -Fd'\\' -f1,2,1 159 | a\\b\\a 160 | 161 | $ echo '123)(%)*#^&(*@#.[](\\){1}\xyz' | rcut -Fd')(%)*#^&(*@#.[](\\){1}\' -f1 162 | 123 163 | $ echo '123)(%)*#^&(*@#.[](\\){1}\xyz' | rcut -Fd')(%)*#^&(*@#.[](\\){1}\' -f2 164 | xyz 165 | 166 | # output should be same as input here 167 | $ echo '123)(%)*#^&(*@#.[](\\){1}\xyz' | rcut -Fd')(%)*#^&(*@#.[](\\){1}\' -f1,2 168 | 123)(%)*#^&(*@#.[](\\){1}\xyz 169 | 170 | # saner output with , as output delimiter 171 | $ echo '123)(%)*#^&(*@#.[](\\){1}\xyz' | rcut -Fd')(%)*#^&(*@#.[](\\){1}\' -f1,2 -o, 172 | 123,xyz 173 | ``` 174 | 175 | ## Negative indexing 176 | 177 | When `-n` option is used, you can specify `-1` for last field, `-2` for second-last field and so on. You'll have to use `:` character for ranges. 
178 | 179 | ```bash 180 | # last field 181 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -nf-1 182 | cat 183 | 5 184 | 185 | # last field and third-last field 186 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -nf-1,-3 187 | cat apple 188 | 5 3 189 | 190 | # first and last field 191 | $ echo 'Sample123string42with777numbers' | rcut -d'[0-9]+' -nf1,-1 192 | Sample numbers 193 | 194 | # range separator is : when -n is active 195 | # last four fields 196 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -nf-4: 197 | apple ball cat 198 | 2 3 4 5 199 | ``` 200 | 201 | ## Complement 202 | 203 | The `-c` option will invert the field selections. Unlike the normal field extraction, order doesn't matter. All the fields except those specified by the `-f` option will be displayed using the same order as input. 204 | 205 | ```bash 206 | # except second field 207 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -cf2 208 | apple cat 209 | 1 3 4 5 210 | 211 | # except first and third fields, order doesn't matter 212 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -cf3,1 213 | ball 214 | 2 4 5 215 | 216 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -cf2-3 217 | apple 218 | 1 4 5 219 | 220 | # except last two fields 221 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -cnf-2: 222 | apple 223 | 1 2 3 224 | 225 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -cnf-4 226 | ball cat 227 | 1 3 4 5 228 | ``` 229 | 230 | ## Empty separators 231 | 232 | ```bash 233 | $ echo 'apple' | rcut -d '' -f2-4 234 | p p l 235 | 236 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -o '' -f1,3 237 | applecat 238 | 13 239 | ``` 240 | 241 | ## Suppress lines without delimiters 242 | 243 | The `-s` option behaves similarly to the `-s` option provided by `cut`. This will suppress a line from being printed if it doesn't contain the given IFS. 
244 | 245 | ```bash 246 | $ printf '1,2,3,4\nhello\na,b,c\n' 247 | 1,2,3,4 248 | hello 249 | a,b,c 250 | 251 | $ printf '1,2,3,4\nhello\na,b,c\n' | rcut -d, -f2 252 | 2 253 | hello 254 | b 255 | $ printf '1,2,3,4\nhello\na,b,c\n' | rcut -sd, -f2 256 | 2 257 | b 258 | 259 | $ printf '1,2,3,4\nhello\na,b,c\n' | rcut -csd, -f2 260 | 1,3,4 261 | a,c 262 | ``` 263 | 264 | ## Unicode 265 | 266 | Unicode processing might work for some cases depending on the current locale. 267 | 268 | ```bash 269 | # single character input field separator 270 | # so output field separator is also same as input separator 271 | $ echo '1α2α3' | rcut -dα -f3,1,2 272 | 3α1α2 273 | 274 | # input field separator is considered as multiple characters here 275 | # so, output field separator will be space instead of α 276 | $ echo '1α2α3' | LC_ALL=C rcut -dα -f3,1,2 277 | 3 1 2 278 | ``` 279 | 280 | ## Switch to gawk 281 | 282 | ```bash 283 | # mawk doesn't support {} form of quantifiers 284 | # see https://unix.stackexchange.com/q/506119 for more details 285 | $ echo '1aa2aa3' | rcut -d'a{2}' -f2 286 | 1aa2aa3 287 | $ echo '1aa2aa3' | rcut -d'aa' -f2 288 | 2 289 | 290 | # -g option will use gawk, which supports {} quantifiers 291 | $ echo '1aa2aa3' | rcut -gd'a{2}' -f2 292 | 2 293 | ``` 294 | 295 | ## Corner cases 296 | 297 | Minimum field number is forced to be `1`. 
298 | 299 | ```bash 300 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f0 301 | apple 302 | 1 303 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f0-0 304 | apple 305 | 1 306 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f1-0 307 | apple 308 | 1 309 | 310 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f22 311 | cat 312 | 5 313 | 314 | # first line has only three fields, so -4 becomes 1 (since minimum is 1) 315 | # second line has five fields, so -4 becomes 2 316 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -nf-4,-1 317 | apple cat 318 | 2 5 319 | 320 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -nf-100 321 | apple 322 | 1 323 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -nf:-100 324 | apple 325 | 1 326 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -nf-200:-100 327 | apple 328 | 1 329 | 330 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -cnf-100 331 | ball cat 332 | 2 3 4 5 333 | ``` 334 | 335 | Maximum field number is forced to be the last field of that particular input line. 336 | 337 | ```bash 338 | # no extra output field separator for first line even though it has only 3 fields 339 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -o, -f-4 340 | apple,ball,cat 341 | 1,2,3,4 342 | 343 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f4 344 | cat 345 | 4 346 | 347 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f100 348 | cat 349 | 5 350 | ``` 351 | 352 | Backslash as field delimiters. 353 | 354 | ```bash 355 | $ echo 'a\b' | rcut -d'\' -f1,2,1 356 | a\b\a 357 | $ echo 'a,b' | rcut -d',' -o'\' -f1,2,1 358 | a\b\a 359 | $ echo 'a\b' | rcut -d'\' -o'\' -f1,2,1 360 | a\b\a 361 | 362 | # gawk needs special attention if -o is \ 363 | $ echo 'a,b' | rcut -d',' -go'\' -f1,2,1 364 | a\b\a 365 | ``` 366 | 367 | ## Errors 368 | 369 | Space between the option and empty string is mandatory. Otherwise, further options if any would become the argument. 
370 | 371 | ```bash 372 | # -f2-4 will get treated as argument for -d 373 | $ echo 'apple' | rcut -d'' -f2-4 374 | apple 375 | $ echo 'apple' | rcut -d'' 376 | Option -d requires an argument. 377 | 378 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -o'' -f1,3 379 | apple-f1,3ball-f1,3cat 380 | 1-f1,32-f1,33-f1,34-f1,35 381 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -o'' 382 | Option -o requires an argument. 383 | ``` 384 | 385 | Bad arguments for `-f` option. 386 | 387 | ```bash 388 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f1a 389 | Field number can only use integer values 390 | 391 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -fx 392 | Field number can only use integer values 393 | 394 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f1.1 395 | Field number can only use integer values 396 | 397 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f '' 398 | Field number can only use integer values 399 | 400 | # can't use : if -n option isn't provided 401 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -f1:3 402 | Field number can only use integer values 403 | ``` 404 | 405 | Invalid options. 406 | 407 | ```bash 408 | $ printf 'apple ball cat\n1 2 3 4 5' | rcut -t 409 | Invalid option: -t 410 | $ echo '123' | rcut -x 411 | Invalid option: -x 412 | ``` 413 | 414 | --------------------------------------------------------------------------------