├── LICENSE ├── README.md └── rmHTMLement.nim /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 abdulbadii 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Get, Remove or Copy Markup Language elements 2 | 3 | Perform these operations on a file by specifying the element path. 4 | 5 | The path should use the abbreviated [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/#path-abbrev) syntax, e.g.: 6 | 7 | ```html/body/div[1]/div/div[2]``` 8 | 9 | means: in the given HTML or XML file, find the second div element under every div element that lies under the first div element, under any body element, under any html element.
10 | 11 | ```html/body/div[3]//div[1]/div``` 12 | 13 | means: in the given HTML or XML file, find any div element under the first div element that lies at any depth under the third div element of any body element, under any html element. 14 | 15 | Several paths may be given one after another, delimited by ```;``` or ```|``` 16 | To copy, append the characters ```+>``` followed by the copy target path, 17 | e.g. 18 | 19 | ```html/body/div[1]/div/div[2]+>html/body/div[3]//div[1]/div``` 20 | -------------------------------------------------------------------------------- /rmHTMLement.nim: -------------------------------------------------------------------------------- 1 | import std/[os, terminal, nre, math, algorithm, macros] 2 | 3 | template strUint( s :string) :uint= 4 | var res :uint 5 | for i, c in s: 6 | if c in '0'..'9': res += ((ord(c) - 48) * 10^(s.high-i)).uint 7 | else: raise newException(ValueError, "Non-number in string") 8 | res 9 | template numChr( c :char, s: string) :uint= 10 | var res :uint 11 | for i in s: 12 | if i==c: inc(res) 13 | res 14 | # Markup-language regexes: a text node, where a comment or an asymmetric (unpaired) tag counts as text-node content: 15 | let 16 | txNodeR= r"(?:[^<>]++(?>|<(?>meta|link|input|img|hr|base)\b[^>]*+>)*)+" 17 | ctntR= r"(?:[^<>]*+(?>|<(?>meta|link|input|img|hr|base)\b[^>]*+>)*)*+" #-> as of element node 18 | ctnt= re("(?s)^(" & ctntR & ")(.+)") 19 | nodeR= r"(<([a-z]\w*+)(?>[^/>]*+/>|[^>]*+>(?:" & ctntR & r"(?-2)*+)*+))" 20 | headR= r"(<(?>[a-z]\w*+|!DOCTYPE)[^>]*+>)" 21 | head= re("(?s)^" & headR & "(.+)") 22 | var 23 | whole, offset, res, remain, tag :string 24 | totN, maxND :uint 25 | 26 | template nodeB( tot, max, d :uint)= 27 | if m.isNone: return false 28 | let g= m.get.captures.toSeq 29 | inc(tot); inc(d); if d>max: max=d 30 | var 31 | tagname= g[0].get 32 | off = "<" & tagname 33 | str = g[3].get 34 | if g[1].isSome: 35 | res= off & g[1].get 36 | dec(d) 37 | if d==0: 38 | remain= str 39 | totN=tot; maxND=max 40 | return true 41 | off &= 
g[2].get 42 | while true: 43 | let m = str.find ctnt 44 | off &= m.get.captures[0] 45 | str= m.get.captures[1] 46 | if str[0..1] == "" 48 | if str[ 2..1+tagname.len] == tagname: 49 | res= off & " ([^/>]*+/>) | ([^>]*+>) ) (.+)" 68 | nodeB tot, max, d 69 | nodeRec str 70 | 71 | proc node( str, tag :string) :bool= 72 | var tot, max, d :uint 73 | proc nodeRec( str :string, tag="") :bool= 74 | let m= str.find re("(?xs) ^<(" & tag & r"[a-z]\w*+) (?> ([^/>]*+/>) | ([^>]*+>) ) (.+)" ) 75 | nodeB tot, max, d 76 | nodeRec str, tag 77 | 78 | template getOffUp2Tag( tag="") :bool= 79 | let m= remain.find re( 80 | "(?s)^((?:" & ctntR & "(?:(?!<" & tag & r"\b)" & nodeR & ")?)*+)(?=<" & tag & r"\b)" & nodeR & "(.+)" ) 81 | if m.isNone: false 82 | else: 83 | offset &= m.get.captures[0] 84 | res= m.get.captures[3] 85 | remain= m.get.captures[5] 86 | true 87 | 88 | type 89 | StrPair = tuple[ Offset :string, Node :string] 90 | OffNodArray = seq[ StrPair] 91 | 92 | template ctnPrevTag( nd :OffNodArray; isTheTagNode) = 93 | while true: 94 | let m= remain.find ctnt 95 | offset &= m.get.captures[0] 96 | remain = m.get.captures[1] 97 | if isTheTagNode: 98 | if maxND > minD: nd.add (offset, res) 99 | offset &= res 100 | else: break 101 | 102 | template ctnoTagUp2Tag( nd :OffNodArray, xSpeCmd) = 103 | tag &= r"\b)" 104 | ctnPrevTag nd: 105 | node remain, "(?!" 
& tag 106 | tag = "(?=" & tag 107 | if node(remain, tag): 108 | if maxND > minD: 109 | nd.add (offset, res) 110 | xSpeCmd 111 | offset &= res 112 | else: break 113 | 114 | template headeRemain( nd :string, prevOff="") = 115 | var m = nd.find head 116 | offset= prevOff & m.get.captures[0] 117 | remain= m.get.captures[1] 118 | 119 | const 120 | isERROR = true 121 | isSUCCEED= false 122 | 123 | template posiN( posn :string, a,b :uint)= 124 | var 125 | g = posn.find(re"(?>(<)|>)(=)?(\d+)").get.captures.toSeq 126 | eq= g[1].isSome 127 | n = g[2].get.strUint # Get a-b as lower-upper bound 128 | if g[0].isSome: 129 | b= if eq: n else: n-1 130 | else: a= if eq: n-1 else: n 131 | 132 | proc getNthRev( n :var uint) :bool= 133 | var i :uint 134 | while true: 135 | inc(i) 136 | let m= remain.find re( 137 | "^((?:(?:" & ctntR & "(?:(?!<" & tag & r"\b)" & nodeR & ")?)*+(?=<" & tag & r"\b)" & nodeR & "){" & $i & "})" ) 138 | if m.isNone: break 139 | if i<=n: return false 140 | n = i-n # i is the max nth from which subtract the specified nth 141 | true 142 | 143 | proc getxNRev( n :var uint) :bool= 144 | var i :uint 145 | while true: 146 | inc(i) 147 | let m= remain.find re( r"^(?:" & nodeR & "*" & txNodeR & "){" & $i & "}" ) 148 | if m.isNone: break 149 | if i<=n: return false 150 | n= i-n 151 | true 152 | 153 | template getTx_Nth( ret :OffNodArray; off, nod :string; n :uint; txNRev:bool) :bool= 154 | nod.headeRemain off 155 | if txNRev and not getxNRev( n): isERROR 156 | else: 157 | let m= remain.find re( r"^((?:" & nodeR & "*(" & txNodeR & ")){" & $n & "})" ) 158 | if m.isNone: isERROR 159 | else: 160 | var r= m.get.captures[3] 161 | ret.add (offset & m.get.captures[0][0..^r.len+1], r) 162 | isSUCCEED 163 | 164 | template getE_Nth( ret :OffNodArray; off, nod :string; n:uint; nthRev:bool) :bool= 165 | nod.headeRemain off 166 | if nthRev and not getNthRev( n): isERROR 167 | else: 168 | let m= remain.find re( 169 | r"^((?:(?:" & ctntR & "(?:(?!<" & tag & r"\b)" & nodeR & 
")?)*+(?=<" & tag & r"\b)" & nodeR & "){" & $n & "})") 170 | if m.isNone: isERROR 171 | else: 172 | var r= m.get.captures[3] 173 | ret.add (offset & m.get.captures[0][0..^r.len+1], r) 174 | isSUCCEED 175 | 176 | macro getTextMulN( ret :OffNodArray; off, nod, posn :string) :bool= 177 | result = quote do: 178 | `nod`.headeRemain `off` 179 | var a,b,i :uint 180 | if `posn` != "": `posn`.posiN(a,b) 181 | while true: 182 | inc(i) 183 | let m= remain.find re( r"^((?:" & nodeR & "*(" & txNodeR & ")){" & $i & "})" ) 184 | if m.isNone: break 185 | if i>a and (b==0 or i<=b): 186 | var r= m.get.captures[3] 187 | `ret`.add (offset & m.get.captures[0][0..^r.len+1], r) 188 | `ret`.len==0 189 | 190 | macro getE_MultiN( ret :OffNodArray; off, nod, posn, att :string) :bool= 191 | result = quote do: 192 | `nod`.headeRemain `off` 193 | var a,b,i :uint 194 | if `posn` != "": `posn`.posiN(a,b) 195 | elif `att` != "": tag &= r"\s+" & `att` 196 | while true: 197 | if getOffUp2Tag( tag): 198 | inc(i) 199 | if i>a and (b==0 or i<=b): `ret`.add (offset, res) 200 | offset &= res 201 | else: break 202 | `ret`.len==0 203 | 204 | template getE_MultiN( ret :OffNodArray; off, nod, aatt :string) :bool= 205 | nod.headeRemain off 206 | while true: 207 | if getOffUp2Tag( r"\S+\s+" & aatt): 208 | ret.add (offset, res) 209 | offset &= res 210 | else: break 211 | ret.len==0 212 | 213 | template getE_MultiN( ret :OffNodArray; off, nod :string) :bool= 214 | nod.headeRemain off 215 | while true: 216 | if getOffUp2Tag: 217 | ret.add (offset, res) 218 | offset &= res 219 | else: break 220 | ret.len==0 221 | 222 | template allDepthInit {.dirty.}= 223 | var curNode, nd= newSeqOfCap[ StrPair]( avgOffNode_Ply) 224 | for _ in 0..avgOffNode_Ply: 225 | nd.add (newStringOfCap(maxw), newStringOfCap(maxw)) 226 | curNode.add (newStringOfCap(maxw), newStringOfCap(maxw)) 227 | curNode.reset 228 | 229 | template getAllDepthNth_Tx( ret :OffNodArray; off, nod :string; nth :uint; txNRev:bool) :bool= 230 | curNode.add (off, 
nod) 231 | while curNode.len>0: 232 | nd.reset 233 | for the in curNode: 234 | if getTx_Nth( ret, the.Offset, the.Node, nth, txNRev): ret.add (offset, res) 235 | while true: 236 | if getOffUp2Tag: nd.add (offset, res) 237 | else: break 238 | curNode= nd 239 | ret.len==0 240 | 241 | template getAllDepthMultiN_Tx( ret :OffNodArray; off, nod, posn :string) :bool= 242 | var i,a,b:uint 243 | if posn != "": posn.posiN(a,b) 244 | curNode.add (off, nod) 245 | while curNode.len>0: 246 | nd.reset 247 | for the in curNode: 248 | while true: 249 | if getOffUp2Tag: 250 | nd.add (offset, res) 251 | i.inc 252 | if i>a and (b==0 or i<=b): 253 | ret.add (offset, res) 254 | else: 255 | break 256 | curNode= nd 257 | ret.len==0 258 | 259 | template getAllDepthNth( ret :OffNodArray; off, nod :string; minD, nth :uint; nthRev, nextIsTx:bool) :bool= 260 | curNode.add (off, nod) 261 | template everyPsblNode( nthORnthRevCo) = 262 | while curNode.len>0: 263 | nd.reset 264 | for the in curNode: 265 | the.Node.headeRemain the.Offset 266 | nthORnthRevCo 267 | curNode= nd 268 | var i :uint 269 | template loopn( n:uint)= 270 | while true: 271 | inc(i) 272 | ctnoTagUp2Tag nd: 273 | if i==n and maxND >= minD: ret.add (offset, res) 274 | template txLoop( n:uint)= 275 | if getOffUp2Tag( tag): 276 | nd.add (offset, res) 277 | inc(i) 278 | if i==n: 279 | ret.add (offset, res) 280 | offset &= res 281 | else: break 282 | if nextIsTx: 283 | if nthRev: 284 | everyPsblNode: 285 | var n=nth; if getNthRev( n): n.txLoop 286 | else: 287 | everyPsblNode: 288 | nth.txLoop 289 | else: 290 | if nthRev: 291 | everyPsblNode: 292 | var n=nth; if getNthRev( n): n.loopn 293 | else: 294 | everyPsblNode: 295 | nth.loopn 296 | ret.len==0 297 | 298 | template loop( commandIFoundOrBreak, i_var) = 299 | curNode.add (this.Offset, this.Node) 300 | while curNode.len>0: 301 | nd.reset 302 | for the in curNode: 303 | the.Node.headeRemain the.Offset 304 | while true: 305 | commandIFoundOrBreak 306 | i_var 307 | curNode=nd 308 | 309 
| # off, nod are arguments of func. of offsetNodeLoop arg. template normalization is: this.Offset, this.Node 310 | 311 | template getAllDepthMultiN( ret :OffNodArray; off, nod :string; minD:uint; posn, att :string; nextIsTx :bool) :bool {.dirty.}= 312 | var a, b, i :uint 313 | if posn != "": posn.posiN(a,b) 314 | elif att != "": tag &= r"\s+" & att 315 | if nextIsTx: 316 | loop do: 317 | if getOffUp2Tag( tag): 318 | nd.add (offset, res) 319 | inc(i) 320 | if i>a and (b==0 or i<=b): 321 | ret.add (offset, res) 322 | offset &= res 323 | else: break 324 | do: i=0 325 | else: 326 | loop do: 327 | ctnoTagUp2Tag nd: 328 | inc(i) 329 | if i>a and (b==0 or i<=b) and maxND >= minD: 330 | ret.add (offset, res) 331 | do: i=0 332 | ret.len==0 333 | 334 | template getAllDepthMultiN( ret :OffNodArray; o, n :string; minD :uint; aatt :string; nextIsTx :bool) :bool {.dirty.}= 335 | tag = r"\S+\s+" & aatt 336 | loop do: 337 | ctnoTagUp2Tag nd: 338 | if maxND >= minD: ret.add (offset, res) 339 | do:discard 340 | ret.len==0 341 | 342 | template getAllDepthMultiN( ret :OffNodArray; o, n :string; minD :uint; nextIsTx :bool) :bool {.dirty.}= 343 | tag = "" 344 | loop do: 345 | ctnoTagUp2Tag nd: 346 | if maxND >= minD: ret.add (offset, res) 347 | do:discard 348 | ret.len==0 349 | 350 | template offsetNodeLoop( xPathPat) = 351 | for i, this {.inject.} in offsetNode: 352 | retOffNode.reset 353 | if xPathPat: 354 | if i0: 358 | let e= getE_Path_R( remPath, retOffNode) #...is propagating to the next depth and returning boolean 359 | if i==offsetNode.high: return e # value which'll be returned if this's the last iteration 360 | else: 361 | resultArr.add retOffNode 362 | 363 | var 364 | avgOffNode_Ply, maxw :uint 365 | resultArr :OffNodArray 366 | 367 | proc getE_Path_R( path :string; offsetNode :OffNodArray) :bool= 368 | var 369 | g= path.find(re"(?x)^/ (/)? (?> (text\(\)) (?: \[ (?> (last\(\)-)? ([1-9]\d*+) | position\(\) (?!<1) ([<>]=? [1-9]\d*+)) \])? 
| (?> ([^/@*[]+) (?: \[ (?> (?>(last\(\)-)|position\(\)=)? ([1-9]\d*+) | position\(\) (?!<1) ( [<>]=? [1-9]\d*+ ) | @(\*| [^]]+) ) \] )? | @([a-z]\w*[^/]* |\*) | (\*) ) (.*) )" ).get.captures.toSeq 370 | nth, txNth :uint 371 | txPos, posn, attg, aatt :string 372 | remPath= g[12].get 373 | minD= 1 + numChr( '/', remPath) 374 | retOffNode= newSeqOfCap[ StrPair]( avgOffNode_Ply) # the offset-node pair found which... 375 | for _ in 0..avgOffNode_Ply: 376 | retOffNode.add (newStringOfCap(maxw),newStringOfCap(maxw)) 377 | template isAllDepths:bool= g[0].isSome 378 | template isTxNode :bool= g[1].isSome 379 | template txNRev :bool= g[2].isSome 380 | template isTxNth:bool= g[3].isSome 381 | template isTxPos:bool= g[4].isSome 382 | template isTag :bool= g[5].isSome 383 | template nthRev :bool= g[6].isSome 384 | template isNth :bool= g[7].isSome 385 | template isPosn :bool= g[8].isSome 386 | template isAttg :bool= g[9].isSome 387 | template isAatt :bool= g[10].isSome 388 | template nextPly_Tx :bool= remPath.find(re"^//?text\(\)").isSome 389 | if isTxNode: 390 | if isTxNth: txNth= g[3].get.strUint 391 | elif isTxPos: txPos= g[4].get 392 | elif isTag: 393 | tag= g[5].get 394 | if isNth: 395 | nth= g[7].get.strUint 396 | elif isPosn: 397 | posn= g[8].get 398 | elif isAttg: 399 | attg= g[9].get 400 | elif isAatt: 401 | aatt= g[10].get 402 | if isAllDepths: # all depths under current // 403 | allDepthInit 404 | if isTag: 405 | if isNth: 406 | offsetNodeLoop: getAllDepthNth retOffNode, this.Offset, this.Node, minD, nth, nthRev, nextPly_Tx 407 | else: 408 | offsetNodeLoop: getAllDepthMultiN retOffNode, this.Offset, this.Node, minD, posn, attg, nextPly_Tx 409 | elif isAatt: 410 | offsetNodeLoop: getAllDepthMultiN retOffNode, this.Offset, this.Node, minD, aatt=aatt, nextPly_Tx 411 | elif isTxNode: 412 | if isTxNth: 413 | offsetNodeLoop: getAllDepthNth_Tx retOffNode, this.Offset, this.Node, txNth, txNRev 414 | else: 415 | offsetNodeLoop: getAllDepthMultiN_Tx retOffNode, this.Offset, 
this.Node, txPos 416 | else: 417 | offsetNodeLoop: getAllDepthMultiN retOffNode, this.Offset, this.Node, minD, nextPly_Tx 418 | elif isTag: 419 | if isNth: 420 | offsetNodeLoop: getE_Nth retOffNode, this.Offset, this.Node, nth, nthRev 421 | else: 422 | offsetNodeLoop: getE_MultiN retOffNode, this.Offset, this.Node, posn, attg 423 | elif isAatt: 424 | offsetNodeLoop: getE_MultiN retOffNode, this.Offset, this.Node, aatt 425 | elif isTxNode: 426 | if isTxNth: 427 | offsetNodeLoop: getTx_Nth retOffNode, this.Offset, this.Node, txNth, txNRev 428 | else: 429 | offsetNodeLoop: getTextMulN retOffNode, this.Offset, this.Node, txPos 430 | else: 431 | offsetNodeLoop: getE_MultiN retOffNode, this.Offset, this.Node 432 | # be any of these true, it failed finding. Now see if 433 | isSUCCEED 434 | 435 | var 436 | aCP, outf :string 437 | paths :seq[ string] 438 | template xPathsCheck( path:string; hasTarget="")= 439 | paths.reset 440 | for p in path.split re"[|;]": 441 | if p.contains xpath: 442 | if p.contains re"^[^/]": 443 | if aCP.len == 0: 444 | echo "\n'",p,"' is relative to base/current path which is empty" 445 | while true: 446 | echo "\nPut a valid one: " 447 | aCP = readLine(stdin).replace( re"\h+", "") 448 | if aCP.contains xpath: break 449 | echo "\n'",aCP,"' is not a valid Xpath" 450 | aCP &= "/" 451 | var pr= p.replace( re"^\./", "") 452 | while pr.contains(re"^\.\.") : 453 | if aCP == "/" : echo "\n'", pr,"' upward node ..\nran over '",aCP,"' current path";quit(0) 454 | aCP = aCP.replace( re"[^/]+/$", "") 455 | pr = pr.replace( re"^../?", "") 456 | paths.add( aCP & pr) 457 | else: paths.add( p) 458 | else: 459 | echo "\n'",p,"' is invalid Xpath\nSkip? (s: skip. 
else: abort): " 460 | if getch() != 's': echo "\nAborting";quit(1) 461 | let totPaths {.inject.}= paths.len.uint 462 | if totPaths==0: quit("\nNo valid xpath " & hasTarget,0) 463 | 464 | template getDocFile( f :string)= 465 | f.getDocFile(whole) 466 | template getDocFile( f, w :string)= 467 | while true: 468 | if fileExists(f): 469 | try: w = readFile f 470 | except IOError as e: 471 | echo "\nCannot read '",f,"': ",e.msg 472 | continue 473 | except OverflowDefect as e: 474 | echo e.msg;continue 475 | except: 476 | echo "\nFile '",outf,"': critical error";continue 477 | finally:break 478 | else: 479 | echo "'",f,"' doesn't exist" 480 | while true: 481 | stdout.write "Try another file name:" 482 | f=readLine(stdin).replace(re"^\h+|\s+$", "") 483 | if f.len>0: break 484 | 485 | template validatingML= validatingML whole 486 | template validatingML( w :string)= 487 | let m= w.find re( 488 | r"(?xs)^(\s* (?: <\?xml\b [^>]*+> \s* )?) (< (!DOCTYPE) [^>]*+> [^<]* (.+))" ) 489 | if m.isNone or 490 | not m.get.captures[3].node or (let r=remain.replace(re"^\s+|\s+$",""); r).len>0 and 491 | not r.contains(re( r"\s*(?:" & nodeR & ")*")): 492 | echo "\nCan't parse it due to mark-up language's ill-form or unbalanced tag pair\nAborting" 493 | quit(0) 494 | iniNode= @[(m.get.captures[0], m.get.captures[1] & "")] 495 | 496 | template path_search_H= 497 | avgOffNode_Ply= (totN.float / maxND.float * 1.5 ).uint 498 | miss = newSeqOfCap[ string ](totPaths) 499 | let maxFouND = (totN.float * 3 / 4).uint 500 | fpath= newSeqOfCap[ StrPair ](maxFouND) 501 | maxw= (whole.len-17).uint 502 | offset= newStringOfCap(maxw) 503 | res = newStringOfCap(maxw) 504 | remain= newStringOfCap(maxw) 505 | for _ in 0..maxFouND: 506 | resultArr.add (newStringOfCap(maxw), newStringOfCap(maxw)) 507 | fpath.add (newStringOfCap(maxw), newStringOfCap(maxw)) 508 | for _ in 0..totPaths: 509 | pathResult.add (newStringOfCap(71), fpath) 510 | 511 | template path_search_B( asTarget="")= 512 | template unsortRes( fnd)= 
513 | foundd="" 514 | for i in pathResult: 515 | foundd &= "\n" & i[0] & ":" 516 | for j{.inject.} in i[1]: 517 | foundd &= "\n-----\n" & j.Node 518 | fnd 519 | fpath.reset 520 | pathResult.reset 521 | var fail :bool 522 | paths.sort( proc( a,b :string) :int= cmp(a.len, b.len) ) 523 | for aPath in paths: 524 | if fail: 525 | echo "Skipping it, searching for the next path" 526 | resultArr.reset 527 | fail= getE_Path_R( aPath, iniNode) 528 | if fail: 529 | miss.add aPath; echo "Can't find:\n",aPath 530 | else: 531 | pathResult.add (aPath, resultArr) # tuple of each path's array of offset-node found 532 | block F: 533 | for s in short: # filter out duplicate path or path whose head as the same as shorter one's 534 | if aPath.contains(re(r"^\Q" & s & r"\E")): break F 535 | fpath.add(resultArr) 536 | short.add aPath 537 | if miss.len>0: 538 | if pathResult.len>0: 539 | echo "\nSkip every unfound path and keep going for the found ones? (y: Yes. else key: Abort) " 540 | if getch()=='y': 541 | echo "To process every path" 542 | for p in pathResult: 543 | echo "\n",p[0] 544 | else: quit("\nAborting",0) 545 | else: quit("\nNothing was done" & asTarget,0) 546 | else: stdout.write "Every given path was " 547 | if asTarget=="": 548 | unsortRes: founds &= j.Node 549 | else:unsortRes: discard 550 | 551 | template each_path_search( file :string; asTarget="")= 552 | file.getDocFile 553 | echo "Checking document '",file,"'... " 554 | validatingML() 555 | path_search_H 556 | path_search_B( asTarget) 557 | echo "found on document '",file,"'",asTarget,"\nEvery element of it:\n",foundd 558 | 559 | ###### main ###### 560 | let 561 | xpath= re"(?x) ^(?> /?/? ( ([a-z]\w*+) (?:\[ (?> (?:last\(\)-)? [0-9]\d*+ | position\(\) (?!<1)[<>]=? [0-9]\d*+ | @((?>(?2)(?:=(?2))? | \*)) ) \])? | @(?-1) | \*) | \.\.?) (?://?(?1))* (?: //?text\(\) (?: \[ (?> (?:last\(\)-)? [1-9]\d*+ | position\(\) (?!<1) [<>]=? [1-9]\d*+) \])? )? 
$" 562 | var 563 | cmdLine= commandLineParams() 564 | (pathStr, srcFile)= if cmdLine.len>0: # NOTE: this block is expected to fail and needs a knowledgeable person's 565 | echo "\nTry to accomplish:" # collaboration to correct it 566 | for i,l in cmdLine: 567 | echo i,". ",l 568 | var 569 | op= cmdLine[1] 570 | l = cmdLine[0] 571 | (cmdLine[2], cmdLine[3]) 572 | else: 573 | echo "Element path is of Xpath form e.g:\n\thtml/body/div[1]//div[1]/div[2]\nmeans find in a given HTML or XML file, the second div tag element that is under the first\ndiv element anywhere under the first div element, under any body element,\nunder any html element.\n\nTo put multiply at once, put one after another delimited by ; or |. Put in two data,\nFirst, the element path. Copy operation may be as source then target delimited by '+>'\nSecond, the HTML/XML file name :\n" 574 | (readLine(stdin), readLine(stdin)) 575 | if pathStr.len==0: quit("\nNo Xpath given",0) 576 | 577 | let srdPaths= pathStr.replace(re"\h+", "").split(re"\+>") 578 | srdPaths[0].xPathsCheck 579 | var 580 | pathResult = newSeqOfCap[ (string, OffNodArray) ](totPaths) # Preallocation 581 | miss :seq[ string ] 582 | short :seq[ string ] 583 | fpath, iniNode :OffNodArray 584 | opt :char 585 | founds, foundd :string 586 | block: # scoped block, in the hope of getting the effect of a C++ delete: 587 | srcFile.each_path_search # the author expects allocations made inside to be freed by the Nim GC once the block exits 588 | if cmdLine.len==0: 589 | opt= if srdPaths.len>1:'c' 590 | else: 591 | echo "\nWhich operation would be done:\n- Remove\n- Copy\n- Save to file or quit\n( r: Remove. c: Copy. 
Else key: Save or quit )" 592 | getch() 593 | 594 | case opt 595 | of 'c','C': 596 | var dstPath= if srdPaths.len==1: 597 | echo "\nPut target element, in xpath form:"; readLine(stdin) 598 | else: srdPaths[1] 599 | dstPath.xPathsCheck : "of copy target" 600 | echo "\nSpecify the copy target file (Enter: as the same as the source):" 601 | var dstFile=readLine(stdin) 602 | if dstFile != "": 603 | dstFile.each_path_search: " to copying" 604 | else: 605 | path_search_B: " to copying" 606 | echo "found as copy target in the same document '",srcFile,"'\nEvery element of it:\n",foundd 607 | fpath.sort( proc( a,b :StrPair) :int=cmp( b.Offset.len, a.Offset.len) ) 608 | echo "Should source element be under target element, replacing it, preceding it, or following it?\n(u: Under it. r: Replacing it. p: Preceding it. else key: Following it)" 609 | case getch() 610 | of 'u','U': 611 | for on in fpath: 612 | whole= whole.replace(re( 613 | r"^\Q" & on.Offset & r"\E" & headR), "$0" & founds) 614 | of 'r','R': 615 | for on in fpath: 616 | whole= whole.replace(re( 617 | r"^(\Q" & on.Offset & r"\E)" & nodeR), "$1" & founds) 618 | of 'p','P': 619 | for on in fpath: 620 | whole= whole.replace(re( 621 | r"^\Q" & on.Offset & r"\E"), "$0" & founds) 622 | else: 623 | for on in fpath: 624 | whole= whole.replace(re( 625 | r"^\Q" & on.Offset & on.Node & r"\E"), "$0" & founds) 626 | echo "\nCopying result:\n",whole 627 | of 'r','R': 628 | fpath.sort( proc( a,b :StrPair) :int=cmp( b.Offset.len, a.Offset.len) ) 629 | for on in fpath: 630 | whole= whole.replace(re(r"(?s)^(\Q" & on.Offset & r"\E)\Q" & on.Node & r"\E(.*)"), "$1$2") 631 | echo "\nRemoval result:\n",whole 632 | else: whole=founds 633 | echo "Save to a file? (y: Yes, save. else key: Quit)" 634 | if getch()=='y': 635 | while true: 636 | echo "File name to save:" 637 | outf=readLine(stdin).replace(re"^\h+|\s+$", "") 638 | if outf.len>0: 639 | if fileExists(outf): 640 | echo "There is file name '",outf,"'\nOverwrite it (y: Yes. 
Else key: No) ?" 641 | if getch() != 'y': 642 | echo "\nNot overwrite it" 643 | else: 644 | try: writeFile(outf,whole) 645 | except IOError as e: 646 | echo "\nCannot write to '",outf,"': ",e.msg 647 | continue 648 | except: 649 | echo "\nFile '",outf,"': critical error" 650 | continue 651 | finally:break 652 | echo "Successfully saved to '",outf,"'" 653 | --------------------------------------------------------------------------------