├── LICENSE
├── README.md
└── rmHTMLement.nim
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 abdulbadii
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Get, Remove or Copy Markup Language elements
2 |
3 | Perform these operations on a file by specifying the path of the element(s) to act on.
4 |
5 | The path should use the abbreviated [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/#path-abbrev) syntax, e.g.:
6 |
7 | ```html/body/div[1]/div/div[2]```
8 |
9 | means: in the given HTML or XML file, find the second div element under every div element that lies under the first div element of any body element, under any html element.
10 |
11 | ```html/body/div[3]//div[1]/div```
12 |
13 | means: in the given HTML or XML file, find any div element under the first div element that lies at any depth under the third div element of any body element, under any html element.
14 |
15 | Multiple paths may be given one after another, delimited by ```;``` or ```|```.
16 | For copying, append the characters ```+>``` to the source path, followed by the copy target path,
17 | e.g.
18 |
19 | ```html/body/div[1]/div/div[2]+>html/body/div[3]//div[1]/div```
20 |
--------------------------------------------------------------------------------
/rmHTMLement.nim:
--------------------------------------------------------------------------------
1 | import std/[os, terminal, nre, math, algorithm, macros]
2 |
template strUint(s: string): uint =
  ## Converts a string of ASCII decimal digits to `uint`.
  ##
  ## Raises `ValueError` when `s` contains a non-digit character or when the
  ## value does not fit in `uint`. An empty string yields 0.
  let digits = s  # bind once: a template argument is re-evaluated on every use
  var res: uint
  for c in digits:
    if c notin {'0'..'9'}:
      raise newException(ValueError, "Non-number in string")
    let digit = uint(ord(c) - ord('0'))
    # Guard before multiplying: unsigned arithmetic would wrap around silently.
    if res > (high(uint) - digit) div 10:
      raise newException(ValueError, "Number in string is too large")
    res = res * 10 + digit  # Horner accumulation; no power computation needed
  res
template numChr(c: char, s: string): uint =
  ## Counts how many characters of `s` are equal to `c`.
  var hits: uint
  for ch in s:
    if ch == c:
      hits += 1
  hits
14 | # Markup-language regex fragments: text nodes, with void ("asymmetric") tags treated as text-node content:
15 | let
16 | txNodeR= r"(?:[^<>]++(?>|<(?>meta|link|input|img|hr|base)\b[^>]*+>)*)+"  # non-empty text run, optionally interleaved with meta/link/input/img/hr/base tags
17 | ctntR= r"(?:[^<>]*+(?>|<(?>meta|link|input|img|hr|base)\b[^>]*+>)*)*+" #-> same idea, but may be empty: the content part of an element node
18 | ctnt= re("(?s)^(" & ctntR & ")(.+)")  # capture 0: leading content, capture 1: the remainder
19 | nodeR= r"(<([a-z]\w*+)(?>[^/>]*+/>|[^>]*+>(?:" & ctntR & r"(?-2)*+)*+\g-1>))"  # one element, recursing via (?-2); NOTE(review): the closing-tag part may have lost a "</" in this copy - confirm
20 | headR= r"(<(?>[a-z]\w*+|!DOCTYPE)[^>]*+>)"  # a single opening tag or a <!DOCTYPE ...> declaration
21 | head= re("(?s)^" & headR & "(.+)")  # capture 0: the opening tag, capture 1: everything after it
22 | var
23 | whole, offset, res, remain, tag :string  # shared scratch state read and written by the matching routines below
24 | totN, maxND :uint  # written by nodeB from its tot/max counters when the outermost node closes
25 |
# Shared body of the recursive nodeRec procs. It expects `m` (the match of the
# opening tag) and `nodeRec` to be visible where it is expanded, and it returns
# from the enclosing proc. On success `res` holds the matched node text; when the
# outermost node closes (d back to 0) it also sets `remain`, `totN` and `maxND`.
# NOTE(review): the "" literals on dump lines 46 and 49 look like a "</" that was
# lost in extraction - confirm against the upstream file.
26 | template nodeB( tot, max, d :uint)=
27 | if m.isNone: return false
28 | let g= m.get.captures.toSeq  # g[0] tag name, g[1] self-closing tail, g[2] opening-tag tail, g[3] rest of input
29 | inc(tot); inc(d); if d>max: max=d  # count the node and track the deepest nesting seen
30 | var
31 | tagname= g[0].get
32 | off = "<" & tagname  # accumulates the text of the node being matched
33 | str = g[3].get
34 | if g[1].isSome:  # self-closing element: the node ends right here
35 | res= off & g[1].get
36 | dec(d)
37 | if d==0:
38 | remain= str
39 | totN=tot; maxND=max
40 | return true
41 | off &= g[2].get
42 | while true:
43 | let m = str.find ctnt  # peel off leading text content
44 | off &= m.get.captures[0]
45 | str= m.get.captures[1]
46 | if str[0..1] == "" :
47 | tagname &= ">"
48 | if str[ 2..1+tagname.len] == tagname:  # closing tag matches the opening tag name
49 | res= off & "" & tagname
50 | dec(d)
51 | if d==0:
52 | remain= str[ 2+tagname.len..^1]
53 | totN=tot; maxND=max
54 | return true
55 | else: break
56 | elif nodeRec( str):  # a nested child element: append it and continue after it
57 | off &= res
58 | str= str[ res.len..^1]
59 | else: break
60 | return false
61 |
62 | # Two node functions that match either a directly closed tag or nested ones, counting max depth, nodes, etc.
63 | # They are only wrappers, with and without a lookahead tag, around the recursive node function inside.
# This overload matches any element at the start of `str` (default: the global
# `remain`) and returns true on success; results are left in the shared globals by nodeB.
64 | proc node( str= remain) :bool=
65 | var tot, max, d :uint  # node count, max depth and current depth shared by all recursion levels
66 | proc nodeRec( str :string) :bool=
67 | let m= str.find re"(?xs) ^<([a-z]\w*+) (?> ([^/>]*+/>) | ([^>]*+>) ) (.+)"
68 | nodeB tot, max, d
69 | nodeRec str
70 |
# Overload of `node` whose outermost tag-name pattern is prefixed with `tag`, a
# regex fragment (callers pass lookahead fragments such as "(?=name\b)").
71 | proc node( str, tag :string) :bool=
72 | var tot, max, d :uint
73 | proc nodeRec( str :string, tag="") :bool=
74 | let m= str.find re("(?xs) ^<(" & tag & r"[a-z]\w*+) (?> ([^/>]*+/>) | ([^>]*+>) ) (.+)" )
75 | nodeB tot, max, d  # nested calls made by nodeB omit `tag`, so the prefix only constrains the outermost element
76 | nodeRec str, tag
77 |
# Scans the global `remain` for the next element whose tag matches `tag`, skipping
# text content and elements with other tags. Evaluates to true when one is found.
78 | template getOffUp2Tag( tag="") :bool=
79 | let m= remain.find re(
80 | "(?s)^((?:" & ctntR & "(?:(?!<" & tag & r"\b)" & nodeR & ")?)*+)(?=<" & tag & r"\b)" & nodeR & "(.+)" )
81 | if m.isNone: false
82 | else:
83 | offset &= m.get.captures[0]  # everything that was skipped before the match
84 | res= m.get.captures[3]  # the matched element itself
85 | remain= m.get.captures[5]  # what follows the matched element
86 | true
87 |
88 | type
89 | StrPair = tuple[ Offset :string, Node :string]  # a node's text together with the text that precedes it
90 | OffNodArray = seq[ StrPair]  # list of offset/node pairs
91 |
# Repeatedly moves leading content from `remain` to `offset`, then evaluates
# `isTheTagNode`; while it is true the node in `res` is consumed (and queued in `nd`
# when maxND > minD). Stops at the first false. `minD` is not a parameter and must
# be visible where this template is expanded.
92 | template ctnPrevTag( nd :OffNodArray; isTheTagNode) =
93 | while true:
94 | let m= remain.find ctnt
95 | offset &= m.get.captures[0]
96 | remain = m.get.captures[1]
97 | if isTheTagNode:
98 | if maxND > minD: nd.add (offset, res)
99 | offset &= res
100 | else: break
101 |
# Skips elements that do not match the global `tag` (via ctnPrevTag with a negative
# lookahead), then tries to match one that does (positive lookahead). On a match it
# runs `xSpeCmd`; otherwise it breaks out of the loop enclosing the expansion.
# Note that it rewrites the global `tag` in place on every expansion.
102 | template ctnoTagUp2Tag( nd :OffNodArray, xSpeCmd) =
103 | tag &= r"\b)"
104 | ctnPrevTag nd:
105 | node remain, "(?!" & tag
106 | tag = "(?=" & tag
107 | if node(remain, tag):
108 | if maxND > minD:
109 | nd.add (offset, res)
110 | xSpeCmd
111 | offset &= res
112 | else: break
113 |
template headeRemain(nd: string, prevOff = "") =
  ## Splits `nd` with the `head` regex: the global `offset` becomes `prevOff`
  ## followed by the opening tag, and the global `remain` becomes the text
  ## after that tag. Fails on `.get` when `nd` does not start with a tag.
  let parts = find(nd, head).get.captures
  offset = prevOff & parts[0]
  remain = parts[1]
118 |
# Return values of the search routines: they answer "did it fail?", so true means failure.
119 | const
120 | isERROR = true
121 | isSUCCEED= false
122 |
template posiN(posn: string, a, b: uint) =
  ## Parses a `<n`, `<=n`, `>n` or `>=n` predicate out of `posn` and stores
  ## it as bounds: callers accept an index `i` when `i > a` and, if `b` is
  ## non-zero, `i <= b`. A `<` form sets `b`; a `>` form sets `a`.
  let
    caps = posn.find(re"(?>(<)|>)(=)?(\d+)").get.captures.toSeq
    inclusive = caps[1].isSome
    bound = caps[2].get.strUint
  if caps[0].isNone:
    # '>' form: exclusive lower bound
    a = if inclusive: bound - 1 else: bound
  else:
    # '<' form: inclusive upper bound
    b = if inclusive: bound else: bound - 1
131 |
# Converts a "last()-n" style index into a forward index. It counts how many
# elements matching the global `tag` can be matched from the start of `remain`;
# returns false when there are too few, otherwise rewrites `n` and returns true.
132 | proc getNthRev( n :var uint) :bool=
133 | var i :uint
134 | while true:
135 | inc(i)
136 | let m= remain.find re(
137 | "^((?:(?:" & ctntR & "(?:(?!<" & tag & r"\b)" & nodeR & ")?)*+(?=<" & tag & r"\b)" & nodeR & "){" & $i & "})" )
138 | if m.isNone: break  # i is now the first repetition count that no longer matches
139 | if i<=n: return false
140 | n = i-n # i is the max nth from which subtract the specified nth
141 | true
142 |
# Text-node counterpart of getNthRev: counts how many text nodes can be matched
# from the start of `remain`, then turns a reverse index `n` into a forward one.
# Returns false when there are too few text nodes.
143 | proc getxNRev( n :var uint) :bool=
144 | var i :uint
145 | while true:
146 | inc(i)
147 | let m= remain.find re( r"^(?:" & nodeR & "*" & txNodeR & "){" & $i & "}" )
148 | if m.isNone: break
149 | if i<=n: return false
150 | n= i-n
151 | true
152 |
# Finds the n-th text node directly inside `nod` and appends it to `ret`.
# Evaluates to isERROR when it cannot be found, isSUCCEED otherwise.
# NOTE(review): getxNRev takes `n` by var, so `n` presumably must be an l-value
# at the call site when txNRev is true - confirm.
153 | template getTx_Nth( ret :OffNodArray; off, nod :string; n :uint; txNRev:bool) :bool=
154 | nod.headeRemain off
155 | if txNRev and not getxNRev( n): isERROR
156 | else:
157 | let m= remain.find re( r"^((?:" & nodeR & "*(" & txNodeR & ")){" & $n & "})" )
158 | if m.isNone: isERROR
159 | else:
160 | var r= m.get.captures[3]  # the text node of the last repetition
161 | ret.add (offset & m.get.captures[0][0..^r.len+1], r)  # offset = everything matched before that text node
162 | isSUCCEED
163 |
# Finds the n-th element matching the global `tag` directly inside `nod` and
# appends it to `ret`. Evaluates to isERROR when it cannot be found, isSUCCEED otherwise.
164 | template getE_Nth( ret :OffNodArray; off, nod :string; n:uint; nthRev:bool) :bool=
165 | nod.headeRemain off
166 | if nthRev and not getNthRev( n): isERROR
167 | else:
168 | let m= remain.find re(
169 | r"^((?:(?:" & ctntR & "(?:(?!<" & tag & r"\b)" & nodeR & ")?)*+(?=<" & tag & r"\b)" & nodeR & "){" & $n & "})")
170 | if m.isNone: isERROR
171 | else:
172 | var r= m.get.captures[3]  # the element matched by the last repetition
173 | ret.add (offset & m.get.captures[0][0..^r.len+1], r)  # offset = everything matched before that element
174 | isSUCCEED
175 |
# Collects every text node directly inside `nod` whose 1-based index passes the
# optional position predicate `posn`. The generated expression is true when
# nothing was added to `ret` (failure).
176 | macro getTextMulN( ret :OffNodArray; off, nod, posn :string) :bool=
177 | result = quote do:
178 | `nod`.headeRemain `off`
179 | var a,b,i :uint
180 | if `posn` != "": `posn`.posiN(a,b)
181 | while true:
182 | inc(i)
183 | let m= remain.find re( r"^((?:" & nodeR & "*(" & txNodeR & ")){" & $i & "})" )
184 | if m.isNone: break
185 | if i>a and (b==0 or i<=b):  # b==0 means no upper bound
186 | var r= m.get.captures[3]
187 | `ret`.add (offset & m.get.captures[0][0..^r.len+1], r)
188 | `ret`.len==0
189 |
# Collects every element matching the global `tag` directly inside `nod`,
# optionally filtered by a position predicate `posn` or by an attribute pattern
# `att` (which is appended to `tag`). The generated expression is true when
# nothing was added to `ret`.
190 | macro getE_MultiN( ret :OffNodArray; off, nod, posn, att :string) :bool=
191 | result = quote do:
192 | `nod`.headeRemain `off`
193 | var a,b,i :uint
194 | if `posn` != "": `posn`.posiN(a,b)
195 | elif `att` != "": tag &= r"\s+" & `att`
196 | while true:
197 | if getOffUp2Tag( tag):
198 | inc(i)
199 | if i>a and (b==0 or i<=b): `ret`.add (offset, res)
200 | offset &= res  # step past the matched element before searching again
201 | else: break
202 | `ret`.len==0
203 |
# Attribute-only overload: collects every element directly inside `nod`, whatever
# its tag name, whose opening tag matches the attribute pattern `aatt`.
# Evaluates to true when nothing was added to `ret`.
204 | template getE_MultiN( ret :OffNodArray; off, nod, aatt :string) :bool=
205 | nod.headeRemain off
206 | while true:
207 | if getOffUp2Tag( r"\S+\s+" & aatt):
208 | ret.add (offset, res)
209 | offset &= res
210 | else: break
211 | ret.len==0
212 |
# Wildcard overload: collects every element directly inside `nod`.
# Evaluates to true when nothing was added to `ret`.
213 | template getE_MultiN( ret :OffNodArray; off, nod :string) :bool=
214 | nod.headeRemain off
215 | while true:
216 | if getOffUp2Tag:  # empty tag pattern
217 | ret.add (offset, res)
218 | offset &= res
219 | else: break
220 | ret.len==0
221 |
# Declares the two work lists used by the all-depth ("//") searches in the caller's
# scope (dirty template): `curNode` is the level being scanned, `nd` the next one.
222 | template allDepthInit {.dirty.}=
223 | var curNode, nd= newSeqOfCap[ StrPair]( avgOffNode_Ply)
224 | for _ in 0..avgOffNode_Ply:
225 | nd.add (newStringOfCap(maxw), newStringOfCap(maxw))
226 | curNode.add (newStringOfCap(maxw), newStringOfCap(maxw))
227 | curNode.reset  # NOTE(review): only curNode is reset here; nd is reset by its users before each pass
228 |
# All-depth search for the nth text node: level by level, applies getTx_Nth to each
# node in `curNode` and queues that node's child elements in `nd` for the next pass.
# Evaluates to true when `ret` is empty. Uses `curNode`/`nd` from allDepthInit.
229 | template getAllDepthNth_Tx( ret :OffNodArray; off, nod :string; nth :uint; txNRev:bool) :bool=
230 | curNode.add (off, nod)
231 | while curNode.len>0:
232 | nd.reset
233 | for the in curNode:
234 | if getTx_Nth( ret, the.Offset, the.Node, nth, txNRev): ret.add (offset, res)  # NOTE(review): this adds when getTx_Nth reports isERROR (true) - confirm intent
235 | while true:
236 | if getOffUp2Tag: nd.add (offset, res)
237 | else: break
238 | curNode= nd
239 | ret.len==0
240 |
# All-depth search with an optional position predicate `posn`. Walks the tree level
# by level through `curNode`/`nd`. Evaluates to true when `ret` is empty.
# NOTE(review): the loop body never calls headeRemain on `the`, and `i` is not
# reset per node - confirm this is intended.
241 | template getAllDepthMultiN_Tx( ret :OffNodArray; off, nod, posn :string) :bool=
242 | var i,a,b:uint
243 | if posn != "": posn.posiN(a,b)
244 | curNode.add (off, nod)
245 | while curNode.len>0:
246 | nd.reset
247 | for the in curNode:
248 | while true:
249 | if getOffUp2Tag:
250 | nd.add (offset, res)
251 | i.inc
252 | if i>a and (b==0 or i<=b):
253 | ret.add (offset, res)
254 | else:
255 | break
256 | curNode= nd
257 | ret.len==0
258 |
# All-depth search for the nth element matching the global `tag`. Depending on
# `nextIsTx` it uses getOffUp2Tag (txLoop) or ctnoTagUp2Tag with the minimum-depth
# check (loopn); with `nthRev` the index is first converted by getNthRev.
# Evaluates to true when `ret` is empty.
259 | template getAllDepthNth( ret :OffNodArray; off, nod :string; minD, nth :uint; nthRev, nextIsTx:bool) :bool=
260 | curNode.add (off, nod)
261 | template everyPsblNode( nthORnthRevCo) =  # runs the given body for every node, level by level
262 | while curNode.len>0:
263 | nd.reset
264 | for the in curNode:
265 | the.Node.headeRemain the.Offset
266 | nthORnthRevCo
267 | curNode= nd
268 | var i :uint
269 | template loopn( n:uint)=
270 | while true:
271 | inc(i)
272 | ctnoTagUp2Tag nd:
273 | if i==n and maxND >= minD: ret.add (offset, res)
274 | template txLoop( n:uint)=  # NOTE(review): contains `break` without a visible enclosing loop in this copy - confirm
275 | if getOffUp2Tag( tag):
276 | nd.add (offset, res)
277 | inc(i)
278 | if i==n:
279 | ret.add (offset, res)
280 | offset &= res
281 | else: break
282 | if nextIsTx:
283 | if nthRev:
284 | everyPsblNode:
285 | var n=nth; if getNthRev( n): n.txLoop
286 | else:
287 | everyPsblNode:
288 | nth.txLoop
289 | else:
290 | if nthRev:
291 | everyPsblNode:
292 | var n=nth; if getNthRev( n): n.loopn
293 | else:
294 | everyPsblNode:
295 | nth.loopn
296 | ret.len==0
297 |
# Level-by-level traversal driver used by the getAllDepthMultiN templates. For every
# node it runs `commandIFoundOrBreak` in an endless loop (the body must `break`) and
# then `i_var`. Relies on `this`, `curNode` and `nd` being visible at the expansion site.
298 | template loop( commandIFoundOrBreak, i_var) =
299 | curNode.add (this.Offset, this.Node)
300 | while curNode.len>0:
301 | nd.reset
302 | for the in curNode:
303 | the.Node.headeRemain the.Offset
304 | while true:
305 | commandIFoundOrBreak
306 | i_var
307 | curNode=nd
308 |
309 | # off, nod are the arguments passed by the offsetNodeLoop template; they normally are this.Offset, this.Node
# All-depth search for every element matching the global `tag`, optionally filtered
# by a position predicate `posn` or an attribute pattern `att` (appended to `tag`).
# Evaluates to true when `ret` is empty.
311 | template getAllDepthMultiN( ret :OffNodArray; off, nod :string; minD:uint; posn, att :string; nextIsTx :bool) :bool {.dirty.}=
312 | var a, b, i :uint
313 | if posn != "": posn.posiN(a,b)
314 | elif att != "": tag &= r"\s+" & att
315 | if nextIsTx:
316 | loop do:
317 | if getOffUp2Tag( tag):
318 | nd.add (offset, res)
319 | inc(i)
320 | if i>a and (b==0 or i<=b):
321 | ret.add (offset, res)
322 | offset &= res
323 | else: break
324 | do: i=0  # restart the position counter for each parent node
325 | else:
326 | loop do:
327 | ctnoTagUp2Tag nd:
328 | inc(i)
329 | if i>a and (b==0 or i<=b) and maxND >= minD:
330 | ret.add (offset, res)
331 | do: i=0
332 | ret.len==0
333 |
# Attribute-only all-depth overload: sets the global `tag` to "any tag name followed
# by attribute pattern `aatt`" and collects every match whose depth satisfies minD.
# Evaluates to true when `ret` is empty.
334 | template getAllDepthMultiN( ret :OffNodArray; o, n :string; minD :uint; aatt :string; nextIsTx :bool) :bool {.dirty.}=
335 | tag = r"\S+\s+" & aatt
336 | loop do:
337 | ctnoTagUp2Tag nd:
338 | if maxND >= minD: ret.add (offset, res)
339 | do:discard
340 | ret.len==0
341 |
# Wildcard all-depth overload: clears the global `tag` and collects every element
# whose depth satisfies minD. Evaluates to true when `ret` is empty.
342 | template getAllDepthMultiN( ret :OffNodArray; o, n :string; minD :uint; nextIsTx :bool) :bool {.dirty.}=
343 | tag = ""
344 | loop do:
345 | ctnoTagUp2Tag nd:
346 | if maxND >= minD: ret.add (offset, res)
347 | do:discard
348 | ret.len==0
349 |
# Runs the search expression `xPathPat` once for every offset/node pair in
# `offsetNode`, exposing the current pair as `this`. The visible branches either
# recurse into getE_Path_R with the remaining path or append the matches to `resultArr`.
# NOTE(review): dump line 354 looks truncated and source lines 355-357 are missing
# from this copy - restore them from the upstream file before relying on this block.
350 | template offsetNodeLoop( xPathPat) =
351 | for i, this {.inject.} in offsetNode:
352 | retOffNode.reset
353 | if xPathPat:
354 | if i0:
358 | let e= getE_Path_R( remPath, retOffNode) #...is propagating to the next depth and returning boolean
359 | if i==offsetNode.high: return e # value which'll be returned if this's the last iteration
360 | else:
361 | resultArr.add retOffNode
362 |
363 | var
364 | avgOffNode_Ply, maxw :uint  # preallocation sizes, computed in path_search_H
365 | resultArr :OffNodArray  # matches of the path currently being searched
366 |
# Recursive path resolver: parses the first step of `path` (tag, text(), attribute or
# wildcard, with an optional predicate and an optional leading "//"), then dispatches
# to the matching search template for every pair in `offsetNode`. The rest of the
# path (`remPath`) is handled by the recursion inside offsetNodeLoop.
367 | proc getE_Path_R( path :string; offsetNode :OffNodArray) :bool=
368 | var
369 | g= path.find(re"(?x)^/ (/)? (?> (text\(\)) (?: \[ (?> (last\(\)-)? ([1-9]\d*+) | position\(\) (?!<1) ([<>]=? [1-9]\d*+)) \])? | (?> ([^/@*[]+) (?: \[ (?> (?>(last\(\)-)|position\(\)=)? ([1-9]\d*+) | position\(\) (?!<1) ( [<>]=? [1-9]\d*+ ) | @(\*| [^]]+) ) \] )? | @([a-z]\w*[^/]* |\*) | (\*) ) (.*) )" ).get.captures.toSeq
370 | nth, txNth :uint
371 | txPos, posn, attg, aatt :string
372 | remPath= g[12].get  # whatever follows the first path step
373 | minD= 1 + numChr( '/', remPath)  # one plus the number of '/' left in the path
374 | retOffNode= newSeqOfCap[ StrPair]( avgOffNode_Ply) # the offset-node pair found which...
375 | for _ in 0..avgOffNode_Ply:
376 | retOffNode.add (newStringOfCap(maxw),newStringOfCap(maxw))
# Readable names for the capture groups of the step regex above:
377 | template isAllDepths:bool= g[0].isSome
378 | template isTxNode :bool= g[1].isSome
379 | template txNRev :bool= g[2].isSome
380 | template isTxNth:bool= g[3].isSome
381 | template isTxPos:bool= g[4].isSome
382 | template isTag :bool= g[5].isSome
383 | template nthRev :bool= g[6].isSome
384 | template isNth :bool= g[7].isSome
385 | template isPosn :bool= g[8].isSome
386 | template isAttg :bool= g[9].isSome
387 | template isAatt :bool= g[10].isSome
388 | template nextPly_Tx :bool= remPath.find(re"^//?text\(\)").isSome  # is the next step a text() node?
389 | if isTxNode:
390 | if isTxNth: txNth= g[3].get.strUint
391 | elif isTxPos: txPos= g[4].get
392 | elif isTag:
393 | tag= g[5].get
394 | if isNth:
395 | nth= g[7].get.strUint
396 | elif isPosn:
397 | posn= g[8].get
398 | elif isAttg:
399 | attg= g[9].get
400 | elif isAatt:
401 | aatt= g[10].get
402 | if isAllDepths: # all depths under current //
403 | allDepthInit
404 | if isTag:
405 | if isNth:
406 | offsetNodeLoop: getAllDepthNth retOffNode, this.Offset, this.Node, minD, nth, nthRev, nextPly_Tx
407 | else:
408 | offsetNodeLoop: getAllDepthMultiN retOffNode, this.Offset, this.Node, minD, posn, attg, nextPly_Tx
409 | elif isAatt:
410 | offsetNodeLoop: getAllDepthMultiN retOffNode, this.Offset, this.Node, minD, aatt=aatt, nextPly_Tx
411 | elif isTxNode:
412 | if isTxNth:
413 | offsetNodeLoop: getAllDepthNth_Tx retOffNode, this.Offset, this.Node, txNth, txNRev
414 | else:
415 | offsetNodeLoop: getAllDepthMultiN_Tx retOffNode, this.Offset, this.Node, txPos
416 | else:
417 | offsetNodeLoop: getAllDepthMultiN retOffNode, this.Offset, this.Node, minD, nextPly_Tx
418 | elif isTag:  # direct children only
419 | if isNth:
420 | offsetNodeLoop: getE_Nth retOffNode, this.Offset, this.Node, nth, nthRev
421 | else:
422 | offsetNodeLoop: getE_MultiN retOffNode, this.Offset, this.Node, posn, attg
423 | elif isAatt:
424 | offsetNodeLoop: getE_MultiN retOffNode, this.Offset, this.Node, aatt
425 | elif isTxNode:
426 | if isTxNth:
427 | offsetNodeLoop: getTx_Nth retOffNode, this.Offset, this.Node, txNth, txNRev
428 | else:
429 | offsetNodeLoop: getTextMulN retOffNode, this.Offset, this.Node, txPos
430 | else:
431 | offsetNodeLoop: getE_MultiN retOffNode, this.Offset, this.Node
432 | # be any of these true, it failed finding. Now see if
433 | isSUCCEED
434 |
435 | var
436 | aCP, outf :string  # aCP: base path for relative xpaths (see xPathsCheck); outf: output file name
437 | paths :seq[ string]  # validated absolute xpaths, filled by xPathsCheck
# Splits `path` on '|' or ';', validates each part against the `xpath` regex and fills
# the global `paths`. Relative parts are resolved against `aCP`, which is requested
# interactively when empty. Injects `totPaths` into the caller's scope and quits when
# no valid path remains.
438 | template xPathsCheck( path:string; hasTarget="")=
439 | paths.reset
440 | for p in path.split re"[|;]":
441 | if p.contains xpath:
442 | if p.contains re"^[^/]":  # does not start with '/': a relative path
443 | if aCP.len == 0:
444 | echo "\n'",p,"' is relative to base/current path which is empty"
445 | while true:
446 | echo "\nPut a valid one: "
447 | aCP = readLine(stdin).replace( re"\h+", "")
448 | if aCP.contains xpath: break
449 | echo "\n'",aCP,"' is not a valid Xpath"
450 | aCP &= "/"
451 | var pr= p.replace( re"^\./", "")
452 | while pr.contains(re"^\.\.") :  # each leading ".." drops one trailing step of aCP
453 | if aCP == "/" : echo "\n'", pr,"' upward node ..\nran over '",aCP,"' current path";quit(0)
454 | aCP = aCP.replace( re"[^/]+/$", "")
455 | pr = pr.replace( re"^../?", "")
456 | paths.add( aCP & pr)
457 | else: paths.add( p)
458 | else:
459 | echo "\n'",p,"' is invalid Xpath\nSkip? (s: skip. else: abort): "
460 | if getch() != 's': echo "\nAborting";quit(1)
461 | let totPaths {.inject.}= paths.len.uint
462 | if totPaths==0: quit("\nNo valid xpath " & hasTarget,0)
463 |
# Convenience overload: reads file `f` into the global `whole`.
464 | template getDocFile( f :string)=
465 | f.getDocFile(whole)
template getDocFile(f, w: string) =
  ## Reads the file named `f` into `w`, asking on stdin for another file name
  ## (stored back into `f`) until a file has been read successfully.
  ##
  ## Both arguments must be assignable variables at the call site.
  while true:
    if fileExists(f):
      try:
        w = readFile f
        break  # leave the retry loop only after a successful read
      except IOError as e:
        echo "\nCannot read '", f, "': ", e.msg
      except OverflowDefect as e:
        # kept from the original handler set
        echo e.msg
      except CatchableError:
        # report the file that was being read, not the output file name
        echo "\nFile '", f, "': critical error"
    else:
      echo "'", f, "' doesn't exist"
    # Reached when the file is missing or unreadable: ask for a non-empty name.
    while true:
      stdout.write "Try another file name:"
      f = readLine(stdin).replace(re"^\h+|\s+$", "")
      if f.len > 0: break
484 |
# Convenience overload: validates the global `whole`.
485 | template validatingML= validatingML whole
# Checks that document `w` starts with an optional <?xml ...> declaration and a
# <!DOCTYPE ...>, and that what follows parses as a balanced node (via `node`).
# Quits with a message otherwise; on success it sets `iniNode` to the starting
# offset/node pair.
# NOTE(review): the "" literal on dump line 494 looks like a lost "<" - confirm.
486 | template validatingML( w :string)=
487 | let m= w.find re(
488 | r"(?xs)^(\s* (?: <\?xml\b [^>]*+> \s* )?) (< (!DOCTYPE) [^>]*+> [^<]* (.+))" )
489 | if m.isNone or
490 | not m.get.captures[3].node or (let r=remain.replace(re"^\s+|\s+$",""); r).len>0 and
491 | not r.contains(re( r"\s*(?:" & nodeR & ")*")):
492 | echo "\nCan't parse it due to mark-up language's ill-form or unbalanced tag pair\nAborting"
493 | quit(0)
494 | iniNode= @[(m.get.captures[0], m.get.captures[1] & "" & m.get.captures[2] & ">")]
495 |
# Search preamble: sizes and preallocates the shared buffers from the node count
# (totN), the maximum depth (maxND) and the document length.
# NOTE(review): maxND == 0 would divide by zero and whole.len < 17 would underflow
# before the uint conversion - confirm these cannot occur after validatingML.
496 | template path_search_H=
497 | avgOffNode_Ply= (totN.float / maxND.float * 1.5 ).uint
498 | miss = newSeqOfCap[ string ](totPaths)
499 | let maxFouND = (totN.float * 3 / 4).uint
500 | fpath= newSeqOfCap[ StrPair ](maxFouND)
501 | maxw= (whole.len-17).uint
502 | offset= newStringOfCap(maxw)
503 | res = newStringOfCap(maxw)
504 | remain= newStringOfCap(maxw)
505 | for _ in 0..maxFouND:
506 | resultArr.add (newStringOfCap(maxw), newStringOfCap(maxw))
507 | fpath.add (newStringOfCap(maxw), newStringOfCap(maxw))
508 | for _ in 0..totPaths:
509 | pathResult.add (newStringOfCap(71), fpath)
510 |
# Search body: resolves every path in `paths` (shortest first) against `iniNode`,
# records misses in `miss` and hits in `pathResult`/`fpath`, asks whether to continue
# when some paths were not found, then builds the report string `foundd` (and
# `founds` when `asTarget` is empty).
511 | template path_search_B( asTarget="")=
512 | template unsortRes( fnd)=  # builds `foundd`; runs `fnd` once per found node `j`
513 | foundd=""
514 | for i in pathResult:
515 | foundd &= "\n" & i[0] & ":"
516 | for j{.inject.} in i[1]:
517 | foundd &= "\n-----\n" & j.Node
518 | fnd
519 | fpath.reset
520 | pathResult.reset
521 | var fail :bool
522 | paths.sort( proc( a,b :string) :int= cmp(a.len, b.len) )
523 | for aPath in paths:
524 | if fail:
525 | echo "Skipping it, searching for the next path"
526 | resultArr.reset
527 | fail= getE_Path_R( aPath, iniNode)
528 | if fail:
529 | miss.add aPath; echo "Can't find:\n",aPath
530 | else:
531 | pathResult.add (aPath, resultArr) # tuple of each path's array of offset-node found
532 | block F:
533 | for s in short: # filter out duplicate path or path whose head as the same as shorter one's
534 | if aPath.contains(re(r"^\Q" & s & r"\E")): break F
535 | fpath.add(resultArr)
536 | short.add aPath
537 | if miss.len>0:
538 | if pathResult.len>0:
539 | echo "\nSkip every unfound path and keep going for the found ones? (y: Yes. else key: Abort) "
540 | if getch()=='y':
541 | echo "To process every path"
542 | for p in pathResult:
543 | echo "\n",p[0]
544 | else: quit("\nAborting",0)
545 | else: quit("\nNothing was done" & asTarget,0)
546 | else: stdout.write "Every given path was "
547 | if asTarget=="":
548 | unsortRes: founds &= j.Node  # source search: also concatenate the found nodes into `founds`
549 | else:unsortRes: discard
550 |
# Full pipeline for one document: read it, validate it, preallocate, search every
# path and print the report.
551 | template each_path_search( file :string; asTarget="")=
552 | file.getDocFile
553 | echo "Checking document '",file,"'... "
554 | validatingML()
555 | path_search_H
556 | path_search_B( asTarget)
557 | echo "found on document '",file,"'",asTarget,"\nEvery element of it:\n",foundd
558 |
559 | ###### main ######
560 | let
# Validation regex for a whole (abbreviated) xpath expression, used by xPathsCheck.
561 | xpath= re"(?x) ^(?> /?/? ( ([a-z]\w*+) (?:\[ (?> (?:last\(\)-)? [0-9]\d*+ | position\(\) (?!<1)[<>]=? [0-9]\d*+ | @((?>(?2)(?:=(?2))? | \*)) ) \])? | @(?-1) | \*) | \.\.?) (?://?(?1))* (?: //?text\(\) (?: \[ (?> (?:last\(\)-)? [1-9]\d*+ | position\(\) (?!<1) [<>]=? [1-9]\d*+) \])? )? $"
562 | var
563 | cmdLine= commandLineParams()
# Take the path string and the source file from the command line when given,
# otherwise print the usage text and read both from stdin.
# NOTE(review): the command-line branch reads cmdLine[1], [2] and [3] while only
# checking cmdLine.len>0, and `op`/`l` are never used.
564 | (pathStr, srcFile)= if cmdLine.len>0: # NOTE(author): this block is known to be wrong and needs a
565 | echo "\nTry to accomplish:" # knowledgeable collaborator to correct it
566 | for i,l in cmdLine:
567 | echo i,". ",l
568 | var
569 | op= cmdLine[1]
570 | l = cmdLine[0]
571 | (cmdLine[2], cmdLine[3])
572 | else:
573 | echo "Element path is of Xpath form e.g:\n\thtml/body/div[1]//div[1]/div[2]\nmeans find in a given HTML or XML file, the second div tag element that is under the first\ndiv element anywhere under the first div element, under any body element,\nunder any html element.\n\nTo put multiply at once, put one after another delimited by ; or |. Put in two data,\nFirst, the element path. Copy operation may be as source then target delimited by '+>'\nSecond, the HTML/XML file name :\n"
574 | (readLine(stdin), readLine(stdin))
575 | if pathStr.len==0: quit("\nNo Xpath given",0)
576 |
# Split the input into source path(s) and an optional copy target at "+>", validate
# the source paths, declare the search state and run the search on the source file.
577 | let srdPaths= pathStr.replace(re"\h+", "").split(re"\+>")
578 | srdPaths[0].xPathsCheck
579 | var
580 | pathResult = newSeqOfCap[ (string, OffNodArray) ](totPaths) # Preallocation
581 | miss :seq[ string ]  # paths that were not found
582 | short :seq[ string ]  # paths already kept, used to drop paths that only extend a shorter one
583 | fpath, iniNode :OffNodArray
584 | opt :char
585 | founds, foundd :string
586 | block: # scope to get around equivalent C++ delete command with hope
587 | srcFile.each_path_search # once exiting it, any allocation inside gets freed by Nim GC
# Choose the operation interactively; a "+>" target in the input implies copy.
# NOTE(review): `opt` is only assigned when no command-line arguments were given.
588 | if cmdLine.len==0:
589 | opt= if srdPaths.len>1:'c'
590 | else:
591 | echo "\nWhich operation would be done:\n- Remove\n- Copy\n- Save to file or quit\n( r: Remove. c: Copy. Else key: Save or quit )"
592 | getch()
593 |
# Dispatch on the chosen operation. Copy and remove rewrite `whole` through regex
# replacement anchored on each found offset; any other key keeps only the found nodes.
594 | case opt
595 | of 'c','C':
596 | var dstPath= if srdPaths.len==1:
597 | echo "\nPut target element, in xpath form:"; readLine(stdin)
598 | else: srdPaths[1]
599 | dstPath.xPathsCheck : "of copy target"
600 | echo "\nSpecify the copy target file (Enter: as the same as the source):"
601 | var dstFile=readLine(stdin)
602 | if dstFile != "":
603 | dstFile.each_path_search: " to copying"
604 | else:
605 | path_search_B: " to copying"
606 | echo "found as copy target in the same document '",srcFile,"'\nEvery element of it:\n",foundd
607 | fpath.sort( proc( a,b :StrPair) :int=cmp( b.Offset.len, a.Offset.len) )  # longest offset first
608 | echo "Should source element be under target element, replacing it, preceding it, or following it?\n(u: Under it. r: Replacing it. p: Preceding it. else key: Following it)"
609 | case getch()
610 | of 'u','U':  # insert right after the target's opening tag
611 | for on in fpath:
612 | whole= whole.replace(re(
613 | r"^\Q" & on.Offset & r"\E" & headR), "$0" & founds)
614 | of 'r','R':  # replace the target node
615 | for on in fpath:
616 | whole= whole.replace(re(
617 | r"^(\Q" & on.Offset & r"\E)" & nodeR), "$1" & founds)
618 | of 'p','P':  # insert before the target node
619 | for on in fpath:
620 | whole= whole.replace(re(
621 | r"^\Q" & on.Offset & r"\E"), "$0" & founds)
622 | else:  # insert after the target node
623 | for on in fpath:
624 | whole= whole.replace(re(
625 | r"^\Q" & on.Offset & on.Node & r"\E"), "$0" & founds)
626 | echo "\nCopying result:\n",whole
627 | of 'r','R':
628 | fpath.sort( proc( a,b :StrPair) :int=cmp( b.Offset.len, a.Offset.len) )
629 | for on in fpath:
630 | whole= whole.replace(re(r"(?s)^(\Q" & on.Offset & r"\E)\Q" & on.Node & r"\E(.*)"), "$1$2")  # drop the node, keep what surrounds it
631 | echo "\nRemoval result:\n",whole
632 | else: whole=founds
# Offer to save the result (`whole`) to a file. The prompt repeats until the
# data has been written: an empty name, a declined overwrite or a failed write
# all ask for a file name again, and success is reported only after a write
# that did not raise.
echo "Save to a file? (y: Yes, save. else key: Quit)"
if getch() == 'y':
  while true:
    echo "File name to save:"
    outf = readLine(stdin).replace(re"^\h+|\s+$", "")
    if outf.len == 0:
      continue
    if fileExists(outf):
      echo "There is file name '", outf, "'\nOverwrite it (y: Yes. Else key: No) ?"
      if getch() != 'y':
        echo "\nNot overwrite it"
        continue
    try:
      writeFile(outf, whole)
    except IOError as e:
      echo "\nCannot write to '", outf, "': ", e.msg
      continue
    except CatchableError:
      echo "\nFile '", outf, "': critical error"
      continue
    echo "Successfully saved to '", outf, "'"
    break
653 |
--------------------------------------------------------------------------------