├── .gitignore
├── README.md
├── bin
│   ├── 7-zip
│   │   ├── 7za.exe
│   │   └── license.txt
│   ├── unxutils
│   │   ├── StdDisclaimer.html
│   │   ├── UnxUtilsDist.html
│   │   ├── cp.exe
│   │   ├── mkdir.exe
│   │   └── rm.exe
│   └── xidel
│       ├── readme.txt
│       └── xidel.exe
└── build.bat
/.gitignore:
--------------------------------------------------------------------------------
1 | tmp_libcurl
2 | curl.zip
3 | third-party
4 | tmp_url
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Auto download & compile libcurl
2 | -----------
3 | This batch script will automatically download the latest libcurl source code and build it using Visual Studio compiler.
4 |
5 | Supported Visual Studio versions are:
6 | * Visual C++ 6 (requires the Windows Server 2003 Platform SDK released in February 2003)
7 | * Visual Studio 2005
8 | * Visual Studio 2008
9 | * Visual Studio 2010
10 | * Visual Studio 2012
11 | * Visual Studio 2013 [build status](https://ci.appveyor.com/project/blackrosezy/build-libcurl-windows)
12 | * Visual Studio 2015 [build status](https://ci.appveyor.com/project/blackrosezy/build-libcurl-windows-unln0)
13 |
14 |
15 | *Note-1*: All versions of **Visual Studio Express are unsupported**.
16 |
17 | *Note-2*: This script uses the following third-party open source software:
18 | * `bin/7-zip` http://www.7-zip.org/download.html
19 | * `bin/unxutils` http://sourceforge.net/projects/unxutils/
20 | * `bin/xidel` http://sourceforge.net/projects/videlibri/files/Xidel/
21 |
22 | Usage :
23 |
24 | $ build.bat
25 |
26 | To build using /MT rather than /MD:
27 |
28 | $ build.bat -static
29 |
30 | Output :
31 |
32 | ```
33 | third-party
34 | └───libcurl
35 | ├───include
36 | │ └───curl
37 | │ curl.h
38 | │ curlbuild.h
39 | │ curlrules.h
40 | │ curlver.h
41 | │ easy.h
42 | │ mprintf.h
43 | │ multi.h
44 | │ stdcheaders.h
45 | │ typecheck-gcc.h
46 | │
47 | └───lib
48 | ├───dll-debug-x64
49 | │ libcurl_debug.dll
50 | │ libcurl_debug.lib
51 | │ libcurl_debug.pdb
52 | │
53 | ├───dll-debug-x86
54 | │ libcurl_debug.dll
55 | │ libcurl_debug.lib
56 | │ libcurl_debug.pdb
57 | │
58 | ├───dll-release-x64
59 | │ libcurl.dll
60 | │ libcurl.lib
61 | │ libcurl.pdb
62 | │
63 | ├───dll-release-x86
64 | │ libcurl.dll
65 | │ libcurl.lib
66 | │ libcurl.pdb
67 | │
68 | ├───static-debug-x64
69 | │ libcurl_a_debug.lib
70 | │
71 | ├───static-debug-x86
72 | │ libcurl_a_debug.lib
73 | │
74 | ├───static-release-x64
75 | │ libcurl_a.lib
76 | │
77 | └───static-release-x86
78 | libcurl_a.lib
79 | ```
80 |
81 | ## FAQ
82 | If you get a message like the one below, please re-run build.bat.
83 |
84 | **** Retrieving:http://curl.haxx.se/download.html ****
85 | Downloading latest curl...
86 | http://curl.haxx.seAn unhandled exception occurred at $004C7D39 :: Bad port number.
87 |
88 | License (build.bat)
89 | -----------
90 |
91 | The MIT License (MIT)
92 |
93 | Copyright (c) 2014 Mohd Rozi
94 |
95 | Permission is hereby granted, free of charge, to any person obtaining a copy
96 | of this software and associated documentation files (the "Software"), to deal
97 | in the Software without restriction, including without limitation the rights
98 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
99 | copies of the Software, and to permit persons to whom the Software is
100 | furnished to do so, subject to the following conditions:
101 |
102 | The above copyright notice and this permission notice shall be included in
103 | all copies or substantial portions of the Software.
104 |
105 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
106 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
107 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
108 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
109 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
110 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
111 | THE SOFTWARE.
112 |
--------------------------------------------------------------------------------
/bin/7-zip/7za.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blackrosezy/build-libcurl-windows/c281bb34209665603549ba98d3caaa921d25c065/bin/7-zip/7za.exe
--------------------------------------------------------------------------------
/bin/7-zip/license.txt:
--------------------------------------------------------------------------------
1 | 7-Zip Command line version
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
3 | License for use and distribution
4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 |
6 | 7-Zip Copyright (C) 1999-2010 Igor Pavlov.
7 |
8 | 7za.exe is distributed under the GNU LGPL license
9 |
10 | Notes:
11 | You can use 7-Zip on any computer, including a computer in a commercial
12 | organization. You don't need to register or pay for 7-Zip.
13 |
14 |
15 | GNU LGPL information
16 | --------------------
17 |
18 | This library is free software; you can redistribute it and/or
19 | modify it under the terms of the GNU Lesser General Public
20 | License as published by the Free Software Foundation; either
21 | version 2.1 of the License, or (at your option) any later version.
22 |
23 | This library is distributed in the hope that it will be useful,
24 | but WITHOUT ANY WARRANTY; without even the implied warranty of
25 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 | Lesser General Public License for more details.
27 |
28 | You can receive a copy of the GNU Lesser General Public License from
29 | http://www.gnu.org/
30 |
--------------------------------------------------------------------------------
/bin/unxutils/StdDisclaimer.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Disclaimer
6 |
7 |
8 |
9 |
10 |
Disclaimer
11 |
12 |
THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
13 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
14 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS
15 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
16 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
17 | OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
19 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
20 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
24 |
25 |
--------------------------------------------------------------------------------
/bin/unxutils/UnxUtilsDist.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blackrosezy/build-libcurl-windows/c281bb34209665603549ba98d3caaa921d25c065/bin/unxutils/UnxUtilsDist.html
--------------------------------------------------------------------------------
/bin/unxutils/cp.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blackrosezy/build-libcurl-windows/c281bb34209665603549ba98d3caaa921d25c065/bin/unxutils/cp.exe
--------------------------------------------------------------------------------
/bin/unxutils/mkdir.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blackrosezy/build-libcurl-windows/c281bb34209665603549ba98d3caaa921d25c065/bin/unxutils/mkdir.exe
--------------------------------------------------------------------------------
/bin/unxutils/rm.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blackrosezy/build-libcurl-windows/c281bb34209665603549ba98d3caaa921d25c065/bin/unxutils/rm.exe
--------------------------------------------------------------------------------
/bin/xidel/readme.txt:
--------------------------------------------------------------------------------
1 |
2 | ================================================ Basics ================================================
3 |
4 |
5 | The trivial usage is to extract an expression from a webpage like:
6 |
7 | xidel http://www.example.org --extract //title
8 |
9 | Instead of one or more urls, you can also pass file names or the xml data itself (xidel ".." ...).
10 | The --extract option can be abbreviated as -e, and there are five different kinds of extract expressions:
11 |
12 | 1 ) XPath 2 expressions, with some changes and additional functions.
13 |
14 | 2 ) XQuery 1 expressions
15 |
16 | 3 ) CSS 3 selectors.
17 |
18 | 4 ) Templates, a simplified version of the page which is pattern matched against the input
19 |
20 | 5 ) Multipage templates, i.e. a file that contains templates for several pages
21 |
22 | The different kinds except multipage templates are usually automatically detected, but
23 | a certain type can be forced with the extract-kind option.
24 | Or by using the shorter --xpath "..", --xquery "..", --css ".." options.
25 | Especially XQuery and template expressions are easily confused by the auto detector.
26 | (Xidel assumes templates, if the expression starts with a "<" )
27 |
28 | See the sections below for a more detailed description of each expression kind.
29 |
30 |
31 |
32 | The next important option is --follow (abbreviated as -f) to follow links on a page. E.g:
33 |
34 | xidel http://www.example.org --follow //a --extract //title
35 |
36 | This will print the titles of all pages that are linked from http://www.example.org.
37 |
38 | --follow supports the same expressions as --extract, and it will follow the href or src attributes of the
39 | usual elements, or the contained text if there are no such attributes.
40 |
41 |
42 |
43 | ============================== Recursion / Argument order and grouping ===============================
44 |
45 |
46 | You can specify multiple --extract (-e) and --follow (-f) arguments to extract values from one page,
47 | follow the links to the next pages and extract values from there as well ...
48 | Then it becomes important in which order the arguments are given, so it extracts before following,
49 | or the other way around.
50 | You can usually read it left-to-right like an English sentence, extracting from the current page,
51 | or following to a new one, which will then become the next current page.
52 | For example:
53 |
54 | a) xidel http://site1 -e "select content 1" http://site2 -e "select content 2"
55 |
56 | This will extract content 1 from site 1 and content 2 from site 2
57 |
58 | b) xidel http://site1 http://site2 -e "select content 1" -e "select content 2"
59 |
60 | This will extract content 1 and 2 from site 1 as well as from site 2
61 |
62 | c) xidel http://site1 -e "select content 1" -f "//a (:select links:)" -e "select content 2"
63 |
64 | This will extract the "content 1" from site1, and "content 2" from all sites the first site has links to.
65 |
66 | d) xidel http://site1 -f "//a (:select links:)" -e "select content 1" -e "select content 2"
67 |
68 | This will extract "content 1" and "content 2" from all sites the first site links to, and will not
69 | extract anything from site1.
70 |
71 | e) xidel http://site1 -e "select content 1" -e "select content 2" -f "//a (:select links:)"
72 |
73 | This is some kind of special case. Since -f is the last option, it will repeat the previous operation, i.e.
74 | it will extract content 1 and 2 from site1 and ALL sites that can be reached by a selected link on site1
75 | or any other of the processed sites.
76 | Only if there were another -e after -f, it would extract that from the first set of followed links and stop.
77 |
78 | In some kinds of extract expression you can create new variables, if you assign values to a variable called
79 | "_follow", that value will be included in the next follow expression.
80 | If you assign an object to _follow, its properties will override the command line parameters with the same
81 | value.
82 |
83 | Generally an option modifier (like --extract-kind) affects all succeeding options, unless there are none,
84 | then it affects the immediate preceding option.
85 |
86 |
87 |
88 |
89 | You can always override the argument order by using [ and ] to group the options.
90 | For example:
91 |
92 | f) xidel http://site1 [ -f "//a (:select links:)" -e "select content 1" ] -e "select content 2"
93 |
94 | This will extract content 1 from all sites linked by site1 and content 2 from site1 itself.
95 | I.e. the extract of content 2 is not affected by the follow-option within the [..] brackets.
96 |
97 | g) xidel http://site1 [ -f //a[@type1] --download type1/ ]
98 | [ -f //a[@type2] --download type2/ ]
99 | [ -f //a[@type3] --download type3/ ]
100 |
101 | This will download all links of type 1 in a directory type1, all links of type2 in directory type2...
102 | (if written on one line)
103 |
104 | [ and ] must be surrounded by a space.
105 |
106 |
107 |
108 |
109 |
110 |
111 | ========================================== XPath 2.0 / XQuery ===========================================
112 |
113 | XPath expressions provide an easy way to extract calculated values from x/html.
114 | See http://en.wikipedia.org/wiki/XPath_2.0 for details.
115 |
116 | Xidel also supports JSONiq and some custom extensions, so it deviates in a few ways from the standard.
117 | However, you can disable these differences with the respective options (see link below or the
118 | command line parameter listing printed by --help).
119 | Switched to full standard compatible mode, its implementation passes 99.3% of the XPath 2 only tests and
120 | 97.8% of the XQuery 1 tests in the XQuery Testsuite (skipping tests for invalid input queries)
121 |
122 | However, in the default mode, there are the following important extensions:
123 |
124 | Syntax:
125 |
126 | Variable assignment: $var := value
127 |
128 | adds $var to a set of global variables, which can be created and accessed
129 | everywhere
130 |
131 | JSONiq literals true, false, null
132 |
133 | true and false are evaluated as true(), false(), null becomes jn:null()
134 |
135 | JSONiq arrays: [a,b,c]
136 |
137 | Arrays store a list of values and can be nested with each other and
138 | within sequences.
139 | jn:members converts an array to a sequence.
140 |
141 | JSONiq objects: {"name": value, ...}
142 |
143 | Object stores a set of values as associative map. The values can be
144 | accessed similar to a function call, e.g.: {"name": value, ...}("name").
145 | Xidel also has {"name": value, ..}.name as an additional syntax to
146 | access properties.
147 | jn:keys returns a sequence of all property names, libjn:values a sequence
148 | of values.
149 | Used with global variables, you can copy an object with obj2 := obj
150 | (objects are immutable, but properties can be changed with
151 | obj2.foo := 12, which will create a new object with the changed property)
152 |
153 | Extended strings: x"..{..}.."
154 |
155 | If a string is prefixed by an "x", all expressions inside {}-parentheses
156 | are evaluated, like in the value of a direct attribute constructor.
157 | (Warning: This was changed in Xidel 0.7. Xidel <= 0.6 used
158 | "foo$var;bar" without prefix for this)
159 |
160 |
161 | Semantic:
162 |
163 | All string comparisons are case insensitive, and "clever", e.g.:
164 | '9xy' = '9XY' < '10XY' < 'xy'
165 | This is more useful for html (think of @class = 'foobar'), but can be
166 | disabled by passing collation urls to the string functions.
167 |
168 | Everything is weakly typed, e.g 'false' = false() is true, and 1+"2" is 3.
169 |
170 | Unknown namespace prefixes are resolved with the namespace bindings of the
171 | input data.
172 | Therefore //a always finds all links, independent of any xmlns-attributes.
173 | (however, if you explicitly declare a namespace like
174 | 'declare default element namespace "..."' in XQuery, it will only find
175 | elements in that namespace)
176 |
177 | XML Schemas, error codes and static type checking are not supported.
178 |
179 | Certain additional functions:
180 |
181 | jn:*, libjn:* The standard JSONiq and JSONlib functions
182 | json("str.") Parses a string as json, or downloads json from an url.(only use with trusted input)
183 | serialize-json(value)
184 | Converts a value to JSON
185 | extract("string","regex"[,<match>,[<flags>]])
186 | This applies the regex "regex" to "string" and returns only the matching part.
187 | If the <match> argument is used, only the <match>-th submatch will be returned.
188 | (this function used to be called "filter")
189 | css("sel") This returns the nodes below the context node matched by the specified css 3 selector.
190 | You can use this to combine css and XPath, like in 'css("a.aclass")/@href'.
191 | eval("xpath") This will evaluate the string "xpath" as an XPath expression
192 | system("..") Runs a certain program and returns its stdout result as string
193 | deep-text() This is the concatenated plain text of every tag inside the current text.
194 | You can also pass a separator like deep-text(' ') to separate text of different nodes.
195 | inner-html() This is the html content of node ., like innerHTML in javascript.
196 | outer-html() This is the same as inner-html, but includes the node itself
197 | inner-xml() This is the xml content of node, similar to inner-html()
198 | outer-xml() Like outer-html(), but xml-serialized
199 | split-equal("list", "string"[, "sep" = " "])
200 | Treats the string "list" as a list of strings separated by "sep" and tests if
201 | "string" is contained in that list. (just like css class matching)
202 | form(form, [overridden parameters = ()])
203 | Converts a html form in a http request, by url encoding all inputs descendants
204 | of the given form node. You can give a sequence of parameters to override.
205 | e.g. form(//form[1], "foo=bar&xyz=123") returns a request for the first form,
206 | with the foo and xyz parameters overridden by bar and 123.
207 | You can also use a JSON object to set the override parameters, e.g.
208 | {"foo": "bar", "xyz": 123}, in that case they are url encoded.
209 | It returns an object with .url, .method and .post properties.
210 | match(<template>, <node>)
211 | Performs pattern matching between the template (see below for template documentation)
212 | and the nodes, and returns a list or an object of matched values.
213 | For example match(<a>{{.}}</a>, <x><a>FOO</a><a>BAR</a></x>) returns <a>FOO</a>, and
214 | match(<a>*{{.}}</a>, <x><a>FOO</a><a>BAR</a></x>) returns (<a>FOO</a>, <a>BAR</a>).
215 | It is also possible to use named variables in the template, in which case an object
216 | is returned, e.g:
217 | match(<x><a>{{first:=.}}</a><a>{{second:=.}}</a></x>, <x><a>FOO</a><a>BAR</a></x>)
218 | returns an object with two properties "first" and "second", containing respectively
219 | <a>FOO</a> and <a>BAR</a>.
220 | The template can be a node or a string. Written as string the above example would be
221 | match("<a>{.}</a>", <x><a>FOO</a><a>BAR</a></x>).
222 |
223 | All additional functions except the jn/libjn functions are in the pxp: namespace, which is also set
224 | as default namespace.
225 |
226 | The pasdoc documentation of my XPath 2 / XQuery library explains more details and lists more functions:
227 | http://www.benibela.de/documentation/internettools/xquery.TXQueryEngine.html
228 |
229 |
230 |
231 | ========================================== CSS 3.0 Selectors ==========================================
232 |
233 |
234 | CSS 3 Selectors are fully supported, except some pseudoclasses like :hover and ::before that do not
235 | make sense in a GUI-less, read-only application.
236 | (It is however not much tested, since I personally only use XPath)
237 |
238 | The easiest way to use CSS selectors with the command line is to write it like --extract "css('selector')"
239 | (the "-quotes are necessary to escape the '-quotes.)
240 |
241 | Alternatively you can use --extract-kind=css --extract="your selector", or --css="your selector"
242 |
243 |
244 |
245 | ============================================== Templates ==============================================
246 |
247 | Templates are a very easy way to extract complex data from a webpage.
248 | Each template is basically a stripped-down excerpt of the webpage, in which the relevant parts have
249 | been annotated.
250 |
251 | The best way to describe templates is with a real world example:
252 |
253 | The following is the html of an entry of one of the recommended videos you can always see at the right
254 | side of a YouTube video: (skipped the image part for clarity)
255 |
256 |
265 |
266 | As you see there are actual interesting values like the url/title/username/view texts, and irrelevant,
267 | changing values like the session url.
268 | If you now remove the irrelevant parts, and annotate the interesting values as {name:=value},
269 | you get the following:
270 |
271 |
+
279 |
280 | This template can be passed directly as an extract-expression, applied to the page of a YouTube video,
281 | and will return all recommended/related videos.
282 | More precisely, it will return four (interleaved) arrays "title", "username", "views", "url" each
283 | containing the relevant values.
284 |
285 | A basic template as above consists of three different kinds of expressions:
286 |
287 | A normal html element will be matched to the processed html page.
288 | This means it will search the first element on the page, that has the same node name,
289 | all the attributes with the same values, and whose children match the children of the
290 | template element.
291 |
292 | {..} A {} marker will execute the contained XPath expression, once the corresponding
293 | place in the html page has been found.
294 | The context node . will refer to the surrounding element, and you can use my extended
295 | XPath syntax (var := value) to create a variable. (see XPath above)
296 | Often you want to read the entire matched element in a variable with $name, which
297 | can be written as {$name := .} or further abbreviated as {$name} .
298 | It can also be used within attributes, like to read the attribute value.
299 | (the parentheses can be also replaced by .. or ..)
300 |
301 |
302 | + Finally the loop marker will repeat the matching of the previous element as long as
303 | possible (a similar syntax is <t:loop>..</t:loop> or <template:loop>..</template:loop>).
304 |
305 |
306 |
307 | This is sufficient for most basic scraping operations, but you can also use the following things in a
308 | template:
309 |
310 | textnodes Textnodes are matched like html element nodes.
311 | A textnode in the webpage is considered a valid match, if it starts
312 | with the same text as the text node in the template.
313 | (but you can change this behavior to ends-with/exact/regex-comparisons with
314 | the
315 | command)
316 |
317 | All children of a template:if-tag are ignored if the test-XPath-expressions
318 | evaluates to false()
319 |
320 | Only one of the child elements will be used for matching
321 |
322 | Same as t:switch, but it will choose the earliest template child that has a match.
323 |
324 | t:optional="true" Html nodes can be marked as optional, and they will be ignored, if no possible
325 | match can be found
326 |
327 | t:condition="??" An XPath expression that must be true for the node to match. The context node (.) refers to a potential match.
328 |
329 | * Like +, but it can also match none
330 |
331 | {min,max} or {count} Matches between [min,max] or {count}-many occurrences of the previous element
332 |
333 |
334 | The same as above. However, t:loop will repeat all its children, while a marker
335 | like + can only repeat the single, previous element.
336 |
337 | ? Short notation for t:optional.
338 |
339 | (see http://www.benibela.de/documentation/internettools/extendedhtmlparser.THtmlTemplateParser.html
340 | for more detailed explanations)
341 |
342 |
343 | There is also a Greasemonkey script to create templates directly by just selecting the text on the
344 | corresponding webpage.
345 |
346 |
347 |
348 | ========================================= Multipage templates ==========================================
349 |
350 | Multipage templates collect several single page templates in a xml file.
351 | They are basically just a list of <page> nodes with <post> data and associated <template>s.
352 | E.g.
353 |
354 |
355 |
356 | unescaped post data
357 | your=escaped&post=data&...
358 |
359 | {alink:=.}*
360 |
361 |
362 |
363 | ...
364 |
365 | ...
366 |
367 |
368 | All pages are downloaded with GET or respectively POST requests, and processed with the given template.
369 | The page-node also accepts a "test" attribute, which gives an XPath expression that needs to be true,
370 | if the page element should be used.
371 | In the attributes and the text of post-nodes, everything enclosed in {..} parentheses is evaluated
372 | as xpath expression. (like in an extended x".." string, see above)
373 |
374 | Since this would be cumbersome to pass directly to --extract, you can also specify the containing file
375 | with the --template-file argument.
376 |
377 | You can also have multiple <action>s in a multipage template (surrounded by a parent element with
378 | name <actions>), and call the later actions with <call action="name"/> from another action.
379 | If a template with multiple actions is passed to Xidel it will always perform the first action,
380 | unless the --template-action parameter specifies another action to run. (in Xidel > 0.5)
381 |
382 | There are also <variable>-elements to declare variables and <loop>-elements to repeat other elements,
383 | see http://www.benibela.de/documentation/internettools/multipagetemplate.TMultiPageTemplate.html
384 | for more details.
385 |
386 | =========================================== Input formats =============================================
387 |
388 | Xidel supports html and xml input, and the option input-format can be used to set the parsing behaviour:
389 |
390 | auto: Automatically switch between html and xml
391 |
392 | html: The input will be parsed as html.
393 | Missing tags like head, body, tbody are automatically created.
394 | (beware that this means table/tr is never valid, and either table//tr or table/tbody/tr
395 | has to be used)
396 |
397 | xml: The input will be parsed as xml.
398 | However, it still uses the html parser, so it will correct missing end tags and not
399 | support DTDs.
400 |
401 | xml-strict: The input will be parsed as strict xml.
402 | This uses the standard fpc, validating xml parser.
403 |
404 | You can also use json files, by loading them explicitly with pxp:json() or jn:json-doc() within a
405 | XPath/XQuery expression.
406 |
407 | =========================================== Output formats =============================================
408 |
409 | Xidel has several different output formats, which can be chosen with the output-format option:
410 |
411 | adhoc: A very simple format, it will just print all values (default)
412 |
413 | xml: The output will be serialized as xml
414 |
415 | html: The output will be serialized as html
416 |
417 | xml-wrapped: It will print a xml-based machine readable output.
418 | Sequences will become value 1value 2...
419 | Objects will become
420 | (so in contrast to xml, it will keep variable names and type information intact)
421 |
422 | json-wrapped: It will print a json-based machine readable output.
423 | Sequences become arrays [ ... ].
424 | Objects become objects. {"prop-1": "value 1", "prop-2": "value 2", ... }
425 | (this was called json before Xidel 0.7)
426 |
427 | bash: Prints a bash script that sets the internal variables as bash variables.
428 | E.g.
429 | eval $(xidel http://data -e 'title:=//title' -e 'links:=//a')
430 | can be used to set the bash variable $title to the title of a page and the
431 | variable $links to a bash array of all links on the page.
432 |
433 | cmd: Like bash, but for Windows cmd.exe
434 |
435 | Generally it prints a sequence of all processed pages (i.e. each page a single sequence element),
436 | and the variables defined as global variables or read by a template become variables or
437 | object properties.
438 | There is a special rule for json-wrapped output, if the template assigns multiple values to the same
439 | variable: Xidel will collect all these values in an array. I.e. (a:=1, b:=2, a:=3, c:=4)
440 | becomes "a": [1, 3], "b": 2, "c": 4
441 |
442 |
443 |
444 |
445 |
446 |
--------------------------------------------------------------------------------
/bin/xidel/xidel.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blackrosezy/build-libcurl-windows/c281bb34209665603549ba98d3caaa921d25c065/bin/xidel/xidel.exe
--------------------------------------------------------------------------------
/build.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | setlocal EnableDelayedExpansion
3 | REM Search under "Program Files (x86)" when that variable is defined, otherwise under "Program Files".
4 | set PROGFILES=%ProgramFiles%
5 | if not "%ProgramFiles(x86)%" == "" set PROGFILES=%ProgramFiles(x86)%
6 | REM Probes run newest to oldest; the first hit sets MSVCDIR, VCVARSALLPATH and COMPILER_VER, then jumps to setup_env.
7 | REM Check if Visual Studio 2017 is installed (every edition named in the loop is tried in order)
8 | set MSVCDIR="%PROGFILES%\Microsoft Visual Studio\2017"
9 | for %%E in (Community Professional Enterprise BuildTools) do (
10 |     set VCVARSALLPATH="%PROGFILES%\Microsoft Visual Studio\2017\%%E\VC\Auxiliary\Build\vcvarsall.bat"
11 |     if exist !VCVARSALLPATH! (
12 |         set COMPILER_VER="2017"
13 |         echo Using Visual Studio 2017 %%E
14 |         goto setup_env
15 |     )
16 | )
17 | REM Check if Visual Studio 2015 is installed
18 | set MSVCDIR="%PROGFILES%\Microsoft Visual Studio 14.0"
19 | set VCVARSALLPATH="%PROGFILES%\Microsoft Visual Studio 14.0\VC\vcvarsall.bat"
20 | if exist %MSVCDIR% (
21 |     if exist %VCVARSALLPATH% (
22 |         set COMPILER_VER="2015"
23 |         echo Using Visual Studio 2015
24 |         goto setup_env
25 |     )
26 | )
27 | REM Check if Visual Studio 2013 is installed
28 | set MSVCDIR="%PROGFILES%\Microsoft Visual Studio 12.0"
29 | set VCVARSALLPATH="%PROGFILES%\Microsoft Visual Studio 12.0\VC\vcvarsall.bat"
30 | if exist %MSVCDIR% (
31 |     if exist %VCVARSALLPATH% (
32 |         set COMPILER_VER="2013"
33 |         echo Using Visual Studio 2013
34 |         goto setup_env
35 |     )
36 | )
37 |
38 | REM Check if Visual Studio 2012 is installed
39 | set MSVCDIR="%PROGFILES%\Microsoft Visual Studio 11.0"
40 | set VCVARSALLPATH="%PROGFILES%\Microsoft Visual Studio 11.0\VC\vcvarsall.bat"
41 | if exist %MSVCDIR% (
42 |     if exist %VCVARSALLPATH% (
43 |         set COMPILER_VER="2012"
44 |         echo Using Visual Studio 2012
45 |         goto setup_env
46 |     )
47 | )
48 |
49 | REM Check if Visual Studio 2010 is installed
50 | set MSVCDIR="%PROGFILES%\Microsoft Visual Studio 10.0"
51 | set VCVARSALLPATH="%PROGFILES%\Microsoft Visual Studio 10.0\VC\vcvarsall.bat"
52 | if exist %MSVCDIR% (
53 |     if exist %VCVARSALLPATH% (
54 |         set COMPILER_VER="2010"
55 |         echo Using Visual Studio 2010
56 |         goto setup_env
57 |     )
58 | )
59 |
60 | REM Check if Visual Studio 2008 is installed
61 | set MSVCDIR="%PROGFILES%\Microsoft Visual Studio 9.0"
62 | set VCVARSALLPATH="%PROGFILES%\Microsoft Visual Studio 9.0\VC\vcvarsall.bat"
63 | if exist %MSVCDIR% (
64 |     if exist %VCVARSALLPATH% (
65 |         set COMPILER_VER="2008"
66 |         echo Using Visual Studio 2008
67 |         goto setup_env
68 |     )
69 | )
70 |
71 | REM Check if Visual Studio 2005 is installed
72 | set MSVCDIR="%PROGFILES%\Microsoft Visual Studio 8"
73 | set VCVARSALLPATH="%PROGFILES%\Microsoft Visual Studio 8\VC\vcvarsall.bat"
74 | if exist %MSVCDIR% (
75 |     if exist %VCVARSALLPATH% (
76 |         set COMPILER_VER="2005"
77 |         echo Using Visual Studio 2005
78 |         goto setup_env
79 |     )
80 | )
81 | REM NOTE(review): setup_env calls Bin\VCVARS32.BAT for version 6, yet this probe requires vcvarsall.bat in VC98 - confirm it can succeed.
82 | REM Check if Visual Studio 6 is installed
83 | set MSVCDIR="%PROGFILES%\Microsoft Visual Studio\VC98"
84 | set VCVARSALLPATH="%PROGFILES%\Microsoft Visual Studio\VC98\vcvarsall.bat"
85 | if exist %MSVCDIR% (
86 |     if exist %VCVARSALLPATH% (
87 |         set COMPILER_VER="6"
88 |         echo Using Visual Studio 6
89 |         goto setup_env
90 |     )
91 | )
92 | REM Nothing matched: list every version probed above (including 2017) and jump to the end label.
93 | echo No compiler : Microsoft Visual Studio (6, 2005, 2008, 2010, 2012, 2013, 2015 or 2017) is not installed.
94 | goto end
95 |
96 | :setup_env
97 | REM Only compiler version "6" gets its environment script called in this section; all others go straight to begin.
98 | echo Setting up environment
99 | if not %COMPILER_VER% == "6" goto begin
100 | REM Visual Studio 6: run VCVARS32.BAT from the Bin folder under MSVCDIR.
101 | call %MSVCDIR%\Bin\VCVARS32.BAT
102 | goto begin
103 |
104 | :begin
105 |
106 | REM Setup path to helper bin
107 | set ROOT_DIR="%CD%"
108 | set RM="%CD%\bin\unxutils\rm.exe"
109 | set CP="%CD%\bin\unxutils\cp.exe"
110 | set MKDIR="%CD%\bin\unxutils\mkdir.exe"
111 | set SEVEN_ZIP="%CD%\bin\7-zip\7za.exe"
112 | set XIDEL="%CD%\bin\xidel\xidel.exe"
113 |
114 | REM Housekeeping
115 | %RM% -rf tmp_*
116 | %RM% -rf third-party
117 | %RM% -rf curl.zip
118 | %RM% -rf build_*.txt
119 |
120 | REM Get download url .Look under