├── .gitignore
├── LICENSE.txt
├── Makefile
├── README.md
├── wiki2text.nim
└── wiki2text.nimble

/.gitignore:
--------------------------------------------------------------------------------
*~
nimcache
wiki2text

--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Rob Speer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
NIMFLAGS = -d:release --app:console
NIM = nim

wiki2text: wiki2text.nim
	$(NIM) c $(NIMFLAGS) wiki2text.nim

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# wiki2text (unmaintained)

I don't work on this project anymore, and the Nim language has probably moved
far beyond the version I used to make this. You may be able to make it work for
you, but no guarantees.

I now work on a full-blown LL parser for wikitext,
[wikiparsec](https://github.com/LuminosoInsight/wikiparsec).

# Original introduction

**What you put in:** a .xml.bz2 file downloaded from Wikimedia

**What you get out:** gigabytes of clean natural language text

wiki2text is a fast pipeline that takes a MediaWiki XML dump -- such as the
exports of Wikipedia that you can download from [dumps.wikimedia.org][] -- and
extracts just the natural-language text from it, skipping the wiki formatting
characters and the HTML tags.

This is particularly useful as a way to get free corpora, in many languages,
for natural language processing.

The only formatting you will get is that the titles of new articles and new
sections will appear on lines that start and end with some number of `=` signs.
I've found this useful for distinguishing titles from body text. If you don't
need it, these lines are easy to exclude using `grep -v`, as shown below.
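
For example, article and section titles come out as lines like these two, taken
from the sample output later in this README:

    = Albedo =
    ==Terrestrial albedo==

and a filter like the following (the file names here are just placeholders)
keeps only the body text:

    bunzip2 -c pages-articles.xml.bz2 | ./wiki2text | grep -v '^=' > text.txt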

wiki2text is written with these goals in mind:

- Clean code that is clear about what it's doing
- Doing no more than is necessary
- Being incredibly fast (it parses an entire Wikipedia in minutes)
- Being usable as a step in a pipeline

Thanks to def-, a core Nim developer, for making optimizations that make the
code so incredibly fast.

[dumps.wikimedia.org]: https://dumps.wikimedia.org/backup-index.html

## Why Nim?

Why is this code written in a fast-moving, emerging programming language? It's
an adaptation of a Python script that took *days* to run. Nim allowed me to
keep the understandability of Python while also getting the speed of C.

## Setup

wiki2text needs to be compiled using Nim 0.11. Install Nim by following the
directions on [Nim's download page][].

[Nim's download page]: http://nim-lang.org/download.html

You can build wiki2text from this repository by running:

    make

You can also install it using Nimble, Nim's package manager, instead:

    nimble install wiki2text

## Usage

Download one of the database dumps from [dumps.wikimedia.org][]. The file you
want is the one whose name has the form `*-pages-articles.xml.bz2`. These files
can be many gigabytes in size, so you might want to start with a language other
than English, one with a smaller number of articles.

But suppose you did download `enwiki-DATE-pages-articles.xml.bz2`. Then you
should run:

    bunzip2 -c enwiki-DATE-pages-articles.xml.bz2 | ./wiki2text > enwiki.txt

To skip all headings, run:

    bunzip2 -c enwiki-DATE-pages-articles.xml.bz2 | ./wiki2text | grep -v '^=' > enwiki.txt

`enwiki.txt` will fill up with article text as quickly as it comes out of
`bunzip2`.

## Example output

Here's an example of part of the text that comes out of the English Wikipedia
(with hard line wrapping added):

    = Albedo =

    Albedo (), or reflection coefficient, derived from Latin albedo "whiteness"
    (or reflected sunlight) in turn from albus "white", is the diffuse
    reflectivity or reflecting power of a surface. It is the ratio of reflected
    radiation from the surface to incident radiation upon it. Its dimensionless
    nature lets it be expressed as a percentage and is measured on a scale from
    zero for no reflection of a perfectly black surface to 1 for perfect
    reflection of a white surface.

    Albedo depends on the frequency of the radiation. When quoted unqualified,
    it usually refers to some appropriate average across the spectrum of
    visible light. In general, the albedo depends on the directional
    distribution of incident radiation, except for Lambertian surfaces, which
    scatter radiation in all directions according to a cosine function and
    therefore have an albedo that is independent of the incident distribution.
    In practice, a bidirectional reflectance distribution function (BRDF) may
    be required to accurately characterize the scattering properties of a
    surface, but albedo is very useful as a first approximation.

    The albedo is an important concept in climatology, astronomy, and
    calculating reflectivity of surfaces in LEED sustainable-rating systems for
    buildings.
    The average overall albedo of Earth, its planetary albedo, is 30
    to 35% because of cloud cover, but widely varies locally across the surface
    because of different geological and environmental features.

    The term was introduced into optics by Johann Heinrich Lambert in his 1760
    work Photometria.

    ==Terrestrial albedo==

    Albedos of typical materials in visible light range from up to 0.9 for
    fresh snow to about 0.04 for charcoal, one of the darkest substances.
    Deeply shadowed cavities can achieve an effective albedo approaching the
    zero of a black body. When seen from a distance, the ocean surface has a
    low albedo, as do most forests, whereas desert areas have some of the
    highest albedos among landforms. Most land areas are in an albedo range of
    0.1 to 0.4. The average albedo of the Earth is about 0.3. This is far
    higher than for the ocean primarily because of the contribution of clouds.
    Earth's surface albedo is regularly estimated via Earth observation
    satellite sensors such as NASA's MODIS instruments on board the Terra and
    Aqua satellites. As the total amount of reflected radiation cannot be
    directly measured by satellite, a mathematical model of the BRDF is used to
    translate a sample set of satellite reflectance measurements into estimates
    of directional-hemispherical reflectance and bi-hemispherical reflectance
    (e.g.).

    Earth's average surface temperature due to its albedo and the greenhouse
    effect is currently about 15°C. If Earth were frozen entirely (and hence be
    more reflective) the average temperature of the planet would drop below
    −40°C. If only the continental land masses became covered by glaciers, the
    mean temperature of the planet would drop to about 0°C. In contrast, if the
    entire Earth is covered by water—a so-called aquaplanet—the average
    temperature on the planet would rise to just under 27°C.

## Limitations

You may notice that occasional words and phrases are missing from the text.
These are the parts of the article that come from MediaWiki templates.

Templates are an incredibly complicated, Turing-complete part of MediaWiki
syntax, and are used for everything from simple formatting to building large
infoboxes, tables, and navigation boxes.

It would be nice if we could somehow keep only the simple ones and discard
the complex ones, but what's easiest to do is to simply ignore every template.

Sometimes templates contain the beginnings or ends of HTML or Wikitable
formatting that we would normally skip, in which case extra crud may show up in
the article.

This probably doesn't work very well for wikis that have specific, meaningful
formatting, such as Wiktionary. The [conceptnet5][] project includes a slow
Wiktionary parser in Python that you might be able to use.

[conceptnet5]: https://github.com/commonsense/conceptnet5

--------------------------------------------------------------------------------
/wiki2text.nim:
--------------------------------------------------------------------------------
# There are two steps to interpreting a Wikipedia XML file. First, we need to
# get the data out of the XML, which requires a streaming XML parser.
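# (A full Wikipedia dump is many gigabytes of XML, so it can't simply be
# read into memory all at once.)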
#
# Then, we need to deal with the actual content of an article, which is in a
# hybrid of HTML and MediaWiki's own syntax. The way that we handle the HTML
# tags (mostly for the purpose of skipping their contents) is to run them
# through another streaming XML parser. This one doesn't really need to be
# streaming, but it might as well be so that we can reuse the same Nim library.

import streams, parsexml, re, strutils, unicode, parseopt2

# Wikitext handling
# -----------------

# This regex matches anywhere in the text that there *might* be wiki syntax
# that we have to clean up.
let ANYTHING_INTERESTING_RE: Regex = re"[*#:;|!{['_]"

# We skip the contents of these HTML tags entirely, and they don't nest
# inside each other.
const SKIP_SPANS = [
  "cite", "ref", "hiero", "gallery", "timeline", "noinclude",
  "caption", "references", "img", "source", "math"
]

# This regex is for matching and skipping over simple wikitext formatting.
# Here's the breakdown of the patterns we're matching:
#
#   '''?                = Bold and italic formatting (two or three apostrophes)
#   ^#\s*redirect.*$    = Redirect syntax (matched case-insensitively)
#   ^[ *#:;]+           = Bullets and indentation markers at the start of a line
#   ^__.*__$            = Table-of-contents directives
#   ^[|!].*$            = Table detritus
#
# "Table detritus" might require some explanation. Tables, delimited by {|
# and |}, are something that we skip separately in filterWikitext. But
# because MediaWiki is insane like this, some tables are made using syntax
# that uses a template for the beginning of the table and |} syntax for the
# end.
#
# Because we don't know what's in templates, when this happens, we end up
# just seeing the inside and end of the table as if it were text. Usually,
# though, these lines begin with the cell separator |, so we can just filter
# those out.

let FORMATTING_RE: Regex = re(r"('''?|^#\s*redirect.*$|^[ *#:;]+|^[|!].*$|^__.*__$)", {reMultiLine, reIgnoreCase})

# This regex matches sequences of more than one blank line.
let BLANK_LINE_RE: Regex = re"\n\s*\n\s*\n"

# This regex matches the punctuation, symbol, and separator characters that the
# simple tokenizer (-t) splits words on.
let WORD_SEPARATOR_RE: Regex = re"'?([\x01-\x26\x28-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]|(\xc2[\x80-\xbf])|(\xe2\x80.)|(\xe2\x81[\x80-\xaf])|(\xe3\x80[\x80-\x9f]))'?"

# This regex matches self-closing <ref> tags, such as <ref name="foo" />.
let EMPTY_REF_RE: Regex = re(r"<ref[^>]+/\s*>", {reIgnoreCase})

const FAKE_FILENAME = ""

proc skipNestedChars(text: string, pos: var int, open: char, close: char) =
  ## Move our position 'pos' forward in the text, to skip a number of
  ## matching instances of the characters 'open' and 'close'.
  ##
  ## Precondition: text[pos] == open
  ## Postcondition: pos will increase by at least 1
  pos += 1
  var count = 1
  while count > 0 and pos < text.len:
    let nextPos: int = text.find({open, close}, pos)
    if nextPos == -1:
      # We can't find any more closing characters in the text.
      # Jump to the end and abort.
      pos = text.len
      return
    else:
      let nextChar: char = text[nextPos]
      if nextChar == open:
        count += 1
      else:
        count -= 1
      pos = nextPos + 1


# Convert Unicode text to lowercase.
# I hope this eventually ends up in the standard library.
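# For example, unicodeLower("Grüße Aus BERLIN") returns "grüße aus berlin":
# each rune is lowercased, not just the ASCII letters.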
proc unicodeLower(text: string): string =
  result = ""
  for rune in runes(text):
    result.add(rune.toLower.toUTF8)


# forward declaration
proc filterWikitext(text: string): string

proc extractInternalLink(linkText: string): string =
  # Links with colons might be special MediaWiki syntax. Just throw them
  # all away.
  if linkText.contains(':'):
    return ""
  # If the brackets are unbalanced, return nothing.
  if linkText[^2] != ']' or linkText[^1] != ']':
    return ""
  let contents: string = filterWikitext(linkText[2 .. ^3])
  let lastPart: int = contents.rfind('|') + 1
  return contents[lastPart .. ^1]


proc extractExternalLink(linkText: string): string =
  let spacePos = linkText.find(' ')
  if spacePos == -1:
    return ""
  else:
    # If the closing bracket is missing, return nothing; otherwise return
    # the displayed text that follows the URL.
    if linkText[^1] != ']':
      return ""
    else:
      return filterWikitext(linkText[spacePos + 1 .. ^2])


proc filterLink(text: string, pos: var int): string =
  let startPos: int = pos

  # No matter what, move pos to the end of the link
  skipNestedChars(text, pos, '[', ']')

  # Figure out what we skipped and try to return its displayed content.
  if text.continuesWith("[[", startPos):
    # Get the displayed text out of the internal link.
    return extractInternalLink(text[startPos .. <pos])
  else:
    # Get the displayed text out of the external link.
    return extractExternalLink(text[startPos .. <pos])


proc filterHTML(text: string): string =
  ## Remove the contents of the HTML spans listed in SKIP_SPANS by running
  ## the text through a streaming XML parser, and return the text that
  ## remains.
  var tstream: StringStream = newStringStream("")
  var xml: XmlParser

  # Quickly copy the text into the StringStream object
  shallowCopy(tstream.data, text)
  tstream.setPosition(0)

  result = newStringOfCap(text.len)
  xml.open(tstream, FAKE_FILENAME, options={reportWhitespace})
  while true:
    xml.next()
    case xml.kind
    of xmlElementStart, xmlElementOpen:
      if SKIP_SPANS.contains(xml.elementName):
        let skipTo: string = xml.elementName
        while true:
          xml.next()
          if xml.kind == xmlElementEnd and xml.elementName == skipTo:
            break
          elif xml.kind == xmlEof:
            break
    of xmlCharData, xmlWhitespace:
      result.add(xml.charData)
    of xmlEof:
      break
    else:
      discard

  # return result implicitly
  xml.close


proc filterWikitext(text: string): string =
  ## Given the complete wikitext of an article, filter it for the part
  ## that's meant to be read as plain text.

  # This method works by building a 'result' string incrementally, and
  # advancing an index called 'pos' through the text as it goes. Some
  # of the procedures this relies on will also advance 'pos' themselves.
  result = newStringOfCap(text.len)
  var pos = 0
  while pos < text.len:
    # Skip to the next character that could be wiki syntax.
    var found: int = text.find(ANYTHING_INTERESTING_RE, pos)
    if found == -1:
      found = text.len

    # Add everything up until then to the string.
    if found > pos:
      result.add(text[pos .. <found])
      pos = found

    if pos < text.len:
      # Decide what kind of syntax we're looking at, and handle it.
      if text.continuesWith("{{", pos) or text.continuesWith("{|", pos):
        # This is a template or a table; skip its contents entirely.
        skipNestedChars(text, pos, '{', '}')
      elif text[pos] == '[':
        # This is a link; keep only its displayed text.
        result.add(filterLink(text, pos))
      else:
        # Skip over simple formatting, such as bold and italics, bullets,
        # and table detritus.
        let matched: int = text.matchLen(FORMATTING_RE, pos)
        if matched > 0:
          pos += matched
        else:
          # We didn't match any of the cases, so output one character
          # and proceed
          result.add($(text[pos]))
          pos += 1

# XML handling
# ------------

# ArticleData is an array that stores four string properties of a page: its
# title, its text content, its redirect value if it redirects to another page,
# and its namespace number. These are the XML values that we care about for
# each page.
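# For a typical article, for example, article[TITLE] might be "Albedo",
# article[NS] would be "0" (the main article namespace), article[REDIRECT]
# would be empty, and article[TEXT] would hold the page's raw wikitext.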
type
  TagType = enum
    TITLE, TEXT, REDIRECT, NS
  ArticleData = array[TagType, string]

var RELEVANT_XML_TAGS = ["title", "text", "ns"]

proc handleArticle(article: ArticleData, tokenize: bool) =
  if article[NS] == "0" and article[REDIRECT] == "":
    if not tokenize:
      echo("= $1 =" % [article[TITLE]])
    # Parse the article inside a try/except block, discarding the errors
    # that appear due to occasional HTML that's flagrantly bad XML.
    try:
      let text = filterWikitext(filterHTML(article[TEXT]))
      if tokenize:
        let words = text.split(WORD_SEPARATOR_RE)
        for word in words:
          if len(word) > 0:
            echo(unicodeLower(word))
      else:
        echo(text.replace(BLANK_LINE_RE, "\n"))
    except IndexError:
      discard
    except RangeError:
      discard


proc readMediaWikiXML(input: Stream, tokenize: bool, filename="") =
  ## Read the XML content that one actually downloads from Wikimedia,
  ## extracting the article content and sending it to the handleArticle()
  ## procedure.
  var xml: XmlParser
  var textBuffer: string = ""
  var article: ArticleData
  for tag in TITLE..NS:
    article[tag] = ""

  # Keep track of what text content represents. Is it article text, or a
  # similar text property of the page? Is it an attribute value on a tag that
  # we should pay attention to?
  var gettingText: bool = false
  var gettingAttribute: bool = false

  xml.open(input, filename, options={reportWhitespace})
  while true:
    # Scan through the XML, handling each token as it arrives.
    xml.next()
    case xml.kind
    of xmlElementStart, xmlElementOpen:
      if RELEVANT_XML_TAGS.contains(xml.elementName):
        # If this is a "title", "text", or "ns" tag, prepare to get its
        # text content. Move our writing pointer to the beginning of
        # the text buffer, so we can overwrite what was there.
        textBuffer.setLen(0)
        gettingText = true
      elif xml.elementName == "page":
        # If this is a new instance of the tag that contains all
        # these tags, then reset the value that won't necessarily be
        # overridden, which is the redirect value.
        article[REDIRECT].setLen(0)
      elif xml.elementName == "redirect":
        # If this is the start of a redirect tag, prepare to get its
        # attribute value.
        gettingAttribute = true
    of xmlAttribute:
      # If we're looking for an attribute value, and we found one, add it
      # to the buffer.
      if gettingAttribute:
        textBuffer.add(xml.attrValue)
    of xmlCharData, xmlWhitespace:
      # If we're looking for text, and we found it, add it to the buffer.
      if gettingText:
        textBuffer.add(xml.charData)
    of xmlElementEnd:
      # When we reach the end of an element we care about, take the text
      # we've found and store it in the 'article' data structure. We can
      # accomplish this quickly by simply swapping their references.
      case xml.elementName
      of "title":
        swap article[TITLE], textBuffer
      of "text":
        swap article[TEXT], textBuffer
      of "redirect":
        swap article[REDIRECT], textBuffer
      of "ns":
        swap article[NS], textBuffer
      of "page":
        # When we reach the end of the page element, send the article
        # data to handleArticle().
        handleArticle(article, tokenize)
      else:
        discard

      # Now that we've reached the end of an element, stop extracting
      # text. (We'll never need to extract text from elements that can
      # have other XML elements nested inside them.)
      gettingText = false
      gettingAttribute = false
    of xmlEof:
      break
    else:
      discard
  xml.close


const helptext: string = """
wiki2text - transform MediaWiki XML to text
Usage: wiki2text [-t] [-h]

Options:
  -h    Show this help text
  -t    Use a simple tokenizer, outputting one word per line
"""

proc writeHelp() =
  stderr.write(helptext)


when isMainModule:
  var
    tokenize: bool = false
    run: bool = true
  for kind, key, val in getopt():
    case kind
    of cmdLongOption, cmdShortOption:
      case key
      of "tokenize", "t":
        tokenize = true
      of "help", "h":
        writeHelp()
        run = false
      else:
        discard
    else:
      discard

  if run:
    readMediaWikiXML(newFileStream(stdin), tokenize)

--------------------------------------------------------------------------------
/wiki2text.nimble:
--------------------------------------------------------------------------------
[Package]
name = "wiki2text"
version = "0.2.1"
author = "Rob Speer"
description = "Quickly extracts natural-language text from a MediaWiki XML file."
license = "MIT"
bin = "wiki2text"
SkipExt = "nim"
SkipFiles = "Makefile"

[Deps]
Requires: "nim >= 0.11"

--------------------------------------------------------------------------------