├── __init__.py
├── box
    ├── README.md
    ├── test.scala
    └── train.scala
├── README.md
├── stmt.py
└── LICENSE


/__init__.py:
--------------------------------------------------------------------------------
1 | from .stmt import STMT
2 | 


--------------------------------------------------------------------------------
/box/README.md:
--------------------------------------------------------------------------------
1 | Download STMT from [here](http://nlp.stanford.edu/software/tmt/tmt-0.4/) and put it in this directory.
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # topbox
 2 | A small Python 3 wrapper around the Stanford Topic Modeling Toolbox (STMT) that makes working with L-LDA a bit easier; no need to leave the Python environment. More information on its workings can be found on [my blog](https://cmry.github.io/notes/topbox).
 3 | 
 4 | # Setting up
 5 | 
 6 | Just [download](http://nlp.stanford.edu/software/tmt/tmt-0.4/tmt-0.4.0.jar) STMT and put it in the `box` directory. Afterwards, import `topbox` from wherever you left it.
 7 | 
 8 | On Linux, this would look something like this:
 9 | 
10 | ``` shell
11 | $ cd ~
12 | $ git clone https://github.com/cmry/topbox
13 | $ cd ~/topbox/box
14 | $ wget http://nlp.stanford.edu/software/tmt/tmt-0.4/tmt-0.4.0.jar
15 | $ cd ~
16 | $ vi some_topbox_script.py
17 | ```
18 | 
19 | You can paste the code below in the script file to test if it's working.
20 | 
21 | # Example
22 | 
23 | ``` python
24 | import topbox
25 | 
26 | stmt = topbox.STMT('bit_of_testing', epochs=10, mem=15000)
27 | 
28 | 
29 | space = ['text text more text', 'things to do with text']
30 | labels = ['label1 label2', 'label1 label3']
31 | 
32 | stmt.train(space, labels)
33 | 
34 | 
35 | infer = ['this is a text', 'things with more text']
36 | gs = ['label1 label2', 'label1 label3']
37 | 
38 | stmt.test(infer, gs)
39 | 
40 | 
41 | from sklearn.metrics import average_precision_score
42 | 
43 | # array requires numpy and scipy
44 | y_true, y_score = stmt.results(gs, array=True)
45 | 
46 | print(average_precision_score(y_true, y_score))
47 | ```
48 | 


--------------------------------------------------------------------------------
/box/test.scala:
--------------------------------------------------------------------------------
 1 | import scalanlp.io._;
 2 | import scalanlp.stage._;
 3 | import scalanlp.stage.text._;
 4 | import scalanlp.text.tokenize._;
 5 | import scalanlp.pipes.Pipes.global._;
 6 | 
 7 | import edu.stanford.nlp.tmt.stage._;
 8 | import edu.stanford.nlp.tmt.model.lda._;
 9 | import edu.stanford.nlp.tmt.model.llda._;
10 | // Infers per-document topic distributions for a CSV file with a trained Labeled LDA model.
11 | val modelPath = file("modelfolder");
12 | 
13 | println("Loading "+modelPath);
14 | val model = LoadCVB0LabeledLDA(modelPath).asCVB0LDA;
15 | val source = CSVFile("datafile.csv") ~> IDColumn(1);
16 | 
17 | val text = {
18 |   source ~>                              // read from the source file
19 |   Column(3) ~>                           // select column containing text
20 |   TokenizeWith(model.tokenizer.get)      // reuse the tokenizer stored with the model
21 | }
22 | // replaceAll takes a regex: escape the dot and anchor it, so only a trailing extension is cut
23 | val output = file(modelPath, source.meta[java.io.File].getName.replaceAll("\\.csv$",""));
24 | val dataset = LDADataset(text, model.termIndex);
25 | 
26 | println("Writing document distributions to "+output+"-document-topic-distributions-res.csv");
27 | val perDocTopicDistributions = InferCVB0DocumentTopicDistributions(model, dataset);
28 | CSVFile(output+"-document-topic-distributions-res.csv").write(perDocTopicDistributions);
29 | 
30 | // println("Writing topic usage to "+output+"-usage-res.csv");
31 | // val usage = QueryTopicUsage(model, dataset, perDocTopicDistributions);
32 | // CSVFile(output+"-usage-res.csv").write(usage);
33 | 
34 | // println("Estimating per-doc per-word topic distributions");
35 | // val perDocWordTopicDistributions = EstimatePerWordTopicDistributions(
36 | //   model, dataset, perDocTopicDistributions);
37 | // CSVFile(output+"-document-word-topic-distributions.csv").write(perDocWordTopicDistributions);
38 | 
39 | // println("Writing top terms to "+output+"-top-terms.csv");
40 | // val topTerms = QueryTopTerms(model, dataset, perDocWordTopicDistributions, numTopTerms=50);
41 | // CSVFile(output+"-top-terms.csv").write(topTerms);
42 | 
43 | 


--------------------------------------------------------------------------------
/box/train.scala:
--------------------------------------------------------------------------------
 1 | // Stanford TMT Example 6 - Training a LabeledLDA model
 2 | // http://nlp.stanford.edu/software/tmt/0.4/
 3 | 
 4 | // tells Scala where to find the TMT classes
 5 | import scalanlp.io._;
 6 | import scalanlp.stage._;
 7 | import scalanlp.stage.text._;
 8 | import scalanlp.text.tokenize._;
 9 | import scalanlp.pipes.Pipes.global._;
10 | import edu.stanford.nlp.tmt.stage._;
11 | import edu.stanford.nlp.tmt.model.lda._;
12 | import edu.stanford.nlp.tmt.model.llda._;
13 | 
14 | val source = CSVFile("datafile.csv") ~> IDColumn(1);
15 | 
16 | val tokenizer = {
17 |   SimpleEnglishTokenizer() ~>            // tokenize on space and punctuation
18 |   CaseFolder() ~>                        // lowercase everything
19 |   WordsAndNumbersOnlyFilter() ~>         // ignore non-words and non-numbers
20 |   MinimumLengthFilter(1)                 // take terms with >=1 characters
21 | }
22 | 
23 | val text = {
24 |   source ~>                              // read from the source file
25 |   Column(3) ~>                           // select column containing text
26 |   TokenizeWith(tokenizer) ~>             // tokenize with tokenizer above
27 |   TermCounter() ~>                       // collect counts (needed below)
28 |   TermMinimumDocumentCountFilter(1) ~>   // filter terms in <1 docs
29 |   TermDynamicStopListFilter(0) ~>      // filter out 0 most common terms
30 |   DocumentMinimumLengthFilter(1)         // take only docs with >=1 terms
31 | }
32 | 
33 | // define the label field of the dataset (column two of the csv written by stmt.py)
34 | val labels = {
35 |   source ~>                              // read from the source file
36 |   Column(2) ~>                           // take column two, the labels
37 |   TokenizeWith(WhitespaceTokenizer()) ~> // turns label field into an array
38 |   TermCounter() ~>                       // collect label counts
39 |   TermMinimumDocumentCountFilter(0)     // filter labels in <0 docs
40 | }
41 | 
42 | val dataset = LabeledLDADataset(text, labels);
43 | 
44 | // define the model parameters
45 | val modelParams = LabeledLDAModelParams(dataset=dataset);
46 | 
47 | // Name of the output model folder to generate (a placeholder that stmt.py rewrites)
48 | val modelPath = file("modelfolder");
49 | 
50 | // Trains the model, writing to the given output path; stmt.py rewrites the iteration count
51 | TrainCVB0LabeledLDA(modelParams, dataset, output = modelPath, maxIterations = 5);
52 | // or could use TrainGibbsLabeledLDA(modelParams, dataset, output = modelPath, maxIterations = 1500);
53 | 


--------------------------------------------------------------------------------
/stmt.py:
--------------------------------------------------------------------------------
  1 | """Python 2 & 3 wrapper around the Stanford Topic Modeling Toolbox."""
  2 | 
  3 | from csv import writer, reader
  4 | from re import sub
  5 | from subprocess import call
  6 | from os import path, remove, sep
  7 | from shutil import rmtree
  8 | from glob import glob
  9 | from inspect import isgenerator
 10 | from sys import version_info
 11 | 
 12 | # Authors:      Chris Emmery
 13 | # References:   Ramage, Hall, Nallapati, Manning (2009)
 14 | # License:      BSD 3-Clause
 15 | # pylint:       disable=C0103
 16 | 
 17 | 
 18 | class STMT(object):
 19 |     """Stanford Topic Modelling Toolbox Wrapper.
 20 | 
 21 |     This is a wrapper Class around the Stanford Topic Modelling Toolbox. It
 22 |     assumes that you have your vector space in your code, and don't want to
 23 |     bother with the `csv -> scala -> java -> csv -> extract results` process.
 24 |     It therefore compresses all of this in a few class interactions. Basically,
 25 |     you create a model by initiating it with a name, set the amount of epochs
 26 |     and memory as desired, and then start training and testing on data that
 27 |     you have in Python code. After, the class can handle extracting the correct
 28 |     results (even in sklearn format), as well as cleaning up once you're done.
 29 |     Some examples of this will be given below, more information can be found
 30 |     on https://cmry.github.io/notes/topbox.
 31 | 
 32 |     Parameters
 33 |     ----------
 34 |     name : string
 35 |         The name that will be appended to all the saved files. If you want to
 36 |         keep the trained model, this name can be used to load it back in.
 37 | 
 38 |     epochs : integer, optional, default 20
 39 |         The amount of iterations you want L-LDA to train and sample; if you
 40 |         run into some errors, it's a good idea to set this to 1 to save time
 41 |         whilst debugging.
 42 | 
 43 |     mem : integer, optional, default 7000
 44 |         The amount of memory (in MB) that the model will use. By default it
 45 |         assumes that you have 8G of memory, so it will account for 1G of os
 46 |         running. Should be comfortable; adjust if running into OutOfMemory
 47 |         errors though.
 48 | 
 49 |     keep : boolean, optional, default True
 50 |         If set to False, will remove the data and scala files after training,
 51 |         and will remove EVERYTHING after the results are obtained. This can
 52 |         be handy when running a quick topic model and save disk space. If
 53 |         you're running a big model and want to keep it after your session is
 54 |         done, it might be better to just leave it to True.
 55 | 
 56 |     Attributes
 57 |     ----------
 58 |     dir : string
 59 |         Absolute path where the storage area of the topbox is located.
 60 | 
 61 |     Examples
 62 |     --------
 63 |     train = [['sports football', 'this talks about football, or soccer,
 64 |                with a goal and a ball'],
 65 |              ['sports rugby', 'here we have some document where we do a scrum
 66 |                and kick the ball'],
 67 |              ['music concerts', 'a venue with loud music and a stage'],
 68 |              ['music instruments', 'thing that have strings or keys, or
 69 |                whatever']]
 70 | 
 71 |     test = [['music', 'the stage was full of string things'],
 72 |             ['sports', 'we kick a ball around'],
 73 |             ['rugby', 'now add some confusing sentence with novel words what is
 74 |               happening']]
 75 | 
 76 |     import topbox
 77 | 
 78 |     stmt = topbox.STMT('test_model')
 79 |     stmt = topbox.STMT('test_model', epochs=400, mem=14000)
 80 | 
 81 |     train_labels, train_space = zip(*train)
 82 |     test_labels, test_space = zip(*test)
 83 | 
 84 |     stmt.train(train_space, train_labels)
 85 |     stmt.test(test_space, test_labels)
 86 | 
 87 |     y_true, y_score = stmt.results(test_labels, array=True)
 88 | 
 89 |     from sklearn.metrics import average_precision_score
 90 |     average_precision_score(y_true, y_score)
 91 | 
 92 |     Notes
 93 |     -----
 94 |     The code and scala examples are obtained from the Stanford website
 95 |     (http://nlp.stanford.edu/software/tmt/tmt-0.4/). Their code thusly exists
 96 |     in this repository under equal license. Please respect this.
 97 |     """
 98 | 
 99 |     def __init__(self, name, epochs=20, mem=7000, keep=True):
100 |         """Set paths and variables."""
101 |         self.dir = path.normpath(path.dirname(path.realpath(__file__)) + \
102 |             '{0}box'.format(sep)) + sep
103 |         self.name = name
104 |         self.keep = keep
105 |         self.epochs = epochs
106 |         self.mem = mem
107 | 
108 |     def boot(self, mod):
109 |         """Boot script.
110 | 
111 |         Alters the directories in the .scala files for running and testing
112 |         L-LDA (depending on the `mod`). Uses a generic call on the .jar that
113 |         STMT resides in.
114 | 
115 |         Parameters
116 |         ----------
117 |         :mod: string
118 |             Either 'test' or 'train' for swithing states.
119 |         """
120 |         self.scala(mod)
121 |         call(["java", "-Xmx" + str(self.mem) + "m", "-jar", self.dir +
122 |               "tmt-0.4.0.jar", self.dir + self.name + "_" + mod + ".scala"])
123 |         self.scala(mod, 1)
124 | 
125 |     def store(self, space, labels, vsp_type):
126 |         """Data to csv storage.
127 | 
128 | 
129 |         Stores a given (sub)vectorspace to the .csv format that STMT works
130 |         with. The space should be a dict where the key is a tuple with (int,
131 |         str), where int is the index number and str the document its topic
132 |         labels seperated by a whitespace. The value is your vector stored in
133 |         a list.
134 | 
135 |         If you want to iteratively construct a space, provide a generator that
136 |         will feed batches of the space.
137 | 
138 |         Parameters
139 |         ----------
140 |         space : list
141 |             The vector space; a list with text.
142 | 
143 |         labels : list
144 |             List with labels where each index corresponds to the text in space.
145 | 
146 |         vps_type : string
147 |             Either train or test as appendix for the filename.
148 |         """
149 |         csv_file = open("%s%s_%s.csv" % (self.dir, self.name, vsp_type), 'a')
150 |         csv_writer = writer(csv_file)
151 |         for i, zipped in enumerate(zip(labels, space)):
152 |             line = [str(i + 1), zipped[0], zipped[1]]
153 |             if version_info.major < 3:  # fix py2 compat
154 |                 line = [i.encode('utf8') for i in line]
155 |             csv_writer.writerow(line)
156 |         csv_file.close()
157 | 
158 |     def regex(self, f, needle, rock):
159 |         """File name replacer.
160 | 
161 |         Function is used to flip the read object file (original .scale file)
162 |         and write replaced cotents to this newly created file.
163 | 
164 |         Parameters
165 |         ----------
166 |         f : string
167 |             Contents of the original .scala file.
168 | 
169 |         needle : string
170 |             String sequence to be replaced in the original .scala file.
171 | 
172 |         rock : string
173 |             Basically the .read() contents of the original .scala file.
174 |         """
175 |         wf = (self.name + '_').join(f.rsplit('_', 1))
176 |         f = ''.join(f.rsplit('_', 1))
177 |         try:
178 |             rf = open(wf, 'r')
179 |         except IOError:
180 |             rf = open(f, 'r')
181 |         stack = sub(needle, rock, rf.read())
182 |         rf.close()
183 |         with open(wf, 'w') as wf:
184 |             wf.write(stack)
185 | 
186 |     def scala(self, s, r=False):
187 |         """Scala code replacer.
188 | 
189 |         Handles the .scala text replacements. In the basefiles, the replace
190 |         targets are `modelfile` by default. This can also be used to flip
191 |         number of the iterations.
192 | 
193 |         Parameters
194 |         ----------
195 |         s : string
196 |             Has the value of either train or test in the framework.
197 | 
198 |         r : boolean, optional, default False
199 |             Indicates old to new replace by default.
200 |         """
201 |         prep, std = 'maxIterations = ', '5'
202 |         orig, new = 'modelfolder', self.dir + self.name + '_' + 'train'
203 |         o_csv, n_csv = 'datafile.csv', self.dir + self.name + '_' + s + '.csv'
204 |         f = self.dir + '_' + s + '.scala'
205 |         self.regex(f, o_csv, n_csv) if not r else self.regex(f, n_csv, o_csv)
206 |         self.regex(f, orig, new) if not r else self.regex(f, new, orig)
207 |         self.regex(f, prep + std, prep + ' ' + str(self.epochs)) if \
208 |             self.epochs else self.regex(f, prep + std, prep + std)
209 | 
210 |     def m_incidence(self, predicted_row, label_index, gold_standard):
211 |         """Matrix to Incidence.
212 | 
213 |         Extracts the probabilities from the .csvs, and generates an incidence
214 |         vector based on the correct topic labels. If a value is 'NaN', it will
215 |         be skipped (model might have crapped up somewhere). The result is a
216 |         zipped matrix with tuple values giving (incidence, probability).
217 | 
218 |         Parameters
219 |         ----------
220 |         predicted_row : list
221 |             Predicted row in the .csv file.
222 | 
223 |         label_index : list
224 |             Lookup list for topics on index number.
225 | 
226 |         gold_standard : list
227 |             Lookup list for correct topics per document.
228 | 
229 |         Return
230 |         ------
231 |         vector : list of lists
232 |             Incidence matrix with: list(list(tuple(incidence, probability))).
233 |         """
234 |         if 'NaN' in predicted_row:  # don't wanna return NaN
235 |             return
236 |         else:
237 |             vector = [(1 if label_index[i] in gold_standard else 0,
238 |                        float(predicted_row[i + 1])) for i in
239 |                       range(len(label_index))]
240 |             return vector
241 | 
242 |     def get_scores(self, label_index, predicted_weights, true_labels):
243 |         """Grab results.
244 | 
245 |         Given the labelled and original file, retrieve for each
246 |         vector: the correct label, ranks and probabilities. Get
247 |         tuple vector, unzip it and add the incidence part to
248 |         y_true and the probability part to y_score (these are
249 |         sklearn arrays for evluation).
250 | 
251 |         Parameters
252 |         ----------
253 |         label_index : list of tuples
254 |             Enumerated list with topic indexes.
255 | 
256 |         predicted_weights : string
257 |             Csv file directory containing label confidences.
258 | 
259 |         true_labels : string
260 |             Csv file directory containing original material.
261 | 
262 |         Return
263 |         ------
264 |         y_true : list of integers
265 |             Binary list (incidence matrix).
266 | 
267 |         y_score : list of floats
268 |             Probabilities per topic.
269 |         """
270 |         y_true = []
271 |         y_score = []
272 |         for predicted_row, true_row in zip(predicted_weights, true_labels):
273 |             gold_standard = true_row.lower().split()
274 |             rank, prob = zip(*self.m_incidence(predicted_row, label_index,
275 |                                                gold_standard))
276 |             if 1 in rank:
277 |                 y_true.append(rank)
278 |                 y_score.append(prob)
279 | 
280 |         return y_true, y_score
281 | 
282 |     def to_array(self, y_true, y_score):
283 |         """To sklean-ready array.
284 | 
285 |         Converts the incidence matrix and its probabilites to a numpy format.
286 |         Also cleans out columns that produce a sum of zeroes; this results in
287 |         a division by zero error when determining recall. Dependencies are
288 |         both numpu and scipy.
289 | 
290 |         Parameters
291 |         ----------
292 |         y_true : list of integers
293 |             Binary list (incidence matrix).
294 | 
295 |         y_score : list of floats
296 |             Probabilities per topic.
297 | 
298 |         Return
299 |         ------
300 |         (y_true, y_score): numpy arrays
301 |             Filtered and converted version of y_true and y_score input.
302 |         """
303 |         from collections import Counter
304 |         import scipy
305 |         import numpy as np
306 | 
307 |         def scan_empty(y_true):
308 |             c = Counter()
309 |             for x in y_true:
310 |                 for i, y in enumerate(x):
311 |                     c[i] += y
312 |             return [key for key, value in c.items() if value == 0]
313 | 
314 |         def lab_reduce(y_true, y_score):
315 |             empty_indices = scan_empty(y_true)
316 |             i = 0
317 |             for k in empty_indices:
318 |                 y_true = scipy.delete(y_true, k-i, 1)
319 |                 y_score = scipy.delete(y_score, k-i, 1)
320 |                 i += 1
321 |             return y_true, y_score
322 | 
323 |         return lab_reduce(np.asarray(y_true), np.asarray(y_score))
324 | 
325 |     def results(self, true_labels, array=False):
326 |         """Results grabber.
327 | 
328 |         Finds the predicted document topic distribution and label index for the
329 |         model, then retrieves the actual labels from the original file and
330 |         serves these to self.get_scores.
331 | 
332 |         labels : list
333 |             The original set of labels per document
334 | 
335 |         array : boolean, optional, default False
336 |             Returns a cleaned numpy array where a column cannot be all zeroes.
337 |             Has numpy and scipy as dependencies; better handle this outside of
338 |             the class if you do not want to work with those.
339 | 
340 |         Return
341 |         ------
342 |         y_true, y_score : list, list
343 |             List of lists incidence matrix (binary) and list of lists document
344 |             topic probabilities.
345 |         """
346 |         DTDA = 'document-topic-distributions-res'  # doctop file
347 |         LIDX = '00000{0}label-index'.format(sep)   # label index
348 | 
349 |         orf = open("{0}{1}_{2}{3}{4}.txt".format(
350 |             self.dir, self.name, 'train', sep, LIDX), 'r')
351 |         label_index = orf.read().lower().split('\n')[:-1]
352 | 
353 |         lbf = \
354 |             open("{0}{1}_{2}{3}{4}_{5}-{6}.csv".format(
355 |                 self.dir, self.name, 'train', sep, self.name, 'test', DTDA),
356 |                 'r')
357 |         predicted_weights = reader(lbf)
358 | 
359 |         y_true, y_score = self.get_scores(label_index, predicted_weights,
360 |                                           true_labels)
361 | 
362 |         lbf.close()
363 |         orf.close()
364 | 
365 |         if array:
366 |             y_true, y_score = self.to_array(y_true, y_score)
367 | 
368 |         self.cleanup(step='results')
369 |         return y_true, y_score
370 | 
371 |     def cleanup(self, rmall=False, step=False):
372 |         """Cleanup module.
373 | 
374 |         If the user wants the trained model to be kept, it will only remove the
375 |         .csvs and wordcounts. Otherwise, it also dumps the fully trained model
376 |         in self.train.
377 | 
378 |         Parameters
379 |         ----------
380 |         rmall : bool, optional, default False
381 |             Can be used to remove ALL files from box.
382 | 
383 |         step : bool, optional, default False
384 |             Indicates the step so that it will keep the compressed and model
385 |             files.
386 |         """
387 |         pattern = self.name + '_*' if not rmall else '*_*'
388 |         files = glob(self.dir + pattern)
389 |         for f in files:
390 |             if not self.keep and step != 'results':
391 |                 rmtree(f) if '.' not in f else remove(f)
392 |             else:
393 |                 remove(f) if '.' in f and '.gz' not in f else None
394 | 
395 |     def run(self, space, labels, step):
396 |         """Main runner.
397 | 
398 |         Checks if the given space is given in a generator for batching, writes
399 |         it out to a csv with self.store, then self.boot-s the model in either
400 |         train or test mode. If it's in test, it will return the results so that
401 |         self,results does not have to be used.
402 | 
403 |         Parameters
404 |         ----------
405 |         space : list
406 |             The vector space; a list with text.
407 | 
408 |         labels : list
409 |             List with labels where each index corresponds to the text in space.
410 | 
411 |         step : str
412 |             Either test or train.
413 |         """
414 |         if not isgenerator(space):
415 |             space = [space]
416 |             labels = [labels]
417 |         for batch_space, batch_labels in zip(space, labels):
418 |             self.store(batch_space, batch_labels, step)
419 |         space, labels = None, None
420 |         self.boot(step)
421 |         self.cleanup()
422 | 
423 |     def train(self, space, labels):
424 |         """Sugar train.
425 | 
426 |         Will train a previously untrained STMT instance on the given
427 |         vectorspace. Please check the store function for space requirements.
428 |         Can accept a generator for both space and labels.
429 | 
430 |         Parameters
431 |         ----------
432 |         space : list
433 |             The vector space; a list with text.
434 | 
435 |         labels : list
436 |             List with labels where each index corresponds to the text in space.
437 |         """
438 |         self.run(space, labels, 'train')
439 | 
440 |     def test(self, space, labels):
441 |         """Sugar test.
442 | 
443 |         Will test a previously trained STMT instance on the given vectorspace.
444 |         Please check the store function for space requirements.
445 |         Can accept a generator for both space and labels.
446 | 
447 |         Parameters
448 |         ----------
449 |         space : list
450 |             The vector space; a list with text.
451 | 
452 |         labels : list
453 |             List with labels where each index corresponds to the text in space.
454 |         """
455 |         self.run(space, labels, 'test')
456 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------