├── README
├── __init__.py
├── res
    ├── iris.data
    └── names.txt
├── src
    ├── __init__.py
    ├── alg.py
    ├── aot.py
    ├── config.py
    ├── convert-rnc.py
    ├── create-lexicon.py
    ├── dep.py
    ├── liblinear.patch
    ├── ml
    │   ├── __init__.py
    │   ├── nb.py
    │   ├── nn.py
    │   └── svm.py
    ├── morph.py
    ├── mstparser.py
    ├── parsers
    │   ├── __init__.py
    │   └── cyk.py
    ├── pos.py
    ├── rnc.py
    ├── sentiment
    │   ├── __init__.py
    │   ├── demo.html
    │   ├── demo.py
    │   ├── download-kinopoisk.py
    │   ├── index.py
    │   ├── public
    │   │   ├── main.css
    │   │   └── reset5.css
    │   ├── test.py
    │   ├── train.py
    │   └── validate.py
    ├── syntagrus.py
    ├── template.py
    └── train.py
├── test
    ├── pos-test.py
    ├── test-alg.py
    ├── test-cyk.py
    ├── test-iris.py
    ├── test-names.py
    ├── test-nn.py
    └── test-polarity.py
├── tmp
    ├── ids.pickle
    └── svm.model
└── web
    ├── .server.py.swp
    ├── html
        ├── .tagging.html.swp
        └── tagging.html
    ├── public
        ├── .htaccess
        ├── css
        │   ├── html5.js
        │   ├── main.css
        │   └── reset5.css
        ├── favicon.ico
        └── js
        │   ├── .d3-tree-test.js.swp
        │   ├── Curry-1.0.1.js
        │   ├── d3-tree-test.js
        │   ├── d3.v2.min.js
        │   ├── dracula_algorithms.js
        │   ├── dracula_graffle.js
        │   ├── dracula_graph.js
        │   ├── html5.js
        │   ├── jquery-1.4.2.min.js
        │   ├── raphael-min.js
        │   ├── seedrandom.js
        │   └── sigma.min.js
    └── server.py


/README:
--------------------------------------------------------------------------------
1 | Pyrus is a project of analyzing and parsing of Russian language in Python 3.
2 | It is not to replace NLTK, but rather for practice.
3 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irokez/Pyrus/9a5c64991592ca13e0a5726f394fbe2a399501c3/__init__.py


--------------------------------------------------------------------------------
/res/iris.data:
--------------------------------------------------------------------------------
  1 | 5.1,3.5,1.4,0.2,Iris-setosa
  2 | 4.9,3.0,1.4,0.2,Iris-setosa
  3 | 4.7,3.2,1.3,0.2,Iris-setosa
  4 | 4.6,3.1,1.5,0.2,Iris-setosa
  5 | 5.0,3.6,1.4,0.2,Iris-setosa
  6 | 5.4,3.9,1.7,0.4,Iris-setosa
  7 | 4.6,3.4,1.4,0.3,Iris-setosa
  8 | 5.0,3.4,1.5,0.2,Iris-setosa
  9 | 4.4,2.9,1.4,0.2,Iris-setosa
 10 | 4.9,3.1,1.5,0.1,Iris-setosa
 11 | 5.4,3.7,1.5,0.2,Iris-setosa
 12 | 4.8,3.4,1.6,0.2,Iris-setosa
 13 | 4.8,3.0,1.4,0.1,Iris-setosa
 14 | 4.3,3.0,1.1,0.1,Iris-setosa
 15 | 5.8,4.0,1.2,0.2,Iris-setosa
 16 | 5.7,4.4,1.5,0.4,Iris-setosa
 17 | 5.4,3.9,1.3,0.4,Iris-setosa
 18 | 5.1,3.5,1.4,0.3,Iris-setosa
 19 | 5.7,3.8,1.7,0.3,Iris-setosa
 20 | 5.1,3.8,1.5,0.3,Iris-setosa
 21 | 5.4,3.4,1.7,0.2,Iris-setosa
 22 | 5.1,3.7,1.5,0.4,Iris-setosa
 23 | 4.6,3.6,1.0,0.2,Iris-setosa
 24 | 5.1,3.3,1.7,0.5,Iris-setosa
 25 | 4.8,3.4,1.9,0.2,Iris-setosa
 26 | 5.0,3.0,1.6,0.2,Iris-setosa
 27 | 5.0,3.4,1.6,0.4,Iris-setosa
 28 | 5.2,3.5,1.5,0.2,Iris-setosa
 29 | 5.2,3.4,1.4,0.2,Iris-setosa
 30 | 4.7,3.2,1.6,0.2,Iris-setosa
 31 | 4.8,3.1,1.6,0.2,Iris-setosa
 32 | 5.4,3.4,1.5,0.4,Iris-setosa
 33 | 5.2,4.1,1.5,0.1,Iris-setosa
 34 | 5.5,4.2,1.4,0.2,Iris-setosa
 35 | 4.9,3.1,1.5,0.1,Iris-setosa
 36 | 5.0,3.2,1.2,0.2,Iris-setosa
 37 | 5.5,3.5,1.3,0.2,Iris-setosa
 38 | 4.9,3.1,1.5,0.1,Iris-setosa
 39 | 4.4,3.0,1.3,0.2,Iris-setosa
 40 | 5.1,3.4,1.5,0.2,Iris-setosa
 41 | 5.0,3.5,1.3,0.3,Iris-setosa
 42 | 4.5,2.3,1.3,0.3,Iris-setosa
 43 | 4.4,3.2,1.3,0.2,Iris-setosa
 44 | 5.0,3.5,1.6,0.6,Iris-setosa
 45 | 5.1,3.8,1.9,0.4,Iris-setosa
 46 | 4.8,3.0,1.4,0.3,Iris-setosa
 47 | 5.1,3.8,1.6,0.2,Iris-setosa
 48 | 4.6,3.2,1.4,0.2,Iris-setosa
 49 | 5.3,3.7,1.5,0.2,Iris-setosa
 50 | 5.0,3.3,1.4,0.2,Iris-setosa
 51 | 7.0,3.2,4.7,1.4,Iris-versicolor
 52 | 6.4,3.2,4.5,1.5,Iris-versicolor
 53 | 6.9,3.1,4.9,1.5,Iris-versicolor
 54 | 5.5,2.3,4.0,1.3,Iris-versicolor
 55 | 6.5,2.8,4.6,1.5,Iris-versicolor
 56 | 5.7,2.8,4.5,1.3,Iris-versicolor
 57 | 6.3,3.3,4.7,1.6,Iris-versicolor
 58 | 4.9,2.4,3.3,1.0,Iris-versicolor
 59 | 6.6,2.9,4.6,1.3,Iris-versicolor
 60 | 5.2,2.7,3.9,1.4,Iris-versicolor
 61 | 5.0,2.0,3.5,1.0,Iris-versicolor
 62 | 5.9,3.0,4.2,1.5,Iris-versicolor
 63 | 6.0,2.2,4.0,1.0,Iris-versicolor
 64 | 6.1,2.9,4.7,1.4,Iris-versicolor
 65 | 5.6,2.9,3.6,1.3,Iris-versicolor
 66 | 6.7,3.1,4.4,1.4,Iris-versicolor
 67 | 5.6,3.0,4.5,1.5,Iris-versicolor
 68 | 5.8,2.7,4.1,1.0,Iris-versicolor
 69 | 6.2,2.2,4.5,1.5,Iris-versicolor
 70 | 5.6,2.5,3.9,1.1,Iris-versicolor
 71 | 5.9,3.2,4.8,1.8,Iris-versicolor
 72 | 6.1,2.8,4.0,1.3,Iris-versicolor
 73 | 6.3,2.5,4.9,1.5,Iris-versicolor
 74 | 6.1,2.8,4.7,1.2,Iris-versicolor
 75 | 6.4,2.9,4.3,1.3,Iris-versicolor
 76 | 6.6,3.0,4.4,1.4,Iris-versicolor
 77 | 6.8,2.8,4.8,1.4,Iris-versicolor
 78 | 6.7,3.0,5.0,1.7,Iris-versicolor
 79 | 6.0,2.9,4.5,1.5,Iris-versicolor
 80 | 5.7,2.6,3.5,1.0,Iris-versicolor
 81 | 5.5,2.4,3.8,1.1,Iris-versicolor
 82 | 5.5,2.4,3.7,1.0,Iris-versicolor
 83 | 5.8,2.7,3.9,1.2,Iris-versicolor
 84 | 6.0,2.7,5.1,1.6,Iris-versicolor
 85 | 5.4,3.0,4.5,1.5,Iris-versicolor
 86 | 6.0,3.4,4.5,1.6,Iris-versicolor
 87 | 6.7,3.1,4.7,1.5,Iris-versicolor
 88 | 6.3,2.3,4.4,1.3,Iris-versicolor
 89 | 5.6,3.0,4.1,1.3,Iris-versicolor
 90 | 5.5,2.5,4.0,1.3,Iris-versicolor
 91 | 5.5,2.6,4.4,1.2,Iris-versicolor
 92 | 6.1,3.0,4.6,1.4,Iris-versicolor
 93 | 5.8,2.6,4.0,1.2,Iris-versicolor
 94 | 5.0,2.3,3.3,1.0,Iris-versicolor
 95 | 5.6,2.7,4.2,1.3,Iris-versicolor
 96 | 5.7,3.0,4.2,1.2,Iris-versicolor
 97 | 5.7,2.9,4.2,1.3,Iris-versicolor
 98 | 6.2,2.9,4.3,1.3,Iris-versicolor
 99 | 5.1,2.5,3.0,1.1,Iris-versicolor
100 | 5.7,2.8,4.1,1.3,Iris-versicolor
101 | 6.3,3.3,6.0,2.5,Iris-virginica
102 | 5.8,2.7,5.1,1.9,Iris-virginica
103 | 7.1,3.0,5.9,2.1,Iris-virginica
104 | 6.3,2.9,5.6,1.8,Iris-virginica
105 | 6.5,3.0,5.8,2.2,Iris-virginica
106 | 7.6,3.0,6.6,2.1,Iris-virginica
107 | 4.9,2.5,4.5,1.7,Iris-virginica
108 | 7.3,2.9,6.3,1.8,Iris-virginica
109 | 6.7,2.5,5.8,1.8,Iris-virginica
110 | 7.2,3.6,6.1,2.5,Iris-virginica
111 | 6.5,3.2,5.1,2.0,Iris-virginica
112 | 6.4,2.7,5.3,1.9,Iris-virginica
113 | 6.8,3.0,5.5,2.1,Iris-virginica
114 | 5.7,2.5,5.0,2.0,Iris-virginica
115 | 5.8,2.8,5.1,2.4,Iris-virginica
116 | 6.4,3.2,5.3,2.3,Iris-virginica
117 | 6.5,3.0,5.5,1.8,Iris-virginica
118 | 7.7,3.8,6.7,2.2,Iris-virginica
119 | 7.7,2.6,6.9,2.3,Iris-virginica
120 | 6.0,2.2,5.0,1.5,Iris-virginica
121 | 6.9,3.2,5.7,2.3,Iris-virginica
122 | 5.6,2.8,4.9,2.0,Iris-virginica
123 | 7.7,2.8,6.7,2.0,Iris-virginica
124 | 6.3,2.7,4.9,1.8,Iris-virginica
125 | 6.7,3.3,5.7,2.1,Iris-virginica
126 | 7.2,3.2,6.0,1.8,Iris-virginica
127 | 6.2,2.8,4.8,1.8,Iris-virginica
128 | 6.1,3.0,4.9,1.8,Iris-virginica
129 | 6.4,2.8,5.6,2.1,Iris-virginica
130 | 7.2,3.0,5.8,1.6,Iris-virginica
131 | 7.4,2.8,6.1,1.9,Iris-virginica
132 | 7.9,3.8,6.4,2.0,Iris-virginica
133 | 6.4,2.8,5.6,2.2,Iris-virginica
134 | 6.3,2.8,5.1,1.5,Iris-virginica
135 | 6.1,2.6,5.6,1.4,Iris-virginica
136 | 7.7,3.0,6.1,2.3,Iris-virginica
137 | 6.3,3.4,5.6,2.4,Iris-virginica
138 | 6.4,3.1,5.5,1.8,Iris-virginica
139 | 6.0,3.0,4.8,1.8,Iris-virginica
140 | 6.9,3.1,5.4,2.1,Iris-virginica
141 | 6.7,3.1,5.6,2.4,Iris-virginica
142 | 6.9,3.1,5.1,2.3,Iris-virginica
143 | 5.8,2.7,5.1,1.9,Iris-virginica
144 | 6.8,3.2,5.9,2.3,Iris-virginica
145 | 6.7,3.3,5.7,2.5,Iris-virginica
146 | 6.7,3.0,5.2,2.3,Iris-virginica
147 | 6.3,2.5,5.0,1.9,Iris-virginica
148 | 6.5,3.0,5.2,2.0,Iris-virginica
149 | 6.2,3.4,5.4,2.3,Iris-virginica
150 | 5.9,3.0,5.1,1.8,Iris-virginica
151 | 
152 | 


--------------------------------------------------------------------------------
/res/names.txt:
--------------------------------------------------------------------------------
  1 | Оксана f
  2 | Альберт m
  3 | Лазарь m
  4 | Христоф m
  5 | Рунар m
  6 | Феофан m
  7 | Джой m
  8 | Христина f
  9 | Серафим m
 10 | Майслав m
 11 | Зигмунд m
 12 | Антуан m
 13 | Климент m
 14 | Фрол m
 15 | Викторина f
 16 | Фернан m
 17 | Фаина f
 18 | Юджин m
 19 | Серафима f
 20 | Чеслав m
 21 | Арсения f
 22 | Аврор m
 23 | Эрик m
 24 | Самсон m
 25 | Рюрик m
 26 | Святослава f
 27 | Виринея f
 28 | Казимир m
 29 | Кира f
 30 | Вильям m
 31 | Габриель m
 32 | Марин m
 33 | Торез m
 34 | Ираклия f
 35 | Фелиция f
 36 | Нада f
 37 | Гавриил m
 38 | Милада f
 39 | Евгения f
 40 | Иосиф m
 41 | Иоанн m
 42 | Феодосий m
 43 | Исей m
 44 | Назар m
 45 | Вилиор m
 46 | Анисим m
 47 | Лель m
 48 | Нил m
 49 | Филат m
 50 | Прасковья f
 51 | Архип m
 52 | Светослав m
 53 | Ратмир m
 54 | Равель m
 55 | Владимира f
 56 | Нелли f
 57 | Юлиан m
 58 | Будимир m
 59 | Вера f
 60 | Оливия f
 61 | Веселина f
 62 | Слава m
 63 | Володар m
 64 | Адольф m
 65 | Любим m
 66 | Винцент m
 67 | Лионель m
 68 | Евлогий m
 69 | Харита f
 70 | Ксенофонт m
 71 | Любава f
 72 | Аглаида f
 73 | Андрей m
 74 | Кондратий m
 75 | Дана f
 76 | Аполлинария f
 77 | Равиль m
 78 | Эмилий m
 79 | Флоренция f
 80 | Неон m
 81 | Дарья f
 82 | Аскольд m
 83 | Савва m
 84 | Ганна f
 85 | Арина f
 86 | Юлиана f
 87 | Аврелия f
 88 | Зиновий m
 89 | Леонида f
 90 | Матвей m
 91 | Гораций m
 92 | Константин m
 93 | Дарина f
 94 | Эвальд m
 95 | Стефания f
 96 | Мир m
 97 | Святополк m
 98 | Юдифь f
 99 | Августин m
100 | Марат m
101 | Лаэрт m
102 | Антип m
103 | Алёна f
104 | Мира f
105 | Лина f
106 | Феоктиста f
107 | Свет m
108 | Ювеналий m
109 | Сюзанна f
110 | Юлия f
111 | Аргент m
112 | Рубен m
113 | Мальвина f
114 | Ромуальд m
115 | Олимпий m
116 | Мелания f
117 | Воислав m
118 | Ян m
119 | Венедикт m
120 | Мирослава f
121 | Касьян m
122 | Юстин m
123 | Тигран m
124 | Еруслан m
125 | Рем m
126 | Вилена f
127 | Зарина f
128 | Клеопатра f
129 | Таисия f
130 | Эльмир m
131 | Яромир m
132 | Мария f
133 | Октавиан m
134 | Ричард m
135 | Светлана f
136 | Арий m
137 | Парамон m
138 | Рид m
139 | Прохор m
140 | Донара f
141 | Серапион m
142 | Мариан m
143 | Анатолия f
144 | Авангард m
145 | Мстислав m
146 | Роальд m
147 | Герт m
148 | Франц m
149 | Павлина f
150 | Лукерия f
151 | Павел m
152 | Лилиана f
153 | Луиза f
154 | Евгений m
155 | Яна f
156 | Вячеслав m
157 | Февронья f
158 | Тихомир m
159 | Майя f
160 | Ангелина f
161 | Георгий m
162 | Анастасий m
163 | Милен m
164 | Карен m
165 | Магдалина f
166 | Ренат m
167 | Юлий m
168 | Норд m
169 | Филимон m
170 | Валерия f
171 | Мирон m
172 | Прозор m
173 | Исидор m
174 | Лоренс m
175 | Наль m
176 | Тельнан m
177 | Джозеф m
178 | Дидим m
179 | Владлена f
180 | Аксинья f
181 | Мариетта f
182 | Магда f
183 | Арнольд m
184 | Орест m
185 | Регина f
186 | Илиана f
187 | Прокофий m
188 | Агапия f
189 | Анатолий m
190 | Ибрагим m
191 | Евграф m
192 | Ада f
193 | Оливер m
194 | Ананий m
195 | Кристина f
196 | Иона f
197 | Ревмир m
198 | Виолен m
199 | Наталья f
200 | Сильва f
201 | Горн m
202 | Гайя f
203 | Мефодий m
204 | Тихон m
205 | Станислав m
206 | Люсьен m
207 | Яромира f
208 | Сусанна f
209 | Дементий m
210 | Емельян m
211 | Кристоф m
212 | Ревмира f
213 | Никифор m
214 | Венера f
215 | Мари f
216 | Зорий m
217 | Семён m
218 | Евсей m
219 | Руслан m
220 | Чеслава f
221 | Феликсана f
222 | Исай m
223 | Зот m
224 | Антония f
225 | Любомира f
226 | Влада f
227 | Юманита f
228 | Дональт m
229 | Бруно m
230 | Лаврентий m
231 | Лорис m
232 | Илья m
233 | Правдина f
234 | Беатриса f
235 | Болеслав m
236 | Радислав m
237 | Ксения f
238 | Тамила f
239 | Лидия f
240 | Владелин m
241 | Альбин m
242 | Гелла f
243 | Горислава f
244 | Рогнеда f
245 | Феодосия f
246 | Алина f
247 | Юстина f
248 | Эраст m
249 | Феона f
250 | Эсфирь f
251 | Адам m
252 | Руссо m
253 | Савелий m
254 | Макс m
255 | Галактион m
256 | Зоя f
257 | Филадельфия f
258 | Флорентий m
259 | Ярослава f
260 | Монолит m
261 | Беата f
262 | Ярополк m
263 | Сильвест m
264 | Глафира f
265 | Марк m
266 | Злата f
267 | Лениан m
268 | Радамес m
269 | Дар m
270 | Эвелина f
271 | Ипатия f
272 | Трифон m
273 | Фёдор m
274 | Корней m
275 | Горимир m
276 | Назим m
277 | Леонила f
278 | Гертруда f
279 | Илиодор m
280 | Алла f
281 | Искра f
282 | Рафаэль m
283 | Лермонт m
284 | Надир m
285 | Краснослав m
286 | Елизавета f
287 | Вилора f
288 | Поликсена f
289 | Алиса f
290 | Сидор m
291 | Бронислав m
292 | Лукиана f
293 | Васса f
294 | Игнатий m
295 | Февралин m
296 | Товий m
297 | Ленина f
298 | Виталия f
299 | Бела f
300 | Храбр m
301 | Евдокия f
302 | Джон m
303 | Леопольд m
304 | Адриан m
305 | Флавия f
306 | Робеспьер m
307 | Ермил m
308 | Адий m
309 | Торий m
310 | Муза f
311 | Конон m
312 | Борислав m
313 | Роза f
314 | Яков m
315 | Роман m
316 | Симона f
317 | Зинаида f
318 | Варфоломей m
319 | Варвара f
320 | Марта f
321 | Улита f
322 | Эдда f
323 | Альфред m
324 | Марс m
325 | Никандр m
326 | Ростислав m
327 | Джеральд m
328 | Екатерина f
329 | Корнил m
330 | Октябрина f
331 | Велор m
332 | Тальяна f
333 | Авдей m
334 | Клавдий m
335 | Колумбий m
336 | Поликарп m
337 | Эрг m
338 | Евдоким m
339 | Вилорг m
340 | Лукьян m
341 | Елизар m
342 | Гаспар m
343 | Болеслава f
344 | Капитон m
345 | Панфил m
346 | Чарлз m
347 | Пахом m
348 | Элина f
349 | Мирослав m
350 | Вадим m
351 | Софокл m
352 | Пересвет m
353 | Милан m
354 | Леонтия f
355 | Степан m
356 | Тит m
357 | Владислав m
358 | Гаральд m
359 | Федор m
360 | Ипатий m
361 | Авдотья f
362 | Августа f
363 | Вольдемар m
364 | Конкордия f
365 | Милад m
366 | Агап m
367 | Анисия f
368 | Дея f
369 | Лилия f
370 | Максим m
371 | Зара f
372 | Митродора f
373 | Оксар m
374 | Владилена f
375 | Иероним m
376 | Платонида f
377 | Рэм m
378 | Вергилий m
379 | Эльвира f
380 | Артём m
381 | Веста f
382 | Вахтанг m
383 | Гвидон m
384 | Клемент m
385 | Амос m
386 | Надия f
387 | Цезарь m
388 | Декабрий m
389 | Фотий m
390 | Флоренц m
391 | Родион m
392 | Северин m
393 | Вавила m
394 | Лора f
395 | Мурат m
396 | Измаил m
397 | Звенислава f
398 | Евстафий m
399 | Эмилия f
400 | Светозара f
401 | Камиль m
402 | Куприян m
403 | Июлий m
404 | Лука m
405 | Степанида f
406 | Лениана f
407 | Максимильян m
408 | Нельсон m
409 | Мадлен f
410 | Рустем m
411 | Аркадия f
412 | Таир m
413 | Изяслав m
414 | Астрид f
415 | Илий m
416 | Эдмунд m
417 | Бертольд m
418 | Юм m
419 | Арефий m
420 | Отто m
421 | Елена f
422 | Кузьма m
423 | Эрий m
424 | Давыд m
425 | Милия f
426 | Нисон m
427 | Нонна f
428 | Глория f
429 | Боян m
430 | Эмилиан m
431 | Дан m
432 | Геннадий m
433 | Лия f
434 | Арсений m
435 | Лариса f
436 | Владилен m
437 | Меркурий m
438 | Эльдар m
439 | Зигфрид m
440 | Добрыня m
441 | Мирра f
442 | Дина f
443 | Соломон m
444 | Валериан m
445 | Гений m
446 | Викентий m
447 | Ларион m
448 | Софон m
449 | Аристарх m
450 | Ядвига f
451 | Евпраксия f
452 | Воля f
453 | Розалия f
454 | Эдгар m
455 | Арам m
456 | Белла f
457 | Климентий m
458 | Горислав m
459 | Цецилия f
460 | Лор m
461 | Аверкий m
462 | Маврикий m
463 | Севастиан m
464 | Никон m
465 | Аким m
466 | Федора f
467 | Анжела f
468 | Люксен m
469 | Даниар m
470 | Исак m
471 | Арвид m
472 | Пётр m
473 | Евфалия f
474 | Януарий m
475 | Леонид m
476 | Ролан m
477 | Ефим m
478 | Дионисия f
479 | Аэлла f
480 | Гелиан m
481 | Дин m
482 | Манфред m
483 | Омар m
484 | Ульяна f
485 | Еремей m
486 | Левкий m
487 | Олимпия f
488 | Шмидт m
489 | Калина m
490 | Любомир m
491 | Антонин m
492 | Вилор m
493 | Феликс m
494 | Роберт m
495 | Вацлав m
496 | Киприан m
497 | Виталий m
498 | Анимаиса f
499 | Нина f
500 | Туллий m
501 | Северян m
502 | Витольд m
503 | Севастьяна f
504 | Власта f
505 | Жанна f
506 | Боеслав m
507 | Владимир m
508 | Домна f
509 | Вероника f
510 | Эльмар m
511 | Ванадий m
512 | Артемия f
513 | Валерий m
514 | Еликонида f
515 | Нинель f
516 | Зиновия f
517 | Иоанна f
518 | Клариса f
519 | Терентий m
520 | Виссарион m
521 | Пимен m
522 | Капитолина f
523 | Нодар m
524 | Никанор m
525 | Галий m
526 | Эльза f
527 | Влас m
528 | Ева f
529 | Даниил m
530 | Мстислава f
531 | Римма f
532 | Розана f
533 | Софья f
534 | Эдвард m
535 | Радий m
536 | Авенир m
537 | Баграт m
538 | Василий m
539 | Милий m
540 | Даниэль m
541 | Платон m
542 | Юрий m
543 | Леонтий m
544 | Игорь m
545 | Протас m
546 | Акилина f
547 | Евстолия f
548 | Жозеф m
549 | Геронтий m
550 | Николай m
551 | Мариана f
552 | Ефросиния f
553 | Руслана f
554 | Фарид m
555 | Устин m
556 | Ливадий m
557 | Любовь f
558 | Фелицата f
559 | Маргарита f
560 | Святослав m
561 | Виктория f
562 | Вильгельм m
563 | Ярослав m
564 | Марина f
565 | Ждан m
566 | Адель f
567 | Галина f
568 | Млада f
569 | Вартан m
570 | Маркел m
571 | Дамир m
572 | Сократ m
573 | Лукина f
574 | Булат m
575 | Руфина f
576 | Ермолай m
577 | Май m
578 | Рафаил m
579 | Нана f
580 | Партизан m
581 | Саломея f
582 | Натан m
583 | Всеслава f
584 | Густав m
585 | Эдвин m
586 | Аркадий m
587 | Лада f
588 | Гамлет m
589 | Варлам m
590 | Настасья f
591 | Сабина f
592 | Борислава f
593 | Ленар m
594 | Нестор m
595 | Валентин m
596 | Рада f
597 | Агата f
598 | Голуба f
599 | Рустам m
600 | Виола f
601 | Владислава f
602 | Марлен m
603 | Ия f
604 | Анфия f
605 | Анвар m
606 | Эдип m
607 | Нинелла f
608 | Дорофея f
609 | Октавия f
610 | Олеся f
611 | Лавр m
612 | Наина f
613 | Юзефа f
614 | Ироида f
615 | Эдуард m
616 | Домника f
617 | Людмила f
618 | Флорин m
619 | Григорий m
620 | Гурий m
621 | Ольга f
622 | Милана f
623 | Вольфрам m
624 | Алексей m
625 | Сергей m
626 | Ириней m
627 | Епифан m
628 | Горазд m
629 | Алим m
630 | Аполлинарий m
631 | Лир m
632 | Станислава f
633 | Ольгерд m
634 | Борис m
635 | Арсен m
636 | Видана f
637 | Герман m
638 | Егор m
639 | Оскар m
640 | Янина f
641 | Аэлита f
642 | Аида f
643 | Совет m
644 | Илона f
645 | Светозар m
646 | Харитон m
647 | Корнелий m
648 | Рандольф m
649 | Лоэнгрин m
650 | Абрам m
651 | Виктор m
652 | Элеонора f
653 | Анастасия f
654 | Анфиса f
655 | Карина f
656 | Карп m
657 | Ванда f
658 | Сигизмунд m
659 | Ренальд m
660 | Клара f
661 | Нинел m
662 | Тенгиз m
663 | Афанасий m
664 | Карл m
665 | Ида f
666 | Атеист m
667 | Глеб m
668 | Радим m
669 | Азалия f
670 | Осип m
671 | Юлитта f
672 | Геральд m
673 | Бронислава f
674 | Шамиль m
675 | Морис m
676 | Тамара f
677 | Варлен m
678 | Катерина f
679 | Гектор m
680 | Добрава f
681 | Никодим m
682 | Анис m
683 | Юнона f
684 | Ромен m
685 | Гермоген m
686 | Селена f
687 | Альвин m
688 | Олег m
689 | Всеволод m
690 | Эрнест m
691 | Надежда f
692 | Слава f
693 | Гарибальди m
694 | Вилен m
695 | Модест m
696 | Астра f
697 | Татьяна f
698 | Эрика f
699 | Стелла f
700 | Сурен m
701 | Вацлава f
702 | Агнеса f
703 | Вевея f
704 | Матильда f
705 | Маркиан m
706 | Ермиония f
707 | Ий m
708 | Симон m
709 | Эль m
710 | Велислав m
711 | Ким m
712 | Марфа f
713 | Евлалия f
714 | Андрон m
715 | Иван m
716 | Всемил m
717 | Афанасия f
718 | Генрих m
719 | Славяна f
720 | Гелия f
721 | Гайдар m
722 | Берта f
723 | Ипполит m
724 | Фернанд m
725 | Текуса f
726 | Дионисий m
727 | Эльбрус m
728 | Фадей m
729 | Василько m
730 | Светлан m
731 | Виль m
732 | Ефимия f
733 | Кирилла f
734 | Жорес m
735 | Кюри m
736 | Милослава f
737 | Лев m
738 | Джульетта f
739 | Спиридон m
740 | Гордей m
741 | Андриан m
742 | Пров m
743 | Ника f
744 | Милослав m
745 | Богдана f
746 | Анна f
747 | Харитина f
748 | Анита f
749 | Фома m
750 | Конрад m
751 | Остап m
752 | Моисей m
753 | Захар m
754 | Флорентина f
755 | Ахмат m
756 | Герасим m
757 | Мелитина f
758 | Боримир m
759 | Аполлон m
760 | Леонард m
761 | Леся f
762 | Одиссей m
763 | Неонил m
764 | Гаяна f
765 | Каролина f
766 | Василиса f
767 | Дорофей m
768 | Ариадна f
769 | Мечислав m
770 | Ульяна m
771 | Самуил m
772 | Флора f
773 | Ерофей m
774 | Александр m
775 | Афиноген m
776 | Зосим m
777 | Новелла f
778 | Мартын m
779 | Пантелеймон m
780 | Аста f
781 | Наум m
782 | Нифонт m
783 | Демократ m
784 | Свобода f
785 | Энвер m
786 | Рената f
787 | Рауль m
788 | Марлена f
789 | Людвиг m
790 | Тарас m
791 | Либерт m
792 | Светослава f
793 | Марианна f
794 | Артур m
795 | Милонег m
796 | Ефрем m
797 | Богдан m
798 | Раймонд m
799 | Спартак m
800 | Воин m
801 | Тереза f
802 | Жерар m
803 | Овидий m
804 | Декабрина f
805 | Антонина f
806 | Дарьяна f
807 | Эразм m
808 | Юния f
809 | Робинзон m
810 | Исидора f
811 | Трофим m
812 | Дайна f
813 | Иларий m
814 | Матрёна f
815 | Гелена f
816 | Никита m
817 | Теодор m
818 | Милица f
819 | Марсен m
820 | Василина f
821 | Северьян m
822 | Вилий m
823 | Потап m
824 | Кирилл m
825 | Энергий m
826 | Баян m
827 | Палладий m
828 | Павла f
829 | Октябрин m
830 | Сталь m
831 | Томас m
832 | Теймураз m
833 | Альвиан m
834 | Ирм m
835 | Дориан m
836 | Ливия f
837 | Аза f
838 | Аксён m
839 | Иванна f
840 | Агриппина f
841 | Тристан m
842 | Феофания f
843 | Альбина f
844 | Калиса f
845 | Мавра f
846 | Август m
847 | Икар m
848 | Устинья f
849 | Октябрь m
850 | Ной m
851 | Курт m
852 | Электрон m
853 | Михаил m
854 | Тимофей m
855 | Неонила f
856 | Алевтин m
857 | Юпитер m
858 | Пелагея f
859 | Север m
860 | Ростислава f
861 | Демьян m
862 | Рашид m
863 | Фидель m
864 | Вениамин m
865 | Тимур m
866 | Гранит m
867 | Раиса f
868 | Радмила f
869 | Мануил m
870 | Иннокентий m
871 | Демид m
872 | Вольт m
873 | Калерия f
874 | Илларион m
875 | Филипп m
876 | Клавдия f
877 | Раймонда f
878 | Радомир m
879 | Макар m
880 | Энгель m
881 | Донат m
882 | Антон m
883 | Ираида f
884 | Гарри m
885 | Геодар m
886 | Эмма f
887 | Снежана f
888 | Флориана f
889 | Сила m
890 | Рудольф m
891 | Нора f
892 | Новомир m
893 | Зарема f
894 | Эммануил m
895 | Артамон m
896 | Святогор m
897 | Полина f
898 | Евлампий m
899 | Северина f
900 | Гали f
901 | Ирина f
902 | Вилли m
903 | Денис m
904 | Олимпиада f
905 | Адонис m
906 | Фридрих m
907 | Мюд m
908 | Аглая f
909 | Ираклий m
910 | Эйнар m
911 | Флегонт m
912 | Диана f
913 | Эльмира f
914 | Ждана f
915 | Анри m
916 | Бернард m
917 | Владлен m
918 | Афродита f
919 | Диодора f
920 | Онисим m
921 | Валентина f
922 | Остромир m
923 | Елисей m
924 | Вольмир m
925 | Аврора f
926 | Гертруд m
927 | Кир m
928 | Алан m
929 | Бенедикт m
930 | Таира f
931 | Агей m
932 | Карм m
933 | Эрлен m
934 | Агния f
935 | Стратон m
936 | Ермак m
937 | Велимир m
938 | Бажен m
939 | Ипполита f
940 | Инна f
941 | Филарет m
942 | Хиония f
943 | Авелина f
944 | Александра f
945 | Инесса f
946 | Рождеро m
947 | Авксентий m
948 | Арлен m
949 | Констанция f
950 | Порфирий m
951 | Ральф m
952 | Влад m
953 | Алевтина f
954 | Каспар m
955 | Вальтер m
956 | Ифигения f
957 | Леон m
958 | Услада f
959 | Фотина f
960 | Гликерия f
961 | Эвридика f
962 | Инга f
963 | Гелеон m
964 | Илария f
965 | Дмитрий m
966 | Бажена f
967 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irokez/Pyrus/9a5c64991592ca13e0a5726f394fbe2a399501c3/src/__init__.py


--------------------------------------------------------------------------------
/src/alg.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on Aug 19, 2011
 3 | 
 4 | @author: alexpak
 5 | '''
 6 | 
 7 | class Vector:
 8 | 	def __init__(self, iterable):
 9 | 		self.setData(iterable)
10 | 		
11 | 	def setData(self, iterable):
12 | 		self._data = list(iterable)
13 | 
14 | 	def __getitem__(self, i):
15 | 		return self._data[i]
16 | 	
17 | 	def __len__(self):
18 | 		return len(self._data)
19 | 		
20 | 	def __add__(self, term):
21 | 		data = self._data[:]
22 | 		if type(term) in (int, float):
23 | 			for i in range(0, len(data)):
24 | 				data[i] += term
25 | 		elif isinstance(term, Vector):
26 | 			for i in range(0, len(self._data)):
27 | 				data[i] += term[i]
28 | 				
29 | 		return Vector(data)
30 | 	
31 | 	def __sub__(self, term):
32 | 		data = self._data[:]
33 | 		if type(term) in (int, float):
34 | 			for i in range(0, len(data)):
35 | 				data[i] -= term
36 | 		elif isinstance(term, Vector):
37 | 			for i in range(0, len(self._data)):
38 | 				data[i] -= term[i]
39 | 				
40 | 		return Vector(data)
41 | 	
42 | 	def __iadd__(self, term):
43 | 		self.setData(self.__add__(term)) 
44 | 		return self
45 | 	
46 | 	def __repr__(self):
47 | 		return '[{0}]'.format(', '.join([str(i) for i in self._data]))
48 | 			
49 | 
50 | class Matrix:
51 | 	def __init__(self):
52 | 		self._data = None
53 | 		
54 | 	def __radd__(self, term):
55 | 		pass


--------------------------------------------------------------------------------
/src/aot.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import sqlite3
  3 | import re
  4 | import os
  5 | import memcache
  6 | 
  7 | class Morphology:
  8 | 	conn = None
  9 | 	db = None
 10 | 	cache = {}
 11 | 	
 12 | 	def __init__(self, db, lexicon = None):
 13 | 		load = False
 14 | 		if not os.path.exists(db):
 15 | 			load = True
 16 | 			
 17 | 		self.conn = sqlite3.connect(db, check_same_thread = False)
 18 | 		self.mc = memcache.Client(['127.0.0.1:11211'], debug=0)
 19 | 
 20 | #		def re_match(a, b):
 21 | #			return re.match(a, b) is not None
 22 | #		
 23 | #		def concat(a, b):
 24 | #			return a + b
 25 | #			
 26 | #		self.conn.create_function('re_match', 2, re_match)
 27 | #		self.conn.create_function('concat', 2, concat)
 28 | 		
 29 | 		self.db = self.conn.cursor()
 30 | 
 31 | 		if load:
 32 | 			self.load(lexicon)
 33 | 			
 34 | 	def close(self):
 35 | 		self.db.close()
 36 | 		self.conn.close()
 37 | 			
 38 | 	def skip_lines(self, handle):
 39 | 		line = handle.readline()
 40 | 		if not len(line):
 41 | 			return False
 42 | 		
 43 | 		line = line.strip()
 44 | 		if re.search('^\d+$', line):
 45 | 			for i in range(0, int(line)):
 46 | 				line = handle.readline()
 47 | 				if not len(line):
 48 | 					return False
 49 | 		else:
 50 | 			print(line)
 51 | 			return False 
 52 | 		
 53 | 		return True
 54 | 
 55 | 	def load(self, file):
 56 | 		handle = open(file, 'r', encoding='cp1251')
 57 | 		
 58 | 		# load rules
 59 | 		self.load_rules(handle)
 60 | 		
 61 | 		# skip accents
 62 | 		if not self.skip_lines(handle):
 63 | 			return False
 64 | 		
 65 | 		# skip logs
 66 | 		if not self.skip_lines(handle):
 67 | 			return False
 68 | 		
 69 | 		# skip prefixes
 70 | 		if not self.skip_lines(handle):
 71 | 			return False
 72 | 		
 73 | 		print(self.load_lemmas(handle), 'lemmas loaded')
 74 | 		
 75 | 		handle.close()
 76 | 		
 77 | 	def load_rules(self, handle):
 78 | 		# create table
 79 | 		self.db.execute('''create table rules(
 80 | 							id integer,
 81 | 							prefix text,
 82 | 							suffix text)''')
 83 | 		
 84 | 		lines = handle.readline().strip()
 85 | 		reg_split = re.compile('\\%');
 86 | 		alf = '\w';
 87 | 		reg_rule = re.compile('^(?P<suffix>' + alf + '*)\\*(?P<ancode>' + alf + '+)(?:\\*(?P<prefix>' + alf + '+))?$')
 88 | 		
 89 | 		for i in range(0, int(lines)):
 90 | 			line = handle.readline()
 91 | 			if not len(line):
 92 | 				break
 93 | 			
 94 | 			rules = reg_split.split(line.strip())
 95 | 			
 96 | 			for rule in rules:
 97 | 				match = reg_rule.search(rule)
 98 | 				if match is not None:
 99 | 					record = match.groupdict()				  
100 | 					if 'prefix' not in record or record['prefix'] is None:
101 | 						record['prefix'] = ''
102 | 						
103 | 					suffix = record['suffix'].lower()
104 | 					prefix = record['prefix'].lower()
105 | 					
106 | 					self.db.execute('insert into rules (id, prefix, suffix) values (?, ?, ?)', (i, prefix, suffix))
107 | 
108 | 		self.db.execute('create index rules_id on rules(id)')
109 | 		return i
110 | 	
111 | 	def load_lemmas(self, handle):
112 | 		# create table
113 | 		self.db.execute('''create table lemmas(
114 | 							base text,
115 | 							rule integer)''')
116 | 
117 | 		lines = int(handle.readline().strip())
118 | 		reg_split = re.compile('\s+')
119 | 		
120 | 		for i in range(0, lines):
121 | 			line = handle.readline()
122 | 			if not len(line):
123 | 				break
124 | 			
125 | 			record = reg_split.split(line)
126 | 			self.db.execute('insert into lemmas values(?, ?)', (record[0].lower() + '%', int(record[1])))
127 | 			
128 | 		self.db.execute('create index lemmas_base on lemmas(base)')
129 | 		
130 | 		return i
131 | 	
132 | 	def make_forms(self, lemma):
133 | 		self.db.execute('select prefix, suffix from rules where id = ?', (lemma['rule'],))
134 | 
135 | 		forms = []
136 | 		for rule in self.db.fetchall():
137 | 			forms.append({
138 | 						  'base': lemma['base'],
139 | 						  'form': rule[0] + lemma['base'] + rule[1],
140 | 						  })
141 | 		return forms
142 | 
143 | 	def normalize(self, word):
144 | 		word = word.lower()
145 | 
146 | 		lemmas = self.mc.get(word)
147 | 		if lemmas is None:
148 | #		if word not in self.cache:
149 | 			self.db.execute('select base, rule from lemmas where ? like base', (word,))
150 | 			
151 | 			lemmas = []
152 | 			for lemma in self.db.fetchall():
153 | 				base = lemma[0][0:-1]
154 | 				forms = self.make_forms({'base': base, 'rule': lemma[1]})
155 | 				for form in forms:
156 | #					print(word, form['form'])
157 | 					if word == form['form']:
158 | 						init_form = forms[0]['form']
159 | 						lemmas.append(init_form)
160 | 						
161 | #			self.cache[word] = set(lemmas)
162 | 			lemmas = set(lemmas)
163 | 			self.mc.set(word, lemmas)
164 | 					
165 | 		return lemmas
166 | 
167 | if __name__ == '__main__':
168 | 	from optparse import OptionParser
169 | 	parser = OptionParser()
170 | 	parser.usage = '%prog [options]'
171 | 	parser.add_option('-i', '--index', action='store_const', const=True, dest='index')
172 | 	parser.add_option('-d', '--database', action='store', type='string', dest='database')
173 | 	
174 | 	(options, args) = parser.parse_args()
175 | 	if options.index:
176 | 		morphology = Morphology(options.database, args[0])
177 | 		print('Done')
178 | 	else:
179 | 		morphology = Morphology(options.database)
180 | 		print(morphology.normalize(args[0]))


--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Aug 5, 2011
3 | 
4 | @author: alexpak <irokez@gmail.com>
5 | '''
6 | import sys
7 | sys.path.append('/home/alexpak/tools/liblinear-1.8/python/')
8 | 


--------------------------------------------------------------------------------
/src/convert-rnc.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 5, 2011
 4 | 
 5 | @author: alexpak <irokez@gmail.com>
 6 | '''
 7 | 
 8 | import sys
 9 | import re
10 | 
11 | if len(sys.argv) < 2:
12 | 	print('Usage: convert-rnc.py inputfile > outputfile')
13 | 	exit()
14 | 	
15 | skip_lines = [
16 | 	'<\?xml version="1.0" encoding="windows-1251"\?><html><head>',
17 | 	'</head>',
18 | 	'<body>'
19 | ]
20 | re_skip = re.compile('|'.join(skip_lines))
21 | re_del = re.compile('<p[^>]+>|</td><td>|</td></tr><tr><td>')
22 | re_fix1 = re.compile('</se>\s?<se>')
23 | re_fix2 = re.compile('<se><se>')
24 | 	
25 | print('<?xml version="1.0" encoding="utf-8" ?>')
26 | print('<corpus>')
27 | f = open(sys.argv[1], 'rb')
28 | for line in f:
29 | 	line = line.decode('cp1251')
30 | 
31 | 	if re_skip.search(line):
32 | 		continue
33 | 	
34 | 	line = re_del.sub('', line)
35 | 	line = re_fix1.sub('</se>', line)
36 | 	line = re_fix2.sub('<se>', line)
37 | 	
38 | 	print(line, end = '')
39 | f.close()
40 | print('</corpus>')


--------------------------------------------------------------------------------
/src/create-lexicon.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 6, 2011
 4 | 
 5 | @author: alexpak
 6 | '''
 7 | 
 8 | import sys
 9 | import os
10 | import rnc
11 | import sqlite3
12 | 
13 | if len(sys.argv) < 3:
14 | 	exit('Usage: create-lexicon.py filename dbname')
15 | 	
16 | dbname = sys.argv[2]
17 | db_exists = os.path.isfile(dbname)
18 | con = sqlite3.connect(dbname)
19 | cur = con.cursor()
20 | 
21 | def create_db():
22 | 	sql = '''
23 | 	create table words(
24 | 		id integer primary key autoincrement,
25 | 		lemma text,
26 | 		form text,
27 | 		accent integer,
28 | 		info text,
29 | 		freq integer
30 | 	);
31 | 	create index words_lemma_form_info_accent on words(lemma, form, info, accent);
32 | 	'''
33 | 	[cur.execute(st) for st in sql.split(';') if len(st.strip())]
34 | 
35 | if not db_exists:
36 | 	create_db()
37 | 
38 | sentences = rnc.Reader().read(sys.argv[1])
39 | for sentence in sentences:
40 | 	for word in sentence:
41 | 		accent = word[0].index('`') + 1 if '`' in word[0] else 0 
42 | 		form = word[0].replace('`', '')
43 | 		lemma = word[1]['lex']
44 | 		info = word[1]['gr']
45 | 		
46 | 		cur.execute('select id from words where lemma = ? and form = ? and info = ? and accent = ?', (lemma, form, info, accent))
47 | 		row = cur.fetchone()
48 | 		if row is None:
49 | 			cur.execute('insert into words (lemma, form, info, accent, freq) values (?, ?, ?, ?, 1)', (lemma, form, info, accent))
50 | 		else:
51 | 			cur.execute('update words set freq = freq + 1 where id = ?', row)
52 | 		
53 | con.commit()
54 | con.close()


--------------------------------------------------------------------------------
/src/dep.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | '''
  3 | Created on Nov 22, 2011
  4 | 
  5 | @author: alexpak
  6 | '''
  7 | 
  8 | import sys
  9 | import ml
 10 | import math
 11 | from ml.svm import SVM as Classifier
 12 | #from ml.nb import NaiveBayes as Classifier
 13 | from collections import Counter, OrderedDict
 14 | import sqlite3
 15 | import os
 16 | import syntagrus
 17 | 
 18 | features = {'m', 'f', 'n', 'nom', 'gen', 'gen2', 'dat', 'acc', 'ins', 'prep', 'loc', 'sg', 'pl', 'real', 'inf', 'advp', 'adjp', 'imp', 'pass', '1p', '2p', '3p'}
 19 | 
 20 | class Linker:
 21 | 	def __init__(self):
 22 | 		self._cl = Classifier()
 23 | 	
 24 | 	def traverse(self, sentences):
 25 | 		x = []
 26 | 		y = []
 27 | 		for sentence in sentences:
 28 | 			for w in range(0, len(sentence)):
 29 | 				word_from = sentence[w]
 30 | 				feats = {}
 31 | 
 32 | #				meta1 = word_from[1].pos + '_'.join(sorted(word_from[1].feat & features))
 33 | #				feats['f:' + meta1] = 1
 34 | 				
 35 | 				for feat in word_from[1].feat & features:
 36 | 					feats['f:' + feat] = 1
 37 | 				feats['fp:' + word_from[1].pos] = 1
 38 | 				feats['fw:' + word_from[0]] = 1
 39 | 				
 40 | 				for v in range(0, len(sentence)):
 41 | 					if v == w:
 42 | 						continue
 43 | 					
 44 | 					word_to = sentence[v]
 45 | 						
 46 | 					feats2 = feats.copy()
 47 | #					meta2 = word_to[1].pos + '_'.join(sorted(word_to[1].feat & features))
 48 | #					feats['t:' + meta2] = 1
 49 | 					
 50 | 					for feat in word_to[1].feat & features:
 51 | 						feats2['t:' + feat] = 1
 52 | 					for feat in word_from[1].feat & word_to[1].feat:
 53 | 						feats2['c:' + feat] = 1
 54 | 					feats2['tp:' + word_to[1].pos] = 1
 55 | 					feats2['tw:' + word_to[0]] = 1
 56 | 					feats2['dst'] = float(w - v)
 57 | 					
 58 | 					'''
 59 | 					for i in range(1, 3):
 60 | 						u = v - i
 61 | 						if u > 0 or u != w:
 62 | 							continue
 63 | 						word_prev = sentence[u]
 64 | 						for feat in word_prev[1].feat:
 65 | 							feats2[str(i) + 'p:' + feat] = 1
 66 | #						for feat in word_from[1].feat & word_prev[1].feat:
 67 | #							feats2[str(i) + 'pfc:' + feat] = 1
 68 | #						for feat in word_to[1].feat & word_prev[1].feat:
 69 | #							feats2[str(i) + 'ptc:' + feat] = 1
 70 | 						feats2[str(i) + 'pp:' + word_prev[1].pos] = 1
 71 | 						#feats2[str(i) + 'pw:' + word_prev[0]] = 1
 72 | 					'''
 73 | 						
 74 | 					
 75 | #					if word_from[1].dom != word_to[1].id:
 76 | #						continue
 77 | 					
 78 | 					x.append(feats2)
 79 | #					y.append(word_from[1].link if word_from[1].dom == word_to[1].id else 'none')
 80 | 					y.append(int(word_from[1].dom == word_to[1].id))
 81 | 					
 82 | 			#endfor w
 83 | 		#endfor sentence
 84 | 		return (x, y)
 85 | 	
 86 | 	def train(self, sentences):
 87 | 		x, y = self.traverse(sentences)
 88 | 		self._cl.train_regression(x, y)
 89 | 
 90 | 	def predict(self, sentences):
 91 | 		(test_x, test_y) = self.traverse(sentences)
 92 | 		return (self._cl.predict(test_x), test_y)
 93 | 	
 94 | 	def evaluate_bin(self, gold, test):
 95 | 		tp = 0; fp = 0; tn = 0; fn = 0
 96 | 		
 97 | 		for i in range(0, len(gold)):
 98 | 			if gold[i] != 'none':
 99 | 				if test[i] == gold[i]:
100 | 					tp += 1
101 | 				else:
102 | 					fn += 1
103 | 			else:
104 | 				if test[i] == gold[i]:
105 | 					tn += 1
106 | 				else:
107 | 					fp += 1
108 | 				
109 | 		
110 | 		acc = (tp + tn) / (tp + fp + tn + fn) if tp + fp + tn + fn else 0
111 | 		pr = tp / (tp + fp) if tp + fp else 0
112 | 		rec = tp / (tp + fn) if tp + fn else 0
113 | 		f1 = 2 * (pr * rec) / (pr + rec) if pr + rec else 0
114 | 		
115 | 		return (acc, pr, rec, f1)
116 | 	
117 | 	def evaluate_mul(self, gold, test):
118 | 		tp = 0; fp = 0; tn = 0; fn = 0; cl = 0
119 | 		
120 | 		for i in range(0, len(gold)):
121 | 			if gold[i] != 'none':
122 | 				if test[i] != 'none':
123 | 					tp += 1
124 | 				else:
125 | 					fn += 1
126 | 					
127 | 				if test[i] == gold[i]:
128 | 					cl += 1
129 | 					
130 | 			else:
131 | 				if test[i] == 'none':
132 | 					tn += 1
133 | 				else:
134 | 					fp += 1
135 | 		
136 | 		acc = cl / (tp + fn) if tp + fn else 0
137 | 		pr = tp / (tp + fp) if tp + fp else 0
138 | 		rec = tp / (tp + fn) if tp + fn else 0
139 | 		f1 = 2 * (pr * rec) / (pr + rec) if pr + rec else 0
140 | 		
141 | 		return (acc, pr, rec, f1)
142 | 	
143 | 	def test(self, sentences):
144 | 		(estim_y, test_y) = self.predict(sentences)
145 | 		print(Counter(test_y))
146 | 		print(Counter(estim_y))
147 | 		return self.evaluate_mul(test_y, estim_y)
148 | 	
149 | 	def save(self, path):
150 | 		self._cl.save(path)
151 | 		
152 | 	@staticmethod
153 | 	def load(path):
154 | 		obj = Linker()
155 | 		obj._cl = ml.Classifier.load(path)
156 | 		return obj
157 | 	
158 | def print_table(data, outfile = sys.stdout, maxlen = {}):
159 | 	vsep = '|'
160 | 	endl = '\n'
161 | 	s = ''
162 | 	
163 | 	keys = []
164 | 	maxkey = 0
165 | 	for rowkey, row in data.items():
166 | 		l = len(str(rowkey))
167 | 		if l > maxkey:
168 | 			maxkey = l
169 | 		for key in row:
170 | 			if key not in keys:
171 | 				keys.append(key)
172 | 			l = len(str(row[key]))
173 | 			if key not in maxlen or l > maxlen[key]:
174 | 				maxlen[key] = l
175 | 		
176 | 	for key in keys:
177 | 		l = len(str(key))
178 | 		if l > maxlen[key]:
179 | 			maxlen[key] = l
180 | 		if maxlen[key] < 3:
181 | 			maxlen[key] = 3
182 | 			
183 | 	hline = '+' + '-' * maxkey + '+' + '+'.join(['-' * maxlen[key] for key in keys]) + '+'
184 | 
185 | 	s += endl + hline + endl
186 | 	s += vsep
187 | 	s += ' ' * maxkey
188 | 	s += vsep
189 | 	s += vsep.join([str(key).ljust(maxlen[key]) for key in keys])
190 | 	s += vsep
191 | 	s += endl + hline + endl
192 | 	
193 | 	for rowkey, row in data.items():
194 | 		s += vsep
195 | 		s += str(rowkey).ljust(maxkey)
196 | 		s += vsep
197 | 		s += vsep.join([str(row[key] if key in row else '').rjust(maxlen[key]) for key in keys])
198 | 		s += vsep
199 | 		s += endl + hline + endl
200 | 			
201 | 	print(s, file=outfile)
202 | 	return maxlen
203 | 	
204 | class Parser:
205 | 	def __init__(self, linker):
206 | 		self._linker = linker
207 | 		
208 | 	def parse(self, sentence):
209 | 		table_estim = OrderedDict()
210 | 		table_true = OrderedDict()
211 | 		rowwords = set()
212 | 		
213 | 		con = sqlite3.connect('tmp/links')
214 | 		cur = con.cursor()
215 | 		
216 | 		prep = False
217 | 		for w in range(0, len(sentence)):
218 | 			source = sentence[w]
219 | 			source_word = source[0]
220 | 			
221 | 			if source_word in rowwords:
222 | 				source_word += '-' + str(source[1].id)
223 | 			else:
224 | 				rowwords.add(source_word)
225 | 				
226 | 			table_estim[source_word] = OrderedDict()
227 | 			table_true[source_word] = OrderedDict()
228 | 			
229 | 			source_feat = ' '.join([source[1].pos] + sorted(source[1].feat))
230 | 			
231 | 			# root
232 | 			target_word = '_root'
233 | 			if source[1].pos == 'PR':
234 | 				prep = True
235 | 				cur.execute('select sum(freq) from links where ffeat = ? and fword = ? and root', (source_feat, source_word))
236 | 			else:
237 | 				cur.execute('select sum(freq) from links where ffeat = ? and root', (source_feat, ))
238 | 				
239 | 			table_estim[source_word][target_word] = cur.fetchone()[0] or 0
240 | 			table_true[source_word][target_word] = 'root' if source[1].dom == 0 else ''
241 | 			
242 | 			colwords = set()
243 | 			no = False
244 | 			for v in range(0, len(sentence)):
245 | 				target = sentence[v]
246 | 				target_word = target[0]
247 | 				if target_word in colwords:
248 | 					target_word += '-' + str(target[1].id)
249 | 				else:
250 | 					colwords.add(target_word)
251 | 					
252 | 				target_feat = ' '.join([target[1].pos] + sorted(target[1].feat))
253 | 				if target[1].pos == 'CONJ' or (target[1].pos == 'PR' and source[1].pos in {'S', 'ADV', 'ADJ'}):
254 | 					cur.execute('select sum(freq) from links where ffeat = ? and tfeat = ? and tword = ?', (source_feat, target_feat, target_word))
255 | 				elif source[1].pos == 'CONJ' or (source[1].pos == 'PR' and target[1].pos in {'S', 'ADV', 'ADJ'}):
256 | 					cur.execute('select sum(freq) from links where ffeat = ? and fword = ? and tfeat = ?', (source_feat, source_word, target_feat))
257 | 				else:
258 | 					cur.execute('select sum(freq) from links where ffeat = ? and tfeat = ?', (source_feat, target_feat))
259 | #				table_estim[word_from][word_to] = '.' if v == w else round((cur.fetchone()[0] or 0) / (math.log(abs(v - w) + 2)))
260 | 				freq = cur.fetchone()[0] or 0
261 | 					
262 | 				if source[1].pos == 'S' and target[1].pos == 'PR' and w > v and prep:
263 | 					freq = 9999
264 | 					prep = False
265 | 
266 | 				if source_word == 'не' and w < v and not no:
267 | 					freq = 9999
268 | 					no = True
269 | 
270 | 				table_estim[source_word][target_word] = '.' if v == w else freq
271 | 				table_true[source_word][target_word] = '.' if v == w else source[1].link if source[1].dom == target[1].id else ''
272 | #				table_estim[word_from][word_to] = '.' if v == w else 'x' if estim_y[i] else ''
273 | #				table_true[word_from][word_to] = '.' if v == w else 'x' if test_y[i] else ''
274 | 
275 | 		maxlen = print_table(table_true)
276 | 		print_table(table_estim, maxlen = maxlen)
277 | #		'''
278 | 		for rowkey, row in table_estim.items():
279 | 			maxval = max([int(val) if val != '.' else 0 for val in list(row.values())[1:]])
280 | 			for key, val in row.items():
281 | 				if key == '_root':
282 | 					continue
283 | 				if val == '.':
284 | 					continue
285 | 				if val < maxval:
286 | 					table_estim[rowkey][key] = ''
287 | #		'''	
288 | 		print_table(table_estim, maxlen = maxlen)
289 | 		
290 | 	def parse0(self, sentence):
291 | 		estim_y, test_y = self._linker.predict([sentence])
292 | 		i = 0
293 | 		table_estim = OrderedDict()
294 | 		table_true = OrderedDict()
295 | 		rowwords = set()
296 | 		for w in range(0, len(sentence)):
297 | 			word_from = sentence[w][0]
298 | 			if word_from in rowwords:
299 | 				word_from += '-' + str(sentence[w][1].id)
300 | 			else:
301 | 				rowwords.add(word_from)
302 | 				
303 | 			table_estim[word_from] = OrderedDict()
304 | 			table_true[word_from] = OrderedDict()
305 | 			colwords = set()
306 | 
307 | 			for v in range(0, len(sentence)):
308 | 				word_to = sentence[v][0]
309 | 				if word_to in colwords:
310 | 					word_to += '-' + str(sentence[v][1].id)
311 | 				else:
312 | 					colwords.add(word_to)
313 | #				table_estim[word_from][word_to] = '.' if v == w else round(estim_y[i], 5) if estim_y[i] != 'none' else ''
314 | #				table_true[word_from][word_to] = '.' if v == w else test_y[i] if test_y[i] != 'none' else ''
315 | 				table_estim[word_from][word_to] = '.' if v == w else estim_y[i] if estim_y[i] else ''
316 | 				table_true[word_from][word_to] = '.' if v == w else 'x' if test_y[i] else ''
317 | 				i += v != w
318 | 				
319 | 		maxlen = print_table(table_true)
320 | 		print_table(table_estim, maxlen = maxlen)
321 | 		
322 | genders = {'m', 'f', 'n'}
323 | cases = {'nom', 'gen', 'dat', 'acc', 'ins', 'prep', 'gen2', 'loc'}
324 | animacy = {'anim', 'inan'}
325 | number = {'sg', 'pl'}
326 | person = {'1p', '2p', '3p'}
327 | vtypes = {'perf', 'imperf'}
328 | vmood = {'real', 'imp', 'pass'}
329 | vform = {'inf', 'advj', 'advp'}
330 | tenses = {'pst', 'npst', 'prs'}
331 | degree = {'comp', 'supl'}
332 | 
333 | class Links:
334 | 	def __init__(self, dbname):
335 | 		self.dbname = dbname
336 | 		db_exists = os.path.isfile(dbname)
337 | 		self.con = sqlite3.connect(dbname)
338 | 		self.cur = self.con.cursor()
339 | 		
340 | 		if not db_exists:
341 | 			self.create_db()		
342 | 	
343 | 	def create_db(self):
344 | 		sql = '''
345 |         create table links(
346 |         	id integer primary key autoincrement,
347 |         	name text,
348 | 
349 | 			fword text,
350 | 			ffeat text,
351 |         	fpos text,
352 | 			fnum text,
353 |         	fgen text,
354 | 			fcase text,
355 | 			fpers text,
356 |         	fanim text,
357 |         	ftype text,
358 |         	fmood text,
359 |         	ftens text,
360 |         	fdegr text,
361 | 
362 | 			tword text,
363 | 			tfeat text,
364 | 			tpos text,
365 | 			tnum text,
366 | 			tgen text,
367 | 			tcase text,
368 | 			tpers text,
369 | 			tanim text,
370 | 			ttype text,
371 | 			tmood text,
372 | 			ttens text,
373 | 			tdegr text,
374 | 
375 | 			root integer,
376 |         	freq integer,
377 |         	dist integer
378 |         );
379 |         create index links_info on links(name, fword, tword, root, ffeat, tfeat, dist);
380 |         create index links_info2 on links(ffeat, tfeat);
381 |         create index links_info3 on links(ffeat, root);
382 |         create index links_info4 on links(ffeat, fword, root);
383 |         create index links_info5 on links(ffeat, fword, tfeat);
384 |         create index links_info6 on links(ffeat, tfeat, tword);
385 |         '''
386 |         
387 | 		sql0 = '''
388 |         create table links(
389 |         	id integer primary key autoincrement,
390 |         	name text,
391 | 
392 | 			fword text,
393 | 			ffeat text,
394 | 
395 | 			tword text,
396 | 			tfeat text,
397 | 
398 | 			root integer,
399 |         	freq integer
400 |         );
401 |         create index links_info on links(name, fword, tword, root, ffeat, tfeat);
402 |         '''
403 | 		[self.cur.execute(st) for st in sql.split(';') if len(st.strip())]
404 | 		
405 | 	def index(self, sentences):
406 | 		for sentence in sentences:
407 | 			for word_from in sentence:
408 | 				is_root = 0
409 | 				if word_from[1].dom:
410 | 					word_to = sentence[word_from[1].dom - 1]
411 | 				else:
412 | 					word_to = ('', syntagrus.word_t(lemma='', pos='', dom='', link='root', id=0, feat=set()))
413 | 					is_root = 1
414 | 				from_feat = ' '.join([word_from[1].pos] + sorted(word_from[1].feat))
415 | 				to_feat = ' '.join([word_to[1].pos] + sorted(word_to[1].feat))
416 | 			
417 | 				fpos = word_from[1].pos
418 | 				fnum = (number & word_from[1].feat or {None}).pop()
419 | 				fgen = (genders & word_from[1].feat or {None}).pop()
420 | 				fcase = (cases & word_from[1].feat or {None}).pop()
421 | 				fpers = (person & word_from[1].feat or {None}).pop()
422 | 				fanim = (animacy & word_from[1].feat or {None}).pop()
423 | 				ftype = (vtypes & word_from[1].feat or {None}).pop()
424 | 				fmood = (vmood & word_from[1].feat or {None}).pop()
425 | 				ftens = (tenses & word_from[1].feat or {None}).pop()
426 | 				fdegr = (degree & word_from[1].feat or {None}).pop()
427 | 				
428 | 				tpos = word_to[1].pos
429 | 				tnum = (number & word_to[1].feat or {None}).pop()
430 | 				tgen = (genders & word_to[1].feat or {None}).pop()
431 | 				tcase = (cases & word_to[1].feat or {None}).pop()
432 | 				tpers = (person & word_to[1].feat or {None}).pop()
433 | 				tanim = (animacy & word_to[1].feat or {None}).pop()
434 | 				ttype = (vtypes & word_to[1].feat or {None}).pop()
435 | 				tmood = (vmood & word_to[1].feat or {None}).pop()
436 | 				ttens = (tenses & word_to[1].feat or {None}).pop()
437 | 				tdegr = (degree & word_to[1].feat or {None}).pop()
438 | 				
439 | 				dist = word_to[1].id - word_from[1].id
440 | 				
441 | 				self.cur.execute('select id from links where name = ? and fword = ? and tword = ? and root = ? and ffeat = ? and tfeat = ? and dist = ?', (word_from[1].link, word_from[0].lower(), word_to[0].lower(), is_root, from_feat, to_feat, dist))
442 | 				row = self.cur.fetchone()
443 | 				if row is None:
444 | 					sql = '''
445 | 					insert into links (name, fword, tword, root, ffeat, tfeat, freq, dist,
446 | 					fpos, fnum, fgen, fcase, fpers, fanim, ftype, fmood, ftens, fdegr, 
447 | 					tpos, tnum, tgen, tcase, tpers, tanim, ttype, tmood, ttens, tdegr
448 | 					) values (?, ?, ?, ?, ?, ?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
449 | 					'''
450 | 					self.cur.execute(sql, (word_from[1].link, word_from[0].lower(), word_to[0].lower(), is_root, from_feat, to_feat, dist, fpos, fnum, fgen, fcase, fpers, fanim, ftype, fmood, ftens, fdegr, tpos, tnum, tgen, tcase, tpers, tanim, ttype, tmood, ttens, tdegr))
451 | 				else:
452 | 					self.cur.execute('update links set freq = freq + 1 where id = ?', row)
453 | 					
454 | 	def close(self):
455 | 		self.con.commit()
456 | 		self.con.close()
457 | 		
458 | if __name__ == '__main__':
459 | 	import glob
460 | 	from optparse import OptionParser
461 | 	import syntagrus
462 | 	
463 | 	parser = OptionParser()
464 | 	parser.usage = '%prog [options]'
465 | 	
466 | 	(options, args) = parser.parse_args()
467 | 	
468 | 	if not len(args):
469 | 		files = glob.glob('res/*/*/*.tgt')
470 | 		corpus = []
471 | 		for file in files[0:10]:
472 | 			R = syntagrus.Reader()
473 | 			sentences = R.read(file)
474 | 			corpus.extend(sentences)
475 | 			del(R)
476 | 
477 | 		fold_size = round(len(corpus) / 10)
478 | 		
479 | 		train_set = corpus[0:-fold_size]
480 | 		test_set = corpus[-fold_size:]
481 | 		
482 | 		print('{0} sentences'.format(len(corpus)))
483 | 		
484 | 		del(corpus)
485 | 
486 | 		'''
487 | 				
488 | 		L = Links('tmp/links')
489 | 		L.index(train_set)
490 | 		L.close()
491 | 		exit()
492 | 		'''
493 | 		
494 | 		'''
495 | 		
496 | 		L = Linker()
497 | 		L.train(train_set)	
498 | #		results = L.test(test_set)
499 | #		print('Accuracy = {0[0]:.3f}, precision = {0[1]:.3f}, recall = {0[2]:.3f}, F1 = {0[3]:.3f}'.format(results))
500 | 		'''
501 | 		L = None
502 | 
503 | 		P = Parser(L)
504 | 		example = test_set[6] #6
505 | 		for word in example:
506 | 			print(word)
507 | 		P.parse(example)
508 | 


--------------------------------------------------------------------------------
/src/liblinear.patch:
--------------------------------------------------------------------------------
 1 | --- /home/alexpak/tools/liblinear-1.8/python/liblinearutil.py	2011-08-04 00:58:24.017288401 +0200
 2 | +++ /home/alexpak/tools/tmp/liblinear-1.8/python/liblinearutil.py	2011-03-05 09:46:26.000000000 +0100
 3 | @@ -30,7 +30,7 @@
 4 |  	
 5 |  	Load a LIBLINEAR model from model_file_name and return.
 6 |  	"""
 7 | -	model = liblinear.load_model(model_file_name.encode('utf-8'))
 8 | +	model = liblinear.load_model(model_file_name)
 9 |  	if not model: 
10 |  		print("can't open model file %s" % model_file_name)
11 |  		return None
12 | @@ -43,7 +43,7 @@
13 |  
14 |  	Save a LIBLINEAR model to the file model_file_name.
15 |  	"""
16 | -	liblinear.save_model(model_file_name.encode('utf-8'), model)
17 | +	liblinear.save_model(model_file_name, model)
18 |  
19 |  def evaluations(ty, pv):
20 |  	"""
21 | @@ -204,7 +204,7 @@
22 |  		y = [0] * len(x)
23 |  	ACC = evaluations(y, pred_labels)
24 |  	l = len(y)
25 | -	#print("Accuracy = %g%% (%d/%d)" % (ACC, int(l*ACC//100), l))
26 | +	print("Accuracy = %g%% (%d/%d)" % (ACC, int(l*ACC//100), l))
27 |  
28 |  	return pred_labels, ACC, pred_values
29 |  	
30 | 


--------------------------------------------------------------------------------
/src/ml/__init__.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | class Classifier:
 4 | 	def train(self, x, y):
 5 | 		pass
 6 | 	
 7 | 	def predict(self, x):
 8 | 		pass
 9 | 	
10 | 	def evaluate(self, gold, test):
11 | 		tp = 0; fp = 0
12 | 		
13 | 		for i in range(0, len(gold)):
14 | 			if test[i] == gold[i]:
15 | 				tp += 1
16 | 			else:
17 | 				fp += 1
18 | 		
19 | 		acc = tp / (tp + fp) if tp + fp else 0
20 | 		
21 | 		return (acc, )
22 | 	
23 | 	def evaluate_bin(self, gold, test, true_class):
24 | 		tp = 0; fp = 0; tn = 0; fn = 0
25 | 		
26 | 		for i in range(0, len(gold)):
27 | 			if gold[i] == true_class:
28 | 				if test[i] == gold[i]:
29 | 					tp += 1
30 | 				else:
31 | 					fn += 1
32 | 			else:
33 | 				if test[i] == gold[i]:
34 | 					tn += 1
35 | 				else:
36 | 					fp += 1
37 | 
38 | 		acc = (tp + tn) / (tp + fp + tn + fn) if tp + fp + tn + fn else 0
39 | 		pr = tp / (tp + fp) if tp + fp else 0
40 | 		rec = tp / (tp + fn) if tp + fn else 0
41 | 		prn = tn / (tn + fn) if tn + fn else 0
42 | 		f1 = 2 * (pr * rec) / (pr + rec) if pr + rec else 0
43 | 		
44 | 		return (acc, pr, rec, f1, prn)
45 | 
46 | 	
47 | 	def save(self, path):
48 | 		f = open(path, 'wb')
49 | 		pickle.dump(self, f)
50 | 		f.close()
51 | 	
52 | 	@staticmethod
53 | 	def load(path):
54 | 		f = open(path, 'rb')
55 | 		obj = pickle.load(f)
56 | 		f.close()
57 | 		return obj
58 | 	
59 | class Autoincrement:
60 | 	def __init__(self):
61 | 		self._ids = {}
62 | 		self._inv = {}
63 | 		
64 | 	def setId(self, val):
65 | 		if val not in self._ids:
66 | 			self._ids[val] = len(self._ids) + 1
67 | 			self._inv[self._ids[val]] = val
68 | 			
69 | 		return self._ids[val]
70 | 	
71 | 	def getId(self, val):
72 | 		return self._ids[val] if val in self._ids else 0
73 | 	
74 | 	def getVal(self, id):
75 | 		return self._inv[id] if id in self._inv else None
76 | 	
77 | 	def count(self):
78 | 		return len(self._ids)
79 | 	
80 | class FeatureSpace:
81 | 	def __init__(self):
82 | 		self.featureset = {}
83 | 		self.start = 0
84 | 		self.default_size = int(1e5)
85 | 		
86 | 	def add(self, featureset, size = None):
87 | 		if size is None:
88 | 			size = self.default_size
89 | 			
90 | 		for (feature, value) in featureset.items():
91 | 			self.featureset[feature + self.start] = value
92 | 		
93 | 		self.start += size


--------------------------------------------------------------------------------
/src/ml/nb.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on Aug 17, 2011
  3 | 
  4 | @author: alexpak
  5 | '''
  6 | 
  7 | import math
  8 | from collections import defaultdict
  9 | from .. import ml
 10 | 
 11 | class NaiveBayes(ml.Classifier):
 12 | 	def __init__(self):
 13 | 		pass
 14 | 	
 15 | 	def __repr__(self):
 16 | 		return 'NaiveBayes'
 17 | 	
 18 | 	def _gaus(self, i, mean, var):
 19 | 		return (1 / math.sqrt(2 * math.pi * var) * math.exp(- (i - mean) ** 2 / (2 * var))) if var > 0 else float(1 if i == mean else 0)
 20 | 			
 21 | 
 22 | 	def _prob(self, C, dim, val):
 23 | 		p = 0
 24 | 		if dim in self._P[C]:
 25 | 			p = self._P[C][dim]
 26 | 		elif dim in self._F[C]:
 27 | 			p = self._gaus(val, self._F[C][dim][0], self._F[C][dim][1])
 28 | 			
 29 | 		return p
 30 | 	
 31 | 	def train(self, x, y):
 32 | 		data = defaultdict(list)
 33 | 		labels = set()
 34 | 		discrete_features = set()
 35 | 		numeric_features = set()
 36 | 		i = 0
 37 | 		for C in y:
 38 | 			labels.add(C)
 39 | 			data[C].append(x[i])
 40 | 			for dim in x[i]:
 41 | 				if isinstance(x[i][dim], float): 
 42 | 					numeric_features.add(dim)
 43 | 				else:
 44 | 					discrete_features.add(dim)
 45 | 			i += 1
 46 | 		
 47 | 		ndim = len(discrete_features)
 48 | 
 49 | 		# train discrete features
 50 | 		P = {}
 51 | 		for C in data:
 52 | 			count = defaultdict(int)
 53 | 			total = 0
 54 | 			for sample in data[C]:
 55 | 				for dim, val in sample.items():
 56 | 					if dim in discrete_features:
 57 | 						count[dim] += val
 58 | 						total += val
 59 | 			
 60 | 			P[C] = {}
 61 | 			for dim in discrete_features:
 62 | 				P[C][dim] = (1 + count[dim]) / (ndim + total)
 63 | 		self._P = P
 64 | 				
 65 | 		# train numeric features
 66 | 		F = {}
 67 | 		for C in data:
 68 | 			F[C] = {}
 69 | 			n = 0
 70 | 			for dim in numeric_features:
 71 | 				n += 1
 72 | 				mean = 0; var = 0; N = 0
 73 | 				
 74 | 				# calculate mean and length
 75 | 				for sample in data[C]:
 76 | 					mean += sample[dim] if dim in sample else 0
 77 | 					N += 1
 78 | 				mean /= N
 79 | 				
 80 | 				# calculate variance
 81 | 				for sample in data[C]:
 82 | 					var += (mean - (sample[dim] if dim in sample else 0)) ** 2
 83 | 				var /= (N - 1) if N > 1 else N
 84 | 				
 85 | 				F[C][dim] = (mean, var)
 86 | 								
 87 | 		self._F = F
 88 | 	
 89 | 	def predict(self, x, return_likelihood = False):
 90 | 		y = []
 91 | 		for sample in x:
 92 | 			L = defaultdict(float)
 93 | 			for C in self._P:
 94 | 				for dim in sample:
 95 | 					L[C] += math.log(self._prob(C, dim, sample[dim]) or 1)
 96 | 					
 97 | #			y.append(max(L.keys(), key = lambda i: L[i]) if len(L) else next(iter(self._P)))
 98 | 			if return_likelihood:
 99 | 				y.append(L)
100 | 			else:
101 | 				y.append(max(L.keys(), key = lambda i: L[i]) if len(L) else None)
102 | 			
103 | 		return y
104 | 	
105 | 


--------------------------------------------------------------------------------
/src/ml/nn.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on Aug 18, 2011
  3 | 
  4 | @author: alexpak
  5 | '''
  6 | 
  7 | import ml
  8 | import math
  9 | import random
 10 | from collections import defaultdict
 11 | 
 12 | random.seed()
 13 | 
 14 | class Perceptron(ml.Classifier):
 15 | 	def __init__(self, Nh):
 16 | 		self.Nh = Nh
 17 | 		
 18 | 		self._labels = ml.Autoincrement()
 19 | 		self._features = ml.Autoincrement()
 20 | 		
 21 | 	def _init(self, Ni, Nh, No):
 22 | 		self.momentum = 0.9
 23 | 		self.learn_rate = 0.5
 24 | 		
 25 | 		self._Wh = [[self._seed() for _ in range(0, Ni)] for __ in range(0, Nh)]
 26 | 		self._Wo = [[self._seed() for _ in range(0, Nh)] for __ in range(0, No)]
 27 | 		
 28 | 		self._dWh = [[0] * Ni] * Nh
 29 | 		self._dWo = [[0] * Nh] * No
 30 | 
 31 | 	def get_class_id(self, C):
 32 | 		if C not in self._class_ids:
 33 | 			self._class_ids[C] = len(self._class_ids)
 34 | 		
 35 | 	def _seed(self):
 36 | 		return (random.random() - 0.5)
 37 | 		
 38 | 	def _sigmod(self, x):
 39 | 		return x
 40 | 		return 1 / (1 + math.exp(-x))
 41 | 	
 42 | 	def _calc_layer(self, input, W):
 43 | 		output = []
 44 | 		for i in range(0, len(W)):
 45 | 			s = 0
 46 | 			for j in range(0, len(W[i])):
 47 | 				s += W[i][j] * input[j]
 48 | 			output.append(self._sigmod(s))
 49 | 			
 50 | 		return output
 51 | 
 52 | 	def _propagate(self, input):
 53 | 		self._pi = input
 54 | 		self._ph = self._calc_layer(self._pi, self._Wh)
 55 | 		self._po = self._calc_layer(self._ph, self._Wo)
 56 | 		return self._po
 57 | 	
 58 | 	def _backpropagate(self, output):
 59 | 		# delta's for output layer 
 60 | 		do = []
 61 | 		for i in range(0, len(self._Wo)):
 62 | 			print(output[i], self._po[i])
 63 | 			do.append(self._po[i] * (1 - self._po[i]) * (output[i] - self._po[i]))
 64 | #		print(do)
 65 | 			
 66 | 		# correct output layer weights
 67 | 		for i in range(0, len(self._Wo)):
 68 | 			for j in range(0, len(self._Wo[i])):
 69 | 				self._dWo[i][j] = self.momentum * self._dWo[i][j] + (1 - self.momentum) * self.learn_rate * do[i] * self._ph[j]
 70 | 				self._Wo[i][j] += self._dWo[i][j]
 71 | 				
 72 | 		# delta's for hidden layer
 73 | 		dh = []
 74 | 		for i in range(0, len(self._Wh)):
 75 | 			d = 0
 76 | 			for j in range(0, len(self._Wo)):
 77 | 				d += do[j] * self._Wo[j][i]
 78 | 			d *= self._ph[i] * (1 - self._ph[i])
 79 | 			dh.append(d)
 80 | #		print(dh)
 81 | 			
 82 | 		# correct hidden layer weights
 83 | 		for i in range(0, len(self._Wh)):
 84 | 			for j in range(0, len(self._Wh[i])):
 85 | 				self._dWh[i][j] = self.momentum * self._dWh[i][j] + (1 - self.momentum) * self.learn_rate * dh[i] * self._pi[j]
 86 | 				self._Wh[i][j] += self._dWh[i][j]
 87 | 
 88 | 		print(self._Wo)
 89 | 		print(self._Wh)
 90 | 		print()
 91 | 	
 92 | 	def train(self, x, y):
 93 | 		labels = [self._labels.setId(C) for C in y]
 94 | 		data = []
 95 | 		for sample in x:
 96 | 			data.append(defaultdict(float, [(self._features.setId(d), sample[d]) for d in sample]))
 97 | 			
 98 | 		self._init(self._features.count(), self.Nh, self._labels.count())
 99 | 
100 | 		epsilon = 1e-3
101 | 		for epoch in range(1, 10):
102 | 			i = 0
103 | 			error = 0
104 | 			for sample in data:
105 | 				output = self._propagate(sample)
106 | 				target = defaultdict(float)
107 | 				target[labels[i] - 1] = 1
108 | 				self._backpropagate(target)
109 | 				for j in range(0, len(output)):
110 | 					error += (output[j] - target[j]) ** 2
111 | 					
112 | 				i += 1
113 | 			
114 | 			print(error)
115 | 			print()
116 | 			if error < epsilon:
117 | 				break
118 | 			
119 | 	def predict(self, x):
120 | 		y = []
121 | 		for sample in x:
122 | 			output = self._propagate(defaultdict(float, [(self._features.getId(d), sample[d]) for d in sample]))
123 | 			which_max = max(range(0, len(output)), key = lambda i: output[i])
124 | 			y.append(self._labels.getVal(which_max + 1))
125 | 			
126 | 		return y


--------------------------------------------------------------------------------
/src/ml/svm.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on Aug 18, 2011
 3 | 
 4 | @author: alexpak
 5 | '''
 6 | import sys
 7 | sys.path.append('/home/alexpak/tools/liblinear-1.8/python/')
 8 | 
 9 | import liblinearutil as liblinear
10 | from .. import ml
11 | import pickle
12 | 
13 | class SVM(ml.Classifier):
14 | 	def __init__(self):
15 | 		self._labels = ml.Autoincrement()
16 | 		self._features = ml.Autoincrement()
17 | 		self._regression = False
18 | 
19 | 	def __repr__(self):
20 | 		return 'SVM'
21 | 	
22 | 	def save(self, path):
23 | 		liblinear.save_model(path + '-model', self._model)
24 | 		del(self._model)
25 | 		ml.Classifier.save(self, path)
26 | 		
27 | 	@staticmethod
28 | 	def load(path):
29 | 		obj = ml.Classifier.load(path)
30 | 		obj._model = liblinear.load_model(path + '-model')
31 | 		return obj
32 | 	
33 | 	def train(self, x, y, biased = False):
34 | 		data = []
35 | 		for sample in x:
36 | 			data.append(dict([(self._features.setId(d), sample[d]) for d in sample]))
37 | 			
38 | 		labels = [self._labels.setId(C) for C in y]
39 | 		if self._labels.count() == 2:
40 | 			labels = [1 if label == 1 else -1 for label in labels]
41 | 			param = liblinear.parameter('-c 1 -s 2 -q' + (' -B {0}'.format(biased) if biased else ''))
42 | 		else:
43 | 			param = liblinear.parameter('-c 1 -s 4 -q' + (' -B {0}'.format(biased) if biased else ''))
44 | 		prob = liblinear.problem(labels, data)
45 | 		self._model = liblinear.train(prob, param)
46 | 		
47 | 	def train_regression(self, x, y):
48 | 		data = []
49 | 		for sample in x:
50 | 			data.append(dict([(self._features.setId(d), sample[d]) for d in sample]))
51 | 			
52 | 		self._regression = True
53 | 		param = liblinear.parameter('-c 1 -s 0')
54 | 		prob = liblinear.problem(y, data)
55 | 		self._model = liblinear.train(prob, param)
56 | 
57 | 	def predict(self, x):
58 | 		y = []
59 | 		for sample in x:
60 | 			data = dict([(self._features.getId(d), sample[d]) for d in sample if self._features.getId(d)])
61 | 			label, _, _ = liblinear.predict([0], [data], self._model, '')
62 | 			if self._regression:
63 | 				y.append(label[0])
64 | 			else:
65 | 				if self._labels.count() == 2:
66 | 					label[0] = 1 if label[0] == 1 else 2
67 | 				y.append(self._labels.getVal(label[0]))
68 | 			
69 | 		return y


--------------------------------------------------------------------------------
/src/morph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | '''
  3 | Created on Nov 21, 2011
  4 | 
  5 | @author: alexpak
  6 | '''
  7 | 
  8 | import sys
  9 | 
 10 | from yatk import ml
 11 | from yatk.ml import svm
 12 | from yatk.ml.svm import SVM as Classifier
 13 | from collections import Counter
 14 | 
 15 | sys.modules['ml'] = ml
 16 | sys.modules['ml.svm'] = svm
 17 | 
 18 | def intersects_classes(classes):
 19 | 	return lambda w: (w[1].feat & classes).pop()
 20 | 
 21 | def intersects_classes_or_none(classes, none):
 22 | 	return lambda w: (w[1].feat & classes or {none}).pop()
 23 | 
 24 | def has_classes(pos, classes):
 25 | 	return lambda w: w[1].pos == pos and w[1].feat & classes
 26 | 
 27 | def pos_equals(pos):
 28 | 	return lambda w: w[1].pos == pos
 29 | 
 30 | def has_class(a_class):
 31 | 	return lambda w: int(a_class in w[1].feat)
 32 | 	
 33 | pos = {'S', 'A', 'V', 'ADV', 'NID', 'NUM', 'PR', 'PART', 'CONJ', 'COM', 'INTJ', 'P', 'UNK'}
 34 | 
 35 | genders = {'m', 'f', 'n'}
 36 | cases = {'nom', 'gen', 'dat', 'acc', 'ins', 'prep', 'gen2', 'loc'}
 37 | animacy = {'anim', 'inan'}
 38 | number = {'sg', 'pl'}
 39 | person = {'1p', '2p', '3p'}
 40 | vtypes = {'perf', 'imperf'}
 41 | vmood = {'real', 'imp', 'pass'}
 42 | vform = {'inf', 'advj', 'advp'}
 43 | tenses = {'pst', 'npst', 'prs'}
 44 | degree = {'comp', 'supl'}
 45 | 
 46 | cats = [
 47 | 	('pos', lambda w: True, lambda w: w[1].pos),
 48 | 	
 49 | 	('s-gender', has_classes('S', genders), intersects_classes(genders)),
 50 | 	('s-case', has_classes('S', cases), intersects_classes(cases)),
 51 | 	('s-animacy', has_classes('S', animacy), intersects_classes(animacy)),
 52 | 	('s-number', has_classes('S', number), intersects_classes(number)),
 53 | 
 54 | 	('v-form', pos_equals('V'), intersects_classes_or_none(vform, 'pers')),
 55 | 	('v-person', has_classes('V', person), intersects_classes(person)),
 56 | 	('v-number', has_classes('V', number), intersects_classes(number)),
 57 | 	('v-gender', has_classes('V', genders), intersects_classes(genders)),
 58 | 	('v-type', has_classes('V', vtypes), intersects_classes(vtypes)),
 59 | 	('v-tense', has_classes('V', tenses), intersects_classes(tenses)),
 60 | 	('v-mood', has_classes('V', vmood), intersects_classes(vmood)),
 61 | 
 62 | 	('vadj-number', has_classes('VADJ', number), intersects_classes(number)),
 63 | 	('vadj-gender', has_classes('VADJ', genders), intersects_classes(genders)),
 64 | 	('vadj-type', has_classes('VADJ', vtypes), intersects_classes(vtypes)),
 65 | 	('vadj-tense', has_classes('VADJ', tenses), intersects_classes(tenses)),
 66 | 	('vadj-mood', has_classes('VADJ', vmood), intersects_classes(vmood)),
 67 | 	('vadj-case', has_classes('VADJ', cases), intersects_classes(cases)),
 68 | 	
 69 | 	('a-gender', has_classes('A', genders), intersects_classes(genders)),
 70 | 	('a-case', has_classes('A', cases), intersects_classes(cases)),
 71 | 	('a-number', has_classes('A', number), intersects_classes(number)),
 72 | 	('a-degree', pos_equals('A'), intersects_classes_or_none(degree, 'ncomp')),
 73 | 	('a-short', pos_equals('A'), has_class('shrt')),
 74 | 	('a-animacy', has_classes('A', animacy), intersects_classes(animacy)),
 75 | 
 76 | 	('adv-comp', pos_equals('ADV'), intersects_classes_or_none(degree, 'ncomp')),
 77 | 	
 78 | 	('num-gender', has_classes('NUM', genders), intersects_classes(genders)),
 79 | 	('num-case', has_classes('NUM', cases), intersects_classes(cases)),
 80 | 	('num-number', has_classes('NUM', number), intersects_classes(number)),
 81 | 	('num-degree', pos_equals('NUM'), intersects_classes_or_none(degree, 'ncomp')),
 82 | ]
 83 | 
 84 | class Guesser:
 85 | 	def __init__(self):
 86 | 		self._cl = Classifier()
 87 | 		
 88 | 	def is_candidate(self, word):
 89 | 		return True
 90 | 		
 91 | 	def make_class(self, word):
 92 | 		pass
 93 | 		
 94 | 	def traverse(self, sentences):
 95 | 		x = []
 96 | 		y = []
 97 | 		for sentence in sentences:
 98 | 			for w in range(0, len(sentence)):
 99 | 				word = sentence[w]
100 | 				
101 | 				if not self.is_candidate(word):
102 | 					continue
103 | 				
104 | 				x.append(self.gen_features(sentence, w))
105 | 				y.append(self.make_class(word))
106 | 				
107 | 		return (x, y)
108 | 	
109 | 	def train(self, sentences):
110 | 		(train_x, train_y) = self.traverse(sentences)
111 | 		self._cl.train(train_x, train_y)
112 | 	
113 | 	def predict(self, sentences):
114 | 		(test_x, test_y) = self.traverse(sentences)
115 | 		return (self._cl.predict(test_x), test_y)
116 | 	
117 | 	def test(self, sentences):
118 | 		(estim_y, test_y) = self.predict(sentences)
119 | 		return self._cl.evaluate(test_y, estim_y)
120 | 	
121 | 	def guess(self, word):
122 | 		return self._cl.predict([self.gen_features([(word,)], 0)])[0]
123 | 	
124 | 	def gen_features(self, sentence, w):
125 | 		word = sentence[w][0]
126 | 		x = {}
127 | 		
128 | 		x['p3:' + word[0:3]] = 1
129 | 		x['p4:' + word[0:4]] = 1
130 | 		x['p5:' + word[0:5]] = 1
131 | 		x['p6:' + word[0:6]] = 1
132 | #		x['s1:' + word[-1:]] = 1
133 | 		x['s2:' + word[-2:]] = 1
134 | 		x['s3:' + word[-3:]] = 1
135 | 		x['s4:' + word[-4:]] = 1
136 | 		x['s5:' + word[-5:]] = 1
137 | 		x['w:' + word] = 1
138 | 		
139 | 		for i in range(1, 4):
140 | 			if w > i - 1:
141 | 				word = sentence[w - i][0]
142 | #				x[str(i) + 'p3:' + prev[0:3]] = 1
143 | #				x[str(i) + 'p4:' + prev[0:4]] = 1
144 | #				x[str(i) + 'p5:' + prev[0:5]] = 1
145 | #				x[str(i) + 'p6:' + prev[0:6]] = 1
146 | 		#		x['s1:' + word[-1:]] = 1
147 | 				x[str(i) + 's2:' + word[-2:]] = 1
148 | 				x[str(i) + 's3:' + word[-3:]] = 1
149 | 				x[str(i) + 's4:' + word[-4:]] = 1
150 | #				x[str(i) + 's5:' + prev[-5:]] = 1
151 | 				x[str(i) + 'w:' + word] = 1
152 | 				
153 | 		for i in range(1, 2):
154 | 			if w + i < len(sentence) - 1:
155 | 				word = sentence[w + i][0]
156 | #				x[str(i) + 'p3:' + prev[0:3]] = 1
157 | #				x[str(i) + 'p4:' + prev[0:4]] = 1
158 | #				x[str(i) + 'p5:' + prev[0:5]] = 1
159 | #				x[str(i) + 'p6:' + prev[0:6]] = 1
160 | 		#		x['s1:' + word[-1:]] = 1
161 | 				x[str(i) + '+s2:' + word[-2:]] = 1
162 | 				x[str(i) + '+s3:' + word[-3:]] = 1
163 | 				x[str(i) + '+s4:' + word[-4:]] = 1
164 | #				x[str(i) + 's5:' + prev[-5:]] = 1
165 | 				x[str(i) + '+w:' + word] = 1
166 | 		
167 | 		return x
168 | 	
169 | 	def save(self, path):
170 | 		self._cl.save(path)
171 | 		
172 | 	@staticmethod
173 | 	def load(path):
174 | 		obj = Guesser()
175 | 		obj._cl = Classifier.load(path)
176 | 		return obj
177 | 	
178 | class Tagger:
179 | 	def __init__(self):
180 | 		self._pos = Guesser.load('res/model/pos')
181 | 		self._guesser = {}
182 | 		for cat in cats:
183 | 			self._guesser[cat[0]] = Guesser.load('res/model/' + cat[0])
184 | 	
185 | 	def label(self, sentence):
186 | 		tagged = self._pos.predict([sentence])[0]
187 | 		feats = {}
188 | 		for cat, guesser in self._guesser.items():
189 | 			feats[cat] = guesser.predict([sentence])[0]
190 | 			
191 | 		labeled = []
192 | 		for w in range(0, len(sentence)):
193 | 			pos = tagged[w]
194 | 			feat = []
195 | 			cats = []
196 | 			if pos == 'S':
197 | 				cats = ['s-number', 's-case', 's-animacy']
198 | 				if True or feats['s-number'][w] == 'sg':
199 | 					feat.append(feats['s-gender'][w])
200 | 			elif pos == 'A':
201 | 				cats = ['a-number', 'a-degree']
202 | 				if feats['a-short'][w]:
203 | 					feat.append('shrt')
204 | 				else:
205 | 					feat.append(feats['a-case'][w])
206 | 				if feats['a-number'][w] == 'sg':
207 | 					feat.append(feats['a-gender'][w])
208 | 			elif pos == 'NUM':
209 | 				cats = ['num-gender', 'num-number', 'num-case', 'num-degree']
210 | 			elif pos == 'V':
211 | 				cats = ['v-number', 'v-tense', 'v-mood', 'v-type']
212 | 				if feats['v-tense'][w] == 'pst':
213 | 					if feats['v-number'][w] == 'sg':
214 | 						feat.append(feats['v-gender'][w])
215 | 				else:
216 | 					feat.append(feats['v-person'][w])
217 | 					
218 | 			elif pos == 'VINF':
219 | 				cats = ['v-type']
220 | 			elif pos == 'VADV':
221 | 				cats = ['v-type', 'v-tense']
222 | 			elif pos == 'VADJ':
223 | 				cats = ['vadj-number', 'vadj-gender', 'vadj-tense', 'vadj-type', 'vadj-mood']
224 | 				if feats['a-short'][w]:
225 | 					feat.append('shrt')
226 | 				else:
227 | 					feat.append(feats['vadj-case'][w])
228 | 					feat.append(feats['a-degree'][w])
229 | 				if feats['vadj-number'][w] == 'sg':
230 | 					feat.append(feats['vadj-gender'][w])
231 | 			elif pos == 'ADV':
232 | 				cats = ['adv-comp']
233 | 					
234 | 			for cat in cats:
235 | 				feat.append(feats[cat][w])
236 | 				
237 | 			featset = set(feat) - {'ncomp'}
238 | 				
239 | 			labeled.append((sentence[w][0], pos, featset))
240 | 
241 | 		return labeled
242 | 	
243 | if __name__ == '__main__':
244 | 	import glob
245 | 	from optparse import OptionParser
246 | 	import syntagrus
247 | 	
248 | 	parser = OptionParser()
249 | 	parser.usage = '%prog [options]'
250 | 	
251 | 	(options, args) = parser.parse_args()
252 | 	
253 | 	if not len(args):
254 | 		files = glob.glob('res/*/*/*.tgt')
255 | 		corpus = []
256 | 		for file in files:
257 | 			R = syntagrus.Reader()
258 | 			sentences = R.read(file)
259 | 			corpus.extend(sentences)
260 | 			del(R)
261 | 			
262 | 		print(len(corpus))
263 | 		
264 | 		fold_size = round(len(corpus) / 2)
265 | 		
266 | 		train_set = corpus[0:-fold_size]
267 | 		test_set = corpus[-fold_size:]
268 | 		del(corpus)
269 | 		
270 | 		for cat in cats:
271 | 			G = Guesser()
272 | 			G.is_candidate = cat[1]
273 | 			G.make_class = cat[2]
274 | 			G.train(train_set)
275 | 			results = G.test(test_set)
276 | 			G.save('res/model/' + cat[0])
277 | 			del(G)
278 | 			print('{0}\t\t{1:.3f}%'.format(cat[0], results[0] * 100))
279 | 		
280 | 	else:
281 | 		T = Tagger()
282 | 		print('Loaded')
283 | 		words = args[0].split(' ')
284 | 		sentence = []
285 | 		for word in words:
286 | 			sentence.append((word, tuple()))
287 | 			
288 | 		labeled = T.label(sentence)
289 | 		for word in labeled:
290 | 			print(word)
291 | 


--------------------------------------------------------------------------------
/src/mstparser.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Nov 29, 2011
 4 | 
 5 | @author: alexpak
 6 | '''
 7 | 
 8 | if __name__ == '__main__':
 9 | 	import glob
10 | 	from optparse import OptionParser
11 | 	import syntagrus
12 | 	import sys
13 | 	import morph
14 | 	
15 | 	parser = OptionParser()
16 | 	parser.usage = '%prog [options]'
17 | 	parser.add_option('-t', '--train', action='store_const', const=True, dest='train', help='generate train file')
18 | 	parser.add_option('-T', '--test', action='store_const', const=True, dest='test', help='generate test file')
19 | 	parser.add_option('-n', '--number', action='store', dest='number', type='int', help='number of files to process')
20 | 	parser.add_option('-f', '--format', action='store', dest='format', type='string', help='output format')
21 | 	parser.add_option('-M', '--nomorph', action='store_const', const=True, dest='nomorph', help='do not use morphology from annotations')
22 | 
23 | 	(options, args) = parser.parse_args()
24 | 
25 | 	if not options.train and not options.test:
26 | 		print('Specify --train or --test', file=sys.stderr)
27 | 		exit()
28 | 
29 | 	if not options.number:
30 | 		print('Specify number of files -n', file=sys.stderr)
31 | 		exit()
32 | 
33 | 	if not len(args):
34 | 		files = glob.glob('res/*/*/*.tgt')
35 | 		corpus = []
36 | 		for file in files[0:options.number]:
37 | 			R = syntagrus.Reader()
38 | 			sentences = R.read(file)
39 | 			corpus.extend(sentences)
40 | 			del(R)
41 | 
42 | 		fold_size = round(len(corpus) / 10)
43 | 		
44 | 		train_set = corpus[0:-fold_size]
45 | 		test_set = corpus[-fold_size:]
46 | 		
47 | 		print('{0} sentences'.format(len(corpus)), file=sys.stderr)
48 | 		
49 | 		del(corpus)
50 | 		
51 | 		a_set = test_set if options.test else train_set
52 | 		
53 | 		if options.nomorph:
54 | 			Tagger = morph.Tagger()
55 | 			for sentence in a_set:
56 | 				labeled = Tagger.label(sentence)
57 | 				for w in range(0, len(sentence)):
58 | 					sentence[w] = (sentence[w][0], sentence[w][1]._replace(pos=labeled[w][1], feat=labeled[w][2]))
59 | 			
60 | 		selected_feat = {'m', 'f', 'n', 'sg', 'pl', '1p', '2p', '3p', 'nom', 'gen', 'gen2', 'dat', 'acc', 'ins', 'prep', 'loc', 'real', 'imp', 'pass', 'comp', 'shrt'}		
61 | 		#selected_feat = {'nom', 'gen', 'gen2', 'dat', 'acc', 'ins', 'prep', 'loc', 'real', 'imp', 'pass', 'comp', 'shrt'}		
62 | 		
63 | 		if options.format == 'malt':
64 | 			# Malt TAB format
65 | 			for sentence in a_set:
66 | 				for word in sentence:
67 | 					w = word[0] or 'FANTOM'
68 | 					p = '.'.join([word[1].pos] + sorted(word[1].feat & selected_feat))
69 | 					l = word[1].link if word[1].dom else 'ROOT'
70 | 					d = str(word[1].dom)
71 | 					print('\t'.join([w, p, d, l]))
72 | 				print('')
73 | 					
74 | 		else:
75 | 			# MSTParser format
76 | 			for sentence in a_set:
77 | 				wn = []
78 | 				pn = []
79 | 				ln = []
80 | 				dn = []
81 | 				for word in sentence:
82 | 					wn.append(word[0] or 'FANTOM')
83 | 					pn.append('-'.join([word[1].pos] + sorted(word[1].feat & selected_feat)))
84 | 					ln.append(word[1].link if word[1].dom else 'ROOT')
85 | 					dn.append(str(word[1].dom))
86 | 					
87 | 				print('\t'.join(wn))
88 | 				print('\t'.join(pn))
89 | 				print('\t'.join(ln))
90 | 				print('\t'.join(dn))
91 | 				print('')
92 | 


--------------------------------------------------------------------------------
/src/parsers/__init__.py:
--------------------------------------------------------------------------------
 1 | WHITE_SPACE = {' ', '\t'}
 2 | LINE_BREAK = {'\n', '\r'}
 3 | 
 4 | class RuleError(Exception):
 5 |     def __init__(self, msg, line, pos):
 6 |         self.msg = msg
 7 |         self.line = line
 8 |         self.pos = pos
 9 | 
10 |     def __str__(self):
11 |         return repr('{0} at {1}:{2}'.format(self.msg, self.line, self.pos))
12 | 
13 | def read_rules(str):
14 |     rules = []
15 |     rule = []
16 |     token = ''
17 |     quoted = False
18 |     line_num = 0
19 |     char_num = 0
20 |     for ch in str:
21 |         if ch in WHITE_SPACE | LINE_BREAK:
22 |             if len(token):
23 |                 rule.append(token)
24 |                 token = ''
25 |                 
26 |             if ch in LINE_BREAK:
27 |                 if quoted:
28 |                     raise RuleError('Unexpected line break', line_num, char_num)
29 |                 
30 |                 line_num += 1
31 |                 char_num = 0
32 |                 if len(rule):
33 |                     rules.append(rule)
34 |                     rule = []
35 | 
36 |         elif ch == '=' and not quoted:
37 |             if len(token):
38 |                 rule.append(token)
39 |                 token = ''
40 | 
41 |             if len(rule) > 1:
42 |                 raise RuleError('Unexpected "="', line_num, char_num)
43 |         
44 |         elif ch == '"':
45 |             if quoted:
46 |                 rule.append(token)
47 |                 token = ''
48 |                 
49 |             quoted = not quoted
50 |         
51 |         elif ch == '|' and not quoted:
52 |             if len(token):
53 |                 rule.append(token)
54 |                 token = ''
55 | 
56 |             if len(rule) < 1:
57 |                 raise RuleError('Unexpected "|"', line_num, char_num)
58 |                 
59 |             rules.append(rule)
60 |             rule = [rule[0]]
61 |             
62 |         else:
63 |             token += ch
64 | 
65 |         char_num += 1
66 |         
67 |     return rules
68 |         


--------------------------------------------------------------------------------
/src/parsers/cyk.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on Sep 6, 2011
  3 | 
  4 | @author: alexpak
  5 | '''
  6 | 
  7 | from collections import defaultdict
  8 | 
  9 | class CYK:
 10 |     literals = set()
 11 |     nonterminal = set()
 12 |     rules = []
 13 |     
 14 |     index = defaultdict(set)
 15 |     rindex = defaultdict(set)
 16 |     
 17 |     def __init__(self, grammar):
 18 |         candidates = set()
 19 |         for rule in grammar:
 20 |             self.rules.append(rule)
 21 |             self.nonterminal.add(rule[0])
 22 |             
 23 |             if len(rule) == 2:
 24 |                 candidates.add(rule[1])
 25 |                 
 26 |         self.literals = candidates - self.nonterminal
 27 |         
 28 |         for n in range(0, len(self.rules)):
 29 |             rule = self.rules[n]
 30 |             if len(rule) == 2:
 31 |                 self.rindex[rule[1]].add(n)
 32 |                 
 33 |             self.index[rule[0]].add(n)
 34 |             
 35 |     def tokenize(self, str):
 36 |         return str.split(' ')
 37 | 
 38 |     def parse(self, str):
 39 |         tokens = self.tokenize(str)
 40 |         
 41 |         P = {};
 42 |         len_tokens = len(tokens)
 43 |         
 44 |         # returns positions of matching components or empty list
 45 |         def match(rule, start, length):
 46 |             if len(rule) == 1:
 47 |                 result = [length] if rule[0] in P[start][length] else []
 48 |             else:
 49 |                 result = []
 50 |                 for l in range(1, length):
 51 |                     if start + l > len_tokens:
 52 |                         break
 53 | 
 54 |                     if rule[0] not in P[start][l]:
 55 |                         continue
 56 |                     
 57 |                     tail = match(rule[1:], start + l, length - l)
 58 |                     if not len(tail):
 59 |                         continue
 60 |                     result = [l] + tail
 61 |                     break
 62 |                     
 63 |             return result
 64 |         
 65 |         # start at the leafs
 66 |         for p in range(0, len_tokens):
 67 |             tokenset = set(tokens[p])
 68 |             P[p] = {1: defaultdict(set)}
 69 |             while len(tokenset):
 70 |                 new_tokenset = set()
 71 |                 for token in tokenset:
 72 |                     for n in self.rindex[token]:
 73 |                         rule = self.rules[n]
 74 |                         P[p][1][rule[0]].add(n)
 75 |                         new_tokenset.add(rule[0])
 76 |                         
 77 |                 tokenset = new_tokenset
 78 |                     
 79 |         for l in range(2, len_tokens + 1): # length
 80 |             for p in range(0, len_tokens): # position
 81 |                 P[p][l] = defaultdict(set)
 82 |                 for n in range(0, len(self.rules)):
 83 |                     rule = self.rules[n]
 84 |                     matching = match(rule[1:], p, l)
 85 |                     if matching:
 86 |                         P[p][l][rule[0]].add((n, tuple(matching)))
 87 |         
 88 |         self.P = P
 89 |         self.tokens = tokens
 90 |         
 91 |         return len(P[0][len_tokens])
 92 |     
 93 |     def build_tree(self):
 94 |         def build(head, start, length):
 95 |             for matching in self.P[start][length][head]:
 96 |                 if type(matching) == tuple:
 97 |                     rule_n, lengths = matching
 98 |                     rule = self.rules[rule_n]
 99 |                     root = [head]
100 |                     start = start
101 |                     for i in range(0, len(rule) - 1):
102 |                         root.append(build(rule[i + 1], start, lengths[i]))
103 |                         start += lengths[i]
104 |                 else:
105 |                     rule = self.rules[matching]
106 |                     if rule[1] in self.literals:
107 |                         root = [head, rule[1]]
108 |                     else:
109 |                         root = [head, build(rule[1], start, length)]
110 |                     
111 |                 return tuple(root)
112 |             
113 |         return build('EX', 0, len(self.tokens))
114 |     
115 |     def print_tree(self, tree, padding = '', pad_with = '\t'):
116 |         if len(tree) == 2 and tree[1] in self.literals:
117 |             print('{0}({1} "{2}")'.format(padding, tree[0], tree[1]))
118 |         else:
119 |             print('{0}({1}'.format(padding, tree[0]))
120 |             for branch in tree[1:]:
121 |                 self.print_tree(branch, padding + pad_with, pad_with)
122 |             print('{0})'.format(padding))


--------------------------------------------------------------------------------
/src/pos.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on Aug 4, 2011
  3 | 
  4 | @author: alexpak <irokez@gmail.com>
  5 | '''
  6 | import config
  7 | import liblinearutil as svm
  8 | 
  9 | tagset = ['S', 'A', 'NUM', 'A-NUM', 'V', 'ADV', 'PRAEDIC', 'PARENTH', 'S-PRO', 'A-PRO', 'ADV-PRO', 'PRAEDIC-PRO', 'PR', 'CONJ', 'PART', 'INTJ', 'INIT', 'NONLEX']
 10 | tag_id = {}
 11 | tag_inv = {}
 12 | for i in range(0, len(tagset)):
 13 | 	tag_id[tagset[i]] = i + 1
 14 | 	tag_inv[i + 1] = tagset[i]
 15 | 
 16 | class Tagger:
 17 | 	def __init__(self):
 18 | 		self.chain_len = 3
 19 | 		self._features = TaggerFeatures()
 20 | 		pass
 21 | 	
 22 | 	def load(self, modelname, featuresname):
 23 | 		self._svm_model = svm.load_model(modelname)
 24 | 		self._features.load(open(featuresname, 'rb'))
 25 | 		
 26 | 	def save(self, modelname, featuresname):
 27 | 		svm.save_model(modelname, self._svm_model)
 28 | 		self._features.save(open(featuresname, 'wb'))
 29 | 		
 30 | 	def get_label_id(self, pos):
 31 | 		return tag_id[pos] if pos in tag_id else 0
 32 | 	
 33 | 	def get_label(self, id):
 34 | 		return tag_inv[id] if id in tag_inv else '?'
 35 | 		
 36 | 	def train(self, sentences, labels, cross_validation = False):
 37 | 		x = []
 38 | 		y = []
 39 | 		
 40 | 		for i in range(0, len(sentences)):
 41 | 			sentence = sentences[i]
 42 | 			prev = []
 43 | 			
 44 | 			j = 0
 45 | 			for word in sentence:
 46 | 				body = word.lower()
 47 | 				
 48 | 				featurespace = self._construct_featurespace(body, prev)
 49 | 				
 50 | 				prev.append((body, labels[i][j]))
 51 | 				if len(prev) > self.chain_len:
 52 | 					del(prev[0])
 53 | 					
 54 | 				x.append(featurespace.featureset)
 55 | 				j += 1
 56 | 
 57 | 			y.extend(labels[i])
 58 | 
 59 | 		prob = svm.problem(y, x)
 60 | 		
 61 | 		if cross_validation:
 62 | 			param = svm.parameter('-c 1 -v 4 -s 4')
 63 | 			svm.train(prob, param)
 64 | 		else:
 65 | 			param = svm.parameter('-c 1 -s 4')
 66 | 			self._svm_model = svm.train(prob, param)
 67 | 	
 68 | 	def label(self, sentence):
 69 | 		labeled = []
 70 | 		prev = []
 71 | 		for word in sentence:
 72 | 			body = word.lower()
 73 | 			
 74 | 			featurespace = self._construct_featurespace(body, prev)
 75 | 			
 76 | 			p_label, _, _ = svm.predict([0], [featurespace.featureset], self._svm_model, '')
 77 | 			label = p_label[0]
 78 | 			
 79 | 			prev.append((body, label))
 80 | 			if len(prev) > self.chain_len:
 81 | 				del(prev[0])
 82 | 				
 83 | 			labeled.append((word, label))
 84 | 			
 85 | 		return labeled
 86 | 				
 87 | 	def _construct_featurespace(self, word, prev):
 88 | 		featurespace = ml.FeatureSpace()
 89 | 			
 90 | 		featurespace.add({1: len(word)}, 10)
 91 | 		featurespace.add(self._features.from_suffix(word))
 92 | 		featurespace.add(self._features.from_prefix(word))
 93 | 		featurespace.add(self._features.from_body(word))
 94 | 		
 95 | 		for item in prev:
 96 | 			featurespace.add({1: item[1]}, 100)
 97 | #			featurespace.add(features.from_suffix(item[0]))
 98 | #			featurespace.add(features.from_prefix(item[0]))
 99 | #			featurespace.add(features.from_body(item[0]))
100 | 	
101 | 		return featurespace
102 | 	
103 | 				
104 | import pickle
105 | import ml
106 | class TaggerFeatures:
107 | 	def __init__(self):
108 | 		self._body_id = {}
109 | 		self._suffix_id = {}
110 | 		self._prefix_id = {}
111 | 		
112 | 		self._train = True
113 | 		self._featurespace = ml.FeatureSpace()
114 | 		
115 | 	def load(self, fp):
116 | 		(self._body_id, self._suffix_id, self._prefix_id) = pickle.load(fp)
117 | 		self._train = False
118 | 		
119 | 	def save(self, fp):
120 | 		pickle.dump((self._body_id, self._suffix_id, self._prefix_id), fp)
121 | 
122 | 	def from_body(self, body):
123 | 		featureset = {}
124 | 		if self._train:
125 | 			if body not in self._body_id:
126 | 				self._body_id[body] = len(self._body_id) + 1
127 | 					
128 | 			featureset[self._body_id[body]] = 1
129 | 		else:
130 | 			if body in self._body_id:
131 | 				featureset[self._body_id[body]] = 1
132 | 				
133 | 		return featureset
134 | 	
135 | 	def from_suffix(self, body):
136 | 		featureset = {}
137 | 		
138 | 		suffix2 = body[-2:]
139 | 		if suffix2 not in self._suffix_id:
140 | 			self._suffix_id[suffix2] = len(self._suffix_id) + 1
141 | 		featureset[self._suffix_id[suffix2]] = 1
142 | 		
143 | 		suffix3 = body[-3:]
144 | 		if suffix3 not in self._suffix_id:
145 | 			self._suffix_id[suffix3] = len(self._suffix_id) + 1
146 | 		featureset[self._suffix_id[suffix3]] = 1
147 | 		
148 | 		return featureset
149 | 	
150 | 	def from_prefix(self, body):
151 | 		featureset = {}
152 | 		
153 | 		prefix2 = body[:2]
154 | 		if prefix2 not in self._prefix_id:
155 | 			self._prefix_id[prefix2] = len(self._prefix_id) + 1
156 | 		featureset[self._prefix_id[prefix2]] = 1
157 | 		
158 | 		prefix3 = body[:3]
159 | 		if prefix3 not in self._prefix_id:
160 | 			self._prefix_id[prefix3] = len(self._prefix_id) + 1
161 | 		featureset[self._prefix_id[prefix3]] = 1
162 | 		
163 | 		return featureset


--------------------------------------------------------------------------------
/src/rnc.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on Aug 5, 2011
 3 | 
 4 | @author: alexpak <irokez@gmail.com>
 5 | '''
 6 | import xml.parsers.expat
 7 | 
 8 | class Reader:
 9 | 	def __init__(self):
10 | 		self._parser = xml.parsers.expat.ParserCreate()
11 | 		self._parser.StartElementHandler = self.start_element
12 | 		self._parser.EndElementHandler = self.end_element
13 | 		self._parser.CharacterDataHandler = self.char_data		
14 | 	
15 | 	def start_element(self, name, attr):
16 | 		if name == 'ana':
17 | 			self._info = attr
18 | 	
19 | 	def end_element(self, name):
20 | 		if name == 'se':
21 | 			self._sentences.append(self._sentence)
22 | 			self._sentence = []
23 | 		elif name == 'w':
24 | 			self._sentence.append((self._cdata, self._info))
25 | 		elif name == 'ana':
26 | 			self._cdata = ''
27 | 	
28 | 	def char_data(self, content):
29 | 		self._cdata += content
30 | 		
31 | 	def read(self, filename):
32 | 		f = open(filename)
33 | 		content = f.read()
34 | 		f.close()
35 | 		
36 | 		self._sentences = []
37 | 		self._sentence = []
38 | 		self._cdata = ''
39 | 		self._info = ''
40 | 		
41 | 		self._parser.Parse(content)		
42 | 		
43 | 		return self._sentences


--------------------------------------------------------------------------------
/src/sentiment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irokez/Pyrus/9a5c64991592ca13e0a5726f394fbe2a399501c3/src/sentiment/__init__.py


--------------------------------------------------------------------------------
/src/sentiment/demo.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 | 	<head>
 4 | 		<meta charset="utf-8" />
 5 | 		<title>Sentiment analysis demo - Pyrus</title>
 6 | 		<link rel="stylesheet" href="/main.css" />
 7 | 		<link rel="shortcut icon" href="/favicon.ico" />
 8 | 	</head>
 9 | 	<body>
10 | 		<div id="wrapper">
11 | 			<header>
12 | 				<hgroup>
13 | 					<h1>Pyrus</h1>
14 | 				</hgroup>
15 | 			</header>
16 | 			<article>
17 | 				<h1>Sentiment anlysis demo</h1>
18 | 				{if len(q) and not len(error)}
19 | 				<div id="result">
20 | 					<p>Movie: "{$q}"</p>
21 | 					<table>
22 | 						<tbody>
23 | 							{for cl, text in output}
24 | 							<tr class="{$cl}">
25 | 								<th>{$cl}</th>
26 | 								<td>{$text}</td>
27 | 							</tr>
28 | 							{end}
29 | 						</tbody>
30 | 					</table>
31 | 					<p>Classified {$msgs} messages in {$time_total} sec, {$msgs_per_sec} msgs per sec</p>
32 | 				</div>
33 | 				{else}
34 | 				<div id="description">
35 | 					<p>Enter a movie name and press "Search"</p>
36 | 					 {if len(error)}<p><b>{$error}</b></p>{end}
37 | 					<p><b>For example:</b> <a href="?q=темный+рыцарь">темный рыцарь</a></p>
38 | 				</div>
39 | 				{end}
40 | 				<form action="/">
41 | 					<fieldset>
42 | 						<div id="text"><input name="q" /></div>
43 | 						<div id="submit"><button type="submit">Search</button></div>
44 |                         <div id="hint">
45 |                             <a href="?q=темный+рыцарь">темный рыцарь</a>
46 |                             <a href="?q=третий+лишний">третий лишний</a>
47 |                             <a href="?q=вспомнить+все">вспомнить все</a>
48 |                         </div>
49 | 					</fieldset>
50 | 				</form>
51 | 			</article>
52 | 			<footer>
53 | 				<p>&copy; 2011&ndash;2012 Pyrus</p>
54 | 			</footer>
55 | 		</div>
56 | 	</body>
57 | </html>
58 | 


--------------------------------------------------------------------------------
/src/sentiment/demo.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | '''
  4 | Created on Aug 7, 2011
  5 | 
  6 | @author: alexpak
  7 | '''
  8 | 
  9 | import cherrypy
 10 | import sys
 11 | import time
 12 | import cgi
 13 | import os
 14 | import requests
 15 | import json
 16 | 
 17 | path = os.path.dirname(os.path.abspath(__file__)) + '/'
 18 | 
 19 | f = open(path + 'demo.html', 'rb')
 20 | content = f.read().decode()
 21 | f.close()
 22 | 
 23 | from pyrus.src import template
 24 | from yatk import ir
 25 | from yatk.ml.svm import SVM as Classifier
 26 | from red import pie
 27 | 
 28 | TTL = 60
 29 | 
 30 | r = pie.Redis()
 31 | cl = Classifier.load('test.svm')
 32 | index = ir.SentimentIndex.load('test.index', 'delta', 'bogram')
 33 | index.get_text = lambda x: x['text']
 34 | 
 35 | class HelloWorld:
 36 | 	@cherrypy.expose
 37 | 	def index(self, q = ''):
 38 | 		start = time.time()
 39 | 		q = q.strip()
 40 | 		error = ''
 41 | 
 42 | 		T = template.Template()
 43 | 
 44 | 		if len(q):
 45 | 			cached = r.get(q)
 46 | 
 47 | 			if not cached:
 48 | 				url = 'http://search.twitter.com/search.json'
 49 | 				req = requests.get(url, params={'q': q})
 50 | 				data = json.loads(req.text)
 51 | 
 52 | 				if 'results' not in data:
 53 | 					print('Error')
 54 | 					print(data)
 55 | 					exit()
 56 | 
 57 | 				cached = json.dumps(data['results'])
 58 | 				r.setex(q, TTL, cached)
 59 | 
 60 | 			results = json.loads(cached)
 61 | 
 62 | 			docs = []
 63 | 
 64 | 			for msg in results:
 65 | 				feats = index.weight(index.features(msg))
 66 | 				docs.append(feats)
 67 | 
 68 | 			labels = cl.predict(docs)
 69 | 			output = []
 70 | 			for n in range(len(results)):
 71 | 				output.append((labels[n], results[n]['text']))
 72 | 
 73 | 			end = time.time()
 74 | 
 75 | 			T.q = cgi.escape(q)
 76 | 			T.output = output			
 77 | 			T.time_total = round(end - start, 1)
 78 | 			T.msgs = len(output)
 79 | 			T.msgs_per_sec = round(len(output) / (end - start), 1)
 80 | 
 81 | 		T.error = error
 82 | 
 83 | 
 84 | 		return T.transform(content)
 85 | 	
 86 | 	@cherrypy.expose
 87 | 	def test(self):
 88 | 		return content
 89 | 
 90 | cherrypy.server.socket_host = '0.0.0.0'
 91 | config = {
 92 | 	'/': {
 93 | 		'tools.staticdir.on': True,
 94 | 		'tools.staticdir.dir': path + 'public/',
 95 | 		'tools.encode.encoding': 'utf8'
 96 | 	}
 97 | }
 98 | 
 99 | cherrypy.quickstart(HelloWorld(), config = config)
100 | 


--------------------------------------------------------------------------------
/src/sentiment/download-kinopoisk.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import requests
 4 | import time
 5 | import sqlite3
 6 | from bs4 import BeautifulSoup as soup
 7 | 
 8 | def download(cl, limit):
 9 | 	url = 'http://www.kinopoisk.ru/review/type/comment/status/{0}/period/year/perpage/100/page/{1}/'
10 | 
11 | 	texts = []
12 | 	p = 1
13 | 
14 | 	while True:
15 | 		r = requests.get(url.format(cl, p))
16 | 		s = soup(r.text)
17 | 		for div in s.find_all('div', {'class': 'userReview'}):
18 | 			div_resp = div.find('div', {'class': 'response'})
19 | 			div_text = div.find('div', {'class': 'brand_words'})
20 | 
21 | 			texts.append((div_text.text,))
22 | 
23 | 		print('Processed page {0}, {1} texts'.format(p, len(texts)))
24 | 		if len(texts) >= limit:
25 | 			break
26 | 
27 | 		p += 1
28 | 		time.sleep(1)
29 | 
30 | 	return texts[:limit]
31 | 
32 | con = sqlite3.connect('test.db')
33 | cur = con.cursor()
34 | 
35 | cur.execute('''
36 | 	create table docs(
37 | 		id integer primary key autoincrement,
38 | 		text text,
39 | 		class text
40 | 	)
41 | 	''')
42 | 
43 | limit = 500
44 | 
45 | texts_pos = download('good', limit)
46 | texts_neg = download('bad', limit)
47 | 
48 | cur.executemany('insert into docs (class, text) values ("pos", ?)', texts_pos)
49 | cur.executemany('insert into docs (class, text) values ("neg", ?)', texts_neg)
50 | 
51 | con.commit()
52 | con.close()


--------------------------------------------------------------------------------
/src/sentiment/index.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sqlite3
 4 | import re
 5 | from yatk import ir
 6 | from collections import defaultdict
 7 | 
 8 | con = sqlite3.connect('test.db')
 9 | con.row_factory = sqlite3.Row
10 | cur = con.cursor()
11 | 
12 | cur.execute('''
13 | 	create table if not exists ngrams (
14 | 		id integer primary key autoincrement,
15 | 		body text,
16 | 		n_pos integer,
17 | 		n_neg integer
18 | 	)
19 | 	''')
20 | 
21 | count = defaultdict(lambda: {'pos': 0, 'neg': 0})
22 | 
23 | cur.execute('select class, text from docs')
24 | for row in cur.fetchall():
25 | 	words = ir.tokenize(row['text'].lower())
26 | 	ngrams = set(words + ir.ngrams(words, 2))
27 | 	for ngram in ngrams:
28 | 		count[ngram]['pos'] += row['class'] == 'pos'
29 | 		count[ngram]['neg'] += row['class'] == 'neg'
30 | 
31 | for ngram_id, ngram_count in count.items():
32 | 	cur.execute('insert into ngrams (body, n_pos, n_neg) values (?, ?, ?)', (ngram_id, ngram_count['pos'], ngram_count['neg']))
33 | 
34 | cur.execute('create index ngrams_body on ngrams(body)')
35 | 
36 | con.commit()
37 | con.close()


--------------------------------------------------------------------------------
/src/sentiment/public/main.css:
--------------------------------------------------------------------------------
  1 | @import "reset5.css";
  2 | 
  3 | body {
  4 |     font-family: sans-serif;
  5 | }
  6 | 
  7 | #wrapper {
  8 |     padding: 20px;
  9 | }
 10 | 
 11 | header {
 12 |     padding: 20px 0;
 13 | } 
 14 | 
 15 |     header h1 {
 16 |         font-size: 32px;
 17 |         font-weight: normal;
 18 |     }
 19 |     
 20 | article {
 21 |     
 22 | }
 23 | 
 24 |     article h1 {
 25 |         font-size: 18px;
 26 |         font-weight: normal;
 27 |     }
 28 |     
 29 |     #result {
 30 |         padding: 20px;
 31 |         background: #eee;
 32 |         margin-top: 16px;
 33 |         line-height: 24px;
 34 |     }
 35 | 
 36 | 	#result table {
 37 | 		border-collapse: collapse;
 38 | 		margin: 16px 0;
 39 | 	}
 40 | 
 41 |         #result .pos {
 42 |             background: PaleGreen;
 43 |         }
 44 |         #result .neg {
 45 |             background: LightSalmon;
 46 |         }
 47 | 
 48 | 		#result th {
 49 | 			padding: 0px 20px 0 0;
 50 | 			text-align: left;
 51 | 			font-size: 14px;
 52 | 			font-weight: normal;
 53 | 		}
 54 | 		#result td {
 55 | 			padding: 0px 0px 0 0;
 56 | 			font-style: italic;
 57 | 			font-size: 14px;
 58 | 			color: #333;
 59 | 		}
 60 | 
 61 |     #description {
 62 |         padding: 20px;
 63 |         background: #eee;
 64 |         margin-top: 16px;
 65 |         line-height: 24px;
 66 |     }
 67 |     
 68 |     form {
 69 |         padding: 20px 0;
 70 |         position: relative;
 71 |     }
 72 |     
 73 |         div#text {
 74 |             padding-right: 240px;
 75 |             margin-bottom: 16px;
 76 |         }
 77 |         
 78 |             input {
 79 |                 font-family: serif;
 80 |                 font-size: 24px;
 81 |                 padding: 10px;
 82 |                 width: 100%;
 83 |                 border: 1px inset #999;
 84 |             }
 85 |     
 86 |         div#submit {
 87 |             position: absolute;
 88 |             top: 19px;
 89 |             right: 0px;
 90 |             text-align: center;
 91 |         }
 92 |     
 93 |             button {
 94 |                 font-size: 24px;
 95 |                 padding: 10px;
 96 |                 text-align: center;
 97 |                 width: 200px;
 98 |             }
 99 |     
100 | footer {
101 |     padding: 20px 0;
102 | }
103 | 


--------------------------------------------------------------------------------
/src/sentiment/public/reset5.css:
--------------------------------------------------------------------------------
  1 | /* 
  2 | html5doctor.com Reset Stylesheet
  3 | v1.6.1
  4 | Last Updated: 2010-09-17
  5 | Author: Richard Clark - http://richclarkdesign.com 
  6 | Twitter: @rich_clark
  7 | */
  8 | 
  9 | html, body, div, span, object, iframe,
 10 | h1, h2, h3, h4, h5, h6, p, blockquote, pre,
 11 | abbr, address, cite, code,
 12 | del, dfn, em, img, ins, kbd, q, samp,
 13 | small, strong, sub, sup, var,
 14 | b, i,
 15 | dl, dt, dd, ol, ul, li,
 16 | fieldset, form, label, legend,
 17 | table, caption, tbody, tfoot, thead, tr, th, td,
 18 | article, aside, canvas, details, figcaption, figure, 
 19 | footer, header, hgroup, menu, nav, section, summary,
 20 | time, mark, audio, video {
 21 |     margin:0;
 22 |     padding:0;
 23 |     border:0;
 24 |     outline:0;
 25 |     font-size:100%;
 26 |     vertical-align:baseline;
 27 |     background:transparent;
 28 | }
 29 | 
 30 | body {
 31 |     line-height:1;
 32 | }
 33 | 
 34 | article,aside,details,figcaption,figure,
 35 | footer,header,hgroup,menu,nav,section { 
 36 |     display:block;
 37 | }
 38 | 
 39 | nav ul {
 40 |     list-style:none;
 41 | }
 42 | 
 43 | blockquote, q {
 44 |     quotes:none;
 45 | }
 46 | 
 47 | blockquote:before, blockquote:after,
 48 | q:before, q:after {
 49 |     content:'';
 50 |     content:none;
 51 | }
 52 | 
 53 | a {
 54 |     margin:0;
 55 |     padding:0;
 56 |     font-size:100%;
 57 |     vertical-align:baseline;
 58 |     background:transparent;
 59 | }
 60 | 
 61 | /* change colours to suit your needs */
 62 | ins {
 63 |     background-color:#ff9;
 64 |     color:#000;
 65 |     text-decoration:none;
 66 | }
 67 | 
 68 | /* change colours to suit your needs */
 69 | mark {
 70 |     background-color:#ff9;
 71 |     color:#000; 
 72 |     font-style:italic;
 73 |     font-weight:bold;
 74 | }
 75 | 
 76 | del {
 77 |     text-decoration: line-through;
 78 | }
 79 | 
 80 | abbr[title], dfn[title] {
 81 |     border-bottom:1px dotted;
 82 |     cursor:help;
 83 | }
 84 | 
 85 | table {
 86 |     border-collapse:collapse;
 87 |     border-spacing:0;
 88 | }
 89 | 
 90 | /* change border colour to suit your needs */
 91 | hr {
 92 |     display:block;
 93 |     height:1px;
 94 |     border:0;   
 95 |     border-top:1px solid #cccccc;
 96 |     margin:1em 0;
 97 |     padding:0;
 98 | }
 99 | 
100 | input, select {
101 |     vertical-align:middle;
102 | }


--------------------------------------------------------------------------------
/src/sentiment/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sys
 4 | import requests
 5 | import json
 6 | from yatk import ir
 7 | from yatk.ml.svm import SVM as Classifier
 8 | # from yatk.ml.nb import NaiveBayes as Classifier
 9 | from red import pie
10 | 
11 | if len(sys.argv) < 2:
12 | 	print('Enter query')
13 | 	exit()
14 | 
15 | TTL = 60
16 | 
17 | r = pie.Redis()
18 | q = sys.argv[1]
19 | cached = r.get(q)
20 | 
21 | if not cached:
22 | 	url = 'http://search.twitter.com/search.json'
23 | 	req = requests.get(url, params={'q': sys.argv[1]})
24 | 	data = json.loads(req.text)
25 | 
26 | 	if 'results' not in data:
27 | 		print('Error')
28 | 		print(data)
29 | 		exit()
30 | 
31 | 	cached = json.dumps(data['results'])
32 | 	r.setex(q, TTL, cached)
33 | 
34 | results = json.loads(cached)
35 | 
36 | cl = Classifier.load('test.svm')
37 | index = ir.SentimentIndex.load('test.index', 'delta', 'bogram')
38 | index.get_text = lambda x: x['text']
39 | 
40 | docs = []
41 | 
42 | for msg in results:
43 | 	feats = index.weight(index.features(msg))
44 | 	docs.append(feats)
45 | 
46 | labels = cl.predict(docs)
47 | for n in range(len(results)):
48 | 	print('{0}.\t{1}\t{2}'.format(n + 1, labels[n], results[n]['text']))


--------------------------------------------------------------------------------
/src/sentiment/train.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sqlite3
 4 | from yatk import ir
 5 | from yatk.ml.svm import SVM as Classifier
 6 | # from yatk.ml.nb import NaiveBayes as Classifier
 7 | 
 8 | con = sqlite3.connect('test.db')
 9 | con.row_factory = sqlite3.Row
10 | cur = con.cursor()
11 | 
12 | docs = []
13 | cur.execute('select class, text from docs')
14 | for row in cur.fetchall():
15 | 	docs.append((row['class'], row['text']))
16 | 
17 | index = ir.SentimentIndex('delta', 'bogram')
18 | index.get_class = lambda x: x[0]
19 | index.get_text = lambda x: x[1]
20 | index.build(docs)
21 | 
22 | x = []
23 | y = []
24 | for doc in docs:
25 | 	x.append(index.weight(index.features(doc)))
26 | 	y.append(doc[0])
27 | 
28 | cl = Classifier()
29 | cl.train(x, y)
30 | cl.save('test.svm')
31 | 
32 | index.save('test.index')
33 | 
34 | con.close()


--------------------------------------------------------------------------------
/src/sentiment/validate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sqlite3
 4 | from yatk import ir
 5 | from yatk import ml
 6 | from yatk.ml.svm import SVM
 7 | from yatk.ml.nb import NaiveBayes
 8 | 
 9 | con = sqlite3.connect('test.db')
10 | con.row_factory = sqlite3.Row
11 | cur = con.cursor()
12 | 
13 | docs = []
14 | cur.execute('select class, text from docs')
15 | for row in cur.fetchall():
16 | 	docs.append((row['class'], row['text']))
17 | 
18 | con.close()
19 | 
20 | docs_even = []
21 | N = int(len(docs) / 2)
22 | for i in range(N):
23 | 	docs_even.append(docs[i])
24 | 	docs_even.append(docs[N + i])
25 | 
26 | def	test(classifier, features, weight):
27 | 	p = []
28 | 	for fold in range(1, 6):
29 | 		train_docs, test_docs = ml.folds(docs_even, 5, fold)
30 | 
31 | 		index = ir.SentimentIndex(weight, features)
32 | 		index.get_class = lambda x: x[0]
33 | 		index.get_text = lambda x: x[1]
34 | 		index.build(train_docs)
35 | 
36 | 		train_x = []
37 | 		train_y = []
38 | 		for doc in train_docs:
39 | 			train_x.append(index.weight(index.features(doc)))
40 | 			train_y.append(doc[0])
41 | 
42 | 		test_x = []
43 | 		test_y = []
44 | 		for doc in test_docs:
45 | 			test_x.append(index.weight(index.features(doc)))
46 | 			test_y.append(doc[0])
47 | 
48 | 
49 | 		cl = classifier()
50 | 		cl.train(train_x, train_y)
51 | 		labels = cl.predict(test_x)
52 | 		mic, mac = cl.evaluate(test_y, labels)
53 | 		p.append(mic)
54 | 
55 | 	print('{0} {1} {2}: {3:.1f}%'.format(classifier, features, weight, ir.avg(p) * 100))
56 | 
57 | test(NaiveBayes, 'unigram', 'bin')
58 | test(NaiveBayes, 'bigram', 'bin')
59 | test(NaiveBayes, 'bogram', 'bin')
60 | test(SVM, 'unigram', 'bin')
61 | test(SVM, 'bigram', 'bin')
62 | test(SVM, 'bogram', 'bin')
63 | test(SVM, 'unigram', 'delta')
64 | test(SVM, 'bigram', 'delta')
65 | test(SVM, 'bogram', 'delta')


--------------------------------------------------------------------------------
/src/syntagrus.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | '''
  3 | Created on Nov 21, 2011
  4 | 
  5 | @author: alexpak
  6 | '''
  7 | import os
  8 | import sqlite3
  9 | import xml.parsers.expat
 10 | import glob
 11 | from optparse import OptionParser
 12 | from collections import namedtuple
 13 | 
 14 | word_t = namedtuple('word_t', ['lemma', 'pos', 'feat', 'id', 'dom', 'link'])
 15 | feat_ru_en = {
 16 | 	'ЕД': 'sg',
 17 | 	'МН': 'pl',
 18 | 	'ЖЕН': 'f',
 19 | 	'МУЖ': 'm',
 20 | 	'СРЕД': 'n',
 21 | 	'ИМ': 'nom',
 22 | 	'РОД': 'gen',
 23 | 	'ДАТ': 'dat',
 24 | 	'ВИН': 'acc',
 25 | 	'ТВОР': 'ins',
 26 | 	'ПР': 'prep',
 27 | 	'ПАРТ': 'gen2',
 28 | 	'МЕСТН': 'loc',
 29 | 	'ОД': 'anim',
 30 | 	'НЕОД': 'inan',
 31 | 	'ИНФ': 'inf',
 32 | 	'ПРИЧ': 'adjp',
 33 | 	'ДЕЕПР': 'advp',
 34 | 	'ПРОШ': 'pst',
 35 | 	'НЕПРОШ': 'npst',
 36 | 	'НАСТ': 'prs',
 37 | 	'1-Л': '1p',
 38 | 	'2-Л': '2p',
 39 | 	'3-Л': '3p',
 40 | 	'ИЗЪЯВ': 'real',
 41 | 	'ПОВ': 'imp',
 42 | 	'КР': 'shrt',
 43 | 	'НЕСОВ': 'imperf',
 44 | 	'СОВ': 'perf',
 45 | 	'СТРАД': 'pass',
 46 | 	'СЛ': 'compl',
 47 | 	'СМЯГ': 'soft',
 48 | 	'СРАВ': 'comp',
 49 | 	'ПРЕВ': 'supl',
 50 | }
 51 | 
 52 | link_ru_en = {
 53 | 	'предик': 'subj',
 54 | 	'1-компл': 'obj',
 55 | 	'2-компл': 'obj',
 56 | 	'3-компл': 'obj',
 57 | 	'4-компл': 'obj',
 58 | 	'5-компл': 'obj',
 59 | 	'опред': 'amod',
 60 | 	'предл': 'prep',
 61 | 	'обст': 'pobj',
 62 | }
 63 | {
 64 | 	'огранич': '',      
 65 | 	'квазиагент': '',       
 66 | 	'сочин': '',      
 67 | 	'соч-союзн': '',      
 68 | 	'атриб': '',      
 69 | 	'аппоз': '',      
 70 | 	'подч-союзн': '',      
 71 | 	'вводн': '',      
 72 | 	'сент-соч': '',      
 73 | 	'количест': '',      
 74 | 	'разъяснит': '',       
 75 | 	'присвяз': '',      
 76 | 	'релят': '',      
 77 | 	'сравн-союзн': '',      
 78 | 	'примыкат': '',      
 79 | 	'сравнит': '',      
 80 | 	'соотнос': '',      
 81 | 	'эксплет': '',      
 82 | 	'аналит': '',      
 83 | 	'пасс-анал': '',      
 84 | 	'вспом': '',      
 85 | 	'агент': '',      
 86 | 	'кратн': '',      
 87 | 	'инф-союзн': '',      
 88 | 	'электив': '',      
 89 | 	'композ': '',      
 90 | 	'колич-огран': '',      
 91 | 	'неакт-компл': '',      
 92 | 	'пролепт': '',       
 93 | 	'суб-копр': '',       
 94 | 	'дат-субъект': '',      
 95 | 	'длительн': '',      
 96 | 	'об-аппоз': '',      
 97 | 	'изъясн': '',      
 98 | 	'компл-аппоз': '',      
 99 | 	'оп-опред': '',      
100 | 	'1-несобст-компл': '',      
101 | 	'распред': '',      
102 | 	'уточн': '',      
103 | 	'нум-аппоз': '',      
104 | 	'ном-аппоз': '',      
105 | 	'2-несобст-компл': '',      
106 | 	'аппрокс-колич': '',      
107 | 	'колич-вспом': '',      
108 | 	'колич-копред': '',      
109 | 	'кратно-длительн': '',      
110 | 	'об-копр': '',      
111 | 	'эллипт': '',      
112 | 	'3-несобст-компл': '',       
113 | 	'4-несобст-компл': '',       
114 | 	'fictit': '',       
115 | 	'авт-аппоз': '',       
116 | 	'аддит': '',       
117 | 	'адр-присв': '',       
118 | 	'дистанц': '',       
119 | 	'несобст-агент': '',       
120 | 	'об-обст': '',       
121 | 	'обст-тавт': '',       
122 | 	'презентат': '',       
123 | 	'сент-предик': '',       
124 | 	'суб-обст': '',       
125 | }
126 | 
127 | class Reader:
128 | 	def __init__(self):
129 | 		self._parser = xml.parsers.expat.ParserCreate()
130 | 		self._parser.StartElementHandler = self.start_element
131 | 		self._parser.EndElementHandler = self.end_element
132 | 		self._parser.CharacterDataHandler = self.char_data		
133 | 	
134 | 	def start_element(self, name, attr):
135 | 		if name == 'W':
136 | 			features = attr['FEAT'].split(' ') if 'FEAT' in attr else ['UNK']
137 | 			for i in range(0, len(features)):
138 | 				if features[i] in feat_ru_en:
139 | 					features[i] = feat_ru_en[features[i]]
140 | 					
141 | 			lemma = lemma=attr['LEMMA'].lower() if 'LEMMA' in attr else ''
142 | 			link = attr['LINK'] if 'LINK' in attr else None
143 | #			if link in link_ru_en:
144 | #				link = link_ru_en[link]
145 | 				
146 | 			dom = int(attr['DOM']) if attr['DOM'] != '_root' else 0
147 | 			pos = features[0]
148 | 			feat = set(features[1:])
149 | 			
150 | 			if 'adjp' in feat:
151 | 				pos = 'VADJ'
152 | 				feat -= {'adjp'}
153 | 				
154 | 			if 'advp' in feat:
155 | 				pos = 'VADV'
156 | 				feat -= {'advp'}
157 | 			
158 | 			if 'inf' in feat:
159 | 				pos = 'VINF'
160 | 				feat -= {'inf'}
161 | 			
162 | 			self._info = word_t(lemma=lemma, pos=pos, feat=feat, id=int(attr['ID']), dom=dom, link=link)
163 | 			self._cdata = ''
164 | 	
165 | 	def end_element(self, name):
166 | 		if name == 'S':
167 | 			self._sentences.append(self._sentence)
168 | 			self._sentence = []
169 | 		elif name == 'W':
170 | 			self._sentence.append((self._cdata, self._info))
171 | 			self._cdata = ''
172 | 	
173 | 	def char_data(self, content):
174 | 		self._cdata += content
175 | 		
176 | 	def read(self, filename):
177 | 		f = open(filename, encoding='windows-1251')
178 | 		content = f.read()
179 | 		f.close()
180 | 		content = content.replace('encoding="windows-1251"', 'encoding="utf-8"')
181 | 		
182 | 		self._sentences = []
183 | 		self._sentence = []
184 | 		self._cdata = ''
185 | 		self._info = ''
186 | 		
187 | 		self._parser.Parse(content)		
188 | 		
189 | 		return self._sentences
190 | 
191 | class Lexicon:
192 | 	def __init__(self, dbname):
193 | 		self.dbname = dbname
194 | 		db_exists = os.path.isfile(dbname)
195 | 		self.con = sqlite3.connect(dbname)
196 | 		self.cur = self.con.cursor()
197 | 		
198 | 		if not db_exists:
199 | 			self.create_db()		
200 | 	
201 | 	def create_db(self):
202 | 		sql = '''
203 |         create table words(
204 |         	id integer primary key autoincrement,
205 |         	lemma text,
206 |         	pos text,
207 |         	form text,
208 |         	info text,
209 |         	freq integer
210 |         );
211 |         create index words_lemma_form_info on words(lemma, form, info);
212 |         '''
213 | 		[self.cur.execute(st) for st in sql.split(';') if len(st.strip())]
214 | 		
215 | 	def index(self, filename):
216 | 		sentences = Reader().read(filename)
217 | 		for sentence in sentences:
218 | 			for word in sentence:
219 | 				feat = ' '.join(word[1].feat)
220 | 				self.cur.execute('select id from words where lemma = ? and form = ? and pos = ? and info = ?', (word[1].lemma, word[0], word[1].pos, feat))
221 | 				row = self.cur.fetchone()
222 | 				if row is None:
223 | 					self.cur.execute('insert into words (lemma, pos, form, info, freq) values (?, ?, ?, ?, 1)', (word[1].lemma, word[1].pos, word[0], feat))
224 | 				else:
225 | 					self.cur.execute('update words set freq = freq + 1 where id = ?', row)
226 | 					
227 | 	def close(self):
228 | 		self.con.commit()
229 | 		self.con.close()
230 | 	
231 | if __name__ == '__main__':
232 | 	parser = OptionParser()
233 | 	parser.usage = '%prog [options] inputfile'
234 | 	parser.add_option('-L', '--construct-lexicon', action = 'store_const', const = True	, dest = 'lexicon', help = 'construct lexicon')
235 | 
236 | 	(options, args) = parser.parse_args()
237 | 	
238 | 	if options.lexicon:
239 | 		L = Lexicon('tmp/lexicon')
240 | 		files = glob.glob('res/*/*/*.tgt')
241 | 		for file in files:
242 | 			L.index(file)
243 | 		
244 | 		L.close()
245 | 
246 | 	#R = Reader()
247 | 	#sentences = R.read(args[0])
248 | 	#print(len(sentences))
249 | 	#print(sentences[0])
250 | 	


--------------------------------------------------------------------------------
/src/template.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | '''
  3 | Created on Aug 7, 2011
  4 | 
  5 | @author: alexpak
  6 | '''
  7 | def print_stack(stack, padding = '\n', pad_with = '\t'):
  8 | 	s = ''
  9 | 	for expr in stack:
 10 | 		if isinstance(expr, str):
 11 | 			s += padding + '__s__ +="""' + expr.replace('"', '\\"') + '"""'
 12 | 		else:
 13 | 			if expr[0] == 'if':
 14 | 				s += padding + 'if ' + expr[1] + ':'
 15 | 				s += print_stack(expr[2], padding + pad_with, pad_with) or padding + pad_with + 'pass'
 16 | 				s += padding + 'else:'
 17 | 				s += print_stack(expr[3], padding + pad_with, pad_with) or padding + pad_with + 'pass'
 18 | 			elif expr[0] == 'for':
 19 | 				s += padding + 'for ' + expr[1] + ':'
 20 | 				s += print_stack(expr[2], padding + pad_with, pad_with) or padding + pad_with + 'pass'
 21 | 			elif expr[0] == 'print':
 22 | 				s += padding + '__s__+=str(' + expr[1] + ')'
 23 | 	return s
 24 | 
 25 | class Template:
 26 | 	vars = {}
 27 | 	
 28 | 	def assign(self, key, val):
 29 | 		self.vars[key] = val
 30 | 	
 31 | 	def __setattr__(self, key, val):
 32 | 		self.assign(key, val)
 33 | 		
 34 | 	def __setitem__(self, key, val):
 35 | 		self.assign(key, val)
 36 | 	
 37 | 	def transform(self, template):
 38 | 		buffer = ''
 39 | 		
 40 | 		stack = []
 41 | 		current_stack = stack
 42 | 		stack_chain = []
 43 | 		stack_chain.append(current_stack)
 44 | 		expr = tuple()
 45 | 		last_if = tuple()
 46 | 		open_bracket = False
 47 | 		for ch in template:
 48 | 			if ch == '{':
 49 | 				if open_bracket:
 50 | 					current_stack.append('{')
 51 | 					
 52 | 				open_bracket = True
 53 | 				current_stack.append(buffer)
 54 | 				buffer = ''
 55 | 			elif ch == '}' and len(buffer):
 56 | 				if buffer[0:3] == 'if ':
 57 | 					expr = ('if', buffer[3:], [], [])
 58 | 					current_stack.append(expr)
 59 | 					stack_chain.append(current_stack)
 60 | 					current_stack = expr[2]
 61 | 					last_if = expr
 62 | 					
 63 | 				elif buffer == 'else':
 64 | 					if last_if[0] != 'if':
 65 | 						exit('Expected IF for ELSE')
 66 | 
 67 | 					current_stack = last_if[3]
 68 | 					
 69 | 				elif buffer[0:4] == 'for ':
 70 | 					expr = ('for', buffer[4:], [])
 71 | 					current_stack.append(expr)
 72 | 					stack_chain.append(current_stack)
 73 | 					current_stack = expr[2]
 74 | 					
 75 | 				elif buffer[0] == '$':
 76 | 					expr = ('print', buffer[1:])
 77 | 					current_stack.append(expr)
 78 | 
 79 | 				elif buffer == 'end':
 80 | 					current_stack = stack_chain.pop()
 81 | 				
 82 | 				else:
 83 | 					if open_bracket:
 84 | 						current_stack.append('{')
 85 | 
 86 | 					current_stack.append(buffer + '}')
 87 | 				
 88 | 				open_bracket = False
 89 | 				buffer = ''
 90 | 			else:
 91 | 				buffer += ch
 92 | 		
 93 | 		if buffer:		
 94 | 			if open_bracket:
 95 | 				current_stack.append('{')
 96 | 			current_stack.append(buffer)
 97 | 
 98 | 		source = '__s__ = ""' + print_stack(stack)
 99 | 		
100 | 		exec(source, self.vars)
101 | 		return self.vars['__s__']
102 | 


--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 3, 2011
 4 | 
 5 | @author: alexpak <irokez@gmail.com>
 6 | '''
 7 | import sys
 8 | import re
 9 | 
10 | import rnc
11 | import pos
12 | 
13 | sentences = []
14 | #sentences.extend(rnc.Reader().read('tmp/fiction.xml'))
15 | #sentences.extend(rnc.Reader().read('tmp/science.xml'))
16 | #sentences.extend(rnc.Reader().read('tmp/laws.xml'))
17 | sentences.extend(rnc.Reader().read('tmp/media1.xml'))
18 | sentences.extend(rnc.Reader().read('tmp/media2.xml'))
19 | sentences.extend(rnc.Reader().read('tmp/media3.xml'))
20 | 
21 | re_pos = re.compile('([\w-]+)(?:[^\w-]|$)'.format('|'.join(pos.tagset)))
22 | 
23 | tagger = pos.Tagger()
24 | 
25 | sentence_labels = []
26 | sentence_words = []
27 | for sentence in sentences:
28 | 	labels = []
29 | 	words = []
30 | 	for word in sentence:
31 | 		gr = word[1]['gr']
32 | 		m = re_pos.match(gr)
33 | 		if not m:
34 | 			print(gr, file = sys.stderr)
35 | 			
36 | 		pos = m.group(1)
37 | 		if pos == 'ANUM':
38 | 			pos = 'A-NUM'
39 | 			
40 | 		label = tagger.get_label_id(pos)
41 | 		if not label:
42 | 			print(gr, file = sys.stderr)
43 | 			
44 | 		labels.append(label)
45 | 		
46 | 		body = word[0].replace('`', '')
47 | 		words.append(body)
48 | 		
49 | 	sentence_labels.append(labels)
50 | 	sentence_words.append(words)
51 | 			
52 | tagger.train(sentence_words, sentence_labels, True)
53 | tagger.train(sentence_words, sentence_labels)
54 | tagger.save('tmp/svm.model', 'tmp/ids.pickle')


--------------------------------------------------------------------------------
/test/pos-test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 3, 2011
 4 | 
 5 | @author: alexpak <irokez@gmail.com>
 6 | '''
 7 | import sys
 8 | import pos
 9 | 
10 | sentence = sys.argv[1].split(' ')
11 | 
12 | tagger = pos.Tagger()
13 | tagger.load('tmp/svm.model', 'tmp/ids.pickle')
14 | 
15 | rus = {
16 | 	'S': 'сущ.', 
17 | 	'A': 'прил.', 
18 | 	'NUM': 'числ.', 
19 | 	'A-NUM': 'числ.-прил.', 
20 | 	'V': 'глаг.', 
21 | 	'ADV': 'нареч.', 
22 | 	'PRAEDIC': 'предикатив', 
23 | 	'PARENTH': 'вводное', 
24 | 	'S-PRO': 'местоим. сущ.', 
25 | 	'A-PRO': 'местоим. прил.', 
26 | 	'ADV-PRO': 'местоим. нареч.', 
27 | 	'PRAEDIC-PRO': 'местоим. предик.', 
28 | 	'PR': 'предлог', 
29 | 	'CONJ': 'союз', 
30 | 	'PART': 'частица', 
31 | 	'INTJ': 'межд.', 
32 | 	'INIT': 'инит', 
33 | 	'NONLEX': 'нонлекс'
34 | }
35 | 
36 | tagged = []
37 | for word, label in tagger.label(sentence):
38 | 	tagged.append((word, rus[tagger.get_label(label)]))
39 | 	
40 | print(tagged)


--------------------------------------------------------------------------------
/test/test-alg.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 19, 2011
 4 | 
 5 | @author: alexpak
 6 | '''
 7 | 
 8 | import sys
 9 | sys.path.append('src')
10 | 
11 | from alg import Vector as V
12 | 
13 | a = V([1, 2, 3, 4])
14 | b = a + 3
15 | c = b + 4
16 | d = c - a
17 | f = V([1, 1, 1, 1])
18 | f += a
19 | print(a)
20 | print(b)
21 | print(c)
22 | print(d)
23 | print(f)


--------------------------------------------------------------------------------
/test/test-cyk.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | '''
 4 | Created on Sep 6, 2011
 5 | 
 6 | @author: alexpak
 7 | '''
 8 | import sys
 9 | sys.path.append('src')
10 | 
11 | import parsers
12 | from parsers.cyk import CYK
13 | 
14 | rules = '''
15 |     EX  = B1 EX B2 | N
16 |     B1  = "("
17 |     B2  = ")"
18 |     EX  = EX OP2 EX | OP1 EX
19 |     N   = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "0"
20 |     OP2 = "+" | "-" | "/" | "*"
21 |     OP1 = "+" | "-"
22 | '''
23 | 
24 | #rules = '''
25 | #    EX  = EX OP EX | N
26 | #    N   = "1" | "2" | "3"
27 | #    OP = "+" | "-"
28 | #'''
29 | 
30 | grammar = parsers.read_rules(rules)
31 | 
32 | parser = CYK(grammar)
33 | #result = parser.parse('1 + 2 * ( 3 - 6 ) + 9 + 0 / 2')
34 | result = parser.parse('1 + 2 + 3')
35 | #parser.parse('1 + 2')
36 | tree = parser.build_tree()
37 | parser.print_tree(tree)
38 | 
39 | '''
40 | (EX (EX 1)
41 |     (OP +)
42 |     (N 2))
43 | '''


--------------------------------------------------------------------------------
/test/test-iris.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 17, 2011
 4 | 
 5 | @author: alexpak
 6 | '''
 7 | 
 8 | import sys
 9 | sys.path.append('src')
10 | 
11 | f = open('res/iris.data')
12 | data = []; x = []; y =[]
13 | for line in f:
14 | 	rows = line.strip().split(',')
15 | 	if len(rows) == 5:
16 | 		x.append([float(i) for i in rows[0:4]])
17 | 		y.append(rows[4])
18 | f.close()
19 | 
20 | def list_to_dict(l):
21 | #	return dict(zip(l, [1 for _ in range(0, len(l))]))
22 | 	return dict(zip(range(0, len(l)), l))
23 | 
24 | # divide into training and test sets
25 | test_x = []; test_y = []; train_x = []; train_y = [];
26 | for i in range(0, len(x)):
27 | 	if i % 3:
28 | 		train_x.append(list_to_dict(x[i]))
29 | 		train_y.append(y[i])
30 | 	else:
31 | 		test_x.append(list_to_dict(x[i]))
32 | 		test_y.append(y[i])
33 | 	
34 | from ml.nb import NaiveBayes
35 | 
36 | classifier = NaiveBayes()
37 | classifier.train(train_x, train_y)
38 | estim_y = classifier.predict(test_x)
39 | (acc, ) = classifier.evaluate(test_y, estim_y)
40 | 
41 | print('Naive Bayes accuracy = {0:.2f}%'.format(acc * 100))
42 | 
43 | from ml.svm import SVM
44 | 
45 | classifier = SVM()
46 | classifier.train(train_x, train_y)
47 | estim_y = classifier.predict(test_x)
48 | (acc, ) = classifier.evaluate(test_y, estim_y)
49 | 
50 | print('SVM accuracy = {0:.2f}%'.format(acc * 100))


--------------------------------------------------------------------------------
/test/test-names.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 17, 2011
 4 | 
 5 | @author: alexpak
 6 | '''
 7 | import sys
 8 | sys.path.append('src')
 9 | 
10 | f = open('res/names.txt')
11 | 
12 | x = []; y = []
13 | 
14 | for line in f:
15 | 	(name, sex) = line.strip().split(' ')
16 | 	x.append({name[-1:]: 1, name[-2:]: 1, name[-3:]: 1, name[-4:]: 1})
17 | 	y.append(sex)
18 | 	
19 | fold = -100
20 | 
21 | f.close()
22 | 
23 | import os
24 | 
25 | from ml.nb import NaiveBayes
26 | 
27 | filename = 'tmp/nb.pickle'
28 | exists = os.path.isfile(filename)
29 | if exists:
30 | 	classifier = NaiveBayes.load(filename)
31 | else:
32 | 	classifier = NaiveBayes()
33 | 	
34 | classifier.train(x[0:fold], y[0:fold])
35 | estim_y = classifier.predict(x[fold:])
36 | (acc, ) = classifier.evaluate(y[fold:], estim_y)
37 | 
38 | if not exists:
39 | 	classifier.save(filename)
40 | 
41 | print('Naive Bayes accuracy = {0:.2f}%'.format(acc * 100))
42 | 
43 | from ml.svm import SVM
44 | 
45 | filename = 'tmp/svm.pickle'
46 | exists = os.path.isfile(filename)
47 | if exists:
48 | 	classifier = SVM.load(filename)
49 | else:
50 | 	classifier = SVM()
51 | 	
52 | classifier.train(x[0:fold], y[0:fold])
53 | estim_y = classifier.predict(x[fold:])
54 | (acc, ) = classifier.evaluate(y[fold:], estim_y)
55 | 
56 | if not exists:
57 | 	classifier.save(filename)
58 | 	
59 | print('SVM accuracy = {0:.2f}%'.format(acc * 100))


--------------------------------------------------------------------------------
/test/test-nn.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 18, 2011
 4 | 
 5 | @author: alexpak
 6 | '''
 7 | import sys
 8 | sys.path.append('src')
 9 | 
10 | def list_to_dict(l):
11 | 	return dict(zip(range(0, len(l)), l))
12 | 
13 | from ml.nn import Perceptron
14 | 
15 | classifier = Perceptron(2)
16 | x = [
17 | 	list_to_dict([0, 0]),
18 | 	list_to_dict([0, 1]),
19 | 	list_to_dict([1, 0]),
20 | 	list_to_dict([1, 1])
21 | ]
22 | y = [0, 1, 1, 0]
23 | #y = [1]
24 | 
25 | classifier.train(x, y)
26 | y = classifier.predict(x)
27 | print(y)


--------------------------------------------------------------------------------
/test/test-polarity.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | '''
 3 | Created on Aug 17, 2011
 4 | 
 5 | @author: alexpak
 6 | '''
 7 | import sys
 8 | sys.path.append('src')
 9 | 
10 | from collections import defaultdict
11 | 
12 | f_pos = open('res/rt-polaritydata/rt-polaritydata/rt-polarity.pos', 'rb')
13 | f_neg = open('res/rt-polaritydata/rt-polaritydata/rt-polarity.neg', 'rb')
14 | 
15 | def count_words(line):
16 | 	count = defaultdict(bool)
17 | 	for word in line.strip().split(' '):
18 | 		if len(word) > 1:
19 | 			count[word] += 1
20 | 
21 | 	return count
22 | 
23 | x = []; y = []
24 | 
25 | i = 0
26 | eof = False
27 | while not eof:
28 | 	line = f_pos.readline()
29 | 	eof = not len(line)
30 | 	x.append(count_words(line.decode('utf-8', 'ignore')))
31 | 	y.append(+1)
32 | 	x.append(count_words(f_neg.readline().decode('utf-8', 'ignore')))
33 | 	y.append(-1)
34 | 	i += 1
35 | 	
36 | fold = int(i * 0.9)
37 | 
38 | f_pos.close()
39 | f_neg.close()
40 | 
41 | from ml.nb import NaiveBayes
42 | 
43 | classifier = NaiveBayes()
44 | classifier.train(x[0:fold], y[0:fold])
45 | estim_y = classifier.predict(x[fold:])
46 | (acc, ) = classifier.evaluate(y[fold:], estim_y)
47 | 
48 | print('Naive Bayes accuracy = {0:.2f}%'.format(acc * 100))
49 | 
50 | from ml.svm import SVM
51 | 
52 | classifier = SVM()
53 | classifier.train(x[0:fold], y[0:fold])
54 | estim_y = classifier.predict(x[fold:])
55 | (acc, ) = classifier.evaluate(y[fold:], estim_y)
56 | 
57 | print('SVM accuracy = {0:.2f}%'.format(acc * 100))


--------------------------------------------------------------------------------
/tmp/ids.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irokez/Pyrus/9a5c64991592ca13e0a5726f394fbe2a399501c3/tmp/ids.pickle


--------------------------------------------------------------------------------
/web/.server.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irokez/Pyrus/9a5c64991592ca13e0a5726f394fbe2a399501c3/web/.server.py.swp


--------------------------------------------------------------------------------
/web/html/.tagging.html.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irokez/Pyrus/9a5c64991592ca13e0a5726f394fbe2a399501c3/web/html/.tagging.html.swp


--------------------------------------------------------------------------------
/web/html/tagging.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 |     <head>
 4 |         <meta charset="utf-8" />
 5 |         <title>Parsing demo - Pyrus</title>
 6 |         <link rel="stylesheet" href="/css/main.css" />
 7 |         <link rel="shortcut icon" href="/favicon.ico" />
 8 |         <!--[if lt IE 9]><script src="/js/html5.js"></script><![endif]-->
 9 |         <script type="text/javascript" src="/js/raphael-min.js"></script>
10 |         <script type="text/javascript" src="/js/dracula_graffle.js"></script>
11 |         <script type="text/javascript" src="/js/jquery-1.4.2.min.js"></script>
12 |         <script type="text/javascript" src="/js/dracula_graph.js"></script>
13 |         <script type="text/javascript" src="/js/d3.v2.min.js"></script>
14 |         <script type="text/javascript" src="/js/d3-tree-test.js"></script>
15 |     </head>
16 |     <body>
17 |         <div id="wrapper">
18 |             <header>
19 |                 <hgroup>
20 |                     <h1>Pyrus</h1>
21 |                 </hgroup>
22 |             </header>
23 |             <article>
24 |                 <h1>Parsing demo</h1>
25 |                 {if len(text) and not len(error)}
26 |                 <div id="result">
27 |                     <p>{$text}</p>
28 |                     <table>
29 | 			<tbody>
30 |                         {for word, pos in tagged}
31 | 				<tr>
32 | 					<th>{$word}</th>
33 | 		                        <td>{$pos}</td>
34 | 				</tr>
35 |                         {end}
36 | 			</tbody>
37 |                     </table>
38 |                     <div id="graph"></div>
39 | 
40 | <script>
41 | Data = {children: {}, nodes: {}};
42 | 
43 | function get_children(node)
44 | {
45 | 	var children = [];
46 | 	if (node.id in Data.children) {
47 | 		var ids = Data.children[node.id];
48 | 		for (var i = 0; i < ids.length; ++i) {
49 | 			children.push(Data.nodes[ids[i]]);
50 | 		}
51 | 	}
52 | 	return children;
53 | }
54 | 
55 | function add_child(id, parent, label)
56 | {
57 | 	if (!(parent in Data.children)) {
58 | 		Data.children[parent] = [];
59 | 	}
60 | 	Data.children[parent].push(id);
61 | 	Data.nodes[id].name += " (" + label + ")";
62 | }
63 | 
64 | {for id, name, color in nodes}
65 | Data.nodes[{$id}] = {id: {$id}, name: "{$name}"};
66 | {end}
67 | {for word1, word2, label in edges}
68 | add_child({$word1}, {$word2}, "{$label}");
69 | {end}
70 | 
71 |     $(function(){
72 |         buildTree("#graph");
73 |     });
74 | </script>
75 |                     <p>Parsed {$words} words in {$time_total} sec, {$words_per_sec} words per sec</p>
76 |                 </div>
77 |                 {else}
78 |                 <div id="description">
79 |                     <p>Enter the text and press "Tag"</p>
80 | 			{if len(error)}<p><b>{$error}</b></p>{end}
81 |                     <p><b>For example:</b> <a href="?text=Съешьте еще этих мягких французских булок, да выпейте же чаю">Съешьте еще этих мягких французских булок, да выпейте же чаю</a></p>
82 |                 </div>
83 |                 {end}
84 |                 <form action="/">
85 |                     <fieldset>
86 |                         <div id="text"><textarea name="text"></textarea></div>
87 |                         <div id="submit"><button type="submit">Tag</button></div>
88 |                     </fieldset>
89 |                 </form>
90 |             </article>
91 |             <footer>
92 |                 <p>&copy; 2011&ndash;2012 Pyrus</p>
93 |             </footer>
94 |         </div>
95 |     </body>
96 | </html>
97 | 


--------------------------------------------------------------------------------
/web/public/.htaccess:
--------------------------------------------------------------------------------
 1 | Options -Indexes
 2 | 
 3 | <IfModule mod_rewrite.c>
 4 |     Options +FollowSymLinks
 5 |     RewriteEngine On
 6 |     RewriteBase /
 7 | 
 8 |     RewriteCond %{REQUEST_FILENAME} !-f
 9 | #    RewriteCond %{REQUEST_FILENAME} !-d
10 |     RewriteRule ^(.*)$ http://127.0.0.1:8080/$1 [P]
11 | </IfModule>
12 | 
13 | 
14 | <FilesMatch "\.(ini|sql)$">
15 |     Order allow,deny
16 |     Deny from all
17 | </FilesMatch>
18 | 
19 | AddDefaultCharset utf-8
20 | 


--------------------------------------------------------------------------------
/web/public/css/html5.js:
--------------------------------------------------------------------------------
1 | // html5shiv @rem remysharp.com/html5-enabling-script
2 | // iepp v1.6.2 @jon_neal iecss.com/print-protector
3 | // Dual licensed under the MIT or GPL Version 2 licenses
4 | /*@cc_on(function(a,b){function r(a){var b=-1;while(++b<f)a.createElement(e[b])}if(!(!window.attachEvent||!b.createStyleSheet||!function(){var a=document.createElement("div");a.innerHTML="<elem></elem>";return a.childNodes.length!==1}())){a.iepp=a.iepp||{};var c=a.iepp,d=c.html5elements||"abbr|article|aside|audio|canvas|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",e=d.split("|"),f=e.length,g=new RegExp("(^|\\s)("+d+")","gi"),h=new RegExp("<(/*)("+d+")","gi"),i=/^\s*[\{\}]\s*$/,j=new RegExp("(^|[^\\n]*?\\s)("+d+")([^\\n]*)({[\\n\\w\\W]*?})","gi"),k=b.createDocumentFragment(),l=b.documentElement,m=l.firstChild,n=b.createElement("body"),o=b.createElement("style"),p=/print|all/,q;c.getCSS=function(a,b){if(a+""===undefined)return"";var d=-1,e=a.length,f,g=[];while(++d<e){f=a[d];if(f.disabled)continue;b=f.media||b,p.test(b)&&g.push(c.getCSS(f.imports,b),f.cssText),b="all"}return g.join("")},c.parseCSS=function(a){var b=[],c;while((c=j.exec(a))!=null)b.push(((i.exec(c[1])?"\n":c[1])+c[2]+c[3]).replace(g,"$1.iepp_$2")+c[4]);return b.join("\n")},c.writeHTML=function(){var a=-1;q=q||b.body;while(++a<f){var c=b.getElementsByTagName(e[a]),d=c.length,g=-1;while(++g<d)c[g].className.indexOf("iepp_")<0&&(c[g].className+=" iepp_"+e[a])}k.appendChild(q),l.appendChild(n),n.className=q.className,n.id=q.id,n.innerHTML=q.innerHTML.replace(h,"<$1font")},c._beforePrint=function(){o.styleSheet.cssText=c.parseCSS(c.getCSS(b.styleSheets,"all")),c.writeHTML()},c.restoreHTML=function(){n.innerHTML="",l.removeChild(n),l.appendChild(q)},c._afterPrint=function(){c.restoreHTML(),o.styleSheet.cssText=""},r(b),r(k);if(c.disablePP)return;m.insertBefore(o,m.firstChild),o.media="print",o.className="iepp-printshim",a.attachEvent("onbeforeprint",c._beforePrint),a.attachEvent("onafterprint",c._afterPrint)}})(this,document);@*/
5 | 


--------------------------------------------------------------------------------
/web/public/css/main.css:
--------------------------------------------------------------------------------
  1 | @import "reset5.css";
  2 | 
  3 | body {
  4 |     font-family: sans-serif;
  5 | }
  6 | 
  7 | #wrapper {
  8 |     padding: 20px;
  9 | }
 10 | 
 11 | header {
 12 |     padding: 20px 0;
 13 | } 
 14 | 
 15 |     header h1 {
 16 |         font-size: 32px;
 17 |         font-weight: normal;
 18 |     }
 19 |     
 20 | article {
 21 |     
 22 | }
 23 | 
 24 |     article h1 {
 25 |         font-size: 18px;
 26 |         font-weight: normal;
 27 |     }
 28 |     
 29 |     #result {
 30 |         padding: 20px;
 31 |         background: #eee;
 32 |         margin-top: 16px;
 33 |         line-height: 24px;
 34 |     }
 35 | 
 36 | 	#result table {
 37 | 		border-collapse: collapse;
 38 | 		margin: 16px 0;
 39 | 	}
 40 | 
 41 | 		#result th {
 42 | 			padding: 0px 20px 0 0;
 43 | 			text-align: left;
 44 | 			font-size: 14px;
 45 | 			font-weight: normal;
 46 | 		}
 47 | 		#result td {
 48 | 			padding: 0px 0px 0 0;
 49 | 			font-style: italic;
 50 | 			font-size: 14px;
 51 | 			color: #333;
 52 | 		}
 53 |         g.node {
 54 |             font-family: Verdana, Helvetica;
 55 |             font-size: 12px;
 56 |             font-weight: normal;
 57 |         }
 58 |         circle.node-dot {
 59 |             fill: white;
 60 |             stroke: steelblue;
 61 |             stroke-width: 1px;
 62 |         }
 63 | 
 64 |         path.link {
 65 |             fill: none;
 66 |             stroke: gray;
 67 |         }
 68 |   
 69 |     #description {
 70 |         padding: 20px;
 71 |         background: #eee;
 72 |         margin-top: 16px;
 73 |         line-height: 24px;
 74 |     }
 75 |     
 76 |     form {
 77 |         padding: 20px 0;
 78 |     }
 79 |     
 80 |         div#text {
 81 |             padding-right: 10px;
 82 |             margin-bottom: 16px;
 83 |         }
 84 |         
 85 |             textarea {
 86 |                 font-family: serif;
 87 |                 font-size: 24px;
 88 |                 padding: 5px;
 89 |                 width: 100%;
 90 |                 height: 300px;
 91 |                 border: 1px inset #999;
 92 |             }
 93 |     
 94 |         div#submit {
 95 |             text-align: center;
 96 |         }
 97 |     
 98 |             button {
 99 |                 font-size: 24px;
100 |                 padding: 10px;
101 |                 text-align: center;
102 |                 width: 300px;
103 |             }
104 |     
105 | footer {
106 |     padding: 20px 0;
107 | }
108 | 


--------------------------------------------------------------------------------
/web/public/css/reset5.css:
--------------------------------------------------------------------------------
  1 | /* 
  2 | html5doctor.com Reset Stylesheet
  3 | v1.6.1
  4 | Last Updated: 2010-09-17
  5 | Author: Richard Clark - http://richclarkdesign.com 
  6 | Twitter: @rich_clark
  7 | */
  8 | 
  9 | html, body, div, span, object, iframe,
 10 | h1, h2, h3, h4, h5, h6, p, blockquote, pre,
 11 | abbr, address, cite, code,
 12 | del, dfn, em, img, ins, kbd, q, samp,
 13 | small, strong, sub, sup, var,
 14 | b, i,
 15 | dl, dt, dd, ol, ul, li,
 16 | fieldset, form, label, legend,
 17 | table, caption, tbody, tfoot, thead, tr, th, td,
 18 | article, aside, canvas, details, figcaption, figure, 
 19 | footer, header, hgroup, menu, nav, section, summary,
 20 | time, mark, audio, video {
 21 |     margin:0;
 22 |     padding:0;
 23 |     border:0;
 24 |     outline:0;
 25 |     font-size:100%;
 26 |     vertical-align:baseline;
 27 |     background:transparent;
 28 | }
 29 | 
 30 | body {
 31 |     line-height:1;
 32 | }
 33 | 
 34 | article,aside,details,figcaption,figure,
 35 | footer,header,hgroup,menu,nav,section { 
 36 |     display:block;
 37 | }
 38 | 
 39 | nav ul {
 40 |     list-style:none;
 41 | }
 42 | 
 43 | blockquote, q {
 44 |     quotes:none;
 45 | }
 46 | 
 47 | blockquote:before, blockquote:after,
 48 | q:before, q:after {
 49 |     content:'';
 50 |     content:none;
 51 | }
 52 | 
 53 | a {
 54 |     margin:0;
 55 |     padding:0;
 56 |     font-size:100%;
 57 |     vertical-align:baseline;
 58 |     background:transparent;
 59 | }
 60 | 
 61 | /* change colours to suit your needs */
 62 | ins {
 63 |     background-color:#ff9;
 64 |     color:#000;
 65 |     text-decoration:none;
 66 | }
 67 | 
 68 | /* change colours to suit your needs */
 69 | mark {
 70 |     background-color:#ff9;
 71 |     color:#000; 
 72 |     font-style:italic;
 73 |     font-weight:bold;
 74 | }
 75 | 
 76 | del {
 77 |     text-decoration: line-through;
 78 | }
 79 | 
 80 | abbr[title], dfn[title] {
 81 |     border-bottom:1px dotted;
 82 |     cursor:help;
 83 | }
 84 | 
 85 | table {
 86 |     border-collapse:collapse;
 87 |     border-spacing:0;
 88 | }
 89 | 
 90 | /* change border colour to suit your needs */
 91 | hr {
 92 |     display:block;
 93 |     height:1px;
 94 |     border:0;   
 95 |     border-top:1px solid #cccccc;
 96 |     margin:1em 0;
 97 |     padding:0;
 98 | }
 99 | 
100 | input, select {
101 |     vertical-align:middle;
102 | }


--------------------------------------------------------------------------------
/web/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irokez/Pyrus/9a5c64991592ca13e0a5726f394fbe2a399501c3/web/public/favicon.ico


--------------------------------------------------------------------------------
/web/public/js/.d3-tree-test.js.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irokez/Pyrus/9a5c64991592ca13e0a5726f394fbe2a399501c3/web/public/js/.d3-tree-test.js.swp


--------------------------------------------------------------------------------
/web/public/js/Curry-1.0.1.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Curry - Function currying
 3 |  * Copyright (c) 2008 Ariel Flesler - aflesler(at)gmail(dot)com | http://flesler.blogspot.com
 4 |  * Licensed under BSD (http://www.opensource.org/licenses/bsd-license.php)
 5 |  * Date: 10/4/2008
 6 |  *
 7 |  * @author Ariel Flesler
 8 |  * @version 1.0.1
 9 |  */
10 | 
11 | function curry( fn ){
12 | 	return function(){
13 | 		var args = curry.args(arguments),
14 | 			master = arguments.callee,
15 | 			self = this;
16 | 
17 | 		return args.length >= fn.length ? fn.apply(self,args) :	function(){
18 | 			return master.apply( self, args.concat(curry.args(arguments)) );
19 | 		};
20 | 	};
21 | };
22 | 
23 | curry.args = function( args ){
24 | 	return Array.prototype.slice.call(args);
25 | };
26 | 
27 | Function.prototype.curry = function(){
28 | 	return curry(this);
29 | };


--------------------------------------------------------------------------------
/web/public/js/d3-tree-test.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Created by JetBrains RubyMine.
  3 |  * User: pavanpodila
  4 |  * Date: 7/17/11
  5 |  * Time: 4:30 PM
  6 |  * To change this template use File | Settings | File Templates.
  7 |  */
  8 | 
  9 | var treeData = {
 10 |     name: "/",
 11 |     contents: [
 12 |         {
 13 |             name: "Applications",
 14 |             contents: [
 15 |                 { name: "Mail.app" },
 16 |                 { name: "iPhoto.app" },
 17 |                 { name: "Keynote.app" },
 18 |                 { name: "iTunes.app" },
 19 |                 { name: "XCode.app" },
 20 |                 { name: "Numbers.app" },
 21 |                 { name: "Pages.app" }
 22 |             ]
 23 |         },
 24 |         {
 25 |             name: "System",
 26 |             contents: []
 27 |         },
 28 |         {
 29 |             name: "Library",
 30 |             contents: [
 31 |                 {
 32 |                     name: "Application Support",
 33 |                     contents: [
 34 |                         { name: "Adobe" },
 35 |                         { name: "Apple" },
 36 |                         { name: "Google" },
 37 |                         { name: "Microsoft" }
 38 |                     ]
 39 |                 },
 40 |                 {
 41 |                     name: "Languages",
 42 |                     contents: [
 43 |                         { name: "Ruby" },
 44 |                         { name: "Python" },
 45 |                         { name: "Javascript" },
 46 |                         { name: "C#" }
 47 |                     ]
 48 |                 },
 49 |                 {
 50 |                     name: "Developer",
 51 |                     contents: [
 52 |                         { name: "4.2" },
 53 |                         { name: "4.3" },
 54 |                         { name: "5.0" },
 55 |                         { name: "Documentation" }
 56 |                     ]
 57 |                 }
 58 |             ]
 59 |         },
 60 |         {
 61 |             name: "opt",
 62 |             contents: []
 63 |         },
 64 |         {
 65 |             name: "Users",
 66 |             contents: [
 67 |                 { name: "pavanpodila" },
 68 |                 { name: "admin" },
 69 |                 { name: "test-user" }
 70 |             ]
 71 |         }
 72 |     ]
 73 | };
 74 | 
 75 | function visit(parent, visitFn, childrenFn)
 76 | {
 77 |     if (!parent) return;
 78 | 
 79 |     visitFn(parent);
 80 | 
 81 |     var children = childrenFn(parent);
 82 |     if (children) {
 83 |         var count = children.length;
 84 |         for (var i = 0; i < count; i++) {
 85 |             visit(children[i], visitFn, childrenFn);
 86 |         }
 87 |     }
 88 | }
 89 | 
 90 | function buildTree(containerName, customOptions)
 91 | {
 92 |     // build the options object
 93 |     var options = $.extend({
 94 |         nodeRadius: 5, fontSize: 12
 95 |     }, customOptions);
 96 | 
 97 |     
 98 |     // Calculate total nodes, max label length
 99 |     var totalNodes = 0;
100 |     var maxLabelLength = 0;
101 |     visit(treeData, function(d)
102 |     {
103 |         totalNodes++;
104 |         maxLabelLength = Math.max(d.name.length, maxLabelLength);
105 |     }, function(d)
106 |     {
107 |         return d.contents && d.contents.length > 0 ? d.contents : null;
108 |     });
109 | 
110 |     // size of the diagram
111 |     var size = { width:$(containerName).outerWidth(), height: totalNodes * 15};
112 | 
113 |     var tree = d3.layout.tree()
114 |         .sort(null)
115 |         .size([size.height, size.width - maxLabelLength * options.fontSize])
116 |         .children(get_children);
117 | 
118 |     var nodes = tree.nodes({id: 0, name: '*'});
119 | 	console.log(nodes);
120 |     var links = tree.links(nodes);
121 | 	console.log(links);
122 | 
123 |     
124 |     /*
125 |         <svg>
126 |             <g class="container" />
127 |         </svg>
128 |      */
129 |     var layoutRoot = d3.select(containerName)
130 |         .append("svg:svg").attr("width", size.width).attr("height", size.height)
131 |         .append("svg:g")
132 |         .attr("class", "container")
133 |         .attr("transform", "translate(" + maxLabelLength + ",0)");
134 | 
135 | 
136 |     // Edges between nodes as a <path class="link" />
137 |     var link = d3.svg.diagonal()
138 |         .projection(function(d)
139 |         {
140 |             return [d.y, d.x];
141 |         });
142 | 
143 |     layoutRoot.selectAll("path.link")
144 |         .data(links)
145 |         .enter()
146 |         .append("svg:path")
147 |         .attr("class", "link")
148 |         .attr("d", link);
149 | 
150 | 
151 |     /*
152 |         Nodes as
153 |         <g class="node">
154 |             <circle class="node-dot" />
155 |             <text />
156 |         </g>
157 |      */
158 |     var nodeGroup = layoutRoot.selectAll("g.node")
159 |         .data(nodes)
160 |         .enter()
161 |         .append("svg:g")
162 |         .attr("class", "node")
163 |         .attr("transform", function(d)
164 |         {
165 |             return "translate(" + d.y + "," + d.x + ")";
166 |         });
167 | 
168 |     nodeGroup.append("svg:circle")
169 |         .attr("class", "node-dot")
170 |         .attr("r", options.nodeRadius);
171 | 
172 |     nodeGroup.append("svg:text")
173 |         .attr("text-anchor", function(d)
174 |         {
175 |             return d.children ? "end" : "start";
176 |         })
177 |         .attr("dx", function(d)
178 |         {
179 |             var gap = 2 * options.nodeRadius;
180 |             return d.children ? -gap : gap;
181 |         })
182 |         .attr("dy", 3)
183 |         .text(function(d)
184 |         {
185 |             return d.name;
186 |         });
187 | 
188 | }
189 | 


--------------------------------------------------------------------------------
/web/public/js/dracula_algorithms.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Various algorithms and data structures, licensed under the MIT-license.
  3 |  * (c) 2010 by Johann Philipp Strathausen <strathausen@gmail.com>
  4 |  * http://strathausen.eu
  5 |  *
  6 |  */
  7 | 
  8 | 
  9 | 
 10 | /*
 11 |         Bellman-Ford
 12 |     
 13 |     Path-finding algorithm, finds the shortest paths from one node to all nodes.
 14 |     
 15 |     
 16 |         Complexity
 17 |         
 18 |     O( |E| · |V| ), where E = edges and V = vertices (nodes)
 19 |     
 20 |     
 21 |         Constraints
 22 |     
 23 |     Can run on graphs with negative edge weights as long as they do not have
 24 |     any negative weight cycles.
 25 |     
 26 |  */
 27 | function bellman_ford(g, source) {
 28 | 
 29 |     /* STEP 1: initialisation */
 30 |     for(var n in g.nodes)
 31 |         g.nodes[n].distance = Infinity;
 32 |         /* predecessors are implicitly null */
 33 |     source.distance = 0;
 34 |     
 35 |     step("Initially, all distances are infinite and all predecessors are null.");
 36 |     
 37 |     /* STEP 2: relax each edge (this is at the heart of Bellman-Ford) */
 38 |     /* repeat this for the number of nodes minus one */
 39 |     for(var i = 1; i < g.nodes.length; i++)
 40 |         /* for each edge */
 41 |         for(var e in g.edges) {
 42 |             var edge = g.edges[e];
 43 |             if(edge.source.distance + edge.weight < edge.target.distance) {
 44 |                 step("Relax edge between " + edge.source.id + " and " + edge.target.id + ".");
 45 |                 edge.target.distance = edge.source.distance + edge.weight;
 46 |                 edge.target.predecessor = edge.source;
 47 |             }
 48 | 	    //Added by Jake Stothard (Needs to be tested)
 49 | 	    if(!edge.style.directed) {
 50 | 		if(edge.target.distance + edge.weight < edge.source.distance) {
 51 |                     g.snapShot("Relax edge between "+edge.target.id+" and "+edge.source.id+".");
 52 |                     edge.source.distance = edge.target.distance + edge.weight;
 53 |                     edge.source.predecessor = edge.target;
 54 | 		}
 55 | 	    }
 56 |         }
 57 |     step("Ready.");
 58 |     
 59 |     /* STEP 3: TODO Check for negative cycles */
 60 |     /* For now we assume here that the graph does not contain any negative
 61 |        weights cycles. (this is left as an excercise to the reader[tm]) */
 62 | }
 63 | 
 64 | 
 65 | 
 66 | /*
 67 |    Path-finding algorithm Dijkstra
 68 |    
 69 |    - worst-case running time is O((|E| + |V|) · log |V| ) thus better than
 70 |      Bellman-Ford for sparse graphs (with less edges), but cannot handle
 71 |      negative edge weights
 72 |  */
 73 | function dijkstra(g, source) {
 74 | 
 75 |     /* initially, all distances are infinite and all predecessors are null */
 76 |     for(var n in g.nodes)
 77 |         g.nodes[n].distance = Infinity;
 78 |         /* predecessors are implicitly null */
 79 | 
 80 |     g.snapShot("Initially, all distances are infinite and all predecessors are null.");
 81 | 
 82 |     source.distance = 0;
 83 |     /* set of unoptimized nodes, sorted by their distance (but a Fibonacci heap
 84 |        would be better) */
 85 |     var q = new BinaryMinHeap(g.nodes, "distance");
 86 | 
 87 |     /* pointer to the node in focus */
 88 |     var node;
 89 | 
 90 |     /* get the node with the smallest distance
 91 |        as long as we have unoptimized nodes. q.min() can have O(log n). */
 92 |     while(q.min() != undefined) {
 93 |         /* remove the latest */
 94 |         node = q.extractMin();
 95 |         node.optimized = true;
 96 | 
 97 |         /* no nodes accessible from this one, should not happen */
 98 |         if(node.distance == Infinity)
 99 |             throw "Orphaned node!";
100 | 
101 |         /* for each neighbour of node */
102 |         for(e in node.edges) {
103 | 	    var other = (node == node.edges[e].target) ? node.edges[e].source : node.edges[e].target;
104 | 		
105 |             if(other.optimized)
106 |                 continue;
107 | 
108 |             /* look for an alternative route */
109 |             var alt = node.distance + node.edges[e].weight;
110 |             
111 |             /* update distance and route if a better one has been found */
112 |             if (alt < other.distance) {
113 |             
114 |                 /* update distance of neighbour */
115 |                 other.distance = alt;
116 | 
117 |                 /* update priority queue */
118 |                 q.heapify();
119 | 
120 |                 /* update path */
121 |                 other.predecessor = node;
122 |                 g.snapShot("Enhancing node.")
123 |             }
124 |         }
125 |     }
126 | }
127 | 
128 | 
129 | /* All-Pairs-Shortest-Paths */
130 | /* Runs at worst in O(|V|³) and at best in Omega(|V|³) :-)
131 |    complexity Sigma(|V|²) */
132 | /* This implementation is not yet ready for general use, but works with the
133 |    Dracula graph library. */
134 | function floyd_warshall(g, source) {
135 | 
136 |     /* Step 1: initialising empty path matrix (second dimension is implicit) */
137 |     var path = [];
138 |     var next = [];
139 |     var n = g.nodes.length;
140 | 
141 |     /* construct path matrix, initialize with Infinity */
142 |     for(j in g.nodes) {
143 |         path[j] = [];
144 |         next[j] = [];
145 |         for(i in g.nodes)
146 |             path[j][i] = j == i ? 0 : Infinity;
147 |     }   
148 |     
149 |     /* initialize path with edge weights */
150 |     for(e in g.edges)
151 |         path[g.edges[e].source.id][g.edges[e].target.id] = g.edges[e].weight;
152 |     
153 |     /* Note: Usually, the initialisation is done by getting the edge weights
154 |        from a node matrix representation of the graph, not by iterating through
155 |        a list of edges as done here. */
156 |     
157 |     /* Step 2: find best distances (the heart of Floyd-Warshall) */
158 |     for(k in g.nodes){
159 |         for(i in g.nodes) {
160 |             for(j in g.nodes)
161 |                 if(path[i][j] > path[i][k] + path[k][j]) {
162 |                     path[i][j] = path[i][k] + path[k][j];
163 |                     /* Step 2.b: remember the path */
164 |                     next[i][j] = k;
165 |                 }
166 |         }
167 |     }
168 | 
169 |     /* Step 3: Path reconstruction, get shortest path */
170 |     function getPath(i, j) {
171 |         if(path[i][j] == Infinity)
172 |             throw "There is no path.";
173 |         var intermediate = next[i][j];
174 |         if(intermediate == undefined)
175 |             return null;
176 |         else
177 |             return getPath(i, intermediate)
178 |                 .concat([intermediate])
179 |                 .concat(getPath(intermediate, j));
180 |     }
181 | 
182 |     /* TODO use the knowledge, e.g. mark path in graph */
183 | }
184 | 
185 | /*
186 |         Ford-Fulkerson
187 |     
188 |     Max-Flow-Min-Cut Algorithm finding the maximum flow through a directed
189 |     graph from source to sink.
190 |     
191 |     
192 |         Complexity
193 | 
194 |     O(E * max(f)), max(f) being the maximum flow
195 |     
196 |     
197 |         Description
198 | 
199 |     As long as there is an open path through the residual graph, send the
200 |     minimum of the residual capacities on the path.
201 |     
202 |     
203 |         Constraints
204 |     
205 |     The algorithm works only if all weights are integers. Otherwise it is
206 |     possible that the Ford–Fulkerson algorithm will not converge to the maximum
207 |     value.
208 |     
209 |     
210 |         Input
211 |     
212 |     g - Graph object
213 |     s - Source ID
214 |     t - Target (sink) ID
215 |     
216 |     
217 |         Output
218 |     
219 |     Maximum flow from Source s to Target t
220 | 
221 |  */
222 | /*
223 |         Edmonds-Karp
224 |     
225 |     Max-Flow-Min-Cut Algorithm finding the maximum flow through a directed
226 |     graph from source to sink. An implementation of the Ford-Fulkerson
227 |     algorithm.
228 |     
229 |     
230 |         Complexity
231 |     
232 |     O(|V|*|E|²)
233 |     
234 |     
235 |         Input
236 |         
237 |     g - Graph object (with node and edge lists, capacity is a property of edge)
238 |     s - source ID
239 |     t - sink ID
240 |     
241 |  */
242 | function edmonds_karp(g, s, t) {
243 | 
244 | }
245 | 
246 | /*
247 |    A simple binary min-heap serving as a priority queue
248 |    - takes an array as the input, with elements having a key property
249 |    - elements will look like this:
250 |         {
251 |             key: "... key property ...", 
252 |             value: "... element content ..."
253 |         }
254 |     - provides insert(), min(), extractMin() and heapify()
255 |     - example usage (e.g. via the Firebug or Chromium console):
256 |         var x = {foo: 20, hui: "bla"};
257 |         var a = new BinaryMinHeap([x,{foo:3},{foo:10},{foo:20},{foo:30},{foo:6},{foo:1},{foo:3}],"foo");
258 |         console.log(a.extractMin());
259 |         console.log(a.extractMin());
260 |         x.foo = 0; // update key
261 |         a.heapify(); // call this always after having a key updated
262 |         console.log(a.extractMin());
263 |         console.log(a.extractMin());
264 |     - can also be used on a simple array, like [9,7,8,5]
265 |  */
266 | function BinaryMinHeap(array, key) {
267 |     
268 |     /* Binary tree stored in an array, no need for a complicated data structure */
269 |     var tree = [];
270 |     
271 |     var key = key || 'key';
272 |     
273 |     /* Calculate the index of the parent or a child */
274 |     var parent = function(index) { return Math.floor((index - 1)/2); };
275 |     var right = function(index) { return 2 * index + 2; };
276 |     var left = function(index) { return 2 * index + 1; };
277 | 
278 |     /* Helper function to swap elements with their parent 
279 |        as long as the parent is bigger */
280 |     function bubble_up(i) {
281 |         var p = parent(i);
282 |         while((p >= 0) && (tree[i][key] < tree[p][key])) {
283 |             /* swap with parent */
284 |             tree[i] = tree.splice(p, 1, tree[i])[0];
285 |             /* go up one level */
286 |             i = p;
287 |             p = parent(i);
288 |         }
289 |     }
290 | 
291 |     /* Helper function to swap elements with the smaller of their children
292 |        as long as there is one */
293 |     function bubble_down(i) {
294 |         var l = left(i);
295 |         var r = right(i);
296 |         
297 |         /* as long as there are smaller children */
298 |         while(tree[l] && (tree[i][key] > tree[l][key]) || tree[r] && (tree[i][key] > tree[r][key])) {
299 |             
300 |             /* find smaller child */
301 |             var child = tree[l] ? tree[r] ? tree[l][key] > tree[r][key] ? r : l : l : l;
302 |             
303 |             /* swap with smaller child with current element */
304 |             tree[i] = tree.splice(child, 1, tree[i])[0];
305 |             
306 |             /* go up one level */
307 |             i = child;
308 |             l = left(i);
309 |             r = right(i);
310 |         }
311 |     }
312 |     
313 |     /* Insert a new element with respect to the heap property
314 |        1. Insert the element at the end
315 |        2. Bubble it up until it is smaller than its parent */
316 |     this.insert = function(element) {
317 |     
318 |         /* make sure there's a key property */
319 |         (element[key] == undefined) && (element = {key:element});
320 |         
321 |         /* insert element at the end */
322 |         tree.push(element);
323 | 
324 |         /* bubble up the element */
325 |         bubble_up(tree.length - 1);
326 |     }
327 |     
328 |     /* Only show us the minimum */
329 |     this.min = function() {
330 |         return tree.length == 1 ? undefined : tree[0];
331 |     }
332 |     
333 |     /* Return and remove the minimum
334 |        1. Take the root as the minimum that we are looking for
335 |        2. Move the last element to the root (thereby deleting the root) 
336 |        3. Compare the new root with both of its children, swap it with the
337 |           smaller child and then check again from there (bubble down)
338 |     */
339 |     this.extractMin = function() {
340 |         var result = this.min();
341 |         
342 |         /* move the last element to the root or empty the tree completely */
343 |         /* bubble down the new root if necessary */
344 |         (tree.length == 1) && (tree = []) || (tree[0] = tree.pop()) && bubble_down(0);
345 |         
346 |         return result;        
347 |     }
348 |     
349 |     /* currently unused, TODO implement */
350 |     this.changeKey = function(index, key) {
351 |         throw "function not implemented";
352 |     }
353 | 
354 |     this.heapify = function() {
355 |         for(var start = Math.floor((tree.length - 2) / 2); start >= 0; start--) {
356 |             bubble_down(start);
357 |         }
358 |     }
359 |     
360 |     /* insert the input elements one by one only when we don't have a key property (TODO can be done more elegant) */
361 |     for(i in (array || []))
362 |         this.insert(array[i]);
363 | }
364 | 
365 | 
366 | 
367 | /*
368 |     Quick Sort:
369 |         1. Select some random value from the array, the median.
370 |         2. Divide the array in three smaller arrays according to the elements
371 |            being less, equal or greater than the median.
372 |         3. Recursively sort the array containg the elements less than the
373 |            median and the one containing elements greater than the median.
374 |         4. Concatenate the three arrays (less, equal and greater).
375 |         5. One or no element is always sorted.
376 |     TODO: This could be implemented more efficiently by using only one array object and several pointers.
377 | */
378 | function quickSort(arr) {
379 |     /* recursion anchor: one element is always sorted */
380 |     if(arr.length <= 1) return arr;
381 |     /* randomly selecting some value */
382 |     var median = arr[Math.floor(Math.random() * arr.length)];
383 |     var arr1 = [], arr2 = [], arr3 = [];
384 |     for(var i in arr) {
385 |         arr[i] < median && arr1.push(arr[i]);
386 |         arr[i] == median && arr2.push(arr[i]);
387 |         arr[i] > median && arr3.push(arr[i]);
388 |     }
389 |     /* recursive sorting and assembling final result */
390 |     return quickSort(arr1).concat(arr2).concat(quickSort(arr3));
391 | }
392 | 
393 | /*
394 |     Selection Sort:
395 |         1. Select the minimum and remove it from the array
396 |         2. Sort the rest recursively
397 |         3. Return the minimum plus the sorted rest
398 |         4. An array with only one element is already sorted
399 | */
400 | function selectionSort(arr) {
401 |     /* recursion anchor: one element is always sorted */
402 |     if(arr.length == 1) return arr;
403 |     var minimum = Infinity;
404 |     var index;
405 |     for(var i in arr) {
406 |         if(arr[i] < minimum) {
407 |             minimum = arr[i];
408 |             index = i; /* remember the minimum index for later removal */
409 |         }
410 |     }
411 |     /* remove the minimum */
412 |     arr.splice(index, 1);
413 |     /* assemble result and sort recursively (could be easily done iteratively as well)*/
414 |     return [minimum].concat(selectionSort(arr));
415 | }
416 | 
417 | /*
418 |     Merge Sort:
419 |         1. Cut the array in half
420 |         2. Sort each of them recursively
421 |         3. Merge the two sorted arrays
422 |         4. An array with only one element is considered sorted
423 | 
424 | */
425 | function mergeSort(arr) {
426 |     /* merges two sorted arrays into one sorted array */
427 |     function merge(a, b) {
428 |         /* result set */
429 |         var c = [];
430 |         /* as long as there are elements in the arrays to be merged */
431 |         while(a.length > 0 || b.length > 0){
432 |             /* are there elements to be merged, if yes, compare them and merge */
433 |             var n = a.length > 0 && b.length > 0 ? a[0] < b[0] ? a.shift() : b.shift() : b.length > 0 ? b.shift() : a.length > 0 ? a.shift() : null;
434 |             /* always push the smaller one onto the result set */
435 |             n != null && c.push(n);
436 |         }
437 |         return c;
438 |     }
439 |     /* this mergeSort implementation cuts the array in half, wich should be fine with randomized arrays, but introduces the risk of a worst-case scenario */
440 |     median = Math.floor(arr.length / 2);
441 |     var part1 = arr.slice(0, median); /* for some reason it doesn't work if inserted directly in the return statement (tried so with firefox) */
442 |     var part2 = arr.slice(median - arr.length);
443 |     return arr.length <= 1 ? arr : merge(
444 |         mergeSort(part1), /* first half */
445 |         mergeSort(part2) /* second half */
446 |     );
447 | }
448 | 
449 | /* Balanced Red-Black-Tree */
450 | function RedBlackTree(arr) {
451 |     
452 | }
453 | 
454 | function BTree(arr) {
455 |     
456 | }
457 | 
458 | function NaryTree(n, arr) {
459 |     
460 | }
461 | 
462 | /**
463 |  * Knuth-Morris-Pratt string matching algorithm - finds a pattern in a text.
464 |  * FIXME: Doesn't work correctly yet.
465 |  */
466 | function kmp(p, t) {
467 | 
468 |     /**
469 |      * PREFIX, OVERLAP or FALIURE function for KMP. Computes how many iterations
470 |      * the algorithm can skip after a mismatch.
471 |      *
472 |      * @input p - pattern (string)
473 |      * @result array of skippable iterations
474 |      */
475 |     function prefix(p) {
476 |         /* pi contains the computed skip marks */
477 |         var pi = [0], k = 0;
478 |         for(q = 1; q < p.length; q++) {
479 |             while(k > 0 && (p.charAt(k) != p.charAt(q)))
480 |                 k = pi[k-1];
481 |             
482 |             (p.charAt(k) == p.charAt(q)) && k++;
483 |             
484 |             pi[q] = k;
485 |         }
486 |         return pi;
487 |     }
488 |     
489 |     /* The actual KMP algorithm starts here. */
490 |     
491 |     var pi = prefix(p), q = 0, result = [];
492 |     
493 |     for(var i = 0; i < t.length; i++) {
494 |         /* jump forward as long as the character doesn't match */
495 |         while((q > 0) && (p.charAt(q) != t.charAt(i)))
496 |             q = pi[q];
497 |         
498 |         (p.charAt(q) == t.charAt(i)) && q++;
499 |         
500 |         (q == p.length) && result.push(i - p.length) && (q = pi[q]);
501 |     }
502 |     
503 |     return result;
504 | }
505 | 
506 | /* step for algorithm visualisation */
507 | function step(comment, funct) {
508 |     //wait for input
509 |     //display comment (before or after waiting)
510 | //    next.wait();
511 |     /* execute callback function */
512 |     funct();
513 | }
514 | 
515 | /**
516 |  * Curry - Function currying
517 |  * Copyright (c) 2008 Ariel Flesler - aflesler(at)gmail(dot)com | http://flesler.blogspot.com
518 |  * Licensed under BSD (http://www.opensource.org/licenses/bsd-license.php)
519 |  * Date: 10/4/2008
520 |  *
521 |  * @author Ariel Flesler
522 |  * @version 1.0.1
523 |  */
524 | function curry( fn ){
525 | 	return function(){
526 | 		var args = curry.args(arguments),
527 | 			master = arguments.callee,
528 | 			self = this;
529 | 
530 | 		return args.length >= fn.length ? fn.apply(self,args) :	function(){
531 | 			return master.apply( self, args.concat(curry.args(arguments)) );
532 | 		};
533 | 	};
534 | };
535 | 
536 | curry.args = function( args ){
537 | 	return Array.prototype.slice.call(args);
538 | };
539 | 
540 | Function.prototype.curry = function(){
541 | 	return curry(this);
542 | };
543 | 
544 | /**
545 |  * Topological Sort
546 |  *
547 |  * Sort a directed graph based on incoming edges
548 |  *
549 |  * Coded by Jake Stothard
550 |  */
551 | function topological_sort(g) {
552 |     //Mark nodes as "deleted" instead of actually deleting them
553 |     //That way we don't have to copy g
554 | 
555 |     for(i in g.nodes)
556 | 	g.nodes[i].deleted = false;
557 |     
558 |     var ret = topological_sort_helper(g);
559 | 
560 |     //Cleanup: Remove the deleted property
561 |     for(i in g.nodes)
562 | 	delete g.nodes[i].deleted
563 | 
564 |     return ret;
565 | }
566 | function topological_sort_helper(g) {
567 |     //Find node with no incoming edges
568 |     var node;
569 |     for(i in g.nodes) {
570 | 	if(g.nodes[i].deleted)
571 | 	    continue; //Bad style, meh
572 | 	
573 | 	var incoming = false;
574 | 	for(j in g.nodes[i].edges) {
575 | 	    if(g.nodes[i].edges[j].target == g.nodes[i]
576 | 	      && g.nodes[i].edges[j].source.deleted == false) {
577 | 		incoming = true;
578 | 		break;
579 | 	    }
580 | 	}
581 | 	if(!incoming) {
582 | 	    node = g.nodes[i];
583 | 	    break;
584 | 	}
585 |     }
586 | 
587 |     // Either unsortable or done. Either way, GTFO
588 |     if(node == undefined)
589 | 	return [];
590 | 
591 |     //"Delete" node from g
592 |     node.deleted = true;
593 |     
594 |     var tail = topological_sort_helper(g);
595 | 
596 |     tail.unshift(node);
597 | 
598 |     return tail;
599 | }
600 | 


--------------------------------------------------------------------------------
/web/public/js/dracula_graffle.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Originally grabbed from the official RaphaelJS Documentation
  3 |  * http://raphaeljs.com/graffle.html
  4 |  * Adopted (arrows) and commented by Philipp Strathausen http://blog.ameisenbar.de
  5 |  * Licenced under the MIT licence.
  6 |  */
  7 | 
  8 | /**
  9 |  * Usage:
 10 |  * connect two shapes
 11 |  * parameters: 
 12 |  *      source shape [or connection for redrawing], 
 13 |  *      target shape,
 14 |  *      style with { fg : linecolor, bg : background color, directed: boolean }
 15 |  * returns:
 16 |  *      connection { draw = function() }
 17 |  */
 18 | Raphael.fn.connection = function (obj1, obj2, style) {
 19 |     var selfRef = this;
 20 |     /* create and return new connection */
 21 |     var edge = {/*
 22 |         from : obj1,
 23 |         to : obj2,
 24 |         style : style,*/
 25 |         draw : function() {
 26 |             /* get bounding boxes of target and source */
 27 |             var bb1 = obj1.getBBox();
 28 |             var bb2 = obj2.getBBox();
 29 |             var off1 = 0;
 30 |             var off2 = 0;
 31 |             /* coordinates for potential connection coordinates from/to the objects */
 32 |             var p = [
 33 |                 {x: bb1.x + bb1.width / 2, y: bb1.y - off1},              /* NORTH 1 */
 34 |                 {x: bb1.x + bb1.width / 2, y: bb1.y + bb1.height + off1}, /* SOUTH 1 */
 35 |                 {x: bb1.x - off1, y: bb1.y + bb1.height / 2},             /* WEST  1 */
 36 |                 {x: bb1.x + bb1.width + off1, y: bb1.y + bb1.height / 2}, /* EAST  1 */
 37 |                 {x: bb2.x + bb2.width / 2, y: bb2.y - off2},              /* NORTH 2 */
 38 |                 {x: bb2.x + bb2.width / 2, y: bb2.y + bb2.height + off2}, /* SOUTH 2 */
 39 |                 {x: bb2.x - off2, y: bb2.y + bb2.height / 2},             /* WEST  2 */
 40 |                 {x: bb2.x + bb2.width + off2, y: bb2.y + bb2.height / 2}  /* EAST  2 */
 41 |             ];
 42 |             
 43 |             /* distances between objects and according coordinates connection */
 44 |             var d = {}, dis = [];
 45 | 
 46 |             /*
 47 |              * find out the best connection coordinates by trying all possible ways
 48 |              */
 49 |             /* loop the first object's connection coordinates */
 50 |             for (var i = 0; i < 4; i++) {
 51 |                 /* loop the seond object's connection coordinates */
 52 |                 for (var j = 4; j < 8; j++) {
 53 |                     var dx = Math.abs(p[i].x - p[j].x),
 54 |                         dy = Math.abs(p[i].y - p[j].y);
 55 |                     if ((i == j - 4) || (((i != 3 && j != 6) || p[i].x < p[j].x) && ((i != 2 && j != 7) || p[i].x > p[j].x) && ((i != 0 && j != 5) || p[i].y > p[j].y) && ((i != 1 && j != 4) || p[i].y < p[j].y))) {
 56 |                         dis.push(dx + dy);
 57 |                         d[dis[dis.length - 1].toFixed(3)] = [i, j];
 58 |                     }
 59 |                 }
 60 |             }
 61 |             var res = dis.length == 0 ? [0, 4] : d[Math.min.apply(Math, dis).toFixed(3)];
 62 |             /* bezier path */
 63 |             var x1 = p[res[0]].x,
 64 |                 y1 = p[res[0]].y,
 65 |                 x4 = p[res[1]].x,
 66 |                 y4 = p[res[1]].y,
 67 |                 dx = Math.max(Math.abs(x1 - x4) / 2, 10),
 68 |                 dy = Math.max(Math.abs(y1 - y4) / 2, 10),
 69 |                 x2 = [x1, x1, x1 - dx, x1 + dx][res[0]].toFixed(3),
 70 |                 y2 = [y1 - dy, y1 + dy, y1, y1][res[0]].toFixed(3),
 71 |                 x3 = [0, 0, 0, 0, x4, x4, x4 - dx, x4 + dx][res[1]].toFixed(3),
 72 |                 y3 = [0, 0, 0, 0, y1 + dy, y1 - dy, y4, y4][res[1]].toFixed(3);
 73 |             /* assemble path and arrow */
 74 |             var path = ["M", x1.toFixed(3), y1.toFixed(3), "C", x2, y2, x3, y3, x4.toFixed(3), y4.toFixed(3)].join(",");
 75 |             /* arrow */
 76 |             if(style && style.directed) {
 77 |                 /* magnitude, length of the last path vector */
 78 |                 var mag = Math.sqrt((y4 - y3) * (y4 - y3) + (x4 - x3) * (x4 - x3));
 79 |                 /* vector normalisation to specified length  */
 80 |                 var norm = function(x,l){return (-x*(l||5)/mag);};
 81 |                 /* calculate array coordinates (two lines orthogonal to the path vector) */
 82 |                 var arr = [
 83 |                     {x:(norm(x4-x3)+norm(y4-y3)+x4).toFixed(3), y:(norm(y4-y3)+norm(x4-x3)+y4).toFixed(3)},
 84 |                     {x:(norm(x4-x3)-norm(y4-y3)+x4).toFixed(3), y:(norm(y4-y3)-norm(x4-x3)+y4).toFixed(3)}
 85 |                 ];
 86 |                 path = path + ",M"+arr[0].x+","+arr[0].y+",L"+x4+","+y4+",L"+arr[1].x+","+arr[1].y; 
 87 |             }
 88 |             /* function to be used for moving existent path(s), e.g. animate() or attr() */
 89 |             var move = "attr";
 90 |             /* applying path(s) */
 91 |             edge.fg && edge.fg[move]({path:path}) 
 92 |                 || (edge.fg = selfRef.path(path).attr({stroke: style && style.stroke || "#000", fill: "none"}).toBack());
 93 |             edge.bg && edge.bg[move]({path:path})
 94 |                 || style && style.fill && (edge.bg = style.fill.split && selfRef.path(path).attr({stroke: style.fill.split("|")[0], fill: "none", "stroke-width": style.fill.split("|")[1] || 3}).toBack());
 95 |             /* setting label */
 96 |             style && style.label 
 97 |                 && (edge.label && edge.label.attr({x:(x1+x4)/2, y:(y1+y4)/2}) 
 98 |                     || (edge.label = selfRef.text((x1+x4)/2, (y1+y4)/2, style.label).attr({fill: "#000", "font-size": style["font-size"] || "12px"})));
 99 |             style && style.label && style["label-style"] && edge.label && edge.label.attr(style["label-style"]);
100 |             style && style.callback && style.callback(edge);
101 |         }
102 |     }
103 |     edge.draw();
104 |     return edge;
105 | };
106 | //Raphael.prototype.set.prototype.dodo=function(){console.log("works");};
107 | 


--------------------------------------------------------------------------------
/web/public/js/dracula_graph.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Dracula Graph Layout and Drawing Framework 0.0.3alpha
  3 |  *  (c) 2010 Philipp Strathausen <strathausen@gmail.com>, http://strathausen.eu
  4 |  *  Contributions by Jake Stothard <stothardj@gmail.com>.
  5 |  *
  6 |  *  based on the Graph JavaScript framework, version 0.0.1
  7 |  *  (c) 2006 Aslak Hellesoy <aslak.hellesoy@gmail.com>
  8 |  *  (c) 2006 Dave Hoover <dave.hoover@gmail.com>
  9 |  *
 10 |  *  Ported from Graph::Layouter::Spring in
 11 |  *    http://search.cpan.org/~pasky/Graph-Layderer-0.02/
 12 |  *  The algorithm is based on a spring-style layouter of a Java-based social
 13 |  *  network tracker PieSpy written by Paul Mutton <paul@jibble.org>.
 14 |  *
 15 |  *  This code is freely distributable under the MIT license. Commercial use is
 16 |  *  hereby granted without any cost or restriction.
 17 |  *
 18 |  *  Links:
 19 |  *
 20 |  *  Graph Dracula JavaScript Framework:
 21 |  *      http://graphdracula.net
 22 |  *
 23 |  /*--------------------------------------------------------------------------*/
 24 | 
 25 | /*
 26 |  * Edge Factory
 27 |  */
 28 | var AbstractEdge = function() {
 29 | }
 30 | AbstractEdge.prototype = {
 31 |     hide: function() {
 32 |         this.connection.fg.hide();
 33 |         this.connection.bg && this.bg.connection.hide();
 34 |     }
 35 | };
 36 | var EdgeFactory = function() {
 37 |     this.template = new AbstractEdge();
 38 |     this.template.style = new Object();
 39 |     this.template.style.directed = false;
 40 |     this.template.weight = 1;
 41 | };
 42 | EdgeFactory.prototype = {
 43 |     build: function(source, target) {
 44 |         var e = jQuery.extend(true, {}, this.template);
 45 |         e.source = source;
 46 |         e.target = target;
 47 |         return e;
 48 |     }
 49 | };
 50 | 
 51 | /*
 52 |  * Graph
 53 |  */
 54 | var Graph = function() {
 55 |     this.nodes = {};
 56 |     this.edges = [];
 57 |     this.snapshots = []; // previous graph states TODO to be implemented
 58 |     this.edgeFactory = new EdgeFactory();
 59 | };
 60 | Graph.prototype = {
 61 |     /* 
 62 |      * add a node
 63 |      * @id          the node's ID (string or number)
 64 |      * @content     (optional, dictionary) can contain any information that is
 65 |      *              being interpreted by the layout algorithm or the graph
 66 |      *              representation
 67 |      */
 68 |     addNode: function(id, content) {
 69 |         /* testing if node is already existing in the graph */
 70 |         if(this.nodes[id] == undefined) {
 71 |             this.nodes[id] = new Graph.Node(id, content);
 72 |         }
 73 |         return this.nodes[id];
 74 |     },
 75 | 
 76 |     addEdge: function(source, target, style) {
 77 |         var s = this.addNode(source);
 78 |         var t = this.addNode(target);
 79 |         var edge = this.edgeFactory.build(s, t);
 80 |         jQuery.extend(edge.style,style);
 81 |         s.edges.push(edge);
 82 |         this.edges.push(edge);
 83 |         // NOTE: Even directed edges are added to both nodes.
 84 |         t.edges.push(edge);
 85 |     },
 86 |     
 87 |     /* TODO to be implemented
 88 |      * Preserve a copy of the graph state (nodes, positions, ...)
 89 |      * @comment     a comment describing the state
 90 |      */
 91 |     snapShot: function(comment) {
 92 |         /* FIXME
 93 |         var graph = new Graph();
 94 |         graph.nodes = jQuery.extend(true, {}, this.nodes);
 95 |         graph.edges = jQuery.extend(true, {}, this.edges);
 96 |         this.snapshots.push({comment: comment, graph: graph});
 97 |         */
 98 |     },
 99 |     removeNode: function(id) {
100 |         delete this.nodes[id];
101 |         for(var i = 0; i < this.edges.length; i++) {
102 |             if (this.edges[i].source.id == id || this.edges[i].target.id == id) {
103 |                 this.edges.splice(i, 1);
104 |                 i--;
105 |             }
106 |         }
107 |     }
108 | };
109 | 
110 | /*
111 |  * Node
112 |  */
113 | Graph.Node = function(id, node){
114 |     node = node || {};
115 |     node.id = id;
116 |     node.edges = [];
117 |     node.hide = function() {
118 |         this.hidden = true;
119 |         this.shape && this.shape.hide(); /* FIXME this is representation specific code and should be elsewhere */
120 |         for(i in this.edges)
121 |             (this.edges[i].source.id == id || this.edges[i].target == id) && this.edges[i].hide && this.edges[i].hide();
122 |     };
123 |     node.show = function() {
124 |         this.hidden = false;
125 |         this.shape && this.shape.show();
126 |         for(i in this.edges)
127 |             (this.edges[i].source.id == id || this.edges[i].target == id) && this.edges[i].show && this.edges[i].show();
128 |     };
129 |     return node;
130 | };
131 | Graph.Node.prototype = {
132 | };
133 | 
134 | /*
135 |  * Renderer base class
136 |  */
137 | Graph.Renderer = {};
138 | 
139 | /*
140 |  * Renderer implementation using RaphaelJS
141 |  */
142 | Graph.Renderer.Raphael = function(element, graph, width, height) {
143 |     this.width = width || 400;
144 |     this.height = height || 400;
145 |     var selfRef = this;
146 |     this.r = Raphael(element, this.width, this.height);
147 |     this.radius = 40; /* max dimension of a node */
148 |     this.graph = graph;
149 |     this.mouse_in = false;
150 | 
151 |     /* TODO default node rendering function */
152 |     if(!this.graph.render) {
153 |         this.graph.render = function() {
154 |             return;
155 |         }
156 |     }
157 |     
158 |     /*
159 |      * Dragging
160 |      */
161 |     this.isDrag = false;
162 |     this.dragger = function (e) {
163 |         this.dx = e.clientX;
164 |         this.dy = e.clientY;
165 |         selfRef.isDrag = this;
166 |         this.set && this.set.animate({"fill-opacity": .1}, 200) && this.set.toFront();
167 |         e.preventDefault && e.preventDefault();
168 |     };
169 |     
170 |     var d = document.getElementById(element);
171 |     d.onmousemove = function (e) {
172 |         e = e || window.event;
173 |         if (selfRef.isDrag) {
174 |             var bBox = selfRef.isDrag.set.getBBox();
175 |             // TODO round the coordinates here (eg. for proper image representation)
176 |             var newX = e.clientX - selfRef.isDrag.dx + (bBox.x + bBox.width / 2);
177 |             var newY = e.clientY - selfRef.isDrag.dy + (bBox.y + bBox.height / 2);
178 |             /* prevent shapes from being dragged out of the canvas */
179 |             var clientX = e.clientX - (newX < 20 ? newX - 20 : newX > selfRef.width - 20 ? newX - selfRef.width + 20 : 0);
180 |             var clientY = e.clientY - (newY < 20 ? newY - 20 : newY > selfRef.height - 20 ? newY - selfRef.height + 20 : 0);
181 |             selfRef.isDrag.set.translate(clientX - Math.round(selfRef.isDrag.dx), clientY - Math.round(selfRef.isDrag.dy));
182 |             //            console.log(clientX - Math.round(selfRef.isDrag.dx), clientY - Math.round(selfRef.isDrag.dy));
183 |             for (var i in selfRef.graph.edges) {
184 |                 selfRef.graph.edges[i].connection && selfRef.graph.edges[i].connection.draw();
185 |             }
186 |             //selfRef.r.safari();
187 |             selfRef.isDrag.dx = clientX;
188 |             selfRef.isDrag.dy = clientY;
189 |         }
190 |     };
191 |     d.onmouseup = function () {
192 |         selfRef.isDrag && selfRef.isDrag.set.animate({"fill-opacity": .6}, 500);
193 |         selfRef.isDrag = false;
194 |     };
195 |     this.draw();
196 | };
197 | Graph.Renderer.Raphael.prototype = {
198 |     translate: function(point) {
199 |         return [
200 |             (point[0] - this.graph.layoutMinX) * this.factorX + this.radius,
201 |             (point[1] - this.graph.layoutMinY) * this.factorY + this.radius
202 |         ];
203 |     },
204 | 
205 |     rotate: function(point, length, angle) {
206 |         var dx = length * Math.cos(angle);
207 |         var dy = length * Math.sin(angle);
208 |         return [point[0]+dx, point[1]+dy];
209 |     },
210 | 
211 |     draw: function() {
212 |         this.factorX = (this.width - 2 * this.radius) / (this.graph.layoutMaxX - this.graph.layoutMinX);
213 |         this.factorY = (this.height - 2 * this.radius) / (this.graph.layoutMaxY - this.graph.layoutMinY);
214 |         for (i in this.graph.nodes) {
215 |             this.drawNode(this.graph.nodes[i]);
216 |         }
217 |         for (var i = 0; i < this.graph.edges.length; i++) {
218 |             this.drawEdge(this.graph.edges[i]);
219 |         }
220 |     },
221 | 
222 |     drawNode: function(node) {
223 |         var point = this.translate([node.layoutPosX, node.layoutPosY]);
224 |         node.point = point;
225 | 
226 |         /* if node has already been drawn, move the nodes */
227 |         if(node.shape) {
228 |             var oBBox = node.shape.getBBox();
229 |             var opoint = { x: oBBox.x + oBBox.width / 2, y: oBBox.y + oBBox.height / 2};
230 |             node.shape.translate(Math.round(point[0] - opoint.x), Math.round(point[1] - opoint.y));
231 |             this.r.safari();
232 |             return node;
233 |         }/* else, draw new nodes */
234 | 
235 |         var shape;
236 | 
237 |         /* if a node renderer function is provided by the user, then use it 
238 |            or the default render function instead */
239 |         if(!node.render) {
240 |             node.render = function(r, node) {
241 |                 /* the default node drawing */
242 |                 var color = Raphael.getColor();
243 |                 var ellipse = r.ellipse(0, 0, 30, 20).attr({fill: color, stroke: color, "stroke-width": 2});
244 |                 /* set DOM node ID */
245 |                 ellipse.node.id = node.label || node.id;
246 |                 shape = r.set().
247 |                     push(ellipse).
248 |                     push(r.text(0, 30, node.label || node.id));
249 |                 return shape;
250 |             }
251 |         }
252 |         /* or check for an ajax representation of the nodes */
253 |         if(node.shapes) {
254 |             // TODO ajax representation evaluation
255 |         }
256 | 
257 |         shape = node.render(this.r, node).hide();
258 | 
259 |         shape.attr({"fill-opacity": .6});
260 |         /* re-reference to the node an element belongs to, needed for dragging all elements of a node */
261 |         shape.items.forEach(function(item){ item.set = shape; item.node.style.cursor = "move"; });
262 |         shape.mousedown(this.dragger);
263 | 
264 |         var box = shape.getBBox();
265 |         shape.translate(Math.round(point[0]-(box.x+box.width/2)),Math.round(point[1]-(box.y+box.height/2)))
266 |         //console.log(box,point);
267 |         node.hidden || shape.show();
268 |         node.shape = shape;
269 |     },
270 |     drawEdge: function(edge) {
271 |         /* if this edge already exists the other way around and is undirected */
272 |         if(edge.backedge)
273 |             return;
274 |         if(edge.source.hidden || edge.target.hidden) {
275 |             edge.connection && edge.connection.fg.hide() | edge.connection.bg && edge.connection.bg.hide();
276 |             return;
277 |         }
278 |         /* if edge already has been drawn, only refresh the edge */
279 |         if(!edge.connection) {
280 |             edge.style && edge.style.callback && edge.style.callback(edge); // TODO move this somewhere else
281 |             edge.connection = this.r.connection(edge.source.shape, edge.target.shape, edge.style);
282 |             return;
283 |         }
284 |         //FIXME showing doesn't work well
285 |         edge.connection.fg.show();
286 |         edge.connection.bg && edge.connection.bg.show();
287 |         edge.connection.draw();
288 |     }
289 | };
290 | Graph.Layout = {};
291 | Graph.Layout.Spring = function(graph) {
292 |     this.graph = graph;
293 |     this.iterations = 500;
294 |     this.maxRepulsiveForceDistance = 6;
295 |     this.k = 2;
296 |     this.c = 0.01;
297 |     this.maxVertexMovement = 0.5;
298 |     this.layout();
299 | };
300 | Graph.Layout.Spring.prototype = {
301 |     layout: function() {
302 |         this.layoutPrepare();
303 |         for (var i = 0; i < this.iterations; i++) {
304 |             this.layoutIteration();
305 |         }
306 |         this.layoutCalcBounds();
307 |     },
308 |     
309 |     layoutPrepare: function() {
310 |         for (i in this.graph.nodes) {
311 |             var node = this.graph.nodes[i];
312 |             node.layoutPosX = 0;
313 |             node.layoutPosY = 0;
314 |             node.layoutForceX = 0;
315 |             node.layoutForceY = 0;
316 |         }
317 |         
318 |     },
319 |     
320 |     layoutCalcBounds: function() {
321 |         var minx = Infinity, maxx = -Infinity, miny = Infinity, maxy = -Infinity;
322 | 
323 |         for (i in this.graph.nodes) {
324 |             var x = this.graph.nodes[i].layoutPosX;
325 |             var y = this.graph.nodes[i].layoutPosY;
326 |             
327 |             if(x > maxx) maxx = x;
328 |             if(x < minx) minx = x;
329 |             if(y > maxy) maxy = y;
330 |             if(y < miny) miny = y;
331 |         }
332 | 
333 |         this.graph.layoutMinX = minx;
334 |         this.graph.layoutMaxX = maxx;
335 |         this.graph.layoutMinY = miny;
336 |         this.graph.layoutMaxY = maxy;
337 |     },
338 |     
339 |     layoutIteration: function() {
340 |         // Forces on nodes due to node-node repulsions
341 | 
342 |         var prev = new Array();
343 |         for(var c in this.graph.nodes) {
344 |             var node1 = this.graph.nodes[c];
345 |             for (var d in prev) {
346 |                 var node2 = this.graph.nodes[prev[d]];
347 |                 this.layoutRepulsive(node1, node2);
348 |                 
349 |             }
350 |             prev.push(c);
351 |         }
352 |         
353 |         // Forces on nodes due to edge attractions
354 |         for (var i = 0; i < this.graph.edges.length; i++) {
355 |             var edge = this.graph.edges[i];
356 |             this.layoutAttractive(edge);             
357 |         }
358 |         
359 |         // Move by the given force
360 |         for (i in this.graph.nodes) {
361 |             var node = this.graph.nodes[i];
362 |             var xmove = this.c * node.layoutForceX;
363 |             var ymove = this.c * node.layoutForceY;
364 | 
365 |             var max = this.maxVertexMovement;
366 |             if(xmove > max) xmove = max;
367 |             if(xmove < -max) xmove = -max;
368 |             if(ymove > max) ymove = max;
369 |             if(ymove < -max) ymove = -max;
370 |             
371 |             node.layoutPosX += xmove;
372 |             node.layoutPosY += ymove;
373 |             node.layoutForceX = 0;
374 |             node.layoutForceY = 0;
375 |         }
376 |     },
377 | 
378 |     layoutRepulsive: function(node1, node2) {
379 |         if (typeof node1 == 'undefined' || typeof node2 == 'undefined')
380 |             return;
381 |         var dx = node2.layoutPosX - node1.layoutPosX;
382 |         var dy = node2.layoutPosY - node1.layoutPosY;
383 |         var d2 = dx * dx + dy * dy;
384 |         if(d2 < 0.01) {
385 |             dx = 0.1 * Math.random() + 0.1;
386 |             dy = 0.1 * Math.random() + 0.1;
387 |             var d2 = dx * dx + dy * dy;
388 |         }
389 |         var d = Math.sqrt(d2);
390 |         if(d < this.maxRepulsiveForceDistance) {
391 |             var repulsiveForce = this.k * this.k / d;
392 |             node2.layoutForceX += repulsiveForce * dx / d;
393 |             node2.layoutForceY += repulsiveForce * dy / d;
394 |             node1.layoutForceX -= repulsiveForce * dx / d;
395 |             node1.layoutForceY -= repulsiveForce * dy / d;
396 |         }
397 |     },
398 | 
399 |     layoutAttractive: function(edge) {
400 |         var node1 = edge.source;
401 |         var node2 = edge.target;
402 |         
403 |         var dx = node2.layoutPosX - node1.layoutPosX;
404 |         var dy = node2.layoutPosY - node1.layoutPosY;
405 |         var d2 = dx * dx + dy * dy;
406 |         if(d2 < 0.01) {
407 |             dx = 0.1 * Math.random() + 0.1;
408 |             dy = 0.1 * Math.random() + 0.1;
409 |             var d2 = dx * dx + dy * dy;
410 |         }
411 |         var d = Math.sqrt(d2);
412 |         if(d > this.maxRepulsiveForceDistance) {
413 |             d = this.maxRepulsiveForceDistance;
414 |             d2 = d * d;
415 |         }
416 |         var attractiveForce = (d2 - this.k * this.k) / this.k;
417 |         if(edge.attraction == undefined) edge.attraction = 1;
418 |         attractiveForce *= Math.log(edge.attraction) * 0.5 + 1;
419 |         
420 |         node2.layoutForceX -= attractiveForce * dx / d;
421 |         node2.layoutForceY -= attractiveForce * dy / d;
422 |         node1.layoutForceX += attractiveForce * dx / d;
423 |         node1.layoutForceY += attractiveForce * dy / d;
424 |     }
425 | };
426 | 
427 | Graph.Layout.Ordered = function(graph, order) {
428 |     this.graph = graph;
429 |     this.order = order;
430 |     this.layout();
431 | };
432 | Graph.Layout.Ordered.prototype = {
433 |     layout: function() {
434 |         this.layoutPrepare();
435 |         this.layoutCalcBounds();
436 |     },
437 |     
438 |     layoutPrepare: function(order) {
439 |         for (i in this.graph.nodes) {
440 |             var node = this.graph.nodes[i];
441 |             node.layoutPosX = 0;
442 |             node.layoutPosY = 0;
443 |         }
444 |             var counter = 0;
445 |             for (i in this.order) {
446 |                 var node = this.order[i];
447 |                 node.layoutPosX = counter;
448 |                 node.layoutPosY = Math.random();
449 |                 counter++;
450 |             }
451 |     },
452 |     
453 |     layoutCalcBounds: function() {
454 |         var minx = Infinity, maxx = -Infinity, miny = Infinity, maxy = -Infinity;
455 | 
456 |         for (i in this.graph.nodes) {
457 |             var x = this.graph.nodes[i].layoutPosX;
458 |             var y = this.graph.nodes[i].layoutPosY;
459 |             
460 |             if(x > maxx) maxx = x;
461 |             if(x < minx) minx = x;
462 |             if(y > maxy) maxy = y;
463 |             if(y < miny) miny = y;
464 |         }
465 | 
466 |         this.graph.layoutMinX = minx;
467 |         this.graph.layoutMaxX = maxx;
468 | 
469 |         this.graph.layoutMinY = miny;
470 |         this.graph.layoutMaxY = maxy;
471 |     }
472 | };
473 | 
474 | /*
475 |  * usefull JavaScript extensions, 
476 |  */
477 | 
478 | function log(a) {console.log&&console.log(a);}
479 | 
480 | /*
481 |  * Raphael Tooltip Plugin
482 |  * - attaches an element as a tooltip to another element
483 |  *
484 |  * Usage example, adding a rectangle as a tooltip to a circle:
485 |  *
486 |  *      paper.circle(100,100,10).tooltip(paper.rect(0,0,20,30));
487 |  *
488 |  * If you want to use more shapes, you'll have to put them into a set.
489 |  *
490 |  */
491 | Raphael.el.tooltip = function (tp) {
492 |     this.tp = tp;
493 |     this.tp.o = {x: 0, y: 0};
494 |     this.tp.hide();
495 |     this.hover(
496 |         function(event){ 
497 |             this.mousemove(function(event){ 
498 |                 this.tp.translate(event.clientX - 
499 |                                   this.tp.o.x,event.clientY - this.tp.o.y);
500 |                 this.tp.o = {x: event.clientX, y: event.clientY};
501 |             });
502 |             this.tp.show().toFront();
503 |         }, 
504 |         function(event){
505 |             this.tp.hide();
506 |             this.unmousemove();
507 |         });
508 |     return this;
509 | };
510 | 
511 | /* For IE */
512 | if (!Array.prototype.forEach)
513 | {
514 |   Array.prototype.forEach = function(fun /*, thisp*/)
515 |   {
516 |     var len = this.length;
517 |     if (typeof fun != "function")
518 |       throw new TypeError();
519 | 
520 |     var thisp = arguments[1];
521 |     for (var i = 0; i < len; i++)
522 |     {
523 |       if (i in this)
524 |         fun.call(thisp, this[i], i, this);
525 |     }
526 |   };
527 | }
528 | 


--------------------------------------------------------------------------------
/web/public/js/html5.js:
--------------------------------------------------------------------------------
1 | // html5shiv @rem remysharp.com/html5-enabling-script
2 | // iepp v1.6.2 @jon_neal iecss.com/print-protector
3 | // Dual licensed under the MIT or GPL Version 2 licenses
4 | /*@cc_on(function(a,b){function r(a){var b=-1;while(++b<f)a.createElement(e[b])}if(!(!window.attachEvent||!b.createStyleSheet||!function(){var a=document.createElement("div");a.innerHTML="<elem></elem>";return a.childNodes.length!==1}())){a.iepp=a.iepp||{};var c=a.iepp,d=c.html5elements||"abbr|article|aside|audio|canvas|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",e=d.split("|"),f=e.length,g=new RegExp("(^|\\s)("+d+")","gi"),h=new RegExp("<(/*)("+d+")","gi"),i=/^\s*[\{\}]\s*$/,j=new RegExp("(^|[^\\n]*?\\s)("+d+")([^\\n]*)({[\\n\\w\\W]*?})","gi"),k=b.createDocumentFragment(),l=b.documentElement,m=l.firstChild,n=b.createElement("body"),o=b.createElement("style"),p=/print|all/,q;c.getCSS=function(a,b){if(a+""===undefined)return"";var d=-1,e=a.length,f,g=[];while(++d<e){f=a[d];if(f.disabled)continue;b=f.media||b,p.test(b)&&g.push(c.getCSS(f.imports,b),f.cssText),b="all"}return g.join("")},c.parseCSS=function(a){var b=[],c;while((c=j.exec(a))!=null)b.push(((i.exec(c[1])?"\n":c[1])+c[2]+c[3]).replace(g,"$1.iepp_$2")+c[4]);return b.join("\n")},c.writeHTML=function(){var a=-1;q=q||b.body;while(++a<f){var c=b.getElementsByTagName(e[a]),d=c.length,g=-1;while(++g<d)c[g].className.indexOf("iepp_")<0&&(c[g].className+=" iepp_"+e[a])}k.appendChild(q),l.appendChild(n),n.className=q.className,n.id=q.id,n.innerHTML=q.innerHTML.replace(h,"<$1font")},c._beforePrint=function(){o.styleSheet.cssText=c.parseCSS(c.getCSS(b.styleSheets,"all")),c.writeHTML()},c.restoreHTML=function(){n.innerHTML="",l.removeChild(n),l.appendChild(q)},c._afterPrint=function(){c.restoreHTML(),o.styleSheet.cssText=""},r(b),r(k);if(c.disablePP)return;m.insertBefore(o,m.firstChild),o.media="print",o.className="iepp-printshim",a.attachEvent("onbeforeprint",c._beforePrint),a.attachEvent("onafterprint",c._afterPrint)}})(this,document);@*/
5 | 


--------------------------------------------------------------------------------
/web/public/js/seedrandom.js:
--------------------------------------------------------------------------------
  1 | // seedrandom.js
  2 | // Author: David Bau 3/11/2010
  3 | //
  4 | // Defines a method Math.seedrandom() that, when called, substitutes
  5 | // an explicitly seeded RC4-based algorithm for Math.random().  Also
  6 | // supports automatic seeding from local or network sources of entropy.
  7 | //
  8 | // Usage:
  9 | //
 10 | //   <script src=http://davidbau.com/encode/seedrandom-min.js></script>
 11 | //
 12 | //   Math.seedrandom('yipee'); Sets Math.random to a function that is
 13 | //                             initialized using the given explicit seed.
 14 | //
 15 | //   Math.seedrandom();        Sets Math.random to a function that is
 16 | //                             seeded using the current time, dom state,
 17 | //                             and other accumulated local entropy.
 18 | //                             The generated seed string is returned.
 19 | //
 20 | //   Math.seedrandom('yowza', true);
 21 | //                             Seeds using the given explicit seed mixed
 22 | //                             together with accumulated entropy.
 23 | //
 24 | //   <script src="http://bit.ly/srandom-512"></script>
 25 | //                             Seeds using physical random bits downloaded
 26 | //                             from random.org.
 27 | //
 28 | // Examples:
 29 | //
 30 | //   Math.seedrandom("hello");            // Use "hello" as the seed.
 31 | //   document.write(Math.random());       // Always 0.5463663768140734
 32 | //   document.write(Math.random());       // Always 0.43973793770592234
 33 | //   var rng1 = Math.random;              // Remember the current prng.
 34 | //
 35 | //   var autoseed = Math.seedrandom();    // New prng with an automatic seed.
 36 | //   document.write(Math.random());       // Pretty much unpredictable.
 37 | //
 38 | //   Math.random = rng1;                  // Continue "hello" prng sequence.
 39 | //   document.write(Math.random());       // Always 0.554769432473455
 40 | //
 41 | //   Math.seedrandom(autoseed);           // Restart at the previous seed.
 42 | //   document.write(Math.random());       // Repeat the 'unpredictable' value.
 43 | //
 44 | // Notes:
 45 | //
 46 | // Each time seedrandom('arg') is called, entropy from the passed seed
 47 | // is accumulated in a pool to help generate future seeds for the
 48 | // zero-argument form of Math.seedrandom, so entropy can be injected over
 49 | // time by calling seedrandom with explicit data repeatedly.
 50 | //
 51 | // On speed - This javascript implementation of Math.random() is about
 52 | // 3-10x slower than the built-in Math.random() because it is not native
 53 | // code, but this is typically fast enough anyway.  Seeding is more expensive,
 54 | // especially if you use auto-seeding.  Some details (timings on Chrome 4):
 55 | //
 56 | // Our Math.random()            - avg less than 0.002 milliseconds per call
 57 | // seedrandom('explicit')       - avg less than 0.5 milliseconds per call
 58 | // seedrandom('explicit', true) - avg less than 2 milliseconds per call
 59 | // seedrandom()                 - avg about 38 milliseconds per call
 60 | //
 61 | // LICENSE (BSD):
 62 | //
 63 | // Copyright 2010 David Bau, all rights reserved.
 64 | //
 65 | // Redistribution and use in source and binary forms, with or without
 66 | // modification, are permitted provided that the following conditions are met:
 67 | // 
 68 | //   1. Redistributions of source code must retain the above copyright
 69 | //      notice, this list of conditions and the following disclaimer.
 70 | //
 71 | //   2. Redistributions in binary form must reproduce the above copyright
 72 | //      notice, this list of conditions and the following disclaimer in the
 73 | //      documentation and/or other materials provided with the distribution.
 74 | // 
 75 | //   3. Neither the name of this module nor the names of its contributors may
 76 | //      be used to endorse or promote products derived from this software
 77 | //      without specific prior written permission.
 78 | // 
 79 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 80 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 81 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 82 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 83 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 84 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 85 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 86 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 87 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 88 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 89 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 90 | //
 91 | /**
 92 |  * All code is in an anonymous closure to keep the global namespace clean.
 93 |  *
 94 |  * @param {number=} overflow 
 95 |  * @param {number=} startdenom
 96 |  */
 97 | (function (pool, math, width, chunks, significance, overflow, startdenom) {
 98 | 
 99 | 
100 | //
101 | // seedrandom()
102 | // This is the seedrandom function described above.
103 | //
104 | math['seedrandom'] = function seedrandom(seed, use_entropy) {
105 |   var key = [];
106 |   var arc4;
107 | 
108 |   // Flatten the seed string or build one from local entropy if needed.
109 |   seed = mixkey(flatten(
110 |     use_entropy ? [seed, pool] :
111 |     arguments.length ? seed :
112 |     [new Date().getTime(), pool, window], 3), key);
113 | 
114 |   // Use the seed to initialize an ARC4 generator.
115 |   arc4 = new ARC4(key);
116 | 
117 |   // Mix the randomness into accumulated entropy.
118 |   mixkey(arc4.S, pool);
119 | 
120 |   // Override Math.random
121 | 
122 |   // This function returns a random double in [0, 1) that contains
123 |   // randomness in every bit of the mantissa of the IEEE 754 value.
124 | 
125 |   math['random'] = function random() {  // Closure to return a random double:
126 |     var n = arc4.g(chunks);             // Start with a numerator n < 2 ^ 48
127 |     var d = startdenom;                 //   and denominator d = 2 ^ 48.
128 |     var x = 0;                          //   and no 'extra last byte'.
129 |     while (n < significance) {          // Fill up all significant digits by
130 |       n = (n + x) * width;              //   shifting numerator and
131 |       d *= width;                       //   denominator and generating a
132 |       x = arc4.g(1);                    //   new least-significant-byte.
133 |     }
134 |     while (n >= overflow) {             // To avoid rounding up, before adding
135 |       n /= 2;                           //   last byte, shift everything
136 |       d /= 2;                           //   right using integer math until
137 |       x >>>= 1;                         //   we have exactly the desired bits.
138 |     }
139 |     return (n + x) / d;                 // Form the number within [0, 1).
140 |   };
141 | 
142 |   // Return the seed that was used
143 |   return seed;
144 | };
145 | 
146 | //
147 | // ARC4
148 | //
149 | // An ARC4 implementation.  The constructor takes a key in the form of
150 | // an array of at most (width) integers that should be 0 <= x < (width).
151 | //
152 | // The g(count) method returns a pseudorandom integer that concatenates
153 | // the next (count) outputs from ARC4.  Its return value is a number x
154 | // that is in the range 0 <= x < (width ^ count).
155 | //
156 | /** @constructor */
157 | function ARC4(key) {
158 |   var t, u, me = this, keylen = key.length;
159 |   var i = 0, j = me.i = me.j = me.m = 0;
160 |   me.S = [];
161 |   me.c = [];
162 | 
163 |   // The empty key [] is treated as [0].
164 |   if (!keylen) { key = [keylen++]; }
165 | 
166 |   // Set up S using the standard key scheduling algorithm.
167 |   while (i < width) { me.S[i] = i++; }
168 |   for (i = 0; i < width; i++) {
169 |     t = me.S[i];
170 |     j = lowbits(j + t + key[i % keylen]);
171 |     u = me.S[j];
172 |     me.S[i] = u;
173 |     me.S[j] = t;
174 |   }
175 | 
176 |   // The "g" method returns the next (count) outputs as one number.
177 |   me.g = function getnext(count) {
178 |     var s = me.S;
179 |     var i = lowbits(me.i + 1); var t = s[i];
180 |     var j = lowbits(me.j + t); var u = s[j];
181 |     s[i] = u;
182 |     s[j] = t;
183 |     var r = s[lowbits(t + u)];
184 |     while (--count) {
185 |       i = lowbits(i + 1); t = s[i];
186 |       j = lowbits(j + t); u = s[j];
187 |       s[i] = u;
188 |       s[j] = t;
189 |       r = r * width + s[lowbits(t + u)];
190 |     }
191 |     me.i = i;
192 |     me.j = j;
193 |     return r;
194 |   };
195 |   // For robust unpredictability discard an initial batch of values.
196 |   // See http://www.rsa.com/rsalabs/node.asp?id=2009
197 |   me.g(width);
198 | }
199 | 
200 | //
201 | // flatten()
202 | // Converts an object tree to nested arrays of strings.
203 | //
204 | /** @param {Object=} result 
205 |   * @param {string=} prop */
206 | function flatten(obj, depth, result, prop) {
207 |   result = [];
208 |   if (depth && typeof(obj) == 'object') {
209 |     for (prop in obj) {
210 |       if (prop.indexOf('S') < 5) {    // Avoid FF3 bug (local/sessionStorage)
211 |         try { result.push(flatten(obj[prop], depth - 1)); } catch (e) {}
212 |       }
213 |     }
214 |   }
215 |   return result.length ? result : '' + obj;
216 | }
217 | 
218 | //
219 | // mixkey()
220 | // Mixes a string seed into a key that is an array of integers, and
221 | // returns a shortened string seed that is equivalent to the result key.
222 | //
223 | /** @param {number=} smear 
224 |   * @param {number=} j */
225 | function mixkey(seed, key, smear, j) {
226 |   seed += '';                         // Ensure the seed is a string
227 |   smear = 0;
228 |   for (j = 0; j < seed.length; j++) {
229 |     key[lowbits(j)] =
230 |       lowbits((smear ^= key[lowbits(j)] * 19) + seed.charCodeAt(j));
231 |   }
232 |   seed = '';
233 |   for (j in key) { seed += String.fromCharCode(key[j]); }
234 |   return seed;
235 | }
236 | 
237 | //
238 | // lowbits()
239 | // A quick "n mod width" for width a power of 2.
240 | //
241 | function lowbits(n) { return n & (width - 1); }
242 | 
243 | //
244 | // The following constants are related to IEEE 754 limits.
245 | //
246 | startdenom = math.pow(width, chunks);
247 | significance = math.pow(2, significance);
248 | overflow = significance * 2;
249 | 
250 | //
251 | // When seedrandom.js is loaded, we immediately mix a few bits
252 | // from the built-in RNG into the entropy pool.  Because we do
253 | // not want to intefere with determinstic PRNG state later,
254 | // seedrandom will not call math.random on its own again after
255 | // initialization.
256 | //
257 | mixkey(math.random(), pool);
258 | 
259 | // End anonymous scope, and pass initial values.
260 | })(
261 |   [],   // pool: entropy pool starts empty
262 |   Math, // math: package containing random, pow, and seedrandom
263 |   256,  // width: each RC4 output is 0 <= x < 256
264 |   6,    // chunks: at least six RC4 outputs for each double
265 |   52    // significance: there are 52 significant digits in a double
266 | );
267 | 


--------------------------------------------------------------------------------
/web/public/js/sigma.min.js:
--------------------------------------------------------------------------------
 1 | /* sigmajs.org - an open-source light-weight JavaScript graph drawing library - Version: 0.1 - Author:  Alexis Jacomy - License: MIT */
 2 | var sigma={tools:{},classes:{},instances:{}};
 3 | (function(){if(!Array.prototype.some)Array.prototype.some=function(i,m){var f=this.length;if("function"!=typeof i)throw new TypeError;for(var j=0;j<f;j++)if(j in this&&i.call(m,this[j],j,this))return!0;return!1};if(!Array.prototype.forEach)Array.prototype.forEach=function(i,m){var f=this.length;if("function"!=typeof i)throw new TypeError;for(var j=0;j<f;j++)j in this&&i.call(m,this[j],j,this)};if(!Array.prototype.map)Array.prototype.map=function(i,m){var f=this.length;if("function"!=typeof i)throw new TypeError;
 4 | for(var j=Array(f),n=0;n<f;n++)n in this&&(j[n]=i.call(m,this[n],n,this));return j};if(!Array.prototype.filter)Array.prototype.filter=function(i,m){var f=this.length;if("function"!=typeof i)throw new TypeError;for(var j=[],n=0;n<f;n++)if(n in this){var u=this[n];i.call(m,u,n,this)&&j.push(u)}return j};if(!Object.keys)Object.keys=function(){var i=Object.prototype.hasOwnProperty,m=!{toString:null}.propertyIsEnumerable("toString"),f="toString,toLocaleString,valueOf,hasOwnProperty,isPrototypeOf,propertyIsEnumerable,constructor".split(","),
 5 | j=f.length;return function(n){if("object"!==typeof n&&"function"!==typeof n||null===n)throw new TypeError("Object.keys called on non-object");var u=[],r;for(r in n)i.call(n,r)&&u.push(r);if(m)for(r=0;r<j;r++)i.call(n,f[r])&&u.push(f[r]);return u}}()})();sigma.classes.Cascade=function(){this.p={};this.config=function(i,m){if("string"==typeof i&&void 0==m)return this.p[i];var f="object"==typeof i&&void 0==m?i:{};"string"==typeof i&&(f[i]=m);for(var j in f)void 0!=this.p[j]&&(this.p[j]=f[j]);return this}};
 6 | sigma.classes.EventDispatcher=function(){var i={},m=this;this.one=function(f,j){if(!j||!f)return m;("string"==typeof f?f.split(" "):f).forEach(function(f){i[f]||(i[f]=[]);i[f].push({h:j,one:!0})});return m};this.bind=function(f,j){if(!j||!f)return m;("string"==typeof f?f.split(" "):f).forEach(function(f){i[f]||(i[f]=[]);i[f].push({h:j,one:!1})});return m};this.unbind=function(f,j){f||(i={});var n="string"==typeof f?f.split(" "):f;j?n.forEach(function(f){i[f]&&(i[f]=i[f].filter(function(f){return f.h!=
 7 | j}));i[f]&&0==i[f].length&&delete i[f]}):n.forEach(function(f){delete i[f]});return m};this.dispatch=function(f,j){i[f]&&(i[f].forEach(function(i){i.h({type:f,content:j,target:m})}),i[f]=i[f].filter(function(f){return!f.one}));return m}};
 8 | (function(){var i;function m(){function b(a){return{x:a.x,y:a.y,size:a.size,degree:a.degree,inDegree:a.inDegree,outDegree:a.outDegree,displayX:a.displayX,displayY:a.displayY,displaySize:a.displaySize,label:a.label,id:a.id,color:a.color,fixed:a.fixed,active:a.active,hidden:a.hidden,forceLabel:a.forceLabel,attr:a.attr}}function l(a){return{source:a.source.id,target:a.target.id,size:a.size,type:a.type,weight:a.weight,displaySize:a.displaySize,label:a.label,hidden:a.hidden,id:a.id,attr:a.attr,color:a.color}}
 9 | function g(){d.nodes=[];d.nodesIndex={};d.edges=[];d.edgesIndex={};return d}sigma.classes.Cascade.call(this);sigma.classes.EventDispatcher.call(this);var d=this;this.p={minNodeSize:0,maxNodeSize:0,minEdgeSize:0,maxEdgeSize:0,scalingMode:"inside",nodesPowRatio:0.5,edgesPowRatio:0};this.borders={};g();this.addNode=function(a,b){if(d.nodesIndex[a])throw Error('Node "'+a+'" already exists.');var b=b||{},c={x:0,y:0,size:1,degree:0,inDegree:0,outDegree:0,fixed:!1,active:!1,hidden:!1,forceLabel:!1,label:a.toString(),
10 | id:a.toString(),attr:{}},g;for(g in b)switch(g){case "id":break;case "x":case "y":case "size":c[g]=+b[g];break;case "fixed":case "active":case "hidden":case "forceLabel":c[g]=!!b[g];break;case "color":case "label":c[g]=b[g];break;default:c.attr[g]=b[g]}d.nodes.push(c);d.nodesIndex[a.toString()]=c;return d};this.addEdge=function(a,b,c,g){if(d.edgesIndex[a])throw Error('Edge "'+a+'" already exists.');if(!d.nodesIndex[b])throw Error("Edge's source \""+b+'" does not exist yet.');if(!d.nodesIndex[c])throw Error("Edge's target \""+
11 | c+'" does not exist yet.');g=g||{};b={source:d.nodesIndex[b],target:d.nodesIndex[c],size:1,weight:1,displaySize:0.5,label:a.toString(),id:a.toString(),hidden:!1,attr:{}};b.source.degree++;b.source.outDegree++;b.target.degree++;b.target.inDegree++;for(var o in g)switch(o){case "id":case "source":case "target":break;case "hidden":b[o]=!!g[o];break;case "size":case "weight":b[o]=+g[o];break;case "color":b[o]=g[o].toString();break;case "type":b[o]=g[o].toString();break;case "label":b[o]=g[o];break;default:b.attr[o]=
12 | g[o]}d.edges.push(b);d.edgesIndex[a.toString()]=b;return d};this.dropNode=function(a){((a instanceof Array?a:[a])||[]).forEach(function(a){if(d.nodesIndex[a]){var c=null;d.nodes.some(function(b,d){return b.id==a?(c=d,!0):!1});null!=c&&d.nodes.splice(c,1);delete d.nodesIndex[a];d.edges=d.edges.filter(function(c){if(c.source.id==a)return delete d.edgesIndex[c.id],c.target.degree--,c.target.inDegree--,!1;return c.target.id==a?(delete d.edgesIndex[c.id],c.source.degree--,c.source.outDegree--,!1):!0})}else sigma.log('Node "'+
13 | a+'" does not exist.')});return d};this.dropEdge=function(a){((a instanceof Array?a:[a])||[]).forEach(function(a){if(d.edgesIndex[a]){d.edgesIndex[a].source.degree--;d.edgesIndex[a].source.outDegree--;d.edgesIndex[a].target.degree--;d.edgesIndex[a].target.inDegree--;var c=null;d.edges.some(function(b,d){return b.id==a?(c=d,!0):!1});null!=c&&d.edges.splice(c,1);delete d.edgesIndex[a]}else sigma.log('Edge "'+a+'" does not exist.')});return d};this.iterEdges=function(a,b){var c=b?b.map(function(a){return d.edgesIndex[a]}):
14 | d.edges,g=c.map(l);g.forEach(a);c.forEach(function(a,c){var b=g[c],k;for(k in b)switch(k){case "id":case "displaySize":break;case "weight":case "size":a[k]=+b[k];break;case "source":case "target":a[k]=d.nodesIndex[k]||a[k];break;case "hidden":a[k]=!!b[k];break;case "color":case "label":case "type":a[k]=(b[k]||"").toString();break;default:a.attr[k]=b[k]}});return d};this.iterNodes=function(a,g){var c=g?g.map(function(a){return d.nodesIndex[a]}):d.nodes,l=c.map(b);l.forEach(a);c.forEach(function(a,
15 | c){var b=l[c],d;for(d in b)switch(d){case "id":case "attr":case "degree":case "inDegree":case "outDegree":case "displayX":case "displayY":case "displaySize":break;case "x":case "y":case "size":a[d]=+b[d];break;case "fixed":case "active":case "hidden":case "forceLabel":a[d]=!!b[d];break;case "color":case "label":a[d]=(b[d]||"").toString();break;default:a.attr[d]=b[d]}});return d};this.getEdges=function(a){var b=((a instanceof Array?a:[a])||[]).map(function(a){return l(d.edgesIndex[a])});return a instanceof
16 | Array?b:b[0]};this.getNodes=function(a){var g=((a instanceof Array?a:[a])||[]).map(function(a){return b(d.nodesIndex[a])});return a instanceof Array?g:g[0]};this.empty=g;this.rescale=function(a,b,c,g){var o=0,h=0;c&&d.nodes.forEach(function(a){h=Math.max(a.size,h)});g&&d.edges.forEach(function(a){o=Math.max(a.size,o)});var h=h||1,o=o||1,l,k,s,p;c&&d.nodes.forEach(function(a){k=Math.max(a.x,k||a.x);l=Math.min(a.x,l||a.x);p=Math.max(a.y,p||a.y);s=Math.min(a.y,s||a.y)});var f="outside"==d.p.scalingMode?
17 | Math.max(a/Math.max(k-l,1),b/Math.max(p-s,1)):Math.min(a/Math.max(k-l,1),b/Math.max(p-s,1)),i=(d.p.maxNodeSize||h)/f;k+=i;l-=i;p+=i;s-=i;var f="outside"==d.p.scalingMode?Math.max(a/Math.max(k-l,1),b/Math.max(p-s,1)):Math.min(a/Math.max(k-l,1),b/Math.max(p-s,1)),j,n;!d.p.maxNodeSize&&!d.p.minNodeSize?(j=1,n=0):d.p.maxNodeSize==d.p.minNodeSize?(j=0,n=d.p.maxNodeSize):(j=(d.p.maxNodeSize-d.p.minNodeSize)/h,n=d.p.minNodeSize);var x,m;!d.p.maxEdgeSize&&!d.p.minEdgeSize?(x=1,m=0):(x=d.p.maxEdgeSize==d.p.minEdgeSize?
18 | 0:(d.p.maxEdgeSize-d.p.minEdgeSize)/o,m=d.p.minEdgeSize);c&&d.nodes.forEach(function(c){c.displaySize=c.size*j+n;if(!c.fixed)c.displayX=(c.x-(k+l)/2)*f+a/2,c.displayY=(c.y-(p+s)/2)*f+b/2});g&&d.edges.forEach(function(a){a.displaySize=a.size*x+m});return d};this.translate=function(a,b,c,g,l){var h=Math.pow(c,d.p.nodesPowRatio);g&&d.nodes.forEach(function(d){if(!d.fixed)d.displayX=d.displayX*c+a,d.displayY=d.displayY*c+b;d.displaySize*=h});h=Math.pow(c,d.p.edgesPowRatio);l&&d.edges.forEach(function(a){a.displaySize*=
19 | h});return d};this.setBorders=function(){d.borders={};d.nodes.forEach(function(a){d.borders.minX=Math.min(void 0==d.borders.minX?a.displayX-a.displaySize:d.borders.minX,a.displayX-a.displaySize);d.borders.maxX=Math.max(void 0==d.borders.maxX?a.displayX+a.displaySize:d.borders.maxX,a.displayX+a.displaySize);d.borders.minY=Math.min(void 0==d.borders.minY?a.displayY-a.displaySize:d.borders.minY,a.displayY-a.displaySize);d.borders.maxY=Math.max(void 0==d.borders.maxY?a.displayY-a.displaySize:d.borders.maxY,
20 | a.displayY-a.displaySize)})};this.checkHover=function(a,b){var c,g,l,h=[],f=[];d.nodes.forEach(function(d){if(d.hidden)d.hover=!1;else{c=Math.abs(d.displayX-a);g=Math.abs(d.displayY-b);l=d.displaySize;var s=d.hover,p=c<l&&g<l&&Math.sqrt(c*c+g*g)<l;if(s&&!p)d.hover=!1,f.push(d.id);else if(p&&!s)d.hover=!0,h.push(d.id)}});h.length&&d.dispatch("overnodes",h);f.length&&d.dispatch("outnodes",f);return d}}function f(b,l){function g(){var a;a="<p>GLOBAL :</p>";for(var b in d.p.globalProbes)a+="<p>"+b+" : "+
21 | d.p.globalProbes[b]()+"</p>";a+="<br><p>LOCAL :</p>";for(b in d.p.localProbes)a+="<p>"+b+" : "+d.p.localProbes[b]()+"</p>";d.p.dom.innerHTML=a;return d}sigma.classes.Cascade.call(this);var d=this;this.instance=b;this.monitoring=!1;this.p={fps:40,dom:l,globalProbes:{"Time (ms)":sigma.chronos.getExecutionTime,Queue:sigma.chronos.getQueuedTasksCount,Tasks:sigma.chronos.getTasksCount,FPS:sigma.chronos.getFPS},localProbes:{"Nodes count":function(){return d.instance.graph.nodes.length},"Edges count":function(){return d.instance.graph.edges.length}}};
22 | this.activate=function(){if(!d.monitoring)d.monitoring=window.setInterval(g,1E3/d.p.fps);return d};this.desactivate=function(){if(d.monitoring)window.clearInterval(d.monitoring),d.monitoring=null,d.p.dom.innerHTML="";return d}}function j(b){function l(b){if(a.p.mouseEnabled&&(g(a.mouseX,a.mouseY,a.ratio*(0<(void 0!=b.wheelDelta&&b.wheelDelta||void 0!=b.detail&&-b.detail)?a.p.zoomMultiply:1/a.p.zoomMultiply)),a.p.blockScroll))b.preventDefault?b.preventDefault():b.returnValue=!1}function g(b,c,g){if(!a.isMouseDown&&
23 | (window.clearInterval(a.interpolationID),w=void 0!=g,i=a.stageX,j=b,o=a.stageY,k=c,h=g||a.ratio,h=Math.min(Math.max(h,a.p.minRatio),a.p.maxRatio),m=a.p.directZooming?1-(w?a.p.zoomDelta:a.p.dragDelta):0,a.ratio!=h||a.stageX!=j||a.stageY!=k))d(),a.interpolationID=window.setInterval(d,50),a.dispatch("startinterpolate")}function d(){m+=w?a.p.zoomDelta:a.p.dragDelta;m=Math.min(m,1);var b=sigma.easing.quadratic.easeout(m),c=a.ratio;a.ratio=c*(1-b)+h*b;w?(a.stageX=j+(a.stageX-j)*a.ratio/c,a.stageY=k+(a.stageY-
24 | k)*a.ratio/c):(a.stageX=i*(1-b)+j*b,a.stageY=o*(1-b)+k*b);a.dispatch("interpolate");if(1<=m)window.clearInterval(a.interpolationID),b=a.ratio,w?(a.ratio=h,a.stageX=j+(a.stageX-j)*a.ratio/b,a.stageY=k+(a.stageY-k)*a.ratio/b):(a.stageX=j,a.stageY=k),a.dispatch("stopinterpolate")}sigma.classes.Cascade.call(this);sigma.classes.EventDispatcher.call(this);var a=this;this.p={minRatio:1,maxRatio:32,marginRatio:1,zoomDelta:0.1,dragDelta:0.3,zoomMultiply:2,directZooming:!1,blockScroll:!0,inertia:1.1,mouseEnabled:!0};
25 | var f=0,c=0,i=0,o=0,h=1,j=0,k=0,s=0,p=0,C=0,n=0,m=0,w=!1;this.stageY=this.stageX=0;this.ratio=1;this.mouseY=this.mouseX=0;this.isMouseDown=!1;b.addEventListener("DOMMouseScroll",l,!0);b.addEventListener("mousewheel",l,!0);b.addEventListener("mousemove",function(b){a.mouseX=void 0!=b.offsetX&&b.offsetX||void 0!=b.layerX&&b.layerX||void 0!=b.clientX&&b.clientX;a.mouseY=void 0!=b.offsetY&&b.offsetY||void 0!=b.layerY&&b.layerY||void 0!=b.clientY&&b.clientY;if(a.isMouseDown){var d=a.mouseX-f+i,h=a.mouseY-
26 | c+o;if(d!=a.stageX||h!=a.stageY)p=s,n=C,s=d,C=h,a.stageX=d,a.stageY=h,a.dispatch("drag")}a.dispatch("move");b.preventDefault?b.preventDefault():b.returnValue=!1},!0);b.addEventListener("mousedown",function(b){if(a.p.mouseEnabled)a.isMouseDown=!0,a.dispatch("mousedown"),i=a.stageX,o=a.stageY,f=a.mouseX,c=a.mouseY,p=s=a.stageX,n=C=a.stageY,a.dispatch("startdrag"),b.preventDefault?b.preventDefault():b.returnValue=!1},!0);document.addEventListener("mouseup",function(b){if(a.p.mouseEnabled&&a.isMouseDown)a.isMouseDown=
27 | !1,a.dispatch("mouseup"),(i!=a.stageX||o!=a.stageY)&&g(a.stageX+a.p.inertia*(a.stageX-p),a.stageY+a.p.inertia*(a.stageY-n)),b.preventDefault?b.preventDefault():b.returnValue=!1},!0);this.checkBorders=function(){return a};this.interpolate=g}function n(b,l,g,d,a,f,c){function i(a){var b=d,c="fixed"==h.p.labelSize?h.p.defaultLabelSize:h.p.labelSizeRatio*a.displaySize;b.font=(h.p.hoverFontStyle||h.p.fontStyle||"")+" "+c+"px "+(h.p.hoverFont||h.p.font||"");b.fillStyle="node"==h.p.labelHoverBGColor?a.color||
28 | h.p.defaultNodeColor:h.p.defaultHoverLabelBGColor;b.beginPath();if(h.p.labelHoverShadow)b.shadowOffsetX=0,b.shadowOffsetY=0,b.shadowBlur=4,b.shadowColor=h.p.labelHoverShadowColor;sigma.tools.drawRoundRect(b,Math.round(a.displayX-c/2-2),Math.round(a.displayY-c/2-2),Math.round(b.measureText(a.label).width+1.5*a.displaySize+c/2+4),Math.round(c+4),Math.round(c/2+2),"left");b.closePath();b.fill();b.shadowOffsetX=0;b.shadowOffsetY=0;b.shadowBlur=0;b.beginPath();b.fillStyle="node"==h.p.nodeBorderColor?a.color||
29 | h.p.defaultNodeColor:h.p.defaultNodeBorderColor;b.arc(Math.round(a.displayX),Math.round(a.displayY),a.displaySize+h.p.borderSize,0,2*Math.PI,!0);b.closePath();b.fill();b.beginPath();b.fillStyle="node"==h.p.nodeHoverColor?a.color||h.p.defaultNodeColor:h.p.defaultNodeHoverColor;b.arc(Math.round(a.displayX),Math.round(a.displayY),a.displaySize,0,2*Math.PI,!0);b.closePath();b.fill();b.fillStyle="node"==h.p.labelHoverColor?a.color||h.p.defaultNodeColor:h.p.defaultLabelHoverColor;b.fillText(a.label,Math.round(a.displayX+
30 | 1.5*a.displaySize),Math.round(a.displayY+c/2-3));return h}function o(a){if(isNaN(a.x)||isNaN(a.y))throw Error("A node's coordinate is not a number (id: "+a.id+")");return!a.hidden&&a.displayX+a.displaySize>-j/3&&a.displayX-a.displaySize<4*j/3&&a.displayY+a.displaySize>-k/3&&a.displayY-a.displaySize<4*k/3}sigma.classes.Cascade.call(this);var h=this;this.p={labelColor:"default",defaultLabelColor:"#000",labelHoverBGColor:"default",defaultHoverLabelBGColor:"#fff",labelHoverShadow:!0,labelHoverShadowColor:"#000",
31 | labelHoverColor:"default",defaultLabelHoverColor:"#000",labelActiveBGColor:"default",defaultActiveLabelBGColor:"#fff",labelActiveShadow:!0,labelActiveShadowColor:"#000",labelActiveColor:"default",defaultLabelActiveColor:"#000",labelSize:"fixed",defaultLabelSize:12,labelSizeRatio:2,labelThreshold:6,font:"Arial",hoverFont:"",activeFont:"",fontStyle:"",hoverFontStyle:"",activeFontStyle:"",edgeColor:"source",defaultEdgeColor:"#aaa",defaultEdgeType:"line",defaultNodeColor:"#aaa",nodeHoverColor:"node",
32 | defaultNodeHoverColor:"#fff",nodeActiveColor:"node",defaultNodeActiveColor:"#fff",borderSize:0,nodeBorderColor:"node",defaultNodeBorderColor:"#fff",edgesSpeed:200,nodesSpeed:200,labelsSpeed:200};var j=f,k=c;this.currentLabelIndex=this.currentNodeIndex=this.currentEdgeIndex=0;this.task_drawLabel=function(){for(var b=a.nodes.length,c=0;c++<h.p.labelsSpeed&&h.currentLabelIndex<b;)if(h.isOnScreen(a.nodes[h.currentLabelIndex])){var d=a.nodes[h.currentLabelIndex++],l=g;if(d.displaySize>=h.p.labelThreshold||
33 | d.forceLabel){var f="fixed"==h.p.labelSize?h.p.defaultLabelSize:h.p.labelSizeRatio*d.displaySize;l.font=h.p.fontStyle+f+"px "+h.p.font;l.fillStyle="node"==h.p.labelColor?d.color||h.p.defaultNodeColor:h.p.defaultLabelColor;l.fillText(d.label,Math.round(d.displayX+1.5*d.displaySize),Math.round(d.displayY+f/2-3))}}else h.currentLabelIndex++;return h.currentLabelIndex<b};this.task_drawEdge=function(){for(var b=a.edges.length,c,d,g=0;g++<h.p.edgesSpeed&&h.currentEdgeIndex<b;)if(e=a.edges[h.currentEdgeIndex],
34 | c=e.source,d=e.target,e.hidden||c.hidden||d.hidden||!h.isOnScreen(c)&&!h.isOnScreen(d))h.currentEdgeIndex++;else{c=a.edges[h.currentEdgeIndex++];d=c.source.displayX;var f=c.source.displayY,o=c.target.displayX,i=c.target.displayY,j=c.color;if(!j)switch(h.p.edgeColor){case "source":j=c.source.color||h.p.defaultNodeColor;break;case "target":j=c.target.color||h.p.defaultNodeColor;break;default:j=h.p.defaultEdgeColor}var k=l;switch(c.type||h.p.defaultEdgeType){case "curve":k.strokeStyle=j;k.lineWidth=
35 | c.displaySize/3;k.beginPath();k.moveTo(d,f);k.quadraticCurveTo((d+o)/2+(i-f)/4,(f+i)/2+(d-o)/4,o,i);k.stroke();break;default:k.strokeStyle=j,k.lineWidth=c.displaySize/3,k.beginPath(),k.moveTo(d,f),k.lineTo(o,i),k.stroke()}}return h.currentEdgeIndex<b};this.task_drawNode=function(){for(var c=a.nodes.length,d=0;d++<h.p.nodesSpeed&&h.currentNodeIndex<c;)if(h.isOnScreen(a.nodes[h.currentNodeIndex])){var g=a.nodes[h.currentNodeIndex++],l=Math.round(10*g.displaySize)/10,f=b;f.fillStyle=g.color;f.beginPath();
36 | f.arc(g.displayX,g.displayY,l,0,2*Math.PI,!0);f.closePath();f.fill();g.hover&&i(g)}else h.currentNodeIndex++;return h.currentNodeIndex<c};this.drawActiveNode=function(a){var b=d;if(!o(a))return h;var c="fixed"==h.p.labelSize?h.p.defaultLabelSize:h.p.labelSizeRatio*a.displaySize;b.font=(h.p.activeFontStyle||h.p.fontStyle||"")+" "+c+"px "+(h.p.activeFont||h.p.font||"");b.fillStyle="node"==h.p.labelHoverBGColor?a.color||h.p.defaultNodeColor:h.p.defaultActiveLabelBGColor;b.beginPath();if(h.p.labelActiveShadow)b.shadowOffsetX=
37 | 0,b.shadowOffsetY=0,b.shadowBlur=4,b.shadowColor=h.p.labelActiveShadowColor;sigma.tools.drawRoundRect(b,Math.round(a.displayX-c/2-2),Math.round(a.displayY-c/2-2),Math.round(b.measureText(a.label).width+1.5*a.displaySize+c/2+4),Math.round(c+4),Math.round(c/2+2),"left");b.closePath();b.fill();b.shadowOffsetX=0;b.shadowOffsetY=0;b.shadowBlur=0;b.beginPath();b.fillStyle="node"==h.p.nodeBorderColor?a.color||h.p.defaultNodeColor:h.p.defaultNodeBorderColor;b.arc(Math.round(a.displayX),Math.round(a.displayY),
38 | a.displaySize+h.p.borderSize,0,2*Math.PI,!0);b.closePath();b.fill();b.beginPath();b.fillStyle="node"==h.p.nodeActiveColor?a.color||h.p.defaultNodeColor:h.p.defaultNodeActiveColor;b.arc(Math.round(a.displayX),Math.round(a.displayY),a.displaySize,0,2*Math.PI,!0);b.closePath();b.fill();b.fillStyle="node"==h.p.labelActiveColor?a.color||h.p.defaultNodeColor:h.p.defaultLabelActiveColor;b.fillText(a.label,Math.round(a.displayX+1.5*a.displaySize),Math.round(a.displayY+c/2-3));return h};this.drawHoverNode=
39 | i;this.isOnScreen=o;this.resize=function(a,b){j=a;k=b;return h}}function u(b,l){function g(){sigma.chronos.removeTask("node_"+c.id,2).removeTask("edge_"+c.id,2).removeTask("label_"+c.id,2).stopTasks();return c}function d(a,b){c.domElements[a]=document.createElement(b);c.domElements[a].style.position="absolute";c.domElements[a].setAttribute("id","sigma_"+a+"_"+c.id);c.domElements[a].setAttribute("class","sigma_"+a+"_"+b);c.domElements[a].setAttribute("width",c.width+"px");c.domElements[a].setAttribute("height",
40 | c.height+"px");c.domRoot.appendChild(c.domElements[a]);return c}function a(){c.p.drawHoverNodes&&(c.graph.checkHover(c.mousecaptor.mouseX,c.mousecaptor.mouseY),c.graph.nodes.forEach(function(a){a.hover&&!a.active&&c.plotter.drawHoverNode(a)}));return c}function A(){c.p.drawActiveNodes&&c.graph.nodes.forEach(function(a){a.active&&c.plotter.drawActiveNode(a)});return c}sigma.classes.Cascade.call(this);sigma.classes.EventDispatcher.call(this);var c=this;this.id=l.toString();this.p={auto:!0,drawNodes:2,
41 | drawEdges:1,drawLabels:2,lastNodes:2,lastEdges:0,lastLabels:2,drawHoverNodes:!0,drawActiveNodes:!0};this.domRoot=b;this.width=this.domRoot.offsetWidth;this.height=this.domRoot.offsetHeight;this.graph=new m;this.domElements={};d("edges","canvas");d("nodes","canvas");d("labels","canvas");d("hover","canvas");d("monitor","div");d("mouse","canvas");this.plotter=new n(this.domElements.nodes.getContext("2d"),this.domElements.edges.getContext("2d"),this.domElements.labels.getContext("2d"),this.domElements.hover.getContext("2d"),
42 | this.graph,this.width,this.height);this.monitor=new f(this,this.domElements.monitor);this.mousecaptor=new j(this.domElements.mouse,this.id);this.mousecaptor.bind("drag interpolate",function(){c.draw(c.p.auto?2:c.p.drawNodes,c.p.auto?0:c.p.drawEdges,c.p.auto?2:c.p.drawLabels,!0)}).bind("stopdrag stopinterpolate",function(){c.draw(c.p.auto?2:c.p.drawNodes,c.p.auto?1:c.p.drawEdges,c.p.auto?2:c.p.drawLabels,!0)}).bind("mousedown mouseup",function(a){var b=c.graph.nodes.filter(function(a){return!!a.hover}).map(function(a){return a.id});
43 | c.dispatch("mousedown"==a.type?"downgraph":"upgraph");b.length&&c.dispatch("mousedown"==a.type?"downnodes":"upnodes",b)}).bind("move",function(){c.domElements.hover.getContext("2d").clearRect(0,0,c.domElements.hover.width,c.domElements.hover.height);a();A()});sigma.chronos.bind("startgenerators",function(){sigma.chronos.getGeneratorsIDs().some(function(a){return!!a.match(RegExp("_ext_"+c.id+"$",""))})&&c.draw(c.p.auto?2:c.p.drawNodes,c.p.auto?0:c.p.drawEdges,c.p.auto?2:c.p.drawLabels)}).bind("stopgenerators",
44 | function(){c.draw()});for(var B=0;B<i.length;B++)i[B](this);this.draw=function(a,b,d,l){if(l&&sigma.chronos.getGeneratorsIDs().some(function(a){return!!a.match(RegExp("_ext_"+c.id+"$",""))}))return c;a=void 0==a?c.p.drawNodes:a;b=void 0==b?c.p.drawEdges:b;d=void 0==d?c.p.drawLabels:d;l={nodes:a,edges:b,labels:d};c.p.lastNodes=a;c.p.lastEdges=b;c.p.lastLabels=d;g();c.graph.rescale(c.width,c.height,0<a,0<b).setBorders();c.mousecaptor.checkBorders(c.graph.borders,c.width,c.height);c.graph.translate(c.mousecaptor.stageX,
45 | c.mousecaptor.stageY,c.mousecaptor.ratio,0<a,0<b);c.dispatch("graphscaled");for(var f in c.domElements)"canvas"==c.domElements[f].nodeName.toLowerCase()&&(void 0==l[f]||0<=l[f])&&c.domElements[f].getContext("2d").clearRect(0,0,c.domElements[f].width,c.domElements[f].height);c.plotter.currentEdgeIndex=0;c.plotter.currentNodeIndex=0;c.plotter.currentLabelIndex=0;f=null;l=!1;if(a)if(1<a)for(;c.plotter.task_drawNode(););else sigma.chronos.addTask(c.plotter.task_drawNode,"node_"+c.id,!1),l=!0,f="node_"+
46 | c.id;if(d)if(1<d)for(;c.plotter.task_drawLabel(););else f?sigma.chronos.queueTask(c.plotter.task_drawLabel,"label_"+c.id,f):sigma.chronos.addTask(c.plotter.task_drawLabel,"label_"+c.id,!1),l=!0,f="label_"+c.id;if(b)if(1<b)for(;c.plotter.task_drawEdge(););else f?sigma.chronos.queueTask(c.plotter.task_drawEdge,"edge_"+c.id,f):sigma.chronos.addTask(c.plotter.task_drawEdge,"edge_"+c.id,!1),l=!0,f="edge_"+c.id;c.dispatch("draw");c.refresh();l&&sigma.chronos.runTasks();return c};this.resize=function(a,
47 | b){var d=c.width,g=c.height;void 0!=a&&void 0!=b?(c.width=a,c.height=b):(c.width=c.domRoot.offsetWidth,c.height=c.domRoot.offsetHeight);if(d!=c.width||g!=c.height){for(var f in c.domElements)c.domElements[f].setAttribute("width",c.width+"px"),c.domElements[f].setAttribute("height",c.height+"px");c.plotter.resize(c.width,c.height);c.draw(c.p.lastNodes,c.p.lastEdges,c.p.lastLabels,!0)}return c};this.refresh=function(){c.domElements.hover.getContext("2d").clearRect(0,0,c.domElements.hover.width,c.domElements.hover.height);
48 | a();A();return c};this.drawHover=a;this.drawActive=A;this.clearSchedule=g;window.addEventListener("resize",function(){c.resize()})}function r(b){var f=this;sigma.classes.EventDispatcher.call(this);this._core=b;this.kill=function(){};this.getID=function(){return b.id};this.configProperties=function(g,d){var a=b.config(g,d);return a==b?f:a};this.drawingProperties=function(g,d){var a=b.plotter.config(g,d);return a==b.plotter?f:a};this.mouseProperties=function(g,d){var a=b.mousecaptor.config(g,d);return a==
49 | b.mousecaptor?f:a};this.graphProperties=function(g,d){var a=b.graph.config(g,d);return a==b.graph?f:a};this.getMouse=function(){return{mouseX:b.mousecaptor.mouseX,mouseY:b.mousecaptor.mouseY,down:b.mousecaptor.isMouseDown}};this.position=function(g,d,a){if(0==arguments.length)return{stageX:b.mousecaptor.stageX,stageY:b.mousecaptor.stageY,ratio:b.mousecaptor.ratio};b.mousecaptor.stageX=void 0!=g?g:b.mousecaptor.stageX;b.mousecaptor.stageY=void 0!=d?d:b.mousecaptor.stageY;b.mousecaptor.ratio=void 0!=
50 | a?a:b.mousecaptor.ratio;return f};this.goTo=function(g,d,a){b.mousecaptor.interpolate(g,d,a);return f};this.zoomTo=function(g,d,a){a=Math.min(Math.max(b.mousecaptor.config("minRatio"),a),b.mousecaptor.config("maxRatio"));a==b.mousecaptor.ratio?b.mousecaptor.interpolate(g-b.width/2+b.mousecaptor.stageX,d-b.height/2+b.mousecaptor.stageY):b.mousecaptor.interpolate((a*g-b.mousecaptor.ratio*b.width/2)/(a-b.mousecaptor.ratio),(a*d-b.mousecaptor.ratio*b.height/2)/(a-b.mousecaptor.ratio),a);return f};this.resize=
51 | function(g,d){b.resize(g,d);return f};this.draw=function(g,d,a,i){b.draw(g,d,a,i);return f};this.refresh=function(){b.refresh();return f};this.addGenerator=function(g,d,a){sigma.chronos.addGenerator(g+"_ext_"+b.id,d,a);return f};this.removeGenerator=function(g){sigma.chronos.removeGenerator(g+"_ext_"+b.id);return f};this.addNode=function(g,d){b.graph.addNode(g,d);return f};this.addEdge=function(g,d,a,i){b.graph.addEdge(g,d,a,i);return f};this.dropNode=function(g){b.graph.dropNode(g);return f};this.dropEdge=
52 | function(g){b.graph.dropEdge(g);return f};this.pushGraph=function(g,d){g.nodes&&g.nodes.forEach(function(a){a.id&&(!d||!b.graph.nodesIndex[a.id])&&f.addNode(a.id,a)});g.edges&&g.edges.forEach(function(a){(validID=a.source&&a.target&&a.id)&&(!d||!b.graph.edgesIndex[a.id])&&f.addNode(a.id,a.source,a.target,a)});return f};this.emptyGraph=function(){b.graph.empty();return f};this.getNodesCount=function(){return b.graph.nodes.length};this.getEdgesCount=function(){return b.graph.edges.length};this.iterNodes=
53 | function(g,d){b.graph.iterNodes(g,d);return f};this.iterEdges=function(g,d){b.graph.iterEdges(g,d);return f};this.getNodes=function(g){return b.graph.getNodes(g)};this.getEdges=function(g){return b.graph.getEdges(g)};this.activateMonitoring=function(){return b.monitor.activate()};this.desactivateMonitoring=function(){return b.monitor.desactivate()};b.bind("downnodes upnodes downgraph upgraph",function(b){f.dispatch(b.type,b.content)});b.graph.bind("overnodes outnodes",function(b){f.dispatch(b.type,
54 | b.content)})}var y=0;i=void 0;i=[];sigma.init=function(b){b=new u(b,(++y).toString());sigma.instances[y]=new r(b);return sigma.instances[y]};sigma.addPlugin=function(b,f,g){r.prototype[b]=f;i.push(g)};sigma.chronos=new function(){function b(a){window.setTimeout(a,0);return h}function f(){for(h.dispatch("frameinserted");m&&t.length&&g(););!m||!t.length?a():(x=(new Date).getTime(),p++,D=y-r,u=r-D,h.dispatch("insertframe"),b(f))}function g(){z%=t.length;if(!t[z].task()){var a=t[z].taskName;v=v.filter(function(b){b.taskParent==
55 | a&&t.push({taskName:b.taskName,task:b.task});return b.taskParent!=a});h.dispatch("killed",t.splice(z--,1)[0])}z++;y=(new Date).getTime()-x;return y<=u}function d(){m=!0;p=z=0;w=x=(new Date).getTime();h.dispatch("start");h.dispatch("insertframe");b(f);return h}function a(){h.dispatch("stop");m=!1;return h}function i(a,b,c){if("function"!=typeof a)throw Error('Task "'+b+'" is not a function');t.push({taskName:b,task:a});m=!(!m&&!(c&&d()||1));return h}function c(a){return a?Object.keys(q).filter(function(a){return!!q[a].on}).length:
56 | Object.keys(q).length}function j(){Object.keys(q).length?(h.dispatch("startgenerators"),h.unbind("killed",o),b(function(){for(var a in q)q[a].on=!0,i(q[a].task,a,!1)}),h.bind("killed",o).runTasks()):h.dispatch("stopgenerators");return h}function o(a){if(void 0!=q[a.content.taskName])q[a.content.taskName].del||!q[a.content.taskName].condition()?delete q[a.content.taskName]:q[a.content.taskName].on=!1,0==c(!0)&&j()}sigma.classes.EventDispatcher.call(this);var h=this,m=!1,k=80,n=0,p=0,r=1E3/k,u=r,y=
57 | 0,w=0,x=0,D=0,q={},t=[],v=[],z=0;this.frequency=function(a){return void 0!=a?(k=Math.abs(1*a),r=1E3/k,p=0,h):k};this.runTasks=d;this.stopTasks=a;this.insertFrame=b;this.addTask=i;this.queueTask=function(a,b,c){if("function"!=typeof a)throw Error('Task "'+b+'" is not a function');if(!t.concat(v).some(function(a){return a.taskName==c}))throw Error('Parent task "'+c+'" of "'+b+'" is not attached.');v.push({taskParent:c,taskName:b,task:a});return h};this.removeTask=function(b,c){if(void 0==b)t=[],1==
58 | c?v=[]:2==c&&(t=v,v=[]),a();else{var d="string"==typeof b?b:"";t=t.filter(function(a){return("string"==typeof b?a.taskName==b:a.task==b)?(d=a.taskName,!1):!0});0<c&&(v=v.filter(function(a){1==c&&a.taskParent==d&&t.push(a);return a.taskParent!=d}))}m=!(t.length&&(!a()||1));return h};this.addGenerator=function(a,b,d){if(void 0!=q[a])return h;q[a]={task:b,condition:d};0==c(!0)&&j();return h};this.removeGenerator=function(a){if(q[a])q[a].on=!1,q[a].del=!0;return h};this.startGenerators=j;this.getGeneratorsIDs=
59 | function(){return Object.keys(q)};this.getFPS=function(){m&&(n=Math.round(1E4*(p/((new Date).getTime()-w)))/10);return n};this.getTasksCount=function(){return t.length};this.getQueuedTasksCount=function(){return v.length};this.getExecutionTime=function(){return x-w};return this};sigma.debugMode=0;sigma.log=function(){if(1==sigma.debugMode)for(var b in arguments)console.log(arguments[b]);else if(1<sigma.debugMode)for(b in arguments)throw Error(arguments[b]);return sigma};sigma.easing={linear:{},quadratic:{}};
60 | sigma.easing.linear.easenone=function(b){return b};sigma.easing.quadratic.easein=function(b){return b*b};sigma.easing.quadratic.easeout=function(b){return-b*(b-2)};sigma.easing.quadratic.easeinout=function(b){return 1>(b*=2)?0.5*b*b:-0.5*(--b*(b-2)-1)};sigma.tools.drawRoundRect=function(b,f,g,d,a,i,c){var i=i?i:0,j=c?c:[],j="string"==typeof j?j.split(" "):j,c=i&&(0<=j.indexOf("topleft")||0<=j.indexOf("top")||0<=j.indexOf("left")),m=i&&(0<=j.indexOf("topright")||0<=j.indexOf("top")||0<=j.indexOf("right")),
61 | h=i&&(0<=j.indexOf("bottomleft")||0<=j.indexOf("bottom")||0<=j.indexOf("left")),j=i&&(0<=j.indexOf("bottomright")||0<=j.indexOf("bottom")||0<=j.indexOf("right"));b.moveTo(f,g+i);c?b.arcTo(f,g,f+i,g,i):b.lineTo(f,g);m?(b.lineTo(f+d-i,g),b.arcTo(f+d,g,f+d,g+i,i)):b.lineTo(f+d,g);j?(b.lineTo(f+d,g+a-i),b.arcTo(f+d,g+a,f+d-i,g+a,i)):b.lineTo(f+d,g+a);h?(b.lineTo(f+i,g+a),b.arcTo(f,g+a,f,g+a-i,i)):b.lineTo(f,g+a);b.lineTo(f,g+i)};sigma.tools.getRGB=function(b,f){var b=b.toString(),g={r:0,g:0,b:0};if(3<=
62 | b.length&&"#"==b.charAt(0)){var d=b.length-1;6==d?g={r:parseInt(b.charAt(1)+b.charAt(2),16),g:parseInt(b.charAt(3)+b.charAt(4),16),b:parseInt(b.charAt(5)+b.charAt(5),16)}:3==d&&(g={r:parseInt(b.charAt(1)+b.charAt(1),16),g:parseInt(b.charAt(2)+b.charAt(2),16),b:parseInt(b.charAt(3)+b.charAt(3),16)})}f&&(g=[g.r,g.g,g.b]);return g};sigma.tools.rgbToHex=function(b,f,g){return sigma.tools.toHex(b)+sigma.tools.toHex(f)+sigma.tools.toHex(g)};sigma.tools.toHex=function(b){b=parseInt(b,10);if(isNaN(b))return"00";
63 | b=Math.max(0,Math.min(b,255));return"0123456789ABCDEF".charAt((b-b%16)/16)+"0123456789ABCDEF".charAt(b%16)};sigma.publicPrototype=r.prototype})();
64 | 


--------------------------------------------------------------------------------
/web/server.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | '''
  4 | Created on Aug 7, 2011
  5 | 
  6 | @author: alexpak
  7 | '''
  8 | 
  9 | import cherrypy
 10 | import sys
 11 | import time
 12 | import cgi
 13 | import os
 14 | 
 15 | path = os.path.dirname(os.path.abspath(__file__)) + '/'
 16 | 
 17 | sys.path.append(path + '../src')
 18 | 
 19 | f = open(path + 'html/tagging.html', 'rb')
 20 | content = f.read().decode()
 21 | f.close()
 22 | 
 23 | import re
 24 | import template
 25 | import socket
 26 | import time
 27 | 
 28 | def recvall(sock):
 29 | 	output = ''
 30 | 	while True:
 31 | 		data = sock.recv(4096)
 32 | 		if not data:
 33 | 			break
 34 | 		output += data.decode('utf-8')
 35 | 	return output
 36 | 
 37 | import morph
 38 | import re
 39 | 
 40 | Tagger = morph.Tagger()
 41 | 
 42 | def get_color(pos):
 43 | 	if pos[0] == 'S':
 44 | 		return 'blue'
 45 | 	elif pos[0] == 'V':
 46 | 		return 'green'
 47 | 	elif pos[0] == 'A':
 48 | 		return 'orange'
 49 | 	else:
 50 | 		return 'gray'
 51 | 
 52 | categories = {
 53 | 	'S': 'сущ.',
 54 | 	'A': 'прил.',
 55 | 	'V': 'глагол',
 56 | 	'VINF': 'инф.',
 57 | 	'VADJ': 'прич.',
 58 | 	'VADV': 'дееп.',
 59 | 	'ADV': 'нар.',
 60 | 	'NID': 'инoстр.',
 61 | 	'NUM': 'числ.',
 62 | 	'PR': 'предлог',
 63 | 	'PART': 'част.',
 64 | 	'CONJ': 'союз',
 65 | 	'COM': 'ком.',
 66 | 	'INTJ': 'межд.',
 67 | 	'P': 'P',
 68 | 	'UNK': '???',
 69 | 	'm': 'муж. род',
 70 | 	'f': 'жен. род',
 71 | 	'n': 'ср. род',
 72 | 	'sg': 'ед. число',
 73 | 	'pl': 'мн. число',
 74 | 	'nom': 'им. падеж',
 75 | 	'gen': 'род. падеж',
 76 | 	'dat': 'дат. падеж',
 77 | 	'acc': 'вин. падеж',
 78 | 	'ins': 'твор. падеж',
 79 | 	'prep': 'пред. падеж',
 80 | 	'gen2': '2й род. падеж',
 81 | 	'loc': 'мест. падеж',
 82 | 	'anim': 'одуш.',
 83 | 	'inan': 'неодуш.',
 84 | 	'1p': '1е лицо',
 85 | 	'2p': '2е лицо',
 86 | 	'3p': '3е лицо',
 87 | 	'perf': 'соверш.',
 88 | 	'imperf': 'несоверш.',
 89 | 	'real': 'действ.',
 90 | 	'imp': 'повелит.',
 91 | 	'pass': 'страд.',
 92 | 	'pst': 'прош. время',
 93 | 	'npst': 'непрош. время',
 94 | 	'prs': 'наст. время',
 95 | 	'comp': 'сравн. степень',
 96 | 	'supl': 'превосх. степень',
 97 | 	'shrt': 'кратк.'
 98 | }
 99 | def pos_to_human(pos):
100 | 	loc = []
101 | 	for feat in pos.split('.'):
102 | 		if feat in categories:
103 | 			loc.append(categories[feat])
104 | 		else:
105 | 			loc.append(feat)
106 | 
107 | 	return loc
108 | 
109 | class HelloWorld:
110 | 	@cherrypy.expose
111 | 	def index(self, text = ''):
112 | 
113 | 		start = time.time()
114 | 		text = text.strip()
115 | 		T = template.Template()
116 | 		T.text = cgi.escape(text)
117 | 		error = ''
118 | 		
119 | 		sentence = [[w] for w in re.split('\W+', text) if len(w)] if len(text) else []
120 | 		
121 | 		if 0 < len(sentence) < 25:
122 | 				
123 | 			labeled = Tagger.label(sentence)
124 | 			for w in range(0, len(sentence)):
125 | 				sentence[w] = (sentence[w][0], labeled[w][1], labeled[w][2])
126 | 				
127 | 			selected_feat = {'m', 'f', 'n', 'sg', 'pl', '1p', '2p', '3p', 'nom', 'gen', 'gen2', 'dat', 'acc', 'ins', 'prep', 'loc', 'real', 'imp', 'pass', 'comp', 'shrt'}		
128 | 			
129 | 			parser_input = []
130 | 			for word in sentence:
131 | 				w = word[0] or 'FANTOM'
132 | 				p = '.'.join([word[1]] + sorted(word[2] & selected_feat))
133 | 				parser_input.append('{0}\t{1}\n'.format(w, p))
134 | 
135 | 			client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
136 | 			client_socket.connect(("localhost", 5000))
137 | 			for word in parser_input:
138 | 				client_socket.send(bytes(word, 'utf-8'))
139 | 			
140 | 			client_socket.send(bytes('\n', 'utf-8'))
141 | 			data = recvall(client_socket).strip()
142 | 			client_socket.close()
143 | 				
144 | 			time_total = time.time() - start
145 | 			words = len(sentence)
146 | 			words_per_sec = words / time_total
147 | 			
148 | 			edges = []
149 | 			nodes = [(0, 'ROOT', 'red')]
150 | 			
151 | 			tagged = [tuple(row.split('\t')) for row in data.split('\n')]
152 | 			n = 0
153 | 			for word in tagged:
154 | 				n += 1
155 | 				if len(word) < 4:
156 | 					continue
157 | 				
158 | 				nodes.append((n, word[0], get_color(word[1])))
159 | 				
160 | 			n = 0
161 | 			for word in tagged:
162 | 				n += 1
163 | 				if len(word) < 4:
164 | 					continue
165 | 				head = int(word[2])
166 | 				if len(tagged) < head:
167 | 					head = 0
168 | 				
169 | 				try:
170 | 					edges.append((n, head, word[3]))
171 | 				finally:
172 | 					pass
173 | 
174 | 			print(tagged, file=sys.stderr)
175 | 			T.tagged = [(word[0], ', '.join(pos_to_human(word[1]))) for word in tagged]
176 | 			T.edges = edges
177 | 			T.nodes = nodes
178 | 			T.time_total = round(time_total, 2)
179 | 			T.words_per_sec = round(words_per_sec)
180 | 			T.words = words
181 | 		elif len(sentence) > 25:
182 | 			error = 'Sentence is too long, looks like "War and Peace"'
183 | 
184 | 		T.error = error
185 | 		
186 | 		return T.transform(content)
187 | 	
188 | 	@cherrypy.expose
189 | 	def test(self):
190 | 		return content
191 | 
192 | cherrypy.server.socket_host = '0.0.0.0'
193 | config = {
194 | 	'/': {
195 | 		'tools.staticdir.on': True,
196 | 		'tools.staticdir.dir': path + 'public/',
197 | 		'tools.encode.encoding': 'utf8'
198 | 	}
199 | }
200 | 
201 | cherrypy.quickstart(HelloWorld(), config = config)
202 | 


--------------------------------------------------------------------------------