├── .github
    └── FUNDING.yml
├── .gitignore
├── .travis.yml
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── README.md
├── setup.cfg
├── setup.py
├── syntaxmaker
    ├── __init__.py
    ├── adposition_tool.py
    ├── converter.py
    ├── data
    │   ├── postpositions.csv
    │   └── prepositions.csv
    ├── grammar.json
    ├── head.py
    ├── inflector.py
    ├── locative_case.json
    ├── noun_tool.py
    ├── phrase.py
    ├── pronoun_tool.py
    ├── syntax_maker.py
    ├── ud_map.json
    ├── verb_valence.py
    └── verb_valences_new.json
├── test
    ├── 100verbs.txt
    ├── generate_sentences.py
    ├── results.csv
    ├── results.xlsx
    └── wiktionary_verbs.py
├── testi.py
└── travis_test.py


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: [mikahama]
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.*~
2 | *.pyc
3 | .idea/*
4 | syntaxmaker.egg-info/*
5 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.6"
 4 | # command to install dependencies
 5 | install:
 6 |   - pip install python-coveralls
 7 |   - python setup.py install
 8 | # command to run tests
 9 | script:
10 |   - coverage run travis_test.py
11 | after_success:
12 |   - coveralls
13 | 
14 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | # This CITATION.cff file was generated with cffinit.
 2 | # Visit https://bit.ly/cffinit to generate yours today!
 3 | 
 4 | cff-version: 1.2.0
 5 | title: Syntax Maker
 6 | message: >-
 7 |   If you use this software, please cite it using the
 8 |   metadata from this file.
 9 | type: software
10 | authors:
11 |   - given-names: Mika
12 |     family-names: Hämäläinen
13 |   - given-names: Jack
14 |     family-names: Rueter
15 | identifiers:
16 |   - type: doi
17 |     value: 10.5281/zenodo.1143056
18 |     description: Zenodo
19 | repository-code: 'https://github.com/mikahama/syntaxmaker'
20 | date-released: '2018-01-09'
21 | preferred-citation:
22 |   type: article
23 |   authors:
24 |   - family-names: "Hämäläinen"
25 |     given-names: "Mika"
26 |   - family-names: "Rueter"
27 |     given-names: "Jack"
28 |   journal: "Proceedings of the Fourth International Workshop on Computational Linguistics of Uralic Languages"
29 |   title: "Development of an Open Source Natural Language Generation Tool for Finnish"
30 |   year: 2018


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2015-2019 Mika Hämäläinen
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | # README.md is read by setup.py for long_description, so ship it in the sdist.
 2 | include README.md
 3 | include LICENSE
 4 | 
 5 | # Include the test suite (FIXME: does not work yet)
 6 | # recursive-include test *
 7 | 
 8 | # Paths are relative to the project root, hence the syntaxmaker/ prefix.
 9 | include syntaxmaker/*.json
10 | include syntaxmaker/data/*.csv
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Syntax maker
 2 | =======
 3 | 
 4 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3483626.svg)](https://doi.org/10.5281/zenodo.3483626)
 5 | 
 6 | The NLG tool for Finnish by [Mika Hämäläinen](https://mikakalevi.com)
 7 | 
 8 | Syntax maker is the natural language generation tool for generating syntactically correct sentences in Finnish automatically. The tool is especially useful in the case of Finnish which has such a high diversity in its morphosyntax. All you need to know are the lemmas and their parts-of-speech and syntax maker will take care of the rest.
 9 | 
10 | For instance, just throw in words `rantaleijona`, `uneksia`, `korkea` and `aalto` and you will get `rantaleijonat uneksivat korkeista aalloista`. So you will get the morphology right automatically! Don't believe me? [Just take a look at this tutorial to find out how.](https://github.com/mikahama/syntaxmaker/wiki/Creating-a-sentence,-the-basics)
11 | 
12 | # Installing
13 | Run
14 | 
15 |     pip install syntaxmaker
16 |     python -m uralicNLP.download -l fin
17 | 
18 | 
19 | # Usage
20 | 
21 | An example for generating a sentence in Finnish:
22 | 
23 |     from syntaxmaker.syntax_maker import *
24 |     vp = create_verb_pharse("antaa")
25 |     subject = create_phrase("NP", "hevonen", {"NUM": "PL"})
26 | 
27 |     dobject = create_phrase("NP", "lahja", {"NUM": "PL"})
28 |     dobject.components["attribute"] = create_phrase("AP", "mahtava")
29 |     dobject.components["attribute"].components["attribute"] = create_phrase("AdvP", "erittäin")
30 | 
31 |     indobject = create_phrase("NP", "lehmä")
32 |     vp.components["subject"] = subject
33 |     vp.components["dir_object"] = dobject
34 |     vp.components["indir_object"] = indobject
35 |     print(vp)
36 |     >> hevoset antavat erittäin mahtavia lahjoja lehmälle
37 | 
38 | Go to [Creating a sentence, the basics](https://github.com/mikahama/syntaxmaker/wiki/Creating-a-sentence,-the-basics) for a quick start guide.
39 | 
40 | A good source of example code with the expected output can be found in [the Travis test file](https://github.com/mikahama/syntaxmaker/blob/master/travis_test.py).
41 | 
42 | Don't forget to [read the Wiki](https://github.com/mikahama/syntaxmaker/wiki) for more instructions.
43 | 
44 | # Cite
45 | 
46 | If you use Syntax Maker in any academic publication, please cite it as follows:
47 | 
48 | Hämäläinen, Mika and Rueter, Jack  2018.  [Development of an Open Source Natural Language Generation Tool for Finnish](http://aclweb.org/anthology/W18-0205).  In *Proceedings of the Fourth International Workshop on Computational Linguistics of Uralic Languages*, 51–58.
49 | 
50 | # More information?
51 | 
52 | Just go ahead and [take a look at the wiki](https://github.com/mikahama/syntaxmaker/wiki) or my [blog post about Syntax maker](https://mikalikes.men/create-finnish-sentences-computationally-in-python-nlg/).
53 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | # This flag says that the code is written to work on both Python 2 and Python
3 | # 3. If at all possible, it is good practice to do this. If you cannot, you
4 | # will need to generate wheels for each Python version that you support.
5 | universal=1


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """A setuptools based setup module.
 3 | See:
 4 | https://packaging.python.org/en/latest/distributing.html
 5 | https://github.com/pypa/sampleproject
 6 | """
 7 | 
 8 | # Always prefer setuptools over distutils
 9 | from setuptools import setup, find_packages
10 | # To use a consistent encoding
11 | from codecs import open
12 | from os import path
13 | 
14 | here = path.abspath(path.dirname(__file__))
15 | 
16 | # Get the long description from the relevant file
17 | with open(path.join(here, 'README.md'), encoding='utf-8') as f:
18 |     long_description = f.read()
19 | 
20 | setup(
21 |     name='syntaxmaker',
22 | 
23 |     # Versions should comply with PEP440.  For a discussion on single-sourcing
24 |     # the version across setup.py and the project code, see
25 |     # https://packaging.python.org/en/latest/single_source_version.html
26 |     version='2.0.1',
27 |     zip_safe=False,
28 |     description='The NLG tool for Finnish',
29 |     long_description=long_description,
30 |     long_description_content_type="text/markdown",
31 |     # The project's main homepage.
32 |     url='https://github.com/mikahama/syntaxmaker/',
33 | 
34 |     # Author details
35 |     author='Mika Hämäläinen',
36 |     author_email='mika@rootroo.com',
37 | 
38 |     # Choose your license
39 |     license='Apache License, Version 2.0',
40 | 
41 |     # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
42 |     classifiers=[
43 |         # How mature is this project? Common values are
44 |         #   3 - Alpha
45 |         #   4 - Beta
46 |         #   5 - Production/Stable
47 |         'Development Status :: 5 - Production/Stable',
48 | 
49 |         # Indicate who your project is intended for
50 |         'Intended Audience :: Developers',
51 |         'Topic :: Text Processing',
52 |         "Natural Language :: Finnish",
53 | 
54 |         # Specify the Python versions you support here. In particular, ensure
55 |         # that you indicate whether you support Python 2, Python 3 or both.
56 |         'Programming Language :: Python :: 3',
57 | 
58 |     ],
59 | 
60 |     # What does your project relate to?
61 |     keywords='NLG Finnish',
62 | 
63 |     # You can just specify the packages manually here if your project is
64 |     # simple. Or you can use find_packages().
65 |     packages=["syntaxmaker"],
66 |     package_dir={'syntaxmaker': 'syntaxmaker'},
67 | 
68 |     # List run-time dependencies here.  These will be installed by pip when
69 |     # your project is installed. For an analysis of "install_requires" vs pip's
70 |     # requirements files see:
71 |     # https://packaging.python.org/en/latest/requirements.html
72 |     install_requires=["uralicNLP>=1.2.2"],
73 | 
74 |     # List additional groups of dependencies here (e.g. development
75 |     # dependencies). You can install these using the following syntax,
76 |     # for example:
77 |     # $ pip install -e .[dev,test]
78 |     extras_require={},
79 | 
80 |     # If there are data files included in your packages that need to be
81 |     # installed, specify them here.  If using Python 2.6 or less, then these
82 |     # have to be included in MANIFEST.in as well.
83 |     package_data={
84 |         'syntaxmaker': ['verb_valences_new.json', 'data/*.csv', '*.json'],
85 |     },
86 | 
87 |     # Although 'package_data' is the preferred approach, in some case you may
88 |     # need to place data files outside of your packages. See:
89 |     # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
90 |     # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
91 |     data_files=[],
92 | 
93 |     # To provide executable scripts, use entry points in preference to the
94 |     # "scripts" keyword. Entry points provide cross-platform support and allow
95 |     # pip to create the appropriate form of executable for the target platform.
96 |     entry_points={},
97 | )
98 | 


--------------------------------------------------------------------------------
/syntaxmaker/__init__.py:
--------------------------------------------------------------------------------
 1 | import os, codecs
 2 | import json
 3 | from uralicNLP import uralicApi
 4 | 
 5 | valence_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'verb_valences_new.json')
 6 | valences = json.load(codecs.open(valence_path, "r", encoding="utf-8"))
 7 | 
 8 | locative_cases_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'locative_case.json')
 9 | locative_cases = json.load(codecs.open(locative_cases_path, "r", encoding="utf-8"))
10 | 
11 | if not uralicApi.is_language_installed("fin"):
12 | 	print("Finnish morphology is missing\nStarting download... (this should only happen once)")
13 | 	uralicApi.download("fin")
14 | 
15 | class ValencyException(Exception):
16 |     pass


--------------------------------------------------------------------------------
/syntaxmaker/adposition_tool.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | __author__ = 'Mika Hämäläinen'
 3 | import csv
 4 | import random
 5 | import os
 6 | 
 7 | prepositions = {}
 8 | postpositions = {}
 9 | 
10 | def load_csv(dictionary, file):
11 | 
12 |     reader = csv.reader(open(os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", file), 'r'))
13 |     for row in reader:
14 |        k, v = row
15 |        dictionary[k] = v
16 | 
17 | load_csv(prepositions, "prepositions.csv")
18 | load_csv(postpositions, "postpositions.csv")
19 | 
20 | def preposition_case(prep):
21 |     if prep in prepositions:
22 |         return prepositions[prep]
23 |     else:
24 |         return None
25 | 
26 | def postposition_case(post):
27 |     if post in postpositions:
28 |         return postpositions[post]
29 |     else:
30 |         return None
31 | 
32 | def get_an_adposition():
33 |     if random.choice([True, False]):
34 |         return random.choice(postpositions)
35 |     else:
36 |         return random.choice(prepositions)


--------------------------------------------------------------------------------
/syntaxmaker/converter.py:
--------------------------------------------------------------------------------
 1 | from .syntaxmaker import *
 2 | 
 3 | def convert_UD(UD_structure):
 4 | 	nodes = UD_structure.find()
 5 | 	phrases = [_node_to_phrase(x) for x in nodes]
 6 | 
 7 | 
 8 | def _node_to_phrase(UD_node):
 9 | 	pos = UD_node.upostag
10 | 	if pos == "NOUN" or pos =="PROPN":
11 | 		return create_noun_phrase(UD_node.lemma, morphology=_noun_morphology(UD_node.feats))
12 | 	elif pos == "ADJ":
13 | 		return create_adjective_phrase(UD_node.lemma, morphology=_noun_morphology(UD_node.feats))
14 | 	elif pos == "ADV":
15 | 		return create_adverb_phrase(UD_node.lemma, morphology=_noun_morphology(UD_node.feats))
16 | 	elif pos == "VERB":
17 | 		pass
18 | 	elif pos == "ADP":
19 | 		return create_adposition_phrase(UD_node.lemma, np=None)
20 | 	elif pos == "AUX":
21 | 		pass
22 | 	elif pos == "CCONJ":
23 | 		pass
24 | 	elif pos == "DET":
25 | 		pass
26 | 	elif pos == "NUM":
27 | 		pass
28 | 	elif pos == "PART":
29 | 		pass
30 | 	elif pos == "PRON":
31 | 		pass
32 | 	elif pos == "SCONJ":
33 | 		pass
34 | 	elif pos == "PUNCT" or pos == "SYM" or pos == "X" or pos == "INTJ":
35 | 		return create_phrase("GENERIC_P", UD_node.lemma)
36 | 
37 | def _noun_morphology(UD_node):
38 | 	ud_morphs = UD_node.feats.split("|")
39 | 	morphology = {}
40 | 	psor_n = None
41 | 	psor_p = None
42 | 	for ud_morph in ud_morphs:
43 | 		if ud_morph.startswith("Case="):
44 | 			morphology["CASE"] = ud_morph.replace("Case=", "")
45 | 		elif ud_morph == "Number=Sing":
46 | 			morphology["NUM"] = "Sg"
47 | 		elif ud_morph == "Number=Plur":
48 | 			morphology["NUM"] = "Pl"
49 | 		elif "Number[psor]" in ud_morph:
50 | 			if "Sing" in ud_morph:
51 | 				psor_n = "Sg"
52 | 			else:
53 | 				psor_n = "Pl"
54 | 		elif "Person[psor]" in ud_morph:
55 | 			psor_p = ud_morph[-1]
56 | 		elif ud_morph == "Degree=Cmp":
57 | 			morphology["DEGREE"] = "Comp"
58 | 		elif ud_morph == "Degree=Sup":
59 | 			morphology["DEGREE"] = "Superl"
60 | 	if psor_n is not None and psor_p is not None:
61 | 		morphology["POSS"] = "Px" + psor_n + psor_p
62 | 	return morphology
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/syntaxmaker/data/postpositions.csv:
--------------------------------------------------------------------------------
 1 | aikana,Gen
 2 | alapuolella,Gen
 3 | alapuolelle,Gen
 4 | alapuolelta,Gen
 5 | ali,Gen
 6 | alla,Gen
 7 | alle,Gen
 8 | asti,Ill
 9 | edelle,Gen
10 | edellä,Gen
11 | perässä,Gen
12 | edessä,Gen
13 | johdosta,Gen
14 | jälkeen,Gen
15 | kanssa,Gen
16 | kautta,Gen
17 | edestä,Gen
18 | eteen,Gen
19 | kesken,Gen
20 | taakse,Gen
21 | kohtaan,Par
22 | kohti,Par
23 | lisäksi,Gen
24 | luo,Gen
25 | luokse,Gen
26 | luona,Gen
27 | luota,Gen
28 | lähtien,Ela
29 | läpi,Gen
30 | mennessä,Ill
31 | mukaan,Gen
32 | mukaisesti,Gen
33 | myötä,Gen
34 | nähden,Gen
35 | oheen,Gen
36 | ohella,Gen
37 | ohelle,Gen
38 | ohelta,Gen
39 | ohessa,Gen
40 | ohesta,Gen
41 | perästä,Gen
42 | perään,Gen
43 | poikitse,Gen
44 | puolesta,Gen
45 | päin,Par
46 | päähän,Gen
47 | päällä,Gen
48 | päältä,Gen
49 | päästä,Gen
50 | sijaan,Gen
51 | sisäpuolella,Gen
52 | sisäpuolelta,Gen
53 | sisäpuolelle,Gen
54 | suhteen,Gen
55 | takaa,Gen
56 | takana,Gen
57 | tykö,Gen
58 | tykönä,Gen
59 | tyköä,Gen
60 | ulkopuolella,Gen
61 | ulkopuolelle,Gen
62 | ulkopuolelta,Gen
63 | varrella,Gen
64 | varten,Par
65 | vastaan,Par
66 | vasten,Par
67 | verran,Gen
68 | viereen,Gen
69 | vierelle,Gen
70 | vierellä,Gen
71 | viereltä,Gen
72 | vieressä,Gen
73 | vierestä,Gen
74 | vuoksi,Gen
75 | välillä,Gen
76 | välissä,Gen
77 | välistä,Gen
78 | yläpuolella,Gen
79 | yläpuolelle,Gen
80 | yläpuolelta,Gen
81 | ympäri,Gen
82 | ympärillä,Gen
83 | 


--------------------------------------------------------------------------------
/syntaxmaker/data/prepositions.csv:
--------------------------------------------------------------------------------
 1 | ennen,Par
 2 | ilman,Par
 3 | keskelle,Par
 4 | keskellä,Par
 5 | keskeltä,Par
 6 | kohti,Par
 7 | loitolla,Ela
 8 | loitolle,Ela
 9 | loitolta,Ela
10 | päin,Par
11 | riippumatta,Ela
12 | vastoin,Par
13 | ylle,Gen
14 | ympäri,Par
15 | 


--------------------------------------------------------------------------------
/syntaxmaker/grammar.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "VP0": {
 3 |         "components": null,
 4 |         "head": "V"
 5 |     },
 6 |     "VP1": {
 7 |         "components": {
 8 |             "subject": "NP"
 9 |         },
10 |         "order": ["subject", "head"],
11 |         "head": "V",
12 |         "agreement": {
13 |             "subject": ["PERS", "NUM"]
14 |         }
15 |     },
16 |     "VP_COPULA": {
17 |         "components": {
18 |             "subject": "NP",
19 |             "predicative": "NP"
20 |         },
21 |         "order": ["subject", "head", "predicative"],
22 |         "head": "V",
23 |         "agreement": {
24 |             "subject": ["PERS", "NUM"]
25 |         }
26 |     },
27 |     "VP2": {
28 |         "components": {
29 |             "subject": "NP",
30 |             "dir_object": "NP"
31 |         },
32 |         "order": ["subject", "head", "dir_object"],
33 |         "head": "V",
34 |         "agreement": {
35 |             "subject": ["PERS", "NUM"]
36 |         }
37 |     },
38 |     "VP3": {
39 |         "components": {
40 |             "subject": "NP",
41 |             "dir_object": "NP",
42 |             "indir_object": "NP"
43 |         },
44 |         "order": ["subject", "head", "dir_object", "indir_object"],
45 |         "head": "V",
46 |         "agreement": {
47 |             "subject": ["PERS", "NUM"]
48 |         }
49 |     },
50 |     "GENERIC_P":{
51 |         "components": null,
52 |         "head": "GENERIC",
53 |         "order": ["head"]
54 |     },
55 |     "NP": {
56 |         "components": {
57 |             "attribute": "AP*"
58 |         },
59 |         "order": ["attribute", "head"],
60 |         "head": "N"
61 |     },
62 |     "AP": {
63 |         "components": {
64 |             "attribute": "AdvP?"
65 |         },
66 |         "order": ["attribute", "head"],
67 |         "head": "A",
68 |         "agreement": {
69 |             "parent": ["CASE", "NUM"]
70 |         }
71 |     },
72 |     "AdvP": {
73 |         "components": {
74 |             "attribute": "AdvP?"
75 |         },
76 |         "order": ["attribute", "head"],
77 |         "head": "Adv"
78 |     },
79 |     "PrepP":{
80 |         "components": {
81 |             "complement": "NP"
82 |         },
83 |         "order": ["head", "complement"],
84 |         "head": "Prep"
85 |     },
86 |     "PostP":{
87 |         "components": {
88 |             "complement": "NP"
89 |         },
90 |         "order": ["complement", "head"],
91 |         "head": "Post"
92 |     }
93 | 
94 | }


--------------------------------------------------------------------------------
/syntaxmaker/head.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | __author__ = 'Mika Hämäläinen'
 3 | from . import inflector
 4 | 
 5 | class Head:
 6 |     def __init__(self, head, pos):
 7 |         self.lemma = head
 8 |         self.pos = pos
 9 | 
10 |     def get_form(self, governance, agreement):
11 |         if self.lemma is None:
12 |             return ""
13 |         governance.update(agreement)
14 |         return inflector.inflect(self.lemma, self.pos, governance)
15 | 
16 |     def __str__(self):
17 |         return self.lemma
18 | 


--------------------------------------------------------------------------------
/syntaxmaker/inflector.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | __author__ = 'Mika Hämäläinen'
  3 | 
  4 | import os
  5 | from . import pronoun_tool
  6 | import sys
  7 | from uralicNLP import uralicApi
  8 | 
  9 | if (sys.version_info > (3, 0)):
 10 |     # Python 3
 11 |     new_python = True
 12 |     from itertools import filterfalse as ffilter
 13 | else:
 14 |     # Python 2
 15 |     new_python = False
 16 |     from itertools import ifilterfalse as ffilter
 17 | 
 18 | 
 19 | case_suffixes ={"PAR":"A", "NOM": "","GEN":"n","ESS":"nA", "TRA": "ksi", "INE": "ssA", "ELA": "stA", "ADE": "llA", "ABL": "ltA", "ALL": "lle", "ABE": "ttA", "ILL": "n"}
 20 | ei_forms = {"SG1": "en", "SG2":"et","SG3":"ei","PL1":"emme", "PL2":"ette", "PL3":"eivät", "PE4": "ei"}
 21 | back_vowels = "aou"
 22 | front_vowels = "äöy"
 23 | vowels = "aeiouyäö"
 24 | 
 25 | def inflect(word, pos, args):
 26 |     beginning = ""
 27 |     if "|" in word:
 28 |         beginning, word = word.rsplit("|",1)
 29 |         beginning = beginning.replace("|","")
 30 |     if len(args) == 0:
 31 |         return beginning + word
 32 |     clit =""
 33 |     if "CLIT" in args:
 34 |         if args["CLIT"] == "KO":
 35 |             args["CLIT"] = "QST"
 36 |         clit = "+" +args["CLIT"].title()
 37 |     if pos == "GENERIC":
 38 |         return word
 39 |     elif pos == "V":
 40 |         if "PERS" in args and args["PERS"] == "4":
 41 |             voice = "PSS"
 42 |         else:
 43 |             voice = "ACT"
 44 |         if "MOOD" not in args or args["MOOD"] == "INDV":
 45 |             args["MOOD"] = "IND"
 46 |         if args["MOOD"] == "POTN":
 47 |             args["MOOD"] = "POT"
 48 |         if args["MOOD"] != "IND":
 49 |             tense = ""
 50 |         else:
 51 |             if "TENSE" not in args:
 52 |                 tense = "+Prs"
 53 |             elif args["TENSE"] == "PRESENT":
 54 |                 tense = "+Prs"
 55 |             elif args["TENSE"] == "PAST":
 56 |                 tense = "+Prt"
 57 | 
 58 |         if word == "ei":
 59 |             ei_form = ei_forms[args["NUM"]+args["PERS"]]
 60 |             if "CLIT" in args and args["CLIT"] == "QST":
 61 |                 ei_form = ei_form + "kö"
 62 |             return beginning + ei_form
 63 | 
 64 |         if "INF" in args:
 65 |             if args["INF"] == "A":
 66 |                 #syödä, juoda
 67 |                 return beginning + word
 68 |             else:
 69 |                 #syömään, juomaan
 70 |                 #omorfi_query = "[WORD_ID="+word+"][POS=VERB][VOICE="+voice+"][INF="+args["INF"]+"][CASE=ILL]"
 71 |                 omorfi_query = word + "+V+Act+Inf"+args["INF"].title()+"+Sg+Ill"
 72 |         elif "NEG" in args and args["NEG"]:
 73 |             #(en) syö, juo...
 74 |             if "TEMPAUX"  in args and args["PERS"] == "4" and tense == "+Prs":
 75 |                 # ei ole syöty
 76 |                 #omorfi_query = "[WORD_ID="+word+"][POS=VERB][VOICE=ACT][MOOD="+ args["MOOD"] +"]"+tense+"[NEG=CON]"
 77 |                 omorfi_query = word+"+V+Pss+"+ args["MOOD"].title() +"+Prt+ConNeg"
 78 |             else:
 79 |                 omorfi_query = word+"+V+"+voice.title()+"+"+args["MOOD"].title()+tense.title()+"+ConNeg"
 80 |                 #omorfi_query = "[WORD_ID="+word+"][POS=VERB][VOICE="+voice+"][MOOD="+ args["MOOD"] +"]"+tense+ pers_string+"[NEG=CON]"
 81 |         else:
 82 |             #syön, juon
 83 |             if "TEMPAUX"  in args and args["PERS"] == "4":
 84 |                 #on syöty
 85 |                 omorfi_query = word + "+V+Act+"+args["MOOD"].title()+tense+"+Sg3"
 86 |                 #omorfi_query = "[WORD_ID="+word+"][POS=VERB][VOICE=ACT][MOOD="+ args["MOOD"] +"]"+tense+"[PERS=SG3]"
 87 |             elif "PERS" in args and args["PERS"] == "4":
 88 |                 #omorfi_query = "[WORD_ID="+word+"][POS=VERB][VOICE="+voice+"][MOOD="+ args["MOOD"] +"]"+tense+pers_string +clit
 89 |                 omorfi_query = word + "+V+Pss+"+args["MOOD"].title()+tense+"+Pe4" +clit
 90 |             else:
 91 |                 omorfi_query = word + "+V+"+voice.title()+"+"+ args["MOOD"].title() +tense+ "+" + args["NUM"].title() + args["PERS"]+ clit
 92 |                 #omorfi_query = "[WORD_ID="+word+"][POS=VERB][VOICE="+voice+"][MOOD="+ args["MOOD"] +"]"+tense+"[PERS="+args["NUM"]+args["PERS"]+"]" +clit
 93 |     elif pos == "PPron":
 94 |         #personal pronoun
 95 |         if "CASE" in args and args["CASE"] == "Gen":
 96 |             args["CASE"] = "ACC"
 97 |         if "CASE" in args and args["CASE"] == "TrueGen":
 98 |             args["CASE"] = "GEN"
 99 |         else:
100 |             args["CASE"] = args["CASE"].upper()
101 |         omorfi_query = word + "+Pron+Pers+"+args["NUM"].title()+args["PERS"]+"+"+ args["CASE"].title() + clit
102 |         #omorfi_query = "[WORD_ID="+word+"][POS=PRONOUN][SUBCAT=PERSONAL][PERS="+args["NUM"]+args["PERS"]+"][NUM="+args["NUM"]+"][CASE="+args["CASE"]+"]"
103 |     elif pos == "PastParticiple":
104 |         #participle, syönyt, syöneet, syöty
105 |         if args["NUM"] == "PE":
106 |             #passive
107 |             #omorfi_query = "[WORD_ID="+word+"][POS=VERB][VOICE=PSS][MOOD=INDV][TENSE=PAST][PERS=PE4][NEG=CON]"
108 |             omorfi_query = word + "+V+Pss+PrfPrc+Sg+Nom"
109 |         else:
110 |             #active
111 |             num = args["NUM"]
112 |             #omorfi_query = "[WORD_ID="+word+"][POS=VERB][VOICE=ACT][MOOD=INDV][TENSE=PAST][NUM="+num+"][NEG=CON]"
113 |             omorfi_query = word+"+V+Act+PrfPrc+"+num.title()+"+Nom"
114 |     elif pos == "RelPron":
115 |         case = args["CASE"]
116 |         #omorfi_query = "[WORD_ID="+word+"][POS=PRONOUN][SUBCAT=RELATIVE][NUM="+args["NUM"]+"][CASE="+case+"]"
117 |         omorfi_query = word + "+Pron+Rel+"+args["NUM"].title()+"+" + case.title()
118 |     else:
119 |         degree = ""
120 |         if pos == "N":
121 |             pos = "N"
122 |         elif pos == "N+Prop":
123 |             pass
124 |         elif pos == "Adv":
125 |             if "DEGREE" in args:
126 |                 pos = "A"
127 |                 args["CASE"] = "Ins"
128 |                 args["NUM"] = "Pl"
129 |         else:
130 |             pos = "A"
131 | 
132 |         if "CASE" not in args:
133 |             args["CASE"] = "NOM"
134 |         else:
135 |             args["CASE"] = args["CASE"].upper()
136 |         if "DEGREE" in args:
137 |             degree = "+" + args["DEGREE"]
138 |         possessive = ""
139 |         if "POSS" in args:
140 |             possessive = "+" + args["POSS"]
141 |         #omorfi_query = "[WORD_ID="+word+"][POS="+pos+"][NUM="+args["NUM"]+"][CASE="+args["CASE"]+"]"
142 |         omorfi_query = word +"+" +pos+ degree +"+" + args["NUM"].title() +"+" + args["CASE"].title() + possessive + clit
143 |     word_form = _filter_generated(uralicApi.generate(omorfi_query, "fin"), word)
144 |     if len(word_form) == 0:
145 |         #Generation failed!
146 |         if pos == "N":
147 |             return inflect(beginning + "|" + word, "N+Prop", args)
148 |         else:
149 |             return beginning + backup_inflect(word, pos, args)
150 |     else:
151 |         return beginning + word_form[0][0]
152 | 
def _filter_generated(res, lemma):
    """Pick one generated form, preferring non-archaic, non-dialectal ones.

    :param res: list of generation results; element ``[0]`` of each item is
        the surface form.
    :param lemma: lemma the forms were generated from.
    :return: ``res`` unchanged when it has fewer than two items; otherwise a
        one-item list holding the first candidate that has an analysis which
        starts with ``lemma`` and carries neither ``+Use/Arch`` nor
        ``+Dial/``. When no candidate qualifies, ``res`` is returned
        unfiltered (previously the function fell off the end and returned
        None, which crashed the caller's ``len()``).
    """
    if len(res) < 2:
        return res
    for candidate in res:
        analyses = uralicApi.analyze(candidate[0], "fin", dictionary_forms=True)
        for analysis in analyses:
            tags = analysis[0]
            if "+Use/Arch" not in tags and "+Dial/" not in tags and tags.startswith(lemma):
                return [candidate]
    return res
162 | 
def backup_inflect(word, pos, args):
    """Rule-based fallback used when the morphological generator yields nothing.

    Nouns and adjectives are inflected with standard_nominal_inflection();
    personal pronouns and every other part of speech are returned unchanged.

    :param word: lemma to inflect.
    :param pos: part-of-speech tag. inflect() passes "N", "N+Prop" and "A";
        the older "NOUN"/"ADJECTIVE" spellings are still accepted. Only the
        old spellings were tested before, so the fallback never ran for the
        tags inflect() actually uses.
    :param args: feature dict; CASE (default "NOM") and NUM (default "SG")
        are read and upper-cased.
    :return: the inflected form, or ``word`` unchanged.
    """
    if pos in ("N", "N+Prop", "A", "NOUN", "ADJECTIVE"):
        if pronoun_tool.is_personal_pronoun(word):
            return word
        case = str(args.get("CASE", "NOM")).upper()
        number = str(args.get("NUM", "SG")).upper()
        return standard_nominal_inflection(word, case, number)
    return word
171 | 
def case_harmony(case, word):
    """Return the suffix for ``case`` with vowel harmony applied for ``word``.

    The "A" placeholder in the suffix template becomes "a" when
    has_back_vowels(word) is true and "ä" otherwise; templates without the
    placeholder are returned as they are.
    """
    suffix = case_suffixes[case]
    if "A" not in suffix:
        return suffix
    harmonic_vowel = "a" if has_back_vowels(word) else "ä"
    return suffix.replace("A", harmonic_vowel)
181 | 
def has_back_vowels(word):
    """Tell whether the last harmony vowel of ``word`` is a back vowel.

    The word is scanned from its end; the first letter found in
    ``back_vowels`` or ``front_vowels`` decides the answer. Words containing
    neither give False.
    """
    deciding_letter = next(
        (letter for letter in reversed(word)
         if letter in back_vowels or letter in front_vowels),
        None,
    )
    return deciding_letter is not None and deciding_letter in back_vowels
190 | 
def standard_nominal_inflection(noun, case, number):
    """Inflect a nominal with simple suffix rules (no stem alternation).

    :param noun: word to inflect; an empty value is returned unchanged
        (``noun[-1]`` used to raise IndexError on "").
    :param case: upper-case case tag; tags missing from ``case_suffixes``
        leave the word unchanged.
    :param number: "SG" or "PL".
    :return: the inflected word.
    """
    if case not in case_suffixes or not noun:
        return noun

    ends_in_vowel = noun[-1] in vowels

    if case == "NOM":
        if number == "SG":
            return noun
        # plural nominative: a consonant-final word gets a linking "i"
        return noun + ("t" if ends_in_vowel else "it")

    # consonant-final words take a linking "i" before any suffix
    stem = noun if ends_in_vowel else noun + "i"

    if number == "PL":
        if stem[-1] == "i":
            # final i -> "ej" before the partitive suffix, "ei" elsewhere
            stem = stem[:-1] + ("ej" if case == "PAR" else "ei")
        else:
            stem = stem + "i"

    if case == "ILL":
        # illative: "hi" in the plural, doubled final letter in the singular;
        # the suffix "n" is appended below
        stem = stem + ("hi" if number == "PL" else stem[-1])
    return stem + case_harmony(case, stem)
225 | 
226 | 


--------------------------------------------------------------------------------
/syntaxmaker/noun_tool.py:
--------------------------------------------------------------------------------
 1 | from . import locative_cases
 2 | 
 3 | locative_map = {"external":{"in":"Ade","to":"All","from":"Abl"},"internal":{"in":"Ine","to":"Ill","from":"Ela"}}
 4 | 
 5 | def get_locative(noun):
 6 | 	if noun in locative_cases:
 7 | 		noun_data = locative_cases[noun]
 8 | 		if noun_data["Ade"] > noun_data["Ine"]:
 9 | 			return "external"
10 | 		else:
11 | 			return "internal"
12 | 	else:
13 | 		return None
14 | 
def resolve_locative_case(locative_category, direction):
	"""Look up the case tag for a locative category and a direction.

	Both arguments must be keys of ``locative_map``; an unknown key raises
	KeyError.
	"""
	cases_by_direction = locative_map[locative_category]
	return cases_by_direction[direction]
17 | 


--------------------------------------------------------------------------------
/syntaxmaker/phrase.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | __author__ = 'Mika Hämäläinen'
 3 | from .head import Head
 4 | import copy
 5 | import re, sys
 6 | 
 7 | unicode = str
 8 | 
 9 | class Phrase:
10 |     def __init__(self, head, structure, morphology={}):
11 |         self.new_python = True
12 |         self.parent = None
13 |         self.head = Head(head, structure["head"])
14 |         self.components = copy.deepcopy(structure["components"])
15 |         if self.components is None:
16 |             self.components = {}
17 |             self.order = ["head"]
18 |         else:
19 |             self.order = copy.deepcopy(structure["order"])
20 |         if "agreement" in structure:
21 |             self.agreement = copy.deepcopy(structure["agreement"])
22 |         else:
23 |             self.agreement = {}
24 |         if "governance" in structure:
25 |             self.governance = copy.deepcopy(structure["governance"])
26 |         else:
27 |             self.governance = {}
28 |         self.morphology = copy.deepcopy(morphology)
29 | 
30 |     def resolve_agreement(self):
31 |         forms = {}
32 |         for key in self.agreement:
33 |             if key == "parent" and self.parent is not None:
34 |                 morphology = self.parent.morphology
35 |             elif key.startswith("parent->")and self.parent is not None:
36 |                 key_p = key[8:]
37 |                 morphology = self.parent.components[key_p].morphology
38 |             elif key in self.components:
39 |                 morphology = self.components[key].morphology
40 |             else:
41 |                 r = {"CASE": "Nom", "NUM": "SG", "PERS": "3"}
42 |                 r.update(self.morphology)
43 |                 return r
44 |             for agreement_type in self.agreement[key]:
45 |                 forms[agreement_type] = morphology[agreement_type]
46 |         return forms
47 | 
48 |     def to_string(self, received_governance = {}):
49 |         self.morphology.update(received_governance)
50 |         string_representation = ""
51 |         if "dir_object" in self.components:
52 |             if type(self.components["dir_object"]) is not str:
53 |                 if "NUM" in self.components["dir_object"].morphology and self.components["dir_object"].morphology["NUM"] == "PL":
54 |                     if "dir_object" in self.governance:
55 |                         if self.governance["dir_object"]["CASE"] == "Gen":
56 |                             self.governance["dir_object"]["CASE"] = "Par"
57 |         for item in self.order:
58 |             if item == "head":
59 |                 head_word = self.head.get_form(self.morphology, self.resolve_agreement())
60 |                 string_representation = string_representation + " " + head_word
61 |             else:
62 |                 phrase = self.components[item]
63 |                 if type(phrase) is str or (not self.new_python and type(phrase) is unicode):
64 |                     #Data not set
65 |                     pass
66 |                 else:
67 |                     phrase.parent = self
68 |                     governance = {}
69 |                     if item in self.governance:
70 |                         governance = self.governance[item]
71 |                     if "PREDICATIVE" in governance and governance["PREDICATIVE"]:
72 |                         if governance["NUM"] is None:
73 |                             governance["NUM"] = self.components["subject"].morphology["NUM"]
74 |                         if governance["CASE"] is None:
75 |                             if governance["NUM"] == "SG":
76 |                                 governance["CASE"] = "Nom"
77 |                             else:
78 |                                 governance["CASE"] = "Par"
79 |                     string_representation = string_representation + " " + phrase.to_string(governance)
80 |         return string_representation.strip()
81 | 
82 |     def __str__(self):
83 |         text = self.to_string()
84 |         #remove multiple spaces
85 |         text = re.sub("\s\s+", " ", text)
86 |         #remove spaces before punctuation
87 |         text = self.__remove_spaces_punct__(text)
88 |         return text.strip()
89 | 
90 |     def __remove_spaces_punct__(self, text):
91 |         puncts = ".,;:?!"
92 |         for punct in puncts:
93 |             if " "+punct in text:
94 |                 text = text.replace(" " + punct, punct)
95 |         return text
96 | 


--------------------------------------------------------------------------------
/syntaxmaker/pronoun_tool.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | __author__ = 'mika hämäläinen'
 3 | 
 4 | 
 5 | pronouns = {"SG1" : "minä", "SG2" : "sinä", "SG3" : "se", "PL1" : "me", "PL2": "te", "PL3": "ne"}
 6 | 
 7 | 
 8 | def pronoun(person, human=True):
 9 |     if human and person is "SG3":
10 |         return "hän"
11 |     if human and person is "PL3":
12 |         return "he"
13 |     if person in pronouns:
14 |         return pronouns[person]
15 |     else:
16 |         return None
17 | 
def is_personal_pronoun(p_pronoun):
    """Return True when ``p_pronoun`` is one of the personal pronouns.

    Besides the entries of ``pronouns`` this also recognises the human third
    person forms "hän" and "he" that pronoun() returns but the table does not
    contain.
    """
    return p_pronoun in pronouns.values() or p_pronoun in ("hän", "he")
23 | 


--------------------------------------------------------------------------------
/syntaxmaker/syntax_maker.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | __author__ = 'Mika Hämäläinen'
  3 | from . import verb_valence
  4 | from .phrase import Phrase
  5 | import json
  6 | import random
  7 | from . import pronoun_tool
  8 | from . import adposition_tool
  9 | import os
 10 | from . import noun_tool, ValencyException
 11 | 
 12 | auxiliary_verbs = {"voida" : "A",
 13 | "saada" : "A",
 14 | "alkaa" : "A",
 15 | "haluta" : "A",
 16 | "ruveta" : "MA",
 17 | "saattaa" : "A",
 18 | "kehdata": "A",
 19 | "jäädä": "MA",
 20 | "yrittää": "A",
 21 | "unohtaa":"A"}
 22 | 
 23 | grammar =""
 24 | 
 25 | def load_grammar():
 26 |     global grammar
 27 |     path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "grammar.json")
 28 |     f = open(path, "r")
 29 |     jsonText = f.read()
 30 |     f.close()
 31 |     grammar = json.loads(jsonText)
 32 | 
 33 | load_grammar()
 34 | 
 35 | def is_auxiliary_verb(verb):
 36 |     if verb in auxiliary_verbs:
 37 |         return True
 38 |     else:
 39 |         return False
 40 | 
 41 | def create_verb_pharse(head):
 42 |     global grammar
 43 |     phrase_type = verb_valence.valency_count(head)
 44 |     governance = {}
 45 |     dir_obj = {}
 46 |     dir_obj[u"CASE"] = verb_valence.most_frequent_case(verb_valence.verb_direct_objects(head))
 47 |     governance["dir_object"] = dir_obj
 48 |     indir_obj = {}
 49 |     indir_obj[u"CASE"] = verb_valence.most_frequent_case(verb_valence.verb_indirect_objects(head))
 50 |     governance["indir_object"] = indir_obj
 51 | 
 52 |     phrase_structure = grammar["VP"+str(phrase_type)]
 53 |     phrase_structure["governance"] = governance
 54 |     vp = Phrase(head, phrase_structure)
 55 |     vp.morphology["VOICE"] = "ACT"
 56 |     return vp
 57 | 
 58 | #oopsie, typos...
 59 | create_verb_phrase = create_verb_pharse
 60 | 
 61 | default_np_morphology = {u"CASE": "Nom", u"NUM": "SG", u"PERS": "3"}
 62 | 
 63 | def create_phrase(name, head, morphology={}):
 64 |     global grammar
 65 |     if name in grammar:
 66 |         structure = grammar[name]
 67 |     else:
 68 |         structure = grammar["GENERIC_P"]
 69 |     if name == "NP":
 70 |         for key in default_np_morphology.keys():
 71 |             if key not in morphology:
 72 |                 morphology[key] = default_np_morphology[key]
 73 |     return Phrase(head, structure, morphology)
 74 | 
 75 | def create_noun_phrase(head, morphology={}, number=None, case=None):
 76 |     if case is not None:
 77 |         morphology["CASE"] = case
 78 |     if number is not None:
 79 |         morphology["NUM"] = number
 80 |     return create_phrase("NP", head, morphology=morphology)
 81 | 
 82 | def create_adjective_phrase(head, morphology={}, degree=None):
 83 |     if degree is not None:
 84 |         morphology["DEGREE"] = degree
 85 |     return create_phrase("AP", head, morphology=morphology)
 86 | 
 87 | def create_personal_pronoun_phrase(person = "1", number = "SG", prodrop=False, human=False):
 88 |     if prodrop and person != "3":
 89 |         pronoun = None
 90 |     else:
 91 |         pronoun = pronoun_tool.pronoun(number + person, human=human)
 92 |     pp = create_phrase("NP", pronoun, morphology={u"PERS": person, u"NUM": number})
 93 |     pp.head.pos = "PPron"
 94 |     return pp
 95 | 
 96 | def create_copula_phrase(predicative_case=None, predicative_number=None):
 97 |     global grammar
 98 |     structure = grammar["VP_COPULA"]
 99 |     governance = { "predicative" :  {u"CASE" : predicative_case, u"NUM":predicative_number, u"PREDICATIVE":True}}
100 |     structure["governance"] = governance
101 |     vp = Phrase("olla", structure)
102 |     vp.morphology["VOICE"] = "ACT"
103 |     return vp
104 | 
105 | 
def negate_verb_pharse(vp):
    """Negate ``vp`` in place.

    Inserts the negative verb "ei" (component "AUX", agreeing with the
    parent's subject in PERS and NUM) directly before the head, sets NEG in
    the morphology and turns a genitive or nominative direct object governance
    into the partitive (syön kakun / syödään kakku -> en syö kakkua / ei
    syödä kakkua).
    """
    negative_verb = create_phrase("GENERIC_P", "ei")
    negative_verb.agreement["parent->subject"] = ["PERS", "NUM"]
    negative_verb.head.pos = "V"

    vp.morphology["NEG"] = True
    vp.components["AUX"] = negative_verb
    vp.order.insert(vp.order.index("head"), "AUX")

    if "dir_object" in vp.governance:
        object_governance = vp.governance["dir_object"]
        if object_governance[u"CASE"] == "Gen" or object_governance[u"CASE"] == "Nom":
            object_governance[u"CASE"] = "Par"

# Correctly spelled alias; the misspelled name is kept for existing callers.
negate_verb_phrase = negate_verb_pharse
120 | 
def turn_vp_into_question(vp):
    """Turn ``vp`` into a yes/no question in place.

    The question clitic ("KO") goes on the negative auxiliary when the phrase
    is negated and on the head otherwise; that element is then moved to the
    front of the word order.
    """
    if "NEG" in vp.morphology:
        move_front = "AUX"
        target_morphology = vp.components["AUX"].morphology
    else:
        move_front = "head"
        target_morphology = vp.morphology
    target_morphology["CLIT"] = "KO"

    vp.order.remove(move_front)
    vp.order.insert(0, move_front)
131 | 
def add_np_subject_to_vp(vp, np):
    """Set ``np`` as the subject of ``vp``.

    :raises ValencyException: when the word order of ``vp`` has no subject
        slot.
    """
    if "subject" in vp.order:
        vp.components["subject"] = np
        return
    raise ValencyException("This verb "+str(vp.head)+" does not accept a subject")
137 | 
def add_np_object_to_vp(vp, np, indirect=False, check_valency=False):
    """Attach ``np`` to ``vp`` as an object (or as the predicative).

    A direct object goes to the "predicative" slot when the word order has
    one, otherwise to "dir_object"; an indirect object goes to
    "indir_object". When the slot is missing it is appended to the word order
    if ``check_valency`` equals False.

    :raises ValencyException: when the slot is missing and ``check_valency``
        does not equal False.
    """
    if indirect:
        candidate_slots = ("indir_object",)
        new_slot = "indir_object"
        complaint = " does not accept an indirect object"
    else:
        candidate_slots = ("predicative", "dir_object")
        new_slot = "dir_object"
        complaint = " does not accept an object or a predicative"

    for slot in candidate_slots:
        if slot in vp.order:
            vp.components[slot] = np
            return

    if check_valency == False:
        vp.order.append(new_slot)
        vp.components[new_slot] = np
        return
    raise ValencyException("This verb "+str(vp.head)+complaint)
157 | 
158 | 
def add_auxiliary_verb_to_vp(vp, aux=None):
    """Make ``aux`` the head of ``vp`` and demote the old verb to an infinitive.

    Nothing happens when ``aux`` is None or not listed in ``auxiliary_verbs``.
    Otherwise the current head lemma becomes an "INF" component placed right
    after the head, in the infinitive type the auxiliary requires, and
    ``aux`` becomes the new head lemma.
    """
    if aux is not None and aux in auxiliary_verbs:
        infinitive_phrase = create_phrase("GENERIC_P", vp.head.lemma)
        infinitive_phrase.head.pos = "V"
        infinitive_phrase.morphology["INF"] = auxiliary_verbs[aux]

        vp.components["INF"] = infinitive_phrase
        vp.order.insert(vp.order.index("head") + 1, "INF")
        vp.head.lemma = aux
172 | 
def turn_vp_into_prefect(vp):
    """Turn ``vp`` into the perfect tense in place.

    The head becomes "olla" and the original verb is added right after it as
    a "Participle" component (part of speech "PastParticiple") that agrees
    with the parent's subject in NUM. TEMPAUX is set in the morphology.
    """
    main_verb, vp.head.lemma = vp.head.lemma, "olla"

    participle = create_phrase("GENERIC_P", main_verb)
    participle.head.pos = "PastParticiple"
    participle.agreement["parent->subject"] = ["NUM"]

    vp.components["Participle"] = participle
    vp.morphology["TEMPAUX"] = True
    vp.order.insert(vp.order.index("head") + 1, "Participle")
185 | 
def set_vp_mood_and_tense(vp, mood="INDV", tense="PRESENT"):
    """Store ``mood`` and ``tense`` as MOOD and TENSE in the morphology of ``vp``."""
    for feature, value in (("MOOD", mood), ("TENSE", tense)):
        vp.morphology[feature] = value
189 | 
def turn_vp_into_passive(vp):
    """Turn ``vp`` into the passive in place.

    The subject is replaced with an empty generic phrase carrying PERS "4"
    and NUM "PE", and a genitive direct object governance becomes nominative
    (syön kakun -> syödään kakku).
    """
    vp.components["subject"] = create_phrase("GENERIC_P", None, {u"PERS": "4", u"NUM": "PE"})
    if "dir_object" not in vp.governance:
        return
    object_governance = vp.governance["dir_object"]
    if object_governance[u"CASE"] == "Gen":
        object_governance[u"CASE"] = "Nom"
197 | 
def add_relative_clause_to_np(np, realtivep, case=None, subject=False):
    """Attach the clause ``realtivep`` to ``np`` as a relative clause.

    The relative pronoun "joka" takes a slot of the clause, is moved to the
    front of its word order, and the clause is wrapped in commas and appended
    to ``np`` as "relative_attribute".

    :param np: the antecedent noun phrase; its NUM (and PERS for subjects) is
        copied to the pronoun.
    :param realtivep: the verb phrase that becomes the relative clause.
    :param case: case of the relative pronoun. None means the antecedent is
        an object of the clause, e.g. talo, jonka näin.
    :param subject: True when the antecedent is the subject of the clause,
        e.g. kissa, joka kiipesi puuhun.
    """
    component = None
    if subject:
        component = "subject"
        if case is None:
            case = "Nom"
    elif case is None:
        # Take the first object slot that is not filled with a phrase yet.
        # (This used to pick "dir_object" whichever slot was found free,
        # overwriting an existing direct object.)
        for obj in ("dir_object", "indir_object", "predicative"):
            if obj in realtivep.components and type(realtivep.components[obj]) is not Phrase:
                component = obj
                break

    if component is None:
        # No free slot, or an explicit case, e.g. päivä, jona kävelin
        # kadulla: the pronoun gets a slot of its own in the CLAUSE. (The
        # slot used to be registered on ``np``, so removing it from the
        # clause's word order below raised ValueError.)
        component = "relative_pron"

    morphology = {"NUM": np.morphology["NUM"]}
    if case:
        morphology["CASE"] = case
    if subject:
        morphology["PERS"] = np.morphology["PERS"]
    rel_pron = create_phrase("NP", "joka", morphology)
    rel_pron.head.pos = "RelPron"

    realtivep.components[component] = rel_pron
    if component == "relative_pron":
        # a new slot is normally not in the word order yet
        if component in realtivep.order:
            realtivep.order.remove(component)
    else:
        realtivep.order.remove(component)
    realtivep.order.insert(0, component)

    realtivep.components["comma"] = create_phrase("GENERIC_P", ",")
    realtivep.order.insert(0, "comma")
    realtivep.order.append("comma")

    np.components["relative_attribute"] = realtivep
    np.order.append("relative_attribute")
239 | 
def add_advlp_to_vp(vp, advlp, place_type=None, default_locative_category="internal"):
    """Append ``advlp`` to ``vp`` as an adverbial component.

    The component is named "AdvlP<n>", n being the number of components of
    ``vp`` whose name already starts with "AdvlP". When ``place_type`` is
    given, the CASE of ``advlp`` is set to the locative case resolved by
    noun_tool from the head's locative category (falling back to
    ``default_locative_category``) and ``place_type``.
    """
    existing = sum(1 for component in vp.components if component.startswith("AdvlP"))
    comp_name = "AdvlP" + str(existing)

    if place_type is not None:
        category = noun_tool.get_locative(advlp.head.lemma) or default_locative_category
        advlp.morphology["CASE"] = noun_tool.resolve_locative_case(category, place_type)

    vp.components[comp_name] = advlp
    vp.order.append(comp_name)
252 | 
def create_adposition_phrase(adposition, np=None):
    """Create a postposition or preposition phrase.

    :param adposition: the adposition; None lets adposition_tool pick one.
    :param np: the complement noun phrase; None leaves the slot unset ("").
    :return: a "PostP" when adposition_tool knows the word as a postposition,
        otherwise a "PrepP" when it knows it as a preposition, otherwise
        None. The complement is governed in the case the tool reports.
    """
    if adposition is None:
        adposition = adposition_tool.get_an_adposition()

    phrase_name = "PostP"
    case = adposition_tool.postposition_case(adposition)
    if case is None:
        phrase_name = "PrepP"
        case = adposition_tool.preposition_case(adposition)
        if case is None:
            return None

    phrase = create_phrase(phrase_name, adposition)
    phrase.governance["complement"] = {u"CASE": case}
    phrase.components["complement"] = "" if np is None else np
    return phrase
269 | 
def create_adverb_phrase(head, morphology=None, degree=None):
    """Create an adverb phrase.

    :param head: adverb lemma.
    :param morphology: optional feature dict; copied, never modified (DEGREE
        used to be written into the shared default dict and so carried over
        to later calls).
    :param degree: optional value for DEGREE.
    :return: the phrase returned by create_phrase("AdvP", ...).
    """
    morphology = dict(morphology) if morphology else {}
    if degree is not None:
        morphology["DEGREE"] = degree
    return create_phrase("AdvP", head, morphology=morphology)
274 | 
def add_possessive_to_np(np, person, number, prodrop=False, human=False, suffix=True):
    """Mark ``np`` as possessed by the given person and number.

    A possessive suffix tag ("Px" + number + person) is stored as POSS unless
    ``suffix`` is false or the possessor is a human third person. Unless
    ``prodrop`` is set, a personal pronoun phrase with CASE "TrueGen" is
    inserted at the front of the phrase as the "det" component.
    """
    use_suffix = suffix and not (human and person == "3")
    possessor = create_personal_pronoun_phrase(person, number, prodrop=prodrop, human=human)
    possessor.morphology["CASE"] = "TrueGen"
    if use_suffix:
        np.morphology["POSS"] = "Px" + number.title() + person
    if prodrop:
        return
    np.components["det"] = possessor
    np.order.insert(0, "det")
285 | 
286 | 
287 | 
288 | 
289 | """
290 | vp = create_verb_pharse("uneksia")
291 | add_auxiliary_verb_to_vp(vp)
292 | 
293 | 
294 | 
295 | subject = create_phrase("NP", "rantaleijona", {u"PERS": "3", u"NUM": "PL"})
296 | 
297 | 
298 | dobject = create_phrase("NP", "aalto", {u"PERS": "3", u"NUM": "PL"})
299 | dobject.components["attribute"] = create_phrase("AP", "korkea")
300 | 
301 | dobject.components["attribute"].components["attribute"] = create_phrase("AdvP", "erittäin")
302 | 
303 | 
304 | vp.order.insert(0, "Advl")
305 | advl = {u"CASE": "Ess" }
306 | vp.governance["Advl"] = advl
307 | vp.components["Advl"] = create_phrase("NP","hipsteri",{u"PERS": "3", u"NUM": "PL"})
308 | 
309 | vp.components["subject"] = subject
310 | vp.components["dir_object"] = dobject
311 | 
312 | #turn_vp_into_passive(vp)
313 | #negate_verb_pharse(vp)
314 | 
315 | turn_vp_into_prefect(vp)
316 | set_vp_mood_and_tense(vp, mood="POTN")
317 | 
318 | turn_vp_into_question(vp)
319 | print(vp.to_string())
320 | 
321 | np = create_phrase("NP", "kissa")
322 | pp = create_adposition_phrase("ilman", np)
323 | print(pp.to_string())
324 | 
325 | 
326 | 
327 | np1 = create_phrase("NP", "mies")
328 | relp = create_verb_pharse("katsoa")
329 | ppp = create_phrase("NP", "orava")
330 | relpp = create_verb_pharse("vaania")
331 | relpp.components["subject"] = create_phrase("NP", "kissa")
332 | add_relative_clause_to_np(ppp, relpp)
333 | 
334 | relp.components["subject"] = ppp
335 | add_relative_clause_to_np(np1,relp)
336 | 
337 | vep = create_verb_pharse("juosta")
338 | vep.components["subject"] = np1
339 | 
340 | np2 = create_phrase("NP", "silta")
341 | pp = create_adposition_phrase("alla", np2)
342 | 
343 | add_advlp_to_vp(vep, pp)
344 | 
345 | print(vep)
346 | 
347 | 
348 | """
349 | 


--------------------------------------------------------------------------------
/syntaxmaker/ud_map.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "Aspect=Perf": "PrfPrc",
 3 | 
 4 |     "Connegative=Yes": "ConNeg",
 5 |     "Interj": "Interj",
 6 |     "Mood=Cnd": "Cond",
 7 |     "Mood=Imp": "Imprt",
 8 |     "Mood=Ind": "Ind",
 9 |     "Mood=Pot": "Pot",
10 |     "N": "N",
11 |     "N*": "N",
12 |     "Num": "Num",
13 |     "NumType=Card": "",
14 |     "PUNCT": "CLB",
15 |     "Pcle": "Pcle",
16 |     "Person=1": "1",
17 |     "Person=2": "2",
18 |     "Person=3": "3",
19 |     "Person[psor]=1": "1",
20 |     "Person[psor]=2": "2",
21 |     "Person[psor]=3": "3",
22 |     "Po": "Po",
23 |     "Polarity=Neg": "Neg",
24 |     "Pr": "Pr",
25 |     "Pron": "Pron",
26 |     "PronType=Coll": "",
27 |     "PronType=Dem": "",
28 |     "PronType=Ind": "",
29 |     "PronType=Int": "",
30 |     "PronType=Prs": "",
31 |     "PronType=Rcp": "",
32 |     "PronType=Rel": "",
33 |     "Reflex=Yes": "",
34 |     "Tense=Past": "Prt",
35 |     "Tense=Pres": "Prs",
36 |     "V": "V",
37 |     "V*": "V",
38 |     "VerbForm=Fin": "",
39 |     "VerbForm=Ger": "Ger",
40 |     "VerbForm=Inf": "Inf",
41 |     "VerbForm=Part": "PrfPrc",
42 |     "VerbForm=Sup": "Sup",
43 |     "Voice=Pass": "Pass",
44 |     "_": ""
45 | }


--------------------------------------------------------------------------------
/syntaxmaker/verb_valence.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | __author__ = 'Mika Hämäläinen'
  3 | import os
  4 | import random
  5 | import json
  6 | import codecs
  7 | from . import valences
  8 | 
  9 | 
 10 | direct_cases = {"Gen", "Par", "Ela", "Ill"}
 11 | indirect_cases = {"Ess", "Tra", "Abl", "All", "Ill"}
 12 | direct_threshold = 0.23
 13 | indirect_threshold = 0.18
 14 | 
 15 | stop_adverbs = ["laisesti", "näköisesti", "kuuloisesti", "kaltaisesti"]
 16 | 
 17 | 
 18 | 
 19 | 
 20 | def cases_total(verb, cases_list):
 21 | 	global valences
 22 | 	if verb not in valences:
 23 | 		return 0
 24 | 	cases = valences[verb]
 25 | 	count = 0
 26 | 	for case in cases:
 27 | 		if case in cases_list:
 28 | 			count = count + cases[case]
 29 | 	return count
 30 | 
 31 | 
 32 | def all_cases_total(verb):
 33 | 	global indirect_cases
 34 | 	global direct_cases
 35 | 	return cases_total(verb, indirect_cases) + cases_total(verb, direct_cases)
 36 | 
 37 | 
 38 | def verb_objects(verb, cases_list, threshold):
 39 | 	global valences
 40 | 	if verb not in valences:
 41 | 		return {}
 42 | 	total = float(all_cases_total(verb))
 43 | 	object_cases = {}
 44 | 	verb_cases = valences[verb]
 45 | 	for case in cases_list:
 46 | 		if case in verb_cases:
 47 | 			ratio = verb_cases[case] / total
 48 | 			if ratio > threshold:
 49 | 				object_cases[case] = ratio
 50 | 	return object_cases
 51 | 
 52 | 
 53 | def verb_direct_objects(verb):
 54 | 	global direct_cases
 55 | 	global direct_threshold
 56 | 	objs = verb_objects(verb, direct_cases, direct_threshold)
 57 | 	return objs
 58 | 
 59 | 
 60 | def verb_indirect_objects(verb):
 61 | 	global indirect_cases
 62 | 	global indirect_threshold
 63 | 	objs = verb_objects(verb, indirect_cases, indirect_threshold)
 64 | 	return objs
 65 | 
 66 | 
 67 | def valency_count(verb):
 68 | 	direct_objects = verb_direct_objects(verb)
 69 | 	indirect_objects = verb_indirect_objects(verb)
 70 | 	if not direct_objects:
 71 | 		# If the verb has no direct objects, it can only have a subject
 72 | 		return 1
 73 | 	elif not indirect_objects:
 74 | 		# The verb has a direct object but no indirect objects
 75 | 		if most_frequent_case(direct_objects) == "Gen" and "Par" not in direct_objects:
 76 | 			# In case of genetive, partitive cases must also be present due to the Finnish syntax
 77 | 			# If no partitive cases are present, genetives obtained from bigrams may be something else than direct objects
 78 | 			return 1
 79 | 		else:
 80 | 			return 2
 81 | 	else:
 82 | 		if most_frequent_case(direct_objects) == most_frequent_case(indirect_objects):
 83 | 			#Direct and indirect objects would be the same, so only direct object
 84 | 			return 2
 85 | 		else:
 86 | 			# The verb has both kinds of objects
 87 | 			return 3
 88 | 
 89 | 
 90 | def most_frequent_case(case_dict):
 91 | 	m_case = ""
 92 | 	m_count = 0
 93 | 	for case in case_dict:
 94 | 		if case_dict[case] > m_count:
 95 | 			m_count = case_dict[case]
 96 | 			m_case = case
 97 | 	return m_case
 98 | 
 99 | 
def inflect_noun(noun, case):
	"""Inflect ``noun`` in the singular of ``case`` with the external ``omorfi-generate.sh`` tool.

	The query is written to the generator's standard input instead of being
	spliced into a shell command line, so quotes or shell metacharacters in
	``noun`` cannot alter the command that is executed.

	:param noun: lemma placed in the WORD_ID slot of the query
	:param case: case tag; upper-cased before it is placed in the CASE slot
	:return: the second tab-separated field of the generator output, or None
		when the generator cannot be started, prints no such field, or the
		field still contains "[" (treated as a failed generation)
	"""
	# Local import: the module header does not import subprocess.
	import subprocess

	query = "[WORD_ID=" + noun + "][POS=NOUN][NUM=SG][CASE=" + case.upper() + "]"
	try:
		completed = subprocess.run(
			["omorfi-generate.sh"],
			# The trailing newline mirrors what `echo` used to append.
			input=query + "\n",
			stdout=subprocess.PIPE,
			universal_newlines=True,
		)
	except OSError:
		# The generator is not installed or not executable.
		return None

	fields = completed.stdout.split("\t")
	if len(fields) < 2:
		# Empty or unexpected output; indexing used to raise IndexError here.
		return None
	word = fields[1]
	if "[" in word:
		# Fail
		return None
	return word
110 | 
111 | 
def inflect_objects(verb, direct_object, indirect_object=None):
	"""Inflect the object nouns of ``verb`` in the verb's most frequent object cases.

	:param verb: verb whose valency decides which objects are inflected
	:param direct_object: noun to inflect as the direct object
	:param indirect_object: optional noun to inflect as the indirect object
	:return: [] when the verb takes no objects, otherwise a list with the
		inflected direct object, followed by the inflected indirect object
		when the verb has valency 3 and ``indirect_object`` was given
	"""
	valency = valency_count(verb)
	if valency < 2:
		# The verb has no objects -> can't inflect
		return []

	direct_case = most_frequent_case(verb_direct_objects(verb))
	indirect_case = most_frequent_case(verb_indirect_objects(verb)) if valency == 3 else None

	inflected = [inflect_noun(direct_object, direct_case)]
	if indirect_case is not None and indirect_object is not None:
		inflected.append(inflect_noun(indirect_object, indirect_case))
	return inflected
127 | 
128 | 
def is_copula(verb):
	"""Tell whether ``verb`` is the Finnish copula.

	"olla" is the only copulative verb in Finnish, so a plain comparison is enough.
	"""
	return verb == "olla"
135 | 
136 | 


--------------------------------------------------------------------------------
/test/100verbs.txt:
--------------------------------------------------------------------------------
  1 | suorittaa
  2 | ilmaista
  3 | kummuta
  4 | tasata
  5 | tehdä
  6 | pohtia
  7 | säännöstellä
  8 | hakea
  9 | siirtää
 10 | sonnustautua
 11 | järistä
 12 | uskoa
 13 | henkilöidä
 14 | ruuhkauttaa
 15 | myrskytä
 16 | hämmästyttää
 17 | subventoida
 18 | absorboida
 19 | mutista
 20 | tapella
 21 | paukkua
 22 | seurata
 23 | sopeuttaa
 24 | edetä
 25 | vihkiä
 26 | kesannoida
 27 | suodattaa
 28 | meluta
 29 | ihastuttaa
 30 | todeta
 31 | järjestää
 32 | paiskautua
 33 | poiketa
 34 | palkita
 35 | sihistä
 36 | muhentaa
 37 | hakata
 38 | murehtia
 39 | särkeä
 40 | yllättää
 41 | pystyttää
 42 | laittaa
 43 | leikata
 44 | kuivata
 45 | hivuttaa
 46 | jutustella
 47 | pyrkiä
 48 | mennä
 49 | arkailla
 50 | kuivua
 51 | mädäntyä
 52 | vajota
 53 | pestä
 54 | hankkia
 55 | pyytää
 56 | desinfioida
 57 | tuntea
 58 | poistua
 59 | mahtailla
 60 | hinata
 61 | musertaa
 62 | photoshopata
 63 | esiintyä
 64 | torkkua
 65 | pyytää
 66 | tyynnyttää
 67 | lingota
 68 | armahtaa
 69 | hypistellä
 70 | kunnioittaa
 71 | korjauttaa
 72 | käsitellä
 73 | pakata
 74 | katsoa
 75 | ottaa
 76 | kiinnittää
 77 | hivellä
 78 | muuttua
 79 | paistaa
 80 | kutsua
 81 | murehduttaa
 82 | siksakata
 83 | päästä
 84 | juoda
 85 | mäskätä
 86 | käydä
 87 | osata
 88 | täristää
 89 | tarjota
 90 | laskea
 91 | äristä
 92 | jättää
 93 | säästää
 94 | hankkia
 95 | masentua
 96 | kurittaa
 97 | suurustaa
 98 | muodostaa
 99 | haista
100 | asettaa


--------------------------------------------------------------------------------
/test/generate_sentences.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | from syntaxmaker.syntax_maker import *
 3 | import codecs
 4 | 
 5 | f = codecs.open("100verbs.txt", "r", encoding="utf-8")
 6 | results = []
 7 | for verb in f:
 8 | 	verb = verb.replace("\n", "")
 9 | 	vp = create_verb_pharse(verb)
10 | 	components= vp.components.keys()
11 | 	valency = str(len(components))
12 | 	if u"subject" in components:
13 | 		vp.components["subject"] = create_phrase("NP", "lehmä")
14 | 	if u"dir_object" in components:
15 | 		vp.components["dir_object"] = create_phrase("NP", "koira")
16 | 	if u"indir_object" in components:
17 | 		vp.components["indir_object"] = create_phrase("NP", "kissa")
18 | 	phrase = vp.to_string()
19 | 	results.append([verb, phrase.decode('utf-8'), valency])
20 | 
21 | fo = codecs.open("results.csv", "w", encoding="utf-8")
22 | for result in results:
23 | 	fo.write(";".join(result) + "\n")
24 | fo.close()
25 | 


--------------------------------------------------------------------------------
/test/results.csv:
--------------------------------------------------------------------------------
  1 | suorittaa;lehmä suorittaa koiran;2
  2 | ilmaista;lehmä ilmaisee koiraa;2
  3 | kummuta;lehmä kumpuaa koirasta;2
  4 | tasata;lehmä tasaa koiraa;2
  5 | tehdä;lehmä tekee;1
  6 | pohtia;lehmä pohtii koiraa;2
  7 | säännöstellä;lehmä säännöstelee;1
  8 | hakea;lehmä hakee koiraa;2
  9 | siirtää;lehmä siirtää;1
 10 | sonnustautua;lehmä sonnustautuu koiraan;2
 11 | järistä;lehmä järisee;1
 12 | uskoa;lehmä uskoo koiraan;2
 13 | henkilöidä;lehmä henkilöi;1
 14 | ruuhkauttaa;lehmä ruuhkauttaa koiraa;2
 15 | myrskytä;lehmä myrskyää;1
 16 | hämmästyttää;lehmä hämmästyttää;1
 17 | subventoida;lehmä subventoi koiraa;2
 18 | absorboida;lehmä absorboi koiraa;2
 19 | mutista;lehmä mutisee koirasta;2
 20 | tapella;lehmä tappelee;1
 21 | paukkua;lehmä paukkuu;1
 22 | seurata;lehmä seuraa koiraa;2
 23 | sopeuttaa;lehmä sopeuttaa koiraan;2
 24 | edetä;lehmä etenee;1
 25 | vihkiä;lehmä vihkii;1
 26 | kesannoida;lehmä kesannoi;1
 27 | suodattaa;lehmä suodattaa koiraa;2
 28 | meluta;lehmä meluaa koiraa;2
 29 | ihastuttaa;lehmä ihastuttaa koiraa;2
 30 | todeta;lehmä toteaa;1
 31 | järjestää;lehmä järjestää;1
 32 | paiskautua;lehmä paiskautuu koiraan;2
 33 | poiketa;lehmä poikkeaa koirasta;2
 34 | palkita;lehmä palkitsee koiran;2
 35 | sihistä;lehmä sihisee;1
 36 | muhentaa;lehmä muhentaa koiraa kissaan;3
 37 | hakata;lehmä hakkaa koiraa;2
 38 | murehtia;lehmä murehtii koiraa;2
 39 | särkeä;lehmä särkee;1
 40 | yllättää;lehmä yllättää;1
 41 | pystyttää;lehmä pystyttää;1
 42 | laittaa;lehmä laittaa koiran kissaan;3
 43 | leikata;lehmä leikkaa koiraa;2
 44 | kuivata;lehmä kuivaa koiraa;2
 45 | hivuttaa;lehmä hivuttaa koiraa;2
 46 | jutustella;lehmä jutustelee;1
 47 | pyrkiä;lehmä pyrkii;1
 48 | mennä;lehmä menee;1
 49 | arkailla;lehmä arkailee koiraa;2
 50 | kuivua;lehmä kuivuu;1
 51 | mädäntyä;lehmä mädäntyy;1
 52 | vajota;lehmä vajoaa koiraan;2
 53 | pestä;lehmä pesee;1
 54 | hankkia;lehmä hankkii koiran;2
 55 | pyytää;lehmä pyytää;1
 56 | desinfioida;lehmä desinfioi koiran;2
 57 | tuntea;lehmä tuntee koiraa;2
 58 | poistua;lehmä poistuu koirasta;2
 59 | mahtailla;lehmä mahtailee;1
 60 | hinata;lehmä hinaa koiraa kissaan;3
 61 | musertaa;lehmä musertaa koiran;2
 62 | photoshopata;lehmä photoshopata koiraa kissaan;3
 63 | esiintyä;lehmä esiintyy;1
 64 | torkkua;lehmä torkkuu koiran;2
 65 | pyytää;lehmä pyytää;1
 66 | tyynnyttää;lehmä tyynnyttää;1
 67 | lingota;lehmä linkoaa koiran kissaan;3
 68 | armahtaa;lehmä armahtaa koiraa;2
 69 | hypistellä;lehmä hypistelee;1
 70 | kunnioittaa;lehmä kunnioittaa koiraa;2
 71 | korjauttaa;lehmä korjauttaa koiran;2
 72 | käsitellä;lehmä käsittelee;1
 73 | pakata;lehmä pakkaa koiraan;2
 74 | katsoa;lehmä katsoo koiraa;2
 75 | ottaa;lehmä ottaa;1
 76 | kiinnittää;lehmä kiinnittää;1
 77 | hivellä;lehmä hivelee;1
 78 | muuttua;lehmä muuttuu;1
 79 | paistaa;lehmä paistaa koiraa;2
 80 | kutsua;lehmä kutsuu;1
 81 | murehduttaa;lehmä murehduttaa koiraa;2
 82 | siksakata;lehmä siksakkaa koiraa kissalle;3
 83 | päästä;lehmä pääsee;1
 84 | juoda;lehmä juo koiraa;2
 85 | mäskätä;lehmä mäskää;1
 86 | käydä;lehmä käy;1
 87 | osata;lehmä osaa koiraa;2
 88 | täristää;lehmä täristää;1
 89 | tarjota;lehmä tarjoaa koiraa;2
 90 | laskea;lehmä laskee koiraa;2
 91 | äristä;lehmä ärisee;1
 92 | jättää;lehmä jättää;1
 93 | säästää;lehmä säästää;1
 94 | hankkia;lehmä hankkii koiran;2
 95 | masentua;lehmä masentuu koirasta;2
 96 | kurittaa;lehmä kurittaa koiraa;2
 97 | suurustaa;lehmä suurustaa koiraa;2
 98 | muodostaa;lehmä muodostaa;1
 99 | haista;lehmä haisee;1
100 | asettaa;lehmä asettaa koiran kissalle;3
101 | 


--------------------------------------------------------------------------------
/test/results.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mikahama/syntaxmaker/f1ef72b2ee0daaf709927a97c6e5ef6bef0c5fb7/test/results.xlsx


--------------------------------------------------------------------------------
/test/wiktionary_verbs.py:
--------------------------------------------------------------------------------
 1 | import codecs
 2 | import random
 3 | f = codecs.open("fiwiktionary-latest-pages-articles.xml", "r", encoding="utf-8")
 4 | seen_verb = False
 5 | verbs = []
 6 | for line in f:
 7 | 	if line.startswith(u"===Verbi==="):
 8 | 		seen_verb = True
 9 | 	elif "{{" in line or ":" in line:
10 | 		pass
11 | 	elif seen_verb and line.startswith(u"#") and "{{" not in line:
12 | 		verb = line.replace("#", "").replace("[", "").replace("]","").replace("\n","")
13 | 		verb = verb.strip()
14 | 		if "<" in verb:
15 | 			verb = verb.split("<")[0]
16 | 		if "," not in verb:
17 | 			if " " in verb:
18 | 				verb = verb.split(" ")[0]
19 | 			verbs.append(verb)
20 | 		else:
21 | 			verbs.extend(verb.split(","))
22 | 		seen_verb = False
23 | 
24 | verbs = list(set(verbs))
25 | random.shuffle(verbs)
26 | fo = codecs.open("verbs.txt", "w", encoding="utf-8")
27 | for verb in verbs:
28 | 	fo.write(verb.strip() + "\n")
29 | fo.close()
30 | 


--------------------------------------------------------------------------------
/testi.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | from syntaxmaker.syntax_maker import *
 3 | """
 4 | vp = create_verb_pharse("uneksia")
 5 | add_auxiliary_verb_to_vp(vp)
 6 | 
 7 | 
 8 | 
 9 | subject = create_phrase("NP", "rantaleijona", {u"PERS": "3", u"NUM": "PL"})
10 | 
11 | 
12 | dobject = create_phrase("NP", "aalto", {u"PERS": "3", u"NUM": "PL"})
13 | dobject.components["attribute"] = create_phrase("AP", "korkea")
14 | 
15 | dobject.components["attribute"].components["attribute"] = create_phrase("AdvP", "erittäin")
16 | 
17 | 
18 | vp.order.insert(0, "Advl")
19 | advl = {u"CASE": "Ess" }
20 | vp.governance["Advl"] = advl
21 | vp.components["Advl"] = create_phrase("NP","hipsteri",{u"PERS": "3", u"NUM": "PL"})
22 | 
23 | vp.components["subject"] = subject
24 | vp.components["dir_object"] = dobject
25 | 
26 | print vp
27 | """
28 | 
# Scratch example: an "antaa" clause with a plural subject, a plural direct
# object carrying a nested attribute chain, and a singular indirect object.
vp = create_verb_pharse("antaa")
subject = create_phrase("NP", "hevonen", {"NUM": "PL"})
dobject = create_phrase("NP", "lahja", {"NUM": "PL"})

# The adjective modifies the object and is itself modified by an adverb.
adjective = create_phrase("AP", "mahtava")
dobject.components["attribute"] = adjective
adjective.components["attribute"] = create_phrase("AdvP", "erittäin")

indobject = create_phrase("NP", "lehmä")

# Fill the verb phrase slots in subject, direct object, indirect object order.
for slot, filler in (
	("subject", subject),
	("dir_object", dobject),
	("indir_object", indobject),
):
	vp.components[slot] = filler

print(vp)


--------------------------------------------------------------------------------
/travis_test.py:
--------------------------------------------------------------------------------
  1 | #encoding: utf-8
  2 | import unittest
  3 | from uralicNLP import uralicApi
  4 | if not uralicApi.is_language_installed("fin"):
  5 |     uralicApi.download("fin",show_progress=False)
  6 | from syntaxmaker.syntax_maker import *
  7 | import codecs
  8 | import copy
  9 | 
 10 | 
 11 | class TestFSTS(unittest.TestCase):
 12 | 
 13 |     def setUp(self):
 14 |         
 15 |         vp = create_verb_pharse("uneksia")
 16 |         subject = create_phrase("NP", "rantaleijona", {u"PERS": "3", u"NUM": "PL"})
 17 |         dobject = create_phrase("NP", "aalto", {u"PERS": "3", u"NUM": "PL"})
 18 |         dobject.components["attribute"] = create_phrase("AP", "korkea")
 19 |         dobject.components["attribute"].components["attribute"] = create_phrase("AdvP", "erittäin")
 20 |         vp.components["subject"] = subject
 21 |         vp.components["dir_object"] = dobject
 22 |         self.vp = vp
 23 | 
 24 |     def test_sentence(self):
 25 |         vp = copy.deepcopy(self.vp)
 26 |         self.assertEqual(str(vp) , "rantaleijonat uneksivat erittäin korkeista aalloista")
 27 |     def test_sentence_pass(self):
 28 |         vp = copy.deepcopy(self.vp)
 29 |         turn_vp_into_passive(vp)
 30 |         self.assertEqual(str(vp) , "uneksitaan erittäin korkeista aalloista")
 31 |     def test_sentence_neg(self):
 32 |         vp = copy.deepcopy(self.vp)
 33 |         negate_verb_pharse(vp)
 34 |         self.assertEqual(str(vp) , "rantaleijonat eivät uneksi erittäin korkeista aalloista")
 35 |     def test_prefect(self):
 36 |         vp = copy.deepcopy(self.vp)
 37 |         turn_vp_into_prefect(vp)
 38 |         self.assertEqual(str(vp) , "rantaleijonat ovat uneksineet erittäin korkeista aalloista")
 39 |   
 40 |     def test_prefect_pass(self):
 41 |         vp = copy.deepcopy(self.vp)
 42 |         turn_vp_into_prefect(vp)
 43 |         turn_vp_into_passive(vp)
 44 |         self.assertEqual(str(vp) , "on uneksittu erittäin korkeista aalloista")
 45 |     
 46 |     def test_prefect_pass_cond(self):
 47 |         vp = copy.deepcopy(self.vp)
 48 |         turn_vp_into_prefect(vp)
 49 |         turn_vp_into_passive(vp)
 50 |         set_vp_mood_and_tense(vp, mood="COND")
 51 |         self.assertEqual(str(vp) , "olisi uneksittu erittäin korkeista aalloista")
 52 | 
 53 |     def test_prefect_pass_pot(self):
 54 |         vp = copy.deepcopy(self.vp)
 55 |         turn_vp_into_prefect(vp)
 56 |         turn_vp_into_passive(vp)
 57 |         set_vp_mood_and_tense(vp, mood="POTN")
 58 |         self.assertEqual(str(vp) , "lie uneksittu erittäin korkeista aalloista")
 59 | 
 60 |     def test_pot(self):
 61 |         vp = copy.deepcopy(self.vp)
 62 |         set_vp_mood_and_tense(vp, mood="POTN")
 63 |         self.assertEqual(str(vp) , "rantaleijonat uneksinevat erittäin korkeista aalloista")
 64 | 
 65 |     def test_total_plural(self):
 66 |         vp = create_verb_pharse("antaa")
 67 |         subject = create_phrase("NP", "hevonen", {"NUM": "PL"})
 68 | 
 69 |         dobject = create_phrase("NP", "lahja", {"NUM": "PL"})
 70 |         dobject.components["attribute"] = create_phrase("AP", "hyvä")
 71 |         dobject.components["attribute"].components["attribute"] = create_phrase("AdvP", "erittäin")
 72 | 
 73 |         indobject = create_phrase("NP", "lehmä")
 74 |         vp.components["subject"] = subject
 75 |         vp.components["dir_object"] = dobject
 76 |         vp.components["indir_object"] = indobject
 77 |         self.assertEqual(str(vp) , "hevoset antavat erittäin hyviä lahjoja lehmälle")
 78 | 
 79 |     def test_total_plural_neg(self):
 80 |         vp = create_verb_pharse("antaa")
 81 |         subject = create_phrase("NP", "hevonen", {"NUM": "PL"})
 82 | 
 83 |         dobject = create_phrase("NP", "lahja", {"NUM": "PL"})
 84 |         dobject.components["attribute"] = create_phrase("AP", "hyvä")
 85 |         dobject.components["attribute"].components["attribute"] = create_phrase("AdvP", "erittäin")
 86 | 
 87 |         indobject = create_phrase("NP", "lehmä")
 88 |         vp.components["subject"] = subject
 89 |         vp.components["dir_object"] = dobject
 90 |         vp.components["indir_object"] = indobject
 91 |         negate_verb_pharse(vp)
 92 |         self.assertEqual(str(vp) , "hevoset eivät anna erittäin hyviä lahjoja lehmälle")
 93 | 
 94 |     def test_adj(self):
 95 |         ap = create_adjective_phrase("kaunis", degree="Comp")
 96 |         self.assertEqual(str(ap), "kauniimpi")
 97 |         
 98 |     def test_cond(self):
 99 |         vp = copy.deepcopy(self.vp)
100 |         set_vp_mood_and_tense(vp, mood="COND")
101 |         self.assertEqual(str(vp) , "rantaleijonat uneksisivat erittäin korkeista aalloista")
102 | 
103 |     def test_imp(self):
104 |         vp = copy.deepcopy(self.vp)
105 |         set_vp_mood_and_tense(vp, mood="IMPRT")
106 |         self.assertEqual(str(vp) , "rantaleijonat uneksikoot erittäin korkeista aalloista")
107 | 
108 |     def test_quest(self):
109 |         vp = copy.deepcopy(self.vp)
110 |         turn_vp_into_question(vp)
111 |         self.assertEqual(str(vp) , "uneksivatko rantaleijonat erittäin korkeista aalloista")
112 | 
113 |     def test_rela(self):
114 |         np1 = create_phrase("NP", "mies")
115 |         relp = create_verb_pharse("katsoa")
116 |         ppp = create_phrase("NP", "orava")
117 |         relpp = create_verb_pharse("vaania")
118 |         relpp.components["subject"] = create_phrase("NP", "kissa")
119 |         add_relative_clause_to_np(ppp, relpp)
120 | 
121 |         relp.components["subject"] = ppp
122 |         add_relative_clause_to_np(np1,relp)
123 | 
124 |         vep = create_verb_pharse("juosta")
125 |         vep.components["subject"] = np1
126 | 
127 |         np2 = create_phrase("NP", "silta")
128 |         pp = create_adposition_phrase("alla", np2)
129 | 
130 |         add_advlp_to_vp(vep, pp)
131 |         self.assertEqual(str(vep) , "mies, jota orava, jota kissa vaanii, katsoo, juoksee sillan alla")
132 | 
133 |     def test_copula_pl(self):
134 |         vp = create_copula_phrase()
135 |         subject = create_phrase("NP", "koira", {u"NUM": "PL"})
136 |         predicative = create_phrase("NP", "eläin")
137 |         vp.components["subject"] = subject
138 |         vp.components["predicative"] = predicative
139 |         self.assertEqual(str(vp) , "koirat ovat eläimiä")
140 | 
141 |     def test_copula_sg(self):
142 |         vp = create_copula_phrase()
143 |         subject = create_phrase("NP", "koira", {u"PERS": "3", u"NUM": "SG"})
144 |         predicative = create_phrase("NP", "eläin")
145 |         vp.components["subject"] = subject
146 |         vp.components["predicative"] = predicative
147 |         self.assertEqual(str(vp) , "koira on eläin")
148 | 
149 |     def test_adpos(self):
150 |         vp = create_copula_phrase()
151 |         subject = create_phrase("NP", "koira", {u"PERS": "3", u"NUM": "SG"})
152 |         predicative = create_phrase("NP", "eläin")
153 |         adp = create_adposition_phrase("ilman", predicative)
154 |         vp.components["subject"] = subject
155 |         add_advlp_to_vp(vp, adp)
156 |         self.assertEqual(str(vp) , "koira on ilman eläintä")
157 | 
158 |     def test_place_name(self):
159 |         vp = create_copula_phrase()
160 |         subject = create_phrase("NP", "koira", {u"PERS": "3", u"NUM": "SG"})
161 |         predicative = create_phrase("NP", "Venäjä")
162 |         vp.components["subject"] = subject
163 |         add_advlp_to_vp(vp, predicative, place_type="in")
164 |         self.assertEqual(str(vp) , "koira on Venäjällä")
165 | 
166 |     def test_possessive_name(self):
167 |         vp = create_copula_phrase()
168 |         subject = create_phrase("NP", "koira", {u"PERS": "3", u"NUM": "SG"})
169 |         predicative = create_phrase("NP", "Lontoo")
170 |         add_possessive_to_np(predicative, "1", "SG")
171 |         vp.components["subject"] = subject
172 |         add_advlp_to_vp(vp, predicative, place_type="in")
173 |         self.assertEqual(str(vp) , "koira on minun Lontoossani")
174 | 
175 |     def test_pp_acc(self):
176 |         vp = create_verb_pharse("nähdä")
177 |         add_np_subject_to_vp(vp, create_noun_phrase("hattu"))
178 |         add_np_object_to_vp(vp, create_personal_pronoun_phrase("1", "SG"))
179 |         self.assertEqual(str(vp) , "hattu näkee minut")
180 | 
181 |     def test_neg(self):
182 |         vp = create_verb_pharse("nähdä")
183 |         add_np_subject_to_vp(vp, create_personal_pronoun_phrase("1", "SG"))
184 |         add_np_object_to_vp(vp, create_personal_pronoun_phrase("2", "SG"))
185 |         negate_verb_pharse(vp)
186 |         turn_vp_into_question(vp)
187 |         self.assertEqual(str(vp) , "enkö minä näe sinua")
188 | 
189 |     def test_prefect_last_pass(self):
190 |         vp = copy.deepcopy(self.vp)
191 |         turn_vp_into_prefect(vp)
192 |         turn_vp_into_passive(vp)
193 |         set_vp_mood_and_tense(vp, tense="PAST")
194 |         self.assertEqual(str(vp) , "oli uneksittu erittäin korkeista aalloista")
195 | 
196 |     def test_prefect_last_pass_neg(self):
197 |         vp = copy.deepcopy(self.vp)
198 |         turn_vp_into_prefect(vp)
199 |         turn_vp_into_passive(vp)
200 |         set_vp_mood_and_tense(vp, tense="PAST")
201 |         negate_verb_pharse(vp)
202 |         self.assertEqual(str(vp) , "ei oltu uneksittu erittäin korkeista aalloista")
203 | 
204 |     def test_sentence_can(self):
205 |         vp = copy.deepcopy(self.vp)
206 |         add_auxiliary_verb_to_vp(vp, "voida")
207 |         self.assertEqual(str(vp) , "rantaleijonat voivat uneksia erittäin korkeista aalloista")
208 | 
209 |     def test_sentence_stay(self):
210 |         vp = copy.deepcopy(self.vp)
211 |         add_auxiliary_verb_to_vp(vp, "jäädä")
212 |         self.assertEqual(str(vp) , "rantaleijonat jäävät uneksimaan erittäin korkeista aalloista")
213 | 
214 |     def test_adj_comp(self):
215 |         vp = create_copula_phrase()
216 |         subject = create_phrase("NP", "koira", {u"PERS": "3", u"NUM": "SG"})
217 |         predicative = create_phrase("NP", "eläin")
218 |         adj = create_adjective_phrase("hieno", degree="Comp")
219 |         predicative.components["attribute"] = adj
220 |         vp.components["subject"] = subject
221 |         vp.components["predicative"] = predicative
222 |         self.assertEqual(str(vp) , "koira on hienompi eläin")
223 | 
224 |     def test_adj_superl(self):
225 |         vp = create_copula_phrase()
226 |         subject = create_phrase("NP", "koira", {u"PERS": "3", u"NUM": "SG"})
227 |         predicative = create_phrase("NP", "eläin")
228 |         adj = create_adjective_phrase("hieno", degree="Superl")
229 |         predicative.components["attribute"] = adj
230 |         vp.components["subject"] = subject
231 |         vp.components["predicative"] = predicative
232 |         self.assertEqual(str(vp) , "koira on hienoin eläin")
233 | 
234 |     def test_adv_superl(self):
235 |         vp = create_copula_phrase()
236 |         subject = create_phrase("NP", "koira", {u"PERS": "3", u"NUM": "SG"})
237 |         predicative = create_phrase("NP", "eläin")
238 |         adj = create_adverb_phrase("yleinen", degree="Superl")
239 |         predicative.components["attribute"] = adj
240 |         vp.components["subject"] = subject
241 |         vp.components["predicative"] = predicative
242 |         self.assertEqual(str(vp) , "koira on yleisimmin eläin")
243 | 
244 |     def test_adv_comp(self):
245 |         vp = create_copula_phrase()
246 |         subject = create_phrase("NP", "koira", {u"PERS": "3", u"NUM": "SG"})
247 |         predicative = create_phrase("NP", "eläin")
248 |         adj = create_adverb_phrase("yleinen", degree="Comp")
249 |         predicative.components["attribute"] = adj
250 |         vp.components["subject"] = subject
251 |         vp.components["predicative"] = predicative
252 |         self.assertEqual(str(vp) , "koira on yleisemmin eläin")
253 | 
254 | 
255 | if __name__ == '__main__':
256 |     unittest.main()
257 | 
258 | 


--------------------------------------------------------------------------------