├── test ├── __init__.py ├── lat.py ├── lit.py ├── lrt.py ├── rules.py ├── templates.py ├── derivations.py ├── ashtadhyayi.py ├── dhatupatha.py ├── util.py ├── data │ ├── lrt.csv │ ├── lat.csv │ └── lit.csv ├── trees.py ├── helpers.py ├── operators.py ├── sounds.py ├── upadesha.py └── filters.py ├── requirements.txt ├── vyakarana ├── adhyaya1 │ ├── __init__.py │ ├── pada2.py │ ├── pada3.py │ └── pada1.py ├── adhyaya2 │ ├── __init__.py │ └── pada4.py ├── adhyaya3 │ ├── __init__.py │ ├── pada1.py │ └── pada4.py ├── adhyaya6 │ ├── __init__.py │ ├── pada1.py │ └── pada4.py ├── adhyaya7 │ ├── __init__.py │ ├── pada1.py │ ├── pada2.py │ ├── pada4.py │ └── pada3.py ├── __init__.py ├── sandhi.py ├── derivations.py ├── reranking.py ├── lists.py ├── ashtadhyayi.py ├── util.py ├── templates.py ├── dhatupatha.py ├── trees.py ├── siddha.py ├── rules.py ├── expand.py ├── sounds.py ├── terms.py └── operators.py ├── .gitignore ├── docs ├── selecting_rules.rst ├── index.rst ├── contents.rst.inc ├── api.rst ├── asiddha.rst ├── inputs_and_outputs.rst ├── design_overview.rst ├── introduction.rst ├── sounds.rst ├── defining_rules.rst ├── modeling_rules.rst ├── rule_types.rst ├── terms.rst ├── glossary.rst ├── make.bat ├── Makefile └── conf.py ├── compare.sh ├── fabfile.py └── README.md /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | -------------------------------------------------------------------------------- /vyakarana/adhyaya1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vyakarana/adhyaya2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vyakarana/adhyaya3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vyakarana/adhyaya6/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vyakarana/adhyaya7/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/_build 2 | env/ 3 | -------------------------------------------------------------------------------- /docs/selecting_rules.rst: -------------------------------------------------------------------------------- 1 | Selecting Rules 2 | =============== 3 | 4 | Rank 5 | ---- 6 | 7 | 8 | Conflict resolution 9 | ------------------- 10 | 11 | -------------------------------------------------------------------------------- /compare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Get the test diff between the previous commit and the current workspace. 3 | # Assumes old.txt already exists. 4 | py.test test/*.py --tb=line > new.txt 5 | diff old.txt new.txt > diff.txt 6 | -------------------------------------------------------------------------------- /vyakarana/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | logger = logging.getLogger(__name__) 5 | out = logging.StreamHandler(sys.stdout) 6 | out.setLevel(logging.DEBUG) 7 | logger.addHandler(out) 8 | logger.setLevel(logging.DEBUG) 9 | -------------------------------------------------------------------------------- /test/lat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.lat 4 | ~~~~~~~~ 5 | 6 | Tests for words formed with the suffix "laṭ". 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import pytest 12 | from helpers import verb_data 13 | 14 | 15 | @pytest.mark.parametrize(('expected', 'actual'), verb_data('lat.csv', 'la~w')) 16 | def test_all(expected, actual): 17 | assert expected == actual, '%s != %s' % (list(expected), list(actual)) 18 | -------------------------------------------------------------------------------- /test/lit.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.lit 4 | ~~~~~~~~ 5 | 6 | Tests for words formed with the suffix "liṭ". 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import pytest 12 | from helpers import verb_data 13 | 14 | 15 | @pytest.mark.parametrize(('expected', 'actual'), verb_data('lit.csv', 'li~w')) 16 | def test_all(expected, actual): 17 | assert expected == actual, '%s != %s' % (list(expected), list(actual)) 18 | -------------------------------------------------------------------------------- /test/lrt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.lit 4 | ~~~~~~~~ 5 | 6 | Tests for words formed with the suffix "liṭ". 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import pytest 12 | from helpers import verb_data 13 | 14 | 15 | @pytest.mark.parametrize(('expected', 'actual'), verb_data('lrt.csv', 'lf~w')) 16 | def test_all(expected, actual): 17 | assert expected == actual, '%s != %s' % (list(expected), list(actual)) 18 | -------------------------------------------------------------------------------- /vyakarana/adhyaya2/pada4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya2.pada4 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..templates import * 11 | 12 | f = F.auto 13 | 14 | 15 | RULES = [ 16 | Anuvrtti(None, F.raw('Sap'), None), 17 | ('2.4.71', F.gana('a\da~'), None, None, 'lu~k'), 18 | ('2.4.74', F.gana('hu\\'), None, None, 'Slu~'), 19 | ] 20 | -------------------------------------------------------------------------------- /test/rules.py: -------------------------------------------------------------------------------- 1 | from vyakarana.rules import * 2 | 3 | 4 | def test_init(): 5 | r = Rule('name', list('filters'), 'operator') 6 | assert r.name == 'name' 7 | assert r.filters == list('filters') 8 | assert r.operator == 'operator' 9 | assert not r.optional 10 | assert not r.utsarga 11 | 12 | 13 | def test_new_paribhasha(): 14 | pass 15 | 16 | 17 | def test_new_samjna(): 18 | pass 19 | 20 | 21 | def test_new_tasmat(): 22 | pass 23 | 24 | 25 | def test_new_tasya(): 26 | pass 27 | 28 | 29 | -------------------------------------------------------------------------------- /vyakarana/adhyaya1/pada2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya1.pada2 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..templates import * 11 | 12 | f = F.auto 13 | 14 | 15 | RULES = [ 16 | Anuvrtti(None, None, None), 17 | ('1.2.4', None, f('sarvadhatuka') & ~f('pit'), None, 'Nit'), 18 | ('1.2.5', ~F.samyoga, ~f('pit') & f('li~w'), None, 'kit'), 19 | Ca('1.2.6', f('YiinDI~\\', 'BU'), f('li~w'), None, True), 20 | ] 21 | -------------------------------------------------------------------------------- /test/templates.py: -------------------------------------------------------------------------------- 1 | from vyakarana.templates import * 2 | 3 | 4 | def test_init(): 5 | t = RuleStub('name', 'L', 'C', 'R', 'op') 6 | assert t.name == 'name' 7 | assert t.window == ['L', 'C', 'R'] 8 | assert t.operator == 'op' 9 | 10 | 11 | def test_init_with_base(): 12 | t = RuleStub('name', 'L', 'C', 'R', 'op') 13 | assert t.name == 'name' 14 | assert t.window == ['L', 'C', 'R'] 15 | assert t.operator == 'op' 16 | 17 | 18 | def test_center(): 19 | t = RuleStub('name', 'L', 'C', 'R', 'op') 20 | assert t.center == 'C' 21 | -------------------------------------------------------------------------------- /test/derivations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.derivations 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | Tests for `State` 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | from vyakarana.derivations import State 12 | 13 | 14 | class TestState(object): 15 | 16 | def test_init_no_args(self): 17 | s = State() 18 | assert s.terms == [] 19 | assert s.history == [] 20 | 21 | def test_init_with_terms(self): 22 | items = list('abc') 23 | s = State(items) 24 | assert s.terms == items 25 | assert s.history == [] 26 | -------------------------------------------------------------------------------- /test/ashtadhyayi.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from vyakarana.ashtadhyayi import Ashtadhyayi 4 | from vyakarana.terms import Upadesha, Vibhakti 5 | 6 | 7 | @pytest.fixture(scope='session') 8 | def ashtadhyayi(): 9 | return Ashtadhyayi() 10 | 11 | 12 | def test_init(ashtadhyayi): 13 | assert ashtadhyayi.rule_tree 14 | 15 | 16 | def test_with_rules_in(): 17 | a = Ashtadhyayi.with_rules_in('3.1.68', '3.1.82') 18 | assert a.rule_tree 19 | 20 | 21 | def test_derive(ashtadhyayi): 22 | dhatu = Upadesha.as_dhatu('BU') 23 | la = Vibhakti('la~w').add_samjna('prathama', 'ekavacana') 24 | items = [dhatu, la] 25 | assert 'Bavati' in ashtadhyayi.derive(items) 26 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | Vyakarana Documentation 4 | ======================= 5 | 6 | This is the documentation for Vyakarana, a program that derives Sanskrit words. 7 | To get the most out of the documentation, you should have a working knowledge 8 | of Sanskrit. 9 | 10 | .. important:: 11 | All data handled by the system is represented in `SLP1`_. SLP1 also uses 12 | the following symbols: 13 | 14 | - ``'\\'`` to indicate *anudātta* 15 | - ``'^'`` to indicate *svarita* 16 | - ``'~'`` to indicate a nasal sound 17 | 18 | Unmarked vowels are *udātta*. 19 | 20 | .. _SLP1: http://sanskrit1.ccv.brown.edu/Sanskrit/Vyakarana/Dhatupatha/mdhvcanidx/disp1/encodinghelp.html 21 | 22 | .. include:: contents.rst.inc 23 | -------------------------------------------------------------------------------- /vyakarana/adhyaya7/pada1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya7.pada1 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..templates import * 11 | 12 | f = F.auto 13 | 14 | 15 | RULES = [ 16 | Anuvrtti('anga', 'pratyaya', None), 17 | ('7.1.3', None, None, None, O.replace('J', 'ant')), 18 | ('7.1.4', 'abhyasta', None, None, O.replace('J', 'at')), 19 | ('7.1.5', ~F.al('at'), 'atmanepada', None, True), 20 | 21 | Anuvrtti(f('At') & F.samjna('anga'), F.raw('Ral'), None), 22 | ('7.1.34', None, None, None, 'O'), 23 | 24 | Anuvrtti(None, None, None), 25 | Va('7.1.91', None, f('Ral') & f('uttama'), None, 'Rit'), 26 | ] 27 | -------------------------------------------------------------------------------- /docs/contents.rst.inc: -------------------------------------------------------------------------------- 1 | Background 2 | ---------- 3 | 4 | This is a high-level overview of the Ashtadhyayi and how it works. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | introduction 10 | rule_types 11 | terms 12 | sounds 13 | asiddha 14 | glossary 15 | 16 | 17 | Architecture 18 | ------------ 19 | 20 | This describes the overall architecture of the system. 21 | 22 | .. toctree:: 23 | :maxdepth: 2 24 | 25 | design_overview 26 | inputs_and_outputs 27 | modeling_rules 28 | selecting_rules 29 | defining_rules 30 | 31 | 32 | API Reference 33 | ------------- 34 | 35 | This contains information about specific classes, functions, and methods. 36 | 37 | .. toctree:: 38 | :maxdepth: 2 39 | 40 | api 41 | 42 | -------------------------------------------------------------------------------- /fabfile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.fabfile 4 | ~~~~~~~~~~~~~~~~~ 5 | 6 | Helpful commands for debugging and testing. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import code 12 | from fabric.api import * 13 | from vyakarana.ashtadhyayi import Ashtadhyayi 14 | 15 | PROJECT_NAME = 'vyakarana' 16 | 17 | 18 | @task 19 | def shell(): 20 | """Create an interactive shell with some useful locals.""" 21 | 22 | banner = """ 23 | {1}~~~~~~ 24 | {0} shell 25 | {1}~~~~~~ 26 | """.format(PROJECT_NAME, '~' * len(PROJECT_NAME)) 27 | 28 | a = Ashtadhyayi() 29 | context = { 30 | 'a': a, 31 | 'rules': a.rules, 32 | } 33 | for rule in a.rules: 34 | context['r' + rule.name.replace('.', '_')] = rule 35 | 36 | code.interact(banner, local=context) 37 | -------------------------------------------------------------------------------- /test/dhatupatha.py: -------------------------------------------------------------------------------- 1 | import vyakarana.dhatupatha as D 2 | 3 | 4 | def test_init(): 5 | d = D.Dhatupatha() 6 | assert not d.gana_map 7 | assert not d.all_dhatu 8 | assert not d.index_map 9 | 10 | 11 | def test_init_with_filename(): 12 | d = D.Dhatupatha(D.DHATUPATHA_CSV) 13 | assert d.gana_map 14 | assert d.all_dhatu 15 | assert d.index_map 16 | 17 | 18 | def test_dhatu_list(): 19 | cases = [ 20 | # 6.1.15 21 | ('ya\\ja~^', None, 9), 22 | # 6.4.125 23 | ('PaRa~', 'svana~', 7), 24 | # 7.3.74 25 | ('Samu~', 'madI~', 8), 26 | # 7.3.80 27 | ('pUY', 'plI\\', 25), 28 | ] 29 | d = D.Dhatupatha(D.DHATUPATHA_CSV) 30 | for start, end, expected_len in cases: 31 | results = d.dhatu_list(start, end) 32 | assert len(results) == expected_len 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vyākaraṇa 2 | 3 | `vyakarana` derives Sanskrit words by applying the rules of the Ashtadhyayi. 4 | For a given step in the derivation, the system repeatedly selects and applies 5 | an appropriate rule until no further changes can be made. 6 | 7 | ## Current progress 8 | 9 | Strong support for *liṭ* and *laṭ*. Experimental support for *lṛṭ*. 10 | 11 | ## Setup 12 | 13 | `vyakarana` has no external dependencies. [pytest](http://pytest.org/latest/) 14 | is used for testing. To install pytest, run 15 | 16 | pip install pytest 17 | 18 | Or just install from requirements.txt: 19 | 20 | pip install -r requirements.txt 21 | 22 | ## Tests 23 | 24 | All test code is in the `test` directory. To run all tests: 25 | 26 | py.test test/*.py --tb=line 27 | 28 | ## Documentation 29 | 30 | Go to http://vyakarana.readthedocs.org for details. 31 | -------------------------------------------------------------------------------- /vyakarana/adhyaya1/pada3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya1.pada3 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..templates import * 11 | from ..dhatupatha import DHATUPATHA as DP 12 | 13 | f = F.auto 14 | 15 | 16 | RULES = [ 17 | Anuvrtti(None, None, None), 18 | ('1.3.1', None, F.raw(*DP.all_dhatu), None, 'dhatu'), 19 | 20 | # Strictly speaking, the rules below are distortions of the actual 21 | # rules in the Ashtadhyayi. The terms "parasamipada" and "atmanepada" 22 | # refer to the *replacements* of the "la" affixes, not to the "la" 23 | # affixes themselves. 24 | Anuvrtti('dhatu', None, None), 25 | ('1.3.12', f('anudattet', 'Nit'), None, None, 'atmanepada'), 26 | Artha('1.3.72', f('svaritet', 'Yit'), None, None, True), 27 | # TODO: infer by anuvrtti 28 | Artha('1.3.76', 'jYA\\', None, None, True), 29 | ('1.3.78', Shesha, None, None, 'parasmaipada'), 30 | ] 31 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | API 4 | === 5 | 6 | Lists 7 | ----- 8 | 9 | .. automodule:: vyakarana.lists 10 | :members: 11 | 12 | Inputs and Outputs 13 | ------------------ 14 | 15 | .. autoclass:: vyakarana.terms.Upadesha 16 | :member-order: bysource 17 | :members: 18 | 19 | .. autoclass:: vyakarana.derivations.State 20 | :member-order: bysource 21 | :members: 22 | 23 | Filters 24 | ------- 25 | 26 | .. automodule:: vyakarana.filters 27 | :member-order: bysource 28 | :members: 29 | 30 | Operators 31 | --------- 32 | 33 | .. automodule:: vyakarana.operators 34 | :member-order: bysource 35 | :members: 36 | 37 | Rules and Rule Stubs 38 | -------------------- 39 | 40 | .. autoclass:: vyakarana.rules.Rule 41 | :member-order: bysource 42 | :members: 43 | 44 | .. automodule:: vyakarana.templates 45 | :show-inheritance: 46 | :member-order: bysource 47 | :members: 48 | 49 | Texts 50 | ----- 51 | 52 | .. autoclass:: vyakarana.ashtadhyayi.Ashtadhyayi 53 | :members: 54 | 55 | .. autoclass:: vyakarana.dhatupatha.Dhatupatha 56 | :members: 57 | -------------------------------------------------------------------------------- /vyakarana/adhyaya1/pada1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya1.pada1 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..templates import * 11 | 12 | f = F.auto 13 | 14 | 15 | @O.Operator.no_params 16 | def _47(state, index, locus=None): 17 | """Apply 'mit' substitution more generally. 18 | 19 | Rule 1.1.47 of the Ashtadhyayi defines how to substitute a term 20 | marked with indicatory 'm': 21 | 22 | 1.1.47 mid aco 'ntyāt paraḥ 23 | 24 | But if the term is introduced by a "tasmāt" rule instead, then this 25 | rule has no time to act. This function allows 1.1.47 to act even 26 | when a 'mit' term is introduced by a "tasmāt" rule. 27 | """ 28 | mit = state[index] 29 | op = O.tasya(mit) 30 | state = op.apply(state, index - 1, locus) 31 | return state.remove(index) 32 | 33 | 34 | @O.Operator.no_params 35 | def _60_63(state, index, locus=None): 36 | """Perform pratyaya-lopa.""" 37 | lopa = state[index] 38 | raw = lopa.raw 39 | pratyaya = state[index + 1].add_lakshana(raw) 40 | return state.remove(index).swap(index, pratyaya) 41 | 42 | 43 | RULES = [ 44 | Anuvrtti(category='paribhasha'), 45 | ('1.1.47', None, 'mit', None, _47), 46 | ('1.1.60', None, f('lu~k', 'Slu~', 'lu~p'), None, _60_63), 47 | ] 48 | -------------------------------------------------------------------------------- /test/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.util 4 | ~~~~~~~~~ 5 | 6 | Tests for the utility functions. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import pytest 12 | 13 | from vyakarana.derivations import State 14 | from vyakarana.util import * 15 | from vyakarana.terms import * 16 | 17 | 18 | def test_iter_group(): 19 | items = range(18) 20 | groups = [range(6), range(6, 12), range(12, 18)] 21 | assert list(iter_group(items, 6)) == groups 22 | 23 | 24 | def test_iter_pairwise(): 25 | items = 'abcdefg' 26 | 27 | actual_list = list(iter_pairwise(items)) 28 | expected_list = [tuple(x) for x in 'ab bc cd de ef fg'.split()] 29 | assert actual_list == expected_list 30 | 31 | 32 | def test_rank(): 33 | pass 34 | 35 | 36 | @pytest.fixture 37 | def editor_data(): 38 | data = 'abcdefghijklmnopqrstuvxwyz1234567890' 39 | terms = [Upadesha('_').set_value(group) for group in iter_group(data, 6)] 40 | state = State(terms) 41 | editor = SoundEditor(state) 42 | return (data, terms, state, editor) 43 | 44 | 45 | def test_sound_editor_iter(editor_data): 46 | data, terms, state, editor = editor_data 47 | for i, index in enumerate(editor): 48 | assert index.value == data[i] 49 | 50 | 51 | def test_sound_editor_prev_next(editor_data): 52 | data, terms, state, editor = editor_data 53 | for i, index in enumerate(editor): 54 | prev = index.prev 55 | next = index.next 56 | if i > 0: 57 | assert prev.value == data[i - 1] 58 | if i < len(data) - 1: 59 | assert next.value == data[i + 1] 60 | -------------------------------------------------------------------------------- /docs/asiddha.rst: -------------------------------------------------------------------------------- 1 | *asiddha* and *asiddhavat* 2 | ========================== 3 | 4 | When a rule applies to some input to yield some output, the input is discarded 5 | and all future applications act on the output. But sometimes the original input 6 | preserves some information that we want to keep. 7 | 8 | *asiddha* 9 | --------- 10 | 11 | TODO 12 | 13 | 14 | *asiddhavat* 15 | ------------ 16 | 17 | Consider the following input: 18 | 19 | *śās + hi* 20 | 21 | By 6.4.35, *śās* becomes *śā* when followed by *hi*. By 6.4.101, *hi* becomes 22 | *dhi* when preceded by a consonant. If one applies, the other is blocked. But 23 | to get the correct form *śādhi*, we have to apply both rules together. 24 | 25 | The Ashtadhyayi solves this problem by placing both rules in a section called 26 | **asiddhavat**. For any two rules A and B within this section, the results of 27 | A are invisible to B (or "as if not completed", i.e. *a-siddha-vat*). This 28 | allows each rule to act without being blocked by the other. 29 | 30 | In practical terms, this means that each term has at least two values 31 | simultaneously: one accessible only to the non-*asiddhavat* world (e.g. *śā*) 32 | and one accessible only to the *asiddhavat* world (*śās*). 33 | 34 | To see how the program handles these problems, see the :ref:`data spaces 35 | ` stuff in :doc:`inputs_and_outputs`. 36 | 37 | .. note:: 38 | Issues of *asiddha* and *asiddhavat* are subtle and outside the scope of 39 | this documentation. Those interested might see `rule 6.4.22`_ of the 40 | Ashtadhyayi or section 3.5 of `Goyal et al.`_ 41 | 42 | .. _rule 6.4.22: http://avg-sanskrit.org/avgupload/dokuwiki/doku.php?id=sutras:6-4-22 43 | .. _Goyal et al.: http://sanskrit1.ccv.brown.edu/Sanskrit/Symposium/Papers/AmbaSimulation.pdf 44 | -------------------------------------------------------------------------------- /vyakarana/adhyaya3/pada1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya3.pada1 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..templates import * 11 | from ..terms import Krt 12 | 13 | f = F.auto 14 | 15 | 16 | def k_dhatu(s): 17 | return Krt(s).add_samjna('dhatu', 'anga') 18 | 19 | 20 | def k_anga(s): 21 | return Krt(s).add_samjna('anga') 22 | 23 | 24 | GUPU_DHU = f('gupU~', 'DUpa~', 'vicCa~', 'paRa~\\', 'pana~\\') 25 | 26 | 27 | BHRASHA_BHLASHA = f('wuBrASf~\\', 'wuBlASf~\\', 'Bramu~', 'kramu~', 28 | 'klamu~', 'trasI~', 'truwa~', 'laza~^') 29 | 30 | 31 | STAMBHU_STUMBHU = f('sta\mBu~', 'stu\mBu~', 'ska\mBu~', 'sku\mBu~', 32 | 'sku\Y') 33 | 34 | 35 | RULES = [ 36 | Anuvrtti('dhatu', None, 'tin'), 37 | ('3.1.25', F.gana('cura~'), None, 'tin', k_dhatu('Ric')), 38 | ('3.1.28', GUPU_DHU, None, 'tin', k_dhatu('Aya')), 39 | ('3.1.29', 'fti~\\', None, True, k_dhatu('IyaN')), 40 | ('3.1.30', 'kamu~\\', None, True, k_dhatu('RiN')), 41 | 42 | Anuvrtti('dhatu', None, f('tin') & f('sarvadhatuka')), 43 | ('3.1.33', None, None, 'lf~w', k_anga('sya')), 44 | ('3.1.68', None, None, None, k_anga('Sap')), 45 | ('3.1.69', F.gana('divu~'), None, None, k_anga('Syan')), 46 | Va('3.1.70', BHRASHA_BHLASHA, None, None, True), 47 | ('3.1.73', F.gana('zu\\Y'), None, None, k_anga('Snu')), 48 | ('3.1.77', F.gana('tu\da~^'), None, None, k_anga('Sa')), 49 | ('3.1.78', F.gana('ru\Di~^r'), None, None, k_anga('Snam')), 50 | ('3.1.79', F.gana('tanu~^'), None, None, k_anga('u')), 51 | ('3.1.81', F.gana('qukrI\\Y'), None, None, k_anga('SnA')), 52 | Ca('3.1.82', STAMBHU_STUMBHU, None, None, k_anga('Snu')), 53 | ] 54 | -------------------------------------------------------------------------------- /vyakarana/adhyaya7/pada2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya7.pada2 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..templates import * 11 | from ..terms import Upadesha as U, Pratyaya 12 | 13 | f = F.auto 14 | it_ashtadhyayi = None 15 | TAS = Pratyaya('tAsi~').add_samjna('ardhadhatuka') 16 | 17 | 18 | @O.Operator.no_params 19 | def tasvat(state, index, **kw): 20 | global it_ashtadhyayi 21 | if it_ashtadhyayi is None: 22 | from ..ashtadhyayi import Ashtadhyayi 23 | it_ashtadhyayi = Ashtadhyayi.with_rules_in('7.2.8', '7.2.78') 24 | 25 | import sys 26 | sys.exit() 27 | 28 | 29 | titutra = f('ti', 'tu', 'tra', 'ta', 'Ta', 'si', 'su', 'sara', 'ka', 'sa') 30 | kr_sr_bhr = F.value('kf', 'sf', 'Bf', 'vf', 'stu', 'dru', 'sru', 'Sru') 31 | svarati_suti = f('svf', 'zUG', 'zUN', 'DUY', 'Udit') 32 | 33 | 34 | RULES = [ 35 | Anuvrtti('anga', None, None), 36 | Na('7.2.8', None, None, f('krt') & F.adi('vaS'), O.tasya(U('iw'))), 37 | Ca('7.2.9', None, f('krt') & titutra, None, True), 38 | Na('7.2.13', kr_sr_bhr, f('li~w'), None, True), 39 | ('7.2.35', None, f('ardhadhatuka') & F.adi('val'), None, True), 40 | Va('7.2.44', svarati_suti, True, None, True), 41 | # ('7.2.61', 'ac', True, None, True), 42 | 43 | Anuvrtti('anga', 'sarvadhatuka', None), 44 | ('7.2.81', 'at', F.adi('At') & F.samjna('Nit'), None, O.adi('iy')), 45 | 46 | Anuvrtti(None, 'anga', None), 47 | ('7.2.114', None, 'mfjU~', None, O.vrddhi), 48 | ('7.2.115', None, 'ac', f('Yit', 'Rit'), True), 49 | # This should really apply `O.vrddhi`, but by 1.1.3 it's tricky. 50 | # Since this is a one-off, apply a fuction with the same effect: 51 | ('7.2.116', None, F.upadha('at'), True, O.upadha('A')), 52 | ] 53 | -------------------------------------------------------------------------------- /docs/inputs_and_outputs.rst: -------------------------------------------------------------------------------- 1 | Inputs and Outputs 2 | ================== 3 | 4 | With rare exception, all data handled by the system is processed functionally. 5 | That is, every operation applied to an input must create a new input, without 6 | exception. The program follows this principle for two reasons: 7 | 8 | - branching. Since one input can produce multiple outputs, it's easier to just 9 | create new outputs and ensure that no implicit information can be propagated. 10 | - basic sanity. This makes the system easier to model mentally. 11 | 12 | 13 | Terms 14 | ----- 15 | A rule accepts a list of **terms** as input and returns the same as output. 16 | A term is an arbitrary piece of sound and usually represents a morphere, but 17 | that's not always the case. 18 | 19 | In the Ashtadhyayi, these terms are usually called :term:`upadeśa`, since 20 | the grammar is taught (*upadiśyate*) by means of these terms, And in the 21 | program, these terms are usually represented by instances of the 22 | :class:`~vyakarana.upadesha.Upadesha` class. These classes provide some nice 23 | methods for accessing and modifying various parts of the term. For details, 24 | see the documentation on the :class:`~vyakarana.upadesha.Upadesha` class. 25 | 26 | 27 | .. _data-spaces: 28 | 29 | Data spaces 30 | ^^^^^^^^^^^ 31 | 32 | :doc:`As mentioned earlier `, terms in the Ashtadhyayi often contain 33 | multiple values at once. Within the program, these are modeled by **data 34 | spaces**, which make it easier to access and manipulate these values. These 35 | data spaces are basically just tuples; instead of containing a single data 36 | value, each term contains a variety of values that are valid simultaneously. 37 | 38 | TODO 39 | 40 | States 41 | ------ 42 | 43 | A :class:`~vyakarana.derivations.State` is a list of terms. Like the other 44 | inputs used by the grammar, states are modified functionally. For details, see 45 | the documentation on the :class:`~vyakarana.derivations.State` class. 46 | -------------------------------------------------------------------------------- /test/data/lrt.csv: -------------------------------------------------------------------------------- 1 | # ~~~~~~~~~~~ 2 | # Kale p. 302 3 | # ~~~~~~~~~~~ 4 | BU Bavizyati Bavizyatas Bavizyanti Bavizyasi BavizyaTas BavizyaTa BavizyAmi BavizyAvas BavizyAmas 5 | stf\Y starizyati/starIzyati/starizyate/starIzyate 6 | 7 | # ~~~~~~~~~~~ 8 | # Kale p. 303 9 | # ~~~~~~~~~~~ 10 | yu yavizyati 11 | SIN Sayizyate 12 | zRu snavizyati 13 | wvo~Svi Svayizyati 14 | SriY Srayizyati/Srayizyate 15 | qupa\ca~^z pakzyati 16 | mu\cx~^ mokzyati/mokzyate 17 | zi\ca~^ sekzyati 18 | Bra\sja~^ BaNkzyati 19 | Bu\ja~ Bokzyati 20 | Bra\sja~^ Brakzyati/Barkzyati 21 | wuma\sjo~ maNkzyati 22 | ra\Yja~^ raNkzyati 23 | sf\ja~ srakzyati 24 | a\da~ atsyati 25 | pa\da~\ patsyate 26 | ska\ndi~r skantsyati 27 | ba\nDa~ Bantsyati 28 | vya\Da~ vyatsyati 29 | ma\na~\ maMsyate 30 | tf\pa~ tarpizyati/tarpsyati/trapsyati 31 | # TODO: saM-gam 32 | df\Si~r drakzyati 33 | Gasx~ Gatsyati 34 | 35 | # ~~~~~~~~~~~ 36 | # Kale p. 304 37 | # ~~~~~~~~~~~ 38 | va\sa~ vatsyati 39 | da\ha~ Dakzyati 40 | Rah\a~^ natsyati/natsyate 41 | vah\a~^ vakzyati 42 | anjU~ aYjizyati/aNkzyati 43 | aSU~\ aSizyate/akzyate 44 | klidi~\ kledizyati/kletsyati 45 | kliSa~\ kleSizyati/klekzyati 46 | kzamU~ kzamizyate/kzaMsyate 47 | gAhU~\ gAhizyate/GAkzye 48 | gupU~ gopizyati/gopsyati/gopAyizyati 49 | guhU~^ gUhizyati/gUhizyate/Gokzyati/Gokzyate 50 | takzU~ takzizyati/takzyati 51 | 52 | # ~~~~~~~~~~~ 53 | # Kale p. 305 54 | # ~~~~~~~~~~~ 55 | # tfh? jaB? 56 | trapU~\z trapizyate/trapsyate 57 | DU Davizyati/Dozyati 58 | tfha~ tarhizyati/tarkSyati 59 | muh\a~ mohizyati/mokSyati 60 | mfjU~ mArjizyati/mArkzyati 61 | # raDizyati/ratsyati 62 | o~vraScU~ vraScizyati/vrakzyati 63 | zRih\a~ snehizyati/snekzyati 64 | svara~ svarizyati 65 | ku\ kuzyati 66 | kuwa~ kuwizyati 67 | DU Duvizyati 68 | # upizyati/DUpAyizyati 69 | vicCa~ vicCizyati/vicCAyizyati 70 | fti~\ artizyate/ftIyizyate 71 | kamu~\ kamizyate/kAmayizyate 72 | jaBI~\ jamBizyate 73 | mI\Y mAsyati/mAsyate 74 | dIN dAsyate 75 | lI\ lezyate/lAsyati 76 | cftI~ cartizyati/cartsyati 77 | 78 | # ~~~~~~~~~~~ 79 | # Kale p. 306 80 | # ~~~~~~~~~~~ 81 | # Cardizyati/Cartsyati/Cardizyate/Cartsyate 82 | -------------------------------------------------------------------------------- /vyakarana/adhyaya7/pada4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya7.pada4 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..sounds import Sounds 11 | from ..templates import * 12 | from ..terms import Upadesha as U 13 | 14 | f = F.auto 15 | 16 | ac = Sounds('ac') 17 | shar = Sounds('Sar') 18 | khay = Sounds('Kay') 19 | 20 | 21 | @O.Operator.no_params 22 | def hal_shesha(state, index, locus=None): 23 | cur = state[index] 24 | first_hal = first_ac = '' 25 | for i, L in enumerate(cur.value): 26 | if i == 1 and cur.value[0] in shar and L in khay: 27 | first_hal = L 28 | if L in ac: 29 | first_ac = L 30 | break 31 | elif not first_hal: 32 | first_hal = L 33 | 34 | new_value = first_hal + first_ac 35 | if new_value != cur.value: 36 | return state.swap(index, cur.set_value(new_value)) 37 | else: 38 | return state 39 | 40 | 41 | @F.AlFilter.no_params 42 | def dvihal(term): 43 | hal = Sounds('hal') 44 | hal_r = Sounds('hal f') 45 | return term.upadha in hal_r and term.antya in hal 46 | 47 | 48 | RULES = [ 49 | Anuvrtti(None, 'anga', F.lakshana('li~w')), 50 | ('7.4.10', None, F.samyogadi & F.al('ft'), None, O.force_guna), 51 | ('7.4.11', None, F.raw('f\\') | F.al('Ft'), None, True), 52 | Va('7.4.12', None, f('SF', 'dF', 'pF'), None, O.hrasva), 53 | 54 | Anuvrtti(None, 'abhyasa', None), 55 | ('7.4.59', None, None, None, O.hrasva), 56 | ('7.4.60', None, None, None, hal_shesha), 57 | ('7.4.61', None, F.adi('Sar'), None, True), 58 | ('7.4.62', None, None, None, O.al_tasya('ku h', 'cu')), 59 | ('7.4.66', None, F.contains('f'), None, O.al_tasya('f', 'at')), 60 | ('7.4.69', None, None, ['i\\R', 'kit'], O.dirgha), 61 | ('7.4.70', None, F.adi('at'), None, True), 62 | 63 | Anuvrtti('abhyasa', 'anga', None), 64 | ('7.4.71', F.value('A'), dvihal, None, U('nu~w')), 65 | Ca('7.4.72', True, 'aSU~\\', None, True), 66 | 67 | Anuvrtti(None, 'abhyasa', None), 68 | ('7.4.73', None, None, 'BU', 'a'), 69 | ] 70 | -------------------------------------------------------------------------------- /vyakarana/adhyaya7/pada3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya7.pada3 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | from .. import filters as F, operators as O 10 | from ..sounds import Sounds 11 | from ..templates import * 12 | 13 | f = F.auto 14 | 15 | 16 | _78_roots = ['pA\\', 'GrA\\', 'DmA\\', 'zWA\\', 'mnA\\', 'dA\R', 17 | 'df\Si~r', 'f\\', 'sf\\', 'Sa\dx~', 'za\dx~'] 18 | 19 | 20 | _78_stems = ['piba', 'jiGra', 'Dama', 'tizWa', 'mana', 'yacCa', 'paSya', 21 | 'fcCa', 'DO', 'SIya', 'sIda'] 22 | 23 | 24 | sarva_ardha = f('sarvadhatuka', 'ardhadhatuka') 25 | 26 | 27 | @F.TermFilter.no_params 28 | def puganta_laghupadha(term): 29 | # TODO: puganta 30 | return term.upadha in Sounds('at it ut ft xt') 31 | 32 | 33 | RULES = [ 34 | Anuvrtti(None, 'anga', None), 35 | ('7.3.52', None, F.al('c j'), f('Git', 'Ryat'), Sounds('ku')), 36 | ('7.3.54', None, 'ha\\na~', 37 | f('Yit', 'Rit', F.adi('n')), O.al_tasya('h', 'ku')), 38 | ('7.3.55', 'abhyasa', True, None, True), 39 | ('7.3.56', True, 'hi\\', ~F.samjna('caN'), True), 40 | ('7.3.57', True, 'ji\\', f('san', 'li~w'), O.al_tasya('j', 'ku')), 41 | Vibhasha('7.3.58', True, 'ci\\Y', True, O.al_tasya('c', 'ku')), 42 | 43 | Anuvrtti(None, 'anga', F.raw('Syan')), 44 | ('7.3.74', None, F.gana('Samu~', 'madI~'), None, O.dirgha), 45 | 46 | Anuvrtti(None, 'anga', F.Sit_adi), 47 | ('7.3.75', None, f('zWivu~', 'klamu~'), None, O.dirgha), 48 | ('7.3.76', None, F.raw('kramu~') & F.samjna('parasmaipada'), None, True), 49 | ('7.3.77', None, f('izu~', 'ga\mx~', 'ya\ma~'), None, 'C'), 50 | ('7.3.78', None, f(*_78_roots), None, 51 | O.yathasamkhya(_78_roots, _78_stems)), 52 | ('7.3.79', None, f('jYA\\', 'janI~\\'), None, 'jA'), 53 | ('7.3.80', None, F.gana('pUY', 'plI\\'), None, O.hrasva), 54 | ('7.3.82', None, 'YimidA~', None, O.force_guna), 55 | 56 | Anuvrtti(None, 'anga', None), 57 | ('7.3.83', None, None, 'jus', O.guna), 58 | ('7.3.84', None, F.al('ik'), sarva_ardha, True), 59 | ('7.3.85', None, 'jAgf', ~F.samjna('vi', 'ciR', 'Ral', 'Nit'), True), 60 | ('7.3.86', None, puganta_laghupadha & F.upadha('ik'), sarva_ardha, True), 61 | 62 | Anuvrtti(None, 'anga', 'sarvadhatuka'), 63 | ('7.3.101', None, 'at', F.adi('yaY'), O.dirgha), 64 | ] 65 | -------------------------------------------------------------------------------- /test/trees.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.inference 4 | ~~~~~~~~~~~~~~ 5 | 6 | Tests for vyakarana/inference.py 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import pytest 12 | 13 | from vyakarana import expand, trees 14 | 15 | 16 | def apavada(): 17 | """Return a list of 3-tuples containing: 18 | 19 | 1. The rule name 20 | 2. The expected apavādas 21 | 3. The observed apavādas 22 | """ 23 | 24 | schema = [ 25 | # atmanepada / parasmaipada 26 | ('1.3.12', '1.3.78', { 27 | '78': ['12', '72', '76'] 28 | }), 29 | # vikarana 30 | ('3.1.68', '3.1.82', { 31 | '68': ['69', '70', '73', '77', '78', '79', '81', '82'], 32 | '81': ['82'], # ? 33 | }), 34 | # asiddhavat aci 35 | ('6.4.77', '6.4.88', { 36 | '77': ['81', '82', '83', '87', '88'], 37 | '79': ['80'], 38 | }), 39 | # nA -> nI 40 | ('6.4.112', '6.4.113', { 41 | '112': ['113'] 42 | }), 43 | # 'e' substitution (e.g. 'bheje') 44 | ('6.4.120', '6.4.126', { 45 | '120': ['126'], 46 | '121': ['126'], # ? 47 | '122': ['126'], 48 | '123': ['126'], # ? 49 | '124': ['126'], 50 | '125': ['126'], 51 | }), 52 | # jha replacement 53 | ('7.1.3', '7.1.5', { 54 | '3': ['4', '5'] 55 | }), 56 | # abhyasa 57 | ('7.4.59', '7.4.70', { 58 | '59': ['69', '70'], 59 | '69': ['70'] 60 | }) 61 | ] 62 | 63 | def full(x, prefix): return prefix + '.' + x 64 | 65 | results = [] 66 | for start, end, matches in schema: 67 | stubs = expand.fetch_stubs_in_range(start, end) 68 | rules = expand.build_from_stubs(stubs) 69 | apavadas = trees.find_apavada_rules(rules) 70 | 71 | for rule in rules: 72 | prefix, suffix = rule.name.rsplit('.', 1) 73 | 74 | observed = set(x.name for x in apavadas[rule]) 75 | expected = set(prefix + '.' + x for x in matches.get(suffix, [])) 76 | results.append((rule, expected, observed)) 77 | 78 | return results 79 | 80 | 81 | @pytest.mark.parametrize(('rule', 'expected', 'observed'), apavada()) 82 | def test_apavada(rule, expected, observed): 83 | assert expected == observed 84 | -------------------------------------------------------------------------------- /vyakarana/sandhi.py: -------------------------------------------------------------------------------- 1 | import operators as O 2 | from derivations import State 3 | from sounds import Sound, Sounds 4 | from terms import Upadesha 5 | from util import SoundEditor 6 | 7 | 8 | def convert(op): 9 | """A temporary fix to a deeper problem.""" 10 | def func(s): 11 | return op.apply(State([Upadesha(s + 'a~')]), 0)[0].value 12 | return func 13 | 14 | dirgha = O.dirgha.body 15 | iko_yan_aci = O.al_tasya('ik', 'yaR').body 16 | guna = convert(O.guna) 17 | vrddhi = convert(O.vrddhi) 18 | 19 | 20 | def apply(state): 21 | editor = SoundEditor(state) 22 | for cur in editor: 23 | next = cur.next 24 | if next.value is None: 25 | continue 26 | 27 | x, y = cur.value, next.value 28 | if x in Sounds('ac'): 29 | cur.value, next.value = ac_sandhi(x, y) 30 | elif x in Sounds('hal'): 31 | cur.value, next.value = hal_sandhi(x, y) 32 | 33 | yield editor.join() 34 | 35 | 36 | def ac_sandhi(x, y): 37 | """Apply the rules of ac sandhi to `x` as followed by `y`. 38 | 39 | These rules are from 6.1. A rule is part of ac sandhi iff the first 40 | letter is a vowel. 41 | 42 | :param x: the first letter. 43 | :param y: the second letter. 44 | """ 45 | 46 | # 6.1.97 ato guNe 47 | if x == 'a' and y in Sounds('at eN'): 48 | x = '' 49 | 50 | # 6.1.101 akaH savarNe dIrghaH 51 | elif Sound(x).savarna(y): 52 | x = '' 53 | y = dirgha(y) 54 | 55 | # 6.1.77 iko yaN aci 56 | elif x in Sounds('ik') and y in Sounds('ac'): 57 | x = iko_yan_aci(x) 58 | 59 | # 6.1.78 eco 'yavAyAvaH 60 | elif x in Sounds('ec') and y in Sounds('ac'): 61 | converter = dict(zip('eEoO', 'ay Ay av Av'.split())) 62 | x = converter[x] 63 | 64 | elif x in 'aA' and y in Sounds('ic'): 65 | x = '' 66 | 67 | # 6.1.87 Ad guNaH 68 | # 6.1.88 vRddhir eci 69 | y = vrddhi(y) if y in Sounds('ec') else guna(y) 70 | 71 | return x, y 72 | 73 | 74 | def hal_sandhi(x, y): 75 | """Apply the rules of hal sandhi to `x` as followed by `y`. 76 | 77 | These rules are from 6.1. A rule is part of hal sandhi iff the first 78 | letter is a consonant. 79 | 80 | :param x: the first letter. 81 | :param y: the second letter. 82 | """ 83 | 84 | # 6.1.66 lopo vyor vali 85 | if x in Sounds('v y') and y in Sounds('val'): 86 | x = '' 87 | 88 | return x, y 89 | -------------------------------------------------------------------------------- /docs/design_overview.rst: -------------------------------------------------------------------------------- 1 | Design Overview 2 | =============== 3 | 4 | Philosophy 5 | ---------- 6 | 7 | As much as possible, the program follows the principles of the Ashtadhyayi. It 8 | makes use of almost all of its technical devices, and many of its methods and 9 | classes have 1:1 correspondence to particular concepts from the grammatical 10 | tradition. This is the case for a few reasons: 11 | 12 | - We can model a system that's well-known and (fairly) easy to understand. 13 | - We can take advantage of the tradition's prior work. 14 | - We can make it easier to prove certain properties of the system. 15 | 16 | The program's performance is currently just OK, but only a few parts of it use 17 | any kind of optimization. With more aggressive caching it can probably run 18 | respectably, but if it stays bad (and if those problems are due to language 19 | features), I will probably port it to Scala or some other statically-typed 20 | functional language. 21 | 22 | How the program works 23 | --------------------- 24 | 25 | We pass a single input to :meth:`ashtadhyayi.Ashtadhyayi.derive`, the most 26 | interesting method in the :class:`Ashtadhyayi` class. This input is stored on 27 | an internal stack. As long as the stack is non-empty, we: 28 | 29 | 1. Pop an input off of the stack. 30 | 31 | 2. Find all rules such that that: 32 | 33 | - the rule has space to apply to the input 34 | - if applied, the rule would yield at least one new result. 35 | 36 | Instead of applying these rules simultaneously, we apply just one then 37 | repeat the loop. 38 | 39 | 3. Pick the rule from (2) with highest rank. If no rules were found in (2), 40 | send the input to the :mod:`asiddha` module and yield the results. 41 | 42 | .. note:: 43 | The :mod:`asiddha` module is basically legacy code. Currently it's 44 | too complicated to model easily, but in the future it will be modeled 45 | like the rest of the system. 46 | 47 | 4. Apply the rule and push the results back onto the stack. 48 | 49 | In other words, the main function of interest is a generator that loops over 50 | a stack and yields finished sequences. 51 | 52 | The following pages explore elements of this process in detail. In particular: 53 | 54 | - what inputs and outputs look like (:doc:`inputs_and_outputs`) 55 | - determining whether a rule has "space to apply" (:doc:`modeling_rules`) 56 | - ranking rules (:doc:`selecting_rules`) 57 | - defining rules tersely (:doc:`defining_rules`) 58 | -------------------------------------------------------------------------------- /vyakarana/adhyaya6/pada1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya6.pada1 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | Ashtadhyayi 6.1. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | from .. import filters as F, operators as O 12 | from ..dhatupatha import DHATUPATHA as DP 13 | from ..sounds import Sounds 14 | from ..templates import * 15 | from ..terms import Upadesha 16 | 17 | f = F.auto 18 | 19 | #: 6.1.15 20 | VACI_SVAPI = f(*['va\ca~', 'Yizva\pa~'] + DP.dhatu_list('ya\\ja~^')) 21 | 22 | #: 6.1.16 23 | GRAHI_JYA = f(*['graha~^', 'jyA\\', 'vaya~\\', 'vya\Da~', 'vaSa~', 24 | 'vyaca~', 'o~vraScU~', 'pra\cCa~', 'Bra\sja~^']) 25 | 26 | 27 | @O.Operator.no_params 28 | def do_dvirvacana(state, i, locus=None): 29 | # 6.1.1 ekAco dve prathamasya 30 | # 6.1.2 ajAder dvitIyasya 31 | # 6.1.3 na ndrAH saMyogAdayaH 32 | # 6.1.4 pUrvo 'bhyAsaH 33 | # 6.1.5 ubhe abhyastam 34 | cur = state[i] 35 | abhyasa = Upadesha(data=cur.data, samjna=frozenset(['abhyasa'])) 36 | abhyasta = cur.add_samjna('abhyasta') 37 | return state.swap(i, abhyasta).insert(i, abhyasa) 38 | 39 | 40 | @O.DataOperator.no_params 41 | def sa_adesha(value): 42 | if value.startswith('z'): 43 | converter = {'w': 't', 'W': 'T', 'R': 'n'} 44 | v = value[1] 45 | value = 's' + converter.get(v, v) + value[2:] 46 | return value 47 | 48 | 49 | @O.DataOperator.no_params 50 | def na_adesha(value): 51 | if value.startswith('R'): 52 | value = 'n' + value[1:] 53 | return value 54 | 55 | 56 | @F.TermFilter.no_params 57 | def ec_upadesha(term): 58 | clean = term.clean 59 | return clean and clean[-1] in Sounds('ec') 60 | 61 | 62 | RULES = [ 63 | Anuvrtti(None, 'dhatu', None), 64 | # TODO: why stated as abhyasa? 65 | ('6.1.8', None, ~f('abhyasta'), 'li~w', do_dvirvacana), 66 | ('6.1.9', None, True, f('san', 'yaN'), True), 67 | ('6.1.10', None, True, F.lakshana('Slu~'), True), 68 | ('6.1.11', None, True, 'caN', True), 69 | 70 | Anuvrtti(None, None, None), 71 | ('6.1.15', None, VACI_SVAPI, 'kit', O.samprasarana), 72 | Ca('6.1.16', None, GRAHI_JYA, F.knit, True), 73 | ('6.1.17', None, 'abhyasa', VACI_SVAPI | GRAHI_JYA, True), 74 | 75 | Anuvrtti(None, None, None), 76 | ('6.1.45', None, f('dhatu') & ec_upadesha, 77 | f('tin') & ~F.Sit_adi, O.tasya('A')), 78 | ('6.1.64', None, f('dhatu') & F.adi('z'), None, sa_adesha), 79 | ('6.1.65', None, f('dhatu') & F.adi('R'), None, na_adesha), 80 | ] 81 | -------------------------------------------------------------------------------- /vyakarana/derivations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.derivations 4 | ~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | 10 | class State(object): 11 | 12 | """A sequence of terms. 13 | 14 | This represents a single step in some derivation.""" 15 | 16 | __slots__ = ['terms', 'history'] 17 | 18 | def __init__(self, terms=None, history=None): 19 | #: A list of terms. 20 | self.terms = terms or [] 21 | self.history = history or [] 22 | 23 | def __eq__(self, other): 24 | if other is None: 25 | return False 26 | if self is other: 27 | return True 28 | 29 | return self.terms == other.terms 30 | 31 | def __ne__(self, other): 32 | return not self == other 33 | 34 | def __getitem__(self, index): 35 | return self.terms[index] 36 | 37 | def __iter__(self): 38 | return iter(self.terms) 39 | 40 | def __len__(self): 41 | return len(self.terms) 42 | 43 | def __repr__(self): 44 | return '' % self.terms 45 | 46 | def __str__(self): 47 | return repr([x.asiddha for x in self.terms]) 48 | 49 | def pprint(self): 50 | data = [] 51 | append = data.append 52 | append('---------------------') 53 | append(str(self)) 54 | for item in self.terms: 55 | append(' %s' % item) 56 | append(' data : %s' % (tuple(item.data),)) 57 | append(' samjna : %s' % sorted(item.samjna)) 58 | append(' lakshana: %s' % sorted(item.lakshana)) 59 | append(' ops : %s' % sorted(item.ops)) 60 | append('---------------------') 61 | print '\n'.join(data) 62 | 63 | def copy(self): 64 | return State(self.terms[:], self.history[:]) 65 | 66 | def insert(self, index, term): 67 | c = self.copy() 68 | c.terms.insert(index, term) 69 | return c 70 | 71 | def mark_rule(self, rule, index): 72 | c = self.copy() 73 | c.history.append((rule, index)) 74 | c.terms[index] = c.terms[index].add_op(rule) 75 | return c 76 | 77 | def remove(self, index): 78 | c = self.copy() 79 | c.terms.pop(index) 80 | return c 81 | 82 | def replace_all(self, terms): 83 | c = self.copy() 84 | c.terms = terms 85 | return c 86 | 87 | def swap(self, index, term): 88 | c = self.copy() 89 | c.terms[index] = term 90 | return c 91 | -------------------------------------------------------------------------------- /test/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.util 4 | ~~~~~~~~~ 5 | 6 | Utility functions for testing the Ashtadhyayi. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | 12 | import os 13 | from collections import OrderedDict 14 | from vyakarana import ashtadhyayi as A 15 | from vyakarana.terms import Upadesha, Vibhakti 16 | 17 | def data_path(name): 18 | """Return a relative path to test file `name`.""" 19 | TEST_DIR = os.path.dirname(__file__) 20 | return os.path.join(TEST_DIR, 'data', name) 21 | 22 | 23 | def read_data(filename): 24 | """Read lines from `filename`, ignoring comments.""" 25 | filename = data_path(filename) 26 | with open(filename) as f: 27 | for line in f: 28 | if line.startswith('#'): 29 | continue 30 | if line.startswith('\n'): 31 | continue 32 | yield line 33 | 34 | 35 | def load_forms(filename): 36 | """Load verb forms from `filename`. 37 | 38 | :param filename: the name of some test file. 39 | """ 40 | data = OrderedDict() 41 | for line in read_data(filename): 42 | tokens = line.split() 43 | dhatu = tokens[0] 44 | paradigm = tokens[1:] 45 | 46 | if dhatu in data: 47 | for i, items in enumerate(data[dhatu]): 48 | try: 49 | items.update(paradigm[i].split('/')) 50 | except IndexError: 51 | break 52 | else: 53 | data[dhatu] = [set(x.split('/')) if x != '_' else set() for x in paradigm] 54 | 55 | for dhatu, paradigm in data.items(): 56 | purusha = ['prathama', 'madhyama', 'uttama'] 57 | vacana = ['ekavacana', 'dvivacana', 'bahuvacana'] 58 | 59 | for i, forms in enumerate(paradigm): 60 | if forms: 61 | person, number = purusha[i / 3], vacana[i % 3] 62 | yield dhatu, forms, person, number 63 | 64 | 65 | def verb_data(filename, la): 66 | """Generate verb data as part of a parametrized test. 67 | 68 | Each datum in the returned list is a 2-tuple containing a single 69 | form and the result set in which it is expected to appear. 70 | 71 | :param filename: the name of some test file. 72 | :param la: the upadeśa name of one of the lakāras. 73 | """ 74 | test_cases = [] 75 | ash = A.Ashtadhyayi() 76 | for dhatu, expected, person, number in load_forms(filename): 77 | d = Upadesha.as_dhatu(dhatu) 78 | p = Vibhakti(la).add_samjna(person, number) 79 | actual = set(ash.derive([d, p])) 80 | print actual 81 | 82 | test_cases.append((expected, actual)) 83 | 84 | return test_cases 85 | -------------------------------------------------------------------------------- /vyakarana/reranking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.reranking 4 | ~~~~~~~~~~~~~~~~~~~ 5 | 6 | Defines various **rankers**, each of which can be used as a key 7 | function while sorting a list of rules. Something is a ranker if 8 | and only if it has these three properties: 9 | 10 | 1. It is a callable. 11 | 2. It returns a value that can be compared. 12 | 3. It returns a higher value for rules that should have a higher 13 | rank. 14 | 15 | Rules are sorted with `reverse=True`, so higher values appear 16 | earlier in the list. 17 | 18 | :license: MIT and BSD 19 | """ 20 | 21 | import filters as F 22 | from rules import Rule 23 | 24 | #: Artificially boosted rules. 25 | BOOST = ['6.1.45', '6.1.64', '6.1.65'] 26 | 27 | 28 | class NameRanker(object): 29 | 30 | """Ranker for specific rule names.""" 31 | 32 | def __init__(self, *args): 33 | self.names = args 34 | 35 | def __call__(self, rule): 36 | if rule.name in self.names: 37 | return 1 38 | return 0 39 | 40 | 41 | class FilterRanker(object): 42 | 43 | """Ranker for a specific filter type. 44 | 45 | If the filter domain is small, the score is large. 46 | """ 47 | 48 | def __init__(self, superclass): 49 | self.superclass = superclass 50 | 51 | def __call__(self, rule): 52 | score = 0 53 | for filt in rule.filters: 54 | for s in filt.supersets: 55 | if s.domain and isinstance(s, self.superclass): 56 | # Smaller domain -> higher score. 57 | score += 1.0 / len(s.domain) 58 | return score 59 | 60 | 61 | class CompositeRanker(object): 62 | 63 | """Combines multiple rankers.""" 64 | 65 | def __init__(self, rankers=None): 66 | self.rankers = rankers 67 | if rankers is None: 68 | self.rankers = [ 69 | # Artificially boosted rules 70 | NameRanker(*BOOST), 71 | by_category, 72 | by_locus, 73 | FilterRanker(F.UpadeshaFilter), 74 | FilterRanker(F.SamjnaFilter), 75 | FilterRanker(F.AlFilter), 76 | ] 77 | 78 | def __call__(self, rule): 79 | return tuple(r(rule) for r in self.rankers) 80 | 81 | 82 | def by_category(rule): 83 | """Ranker for a rule's category. 84 | 85 | :param rule: the rule to score 86 | """ 87 | if rule.category in (Rule.SAMJNA, Rule.ATIDESHA, Rule.PARIBHASHA): 88 | return 1 89 | return 0 90 | 91 | 92 | def by_locus(rule): 93 | """Ranker for a rule's locus. 94 | 95 | :param rule: the rule to score 96 | """ 97 | if rule.locus == Rule.SIDDHA: 98 | return 1 99 | return 0 100 | -------------------------------------------------------------------------------- /docs/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | This program has two goals: 5 | 6 | 1. To generate the entire set of forms allowed by the Ashtadhyayi without over- 7 | or under-generating. 8 | 2. To do so while staying true to the spirit of the Ashtadhyayi. 9 | 10 | Goal 1 is straightforward, but the "under-generating" is subtle. For some 11 | inputs, the Ashtadhyayi can yield multiple results; ideally, we should be able 12 | to generate all of them. 13 | 14 | Goal 2 is more vague. I want to create a program that defines and chooses its 15 | rules using the same mechanisms used by the Ashtadhyayi. 16 | 17 | In other words, I want to create a full simulation of the Ashtadhyayi. 18 | 19 | The Ashtadhyayi 20 | --------------- 21 | 22 | The Ashtadhyayi (*Aṣṭādhyāyi*) is a list of about 4000 rules. It has **ordinary 23 | rules**, which take some input and yield some output(s), and **metarules**, 24 | which describe how to interpret other rules. If Sanskrit grammar is a factory, 25 | then its ordinary rules are the machines inside and its metarules are the 26 | instructions used to build the machines. 27 | 28 | Given some input, the Ashtadhyayi applies a rule that changes the input in 29 | some way. The output of the rule is then sent to another rule, just as items 30 | on the assembly line move from one machine to the other. This continues until 31 | there's no way to change the result any further. When this occurs, the process 32 | is complete. The result is a correct Sanskrit expression. 33 | 34 | This documentation makes reference to various rules from the Ashtadhayi. All 35 | rules are numbered *x.y.z*, where: 36 | 37 | - *x* is the **book** that contains the rule. There are 8 books in total. 38 | - *y* is the **chapter** that contains the rule. Each book has 4 chapters. 39 | - *z* is the rule's position within the chapter. 40 | 41 | For example, 1.1.1 is the first rule of the text, and 8.4.68 is the last. 42 | 43 | The Dhatupatha 44 | -------------- 45 | 46 | If the Ashtadhyayi is the stuff inside the factory, then the Dhatupatha 47 | (*Dhātupāṭha*) is the raw material that enters the factory. It is a list of 48 | about 2000 verb roots, each stated with a basic meaning: 49 | 50 | | 1.1 *bhū sattāyām* 51 | | *bhū* in the sense of existence (*sattā*) 52 | 53 | Modern editions of the Dhatupatha are numbered *x.y*, where: 54 | 55 | - *x* is the root's verb class (**gaṇa**). There are 10 classes in total. 56 | - *y* is the root's position within the *gaṇa*. 57 | 58 | Thus *bhū* is entry 1 in *gaṇa* 1; it's the first root in the list. 59 | 60 | There is no single version of the Dhātupāṭha. I used a version I found on 61 | `Sanskrit Documents`_ (specifically, `this file`_) and made some small 62 | corrections. So far, it's been totally competent for the task. 63 | 64 | .. _Sanskrit Documents: http://sanskritdocuments.org 65 | .. _this file: http://sanskritdocuments.org/doc_z_misc_major_works/dhatupatha_svara.itx 66 | -------------------------------------------------------------------------------- /docs/sounds.rst: -------------------------------------------------------------------------------- 1 | Sounds 2 | ====== 3 | 4 | Sandhi is an important part of Sanskrit. Thus sandhi is an important part of 5 | the Ashtadhyayi. The metalanguage of the Ashtadhyayi gives us a few ways to 6 | describe different groups of sounds as tersely as possible. 7 | 8 | *Savarṇa* sets 9 | -------------- 10 | 11 | First, a way to describe related sounds: 12 | 13 | Vowels and semivowels, as well as consonants with *u* as an *it* letter, 14 | refer to all **savarṇa** ("homogeneous") terms. (1.1.69) 15 | 16 | *Savarṇa* has a precise definition, but generally it refers to sounds that are 17 | similar in some way. Anyway, some examples: 18 | 19 | - *a* refers to *a* and *ā* 20 | - *i* refers to *i* and *ī* 21 | - *ku* refers to all sounds in *kavarga* 22 | - *cu* refers to all sounds in *cavarga* 23 | 24 | *a* and *i* also refer to the corresponding nasal vowels, but generally we can 25 | ignore the nasal sounds entirely. (The rule mentions semivowels because some 26 | semivowels can be nasal, too.) 27 | 28 | Single vowels 29 | ------------- 30 | In the grammar, *a* always refers to both *a* and *ā*. To refer to just the 31 | sound *a*, we use the following rule: 32 | 33 | A vowel stated with *t* refers to just that vowel. (1.1.70) 34 | 35 | Some examples: 36 | 37 | - *at* refers to just *a* 38 | - *āt* refers to just *ā* 39 | 40 | These terms refer to nasal sounds too, but generally we can ignore the nasal 41 | sounds entirely. 42 | 43 | *Pratyāhāra* 44 | ------------ 45 | Finally, a way to refer to other groups of interest. Consider the following 46 | list: 47 | 48 | 1. a i u **ṇ** 49 | 2. ṛ ḷ **k** 50 | 3. e o **ṅ** 51 | 4. ai au **c** 52 | 5. ha ya va ra **ṭ** 53 | 6. la **ṇ** 54 | 7. ña ma ṅa ṇa na **m** 55 | 8. jha bha **ñ** 56 | 9. gha ḍha dha **ṣ** 57 | 10. ja ba ga ḍa da **ś** 58 | 11. kha pha cha ṭha tha ca ṭa ta **v** 59 | 12. ka pa **y** 60 | 13. śa ṣa sa **r** 61 | 14. ha **l** 62 | 63 | These rows are usually called the **Shiva Sutras**. They were arranged 64 | deliberately so that similar sounds would appear next to each other. 65 | 66 | Here's how we use the list. Each row has a list of sounds that ends with an 67 | *it* tag. We take advantage of the following metarule: 68 | 69 | In lists like the one above, an item stated with an *it* refers to all 70 | the items between them, too. (1.1.71) 71 | 72 | and use it to produce concise terms for various Sanskrit sounds. 73 | 74 | For example, the *ha* on row 5, when used with *it* letter *l* on row 14, 75 | creates the term *hal*. And this *hal* refers to all sounds between *ha* and 76 | that *it* letter *l*. That is, it refers to the set of Sanskrit consonants. 77 | 78 | Such groups are called **pratyāhāra**. Other examples: 79 | 80 | - *ac* refers to all vowels. By rule 1.1.69, *a* refers to *ā*, and so on for 81 | the other vowels. 82 | - *khar* refers to all unvoiced consonants. 83 | - *yaṇ* refers to all semivowels. 84 | - *al* refers to all sounds. 85 | 86 | Certain sounds and *it* letters are used in the list twice, but context is 87 | enough to tell us how to interpret a given *pratyāhāra*. 88 | -------------------------------------------------------------------------------- /vyakarana/lists.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.lists 4 | ~~~~~~~~~~~~~~~ 5 | 6 | Lists of various terms, designations, and sounds. Some of these 7 | lists could probably be inferred programmatically, but for the sake 8 | of basic sanity these are encoded explicitly. Thankfully these lists 9 | are rather small. 10 | 11 | :license: MIT and BSD 12 | """ 13 | 14 | #: Defined in rule 3.4.78. These 18 affixes are used to form verbs. 15 | #: The first 9 are called "parasmaipada" (1.4.99), and the last 9 are 16 | #: called "ātmanepada" (1.4.100). 17 | TIN = ['tip', 'tas', 'Ji', 'sip', 'Tas', 'Ta', 'mip', 'vas', 'mas', 18 | 'ta', 'AtAm', 'Ja', 'TAs', 'ATAm', 'Dvam', 'iw', 'vahi', 'mahiN'] 19 | 20 | 21 | #: Abstract suffixes that are replaced with items from `TIN`. 22 | #: Collectively, they are called the "lakāra" or just "la". 23 | LA = set([ 24 | 'la~w', 'li~w', 'lu~w', 'lf~w', 'le~w', 'lo~w', 25 | 'la~N', 'li~N', 'lu~N', 'lf~N' 26 | ]) 27 | 28 | 29 | #: Various pratyaya 30 | PRATYAYA = set([ 31 | 'luk', 'Slu', 'lup', 32 | 'Sap', 'Syan', 'Snu', 'Sa', 'Snam', 'u', 'SnA', 33 | 'Ric', 'Rin' 34 | ]) | LA 35 | 36 | 37 | #: Technical designations (1.3.2 - 1.3.9) 38 | IT = (set([L + 'it' for L in 'kKGNcYwqRpmS']) 39 | | set([L + 'dit' for L in 'aiuUfxo']) 40 | | set(['qvit', 'wvit']) 41 | | set(['svaritet', 'anudattet', 'svarita', 'anudatta'])) 42 | 43 | 44 | #: saṃjñā for verb 'pada' 45 | PADA = ['parasmaipada', 'atmanepada'] 46 | 47 | 48 | #: saṃjñā for various persons 49 | PURUSHA = ['prathama', 'madhyama', 'uttama'] 50 | 51 | 52 | #: saṃjñā for various numbers 53 | VACANA = ['ekavacana', 'dvivacana', 'bahuvacana'] 54 | 55 | 56 | #: saṃjñā for case triplets 57 | VIBHAKTI = ['prathama', 'dvitiya', 'trtiya', 'caturthi', 58 | 'pancami', 'sasthi', 'saptami'] 59 | 60 | 61 | #: saṃjñā for verb suffixes 62 | DHATUKA = ['sarvadhatuka', 'ardhadhatuka'] 63 | 64 | 65 | #: saṃjñā for kāraka relations (currently unused) 66 | KARAKA = ['karta', 'karma', 'karana', 'adhikarana', 'sampradana', 'apadana'] 67 | 68 | 69 | #: All saṃjñā 70 | SAMJNA = set([ 71 | 'guna', 'vrddhi', 72 | 'dhatu', 'anga', 'pada', 'pratyaya', 73 | 'krt', 'taddhita', 74 | 'abhyasa', 'abhyasta', 75 | 'tin', 'sup', 76 | ]) | set(PADA + PURUSHA + VACANA + VIBHAKTI + DHATUKA + KARAKA) 77 | 78 | 79 | #: A collection of various sounds, including: 80 | #: 81 | #: - savarṇa sets (1.1.69) 82 | #: - single-item sets (1.1.70) 83 | #: - pratyāhāra (1.1.71) 84 | SOUNDS = set([ 85 | # 1.1.69 aṇudit savarṇasya cāpratyayaḥ 86 | 'a', 'i', 'u', 'f', 'x', 87 | 'ku~', 'cu~', 'wu~', 'tu~', 'pu~', 88 | 89 | # 1.1.70 taparas tatkālasya 90 | 'at', 'At', 'it', 'It', 'ut', 'Ut', 'ft', 'Ft', 'et', 'Et', 'ot', 'Ot', 91 | 92 | # 1.1.71 ādir antyena sahetā 93 | # Although the Shiva Sutras allow a large number of pratyāhāras, 94 | # only the following are used in the Ashtadhyayi. 95 | # (Sharma Volume I, p. 33) 96 | 'eN', 'yaY', 'aR', 'Cav', 'aw', 97 | 'Jaz', 'Baz', 98 | 'ak', 'ik', 'uk', 'yaR', 'iR', 'Nam', 'am', 'yam', 99 | 'ac', 'ec', 'Ec', 'ic', 'may', 'Jay', 'Kay', 'yay', 100 | 'Sar', 'yar', 'Jar', 'Kar', 'car', 101 | 'JaS', 'jaS', 'baS', 'S', 'haS', 'vaS', 102 | 'al', 'hal', 'sal', 'val', 'ral', 'Jal' 103 | ]) 104 | -------------------------------------------------------------------------------- /test/operators.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from vyakarana.derivations import State 4 | import vyakarana.operators as O 5 | from vyakarana.terms import Upadesha 6 | 7 | 8 | # Constructors 9 | # ~~~~~~~~~~~~ 10 | 11 | def test_init_with_kw(): 12 | o = O.Operator(name='name', body='body', category='category', 13 | params='params') 14 | assert o.name == 'name' 15 | assert o.body == 'body' 16 | assert o.category == 'category' 17 | assert o.params == 'params' 18 | 19 | 20 | def test_init_with_kw_no_category(): 21 | o = O.Operator(name='name', body='body', params='params') 22 | assert o.name == 'name' 23 | assert o.body == 'body' 24 | assert o.category == 'name' 25 | assert o.params == 'params' 26 | 27 | 28 | def test_init_with_kw_no_params(): 29 | o = O.Operator(name='name', body='body', category='category') 30 | assert o.name == 'name' 31 | assert o.body == 'body' 32 | assert o.category == 'category' 33 | assert o.params is None 34 | 35 | 36 | def test_no_params(): 37 | def apples(state, index, locus=None): 38 | return state 39 | 40 | o = O.Operator.no_params(apples) 41 | assert o.name == 'apples' 42 | assert o.body is apples 43 | assert o.category == 'apples' 44 | assert o.params is None 45 | 46 | 47 | # (Python) Operators 48 | # ~~~~~~~~~~~~~~~~~~ 49 | 50 | @pytest.fixture 51 | def eq_ops(): 52 | return [ 53 | O.Operator(name=1, body=2, category=3, params=4), 54 | O.Operator(name=1, body=2, category=3, params=4), 55 | O.Operator(name=100, body=2, category=3, params=4), 56 | O.Operator(name=1, body=2, category=3, params=100), 57 | O.Operator(name=100, body=2, category=3, params=100) 58 | ] 59 | 60 | 61 | def test_eq(eq_ops): 62 | o1, o2, o3, o4, o5 = eq_ops 63 | assert o1 == o2 64 | assert not o1 == o3 65 | assert not o1 == o4 66 | assert not o1 == o5 67 | 68 | 69 | def test_ne(eq_ops): 70 | o1, o2, o3, o4, o5 = eq_ops 71 | assert not o1 != o2 72 | assert o1 != o3 73 | assert o1 != o4 74 | assert o1 != o5 75 | 76 | 77 | # Operator usage 78 | # ~~~~~~~~~~~~~~ 79 | 80 | def verify(cases, operator): 81 | for original, expected in cases: 82 | term = Upadesha('a~').set_value(original) 83 | state = State([term]) 84 | assert operator.apply(state, 0)[0].value == expected 85 | 86 | 87 | def test_dirgha(): 88 | cases = [ 89 | ('kram', 'krAm'), 90 | ('zWiv', 'zWIv'), 91 | ] 92 | verify(cases, O.dirgha) 93 | 94 | 95 | def test_guna(): 96 | cases = [ 97 | ('sad', 'sad'), 98 | ('KAd', 'KAd'), 99 | ('mid', 'med'), 100 | ('mud', 'mod'), 101 | ] 102 | verify(cases, O.guna) 103 | 104 | 105 | def test_hrasva(): 106 | cases = [ 107 | ('rI', 'ri'), 108 | ('pU', 'pu'), 109 | ] 110 | verify(cases, O.hrasva) 111 | 112 | 113 | def test_samprasarana(): 114 | cases = [ 115 | ('vac', 'uc'), 116 | ('svap', 'sup'), 117 | ('yaj', 'ij'), 118 | ('grah', 'gfh'), 119 | ('jyA', 'ji'), 120 | ('vyaD', 'viD'), 121 | ('Brasj', 'Bfsj'), 122 | ] 123 | verify(cases, O.samprasarana) 124 | 125 | 126 | def test_vrddhi(): 127 | cases = [ 128 | ('ji', 'jE'), 129 | ('nI', 'nE'), 130 | ('lu', 'lO'), 131 | ('pU', 'pO'), 132 | ('sad', 'sad'), # iko guNavRddhI 133 | ] 134 | verify(cases, O.vrddhi) 135 | -------------------------------------------------------------------------------- /test/sounds.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.sounds 4 | ~~~~~~~~~~~ 5 | 6 | 7 | 8 | :license: MIT and BSD 9 | """ 10 | import pytest 11 | 12 | from vyakarana.sounds import Sound, Sounds, Pratyahara, SoundCollection 13 | 14 | VOWELS = set('aAiIuUfFxXeEoO') 15 | SHORT_VOWELS = set('aiufx') 16 | LONG_VOWELS = VOWELS - SHORT_VOWELS 17 | 18 | STOPS = set('kKgGcCjJwWqQtTdDpPbB') 19 | NASALS = set('NYRnm') 20 | SEMIVOWELS = set('yrlv') 21 | SAVARGA = set('Szsh') 22 | CONSONANTS = STOPS | NASALS | SEMIVOWELS | SAVARGA 23 | 24 | 25 | class TestSound(object): 26 | 27 | def test_init(self): 28 | v = Sound('a') 29 | assert v.value == 'a' 30 | 31 | def test_savarna(self): 32 | for L in 'aAiIuUfF': 33 | v = Sound(L) 34 | assert v.savarna_set == set(L.lower() + L.upper()) 35 | assert v.savarna(L.lower()) 36 | assert v.savarna(L.upper()) 37 | 38 | 39 | class TestSoundCollection(object): 40 | 41 | """Test the shared methods of Pratyahara and Sounds.""" 42 | 43 | def test_init(self): 44 | with pytest.raises(NotImplementedError): 45 | SoundCollection() 46 | 47 | def test_attributes(self): 48 | for cls in [Pratyahara, Sounds]: 49 | s = cls('ac') 50 | assert s.name 51 | assert s.values 52 | 53 | def test_contains(self): 54 | s = Sounds('pu') 55 | assert 'p' in s 56 | 57 | def test_iter(self): 58 | s = Sounds('pu') 59 | i = 0 60 | for x in s: 61 | i += 1 62 | assert i == 5 63 | 64 | def test_len(self): 65 | assert len(Sounds('pu')) == 5 66 | 67 | 68 | class TestPratyahara(object): 69 | 70 | def test_init(self): 71 | p = Pratyahara('aR') 72 | assert p.name == 'aR' 73 | assert p.values == set('aAiIuU') 74 | 75 | def test_init_with_second_R(self): 76 | p = Pratyahara('aR', second_R=True) 77 | assert p.name == 'aR' 78 | assert p.values == VOWELS | SEMIVOWELS | set('h') 79 | 80 | def test_basics(self): 81 | def yes(p, s, **kw): 82 | pra = Pratyahara(p, **kw) 83 | assert pra.values == set(s) 84 | assert len(pra) == len(s) 85 | 86 | yes('eN', 'eo') 87 | yes('ec', 'eEoO') 88 | yes('ac', VOWELS) 89 | yes('Jay', STOPS) 90 | yes('Yam', NASALS) 91 | yes('yaR', SEMIVOWELS) 92 | yes('Sal', SAVARGA) 93 | yes('hal', CONSONANTS) 94 | 95 | 96 | class TestSounds(object): 97 | 98 | def test_init_vowel(self): 99 | for v in 'aAiIuUfFxX': 100 | s = Sounds(v) 101 | assert s.name == v 102 | assert s.values == Sound(v).savarna_set 103 | 104 | def test_init_u(self): 105 | s = Sounds('pu') 106 | assert s.name == 'pu' 107 | assert s.values == set('pPbBm') 108 | 109 | def test_init_t(self): 110 | for v in 'aAiIuUfFxX': 111 | name = v + 't' 112 | s = Sounds(name) 113 | assert s.name == name 114 | assert s.values == set(v) 115 | 116 | def test_init_pratyahara(self): 117 | for name in ['aR', 'eN', 'ac', 'Jay', 'Yam', 'hal']: 118 | s = Sounds(name) 119 | assert s.name == name 120 | assert s.values == Pratyahara(name).values 121 | 122 | def test_init_multiple(self): 123 | name = 'a pu it' 124 | s = Sounds(name) 125 | assert s.name == name 126 | assert s.values == set('aApPbBmi') 127 | -------------------------------------------------------------------------------- /vyakarana/ashtadhyayi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.ashtadhyayi 4 | ~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | Coordinates the rules of the Sūtrapāṭha. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import expand 12 | import reranking 13 | import sandhi 14 | import siddha 15 | import trees 16 | 17 | from . import logger 18 | from derivations import State 19 | 20 | 21 | class Ashtadhyayi(object): 22 | 23 | """Given some input terms, yields a list of Sanskrit words. 24 | 25 | This is the most abstract part of the system and doesn't expect any 26 | internal knowledge about how the system works. This is almost always 27 | the only class that client libraries should use. 28 | 29 | The heart of the class is :meth:`derive`, which accepts a list of 30 | terms and yields :class:`~vyakarana.derivations.State` objects that 31 | represent finished words. 32 | """ 33 | 34 | def __init__(self, stubs=None): 35 | rules = expand.build_from_stubs(stubs) 36 | ranker = reranking.CompositeRanker() 37 | 38 | #: Indexed arrangement of rules 39 | self.rule_tree = trees.RuleTree(rules, ranker=ranker) 40 | 41 | @classmethod 42 | def with_rules_in(cls, start, end, **kw): 43 | """Constructor using only a subset of the Ashtadhyayi's rules. 44 | 45 | This is provided to make it easier to test certain rule groups. 46 | 47 | :param start: name of the first rule to use, e.g. "1.1.1" 48 | :param end: name of the last rule to use, e.g. "1.1.73" 49 | """ 50 | 51 | stubs = expand.fetch_stubs_in_range(start, end) 52 | return cls(stubs=stubs, **kw) 53 | 54 | def _apply_next_rule(self, state): 55 | """Apply one rule and return a list of new states. 56 | 57 | This function applies conflict resolution to a list of candidate 58 | rules until one rule remains. 59 | 60 | :param state: the current state 61 | """ 62 | for ra, ia in self.rule_tree.candidates(state): 63 | # Ignore redundant applications 64 | if ra in state[ia].ops: 65 | continue 66 | 67 | # Only worthwhile rules 68 | ra_states = list(ra.apply(state, ia)) 69 | if not ra_states: 70 | continue 71 | 72 | for s in ra_states: 73 | logger.debug(' %s : %s --> %s' % (ra.name, state, s)) 74 | return ra_states 75 | 76 | def _sandhi_asiddha(self, state): 77 | """Apply rules from the 'sandhi' and 'asiddha' sections. 78 | 79 | TODO: rewrite the rules in the sandhi and asiddha sections until 80 | this function is no longer needed. 81 | 82 | :param state: the current state 83 | """ 84 | for s in sandhi.apply(state): 85 | for t in siddha.asiddha(s): 86 | yield ''.join(x.asiddha for x in t) 87 | 88 | def derive(self, sequence): 89 | """Yield all possible results. 90 | 91 | :param sequence: a starting sequence 92 | """ 93 | start = State(sequence) 94 | stack = [start] 95 | 96 | logger.debug('---') 97 | logger.debug('start: %s' % start) 98 | while stack: 99 | state = stack.pop() 100 | new_states = self._apply_next_rule(state) 101 | if new_states: 102 | stack.extend(new_states) 103 | 104 | # No applicable rules; state is in its final form. 105 | else: 106 | for result in self._sandhi_asiddha(state): 107 | logger.debug('yield: %s' % result) 108 | yield result 109 | -------------------------------------------------------------------------------- /vyakarana/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.util 4 | ~~~~~~~~~~~~~~ 5 | 6 | Classes and functions that are shared across multiple modules. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import itertools 12 | 13 | 14 | def iter_group(items, n): 15 | """Iterate over `items` by taking `n` items at a time.""" 16 | for i in range(0, len(items), n): 17 | yield items[i:i + n] 18 | 19 | 20 | def iter_pairwise(items): 21 | x, y = itertools.tee(items) 22 | next(y, None) 23 | return itertools.izip(x, y) 24 | 25 | 26 | class SoundEditor(object): 27 | 28 | def __init__(self, state, locus='asiddha'): 29 | self.state = state 30 | self.locus = locus 31 | self.data = [list(term.asiddha) for term in state] 32 | 33 | self.indices = [] 34 | abs_index = 0 35 | for i, term in enumerate(state): 36 | for j, sound in enumerate(term.asiddha): 37 | sound_index = SoundIndex(value=sound, term=term, state_index=i, 38 | term_index=j, absolute_index=abs_index, editor=self) 39 | self.indices.append(sound_index) 40 | abs_index += 1 41 | 42 | def __iter__(self): 43 | for index in self.indices: 44 | yield index 45 | 46 | def join(self): 47 | state = self.state 48 | new_terms = [] 49 | for i, term in enumerate(state): 50 | new_value = ''.join(L for L in self.data[i]) 51 | new_term = self.state[i].set_at(self.locus, new_value) 52 | new_terms.append(new_term) 53 | 54 | return state.replace_all(new_terms) 55 | 56 | def next(self, index): 57 | try: 58 | return self.indices[index.absolute_index + 1] 59 | except (TypeError, IndexError): 60 | return SoundIndex(editor=self) 61 | 62 | def prev(self, index): 63 | try: 64 | new_index = index.absolute_index - 1 65 | if new_index >= 0: 66 | return self.indices[new_index] 67 | except TypeError: 68 | pass 69 | return SoundIndex(editor=self) 70 | 71 | 72 | class SoundIndex(object): 73 | 74 | __slots__ = ['_value', 'term', 'state_index', 'term_index', 75 | 'absolute_index', 'editor', 'first', 'last'] 76 | 77 | def __init__(self, value=None, term=None, state_index=None, 78 | term_index=None, absolute_index=None, editor=None): 79 | #: The value associated with this index. 80 | self._value = value 81 | #: The term associated with this index. 82 | self.term = term 83 | #: The state index that corresponds to `self.term`. 84 | self.state_index = state_index 85 | #: The term index that corresponds to `self.value`. 86 | self.term_index = term_index 87 | #: The absolute index of this `SoundIndex` within the editor 88 | self.absolute_index = absolute_index 89 | #: The sound iterator that produced this index 90 | self.editor = editor 91 | 92 | #: True iff this is the first letter in the term. 93 | self.first = term_index == 0 94 | #: True iff this is the last letter in the term. 95 | self.last = term_index == len(term.value) - 1 if term else False 96 | 97 | @property 98 | def next(self): 99 | return self.editor.next(self) 100 | 101 | @property 102 | def prev(self): 103 | return self.editor.prev(self) 104 | 105 | @property 106 | def value(self): 107 | return self._value 108 | 109 | @value.setter 110 | def value(self, new_value): 111 | self._value = new_value 112 | self.editor.data[self.state_index][self.term_index] = new_value 113 | -------------------------------------------------------------------------------- /vyakarana/adhyaya3/pada4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya3.pada4 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | :license: MIT and BSD 7 | """ 8 | 9 | import itertools 10 | 11 | from .. import filters as F, operators as O, util 12 | from ..lists import PADA, PURUSHA, VACANA, VIBHAKTI, TIN, LA 13 | from ..sounds import Sounds 14 | from ..templates import * 15 | 16 | 17 | f = F.auto 18 | 19 | 20 | def label_by_triplet(terms, labels): 21 | """ 22 | Apply a single label to each triplet of terms. 23 | 24 | :param terms: a list of sets 25 | :param labels: a list of strings 26 | """ 27 | num_labels = len(labels) 28 | for i, chunk in enumerate(util.iter_group(terms, 3)): 29 | for term in chunk: 30 | term.add(labels[i % num_labels]) 31 | 32 | 33 | def label_by_item(terms, labels): 34 | """ 35 | Label each term with a corresponding label. 36 | 37 | Suppose there are 4 terms and 2 labels. Then:: 38 | 39 | term[0] -> label[0] 40 | term[1] -> label[1] 41 | term[2] -> label[0] 42 | term[3] -> label[1] 43 | 44 | :param terms: a list of sets 45 | :param labels: a list of strings 46 | """ 47 | labels = itertools.cycle(labels) 48 | for term in terms: 49 | term.add(next(labels)) 50 | 51 | 52 | def label_by_group(terms, labels): 53 | """ 54 | Split `terms` into `len(labels)` groups and mark each group accordingly. 55 | 56 | Suppose there are 4 terms and 2 labels. Then:: 57 | 58 | term[0] -> label[0] 59 | term[1] -> label[0] 60 | term[2] -> label[1] 61 | term[3] -> label[1] 62 | 63 | :param terms: a list of sets 64 | :param labels: a list of strings 65 | """ 66 | num_groups = len(terms) / len(labels) 67 | for i, group in enumerate(util.iter_group(terms, num_groups)): 68 | for term in group: 69 | term.add(labels[i]) 70 | 71 | 72 | def tin_key(samjna, pada=None): 73 | if pada: 74 | x = pada 75 | else: 76 | for x in PADA: 77 | if x in samjna: 78 | break 79 | for y in PURUSHA: 80 | if y in samjna: 81 | break 82 | for z in VACANA: 83 | if z in samjna: 84 | break 85 | 86 | return x, y, z 87 | 88 | 89 | base_samjna = [set(['tin']) for s in TIN] 90 | 91 | # 1.4.99 laH parasmaipadam 92 | # 1.4.100 taGAnAv Atmanepadam 93 | label_by_group(base_samjna, PADA) 94 | 95 | # 1.4.101 tiGas trINi trINi prathamamadhyamottamAH 96 | label_by_triplet(base_samjna, PURUSHA) 97 | 98 | # 1.4.102 tAnyekavacananadvivacanabahuvacanAnyekazaH 99 | label_by_item(base_samjna, VACANA) 100 | 101 | key2index = {tin_key(x): i for i, x in enumerate(base_samjna)} 102 | 103 | BASE_TIN = 'tip tas Ji sip Tas Ta mip vas mas'.split() 104 | LIT_TIN = 'Ral atus us Tal aTus a Ral va ma'.split() 105 | 106 | 107 | @O.Operator.no_params 108 | def tin_adesha(state, index, locus=None): 109 | """tiṅ ādeśa""" 110 | la = state[index] 111 | la_type = la.raw 112 | # TODO: remove hacks 113 | dhatuka = 'ardhadhatuka' if la_type == 'li~w' else 'sarvadhatuka' 114 | i = key2index[tin_key(la.samjna)] 115 | new_raw = TIN[i] 116 | tin = la.set_raw(new_raw).add_samjna('tin', dhatuka) 117 | return state.swap(index, tin) 118 | 119 | 120 | RULES = [ 121 | Anuvrtti(None, None, None), 122 | ('3.4.78', None, F.raw(*LA), None, tin_adesha), 123 | ('3.4.79', None, f('atmanepada') & f('wit'), None, O.ti('e')), 124 | ('3.4.80', 125 | None, f('atmanepada') & f('wit') & F.raw('TAs'), None, 126 | 'se'), 127 | ('3.4.81', 128 | None, f('atmanepada') & F.raw('ta', 'Ja') & f('li~w'), None, 129 | O.yathasamkhya(['ta', 'Ja'], ['eS', 'irec'])), 130 | ('3.4.82', 131 | None, F.raw(*BASE_TIN) & f('parasmaipada') & f('li~w'), None, 132 | O.yathasamkhya(BASE_TIN, LIT_TIN)), 133 | ] 134 | -------------------------------------------------------------------------------- /vyakarana/templates.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.templates 4 | ~~~~~~~~~~~~~~~~~~~ 5 | 6 | This module contains classes and functions that let us define 7 | the Ashtadhyayi's rules as tersely as possible. 8 | 9 | :license: MIT and BSD 10 | """ 11 | 12 | 13 | class RuleStub(object): 14 | 15 | """Wrapper for tuple rules. 16 | 17 | The Ashtadhyayi uses a variety of terms to control when and how a 18 | rule applies. For example, 'anyatarasyām' denotes that a rule 19 | specifies an optional operation that can be accepted or rejected. 20 | 21 | In this system, these terms are marked by wrapping a rule in this 22 | class or one of its subclasses. 23 | """ 24 | 25 | def __init__(self, name, left, center, right, op, **kw): 26 | #: Thte rule name 27 | self.name = name 28 | 29 | #: The rule context 30 | self.window = [left, center, right] 31 | 32 | #: The rule operator 33 | self.operator = op 34 | 35 | @property 36 | def center(self): 37 | return self.window[1] 38 | 39 | def __repr__(self): 40 | cls_name = self.__class__.__name__ 41 | if cls_name == 'RuleStub': 42 | cls_name = 'R' 43 | return '<%s(%s)>' % (cls_name, repr(self.name)) 44 | 45 | 46 | class Adhikara(RuleStub): 47 | 48 | def __init__(self, name, end, on_tuple=None): 49 | RuleStub.__init__(self, name, None, None, None, None) 50 | self.end = end 51 | self.on_tuple = on_tuple 52 | 53 | def transform_tuple(self, rule_tuple): 54 | if self.on_tuple is None: 55 | return rule_tuple 56 | return self.on_tuple(rule_tuple) 57 | 58 | 59 | class Ca(RuleStub): 60 | 61 | """Wrapper for a rule that contains the word "ca". 62 | 63 | "ca" has a variety of functions, but generally it preserves parts 64 | of the previous rule in the current rule. 65 | """ 66 | 67 | 68 | class Na(RuleStub): 69 | 70 | """Wrapper for a rule that just blocks other rules.""" 71 | 72 | 73 | class Nityam(RuleStub): 74 | 75 | """Wrapper for a rule that cannot be rejected. 76 | 77 | This is used to cancel earlier conditions. 78 | """ 79 | 80 | 81 | class Option(RuleStub): 82 | 83 | """Wrapper for a rule that can be accepted optionally. 84 | 85 | This is a superclass for a variety of optional conditions. 86 | """ 87 | 88 | 89 | class Anyatarasyam(Option): 90 | 91 | """Wrapper for a rule that is indifferently accepted. 92 | 93 | Modern scholarship rejects the traditional definition of anyatarasyām, 94 | but this system treats it as just a regular option. 95 | """ 96 | 97 | 98 | class Va(Option): 99 | 100 | """Wrapper for a rule that is preferably accepted. 101 | 102 | Modern scholarship rejects the traditional definiton of vā, but 103 | this system treats it as just a regular option. 104 | """ 105 | 106 | 107 | class Vibhasha(Option): 108 | 109 | """Wrapper for a rule that is preferably not accepted. 110 | 111 | Modern scholarship rejects the traditional definiton of vibhāṣā, 112 | but this system treats it as just a regular option. 113 | """ 114 | 115 | 116 | class Artha(Option): 117 | 118 | """Wrapper for a rule that applies only in some semantic condition. 119 | 120 | Since the semantic condition can be declined, this is essentially 121 | an optional provision. 122 | """ 123 | 124 | 125 | class Opinion(Option): 126 | 127 | """Wrapper for a rule that is accepted by prior opinion. 128 | 129 | Since the opinion can be declined, this is essentially the same as 130 | an optional provision. 131 | """ 132 | 133 | 134 | class Anuvrtti(object): 135 | 136 | def __init__(self, left=None, center=None, right=None, **kw): 137 | self.base_args = [left, center, right] 138 | self.base_kw = kw 139 | 140 | 141 | #: Signals use of the *śeṣa* device, which affects utsarga-apavāda 142 | #: inference. 143 | Shesha = object() 144 | -------------------------------------------------------------------------------- /vyakarana/dhatupatha.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.dhatupatha 4 | ~~~~~~~~~~~~~~~~~~~~ 5 | 6 | Coordinates access to the Dhātupāṭha. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import os 12 | from collections import defaultdict 13 | 14 | vyak = os.path.dirname(os.path.dirname(__file__)) 15 | DHATUPATHA_CSV = os.path.join(vyak, 'data', 'dhatupatha.csv') 16 | 17 | 18 | class Dhatupatha(object): 19 | 20 | """A collection of all verb roots in the Sanskrit language. 21 | 22 | This class makes it easy to select a continuous range of roots from 23 | the Dhātupāṭha and query for other properties of interest, such as 24 | the original gaṇa. 25 | 26 | All data is stored in a CSV file, which is read when the program 27 | begins. 28 | 29 | The Dhātupāṭha is traditionally given as a list of roots, each 30 | stated in upadeśa with a basic gloss. An example: 31 | 32 | 1.1 bhū sattāyām 33 | 34 | The first number indicates the root gaṇa, of which there are ten. 35 | This gaṇa determines the form that the root takes when followed by 36 | :term:`sārvadhātuka` affixes. The second number indicates the root's 37 | relative position within the gaṇa. 38 | 39 | Although few modern editions of the text have accent markings, the 40 | Sanskrit grammatical tradition has preserved the original accents 41 | all of the original items. Per the conventions of SLP1, these are 42 | written as follows: 43 | 44 | ======== ========= ========== ==== 45 | Accent SLP1 Devanagari IAST 46 | ======== ========= ========== ==== 47 | udātta (no mark) 48 | anudātta ``\\`` 49 | svarita ``^`` 50 | ======== ========= ========== ==== 51 | 52 | """ 53 | 54 | def __init__(self, filename=None): 55 | self.gana_map = {} 56 | 57 | #: List of all dhatu, one for each row in the original CSV file. 58 | self.all_dhatu = [] 59 | 60 | #: Maps a dhatu to its indices in `self.all_dhatu`. 61 | self.index_map = defaultdict(list) 62 | 63 | if filename is not None: 64 | self.init(filename) 65 | 66 | def __repr__(self): 67 | return '' % len(self.all_dhatu) 68 | 69 | def init(self, filename): 70 | """ 71 | :param filename: path to the Dhatupatha file 72 | """ 73 | with open(filename) as f: 74 | i = 0 75 | for line in f: 76 | if line.startswith('#') or not line.strip(): 77 | continue 78 | gana, number, dhatu = line.strip().split(',') 79 | self.all_dhatu.append(dhatu) 80 | self.index_map[dhatu].append(i) 81 | self.gana_map[i] = gana 82 | i += 1 83 | 84 | def dhatu_list(self, start, end=None): 85 | """Get an inclusive list of of dhatus. 86 | 87 | :param start: the first dhatu in the list 88 | :param end: the last dhatu in the list. If ``None``, add until 89 | the end of the gana. 90 | """ 91 | start_index = self.index_map[start][0] 92 | 93 | # From `start` to the end of the gana 94 | if end is None: 95 | gana = self.gana_map[start_index] 96 | returned = [] 97 | end_index = start_index 98 | while True: 99 | try: 100 | dhatu = self.all_dhatu[end_index] 101 | if self.gana_map[end_index] == gana: 102 | returned.append(dhatu) 103 | else: 104 | return returned 105 | except IndexError: 106 | return returned 107 | end_index += 1 108 | 109 | # From start to last instance of `end` (inclusive) 110 | else: 111 | end_index = self.index_map[end][-1] 112 | return self.all_dhatu[start_index:end_index + 1] 113 | 114 | def dhatu_set(self, *args): 115 | return frozenset(self.dhatu_list(*args)) 116 | 117 | 118 | #: A singleton instance available to all other modules. This has bad 119 | #: code smell, but I'm not compelled to change it. 120 | DHATUPATHA = Dhatupatha(DHATUPATHA_CSV) 121 | -------------------------------------------------------------------------------- /docs/defining_rules.rst: -------------------------------------------------------------------------------- 1 | Defining Rules 2 | ============== 3 | 4 | The machinery behind a given rule is often complex and complicated. But by 5 | abstracting away the right things, we can greatly reduce the code required 6 | per rule, often to just **one line** in length. 7 | 8 | Rule tuples 9 | ----------- 10 | 11 | A :term:`rule tuple` is a 5-tuple containing the following elements: 12 | 13 | 1. the rule name, e.g. ``'6.4.77'`` 14 | 2. the left context 15 | 3. the center context 16 | 4. the right context 17 | 5. the operator to apply 18 | 19 | These tuples contain the essential information needed to create a full rule, 20 | but they are often underspecified in various ways. Some examples: 21 | 22 | - A context can take the value ``True``, which means that the rule should use 23 | the context defined for the previous rule. 24 | - A context can take the value ``None``, which means that it uses the base 25 | filter (see :ref:`below `). 26 | - A context can be an arbitrary string. All contexts are post-processed with 27 | :func:`~vyakarana.filters.auto`, which converts them into actual 28 | :class:`~vyakarana.filters.Filter` objects. 29 | - An operator can be an arbitrary object, usually a string. The program 30 | usually does a good job of transforming these "operator strings" into actual 31 | :class:`~vyakarana.operators.Operator` objects. For example, if the operator 32 | is just ``'Nit'``, the program recognizes that this is an *it* and that the 33 | rule is assigning a *saṃjñā*. 34 | 35 | Rule tuples are usually contained in :class:`~vyakarana.templates.RuleTuple` 36 | objects, but most rules are just stated as tuples. 37 | 38 | Some example rule tuples, from throughout the program:: 39 | 40 | # Analogous extension of ṅit 41 | ('1.2.4', None, f('sarvadhatuka') & ~f('pit'), None, 'Nit'), 42 | 43 | # Adding vikaraṇa "śap" 44 | ('3.1.77', F.gana('tu\da~^'), None, None, k('Sa')), 45 | 46 | # Performing dvirvacana 47 | # do_dvirvacana is an unparameterized operator defined separately. 48 | ('6.1.8', None, ~f('abhyasta'), 'li~w', do_dvirvacana), 49 | 50 | # Vowel substitution 51 | # _6_4_77 is an unparameterized operator defined separately. 52 | ('6.4.77', None, snu_dhatu_yvor, None, _6_4_77), 53 | 54 | # Replacing 'jh' with 'a' 55 | ('7.1.3', None, None, None, O.replace('J', 'ant')), 56 | 57 | Those familiar with these rules will wonder why so much crucial information 58 | is missing (e.g. that the center context in 7.1.3 should be a *pratyaya*). 59 | This information is supplied in a special decorator, which we discuss now. 60 | 61 | .. _inherit: 62 | 63 | ``@inherit`` 64 | ------------ 65 | 66 | When an :class:`~vyakarana.ashtadhyayi.Ashtadhyayi` object is created, the 67 | system searches through all modules for functions decorated with the 68 | :func:`~vyakarana.rules.inherit` decorator. These functions create and return 69 | a list of rule tuples. An example:: 70 | 71 | @inherit(None, F.raw('Sap'), None) 72 | def sap_lopa(): 73 | return [ 74 | ('2.4.71', F.gana('a\da~'), None, None, 'lu~k'), 75 | ('2.4.74', F.gana('hu\\'), None, None, 'Slu~') 76 | ] 77 | 78 | :func:`~vyakarana.rules.inherit` takes at least 3 arguments, which correspond 79 | to the three contexts (left, center, and right). These arguments define 80 | :term:`base filters ` that are "and"-ed with all of the returned 81 | tuples. If the context in some rule tuple is ``None``, the system uses just 82 | the base filter. That is, the rules above will take the following form:: 83 | 84 | ('2.4.71', F.gana('a\da~'), F.raw('Sap'), None, 'lu~k'), 85 | ('2.4.74', F.gana('hu\\'), F.raw('Sap'), None, 'Slu~') 86 | 87 | Rule conditions 88 | --------------- 89 | 90 | The majority of the Ashtadhyayi's rules consists of some context window and an 91 | operator. But many rules are modified by some other term, such as *na* 92 | (blocking) or *vibhāṣā* (optionality). These terms are defined as subclasses 93 | of :class:`~vyakarana.templates.RuleTuple`:: 94 | 95 | # 'iṭ' augment denied 96 | Na('7.2.8', None, None, f('krt') & F.adi('vaS'), U('iw')), 97 | 98 | #: Denied in another context 99 | Ca('7.2.9', None, f('krt') & titutra, None, True), 100 | 101 | Converting tuples to rules 102 | -------------------------- 103 | 104 | To interpret a rule tuple, we need: 105 | 106 | - the tuple itself 107 | - the previous tuple 108 | - any base filters defined in the :func:`~vyakarana.rules.inherit` function. 109 | 110 | These are combined as described above. For details, see 111 | :func:`vyakarana.inference.create_rules`. 112 | -------------------------------------------------------------------------------- /docs/modeling_rules.rst: -------------------------------------------------------------------------------- 1 | Modeling Rules 2 | ============== 3 | 4 | As a reminder, this is how :ref:`ordinary rules ` are usually 5 | structured: 6 | 7 | - C is replaced by X (when L comes before C) (when C comes before R). 8 | - C is called X (when L comes before C) (when C comes before R). 9 | - X is inserted after L (when L comes before R). 10 | - C does not accept rule Y (when L comes before C) (when C comes before X). 11 | 12 | We can rewrite these templates into a more general form: 13 | 14 | When we see some context window W, perform some operation O. 15 | 16 | where *W* is an arbitrary set of contexts and *O* is an abstraction for some 17 | arbitrary change, such as: 18 | 19 | - replacing C with X 20 | - calling C by the name of X 21 | - inserting X after L 22 | - blocking rule Y on C 23 | 24 | With this general form in mind, we can decompose a rule model into two parts: 25 | 26 | - matching a context. To do so, we use :term:`filters `. 27 | - applying an operation. To do so, we use :term:`operators `. 28 | 29 | Or in other words: filters *test* and operators *transform*. 30 | 31 | Filters 32 | ------- 33 | 34 | A :class:`~vyakarana.filters.Filter` is a callable object that accepts a state 35 | and index, performs some test on ``state[index]``, and returns ``True`` or 36 | ``False`` as appropriate. For example, the :class:`~vyakarana.filters.samjna` 37 | filter returns whether or not ``state[index]`` has some particular samjna. 38 | 39 | If all of a rule's filters return ``True``, then the rule has scope to apply. 40 | 41 | In older version of the code base, filters were functions that accepted an 42 | :class:`~vyakarana.upadesha.Upadesha` and returned ``True`` or ``False``. This 43 | approach changed for two reasons: 44 | 45 | - A few filters require global access to the state. If they accept just a 46 | single `term`, there`s no way to get information on the rest of the state. 47 | So filters were changed to accept state-index pairs. 48 | - Usually, a rule`s filter is a combination of two other filters. One nice 49 | way to do this is to use Python's unary operators (e.g. ``&``, ``|``). But 50 | custom operators are supported only for class instances. So filters were 51 | changed to class instances. 52 | 53 | Parameterized filters 54 | ^^^^^^^^^^^^^^^^^^^^^ 55 | 56 | *Parameterized filters* group filters into families and make it easier to 57 | create a lot of related filters. Specifically, they are classes that can be 58 | instantiated (parameterized) by passing arguments. 59 | 60 | For example, the :class:`~vyakarana.filters.al` class tests whether a term 61 | has a particular final letter:: 62 | 63 | ac = al('ac') 64 | ak = al('ak') 65 | hal = al('hal') 66 | 67 | .. note:: 68 | Parameterized filters have lowercase names for historical reasons. Also, 69 | they better match the names for unparameterized filters, e.g. 70 | ``al('i') & ~samyogapurva``. 71 | 72 | Combining filters 73 | ^^^^^^^^^^^^^^^^^ 74 | 75 | We can create new filters by using Python's unary operators. 76 | 77 | We can invert a filter ("not"):: 78 | 79 | # ekac: having one vowel 80 | anekac = ~ekac 81 | 82 | take the intersection of two filters ("and"):: 83 | 84 | # samyoga: ending in a conjunct consonant 85 | # samjna('dhatu'): having 'dhatu' samjna 86 | samyoga_dhatu = samyoga & samjna('dhatu') 87 | 88 | and take the union of two filters ("or"):: 89 | 90 | # raw('Snu'): raw value is the 'nu' of e.g. 'sunute', 'Apnuvanti' 91 | # samjna('dhatu'): having 'dhatu' samjna 92 | # raw('BrU'): raw value is 'BrU' 93 | snu_dhatu_bhru = raw('Snu') | samjna('dhatu') | raw('BrU') 94 | 95 | Operators 96 | --------- 97 | 98 | An :class:`~vyakarana.operators.Operator` is a callable object that accepts a 99 | state and index, performs some operation, and returns the result. For example, 100 | the :class:`~vyakarana.operators.guna` operator applies guna to 101 | ``state[index]`` and returns a new state. 102 | 103 | Parameterized operators 104 | ^^^^^^^^^^^^^^^^^^^^^^^ 105 | 106 | *Parameterized operators* group operators into families and make it easier to 107 | create a lot of related operators. Specifically, they are classes that can be 108 | instantiated (parameterized) by passing arguments. 109 | 110 | For example, the :class:`~vyakarana.operators.al_tasya` class does arbitrary 111 | letter substitution:: 112 | 113 | # ku h: k, kh, g, gh, ṅ, h 114 | # cu: c, ch, j, jh, ñ 115 | kuhos_cu = al_tasya('ku h', 'cu') 116 | 117 | # f: ṛ, ṝ 118 | # at: a 119 | ur_at = al_tasya('f', 'at') 120 | 121 | .. note:: 122 | Parameterized operators have lowercase names for historical reasons. 123 | Also, they better match the names for unparameterized operators. 124 | -------------------------------------------------------------------------------- /docs/rule_types.rst: -------------------------------------------------------------------------------- 1 | Rule Types 2 | ========== 3 | 4 | The Ashtadhyayi has **ordinary rules**, which take some input and yield some 5 | output(s), and metarules, which describe how to interpret other rules. 6 | 7 | .. note:: 8 | The types loosely correspond to the traditional classification, but there 9 | is no 1:1 mapping. 10 | 11 | .. _ordinary-rules: 12 | 13 | Ordinary rules 14 | -------------- 15 | 16 | Ordinary rules, or just "rules" for short, are the bulk of the Ashtadhyayi. 17 | These rules accept a list of terms as input, where a **term** is some group 18 | of sounds. For example, the input to a rule might be something like 19 | *ca + kṛ + a*. Outputs have the same form. 20 | 21 | There are various kinds of ordinary rules; 22 | 23 | - rules that substitute 24 | - rules that designate 25 | - rules that insert 26 | - rules that block 27 | 28 | These are described below. 29 | 30 | .. _iko-yan-aci: 31 | 32 | Substituting 33 | ^^^^^^^^^^^^ 34 | 35 | Most rules **substitute** one term for another. They look something like this: 36 | 37 | C is replaced by X (when L comes before C) (when C comes before R). 38 | 39 | Here, *L*, *C*, *R*, and *X* are terms: 40 | 41 | - *L* is the **left context** and appears immediately before *C*. Not all 42 | rules use it. 43 | - *R* is the **right context** and appears immediately after *C*. Not all 44 | rules use it. 45 | - *C* is the **center context**. It defines where the substitution occurs. 46 | - *X* is the **replacement**. It defines the new value for *C*. 47 | 48 | For each input, we look for a place where we have *L*, *C*, and *R* in order. 49 | Then we replace *C* with X. 50 | 51 | For example, rule 6.1.77 of the Ashtadhyayi states that simple vowels (or 52 | *ik*, if we use a *pratyāhāra*) are replaced by semivowels (*yaṇ*) when 53 | followed by other vowels (*ac*). Given this input: 54 | 55 | *ca + kṛ + a* 56 | 57 | we have a match when *C = ṛ* and *R = a*. (*L* is unspecified, so we ignore 58 | it.) We replace with *X = r* to get our output: 59 | 60 | *ca + kṛ + a → ca + kr + a* 61 | 62 | Designating 63 | ^^^^^^^^^^^ 64 | 65 | Some rules **designate** a term by assigning some name to it. They look 66 | something like this: 67 | 68 | C is called X (when L comes before C) (when C comes before R). 69 | 70 | where *X* is the name given to the center context *C*. 71 | 72 | For example, rule 1.3.1 states that items in the Dhatupatha are called 73 | :term:`dhātu` ("root") Given this input: 74 | 75 | *bhū* 76 | 77 | we have a match where *C = bhū*, with *L* and *R* unspecified. We then give 78 | *bhū* the name "dhātu." In other words, *bhū* is a *dhātu*. 79 | 80 | Inserting 81 | ^^^^^^^^^ 82 | 83 | Of the rules left, most **insert**: 84 | 85 | X is inserted after L (when L comes before R). 86 | 87 | For example, rule 3.1.68 states that *a* is inserted after a verb root when 88 | the root is followed by a certain kind of verb ending. Given this input: 89 | 90 | *car + ti* 91 | 92 | we have a match where *L = car* and *R = ti*. So, we insert *X = a* to get 93 | our output: 94 | 95 | *car + ti → car + a + ti* 96 | 97 | Blocking 98 | ^^^^^^^^ 99 | 100 | Some rules are used to *block* other rules from occurring: 101 | 102 | C does not accept rule X (when L comes before C) (when C comes before R). 103 | 104 | For example, rule 1.1.5 blocks *guṇa* substitution if the right context has 105 | a certain property. 106 | 107 | Other rules 108 | ^^^^^^^^^^^ 109 | 110 | A few rules are combinations of the ones above. For example, rule 3.1.80 111 | inserts one term then performs a substitution on another. 112 | 113 | Metarules 114 | --------- 115 | 116 | Metarules define the metalanguage used by the Ashtadhyayi. Since we're using 117 | our own metalanguage (Python), many of these metarules are modeled implicitly. 118 | 119 | There are basically two kinds of metarules: 120 | 121 | - rules that help us interpret other rules 122 | - rules that provide useful context for other rules 123 | 124 | These are described below. 125 | 126 | Interpreting 127 | ^^^^^^^^^^^^ 128 | 129 | Most metarules are intended to help us understand what rules in the 130 | Ashtadhyayi mean. Such rules are called **paribhāṣā**. Some examples: 131 | 132 | | Terms in case 6 define the center context. (1.1.49) 133 | | Terms in case 7 (*tasmin*) define the right context. (1.1.66) 134 | | Terms in case 5 (*tasmāt*) define the left context. (1.1.67) 135 | | If *X* is just a single letter, then only the last letter of *C* is 136 | replaced. (1.1.52) 137 | 138 | Contextualizing 139 | ^^^^^^^^^^^^^^^ 140 | 141 | All other metarules provide some extra context for other rules. Such rules 142 | are called **adhikāra**. Some examples: 143 | 144 | | In the rules below, all inserted terms are called *pratyaya*. (3.1.1) 145 | | In the rules below, *L* and *R* together are replaced by *X*. (6.1.84) 146 | -------------------------------------------------------------------------------- /docs/terms.rst: -------------------------------------------------------------------------------- 1 | Terms and Data 2 | ============== 3 | 4 | The rules of the Ashtadhyayi accept a list of **terms** as input and produce 5 | a new list of terms as output. Let's start by discussing what terms are and 6 | what information they contain. 7 | 8 | Throughout this section, our working example will be *ca + kṛ + a*, a sequence 9 | of three terms. Depending on the data attached to these terms, this sequence 10 | can yield a variety of outputs: 11 | 12 | - *cakāra* ("he/I did", perfect tense) 13 | - *cakara* ("I did", perfect tense) 14 | - *cakra* ("he did", perfect tense) 15 | 16 | 17 | Sounds 18 | ------ 19 | 20 | Our example has three terms, each of which represents a piece of sound. 21 | These "pieces of sound" usually represent morphemes, but that's not always 22 | the case. 23 | 24 | We'll have more to say about these sounds later, but for now they're 25 | pretty straightforward. 26 | 27 | 28 | *Saṃjñā* 29 | -------- 30 | 31 | Each term has a variety of designations (**saṃjñā**) associated with it. 32 | These *saṃjñā*, which are assigned by the Ashtadhyayi itself, enable 33 | some rules and block others. By assigning names to different terms and 34 | changing which rules can be used, the system can guide the original 35 | input toward the desired output. 36 | 37 | Our example uses the following *saṃjñā*: 38 | 39 | =============== ============= ==================== 40 | ca kṛ a 41 | =============== ============= ==================== 42 | :term:`abhyāsa` :term:`dhātu` :term:`pratyaya` 43 | _ _ :term:`vibhakti` 44 | _ _ :term:`tiṅ` 45 | _ _ :term:`ārdhadhātuka` 46 | =============== ============= ==================== 47 | 48 | In addition, *ca + kṛ* together are called both :term:`abhyasta` and 49 | :term:`aṅga`. 50 | 51 | Some examples of what these *saṃjñā* do: 52 | 53 | - *dhātu* allows the rule that creates the *abhyāsa*. 54 | - *abhyāsa* allows a rule that changes *ka* to *ca*. 55 | - *ārdhadhātuka* allows a rule that strengthens the vowel of the term before it. 56 | 57 | *it* tags 58 | --------- 59 | 60 | Terms also use a second set of designations, which we can call **it** tags. 61 | Just a shirt might have a label that tells us how to wash it, a term might 62 | have an *it* that tells us how it behaves in certain contexts. 63 | 64 | For example, *kṛ* has two *it* tags. The first is *ḍu*, and it allows *kṛ* to 65 | take a certain suffix. The second is *ñ*, and it allows *kṛ* to use both 66 | :term:`parasmaipada` and :term:`ātmanepada` endings in its verbs. *it* tags 67 | are attached directly to the term of interest, like so: 68 | 69 | *ḍukṛñ* 70 | 71 | We can remove *it* tags by applying some metarules. For some term T, the 72 | following are *it* tags: 73 | 74 | - nasal vowels (1.3.2) 75 | - at the end of T: 76 | 77 | - consonants (1.3.3) 78 | - but not {*t, th, d, dh, n, s, m*} when T is a :term:`vibhakti` (1.3.4) 79 | 80 | - at the beginning of T: 81 | 82 | - *ñi*, *ṭu*, and *ḍu* (1.3.5) 83 | 84 | - at the beginning of T, if T is a :term:`pratyaya`: 85 | 86 | - *ṣ* (1.3.6) 87 | - *c, ch, j, jh, ñ, ṭ, ṭh, ḍ, ḍh, ṇ* (1.3.7) 88 | - *l, ś, k, kh, g, gh, ṅ* if not a *taddhita* suffix 89 | 90 | *it* tags are not letters in any meaningful sense, and they have no meaning 91 | outside of the metalanguage of the Ashtadhyayi. In other words, all they do 92 | is describe certain properties; they have no deeper linguistic meaning and are 93 | not a fundamental part of Sanskrit. So if you see a term like *ḍukṛñ*, you 94 | should read it as: 95 | 96 | *kṛ* with the *it* tags *ḍu* and *ñ*. 97 | 98 | The *it* tags are often stated with the word *it* after them. Thus *ḍvit* and 99 | *ñit*. A term stated with its *it* letters is called the **upadeśa** of the 100 | term. Thus *ḍukṛñ* is the **upadeśa** of the root *kṛ*. 101 | 102 | Usage 103 | ^^^^^ 104 | 105 | *it* tags are basically just *saṃjñā* that are expressed more tersely. 106 | 107 | To illustrate how alike these two are, let's return to our *ca + kṛ + a* 108 | example. We saw above that this sequence can yield three different results. 109 | But the result depends on the *saṃjñā* and *it* tags applied to the suffix *a*. 110 | As you read on, note how the different *saṃjñā* and *it* tags interact. 111 | 112 | - If the *upadeśa* is just *a*, then rule 1.2.5 tags the suffix with *kit*. 113 | This prevents :term:`guṇa`. After a few more rules, we get *cakra* for our 114 | result. 115 | - If the *upadeśa* is *ṇal*, the suffix has *ṇit*, which causes :term:`vṛddhi`. 116 | After a few more rules, we get *cakāra* for our result. 117 | - If the *upadeśa* is *ṇal*, the suffix has *ṇit*. But if the suffix has 118 | *uttama* as a *saṃjñā* -- that is, if it is in the first person -- then *ṇit* 119 | is used only optionally. If we reject *ṇit*, then the *ārdhadhātuka-saṃjñā* 120 | causes :term:`guṇa`. After a few more rules, we get *cakara* for our result. 121 | 122 | The :ref:`glossary ` describes the most common *it* tags and some 123 | of the roles they perform. Many *it* tags are overloaded to provide a variety 124 | of different functions. 125 | -------------------------------------------------------------------------------- /docs/glossary.rst: -------------------------------------------------------------------------------- 1 | Glossary 2 | ======== 3 | 4 | Sanskrit 5 | -------- 6 | 7 | Generally, these are used to describe concepts from the grammatical tradition. 8 | 9 | .. glossary:: 10 | aṅga 11 | _ 12 | 13 | anubandha 14 | See :term:`it`. 15 | 16 | abhyāsa 17 | If a term is doubled, *abhyāsa* refers to the first part. 18 | 19 | abhyasta 20 | If a term is doubled, *abhyasta* refers to the two parts together. 21 | 22 | ātmanepada 23 | The last 9 tiṅ suffixes. 24 | 25 | ārdhadhātuka 26 | Refers to certain kinds of verb suffixes. 27 | 28 | Aṣṭādhyāyī 29 | Ashtadhyayi 30 | A list of rules. It takes some input and produces one or more valid 31 | Sanskrit expressions. 32 | 33 | it 34 | An indicatory letter. 35 | 36 | upadeśa 37 | A term stated with its indicatory letters (:term:`it`). 38 | 39 | guṇa 40 | An operation that strengthens a vowel to the "medium" level 41 | (*a, e, o*, but *ṛ* and *ṝ* become *ar*). Also refers to the result 42 | of this operation. 43 | 44 | vṛddhi 45 | An operation that strengthens a vowel to the "strong" level 46 | (*ā, ai, au*, but *ṛ* and *ṝ* become *ār*). Also refers to the result 47 | of this operation. 48 | 49 | tiṅ 50 | Refers to one of the 18 basic verb suffixes: 9 in :term:`parasmaipada` 51 | and 9 in :term:`ātmanepada`. 52 | 53 | dhātu 54 | A verb root. 55 | 56 | Dhātupāṭha 57 | Dhatupatha 58 | A list of verb roots. These roots are used as input to the Ashtadhyayi. 59 | 60 | parasmaipada 61 | The first 9 tiṅ suffixes. 62 | 63 | pratyaya 64 | A suffix. 65 | 66 | vibhakti 67 | A triplet of noun/verb endings. Also, an ending within that triplet. 68 | 69 | saṃjñā 70 | A technical name that is assigned to a group of terms. For 71 | example, *pratyaya* is a *saṃjñā* for the set of all suffixes. 72 | 73 | sārvadhātuka 74 | Refers to certain kinds of verb suffixes. Generally, :term:`tiṅ` and 75 | :term:`śit` suffixes receive this saṃjñā. 76 | 77 | sthānī 78 | In a substitution, the term where the substitution occurs. 79 | 80 | 81 | English 82 | ------- 83 | 84 | Generally, these are used to describe concepts in the program. 85 | 86 | .. glossary:: 87 | 88 | base filter 89 | A filter defined in an :func:`~vyakarana.rules.inherit` decorator. 90 | It is "and"-ed with all of the rule tuples created by the decorated 91 | function. 92 | 93 | center context 94 | The term that undergoes substitution. In a *saṃjñā* rule: the term 95 | that receives the *saṃjñā*. 96 | 97 | filter 98 | A callable object that is used to test for a certain context. For 99 | details, see the :class:`~vyakarana.filters.Filter` class. 100 | 101 | left context 102 | The term(s) that appear immediately before the center context. If no 103 | center context is defined: the term(s) after which something is 104 | inserted. 105 | 106 | metarule 107 | A rule that defines part of the metalanguage of the Ashtadhyayi. Some 108 | are explicitly stated, but many are implicit. 109 | 110 | operator 111 | A callable object that is used to apply an operation to a state. For 112 | details, see the :class:`~vyakarana.operators.Operator` class. 113 | 114 | ordinary rule 115 | A rule that takes some input and produces some output(s). In this 116 | documentation, such rules are usually just called "rules." 117 | 118 | right context 119 | The term(s) that appear immediately after the center context. If no 120 | center context is defined: the term(s) before which something is 121 | inserted. 122 | 123 | rule tuple 124 | A special shorthand for specifying rules of the Ashtadhyayi. This must 125 | be expanded into a full :class:`~vyakarana.rules.Rule` definition 126 | before it can be used. 127 | 128 | .. _it-glossary: 129 | 130 | *it* tags 131 | --------- 132 | 133 | .. glossary:: 134 | kit 135 | Prevents *guṇa* and *vṛddhi*. If a replacement is marked with *k*, it 136 | is added to the end of the :term:`sthānī`. 137 | 138 | ṅit 139 | Prevents *guṇa* and *vṛddhi*. If a replacement is marked with *ṅ*, it 140 | replaces the last letter of the *sthānī*. 141 | 142 | ñit 143 | Causes *vṛddhi* for certain vowels. 144 | 145 | ṭit 146 | If a replacement is marked with *ṭ*, it is added to the beginning of 147 | the *sthānī*. If a *lakāra* is marked with *ṭ*, then it undergoes 148 | some basic rules, e.g. replacement of *thās* with *se*. 149 | 150 | ṇit 151 | Causes *vṛddhi* for certain vowels. 152 | 153 | pit 154 | Causes *anudātta* accent on a :term:`pratyaya`. A :term:`sārvadhātuka` 155 | suffix not marked by *p* is treated as :term:`ṅit`. 156 | 157 | mit 158 | If a replacement is marked with *m*, it is inserted after the last 159 | vowel of the *sthānī*. 160 | 161 | śit 162 | If a replacement is marked with *ś*, it replaces the entire *sthānī*. 163 | Generally, a :term:`pratyaya` marked with *ś* can be called 164 | :term:`sārvadhātuka`. 165 | -------------------------------------------------------------------------------- /vyakarana/trees.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.trees 4 | ~~~~~~~~~~~~~~~ 5 | 6 | Functions for reading and interpreting the rules of the Ashtadhayayi. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import itertools 12 | from collections import defaultdict 13 | 14 | from templates import * 15 | 16 | 17 | def find_apavada_rules(rules): 18 | """Find all utsarga-apavāda relationships in the given rules. 19 | 20 | :param rules: a list of rules 21 | :returns: a `dict` that maps a rule to its apavāda rules. 22 | """ 23 | apavadas = defaultdict(set) 24 | for i, rule in enumerate(rules): 25 | # 'na' negates an operator, so we can just match on operators. 26 | if rule.modifier == Na: 27 | for other in rules: 28 | if (rule.operator == other.operator and rule != other): 29 | apavadas[other].add(rule) 30 | else: 31 | # For a śeṣa rule, an apavāda comes before the rule: 32 | if rule.modifier == Shesha: 33 | rule_slice = itertools.islice(rules, 0, i) 34 | # But generally, an apavāda comes after the rule: 35 | else: 36 | rule_slice = itertools.islice(rules, i, None) 37 | 38 | new_apavadas = (r for r in rule_slice if rule.has_apavada(r)) 39 | apavadas[rule].update(new_apavadas) 40 | 41 | return apavadas 42 | 43 | 44 | class RuleTree(object): 45 | 46 | """A hierarchical arrangment of rules. 47 | 48 | There are roughly 4000 rules in the Ashtadhyayi, almost all of which 49 | define operations on some input sequence. Since any of these rules 50 | could apply at any given moment, we must check all R rules against 51 | each state. And since a rule could apply to any of the T terms within 52 | a state, we must check against all terms as well. This leaves us with 53 | RT candidates for each state. 54 | 55 | By arranging rules hierarchically, we greatly reduce the number of 56 | comparisons we have to make. Rule selection becomes roughly log(RT). 57 | """ 58 | 59 | def __init__(self, rules, ranker=None, used_features=None): 60 | # HACK 61 | if ranker is not None: 62 | self.ranked_rules = sorted(rules, key=ranker, reverse=True) 63 | apavadas = find_apavada_rules(rules) 64 | for rule, values in apavadas.iteritems(): 65 | rule.apavada = values 66 | for a in values: 67 | a.utsarga.append(rule) 68 | 69 | #: A list of rules that could not be subdivided any further. 70 | #: This is usually because the rule is unspecified in some way. 71 | self.rules = [] 72 | #: Maps from features to :class:`RuleTree` subtrees. 73 | self.features = {} 74 | used_features = used_features or frozenset() 75 | 76 | # Maps a feature tuple to a list of rules 77 | feature_map = defaultdict(list) 78 | for rule in rules: 79 | appended = False 80 | for feat in rule.features(): 81 | if feat not in used_features: 82 | feature_map[feat].append(rule) 83 | appended = True 84 | 85 | # No special features: just append to our rule list. 86 | if not appended: 87 | self.rules.append(rule) 88 | 89 | # Sort from most general to most specific. 90 | buckets = sorted(feature_map.iteritems(), key=lambda p: -len(p[1])) 91 | 92 | seen = set() 93 | for feat, rule_list in buckets: 94 | unseen = [r for r in rule_list if r not in seen] 95 | if not unseen: 96 | continue 97 | subtree = RuleTree(rules=unseen, 98 | used_features=used_features | set([feat])) 99 | self.features[feat] = subtree 100 | seen.update(rule_list) 101 | 102 | def __len__(self): 103 | """The number of rules in the tree.""" 104 | self_len = len(self.rules) 105 | return self_len + sum(len(v) for k, v in self.features.iteritems()) 106 | 107 | def candidates(self, state): 108 | """Generate all rule-index pairs that could apply to the state. 109 | 110 | :param state: the current state 111 | """ 112 | state_indices = range(len(state)) 113 | candidates = [self.select(state, i) for i in state_indices] 114 | 115 | for i, ra in enumerate(self.ranked_rules): 116 | for ia in state_indices: 117 | if ra in candidates[ia]: 118 | yield ra, ia 119 | 120 | def pprint(self, depth=0): 121 | """Pretty-print the tree.""" 122 | if self.rules: 123 | rule_token = [x.name for x in self.rules] 124 | print ' ' * depth, '[%s] %s' % (len(self.rules), rule_token) 125 | for feature, tree in self.features.iteritems(): 126 | print ' ' * depth, '[%s]' % len(tree), feature 127 | tree.pprint(depth + 1) 128 | 129 | def select(self, state, index): 130 | """Return a set of rules that might be applicable. 131 | 132 | :param state: the current :class:`State` 133 | :param index: the current index 134 | """ 135 | selection = set(self.rules) 136 | 137 | for feature, tree in self.features.iteritems(): 138 | filt, i = feature 139 | j = index + i 140 | if j >= 0 and filt.allows(state, j): 141 | selection.update(tree.select(state, index)) 142 | 143 | return selection 144 | -------------------------------------------------------------------------------- /vyakarana/siddha.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.siddha 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | Rules in the asiddha and asiddhavat sections of the Ashtadhyayi. 7 | 8 | :license: MIT and BSD 9 | """ 10 | from sounds import Sounds, Sound, Pratyahara 11 | from terms import Upadesha 12 | from util import SoundEditor, SoundIndex 13 | 14 | 15 | def asiddha_helper(state): 16 | """Chapter 8.2 of the Ashtadhyayi starts the 'asiddha' section of 17 | the text: 18 | 19 | 8.2.1 pUrvatrAsiddham 20 | 21 | The asiddha section lasts until the end of the text, and for that 22 | reason, it is often called the tripAdI ("having three pAdas"). 23 | 24 | The rules in the tripAdI are treated as not having taken effect 25 | ('asiddha') as far as the prior rules are concerned. This is an 26 | abstract notion, but practically it means that these are the last 27 | rules we apply in a derivation. 28 | 29 | :param state: 30 | """ 31 | 32 | had_rs = False 33 | 34 | editor = SoundEditor(state) 35 | for c in editor: 36 | p = c.prev 37 | n = c.next 38 | n2 = n.next 39 | 40 | w, x, y, z = (p.value, c.value, n.value, n2.value) 41 | 42 | # 8.2.29 skoH saMyogAdyor ante ca 43 | # TODO: pada end 44 | if x in 'sk' and y in Sounds('hal') and z in Sounds('Jal'): 45 | x = '_' 46 | 47 | if y in Sounds('Jal'): 48 | # 8.2.30 coH kuH 49 | cu = Sounds('cu') 50 | if x in cu and y in Sounds('Jal') and y not in cu: 51 | x = Sound(x).closest(Sounds('ku')) 52 | 53 | # 8.2.31 ho DhaH 54 | elif x == 'h': 55 | x = 'Q' 56 | 57 | # 8.2.36 vrazca-bhrasja-sRja-mRja-yaja-rAja-bhrAjacCazAM SaH 58 | roots = {'vraSc', 'Brasj', 'sfj', 'mfj', 'yaj', 'rAj', 'BrAj'} 59 | if c.last and (c.term.value in roots or c.term.antya in 'SC'): 60 | x = 'z' 61 | 62 | # 8.2.40 (TODO: not dhA) 63 | if w in Sounds('Jaz') and x in 'tT': 64 | x = 'D' 65 | elif x == 'D' and y in 'tT': 66 | continue 67 | 68 | # 8.2.41 SaDhoH kaH si 69 | elif x in 'zQ' and y == 's': 70 | x = 'k' 71 | 72 | # 8.2.41 SaDhoH kaH si 73 | if x in 'zQ' and y == 's': 74 | x = 'k' 75 | 76 | # 8.3.23 mo 'nusvAraH 77 | # elif x == 'm' and y in Sounds('hal'): 78 | # x = 'M' 79 | 80 | # 8.3.24 naz cApadAntasya jhali 81 | elif x in 'mn' and y in Sounds('Jal'): 82 | x = 'M' 83 | 84 | # 8.3.59 AdezapratyayayoH 85 | if w in Sounds('iN ku'): 86 | if not c.last and x == 's' and (c.term.raw[0] == 'z' 87 | or 'pratyaya' in c.term.samjna): 88 | x = 'z' 89 | 90 | # 8.3.78 iNaH SIdhvaMluGliTAM dho 'GgAt 91 | # 8.3.79 vibhASeTaH 92 | # TODO: SIdhvam, luG 93 | if (x == 'D' 94 | and w in Pratyahara('iR', second_R=True) 95 | and c.first # not triggered by iT 96 | and 'li~w' in c.term.lakshana): 97 | x = 'Q' 98 | 99 | # 8.4.1 raSAbhyAM no NaH samAnapade 100 | # 8.4.2 aTkupvAGnuMvyavAye 'pi 101 | # According to commentary, 8.4.1 also applies to 'f' and 'F'. 102 | # TODO: AG, num 103 | if x in 'rzfF': 104 | had_rs = True 105 | elif x == 'n' and had_rs and p.term.value != 'kzuB': 106 | x = 'R' 107 | had_rs = False 108 | elif x not in Sounds('aw ku pu'): 109 | had_rs = False 110 | 111 | stu = Sounds('s tu') 112 | if x in stu: 113 | 114 | # 8.4.40 stoH zcunA zcuH 115 | # 8.4.44 zAt (na) 116 | scu = Sounds('S cu') 117 | if w == 'S': 118 | pass 119 | elif w in scu or y in scu: 120 | x = Sound(x).closest(scu) 121 | 122 | # 8.4.41 STunA STuH 123 | zwu = Sounds('z wu') 124 | if w in zwu or y in zwu: 125 | x = Sound(x).closest(zwu) 126 | 127 | if x in Sounds('Jal'): 128 | x_ = x 129 | 130 | # 8.4.53 jhalAM jaz jhazi 131 | if y in Sounds('JaS'): 132 | x = Sound(x_).closest(Sounds('jaS')) 133 | 134 | # 8.4.54 abhyAse car ca 135 | if 'abhyasa' in c.term.samjna and c.first: 136 | x = Sound(x_).closest(Sounds('car jaS')) 137 | 138 | # 8.4.55 khari ca 139 | if y in Sounds('Kar'): 140 | x = Sound(x_).closest(Sounds('car')) 141 | 142 | # 8.4.58 anusvArasya yayi parasavarNaH 143 | if x == 'M' and y in Sounds('yay'): 144 | x = Sound(x).closest(Sound(y).savarna_set) 145 | 146 | c.value = x if x != '_' else '' 147 | 148 | yield editor.join() 149 | 150 | 151 | def asiddhavat(state): 152 | """ 153 | The 'asiddhavat' section of the text starts in 6.4 and lasts until 154 | the end of the chapter: 155 | 156 | 6.4.22 asiddhavad atrAbhAt 157 | 158 | :param state: 159 | """ 160 | 161 | 162 | def asiddha(state): 163 | state_value, result_value = (''.join(x.asiddha for x in state), None) 164 | for result in asiddha_helper(state): 165 | result_value = ''.join(x.asiddha for x in result) 166 | if result_value == state_value: 167 | yield state 168 | return 169 | else: 170 | for x in asiddha(result): 171 | yield x 172 | return 173 | -------------------------------------------------------------------------------- /vyakarana/adhyaya6/pada4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.adhyaya6.pada4 4 | ~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | Ashtadhyayi 6.4 7 | 8 | The chapter starts with an adhikāra aṅga: 9 | 10 | 6.4.1 aṅgasya 11 | 12 | which lasts until the end of 7.4. 13 | 14 | Some of the rules contained in this section apply in filters where 15 | only a dhātu would make sense. But since a dhātu is a type of aṅga, 16 | there's no harm in matching on an aṅga generally. 17 | 18 | :license: MIT and BSD 19 | """ 20 | 21 | from .. import filters as F, operators as O 22 | from ..sounds import Sounds 23 | from ..templates import * 24 | from ..terms import Upadesha as U 25 | 26 | f = F.auto 27 | 28 | 29 | @O.DataOperator.no_params 30 | def shnam_lopa(value): 31 | ac = Sounds('ac') 32 | nasal = Sounds('Yam') 33 | letters = list(reversed(value)) 34 | for i, L in enumerate(letters): 35 | if L in ac: 36 | break 37 | if L in nasal: 38 | letters[i] = '' 39 | break 40 | return ''.join(reversed(letters)) 41 | 42 | 43 | @O.DataOperator.no_params 44 | def bhrasjo_ram(value, **kw): 45 | return 'Barj' 46 | 47 | 48 | @O.Operator.no_params 49 | def iyan_uvan(state, index, locus): 50 | iyan = O.tasya(U('iya~N')) 51 | uvan = O.tasya(U('uva~N')) 52 | 53 | cur = state[index] 54 | if cur.antya in 'iI': 55 | return iyan.apply(state, index, locus) 56 | else: 57 | return uvan.apply(state, index, locus) 58 | 59 | iyan_uvan.category = 'tasya' 60 | 61 | GAMA_HANA_JANA = f('ga\\mx~', 'ha\\na~', 'janI~\\', 'Kanu~^', 'Gasx~') 62 | 63 | snu_dhatu_yvor = f('Snu', 'dhatu', 'BrU') & F.al('i u') 64 | 65 | # TODO: anekac 66 | anekac_asamyogapurva = f('dhatu') & ~F.samyogapurva 67 | 68 | 69 | @O.DataOperator.no_params 70 | def allopa(value): 71 | letters = list(reversed(value)) 72 | for i, L in enumerate(letters): 73 | if L == 'a': 74 | letters[i] = '' 75 | break 76 | 77 | return ''.join(reversed(letters)) 78 | 79 | 80 | @F.Filter.no_params 81 | def at_ekahalmadhya_anadeshadi(state, index): 82 | try: 83 | abhyasa = state[index - 1] 84 | anga = state[index] 85 | a, b, c = anga.value 86 | hal = Sounds('hal') 87 | # Anga has the pattern CVC, where C is a consonant and V 88 | # is a vowel. 89 | eka_hal_madhya = a in hal and b == 'a' and c in hal 90 | # Abhyasa and anga have the same initial letter. I'm not 91 | # sure how to account for 8.4.54 in the normal way, so as 92 | # a hack, I check for the consonants that 8.4.54 would 93 | # modify. 94 | _8_4_54 = anga.adi not in Sounds('Jaz') 95 | anadeshadi = abhyasa.adi == anga.adi and _8_4_54 96 | 97 | return eka_hal_madhya and anadeshadi 98 | except (IndexError, ValueError): 99 | return False 100 | 101 | 102 | @O.Operator.no_params 103 | def et_abhyasa_lopa(state, i, locus): 104 | abhyasa = state[i - 1].set_asiddhavat('') 105 | ed_adesha = O.replace('a', 'e') 106 | 107 | abhyasta = state[i] 108 | abhyasta_value = ed_adesha.body(abhyasta.value) 109 | abhyasta = abhyasta.set_asiddhavat(abhyasta_value) 110 | return state.swap(i - 1, abhyasa).swap(i, abhyasta) 111 | 112 | 113 | RULES = [ 114 | 115 | # asiddhavat (6.4.22 - 6.4.175) 116 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 117 | # The effects of an asiddhavat rule are hidden from other 118 | # asiddhavat rules. 119 | Anuvrtti(None, 'anga', None, locus='asiddhavat'), 120 | ('6.4.23', None, F.part('Snam'), None, shnam_lopa), 121 | ('6.4.24', 122 | None, ~F.samjna('idit') & F.al( 123 | 'hal') & F.upadha('Yam'), f('kit', 'Nit'), 124 | O.upadha('')), 125 | 126 | Anuvrtti(None, 'anga', 'ardhadhatuka', locus='asiddhavat'), 127 | Anyatarasyam('6.4.47', None, 'Bra\sja~^', None, bhrasjo_ram), 128 | # ('6.4.48', None, 'a', None, F.lopa), 129 | # ('6.4.49', 'hal', F.antya('ya'), None, None), 130 | # Vibhasha('6.4.50', True, F.antya('kya'), None, None), 131 | ('6.4.64', None, 'At', (F.adi('ac') & F.knit) | F.part('iw'), ''), 132 | 133 | Anuvrtti(None, 'anga', F.adi('ac'), locus='asiddhavat'), 134 | ('6.4.77', None, snu_dhatu_yvor, None, iyan_uvan), 135 | ('6.4.78', None, 'abhyasa', F.asavarna, True), 136 | ('6.4.79', None, 'strI', None, True), 137 | Va('6.4.80', None, True, f('am', 'Sas'), True), 138 | ('6.4.81', None, 'i\R', None, Sounds('yaR')), 139 | ('6.4.82', None, F.al('i') & anekac_asamyogapurva, None, True), 140 | ('6.4.83', None, F.al('u') & anekac_asamyogapurva, 'sup', True), 141 | # TODO: Snu 142 | ('6.4.87', None, 'hu\\', 'sarvadhatuka', True), 143 | ('6.4.88', None, 'BU', f('lu~N', 'li~w'), U('vu~k')), 144 | ('6.4.89', None, F.value('goh'), None, O.upadha('U')), 145 | ('6.4.98', None, GAMA_HANA_JANA, F.knit & ~F.raw('aN'), O.upadha('')), 146 | 147 | Anuvrtti(None, 'anga', F.knit & f('sarvadhatuka'), locus='asiddhavat'), 148 | ('6.4.111', None, F.part('Snam'), None, allopa), 149 | ('6.4.112', None, f('SnA') & F.al('At'), None, ''), 150 | ('6.4.113', None, True, F.adi('hal'), 'I'), 151 | 152 | Anuvrtti('abhyasa', 'anga', 'li~w', locus='asiddhavat'), 153 | ('6.4.120', None, at_ekahalmadhya_anadeshadi, 'kit', et_abhyasa_lopa), 154 | Ca('6.4.121', None, True, F.value('iTa'), True), 155 | Ca('6.4.122', None, f('tF', 'YiPalA~', 'Ba\ja~^', 'trapU~\z'), 156 | f('kit') | F.value('iTa'), True), 157 | Artha('6.4.123', None, F.value('rAD'), True, True), 158 | Va('6.4.124', None, f('jF', 'Bramu~', 'trasI~'), True, True), 159 | Ca('6.4.125', None, F.gana('PaRa~', 'svana~'), True, True), 160 | Na('6.4.126', None, f('Sasu~', 'dada~\\', F.adi('v'), 'guna'), True, True), 161 | ] 162 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\vyakarana.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\vyakarana.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /vyakarana/rules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.rules 4 | ~~~~~~~~~~~~~~~ 5 | 6 | This module creates a single :class:`~vyakarana.rules.Rule` object 7 | for each rule. These objects coordinate various lower-level 8 | components, such as: 9 | 10 | - filters that test some state 11 | - an operator that transform the state 12 | 13 | In addition, these objects handle other matters like optionality, 14 | inference, and so on. 15 | 16 | :license: MIT and BSD 17 | """ 18 | 19 | from templates import Na 20 | 21 | 22 | class Rule(object): 23 | 24 | """A single rule from the Ashtadhyayi. 25 | 26 | Rules are of various kinds. Currently, the system deals only with 27 | transformational rules ("vidhi") explicitly. 28 | """ 29 | 30 | #: Denotes an ordinary rule 31 | VIDHI = 'vidhi' 32 | #: Denotes a *saṃjñā* rule 33 | SAMJNA = 'samjna' 34 | #: Denotes an *atideśa* rule 35 | ATIDESHA = 'atidesha' 36 | #: Denotes a *paribhāṣā* rule 37 | PARIBHASHA = 'paribhasha' 38 | 39 | SIDDHA = 'value' 40 | ASIDDHAVAT = 'asiddhavat' 41 | 42 | def __init__(self, name, window, operator, modifier=None, category=None, 43 | locus='value', optional=False): 44 | 45 | #: A unique ID for this rule, e.g. ``'6.4.1'``. For most rules, 46 | #: this is just the rule's position within the Ashtadhyayi. 47 | #: But a few rules combine multiple rules and have hyphenated 48 | #: names, e.g. ``'1.1.60 - 1.1.63'``. 49 | self.name = name 50 | 51 | self.window = window 52 | self.offset = len(self.window[0]) 53 | self.modifier = modifier 54 | self.category = category 55 | 56 | #: A list of filter functions to apply to some subsequence in 57 | #: a state. If the subsequence matches, then we can apply the 58 | #: rule to the appropriate location in the state.. 59 | self.filters = [x for items in window for x in items] 60 | 61 | #: An operator to apply to some part of a state. 62 | self.operator = operator 63 | 64 | #: 65 | self.locus = locus 66 | 67 | #: Indicates whether or not the rule is optional 68 | self.optional = optional 69 | 70 | #: A list of rules. These rules are all blocked if the current 71 | #: rule can apply. 72 | self.utsarga = [] 73 | self.apavada = [] 74 | 75 | def __repr__(self): 76 | return '' % repr(self.name) 77 | 78 | def __str__(self): 79 | return self.name 80 | 81 | def _apply_option_declined(self, state, index): 82 | if self.operator.category == 'add_samjna': 83 | new_cur = state[index].remove_samjna(*self.operator.params) 84 | result = state.swap(index, new_cur) 85 | else: 86 | result = state 87 | 88 | return result.mark_rule(self, index) 89 | 90 | def apply(self, state, index): 91 | """Apply this rule and yield the results. 92 | 93 | :param state: a state 94 | :param index: the index where the first filter is applied. 95 | """ 96 | if self.optional: 97 | # Option declined. Mark the state but leave the rest alone. 98 | yield self._apply_option_declined(state, index) 99 | 100 | # 'na' rule. Apply no operation, but block any general rules 101 | # from applying. 102 | if self.modifier is Na: 103 | new = state.mark_rule(self, index) 104 | new = new.swap(index, new[index].add_op(*self.utsarga)) 105 | yield new 106 | return 107 | 108 | # Mandatory, or option accepted. Apply the operator and yield. 109 | # Also, block all utsarga rules. 110 | # 111 | # We yield only if the state is different; otherwise the system 112 | # will loop. 113 | new = self.operator.apply(state, index + self.offset, self.locus) 114 | if new != state or self.optional: 115 | new = new.mark_rule(self, index) 116 | new = new.swap(index, new[index].add_op(*self.utsarga)) 117 | yield new 118 | 119 | def features(self): 120 | feature_set = set() 121 | for i, filt in enumerate(self.filters): 122 | feature_set.update((f, i) for f in filt.supersets) 123 | return feature_set 124 | 125 | def has_apavada(self, other): 126 | """Return whether the other rule is an apavada to this one. 127 | 128 | Rule B is an apavada to rule A if and only if: 129 | 130 | 1. A != B 131 | 2. If A matches some position, then B matches too. 132 | 3. A and B have the same locus 133 | 4. The operations performed by A and B are in conflict 134 | 135 | For details on what (4) means specifically, see the comments on 136 | :meth:`operators.Operator.conflicts_with`. 137 | 138 | :param other: a rule 139 | """ 140 | 141 | # Condition 1 142 | if self.name == other.name: 143 | return False 144 | 145 | # Condition 2 146 | filter_pairs = zip(self.filters, other.filters) 147 | if not all(f2.subset_of(f1) for f1, f2 in filter_pairs): 148 | return False 149 | 150 | # Condition 3 151 | if self.locus != other.locus: 152 | return False 153 | 154 | # Condition 4 155 | return self.operator.conflicts_with(other.operator) 156 | 157 | def pprint(self): 158 | data = [] 159 | append = data.append 160 | append('Rule %s' % self.name) 161 | append(' Filters :') 162 | for f in self.filters: 163 | append(' %r' % f) 164 | append(' Operator : %r' % self.operator) 165 | append(' Locus : %r' % (self.locus,)) 166 | append(' Utsarga : %r' % (self.utsarga,)) 167 | append(' Apavada : %r' % (self.apavada,)) 168 | append('') 169 | print '\n'.join(data) 170 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/vyakarana.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/vyakarana.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/vyakarana" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/vyakarana" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /vyakarana/expand.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.expand 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | Code to convert a list of :class:`~vyakarana.template.RuleStub` 7 | objects into a list of :class:`~vyakarana.rules.Rule` objects. 8 | 9 | This code does only the most basic sort of inference. It takes a 10 | stub and uses the previous rule (and the current Anuvrtti) to fill 11 | in any gaps. 12 | 13 | For more complex inference, see :mod:`vyakarana.trees`. 14 | 15 | TODO: think of better name for this module 16 | 17 | :license: MIT and BSD 18 | """ 19 | 20 | import importlib 21 | 22 | import filters as F 23 | import lists 24 | import operators as O 25 | from templates import Anuvrtti, RuleStub, Option, Shesha 26 | from rules import Rule 27 | 28 | 29 | def fetch_all_stubs(): 30 | """Create a list of all rule stubs defined in the system. 31 | 32 | We find rule stubs by programmatically importing every pada in 33 | the Ashtadhyayi. Undefined padas are skipped. 34 | """ 35 | 36 | # All padas follow this naming convention. 37 | mod_string = 'vyakarana.adhyaya{0}.pada{1}' 38 | combos = [(a, p) for a in '12345678' for p in '1234'] 39 | rule_stubs = [] 40 | 41 | for adhyaya, pada in combos: 42 | try: 43 | mod_name = mod_string.format(adhyaya, pada) 44 | mod = importlib.import_module(mod_name) 45 | rule_stubs.extend(mod.RULES) 46 | except ImportError: 47 | pass 48 | 49 | # Convert tuples to RuleStubs 50 | for i, r in enumerate(rule_stubs): 51 | if isinstance(r, tuple): 52 | rule_stubs[i] = RuleStub(*r) 53 | 54 | return rule_stubs 55 | 56 | 57 | def fetch_stubs_in_range(start, end): 58 | selection = [] 59 | active = False 60 | for stub in fetch_all_stubs(): 61 | if isinstance(stub, Anuvrtti): 62 | selection.append(stub) 63 | continue 64 | 65 | if stub.name == start: 66 | active = True 67 | 68 | if active: 69 | selection.append(stub) 70 | 71 | if stub.name == end: 72 | active = False 73 | assert selection 74 | return selection 75 | 76 | 77 | def make_context(data, base=None, prev=None): 78 | """Create and return a filter list for some tuple rule. 79 | 80 | :param data: a list of items. These items are one of the following: 81 | - ``None``, which signals that `base` should be used. 82 | - ``True``, which signals that `prev` should be used. 83 | - an arbitrary object, which is sent to `filters.auto`. 84 | The result is "and"-ed with `base`, if `base` is 85 | defined. 86 | :param base: the corresponding base filter. 87 | :param prev: the corresponding filter created on the previous tuple. 88 | """ 89 | returned = [] 90 | for i, datum in enumerate(data): 91 | if datum is None: 92 | result = base[i] 93 | elif datum is True: 94 | result = prev[i] 95 | else: 96 | extension = F.auto(datum) 97 | try: 98 | b = base[i] 99 | except IndexError: 100 | b = None 101 | if b is None or b is F.allow_all: 102 | result = extension 103 | else: 104 | result = extension & b 105 | returned.append(result) 106 | return returned 107 | 108 | 109 | def _make_window(row, anuvrtti, prev_rule): 110 | returned = [] 111 | base_args = anuvrtti.base_args 112 | prev_window = prev_rule.window if prev_rule else ([None], [None], [None]) 113 | for base, item, p_item in zip(base_args, row.window, prev_window): 114 | if item is Shesha: 115 | item = None 116 | if not hasattr(item, '__iter__'): 117 | item = [item] 118 | returned.append(make_context(item, base=[F.auto(base)], prev=p_item)) 119 | 120 | return returned 121 | 122 | 123 | def _reduce_window(window, operator): 124 | for i in (0, 2): 125 | if window[i] == [F.allow_all]: 126 | window[i] = [] 127 | if operator.category == 'insert': 128 | window[1] = [] 129 | return window 130 | 131 | 132 | def _make_operator(row, anuvrtti, prev_rule, window): 133 | if row.operator is True: 134 | return prev_rule.operator 135 | else: 136 | left, center, right = window 137 | op = row.operator 138 | if isinstance(op, O.Operator): 139 | return row.operator 140 | elif op in lists.SAMJNA: 141 | return O.add_samjna(op) 142 | elif op in lists.IT: 143 | return O.add_samjna(op) 144 | elif center[0] is F.allow_all: 145 | return O.insert(op) 146 | else: 147 | return O.tasya(op) 148 | 149 | 150 | def _make_kw(row, anuvrtti, prev_rule, operator): 151 | optional = isinstance(row, Option) 152 | modifier = row.__class__ 153 | if any(x is Shesha for x in row.window): 154 | modifier = Shesha 155 | 156 | if operator.name.startswith('add_samjna'): 157 | category = Rule.SAMJNA 158 | elif anuvrtti.base_kw.get('category') == 'paribhasha': 159 | category = Rule.PARIBHASHA 160 | else: 161 | category = Rule.VIDHI 162 | 163 | locus = anuvrtti.base_kw.get('locus', Rule.SIDDHA) 164 | 165 | return dict(optional=optional, 166 | modifier=modifier, 167 | category=category, 168 | locus=locus) 169 | 170 | 171 | def build_from_stubs(rule_stubs=None): 172 | """Expand rule stubs into usable rules. 173 | 174 | Throughout this program, rules are defined in a special shorthand. 175 | This function converts each line of shorthand into a proper rule. 176 | 177 | :param rule_stubs: a list of :class:`RuleStub`s 178 | """ 179 | rule_stubs = rule_stubs or fetch_all_stubs() 180 | 181 | rules = [] 182 | 183 | anuvrtti = None 184 | prev_rule = None 185 | for row in rule_stubs: 186 | if isinstance(row, Anuvrtti): 187 | anuvrtti = row 188 | continue 189 | 190 | name = row.name 191 | window = _make_window(row, anuvrtti, prev_rule) 192 | operator = _make_operator(row, anuvrtti, prev_rule, window) 193 | window = _reduce_window(window, operator) 194 | rule_kw = _make_kw(row, anuvrtti, prev_rule, operator) 195 | 196 | rule = prev_rule = Rule(name, window, operator, **rule_kw) 197 | rules.append(rule) 198 | 199 | return rules 200 | -------------------------------------------------------------------------------- /test/data/lat.csv: -------------------------------------------------------------------------------- 1 | # From Kale 384 - 456 2 | 3 | # ======= 4 | # Group I 5 | # ======= 6 | 7 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 8 | # Kale 392 - 393, 395 - 397 9 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 10 | BU Bavati Bavatas Bavanti Bavasi BavaTas BavaTa BavAmi BavAvas BavAmas 11 | 12 | RI\Y nayati nayatas nayanti nayasi nayaTas nayaTa nayAmi nayAvas nayAmas 13 | RI\Y nayate nayete nayante nayase nayeTe nayaDve naye nayAvahe nayAmahe 14 | 15 | gupU~ gopAyati 16 | 17 | DUpa~ DUpAyati 18 | 19 | vicCa~ vicCAyati 20 | 21 | paRa~\ paRAyati 22 | 23 | guhU~^ gUhati 24 | guhU~^ gUhate 25 | 26 | kamu~\ kAmayate 27 | 28 | zWivu~ zWIvati 29 | 30 | wuBrASf~\ BrASate/BrASyate 31 | 32 | wuBlASf~\ BlASate/BlASyate 33 | 34 | Bramu~ Bramati/Bramyati/BrAmyati 35 | 36 | kramu~ krAmati/krAmyati 37 | 38 | laza~^ lazati/lazyati 39 | 40 | Dinv Dinoti 41 | 42 | kfRv krRoti 43 | 44 | fti~\ ftIyate 45 | 46 | ga\mx~ gacCati 47 | 48 | ya\ma~ yacCati 49 | 50 | pA\ pibati 51 | 52 | GrA\ jiGrati 53 | 54 | DmA\ Damati 55 | 56 | zWA\ tizWati 57 | 58 | mnA\ manati 59 | 60 | dA\R yacCati 61 | 62 | df\Si~r paSyati 63 | 64 | f\ fcCati 65 | 66 | # ~~~~ 67 | # Kale 68 | # ~~~~ 69 | kramu~ krAmyati 70 | 71 | janI~\ jAyate 72 | 73 | Samu~ SAmyati 74 | 75 | tamu~ tAmyati 76 | 77 | damu~ dAmyati 78 | 79 | Sramu~ SrAmyati 80 | 81 | kzamU~ kzAmyati 82 | 83 | klamu~ klAmyati/klAmati 84 | 85 | madI~ mAdyate 86 | 87 | Bramu~ BrAmyati 88 | 89 | yasyati/yasati 90 | 91 | Syati 92 | 93 | Cyati 94 | 95 | syati 96 | 97 | vyati 98 | 99 | BraSyati 100 | 101 | rajyati 102 | 103 | YimidA~ medyati 104 | 105 | vya\Da~ viDyati 106 | 107 | # ~~~~ 108 | # Kale 109 | # ~~~~ 110 | tu\da~^ tudati tudatas tudanti tudasi tudaTas tudaTa tudAmi tudAvas tudAmas 111 | tu\da~^ tudate tudete tudante tudase tudeTe tudaDve tude tudAvahe tudAmahe 112 | 113 | izu~ icCati 114 | 115 | # krntati 116 | # Kindati 117 | # girati 118 | # gilati 119 | # truwyati/truwati 120 | # pfcCati 121 | # Brajjati 122 | # majjati 123 | # vfScati 124 | # vicati 125 | # vicCAyati 126 | # sajjati 127 | # muYjati 128 | # limpati 129 | # lumpati 130 | # vindati 131 | # siYcati 132 | # piMSati 133 | 134 | # ~~~~ 135 | # Kale 136 | # ~~~~ 137 | cura~ corayati corayatas corayanti corayasi corayaTas corayaTa corayAmi corayAvas corayAmas 138 | 139 | 140 | # ======== 141 | # Group II 142 | # ======== 143 | 144 | 145 | # ~~~~~~~~ 146 | # Kale 409 147 | # ~~~~~~~~ 148 | zu\Y sunoti sunutas sunvanti sunozi sunuTas sunuTa sunomi sunuvas/sunvas sunumas/sunmas 149 | zu\Y sunute sunvAte sunvate sunuze sunvATe sunuDve sunve sunuvahe/sunvahe sunumahe/sunmahe 150 | 151 | sA\Da~ sADnoti sADnutas sADnuvanti sADnozi sADnuTas sADnuTa sADnomi sADnuvas sADnumas 152 | 153 | aSU~\ aSnute aSnuvAte aSnuvate aSnuze aSnuvATe aSnuDve aSnuve aSnuvahe aSnumahe 154 | 155 | # ~~~~~~~~~~~~~~ 156 | # Kale 409 - 410 157 | # ~~~~~~~~~~~~~~ 158 | tanu~^ tanoti tanutas tanvanti tanozi tanuTas tanuTa tanomi tanuvas/tanvas tanumas/tanmas 159 | tanu~^ tanute tanvAte tanvate tanuze tanvATe tanuDve tanve tanuvahe/tanvahe tanumahe/tanmahe 160 | 161 | qukf\Y karoti kurutas kurvanti karozi kuruTas kuruTa karomi kurvas kurmas 162 | qukf\Y kurute kurvAte kurvate kuruze kurvATe kuruDve kurve kurvahe kurmahe 163 | 164 | # ~~~~~~~~~~~~~~ 165 | # Kale 410 - 415 166 | # ~~~~~~~~~~~~~~ 167 | qukrI\Y krIRAti krIRItas krIRanti krIRAsi krIRITas krIRITa krIRAmi krIRIvas krIRImas 168 | qukrI\Y krIRIte krIRAte krIRate krIRIze krIRATe krIRIDve krIRe krIRIvahe krIRImahe 169 | 170 | sta\mBu~ staBnAti staBnItas staBnanti staBnAsi staBnITas staBnITa staBnAmi staBnIvas staBnImas 171 | 172 | kzuBa~ kzuBnAti kzuBnItas kzuBnanti kzuBnAsi kzuBnITas kzuBnITa kzuBnAmi kzuBnIvas kzuBnImas 173 | 174 | jYA\ jAnAti jAnItas jAnanti jAnAsi jAnITas jAnITa jAnAmi jAnIvas jAnImas 175 | jYA\ jAnIte jAnAte jAnate jAnIze jAnATe jAnIDve jAne jAnIvahe jAnImahe 176 | 177 | jyA\ jinAti jinItas jinanti jinAsi jinITas jinITa jinAmi jinIvas jinImas 178 | 179 | graha~^ gfhRAti gfhRItas gfhRanti gfhRAsi gfhRITas gfhRITa gfhRAmi gfhRIvas gfhRImas 180 | graha~^ gfhRIte gfhRAte gfhRate gfhRIze gfhRATe gfhRIDve gfhRe gfhRIvahe gfhRImahe 181 | 182 | rI\ riRAti 183 | 184 | lI\ linAti 185 | 186 | vlI\ vlinAti 187 | 188 | plI\ plinAti 189 | 190 | DUY DunAti 191 | 192 | DUY DunIte 193 | 194 | pUY punAti 195 | 196 | pUY punIte 197 | 198 | lUY lunAti 199 | 200 | lUY lunIte 201 | 202 | F fRAti 203 | 204 | kF kfRAti 205 | 206 | gF gfRAti 207 | 208 | jF jfRAti 209 | 210 | nF nfRAti 211 | 212 | pF pfRAti 213 | 214 | BF BfRAti 215 | 216 | mF mfRAti 217 | 218 | vFY vfRAti 219 | 220 | vFY vfRIte 221 | 222 | SF SfRAti 223 | 224 | stFY stfRAti 225 | 226 | stFY stfRIte 227 | 228 | BrI\ BriRAti/BrIRAti 229 | 230 | kzI\z kziRAti/kzIRAti 231 | 232 | vrI\ vriRAti/vrIRAti 233 | 234 | # For others, see forms at the top of this section 235 | sta\mBu~ staBnoti staBnutas staBnuvanti staBnozi staBnuTas staBnuTa staBnomi staBnuvas staBnumas 236 | 237 | stu\mBu~ stuBnAti/stuBnoti 238 | 239 | ska\mBu~ skaBnAti/skaBnoti 240 | 241 | sku\mBu~ skuBnAti/skuBnoti 242 | 243 | sku\Y skunAti/skunoti 244 | sku\Y skunIte/skunute 245 | 246 | # ~~~~~~~~~~~~~~ 247 | # Kale 419 - 442 248 | # ~~~~~~~~~~~~~~ 249 | dvi\za~^ dvezwi dvizwas dvizanti dvekzi dvizWas dvizWa dvezmi dvizvas dvizmas 250 | dvi\za~^ dvizwe dvizAte dvizate dvikze dvizATe dviqQve dvize dvizvahe dvizmahe 251 | 252 | # ~~~~~~~~~~~~~~ 253 | # Kale 443 - 454 254 | # ~~~~~~~~~~~~~~ 255 | ki\ ciketi cikitas cikyati cikezi cikiTas cikiTa cikemi cikivas cikimas 256 | 257 | hu\ juhoti juhutas juhvati juhozi juhuTas juhuTa juhomi juhuvas juhumas 258 | 259 | hrI\ jihreti jihrItas jihriyati jihrezi jihrITas jihrITa jihremi jihrIvas jihrImas 260 | 261 | # ~~~~~~~~~~~~~~ 262 | # Kale 455 - 456 263 | # ~~~~~~~~~~~~~~ 264 | aYjU~ anakti aNktas aYjanti anakzi aNkTas aNkTa anajmi aYjvas aYjmas 265 | 266 | YiinDI~\ indDe inDAte inDate intse inDATe inDve inDe inDvahe inDmahe 267 | 268 | kzu\di~^r kzuRatti kzunttas kzundanti kzuRatsi kzuntTas kzuntTa kzuRadmi kzundvas kzundmas 269 | kzu\di~^r kzuntte kzundAte kzundate kzuntse kzundATe kzunDve kzunde kzundvahe kzundmahe 270 | 271 | tfha~ tfReQi tfRQas tfMhanti tfRekzi tfRQas tfRQa tfRehmi tfMhvas tfMhmas 272 | 273 | pi\zx~ pinazwi piMzwas piMzanti pinakzi piMzWas piMzWa pinazmi piMzvas piMzmas 274 | 275 | yu\ji~^r yunakti yuNktas yuYjanti yunakzi yuNkTas yuNkTa yunajmi yuYjvas yuYjmas 276 | yu\ji~^r yuNkte yuYjAte yuYjate yuNkze yuYjATe yuNgDve yuYje yuYjvahe yuYjmahe 277 | 278 | ri\ci~^r riRakti riNktas riYcanti riRakzi riNkTas riNkTa riRacmi riYcvas riYcmas 279 | ri\ci~^r riNkte riYcAte riYcate riNkze riYcATe riNgDve riYce riYcvahe riYcmahe 280 | 281 | ru\Di~^r ruRadDi rundDas runDanti ruRatsi rundDas rundDa ruRaDmi runDvas runDmas 282 | ru\Di~^r rundDe runDAte runDate runtse runDATe runDve runDe runDvahe runDmahe 283 | 284 | hisi~ hinasti hiMstas hiMsanti hinassi hiMsTas hiMsTa hinasmi hiMsvas hiMsmas 285 | -------------------------------------------------------------------------------- /test/data/lit.csv: -------------------------------------------------------------------------------- 1 | # From Kale 488 - 528 2 | # TODO: what are the rules of iDve -> iQve ? Can't find rules corresponding to his explanation. 3 | 4 | # ~~~~~~~~ 5 | # Kale 504 6 | # ~~~~~~~~ 7 | # TODO: saMcaskAra 8 | qukf\Y cakAra cakratus cakrus cakarTa cakraTus cakra cakAra/cakara cakfva cakfma 9 | qukf\Y cakre cakrAte cakrire cakfze cakrATe cakfQve cakre cakfvahe cakfmahe 10 | 11 | zwuY tuzwAva tuzwuvatus tuzwuvus tuzwoTa tuzwuvaTus tuzwuva tuzwAva/tuzwava tuzwuva tuzwuma 12 | zwuY tuzwuve tuzwuvAte tuzwuvire tuzwuze tuzwuvATe tuzwuQve tuzwuve tuzwuvahe tuzwumahe 13 | 14 | # ~~~~~~~~~~~~ 15 | # Kale 505 (1) 16 | # ~~~~~~~~~~~~ 17 | vFY vavAra vavaratus vavarus vavariTa vavaraTus vavara vavAra/vavara vavariva vavarima 18 | vFY vavare vavarAte vavarire vavarize vavarATe vavariDve/vavariQve vavare vavarivahe vavarimahe 19 | 20 | SF SaSAra SaSaratus/SaSratus SaSrus/SaSarus SaSariTa SaSaraTus/SaSraTus SaSra/SaSara SaSAra/SaSara SaSariva/SaSriva SaSarima/SaSrima 21 | 22 | kzRu cukzRAva cukzRuvatus cukzRuvus cukzRaviTa cukzRuvaTus cukzRuva cukzRAva/cukzRava cukzRuviva cukzRuvima 23 | 24 | # 'ru' doesn't support ruruve -- what is Kale doing here? 25 | ru rurAva ruruvatus ruruvus ruraviTa ruruvaTus ruruva rurAva/rurava ruruviva ruruvima 26 | 27 | SIN SiSye SiSyAte SiSyire SiSyize SiSyATe SiSyiDve/SiSyiQve SiSye SiSyivahe SiSyimahe 28 | 29 | SriY SiSrAya SiSriyatus SiSriyus SiSrayiTa SiSriyaTus SiSriya SiSrAya/SiSraya SiSriyiva SiSriyima 30 | SriY SiSriye SiSriyAte SiSriyire SiSriyize SiSriyATe SiSriyiDve/SiSriyiQve SiSriye SiSriyivahe SiSriyimahe 31 | 32 | # ~~~~~~~~~~~~ 33 | # Kale 505 (2) 34 | # ~~~~~~~~~~~~ 35 | dA\Y dadO dadatus dadus dadATa/dadiTa dadaTus dada dadO dadiva dadima 36 | dA\Y dade dadAte dadire dadize dadATe dadiDve/dadiQve dade dadivahe dadimahe 37 | 38 | gE\ jagO jagatus jagus jagATa/jagiTa jagaTus jaga jagO jagiva jagima 39 | 40 | i\R iyAya Iyatus Iyus iyeTa/iyayiTa IyaTus Iya iyAya/iyaya Iyiva Iyima 41 | 42 | RI\ ninAya ninyatus ninyus nineTa/ninayiTa ninyaTus ninya ninAya/ninaya ninyiva ninyima 43 | 44 | smf\ sasmAra sasmaratus sasmarus sasmarTa sasmaraTus sasmara sasmAra/sasmara sasmariva sasmarima 45 | 46 | qu\mi\Y mamO mimyatus mimyus mamATa/mamiTa mimyaTus mimya mamO mimyiva mimyima 47 | qu\mi\Y mimye mimyAte mimyire mimyize mimyATe mimyiDve/mimyiQve mimye mimyivahe mimyimahe 48 | 49 | lI\ lilAya/lalO lilyatus lilyus lileTa/lilayiTa/lalATa/laliTa lilyaTus lilya lilAya/lilaya/lalO lilyiva lilyima 50 | 51 | # ~~~~~~~~~~~~ 52 | # Kale 505 (3) 53 | # ~~~~~~~~~~~~ 54 | Sa\kx~ SaSAka Sekatus Sekus SekiTa/SaSakTa SekaTus Seka SaSAka/SaSaka Sekiva Sekima 55 | 56 | qupa\ca~^z papAca pecatus pecus peciTa/papakTa pecaTus peca papAca/papaca peciva pecima 57 | qupa\ca~^z pece pecAte pecire pecize pecATe peciDve pece pecivahe pecimahe 58 | 59 | mu\cx~^ mumoca mumucatus mumucus mumociTa mumucaTus mumuca mumoca mumuciva mumucima 60 | mu\cx~^ mumuce mumucAte mumucire mumucize mumucATe mumuciDve/mumuciQve mumuce mumucivahe mumucimahe 61 | 62 | ri\ci~^r rireca riricatus riricus rireciTa riricaTus ririca rireca ririciva riricima 63 | ri\ci~^r ririce riricAte riricire riricize riricATe ririciDve/ririciQve ririce riricivahe riricimahe 64 | 65 | pra\cCa~ papracCa papracCatus papracCus papracCiTa/paprazWa papracCaTus papracCa papracCa papracCiva papracCima 66 | 67 | tya\ja~ tatyAja tatyajatus tatyajus tatyajiTa/tatyakTa tatyajaTus tatyaja tatyaja/tatyAja tatyajiva tatyajima 68 | 69 | Ba\Yja~ baBaYja baBaYjatus baBaYjus baBaYjiTa/baBaNkTa baBaYjaTus baBaYja baBaYja baBaYjiva baBaYjima 70 | 71 | Bra\sja~^ baBarja/baBrajja baBarjatus/baBrajjatus baBarjus/baBrajjus baBarjiTa/baBrajjiTa/baBarzWa/baBrazWa baBarjaTus/baBrajjaTus baBarja/baBrajja baBarja/baBrajja baBarjiva/baBrajjiva baBarjima/baBrajjima 72 | Bra\sja~^ baBarje/baBrajje baBarjAte/baBrajjAte baBarjire/baBrajjire baBarjize/baBrajjize baBarjATe/baBrajjATe baBarjiDve/baBrajjiDve baBarje/baBrajje baBarjivahe/baBrajjivahe baBarjimahe/baBrajjimahe 73 | 74 | quBfY baBAra baBratus baBrus baBarTa baBraTus baBra baBAra/baBara baBfva baBfma 75 | quBfY baBre baBrAte baBrire baBfze baBrATe baBfQve baBre baBfvahe baBfmahe 76 | 77 | sTA\ tasTO tasTatus tasTus tasTATa/tasTiTa tasTaTus tasTa tasTO tasTiva tasTima 78 | 79 | # ~~~~~~~~ 80 | # Kale 506 81 | # ~~~~~~~~ 82 | va\ca~ uvAca Ucatus Ucus uvakTa/uvaciTa UcaTus Uca uvAca/uvaca Uciva Ucima 83 | 84 | vya\Da~ vivyADa viviDatus viviDus 85 | 86 | # ~~~~~~~~ 87 | # Kale 508 88 | # ~~~~~~~~ 89 | anjU~ AnaYja AnaYjatus AnaYjus AnaYjiTa/AnaNkTa AnaYjaTus AnaYja AnaYja AnaYjiva AnaYjima 90 | 91 | aSU~\ AnaSe AnaSAte AnaSire AnaSize/AnakSe AnaSATe AnaSiDve/AnaqQve AnaSe AnaSivahe/AnaSvahe AnaSimahe/AnaSmahe 92 | fDu~ AnarDa AnfDatus AnfDus 93 | 94 | # e substitution 95 | cara~ cacAra ceratus cerus ceriTa ceraTus cera cacAra/cacara ceriva cerima 96 | 97 | guhU~^ jugUha juguhatus juguhus jugUhiTa/jugoQa juguhaTus juguha jugUha juguhva/juguhiva juguhma/juguhima 98 | guhU~^ juguhe juguhAte juguhire juGukze/juguhize juguhATe juguhiDve/juguhiQue/jugUQve juguhe juguhivahe juguhimahe 99 | 100 | zRih\a~ sizReha sizRihatus sizRihus sizRehiTa/sizReQa/sizRogDa sizRihaTus sizRiha sizReha sizRihiva/sizRihva sizRihima/sizRihma 101 | 102 | # ~~~~~~~~ 103 | # Kale 510 104 | # ~~~~~~~~ 105 | ga\mx~ jagAma jagmatus jagmus jaganTa/jagamiTa jagmaTus jagma jagAma/jagama jagmiva jagmima 106 | 107 | ha\na~ jaGAna jaGnatus jaGnus jaGanTa/jaGaniTa jaGnaTus jaGna jaGAna/jaGana jaGniva jaGnima 108 | 109 | janI~\ jajYe jajYAte jajYire jajYize jajYATe jajYiDve jajYe jajYivahe jajYimahe 110 | 111 | Kanu~^ caKAna caKnatus caKnus caKaniTa caKnaTus caKna caKAna/caKana caKniva caKnima 112 | Kanu~^ caKne caKnAte caKnire caKnize caKnATe caKniDve caKne caKnivahe caKnimahe 113 | 114 | # ~~~~~~~~ 115 | # Kale 512 116 | # ~~~~~~~~ 117 | tF tatAra teratus terus teriTa teraTus tera tatAra/tatara teriva terima 118 | 119 | Pala~ paPala Pelatus Pelus PeliTa PelaTus Pela paPAla/paPala Peliva Pelima 120 | 121 | Ba\ja~^ baBAja Bejatus Bejus BejiTa/baBakTa BejaTus Beja baBAja/baBaja Bejiva Bejima 122 | Ba\ja~^ Beje BejAte Bejire Bejize BejATe BejiDve Beje Bejivahe Bejimahe 123 | 124 | jF jajAra jajaratus/jeratus jajarus/jerus 125 | 126 | Bramu~ baBrAma baBramatus/Brematus baBramus/Bremus 127 | 128 | # ~~~~~~~~ 129 | # Kale 513 130 | # ~~~~~~~~ 131 | # TODO: Atmanepada bhu 132 | BU baBUva baBUvatus baBUvus baBUviTa baBUvaTus baBUva baBUva baBUviva baBUvima 133 | 134 | # ~~~~~~~~ 135 | # Kale 514 136 | # ~~~~~~~~ 137 | ji\ jigAya jigyatus jigyus jigeTa/jigayiTa jigyaTus jigya jigAya/jigaya jigyiva jigyima 138 | 139 | hi\ jiGAya jiGyatus jiGyus jiGeTa/jiGayiTa jiGyaTus jiGya jiGAya/jiGaya jiGyiva jiGyima 140 | 141 | ci\Y cikAya/cicAya cikyatus/cicyatus cikyus/cicyus cikayiTa/cikeTa/cicayiTa/ciceTa cikyaTus/cicyaTus cikya/cicya cikAya/cikaya/cicAya/cicaya cikyiva/cicyiva cikyima/cicyima 142 | ci\Y cikye/cicye cikyAte/cicyAte cicyire/cikyire cikyize/cicyize cikyATe/cicyATe cikyiDve/cicyiDve cikye/cicye cikyivahe/cicyivahe cikyimahe/cicyimahe 143 | 144 | 145 | # ~~~~~~~~ 146 | # Kale 515 147 | # ~~~~~~~~ 148 | f\ Ara Aratus Arus AriTa AraTus Ara Ara Ariva Arima 149 | -------------------------------------------------------------------------------- /test/upadesha.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | test.upadesha 4 | ~~~~~~~~~~~~~ 5 | 6 | 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import pytest 12 | 13 | from vyakarana.terms import * 14 | 15 | 16 | class TestDataSpace(object): 17 | 18 | d = DataSpace('A', 'A', 'A', 'A', 'A') 19 | 20 | def test_init(self): 21 | assert self.d == ('A',) * 5 22 | 23 | def test_replace(self): 24 | d2 = self.d.replace(asiddha='B') 25 | assert d2 == ('A', 'A', 'A', 'A', 'B') 26 | 27 | d2 = self.d.replace(asiddhavat='B') 28 | assert d2 == ('A', 'A', 'A', 'B', 'B') 29 | 30 | d2 = self.d.replace(value='B') 31 | assert d2 == ('A', 'A', 'B', 'B', 'B') 32 | 33 | def test_replace_blank(self): 34 | d2 = self.d.replace(asiddha='') 35 | assert d2 == ('A', 'A', 'A', 'A', '') 36 | 37 | d2 = self.d.replace(asiddhavat='') 38 | assert d2 == ('A', 'A', 'A', '', '') 39 | 40 | d2 = self.d.replace(value='') 41 | assert d2 == ('A', 'A', '', '', '') 42 | 43 | 44 | # Constructors 45 | # ~~~~~~~~~~~~ 46 | 47 | def test_init(): 48 | u = Upadesha('vu~k') 49 | assert u.data == ('vu~k', 'v', 'v', 'v', 'v') 50 | assert u.samjna == set(['udit', 'kit']) 51 | assert not u.lakshana 52 | assert not u.ops 53 | assert not u.parts 54 | 55 | 56 | def test_init_no_raw(): 57 | u = Upadesha(data='data', samjna='samjna', lakshana='lakshana', 58 | ops='ops', parts='parts') 59 | 60 | assert u.data == 'data' 61 | assert u.samjna == 'samjna' 62 | assert u.lakshana == 'lakshana' 63 | assert u.ops == 'ops' 64 | assert u.parts == 'parts' 65 | 66 | 67 | # Properties 68 | # ~~~~~~~~~~ 69 | 70 | def test_properties_no_it(): 71 | t = Upadesha('gati') 72 | assert t 73 | assert t.adi == 'g' 74 | assert t.antya == 'i' 75 | assert t.asiddha == 'gati' 76 | assert t.asiddhavat == 'gati' 77 | assert t.clean == 'gati' 78 | assert t.raw == 'gati' 79 | assert t.upadha == 't' 80 | assert t.value == 'gati' 81 | 82 | 83 | def test_properties_final_it(): 84 | t = Upadesha('anta~') 85 | assert t 86 | assert t.adi == 'a' 87 | assert t.antya == 't' 88 | assert t.asiddha == 'ant' 89 | assert t.asiddhavat == 'ant' 90 | assert t.clean == 'ant' 91 | assert t.raw == 'anta~' 92 | assert t.upadha == 'n' 93 | assert t.value == 'ant' 94 | 95 | 96 | # Operators 97 | # ~~~~~~~~~ 98 | 99 | def test_copy(): 100 | values = { 101 | 'data': 'data', 102 | 'samjna': 'samjna', 103 | 'lakshana': 'lakshana', 104 | 'ops': 'ops', 105 | 'parts': 'parts', 106 | } 107 | 108 | u = Upadesha(**values) 109 | 110 | u2 = u.copy(data='data2') 111 | assert u2.data == 'data2' 112 | 113 | u2 = u.copy(samjna='samjna2') 114 | assert u2.samjna == 'samjna2' 115 | 116 | u2 = u.copy(lakshana='lakshana2') 117 | assert u2.lakshana == 'lakshana2' 118 | 119 | u2 = u.copy(ops='ops2') 120 | assert u2.ops == 'ops2' 121 | 122 | u2 = u.copy(parts='parts2') 123 | assert u2.parts == 'parts2' 124 | 125 | 126 | @pytest.fixture 127 | def eq_upadeshas(): 128 | u2 = Upadesha('a') 129 | return [ 130 | Upadesha('a'), 131 | Upadesha('a'), 132 | u2.copy(data='data'), 133 | u2.copy(data='samjna'), 134 | u2.copy(data='lakshana'), 135 | u2.copy(data='ops'), 136 | u2.copy(data='parts') 137 | ] 138 | 139 | 140 | def test_eq(eq_upadeshas): 141 | u1, u2, u3, u4, u5, u6, u7 = eq_upadeshas 142 | 143 | assert u1 == u1 # same object 144 | assert u1 == u2 # same values 145 | assert not u1 == None 146 | assert not u1 == u3 147 | assert not u1 == u4 148 | assert not u1 == u5 149 | assert not u1 == u6 150 | assert not u1 == u7 151 | 152 | 153 | def test_ne(eq_upadeshas): 154 | u1, u2, u3, u4, u5, u6, u7 = eq_upadeshas 155 | 156 | assert not u1 != u1 # same object 157 | assert not u1 != u2 # same values 158 | assert u1 != None 159 | assert u1 != u3 160 | assert u1 != u4 161 | assert u1 != u5 162 | assert u1 != u6 163 | assert u1 != u7 164 | 165 | 166 | def test_upadesha_dataspace(): 167 | dhatu = Upadesha('Pala~') 168 | assert dhatu.data == ('Pala~', 'Pal', 'Pal', 'Pal', 'Pal') 169 | 170 | abhyasa = dhatu.set_value('pa') 171 | assert abhyasa.data == ('Pala~', 'Pal', 'pa', 'pa', 'pa') 172 | 173 | abhyasa = abhyasa.set_asiddhavat('') 174 | assert abhyasa.data == ('Pala~', 'Pal', 'pa', '', '') 175 | 176 | abhyasta = dhatu.set_asiddhavat('Pel') 177 | assert abhyasta.data == ('Pala~', 'Pal', 'Pal', 'Pel', 'Pel') 178 | 179 | 180 | def test_parse_it(): 181 | # 1.3.2 182 | for v in Sounds('ac'): 183 | for c in Sounds('hal'): 184 | if c + v in ('Yi', 'wu', 'qu'): 185 | continue 186 | s = c + v + '~' 187 | u = Upadesha(s) 188 | assert u.raw == s 189 | assert u.value == c 190 | assert v + 'dit' in u.samjna 191 | 192 | # 1.3.3 193 | for v in Sounds('ac'): 194 | for c in Sounds('hal'): 195 | s = v + c 196 | u = Upadesha(s) 197 | assert u.raw == s 198 | assert u.value == v 199 | assert c + 'it' in u.samjna 200 | 201 | # 1.3.4 202 | for v in Sounds('ac'): 203 | for c in Sounds('tu s m'): 204 | s = v + c 205 | u = Upadesha(s, vibhakti=True) 206 | assert u.raw == s 207 | assert u.value == s 208 | assert c + 'it' not in u.samjna 209 | 210 | pairs = [ 211 | ('iya~N', 'iy'), 212 | ('uva~N', 'uv'), 213 | ('vu~k', 'v'), 214 | ] 215 | for raw, value in pairs: 216 | u = Upadesha(raw) 217 | assert u.raw == raw 218 | assert u.value == value 219 | 220 | 221 | def test_anga(): 222 | a = Upadesha.as_anga('nara') 223 | assert 'anga' in a.samjna 224 | 225 | 226 | def test_dhatu(): 227 | pairs = [ 228 | ('BU', 'BU'), 229 | ('qukf\Y', 'kf'), 230 | ('sta\mBu~', 'stamB'), 231 | ('qukrI\Y', 'krI'), 232 | ] 233 | for raw, value in pairs: 234 | d = Upadesha.as_dhatu(raw) 235 | assert 'anga' in d.samjna 236 | assert 'dhatu' in d.samjna 237 | assert d.raw == raw 238 | assert d.value == value 239 | 240 | 241 | def test_krt(): 242 | pairs = [ 243 | ('san', 'sa', ['nit']), 244 | ('yaN', 'ya', ['Nit']), 245 | ('yak', 'ya', ['kit']), 246 | ('kyac', 'ya', ['kit', 'cit']), 247 | 248 | ('Sap', 'a', ['Sit', 'pit']), 249 | ('Syan', 'ya', ['Sit', 'nit']), 250 | ('Sa', 'a', ['Sit']), 251 | ('Snam', 'na', ['Sit', 'mit']), 252 | ('Ric', 'i', ['Rit', 'cit']), 253 | ('kvasu~', 'vas', ['kit', 'udit']) 254 | ] 255 | for raw, value, its in pairs: 256 | p = Krt(raw) 257 | assert 'pratyaya' in p.samjna 258 | assert 'krt' in p.samjna 259 | assert p.raw == raw 260 | assert p.value == value 261 | 262 | for it in its: 263 | assert it in p.samjna 264 | 265 | 266 | def test_vibhakti(): 267 | pairs = [ 268 | ('tip', 'ti', ['pit']), 269 | ('iw', 'i', ['wit']), 270 | ('Ral', 'a', ['Rit', 'lit']), 271 | # ('eS', 'e', ['S']), 272 | ('irec', 'ire', ['cit']), 273 | ('wA', 'A', ['wit']), 274 | ('Nas', 'as', ['Nit']), 275 | ('Nasi~', 'as', ['Nit', 'idit']), 276 | ('sup', 'su', ['pit']), 277 | ] 278 | for raw, value, its in pairs: 279 | v = Vibhakti(raw) 280 | assert 'pratyaya' in v.samjna 281 | assert 'vibhakti' in v.samjna 282 | assert v.raw == raw 283 | assert v.value == value 284 | 285 | for it in its: 286 | assert it in v.samjna 287 | 288 | -------------------------------------------------------------------------------- /vyakarana/sounds.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.sounds 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | Classes for working with various sounds. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | 12 | def memoize(c): 13 | cache = {} 14 | get_key = lambda a, kw: tuple(a) + (frozenset(kw.items()),) 15 | 16 | def memoized(*a, **kw): 17 | key = get_key(a, kw) 18 | if key not in cache: 19 | cache[key] = c(*a, **kw) 20 | return cache[key] 21 | return memoized 22 | 23 | 24 | @memoize 25 | class Sound(object): 26 | 27 | """A Sanskrit sound. 28 | 29 | These sounds can be transformed in ways defined by the grammar. 30 | 31 | :param value: the Sound's value 32 | """ 33 | 34 | #: This organizes sounds by their point of articulation. 35 | ASYA = [ 36 | # kaṇṭha 37 | set('aAkKgGNh'), 38 | # tālu 39 | set('iIcCjJYyS'), 40 | # mūrdhan 41 | set('fFwWqQRrz'), 42 | # danta 43 | set('xXtTdDnls'), 44 | # oṣṭha 45 | set('uUpPbBmv'), 46 | # kaṇṭha-tālu 47 | set('eE'), 48 | # kaṇṭha-oṣṭha 49 | set('oO'), 50 | # pure nasal 51 | set('M') 52 | ] 53 | 54 | #: This organizes sounds by their articulatory effort. 55 | PRAYATNA = [ 56 | # spṛṣṭa 57 | set('kKgGNcCjJYwWqQRtTdDnpPbBmh'), 58 | # īṣatspṛṣṭa 59 | set('yrlv'), 60 | # śar 61 | set('Szs'), 62 | # vowels 63 | set('aAiIuUfFxeEoO'), 64 | ] 65 | 66 | #: This organizes sounds by their nasality. 67 | NASIKA = [ 68 | # nasal 69 | set('NYRnmM'), 70 | # non-nasal 71 | set('aAiIuUfFxeEoOkKgGcCjJwWQQtTdDpPbByrlvSzsh'), 72 | ] 73 | 74 | #: This organizes sounds by their "voice." 75 | GHOSA = [ 76 | # ghoṣavat (voiced) 77 | set('aAiIuUfFxXeEoOgGNjJYqQRdDnbBmyrlvh'), 78 | # aghoṣa (unvoiced) 79 | set('kKcCwWtTpPSzs'), 80 | ] 81 | 82 | #: This organizes sounds by their aspiration. 83 | PRANA = [ 84 | # mahāprāṇa (aspirated) 85 | set('KGCJWQTDPBh'), 86 | # alpaprāṇa (unaspirated) 87 | set('aAiIuUfFxXeEoOkgNcjYwqRtdnpbmyrlvSzs'), 88 | ] 89 | 90 | def __init__(self, value): 91 | self.value = value 92 | 93 | def asavarna(self, other): 94 | """Returns the sounds that are not savarna to this one. 95 | 96 | One subtle point here is that the 'savarna' and 'asavarna' are 97 | both undefined between consonants and vowels. 98 | 99 | :param other: 100 | """ 101 | ac = Pratyahara('ac') 102 | same_ac = self.value in ac and other in ac 103 | return same_ac and other not in self.savarna_set 104 | 105 | def closest(self, items): 106 | """Return the phonetically closest value. If no close value 107 | exists, return `self.value`. 108 | 109 | :param items: a list of letters 110 | """ 111 | best = self.value 112 | best_score = 0 113 | 114 | self_names = self.names() 115 | for x in items: 116 | score = len(Sound(x).names().intersection(self_names)) 117 | if score > best_score: 118 | best, best_score = x, score 119 | return best 120 | 121 | def names(self): 122 | """Get the various designations that apply to this sound. This 123 | is used to determine how similar two sounds are to each other. 124 | """ 125 | try: 126 | return self._names 127 | except AttributeError: 128 | pass 129 | 130 | self._names = set() 131 | categories = [self.ASYA, self.PRAYATNA, self.NASIKA, self.GHOSA, 132 | self.PRANA] 133 | for i, category in enumerate(categories): 134 | for j, group in enumerate(category): 135 | if self.value in group: 136 | self._names.add('%s_%s' % (i, j)) 137 | 138 | return self._names 139 | 140 | def savarna(self, other): 141 | """ 142 | 143 | :param other: some sound 144 | """ 145 | return other in self.savarna_set 146 | 147 | @property 148 | def savarna_set(self): 149 | """Return the sounds that are savarna to this one. The 'savarna' 150 | relation is defined by the following rules: 151 | 152 | 1.1.9 tulyAsyaprayatnaM savarNam 153 | 1.1.10 nAjjhalau 154 | """ 155 | s = self.value 156 | a = p = None 157 | 158 | for a in self.ASYA: 159 | if s in a: 160 | break 161 | for p in self.PRAYATNA: 162 | if s in p: 163 | break 164 | if a is None: 165 | a = p 166 | elif p is None: 167 | p = a 168 | 169 | results = a.intersection(p) 170 | is_ac = s in Pratyahara('ac') 171 | 172 | # 1.1.10 na ac-halau 173 | return set([x for x in results if (x in Pratyahara('ac')) == is_ac]) 174 | 175 | 176 | class SoundCollection(object): 177 | 178 | def __init__(self, *a, **kw): 179 | raise NotImplementedError 180 | 181 | def __contains__(self, item): 182 | """ 183 | :param item: some sound 184 | """ 185 | return item in self.values 186 | 187 | def __iter__(self): 188 | return iter(self.values) 189 | 190 | def __len__(self): 191 | return len(self.values) 192 | 193 | def __repr__(self): 194 | return "<%s('%s')>" % (self.__class__.__name__, self.name) 195 | 196 | 197 | @memoize 198 | class Sounds(SoundCollection): 199 | 200 | """A shorthand for grouping Sanskrit sounds. 201 | 202 | :param phrase: a group of designations 203 | """ 204 | 205 | def __init__(self, phrase): 206 | self.name = phrase 207 | if isinstance(phrase, basestring): 208 | items = phrase.split() 209 | else: 210 | items = phrase 211 | 212 | v = self.values = set() 213 | for item in items: 214 | 215 | first, last = (item[0], item[-1]) 216 | simple_vowel = len(item) == 1 and item in Pratyahara('ak') 217 | 218 | # 1.1.69 aNudit savarNasya cApratyayaH 219 | if last == 'u' or simple_vowel: 220 | v.update(Sound(first).savarna_set) 221 | # 1.1.70 taparas tatkAlasya 222 | elif last == 't': 223 | v.update([first]) 224 | # Generic letter 225 | elif len(item) == 1: 226 | v.update(item) 227 | # Pratyahara 228 | else: 229 | v.update(Pratyahara(item).values) 230 | 231 | 232 | @memoize 233 | class Pratyahara(SoundCollection): 234 | 235 | """A shorthand for grouping Sanskrit sounds. 236 | 237 | The various pratyaharas are defined in the Shiva Sutras, which 238 | precede the Ashtadhyayi proper. 239 | 240 | :param value: the pratyahara itself, e.g. 'hal', 'ak', 'Jal' 241 | :param second_R: ``True`` iff we should use the second 'R' as our 242 | boundary. Since pratyaharas formed with this letter 243 | are usually ambiguous, we have to be explicit here. 244 | """ 245 | 246 | rules = [ 247 | ('aAiIuU', 'R'), 248 | ('fFxX', 'k'), 249 | ('eo', 'N'), 250 | ('EO', 'c'), 251 | ('hyvr', 'w'), 252 | ('l', 'R'), 253 | ('YmNRn', 'm'), 254 | ('JB', 'Y'), 255 | ('GQD', 'z'), 256 | ('jbgqd', 'S'), 257 | ('KPCWTcwt', 'v'), 258 | ('kp', 'y'), 259 | ('Szs', 'r'), 260 | ('h', 'l'), 261 | ] 262 | 263 | def __init__(self, name, second_R=False): 264 | first = name[0] 265 | limit = name[-1] 266 | found_first = False 267 | 268 | self.name = name 269 | self.values = set([first]) 270 | 271 | for items, it in self.rules: 272 | if found_first: 273 | self.values.update(items) 274 | elif first in items: 275 | self.values.update(items.partition(first)[-1]) 276 | found_first = True 277 | if found_first and it == limit: 278 | if second_R: 279 | second_R = False 280 | else: 281 | break 282 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # vyakarana documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Nov 15 14:47:22 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | sys.path.insert(0, os.path.abspath('..')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'vyakarana' 44 | copyright = u'2013, Arun Prasad' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '0.1' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '0.1' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | html_theme = 'default' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'vyakaranadoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 185 | latex_documents = [ 186 | ('index', 'vyakarana.tex', u'vyakarana Documentation', 187 | u'Arun Prasad', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'vyakarana', u'vyakarana Documentation', 217 | [u'Arun Prasad'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'vyakarana', u'vyakarana Documentation', 231 | u'Arun Prasad', 'vyakarana', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 242 | #texinfo_show_urls = 'footnote' 243 | -------------------------------------------------------------------------------- /test/filters.py: -------------------------------------------------------------------------------- 1 | import vyakarana.filters as F 2 | import vyakarana.lists as L 3 | from vyakarana.terms import Upadesha, Pratyaya 4 | from vyakarana.dhatupatha import DHATUPATHA as DP 5 | 6 | 7 | # Constructors 8 | # ~~~~~~~~~~~~ 9 | 10 | def test_init_with_kw(): 11 | f = F.Filter(category='category', name='name', body='body', 12 | domain='domain') 13 | 14 | assert f.category == 'category' 15 | assert f.name == 'name' 16 | assert f.body == 'body' 17 | assert f.domain == 'domain' 18 | 19 | 20 | def test_no_params(): 21 | def apples(state, index): 22 | return True 23 | 24 | f = F.Filter.no_params(apples) 25 | 26 | assert f.category == 'apples' 27 | assert f.name == 'apples' 28 | assert f.body is apples 29 | assert f.domain is None 30 | 31 | 32 | def verify(cases, filter_creator, tester): 33 | """Verify a filter function on some input. 34 | 35 | :param cases: a list of 3-tuples containing: 36 | - a list of arguments to `filter_creator` 37 | - a list of positive examples 38 | - a list of negative examples 39 | :param filter_creator: a function that takes a list of *args and 40 | returns a parameterized filter function. 41 | :param test: a function that takes a parameterized filter function 42 | and an example and returns a bool. 43 | """ 44 | for pattern, yes, no in cases: 45 | filt = filter_creator(*pattern) 46 | for y in yes.split(): 47 | assert tester(filt, y) 48 | for n in no.split(): 49 | assert not tester(filt, n) 50 | 51 | 52 | def term_tester(filt, data): 53 | term = Upadesha('a~').set_value(data) 54 | return filt.allows([term], 0) 55 | 56 | 57 | def pratyaya_tester(filt, data): 58 | return filt.allows([Pratyaya(data)], 0) 59 | 60 | 61 | def dhatu_tester(filt, data): 62 | return filt.allows([Upadesha.as_dhatu(data)], 0) 63 | 64 | 65 | # Ordinary filters 66 | # ~~~~~~~~~~~~~~~~ 67 | 68 | def test_adi(): 69 | cases = [ 70 | (['al'], 71 | 'indra agni vAyu jAne agnO Bagavat Atman lih dfS', 72 | '', 73 | ), 74 | (['ac'], 75 | 'indra agni agnO Atman', 76 | 'vAyu jAne Bagavat lih dfS', 77 | ), 78 | (['hal'], 79 | 'vAyu jAne Bagavat lih dfS', 80 | 'indra agni agnO Atman', 81 | ), 82 | (['ec'], 83 | 'eDa ozaDi EkzvAka Oqulomi', 84 | 'indra agni vAyu jAne agnO Bagavat Atman lih dfS', 85 | ), 86 | (['Sar'], 87 | 'SItala za sarpa', 88 | 'indra agni vAyu jAne agnO Bagavat Atman lih dfS', 89 | ), 90 | ] 91 | verify(cases, F.adi, term_tester) 92 | 93 | 94 | def test_al(): 95 | cases = [ 96 | (['al'], 97 | 'indra agni vAyu jAne agnO Bagavat Atman lih dfS', 98 | '', 99 | ), 100 | (['ac'], 101 | 'indra agni vAyu jAne agnO', 102 | 'Bagavat Atman lih dfS', 103 | ), 104 | (['hal'], 105 | 'Bagavat Atman lih dfS', 106 | 'indra agni vAyu jAne agnO', 107 | ), 108 | (['ec'], 109 | 'jAne agnO', 110 | 'indra agni vAyu Bagavat Atman lih dfS', 111 | ), 112 | (['Sar'], 113 | 'dfS', 114 | 'indra agni vAyu jAne agnO Bagavat Atman lih', 115 | ), 116 | ] 117 | verify(cases, F.al, term_tester) 118 | 119 | 120 | def test_ekac(): 121 | yes = 'sa nI BU yo vE ad or Ow rah grah' 122 | no = 'nara siMha Sarad jAgf' 123 | 124 | for y in yes.split(): 125 | assert term_tester(F.ekac, y) 126 | for n in no.split(): 127 | assert not term_tester(F.ekac, n) 128 | 129 | 130 | def test_gana(): 131 | pairs = [ 132 | ('BU', 'wvo~Svi'), 133 | ('a\\da~', 'hnu\N'), 134 | ('hu\\', 'gA\\'), 135 | ('divu~', 'gfDu~'), 136 | ('zu\Y', 'kzI'), 137 | ('tu\da~^', 'piSa~'), 138 | ('ru\Di~^r', 'pfcI~'), 139 | ('tanu~^', 'qukf\Y'), 140 | ('qukrI\Y', 'graha~^'), 141 | ('cura~', 'tutTa~'), 142 | ] 143 | cases = [] 144 | for i, pair in enumerate(pairs): 145 | first, last = pair 146 | yes = ' '.join(pair) 147 | no = ' '.join(' '.join(p) for j, p in enumerate(pairs) if i != j) 148 | cases.append(([first], yes, no)) 149 | 150 | verify(cases, F.gana, dhatu_tester) 151 | 152 | 153 | def test_it_samjna(): 154 | cases = [ 155 | (['kit', 'Nit'], 156 | 'kta ktvA iyaN uvaN kvasu~', 157 | 'GaY ap yat anIyar', 158 | ), 159 | (['Rit'], 160 | 'Ral Rvul', 161 | 'tip lyuw', 162 | ), 163 | ] 164 | verify(cases, F.samjna, pratyaya_tester) 165 | 166 | 167 | def test_raw(): 168 | cases = [ 169 | (['jYA\\', 'janI~\\'], 170 | 'jYA\\ janI~\\', 171 | 'gamx~ SF dF pF jYA janI janI~', 172 | ) 173 | ] 174 | verify(cases, F.raw, dhatu_tester) 175 | 176 | 177 | def test_samjna(): 178 | cases = [ 179 | (['anga'], 180 | 'nara grAma vIra', 181 | 'nara grAma vIra', 182 | ) 183 | ] 184 | 185 | for pattern, yes, no in cases: 186 | f = F.samjna(*pattern) 187 | for y in yes.split(): 188 | assert f.allows([Upadesha.as_anga(y)], 0) 189 | for n in no.split(): 190 | assert not f.allows([Pratyaya(n)], 0) 191 | 192 | 193 | def test_upadha(): 194 | cases = [ 195 | (['Yam'], 196 | 'banD granT stamB pAna', 197 | 'granTa nara narAn', 198 | ), 199 | (['at'], 200 | 'vac svap yaj', 201 | 'granT nI paca', 202 | ) 203 | ] 204 | verify(cases, F.upadha, term_tester) 205 | 206 | 207 | def test_value(): 208 | cases = [ 209 | (['jYA', 'jan'], 210 | 'jYA\\ janI~\\', 211 | 'gamx~ SF dF pF', 212 | ) 213 | ] 214 | verify(cases, F.value, dhatu_tester) 215 | 216 | 217 | # Filter combination 218 | # ~~~~~~~~~~~~~~~~~~ 219 | 220 | def test_and_(): 221 | cases = [ 222 | (['Yam'], 223 | 'banD granT stamB', 224 | 'car pAna granTa nara nayati', 225 | ) 226 | ] 227 | verify(cases, lambda names: F.upadha(names) & F.al('hal'), 228 | term_tester) 229 | 230 | 231 | def test_or_(): 232 | cases = [ 233 | (['Yam'], 234 | 'banD granT stamB pAna car', 235 | 'granTa nara nayati', 236 | ) 237 | ] 238 | verify(cases, lambda names: F.upadha(names) | F.al('hal'), 239 | term_tester) 240 | 241 | 242 | def test_not_(): 243 | cases = [ 244 | (['al'], 245 | '', 246 | 'indra agni vAyu jAne agnO Bagavat Atman lih dfS', 247 | ), 248 | (['ac'], 249 | 'Bagavat Atman lih dfS', 250 | 'indra agni vAyu jAne agnO', 251 | ), 252 | (['hal'], 253 | 'indra agni vAyu jAne agnO', 254 | 'Bagavat Atman lih dfS', 255 | ), 256 | (['ec'], 257 | 'indra agni vAyu Bagavat Atman lih dfS', 258 | 'jAne agnO', 259 | ), 260 | (['Sar'], 261 | 'indra agni vAyu jAne agnO Bagavat Atman lih', 262 | 'dfS', 263 | ), 264 | ] 265 | verify(cases, lambda x: ~F.al(x), term_tester) 266 | 267 | 268 | # Filter operators 269 | # ~~~~~~~~~~~~~~~~ 270 | 271 | def test_equality(): 272 | pairs = [ 273 | (L.IT, F.samjna), 274 | (L.LA, F.lakshana), 275 | (L.SAMJNA, F.samjna), 276 | (L.SOUNDS, F.al), 277 | (L.TIN, F.raw), 278 | ] 279 | for items, function in pairs: 280 | for item in items: 281 | assert function(item) == function(item) 282 | 283 | 284 | # 'auto' filter 285 | # ~~~~~~~~~~~~~ 286 | 287 | def test_auto_on_lists(): 288 | pairs = [ 289 | (L.IT, F.samjna), 290 | (L.LA, F.lakshana), 291 | (L.SAMJNA, F.samjna), 292 | (L.SOUNDS, F.al), 293 | (L.TIN, F.raw), 294 | ] 295 | 296 | for items, function in pairs: 297 | for item in items: 298 | assert F.auto(item) == function(item) 299 | 300 | 301 | def test_auto_on_dhatu(): 302 | for item in DP.all_dhatu: 303 | # Ambiguity with F.al('f') 304 | if item == 'f': 305 | continue 306 | assert F.auto(item) == F.dhatu(item) 307 | 308 | 309 | # Filter relationships 310 | # ~~~~~~~~~~~~~~~~~~~~ 311 | 312 | def test_subset_of_and_or(): 313 | """Ordinary subset ("and", "or")""" 314 | cases = [ 315 | [F.al('ac'), F.samjna('dhatu'), F.upadha('Yam')] 316 | ] 317 | for filters in cases: 318 | intersection = F.Filter._and(*filters) 319 | for f in filters: 320 | assert intersection.subset_of(f) 321 | 322 | union = F.Filter._or(*filters) 323 | for f in filters: 324 | assert f.subset_of(union) 325 | 326 | 327 | def test_subset_of_inference(): 328 | """Inferential subset""" 329 | bhu = F.dhatu('BU') 330 | dhatu = F.samjna('dhatu') 331 | assert bhu.subset_of(dhatu) 332 | 333 | 334 | def test_subset_of_domain(): 335 | """Subset with different domains.""" 336 | ak = F.auto('ak') 337 | ac = F.auto('ac') 338 | assert ak.subset_of(ac) 339 | assert not ac.subset_of(ak) 340 | 341 | dhatu = F.auto('dhatu') 342 | anga = F.auto('dhatu', 'anga') 343 | assert dhatu.subset_of(anga) 344 | assert not anga.subset_of(dhatu) 345 | 346 | 347 | def test_subset_of_combined(): 348 | """Combined subset (ordinary, inferential, domain)""" 349 | f = F.auto 350 | 351 | # Examples from 6.4.77 and 6.4.88 352 | snu_dhatu_bhru = f('Snu', 'dhatu', 'BrU') 353 | bhu = f('BU') 354 | assert bhu.subset_of(snu_dhatu_bhru) 355 | assert not snu_dhatu_bhru.subset_of(bhu) 356 | 357 | snu_dhatu_bhru_yv = snu_dhatu_bhru & F.al('i u') 358 | assert bhu.subset_of(snu_dhatu_bhru_yv) 359 | assert not snu_dhatu_bhru_yv.subset_of(bhu) 360 | -------------------------------------------------------------------------------- /vyakarana/terms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.upadesha 4 | ~~~~~~~~~~~~~~~~~~ 5 | 6 | Classes for working with an upadeśa. 7 | 8 | :license: MIT and BSD 9 | """ 10 | 11 | import re 12 | from collections import namedtuple 13 | 14 | from sounds import Sounds 15 | 16 | 17 | _DataSpace = namedtuple('_DataSpace', 18 | ['raw', 'clean', 'value', 'asiddhavat', 'asiddha']) 19 | 20 | 21 | class DataSpace(_DataSpace): 22 | 23 | def replace(self, **kw): 24 | prev = None 25 | new = dict() 26 | for field in self._fields: 27 | if field in kw: 28 | new[field] = prev = kw[field] 29 | elif prev is not None: 30 | new[field] = prev 31 | return self._replace(**new) 32 | 33 | 34 | class Upadesha(object): 35 | 36 | """A term with indicatory letters.""" 37 | 38 | __slots__ = ['data', 'samjna', 'lakshana', 'ops', 'parts', '_filter_cache'] 39 | nasal_re = re.compile('([aAiIuUfFxeEoO])~') 40 | 41 | def __init__(self, raw=None, **kw): 42 | # Initialized with new raw value: parse off its 'it' letters. 43 | if raw: 44 | clean, it_samjna = self._parse_it(raw, **kw) 45 | data = DataSpace(raw, clean, clean, clean, clean) 46 | samjna = it_samjna 47 | else: 48 | data = samjna = None 49 | 50 | #: The term`s data space. A given term is represented in a 51 | #: variety of ways, depending on the circumstance. For example, 52 | #: a rule might match based on a specific upadeśa (including 53 | #: 'it' letters) in one context and might match on a term's 54 | #: final sound (excluding 'it' letters) in another. 55 | self.data = kw.pop('data', data) 56 | 57 | #: The set of markers that apply to this term. Although the 58 | #: Ashtadhyayi distinguishes between samjna and *it* tags, 59 | #: the program merges them together. Thus this set might 60 | #: contain both ``'kit'`` and ``'pratyaya'``. 61 | self.samjna = kw.pop('samjna', samjna) 62 | 63 | #: The set of values that this term used to have. Technically, 64 | #: only pratyaya need to have access to this information. 65 | self.lakshana = kw.pop('lakshana', frozenset()) 66 | 67 | #: The set of rules that have been applied to this term. This 68 | #: set is maintained for two reasons. First, it prevents us 69 | #: from redundantly applying certain rules. Second, it supports 70 | #: painless rule blocking in other parts of the grammar. 71 | self.ops = kw.pop('ops', frozenset()) 72 | 73 | #: The various augments that have been added to this term. Some 74 | #: examples: 75 | #: 76 | #: - ``'aw'`` (verb prefix for past forms) 77 | #: - ``'iw'`` ('it' augment on suffixes) 78 | #: - ``'vu~k'`` ('v' for 'BU' in certain forms) 79 | self.parts = kw.pop('parts', frozenset()) 80 | 81 | self._filter_cache = {} 82 | 83 | def __eq__(self, other): 84 | if self is other: 85 | return True 86 | if other is None: 87 | return False 88 | return (self.__class__ == other.__class__ and 89 | self.data == other.data and 90 | self.samjna == other.samjna and 91 | self.lakshana == other.lakshana and 92 | self.ops == other.ops and 93 | self.parts == other.parts) 94 | 95 | def __ne__(self, other): 96 | return not self == other 97 | 98 | def __repr__(self): 99 | return "<%s('%s')>" % (self.__class__.__name__, self.value) 100 | 101 | def copy(self, **kw): 102 | for x in ['data', 'samjna', 'lakshana', 'ops', 'parts']: 103 | if x not in kw: 104 | kw[x] = getattr(self, x) 105 | 106 | return self.__class__(**kw) 107 | 108 | @staticmethod 109 | def as_anga(*a, **kw): 110 | """Create the upadesha then mark it as an ``'anga'``.""" 111 | return Upadesha(*a, **kw).add_samjna('anga') 112 | 113 | @staticmethod 114 | def as_dhatu(*a, **kw): 115 | """Create the upadesha then mark it as a ``'dhatu'``.""" 116 | return Upadesha(*a, **kw).add_samjna('anga', 'dhatu') 117 | 118 | @property 119 | def adi(self, locus='value'): 120 | """The term's first sound, or ``None`` if there isn't one.""" 121 | try: 122 | return getattr(self.data, locus)[0] 123 | except IndexError: 124 | return None 125 | 126 | @property 127 | def antya(self, locus='value'): 128 | """The term's last sound, or ``None`` if there isn't one.""" 129 | try: 130 | return getattr(self.data, locus)[-1] 131 | except IndexError: 132 | return None 133 | 134 | @property 135 | def asiddha(self): 136 | """The term's value in the asiddha space.""" 137 | return self.data.asiddha 138 | 139 | @property 140 | def asiddhavat(self): 141 | """The term's value in the asiddhavat space.""" 142 | return self.data.asiddhavat 143 | 144 | @property 145 | def clean(self): 146 | """The term's value without svaras and anubandhas.""" 147 | return self.data.clean 148 | 149 | @property 150 | def raw(self): 151 | """The term's raw value.""" 152 | return self.data.raw 153 | 154 | @property 155 | def upadha(self, locus='value'): 156 | """The term's penultimate sound, or ``None`` if there isn't one.""" 157 | try: 158 | return getattr(self.data, locus)[-2] 159 | except IndexError: 160 | return None 161 | 162 | @property 163 | def value(self): 164 | """The term's value in the siddha space.""" 165 | return self.data.value 166 | 167 | def _parse_it(self, raw, **kw): 168 | pratyaya = kw.pop('pratyaya', False) 169 | vibhakti = kw.pop('vibhakti', False) 170 | taddhita = kw.pop('taddhita', False) 171 | 172 | it = set() 173 | samjna = set() 174 | 175 | # svara 176 | for i, L in enumerate(raw): 177 | if L in ('\\', '^'): 178 | # anudattet and svaritet 179 | if raw[i - 1] == '~': 180 | if L == '\\': 181 | samjna.add('anudattet') 182 | else: 183 | samjna.add('svaritet') 184 | # anudatta and svarita 185 | else: 186 | if L == '\\': 187 | samjna.add('anudatta') 188 | else: 189 | samjna.add('svarita') 190 | 191 | clean = re.sub('[\\\\^]', '', raw) 192 | keep = [True] * len(clean) 193 | 194 | # ir 195 | if clean.endswith('i~r'): 196 | it.add('ir') 197 | keep[-3:] = [True, True, True] 198 | 199 | # 1.3.2 "upadeśe 'janunāsika iṭ" 200 | for i, L in enumerate(clean): 201 | if L == '~': 202 | it.add(clean[i - 1] + 'd') 203 | keep[i - 1] = False 204 | keep[i] = False 205 | 206 | # 1.3.3. hal antyam 207 | antya = clean[-1] 208 | if antya in Sounds('hal'): 209 | # 1.3.4 "na vibhaktau tusmāḥ" 210 | if vibhakti and antya in Sounds('tu s m'): 211 | pass 212 | else: 213 | it.add(antya) 214 | keep[-1] = False 215 | 216 | # 1.3.5 ādir ñituḍavaḥ 217 | try: 218 | two_letter = clean[:2] 219 | if two_letter in ('Yi', 'wu', 'wv', 'qu'): 220 | keep[0] = keep[1] = False 221 | if two_letter.endswith('u'): 222 | samjna.add(clean[0] + 'vit') 223 | else: 224 | samjna.add(clean[0] + 'It') 225 | except IndexError: 226 | pass 227 | 228 | # 1.3.6 "ṣaḥ pratyayasya" 229 | # 1.3.7 "cuṭū" 230 | # 231 | # It is interesting to note that no examples involving the 232 | # initial ch, jh, Th, and Dh of an affix were provided. This 233 | # omission is significant since affix initials ch, jh, Th, 234 | # and Dh always are replaced by Iy (7.1.2 AyaneyI...) ant 235 | # (7.1.3 jho 'ntaH), ik (7.3.50 ThasyekaH), and ey (7.1.2) 236 | # respectively. Thus the question of treating each of these 237 | # as an it does not arise. 238 | # 239 | # Rama Nath Sharma 240 | # The Ashtadhyayi of Panini Vol. II 241 | # Notes on 1.3.7 (p. 145) 242 | adi = clean[0] 243 | if pratyaya: 244 | # no C, J, W, Q by note above. 245 | if raw[0] in 'zcjYwqR': 246 | it.add(adi) 247 | keep[0] = False 248 | 249 | # 1.3.8 "laśakv ataddhite" 250 | if not taddhita: 251 | if adi in Sounds('l S ku'): 252 | it.add(adi) 253 | keep[0] = False 254 | 255 | # 1.3.9 tasya lopaḥ 256 | clean = ''.join(L for i, L in enumerate(clean) if keep[i]) 257 | samjna = samjna.union([x + 'it' for x in it]) 258 | return clean, samjna 259 | 260 | def add_lakshana(self, *names): 261 | """ 262 | 263 | :param names: the lakshana to add 264 | """ 265 | return self.copy(lakshana=self.lakshana.union(names)) 266 | 267 | def add_op(self, *names): 268 | """ 269 | 270 | :param names: the ops to add 271 | """ 272 | return self.copy(ops=self.ops.union(names)) 273 | 274 | def add_part(self, *names): 275 | """ 276 | 277 | :param names: the parts to add 278 | """ 279 | return self.copy(parts=self.parts.union(names)) 280 | 281 | def add_samjna(self, *names): 282 | """ 283 | 284 | :param names: the samjna to add 285 | """ 286 | return self.copy(samjna=self.samjna.union(names)) 287 | 288 | def any_samjna(self, *names): 289 | """ 290 | 291 | :param names: 292 | """ 293 | return any(a in self.samjna for a in names) 294 | 295 | def get_at(self, locus): 296 | """ 297 | 298 | :param locus: 299 | """ 300 | return getattr(self.data, locus) 301 | 302 | def remove_samjna(self, *names): 303 | """ 304 | 305 | :param names: the samjna to remove 306 | """ 307 | return self.copy(samjna=self.samjna.difference(names)) 308 | 309 | def set_asiddha(self, asiddha): 310 | """ 311 | 312 | :param asiddha: the new asiddha value 313 | """ 314 | return self.copy(data=self.data.replace(asiddha=asiddha)) 315 | 316 | def set_asiddhavat(self, asiddhavat): 317 | """ 318 | 319 | :param asiddhavat: the new asiddhavat value 320 | """ 321 | return self.copy(data=self.data.replace(asiddhavat=asiddhavat)) 322 | 323 | def set_at(self, locus, value): 324 | """ 325 | 326 | :param locus: 327 | :param value: 328 | """ 329 | funcs = { 330 | 'raw': self.set_raw, 331 | 'value': self.set_value, 332 | 'asiddhavat': self.set_asiddhavat, 333 | 'asiddha': self.set_asiddha 334 | } 335 | try: 336 | return funcs[locus](value) 337 | except KeyError: 338 | raise NotImplementedError 339 | 340 | def set_raw(self, raw): 341 | """ 342 | 343 | :param raw: the new raw value 344 | """ 345 | clean, it_samjna = self._parse_it(raw) 346 | samjna = self.samjna | it_samjna 347 | return self.copy( 348 | data=self.data.replace(raw=raw, clean=clean), 349 | samjna=samjna, 350 | lakshana=self.lakshana | set([self.raw]) 351 | ) 352 | 353 | def set_value(self, value): 354 | """ 355 | 356 | :param value: the new value 357 | """ 358 | return self.copy(data=self.data.replace(value=value)) 359 | 360 | 361 | class Pratyaya(Upadesha): 362 | 363 | __slots__ = () 364 | 365 | def __init__(self, *a, **kw): 366 | Upadesha.__init__(self, *a, **kw) 367 | self.samjna |= set(['pratyaya']) 368 | 369 | # 1.1.__ pratyayasya lukzlulupaH 370 | if self.value in ('lu~k', 'Slu~', 'lu~p'): 371 | self.data = self.data.replace(raw=self.value, clean='') 372 | 373 | def _parse_it(self, value): 374 | return Upadesha._parse_it(self, value, pratyaya=True) 375 | 376 | 377 | class Krt(Pratyaya): 378 | 379 | __slots__ = () 380 | 381 | def __init__(self, *a, **kw): 382 | Pratyaya.__init__(self, *a, **kw) 383 | self.samjna |= set(['krt']) 384 | 385 | # 3.4.113 tiGzit sArvadhAtukam 386 | # 3.4.115 liT ca (ArdhadhAtukam) 387 | if 'Sit' in self.samjna and self.raw != 'li~w': 388 | self.samjna |= set(['sarvadhatuka']) 389 | else: 390 | self.samjna |= set(['ardhadhatuka']) 391 | 392 | 393 | class Vibhakti(Pratyaya): 394 | 395 | __slots__ = () 396 | 397 | def __init__(self, *a, **kw): 398 | Pratyaya.__init__(self, *a, **kw) 399 | self.samjna |= set(['vibhakti']) 400 | 401 | def _parse_it(self, value): 402 | return Upadesha._parse_it(self, value, pratyaya=True, vibhakti=True) 403 | -------------------------------------------------------------------------------- /vyakarana/operators.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | vyakarana.operators 4 | ~~~~~~~~~~~~~~~~~~~ 5 | 6 | Excluding paribhāṣā, all rules in the Ashtadhyayi describe a context 7 | then specify an operation to apply based on that context. Within 8 | this simulator, operations are defined using *operators*, which 9 | take some (state, index) pair and return a new state. 10 | 11 | This module defines a variety of parameterized and unparameterized 12 | operators. 13 | 14 | :license: MIT and BSD 15 | """ 16 | 17 | from sounds import Sound, Sounds 18 | 19 | conflicts = [ 20 | ('dirgha', 'hrasva'), 21 | ('insert', ), 22 | ('replace', ), 23 | ('add_samjna', ), 24 | ('ti', 'tasya'), 25 | ] 26 | 27 | 28 | class Operator(object): 29 | 30 | """A callable class that returns states.""" 31 | 32 | def __init__(self, *args, **kw): 33 | #: The operator type. For example, a substitution operator has 34 | #: category ``tasya``. 35 | self.category = self._make_category(*args, **kw) 36 | 37 | #: A unique name for this operator. If the operator is not 38 | #: parameterized, then this is the same as `self.category`. 39 | self.name = self._make_name(*args, **kw) 40 | 41 | #: The function that corresponds to this operator. The input 42 | #: and output of the function depend on the operator class. For 43 | #: a general :class:`Operator`, this function accepts a state 44 | #: and index and returns a new state. 45 | self.body = self._make_body(*args, **kw) 46 | 47 | #: the operator's parameters, if any. 48 | self.params = self._make_params(*args, **kw) 49 | 50 | def apply(self, state, index, locus='value'): 51 | return self.body(state, index, locus) 52 | 53 | def __eq__(self, other): 54 | """Equality operator. 55 | 56 | Two operators are the same if they perform the same operation. 57 | 58 | :param other: the other :class:`Operator` 59 | """ 60 | if self is other: 61 | return True 62 | if other is None: 63 | return False 64 | return self.name == other.name and self.params == other.params 65 | 66 | def __ne__(self, other): 67 | return not self.__eq__(other) 68 | 69 | def __repr__(self): 70 | return '' % self.name 71 | 72 | def _make_body(self, *args, **kw): 73 | try: 74 | # Parameterized: defined in class 75 | return self.body 76 | except AttributeError: 77 | # Unparameterized: passed by kwarg 78 | return kw.get('body') 79 | 80 | def _make_category(self, *args, **kw): 81 | return kw.get('category') or kw.get('name') 82 | 83 | def _make_name(self, *args, **kw): 84 | result = kw.get('name') 85 | if result: 86 | return result 87 | else: 88 | category = self._make_category(*args, **kw) 89 | try: 90 | return '%s(%s)' % (category, ', '.join(args)) 91 | except TypeError: 92 | return '%s(...)' % category 93 | 94 | def _make_params(self, *args, **kw): 95 | return kw.get('params') or args or None 96 | 97 | @classmethod 98 | def parameterized(cls, fn): 99 | """Decorator constructor for parameterized operators. 100 | 101 | :param fn: a function factory. It accepts parameters and returns 102 | a parameterized operator function. 103 | """ 104 | def wrapped(*args, **kw): 105 | category = fn.__name__ 106 | try: 107 | name = '%s(%s)' % (category, ', '.join(args)) 108 | except TypeError: 109 | name = '%s(...)' % category 110 | body = fn(*args, **kw) 111 | return cls(name=name, body=body, category=category, params=args) 112 | return wrapped 113 | 114 | @classmethod 115 | def no_params(cls, fn): 116 | """Decorator constructor for unparameterized operators. 117 | 118 | :param fn: some operator function 119 | """ 120 | name = fn.__name__ 121 | return cls(name=name, body=fn, category=name) 122 | 123 | def conflicts_with(self, other): 124 | """ 125 | Return whether this operator conflicts with another. 126 | 127 | Two operators are in conflict if any of the following hold: 128 | 129 | - they each insert something into the state 130 | - one prevents or nullifies the change caused by the other. By 131 | "nullify" I mean that the result is as if neither operator 132 | was applied. 133 | 134 | For example, two `insert` operators are always in conflict. And 135 | `hrasva` and `dirgha` are in conflict, since `hrasva` undoes 136 | `dirgha`. But `hrasva` and `guna` are not in conflict, since 137 | neither blocks or nullifies the other. 138 | 139 | :param other: an operator 140 | """ 141 | for c in conflicts: 142 | if self.category in c and other.category in c: 143 | return True 144 | return False 145 | 146 | 147 | class DataOperator(Operator): 148 | 149 | """An operator whose `body` modifies a term's data. 150 | 151 | `body` accepts and returns a single string. 152 | """ 153 | 154 | def apply(self, state, index, locus='value'): 155 | cur = state[index] 156 | _input = cur.value 157 | if not _input: 158 | return state 159 | output = self.body(_input) 160 | if output != _input: 161 | return state.swap(index, cur.set_at(locus, output)) 162 | else: 163 | return state 164 | 165 | 166 | # Parameterized operators 167 | # ~~~~~~~~~~~~~~~~~~~~~~~ 168 | # Each function accepts arbitrary arguments and returns a valid operator. 169 | 170 | @Operator.parameterized 171 | def add_samjna(*names): 172 | def func(state, index, locus=None): 173 | cur = state[index] 174 | return state.swap(index, cur.add_samjna(*names)) 175 | return func 176 | 177 | 178 | def adi(result): 179 | return tasya(result, adi=True) 180 | 181 | 182 | @DataOperator.parameterized 183 | def al_tasya(target, result): 184 | target = Sounds(target) 185 | result = Sounds(result) 186 | 187 | def func(value): 188 | letters = list(value) 189 | for i, L in enumerate(letters): 190 | if L in target: 191 | letters[i] = Sound(L).closest(result) 192 | # 1.1.51 ur aṇ raparaḥ 193 | if L in 'fF' and letters[i] in Sounds('aR'): 194 | letters[i] += 'r' 195 | break 196 | return ''.join(letters) 197 | return func 198 | 199 | 200 | @Operator.parameterized 201 | def insert(term): 202 | def func(state, index, *a): 203 | return state.insert(index, term) 204 | return func 205 | 206 | 207 | @DataOperator.parameterized 208 | def replace(target, result): 209 | def func(value): 210 | return value.replace(target, result) 211 | return func 212 | 213 | 214 | @Operator.parameterized 215 | def tasya(sthani, adi=False): 216 | def func(state, index, locus): 217 | term = state[index] 218 | term_value = term.get_at(locus) 219 | new_value = None 220 | add_part = False 221 | 222 | # 1.1.54 ādeḥ parasya 223 | if adi: 224 | try: 225 | new_value = sthani.value + term_value[1:] 226 | except AttributeError: 227 | new_value = sthani + term_value[1:] 228 | 229 | elif isinstance(sthani, basestring): 230 | # 1.1.52 alo 'ntyasya 231 | # 1.1.55 anekālśit sarvasya 232 | if len(sthani) <= 1: 233 | new_value = term_value[:-1] + sthani 234 | else: 235 | new_value = sthani 236 | 237 | elif not hasattr(sthani, 'value'): 238 | # 1.1.50 sthāne 'ntaratamaḥ 239 | last = Sound(term.antya).closest(sthani) 240 | new_value = term_value[:-1] + last 241 | 242 | # 1.1.47 mid aco 'ntyāt paraḥ 243 | elif 'mit' in sthani.samjna: 244 | ac = Sounds('ac') 245 | for i, L in enumerate(reversed(term_value)): 246 | if L in ac: 247 | break 248 | new_value = term_value[:-i] + sthani.value + term_value[-i:] 249 | add_part = True 250 | 251 | # 1.1.46 ādyantau ṭakitau 252 | elif 'kit' in sthani.samjna: 253 | new_value = term_value + sthani.value 254 | add_part = True 255 | elif 'wit' in sthani.samjna: 256 | new_value = sthani.value + term_value 257 | add_part = True 258 | 259 | # 1.1.52 alo 'ntyasya 260 | # 1.1.53 ṅic ca 261 | elif len(sthani.value) == 1 or 'Nit' in sthani.samjna: 262 | new_value = term_value[:-1] + sthani.value 263 | 264 | # 1.1.55 anekālśit sarvasya 265 | elif 'S' in sthani.it or len(sthani.value) > 1: 266 | new_value = sthani.value 267 | 268 | if new_value is not None: 269 | new_term = term.set_at(locus, new_value) 270 | if add_part: 271 | new_term = new_term.add_part(sthani.raw) 272 | return state.swap(index, new_term) 273 | 274 | raise NotImplementedError(sthani) 275 | 276 | return func 277 | 278 | 279 | @DataOperator.parameterized 280 | def ti(result): 281 | """Create an operator that replaces the *ṭi* of some value. 282 | 283 | 1.1.64 aco 'ntyādi ṭi 284 | The portion starting with the last vowel is called *ṭi*. 285 | 286 | :param result: the replacement 287 | """ 288 | ac = Sounds('ac') 289 | 290 | def func(value): 291 | for i, L in enumerate(reversed(value)): 292 | if L in ac: 293 | break 294 | return value[:-(i + 1)] + result 295 | 296 | return func 297 | 298 | 299 | @DataOperator.parameterized 300 | def upadha(result): 301 | """Create an operator that replaces the *upadhā* of some value. 302 | 303 | 1.1.65 alo 'ntyāt pūrva upadhā 304 | The letter before the last is called *upadhā*. 305 | 306 | :param result: the replacement 307 | """ 308 | def func(value): 309 | try: 310 | return value[:-2] + result + value[-1] 311 | except IndexError: 312 | return value 313 | 314 | return func 315 | 316 | 317 | @Operator.parameterized 318 | def yathasamkhya(targets, results): 319 | converter = dict(zip(targets, results)) 320 | 321 | def func(state, index, locus): 322 | cur = state[index] 323 | cur = cur.set_raw(converter[cur.raw]) 324 | return state.swap(index, cur) 325 | return func 326 | 327 | 328 | # Unparameterized operators 329 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 330 | # Each function defines an operator. 331 | 332 | @DataOperator.no_params 333 | def dirgha(value): 334 | converter = dict(zip('aiufx', 'AIUFX')) 335 | letters = list(value) 336 | for i, L in enumerate(letters): 337 | if L in converter: 338 | letters[i] = converter[L] 339 | break 340 | 341 | return ''.join(letters) 342 | 343 | 344 | @Operator.no_params 345 | def guna(state, index, locus=None): 346 | cur = state[index] 347 | try: 348 | right = state[index + 1] 349 | except (IndexError, TypeError): 350 | right = None 351 | 352 | # 1.1.5 kGiti ca (na) 353 | if right is not None and right.any_samjna('kit', 'Nit'): 354 | return state 355 | 356 | # 1.1.2 adeG guNaH 357 | # 1.1.3 iko guNavRddhI 358 | converter = dict(zip('iIuUfFxX', 'eeooaaaa')) 359 | letters = list(cur.value) 360 | for i, L in enumerate(letters): 361 | if L in converter: 362 | letters[i] = converter[L] 363 | if L in 'fF': 364 | letters[i] += 'r' 365 | break 366 | 367 | cur = cur.set_value(''.join(letters)).add_samjna('guna') 368 | return state.swap(index, cur) 369 | 370 | 371 | @DataOperator.no_params 372 | def hrasva(value): 373 | converter = dict(zip('AIUFXeEoO', 'aiufxiiuu')) 374 | letters = list(value) 375 | for i, L in enumerate(letters): 376 | if L in converter: 377 | letters[i] = converter[L] 378 | break 379 | 380 | return ''.join(letters) 381 | 382 | 383 | @DataOperator.no_params 384 | def samprasarana(value): 385 | rev_letters = list(reversed(value)) 386 | found = False 387 | for i, L in enumerate(rev_letters): 388 | # 1.1.45 ig yaNaH saMprasAraNAm 389 | # TODO: enforce short vowels automatically 390 | if L in Sounds('yaR'): 391 | rev_letters[i] = Sound(L).closest('ifxu') 392 | found = True 393 | break 394 | 395 | if not found: 396 | return value 397 | 398 | # 6.4.108 saMprasAraNAc ca 399 | try: 400 | L = rev_letters[i - 1] 401 | if L in Sounds('ac'): 402 | rev_letters[i - 1] = '' 403 | except IndexError: 404 | pass 405 | 406 | return ''.join(reversed(rev_letters)) 407 | 408 | 409 | @Operator.no_params 410 | def vrddhi(state, index, locus=None): 411 | cur = state[index] 412 | try: 413 | right = state[index + 1] 414 | except (IndexError, TypeError): 415 | right = None 416 | 417 | # 1.1.5 kGiti ca (na) 418 | if right and right.any_samjna('kit', 'Nit'): 419 | cur = cur 420 | return state.swap(index, cur) 421 | 422 | # 1.1.1 vRddhir Adaic 423 | # 1.1.3 iko guNavRddhI 424 | converter = dict(zip('iIuUfFxX', 'EEOOAAAA')) 425 | letters = list(cur.value) 426 | for i, L in enumerate(letters): 427 | if L in converter: 428 | letters[i] = converter[L] 429 | if L in 'fF': 430 | letters[i] += 'r' 431 | break 432 | 433 | cur = cur.set_value(''.join(letters)) 434 | return state.swap(index, cur) 435 | 436 | 437 | @Operator.no_params 438 | def force_guna(state, index, locus=None): 439 | cur = state[index] 440 | converter = dict(zip('iIuUfFxX', 'eeooaaaa')) 441 | letters = list(cur.value) 442 | for i, L in enumerate(letters): 443 | if L in converter: 444 | letters[i] = converter[L] 445 | if L in 'fF': 446 | letters[i] += 'r' 447 | break 448 | 449 | cur = cur.set_value(''.join(letters)).add_samjna('guna') 450 | return state.swap(index, cur) 451 | --------------------------------------------------------------------------------