├── tests ├── __init__.py ├── test_pattern.py ├── test_pop_iita.py ├── test_state2imp.py ├── test_variance.py ├── test_pop_variance.py ├── test_summary_iita.py ├── test_summary_popiita.py ├── data │ ├── test_delineate.csv │ └── test_data.csv ├── test_z_test.py ├── test_simu.py ├── test_ind_gen.py ├── test_ob_counter.py ├── test_gradedness.py ├── test_corr_iita.py ├── test_mini_iita.py ├── test_orig_iita.py ├── test_delineation.py ├── test_imp2state.py ├── test_conversion.py ├── test_iita.py └── test_blim.py ├── learning_spaces ├── __init__.py ├── kst │ ├── z_test.py │ ├── pop_iita.py │ ├── variance.py │ ├── pop_variance.py │ ├── state2imp.py │ ├── summary_popiita.py │ ├── print_pat.py │ ├── print_iita.py │ ├── summary_iita.py │ ├── print_sumpopiita.py │ ├── print_popiita.py │ ├── ob_counter.py │ ├── __init__.py │ ├── pattern.py │ ├── print_ztest.py │ ├── orig_iita.py │ ├── corr_iita.py │ ├── imp2state.py │ ├── ind_gen.py │ ├── mini_iita.py │ ├── iita.py │ ├── hasse.py │ ├── stochastic_markov.py │ └── simu.py └── pks │ ├── __init__.py │ ├── gradedness.py │ ├── conversion.py │ ├── delineation.py │ └── blim.py ├── setup.cfg ├── dist ├── learning_spaces-0.2.0.tar.gz └── learning_spaces-0.2.0-py3-none-any.whl ├── .gitignore ├── setup.py ├── requirements.txt └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pattern.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pop_iita.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_state2imp.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_variance.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /learning_spaces/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pop_variance.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_summary_iita.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [easy_install] 2 | 3 | -------------------------------------------------------------------------------- /tests/test_summary_popiita.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /learning_spaces/kst/z_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def z_test(): 4 | 5 | return {} -------------------------------------------------------------------------------- /learning_spaces/kst/pop_iita.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pop_iita(): 5 | 6 | return {} -------------------------------------------------------------------------------- /learning_spaces/kst/variance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def variance(): 5 | 6 | return {} -------------------------------------------------------------------------------- /learning_spaces/kst/pop_variance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pop_variance(): 5 | 6 | return {} -------------------------------------------------------------------------------- /learning_spaces/pks/__init__.py: -------------------------------------------------------------------------------- 1 | from learning_spaces.pks.conversion import convert_as_pattern, convert_as_bin_mat 2 | -------------------------------------------------------------------------------- /tests/data/test_delineate.csv: -------------------------------------------------------------------------------- 1 | item,s,t,u 2 | e,1,1,0 3 | e,1,0,1 4 | f,0,0,1 5 | g,1,0,0 6 | g,0,1,0 7 | h,0,1,0 8 | -------------------------------------------------------------------------------- /dist/learning_spaces-0.2.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/milansegedinac/kst/HEAD/dist/learning_spaces-0.2.0.tar.gz -------------------------------------------------------------------------------- /learning_spaces/kst/state2imp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def state2imp(P): 5 | 6 | return {} -------------------------------------------------------------------------------- /dist/learning_spaces-0.2.0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/milansegedinac/kst/HEAD/dist/learning_spaces-0.2.0-py3-none-any.whl -------------------------------------------------------------------------------- /learning_spaces/kst/summary_popiita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def summary_popiita(obj): 5 | return {'value': obj, 'class_name': "sumpopiita"} -------------------------------------------------------------------------------- /tests/data/test_data.csv: -------------------------------------------------------------------------------- 1 | a,b,c,d,e 2 | 0,0,0,0,0 3 | 1,0,0,0,0 4 | 0,1,0,0,0 5 | 1,1,0,0,0 6 | 1,1,1,0,0 7 | 1,1,0,1,0 8 | 1,1,1,1,0 9 | 1,1,1,0,1 10 | 1,1,1,1,1 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled python modules. 2 | *.pyc 3 | 4 | # Setuptools distribution folder. 5 | # /dist/ 6 | 7 | # Python egg metadata, regenerated from source files by setuptools. 
8 | /*.egg-info 9 | 10 | .idea/ 11 | /env/ 12 | .venv 13 | .vscode -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='learning_spaces', 5 | version='0.2.0', 6 | description='Knowledge Space Theory', 7 | url='https://github.com/milansegedinac/kst', 8 | packages=find_packages(), 9 | install_requires=['numpy', 'pandas', 'pydot', 'matplotlib'] 10 | ) 11 | -------------------------------------------------------------------------------- /tests/test_z_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | sys.path.append('../learning_spaces/') 4 | from learning_spaces.kst import z_test 5 | 6 | 7 | class TestZTest(unittest.TestCase): 8 | 9 | def test_ZTest(self): 10 | result = z_test() 11 | 12 | # z_test is still a stub; for now it is only expected to return an empty dictionary 13 | self.assertEqual({}, result) 14 | 15 | 16 | if __name__ == '__main__': 17 | unittest.main() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.11.0 2 | fonttools==4.28.2 3 | kiwisolver==1.3.2 4 | matplotlib==3.5.0 5 | nose==1.3.7 6 | numpy==1.21.4 7 | packaging==21.3 8 | pandas==1.3.4 9 | patsy==0.5.2 10 | Pillow==8.4.0 11 | pydot==1.4.2 12 | pyparsing==3.0.6 13 | python-dateutil==2.8.2 14 | pytz==2021.3 15 | scipy==1.7.3 16 | setuptools-scm==6.3.2 17 | six==1.16.0 18 | tomli==1.2.2 -------------------------------------------------------------------------------- /learning_spaces/kst/print_pat.py: -------------------------------------------------------------------------------- 1 | 2 | def print_pat(x): 3 | """ 4 | Formatted print of pattern response 5 | :param x: dictionary - response from pattern function 6 | :return: 7 | """ 8 | print('\nthe {} most frequent response patterns in the data:'.format(x['n'])) 9 | print(x['response.patterns']) 10 | if x['states'] is not None: 11 | print("Number of times a state occurs in the data:") 12 | print(x['states']) 13 | -------------------------------------------------------------------------------- /tests/test_simu.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | sys.path.append('../learning_spaces/') 4 | from learning_spaces.kst import simu 5 | 6 | 7 | class TestSimu(unittest.TestCase): 8 | 9 | def test_simu(self): 10 | result = simu(items=3, size=3, ce=0.0, lg=0.0, delta=0.0) 11 | 12 | self.assertTrue('dataset' in result) 13 | self.assertTrue('implications' in result) 14 | self.assertTrue('states' in result) 15 | 16 | 17 | if __name__ == '__main__': 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /learning_spaces/kst/print_iita.py: -------------------------------------------------------------------------------- 1 | def print_iita(obj): 2 | """ 3 | Formatted print of iita response 4 | 5 | :param obj: dictionary - response from iita function 6 | :return: 7 | """ 8 | 9 | print('\n\tInductive Item Tree Analysis\n') 10 | 11 | algorithm = '-' 12 | if obj['v'] == 1: 13 | algorithm = 'minimized corrected' 14 | elif obj['v'] == 2: 15 | algorithm = 'corrected' 16 | elif obj['v'] == 3: 17 | algorithm = 'original' 18 | 19 | print('\nAlgorithm: {} IITA'.format(algorithm)) 20 | print('\nQuasi order: {}'.format(obj['implications'])) 21 |
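22 | 23 | # Minimal usage sketch (doctest-style; the data mirrors the README example): 24 | # >>> import pandas as pd 25 | # >>> from learning_spaces.kst import iita, print_iita 26 | # >>> df = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 27 | # >>> print_iita(iita(df, v=1))  # prints the algorithm name and the selected quasi order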
-------------------------------------------------------------------------------- /tests/test_ind_gen.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import sys 4 | sys.path.append('../learning_spaces/') 5 | from learning_spaces.kst import ind_gen 6 | 7 | 8 | class TestIndGen(unittest.TestCase): 9 | 10 | def test_ind_gen(self): 11 | b = np.array([[0, 1, 1], [2, 0, 1], [1, 0, 0]]) 12 | result = ind_gen(b) 13 | 14 | self.assertEqual(3, len(result)) 15 | self.assertEqual([(2, 1)], result[0]) 16 | self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], result[1]) 17 | self.assertEqual([(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)], result[2]) 18 | 19 | 20 | if __name__ == '__main__': 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /learning_spaces/kst/summary_iita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def summary_iita(obj): 5 | 6 | print('\n\tInductive Item Tree Analysis\n') 7 | 8 | algorithm = '-' 9 | if obj['v'] == 1: 10 | algorithm = 'minimized corrected' 11 | elif obj['v'] == 2: 12 | algorithm = 'corrected' 13 | elif obj['v'] == 3: 14 | algorithm = 'original' 15 | 16 | print('\nAlgorithm: {} IITA'.format(algorithm)) 17 | print("error rate: {}".format(round(obj['error.rate'], 3))) 18 | print("diff values: {}".format(np.round(obj['diff'], 3))) 19 | print('\nQuasi order: {}'.format(obj['implications'])) 20 | print("index in the selection set: ") 21 | print(str(obj['selection.set.index'])) -------------------------------------------------------------------------------- /learning_spaces/kst/print_sumpopiita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def print_sumpopiita(x): 5 | print("\n \t Inductive Item Tree Analysis in population values\n") 6 | print("\nAlgorithm:") 7 | if (x['v'] == 1): 8 | print(" minimized corrected IITA\n") 9 | if (x['v'] == 2): 10 | print(" corrected IITA\n") 11 | if (x['v'] == 3): 12 | print(" original IITA\n") 13 | print("\npopulation diff values:\n") 14 | print(np.round(x['pop.diff'], 3)) 15 | print("\npopulation error rates:\n") 16 | print(np.round(x['error.pop'], 3)) 17 | print("\npopulation matrix:\n") 18 | print(np.round(x['pop.matrix'], 3)) 19 | print("\nobtained selection set:\n") 20 | print(x['selection.set']) -------------------------------------------------------------------------------- /learning_spaces/kst/print_popiita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def print_popiita(obj): 5 | print("\n \t Inductive Item Tree Analysis in population values\n") 6 | print("\nAlgorithm:") 7 | if (obj['v'] == 1): 8 | print(" minimized corrected IITA\n") 9 | if (obj['v'] == 2): 10 | print(" corrected IITA\n") 11 | if (obj['v'] == 3): 12 | print(" original IITA\n") 13 | print("\npopulation diff values:\n") 14 | print(np.round(obj['pop.diff'], 3)) 15 | print("\npopulation error rates:\n") 16 | print(np.round(obj['error.pop'], 3)) 17 | print("\nquasi order:\n") 18 | selection_set = obj['selection.set'] 19 | diff = obj['pop.diff'] 20 | index = np.min(np.where(selection_set == diff)) 21 | print(selection_set[index][0]) 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /learning_spaces/kst/ob_counter.py: -------------------------------------------------------------------------------- 1 |
import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def ob_counter(dataset): 6 | """ 7 | Computation of numbers of counterexamples 8 | Computes, for all item pairs in a dataset, the corresponding numbers of counterexamples. 9 | 10 | :param dataset: dataframe or matrix consisting of ones and zeros 11 | :return: matrix of the numbers of counterexamples for all pairs of items 12 | """ 13 | 14 | (n, m) = dataset.shape 15 | b = np.zeros((m, m), dtype=np.int32) 16 | 17 | data = dataset 18 | if isinstance(dataset, pd.DataFrame): 19 | data = dataset.values 20 | 21 | for i in range(m): 22 | for j in range(m): 23 | if i != j: 24 | b[i, j] = sum(np.logical_and(data[:, i] == 0, data[:, j] == 1)) 25 | 26 | return b 27 |
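28 | 29 | # Minimal usage sketch (doctest-style; data mirrors tests/test_ob_counter.py): 30 | # b[i, j] counts the respondents who failed item i while solving item j. 31 | # >>> import pandas as pd 32 | # >>> from learning_spaces.kst import ob_counter 33 | # >>> ob_counter(pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]})).tolist() 34 | # [[0, 1, 1], [2, 0, 1], [1, 0, 0]]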
-------------------------------------------------------------------------------- /tests/test_ob_counter.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | sys.path.append('../learning_spaces/') 6 | from learning_spaces.kst import ob_counter 7 | 8 | 9 | class TestObCounter(unittest.TestCase): 10 | 11 | def test_ob_counter_with_dataframe(self): 12 | data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 13 | response = ob_counter(data_frame) 14 | 15 | self.assertEqual([[0, 1, 1], [2, 0, 1], [1, 0, 0]], response.tolist()) 16 | 17 | def test_ob_counter_with_matrix(self): 18 | matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]]) 19 | response = ob_counter(matrix) 20 | 21 | self.assertEqual([[0, 1, 1], [2, 0, 1], [1, 0, 0]], response.tolist()) 22 | 23 | 24 | if __name__ == '__main__': 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /tests/test_gradedness.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import sys 4 | sys.path.append('../learning_spaces/') 5 | from learning_spaces.pks import gradedness 6 | 7 | 8 | class TestGradedness(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.df = pd.read_csv("data/test_data.csv") 12 | 13 | def test_is_forward_graded_df(self): 14 | response = gradedness.is_forward_graded(self.df) 15 | self.assertTrue(response['a']) 16 | self.assertTrue(response['b']) 17 | self.assertFalse(response['c']) 18 | self.assertFalse(response['d']) 19 | self.assertFalse(response['e']) 20 | 21 | def test_is_backward_graded_df(self): 22 | response = gradedness.is_backward_graded(self.df) 23 | self.assertFalse(response['a']) 24 | self.assertFalse(response['b']) 25 | self.assertFalse(response['c']) 26 | self.assertTrue(response['d']) 27 | self.assertTrue(response['e']) 28 | -------------------------------------------------------------------------------- /tests/test_corr_iita.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | sys.path.append('../learning_spaces/') 6 | from learning_spaces.kst import corr_iita 7 | 8 | 9 | class TestCorrIita(unittest.TestCase): 10 | 11 | def test_corr_iita_with_dataframe(self): 12 | data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 13 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 14 | response = corr_iita(data_frame, A) 15 | 16 | self.assertEqual([0.18518518518518515, 0.16666666666666666], response['diff.value'].tolist()) 17 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 18 | 19 | def test_corr_iita_with_matrix(self): 20 | matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]]) 21 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 22 | response = corr_iita(matrix, A) 23 | 24 | self.assertEqual([0.18518518518518515, 0.16666666666666666], response['diff.value'].tolist()) 25 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_mini_iita.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | sys.path.append('../learning_spaces/') 6 | from learning_spaces.kst import mini_iita 7 | 8 | 9 | class TestMiniIita(unittest.TestCase): 10 | 11 | def test_mini_iita_with_dataframe(self): 12 | data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 13 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 14 | response = mini_iita(data_frame, A) 15 | 16 | self.assertEqual([0.18518518518518515, 0.16666666666666666], response['diff.value'].tolist()) 17 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 18 | 19 | def test_mini_iita_with_matrix(self): 20 | matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]]) 21 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 22 | response = mini_iita(matrix, A) 23 | 24 | self.assertEqual([0.18518518518518515, 0.16666666666666666], response['diff.value'].tolist()) 25 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_orig_iita.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | sys.path.append('../learning_spaces/') 6 | from learning_spaces.kst import orig_iita 7 | 8 | 9 | class TestOrigIita(unittest.TestCase): 10 | 11 | def test_orig_iita_with_dataframe(self): 12 | data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 13 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 14 | response = orig_iita(data_frame, A) 15 | 16 | self.assertEqual([0.20370370370370369, 0.39814814814814814], response['diff.value'].tolist()) 17 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 18 | 19 | def test_orig_iita_with_matrix(self): 20 | matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]]) 21 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 22 | response = orig_iita(matrix, A) 23 | 24 | self.assertEqual([0.20370370370370369, 0.39814814814814814], response['diff.value'].tolist()) 25 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_delineation.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import sys 4 | sys.path.append('../learning_spaces/') 5 | from learning_spaces.pks import delineation 6 | 7 | 8 | class TestDelineation(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.df = pd.read_csv("data/test_delineate.csv") 12 | self.columns = ['e', 'f', 'g', 'h'] 13 | self.values = [ 14 | [0, 0, 0, 0], 15 | [0, 0, 1, 0], 16 | [0, 0, 1, 1], 17 | [0, 1, 0, 0], 18 | [1, 0, 1, 1], 19 | [1,
1, 1, 0], 20 | [0, 1, 1, 1], 21 | [1, 1, 1, 1] 22 | ] 23 | self.ddf = pd.DataFrame(self.values, columns=self.columns) 24 | self.classes = {'s': '0010', 'su': '1110', 'st': '1011', 'u': '0100', 'tu': '0111', 'stu': '1111', 25 | '0': '0000', 't': '0011'} 26 | 27 | def test_delineate_df(self): 28 | dataframe, classes = delineation.delineate(self.df) 29 | self.assertDictEqual(self.classes, classes) 30 | self.assertListEqual(list(self.ddf), list(dataframe)) 31 | self.assertListEqual(self.ddf.values.tolist(), dataframe.values.tolist()) 32 | -------------------------------------------------------------------------------- /learning_spaces/kst/__init__.py: -------------------------------------------------------------------------------- 1 | from learning_spaces.kst.ob_counter import ob_counter 2 | from learning_spaces.kst.orig_iita import orig_iita 3 | from learning_spaces.kst.corr_iita import corr_iita 4 | from learning_spaces.kst.mini_iita import mini_iita 5 | from learning_spaces.kst.ind_gen import ind_gen 6 | from learning_spaces.kst.iita import iita, iita_exclude_transitive 7 | from learning_spaces.kst.imp2state import imp2state 8 | from learning_spaces.kst.simu import simu 9 | from learning_spaces.kst.print_iita import print_iita 10 | from learning_spaces.kst.hasse import hasse 11 | from learning_spaces.kst.pattern import pattern 12 | from learning_spaces.kst.pop_iita import pop_iita 13 | from learning_spaces.kst.pop_variance import pop_variance 14 | from learning_spaces.kst.print_pat import print_pat 15 | from learning_spaces.kst.print_popiita import print_popiita 16 | from learning_spaces.kst.print_sumpopiita import print_sumpopiita 17 | from learning_spaces.kst.print_ztest import print_ztest 18 | from learning_spaces.kst.state2imp import state2imp 19 | from learning_spaces.kst.summary_iita import summary_iita 20 | from learning_spaces.kst.summary_popiita import summary_popiita 21 | from learning_spaces.kst.variance import variance 22 | from learning_spaces.kst.z_test import z_test -------------------------------------------------------------------------------- /tests/test_imp2state.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | sys.path.append('../learning_spaces/') 4 | from learning_spaces.kst import imp2state 5 | 6 | 7 | class TestImp2state(unittest.TestCase): 8 | 9 | def setUp(self): 10 | # data-provider alternative 11 | self.tests = [ 12 | { 13 | 'imp': [(1, 0)], 14 | 'items': 2, 15 | 'expected': [[0, 0], [0, 1], [1, 1]] 16 | }, 17 | { 18 | 'imp': [(0, 1), (0, 2), (2, 0), (2, 1)], 19 | 'items': 3, 20 | 'expected': [[0, 0, 0], [1, 0, 1], [1, 1, 1]] 21 | }, 22 | { 23 | 'imp': [(0, 1), (0, 3), (1, 0), (1, 3), (2, 0), (2, 1), (2, 3), (3, 0), (3, 1)], 24 | 'items': 4, 25 | 'expected': [[0, 0, 0, 0], [0, 0, 1, 0], [1, 1, 1, 1]] 26 | }, 27 | { 28 | 'imp': [(0, 3), (0, 4), (2, 0), (2, 3), (2, 4), (3, 0), (3, 4), (4, 0), (4, 3)], 29 | 'items': 5, 30 | 'expected': [[0, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 1, 1, 0, 0], [1, 0, 1, 1, 1], [1, 1, 1, 1, 1]] 31 | } 32 | ] 33 | 34 | def test_imp2state(self): 35 | for test in self.tests: 36 | self.assertEqual(test['expected'], imp2state(test['imp'], test['items']).tolist()) 37 | 38 | 39 | if __name__ == '__main__': 40 | unittest.main() 41 | -------------------------------------------------------------------------------- /learning_spaces/pks/gradedness.py: -------------------------------------------------------------------------------- 1 | from .conversion import 
convert_as_pattern 2 | 3 | 4 | def is_forward_graded(data): 5 | """ 6 | Checks if a knowledge structure is forward-graded in any item 7 | :param data: dataframe with binary matrix representing the knowledge structure 8 | :return: logical dict of items 9 | """ 10 | ret_val = {} 11 | data_pattern = convert_as_pattern(data) 12 | header = list(data) 13 | for item in header: 14 | new_data = data.copy(deep=True) 15 | new_data[item] = 1 16 | new_data_pattern = convert_as_pattern(new_data) 17 | graded = [] 18 | for pattern in new_data_pattern: 19 | graded.append(pattern in data_pattern) 20 | ret_val[item] = all(graded) 21 | return ret_val 22 | 23 | 24 | def is_backward_graded(data): 25 | """ 26 | Checks if a knowledge structure is backward-graded in any item 27 | :param data: dataframe with binary matrix representing the knowledge structure 28 | :return: logical dict of items 29 | """ 30 | ret_val = {} 31 | data_pattern = convert_as_pattern(data) 32 | header = list(data) 33 | for item in header: 34 | new_data = data.copy(deep=True) 35 | new_data[item] = 0 36 | new_data_pattern = convert_as_pattern(new_data) 37 | graded = [] 38 | for pattern in new_data_pattern: 39 | graded.append(pattern in data_pattern) 40 | ret_val[item] = all(graded) 41 | return ret_val 42 | -------------------------------------------------------------------------------- /tests/test_conversion.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import sys 4 | sys.path.append('../learning_spaces/') 5 | from learning_spaces.pks import conversion 6 | 7 | 8 | class TestConversion(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.df = pd.read_csv("data/test_data.csv") 12 | 13 | def test_convert_as_pattern_df(self): 14 | response = conversion.convert_as_pattern(self.df) 15 | self.assertListEqual(['00000', '10000', '01000', '11000', '11100', '11010', '11110', '11101', '11111'], response) 16 | 17 | def test_convert_as_pattern_df_freq(self): 18 | patterns, freq = conversion.convert_as_pattern(self.df, freq=True) 19 | self.assertListEqual(['00000', '10000', '01000', '11000', '11100', '11010', '11110', '11101', '11111'], patterns) 20 | self.assertListEqual([1, 1, 1, 1, 1, 1, 1, 1, 1], freq) 21 | 22 | def test_convert_as_bin_mat_df(self): 23 | pattern = conversion.convert_as_pattern(self.df) 24 | response = conversion.convert_as_bin_mat(pattern) 25 | self.assertListEqual(list(self.df), list(response)) 26 | self.assertListEqual(self.df.values.tolist(), response.values.tolist()) 27 | 28 | def test_convert_as_bin_mat_df_col_names(self): 29 | pattern = conversion.convert_as_pattern(self.df) 30 | col_names = ['i', 'j', 'k', 'l', 'm'] 31 | response = conversion.convert_as_bin_mat(pattern, col_names=col_names) 32 | self.assertListEqual(col_names, list(response)) 33 | self.assertListEqual(self.df.values.tolist(), response.values.tolist()) 34 | -------------------------------------------------------------------------------- /learning_spaces/kst/pattern.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from collections import Counter 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | 8 | def pattern(dataset, n=5, p=None): 9 | """ 10 | pattern 11 | computes the absolute frequencies of the response patterns, 12 | and optionally, the absolute frequencies of a collection of 13 | specified knowledge states in a dataset. 
14 | 15 | :param dataset: dataframe or matrix consisting of ones and zeros 16 | :param n: number of patterns (must be greater than zero) 17 | :param p: dataframe or matrix 18 | :return: dictionary representing pattern data 19 | """ 20 | 21 | if n < 1: 22 | sys.exit('Number of patterns must be greater than zero.') 23 | 24 | data = dataset 25 | if isinstance(dataset, pd.DataFrame): 26 | data = dataset.values 27 | 28 | def ks_to_str(ks): return ''.join((str(is_correct_answer) for is_correct_answer in ks)) 29 | 30 | pattern = Counter(np.apply_along_axis(ks_to_str, axis=1, arr=data)) 31 | if n > len(pattern): 32 | n = len(pattern) 33 | 34 | if p is None: 35 | return {'response.patterns': pattern.most_common(n), 'states': p, 'n': n} 36 | 37 | def getKnowledgeStatesFrequencies(p): 38 | return np.apply_along_axis(lambda row: pattern[ks_to_str(row)], axis=1, arr=p) 39 | 40 | if isinstance(p, pd.DataFrame): 41 | states = p.assign(size=getKnowledgeStatesFrequencies(p.values)) 42 | else: 43 | frequencies = getKnowledgeStatesFrequencies(p) 44 | states = np.hstack((p, np.reshape(frequencies, (-1, 1)))) 45 | 46 | return {'response.patterns': pattern.most_common(n), 'states': states, 'n': n} 47 |
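48 | 49 | # Minimal usage sketch (doctest-style, hypothetical data): rows are response 50 | # patterns, and identical rows are counted together. 51 | # >>> import numpy as np 52 | # >>> from learning_spaces.kst import pattern 53 | # >>> pattern(np.array([[1, 0, 0], [1, 0, 0], [1, 1, 0]]), n=2) 54 | # {'response.patterns': [('100', 2), ('110', 1)], 'states': None, 'n': 2}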
-------------------------------------------------------------------------------- /learning_spaces/kst/print_ztest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def print_ztest(x): 4 | if len(x['diff_value']) == 1: 5 | print("\n One sample Z-test\n") 6 | if len(x['diff_value']) == 2: 7 | print("\n \t Two sample Z-test\n") 8 | print("\nz = {}".format(round(x['Z.value'], 4))) 9 | print("p-value = {}".format(round(x['p.value'], 4))) 10 | if "two.sided" == x['alternative']: 11 | if x['imp_alt'] is None: 12 | print("\nalternative hypothesis: true mean is not equal to {}".format(x['mu'])) 13 | else: 14 | print("\nalternative hypothesis: true difference in means is not equal to {}".format(x['mu'])) 15 | 16 | if "greater" == x['alternative']: 17 | if x['imp_alt'] is None: 18 | print("\nalternative hypothesis: true mean is greater than {}".format(x['mu'])) 19 | else: 20 | print("\nalternative hypothesis: true difference in means is greater than {}".format(x['mu'])) 21 | 22 | if x['alternative'] == "less": 23 | if x['imp_alt'] is None: 24 | print("\nalternative hypothesis: true mean is less than {}".format(x['mu'])) 25 | else: 26 | print("\nalternative hypothesis: true difference in means is less than {}".format(x['mu'])) 27 | 28 | print(str(x['conf.level'] * 100) + " percent confidence interval:\n") 29 | print(x['conf']) 30 | print("sample estimates:\n") 31 | if len(x['diff_value']) == 1: 32 | # one-sample case: a single mean in imp 33 | print("mean in imp: {}".format(round(x['diff_value'][0], 5))) 34 | if len(x['diff_value']) == 2: 35 | # two-sample case: round each of the two means separately 36 | print("means in imp and imp_alt: {}".format([round(value, 5) for value in x['diff_value']])) 37 | -------------------------------------------------------------------------------- /learning_spaces/kst/orig_iita.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pandas as pd 4 | from learning_spaces.kst import ob_counter 5 | 6 | 7 | def orig_iita(dataset, A): 8 | """ 9 | Original Inductive Item Tree Analysis 10 | Performs the original inductive item tree analysis procedure and returns the corresponding diff values. 11 | 12 | :param dataset: dataframe or matrix consisting of ones and zeros 13 | :param A: list of competing quasi orders 14 | :return: dictionary 15 | """ 16 | 17 | data = dataset 18 | if isinstance(dataset, pd.DataFrame): 19 | data = dataset.values 20 | 21 | b = ob_counter(data) 22 | if sum(b.sum(axis=0) == 0): 23 | sys.exit('Each item must be solved at least once') 24 | 25 | n, m = data.shape 26 | 27 | bs = [] 28 | for i in range(len(A)): 29 | bs.insert(i, np.zeros(b.shape)) 30 | 31 | diff_value_alt = np.repeat(0.0, len(A)) 32 | error = np.repeat(0.0, len(A)) 33 | 34 | # computation of error rate 35 | for k in range(len(A)): 36 | for i in A[k]: 37 | error[k] += (b[i[0]][i[1]] / float(data[:, i[1]].sum())) 38 | if not A[k]: 39 | error[k] = None 40 | else: 41 | error[k] /= len(A[k]) 42 | 43 | # computation of diff values 44 | all_imp = set() 45 | for i in range(m-1): 46 | for j in range(i+1, m): 47 | all_imp = all_imp.union(all_imp, {(i, j), (j, i)}) 48 | 49 | for k in range(len(A)): 50 | if not A[k]: 51 | diff_value_alt[k] = None 52 | else: 53 | for i in all_imp: 54 | if i in A[k]: 55 | bs[k][i[0]][i[1]] = error[k] * data[:, i[1]].sum() 56 | else: 57 | bs[k][i[0]][i[1]] = (1.0 - data[:, i[0]].sum() / float(n)) * data[:, i[1]].sum() * (1.0 - error[k]) 58 | diff_value_alt[k] = ((b - bs[k]) ** 2).sum() / (m ** 2 - m) 59 | 60 | return {'diff.value': diff_value_alt, 'error.rate': error} 61 |
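62 | 63 | # Minimal usage sketch (doctest-style; quasi orders as in tests/test_orig_iita.py, 64 | # one diff value and one error rate per competing quasi order): 65 | # >>> import pandas as pd 66 | # >>> from learning_spaces.kst import orig_iita 67 | # >>> df = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 68 | # >>> result = orig_iita(df, [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]]) 69 | # >>> result['diff.value'].tolist() 70 | # [0.20370370370370369, 0.39814814814814814]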
-------------------------------------------------------------------------------- /learning_spaces/kst/corr_iita.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pandas as pd 4 | from learning_spaces.kst import ob_counter 5 | 6 | 7 | def corr_iita(dataset, A): 8 | """ 9 | Corrected Inductive Item Tree Analysis 10 | Performs the corrected inductive item tree analysis procedure and returns the corresponding diff values. 11 | 12 | :param dataset: dataframe or matrix consisting of ones and zeros 13 | :param A: list of competing quasi orders 14 | :return: dictionary 15 | """ 16 | 17 | data = dataset 18 | if isinstance(dataset, pd.DataFrame): 19 | data = dataset.values 20 | 21 | b = ob_counter(data) 22 | if sum(b.sum(axis=0) == 0): 23 | sys.exit('Each item must be solved at least once') 24 | 25 | n, m = data.shape 26 | 27 | bs = [] 28 | for i in range(len(A)): 29 | bs.insert(i, np.zeros((m, m))) 30 | 31 | diff_value_alt = np.repeat(0.0, len(A)) 32 | error = np.repeat(0.0, len(A)) 33 | 34 | # computation of error rate 35 | for k in range(len(A)): 36 | for i in A[k]: 37 | error[k] += (b[i[0]][i[1]] / float(data[:, i[1]].sum())) 38 | if not A[k]: 39 | error[k] = None 40 | else: 41 | error[k] /= len(A[k]) 42 | 43 | # computation of diff values 44 | all_imp = set() 45 | for i in range(m - 1): 46 | for j in range(i + 1, m): 47 | all_imp = all_imp.union(all_imp, {(i, j), (j, i)}) 48 | 49 | for k in range(len(A)): 50 | if not A[k]: 51 | diff_value_alt[k] = None 52 | else: 53 | for i in all_imp: 54 | if i in A[k]: 55 | bs[k][i[0]][i[1]] = error[k] * data[:, i[1]].sum() 56 | if (i not in A[k]) and ((i[1], i[0]) not in A[k]): 57 | bs[k][i[0]][i[1]] = (1.0 - data[:, i[0]].sum() / float(n)) * data[:, i[1]].sum() 58 | if (i not in A[k]) and ((i[1], i[0]) in A[k]): 59 | bs[k][i[0]][i[1]] = data[:, i[1]].sum() - data[:, i[0]].sum() + data[:, i[0]].sum() * error[k] 60 | diff_value_alt[k] = ((b - bs[k]) ** 2).sum() / (m ** 2 - m) 61 | 62 | return {'diff.value': diff_value_alt, 'error.rate': error} 63 | -------------------------------------------------------------------------------- /learning_spaces/kst/imp2state.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def imp2state(imp, items): 5 | """ 6 | Transformation from Implications to Knowledge States 7 | Transforms a set of implications to the corresponding set of knowledge states (the quasi ordinal knowledge space).
8 | 9 | :param imp: list of implications 10 | :param items: number of items 11 | :return: matrix consisting of ones and zeros 12 | """ 13 | 14 | R_2 = np.ones((items, items)) 15 | for i in range(items): 16 | for j in range(items): 17 | if (i != j) and ((i, j) not in imp): 18 | R_2[j, i] = 0 19 | 20 | # base 21 | base = [] 22 | 23 | for i in range(items): 24 | tmp = [] 25 | for j in range(items): 26 | if R_2[i, j] == 1: 27 | tmp.append(j) 28 | base.insert(i, tmp) 29 | 30 | base_list = [] 31 | for i in range(items): 32 | base_list.insert(i, set()) 33 | for j in range(len(base[i])): 34 | base_list[i].update(frozenset([base[i][j]])) 35 | 36 | # span of base 37 | G = [] 38 | G.insert(0, {frozenset()}) 39 | G.insert(1, set()) 40 | for i in range(len(base[0])): 41 | G[1].update(frozenset([base[0][i]])) 42 | G[1] = {frozenset(), frozenset(G[1])} 43 | 44 | for i in range(1, items): 45 | H = {frozenset()} 46 | for j in G[i]: 47 | if not base_list[i].issubset(j): 48 | for d in range(i): 49 | if base_list[d].issubset(j.union(base_list[i])): 50 | if base_list[d].issubset(j): 51 | H.update(frozenset([j.union(base_list[i])])) 52 | if not base_list[d].issubset(j.union(base_list[i])): 53 | H.update(frozenset([j.union(base_list[i])])) 54 | G.insert(i+1, G[i].union(H)) 55 | 56 | # patterns 57 | P = np.zeros((len(G[items]), items), dtype=np.int8) 58 | i = 0 59 | sorted_g = [list(i) for i in G[items]] 60 | sorted_g.sort(key=lambda x: (len(x), x)) 61 | 62 | for k in sorted_g: 63 | for j in range(items): 64 | if j in k: 65 | P[i, j] = 1 66 | i += 1 67 | 68 | return P 69 |
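70 | 71 | # Minimal usage sketch (doctest-style; the case mirrors tests/test_imp2state.py): 72 | # >>> from learning_spaces.kst import imp2state 73 | # >>> imp2state([(1, 0)], 2).tolist() 74 | # [[0, 0], [0, 1], [1, 1]]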
-------------------------------------------------------------------------------- /learning_spaces/kst/ind_gen.py: -------------------------------------------------------------------------------- 1 | def ind_gen(b): 2 | """ 3 | Inductive Generation Procedure 4 | Generates inductively a list of competing quasi orders. 5 | 6 | :param b: matrix of the numbers of counterexamples for all pairs of items 7 | :return: list of inductively generated quasi orders 8 | """ 9 | 10 | (n, m) = b.shape 11 | 12 | # set of all pairs with a maximum of k-1 counterexamples 13 | S = [] 14 | 15 | # constructed relation for a maximum of k-1 counterexamples 16 | A = [] 17 | 18 | # set of non-transitive triples 19 | M = [] 20 | M.append([]) 21 | S.append([]) 22 | for i in range(m): 23 | for j in range(m): 24 | if (i != j) and (b[i, j] == b.min()): 25 | S[0].append((i, j)) 26 | 27 | A.append(list(S[0])) 28 | 29 | # inductive generation process 30 | elements = list(set(b.flatten().ravel())) 31 | elements.sort() 32 | if 0 in elements: 33 | elements = elements[1:] 34 | 35 | k = 1 36 | 37 | for element in elements: 38 | S.insert(k, []) 39 | A.insert(k, []) 40 | M.insert(k, []) 41 | 42 | # building of S 43 | for i in range(m): 44 | for j in range(m): 45 | if (i != j) and (b[i, j] <= element) and ((i, j) not in A[k-1]): 46 | S[k].append((i, j)) 47 | 48 | # transitivity test: repeatedly drop intransitive pairs until M[k] stabilizes 49 | if S[k]: 50 | M[k] = list(S[k]) 51 | changed = True 52 | while changed: 53 | snapshot = list(M[k]) 54 | for i in list(M[k]): 55 | for h in range(m): 56 | if (h != i[0]) and (h != i[1]) and ((i[1], h) in (A[k-1] + M[k])) and ((i[0], h) not in (A[k-1] + M[k])): 57 | if i in M[k]: 58 | M[k].remove(i) 59 | if (h != i[0]) and (h != i[1]) and ((h, i[0]) in (A[k-1] + M[k])) and ((h, i[1]) not in (A[k-1] + M[k])): 60 | if i in M[k]: 61 | M[k].remove(i) 62 | if snapshot == M[k]: 63 | changed = False 64 | A[k] = A[k-1] + M[k] 65 | 66 | k += 1 67 | 68 | # deletion of empty and duplicated quasi orders 69 | A = {frozenset(x) for x in A} 70 | A.discard(set()) 71 | # sort 72 | A = [sorted(list(x)) for x in A] 73 | A.sort(key=len) 74 | 75 | return A
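76 | 77 | # Minimal usage sketch (doctest-style; b mirrors tests/test_ind_gen.py and is a 78 | # counterexample matrix as produced by ob_counter): 79 | # >>> import numpy as np 80 | # >>> from learning_spaces.kst import ind_gen 81 | # >>> ind_gen(np.array([[0, 1, 1], [2, 0, 1], [1, 0, 0]])) 82 | # [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)], [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]]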
-------------------------------------------------------------------------------- /learning_spaces/kst/mini_iita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from learning_spaces.kst import ob_counter 4 | 5 | 6 | def mini_iita(dataset, A): 7 | """ 8 | Minimized Corrected Inductive Item Tree Analysis 9 | Performs the minimized corrected inductive item tree analysis procedure and returns the corresponding diff values. 10 | 11 | :param dataset: dataframe or matrix consisting of ones and zeros 12 | :param A: list of competing quasi orders 13 | :return: dictionary 14 | """ 15 | 16 | data = dataset 17 | if isinstance(dataset, pd.DataFrame): 18 | data = dataset.values 19 | 20 | b = ob_counter(data) 21 | n, m = data.shape 22 | 23 | bs_num = [] 24 | for i in range(len(A)): 25 | bs_num.insert(i, np.zeros((m, m))) 26 | 27 | p = [] 28 | for i in range(m): 29 | p.insert(i, data[:, i].sum()) 30 | 31 | diff_value_alt = np.repeat(0.0, len(A)) 32 | error = np.repeat(0.0, len(A)) 33 | 34 | # computation of error rate 35 | for k in range(len(A)): 36 | x = np.repeat(0.0, 4) 37 | for i in range(m): 38 | for j in range(m): 39 | if (i != j) and ((i, j) in A[k]): 40 | x[1] += -2 * b[i, j] * p[j] 41 | x[3] += 2 * p[j] ** 2 42 | if (i != j) and ((i, j) not in A[k]) and ((j, i) in A[k]): 43 | x[0] += -2 * b[i, j] * p[i] + 2 * p[i] * p[j] - 2 * p[i] ** 2 44 | x[2] += 2 * p[i] ** 2 45 | 46 | error[k] = -(x[0] + x[1]) / (x[2] + x[3]) 47 | 48 | # computation of diff values 49 | all_imp = set() 50 | for i in range(m - 1): 51 | for j in range(i + 1, m): 52 | all_imp = all_imp.union(all_imp, {(i, j), (j, i)}) 53 | 54 | for k in range(len(A)): 55 | if not A[k]: 56 | diff_value_alt[k] = None 57 | else: 58 | for i in all_imp: 59 | if i in A[k]: 60 | bs_num[k][i[0]][i[1]] = error[k] * data[:, i[1]].sum() 61 | if (i not in A[k]) and ((i[1], i[0]) not in A[k]): 62 | bs_num[k][i[0]][i[1]] = (1.0 - data[:, i[0]].sum() / float(n)) * data[:, i[1]].sum() 63 | if (i not in A[k]) and ((i[1], i[0]) in A[k]): 64 | bs_num[k][i[0]][i[1]] = data[:, i[1]].sum() - data[:, i[0]].sum() + data[:, i[0]].sum() * error[k] 65 | diff_value_alt[k] = ((b - bs_num[k]) ** 2).sum() / (m ** 2 - m) 66 | 67 | return {'diff.value': diff_value_alt, 'error.rate': error} 68 | -------------------------------------------------------------------------------- /learning_spaces/pks/conversion.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import string 3 | 4 | 5 | def convert_as_pattern(data, freq=False, as_letters=False): 6 | """ 7 | Convert binary matrix of response patterns or knowledge spaces into pattern representation 8 | :param data: dataframe with binary matrix 9 | :param freq: displaying frequencies of response patterns 10 | :param as_letters: return response patterns as combination of header letters 11 | :return: list of patterns or list of patterns with list of frequencies of patterns 12 | """ 13 | ret_val = [] 14 | for row in data.itertuples(): 15 | pattern = "" 16 | for i in range(1, len(row)): 17 | if as_letters: 18 | if row[i] == 1: 19 | pattern += list(data)[i-1] 20 | else: 21 | pattern += str(row[i]) 22 | if pattern == "": 23 | ret_val.append(str(0)) 24 | else: 25 | ret_val.append(pattern) 26 | 27 | if freq: 28 | ret_pat = [] 29 | counts = [] 30 | for pattern in ret_val: 31 | if pattern not in ret_pat: 32 | ret_pat.append(pattern) 33 | counts.append(ret_val.count(pattern)) 34 | return ret_pat, counts 35 | else: 36 | return ret_val 37 | 38 | 39 | def convert_as_bin_mat(data, col_names=None): 40 | """ 41 | Convert pattern representation of response patterns or knowledge spaces into binary matrix 42 | :param data: list of response patterns 43 | :param col_names: list of names of matrix columns 44 | :return: dataframe with binary matrix 45 | """ 46 | header = [] 47 | if col_names is None: 48 | num_of_letters = 0 49 | for pattern in data: 50 | if len(pattern) > num_of_letters: 51 | num_of_letters =
len(pattern) 52 | header = list(string.ascii_lowercase[:num_of_letters]) 53 | else: 54 | header = col_names 55 | 56 | values = [] 57 | for pattern in data: 58 | value = [] 59 | if "0" in pattern or "1" in pattern: 60 | if pattern == "0": # empty set 61 | value = [int(0)] * len(header) 62 | else: 63 | for p in pattern: 64 | value.append(int(p)) 65 | else: # pattern is combination of header letters 66 | for h in header: 67 | if h in pattern: 68 | value.append(int(1)) 69 | else: 70 | value.append(int(0)) 71 | values.append(value) 72 | 73 | return pd.DataFrame(values, columns=header) 74 | -------------------------------------------------------------------------------- /learning_spaces/pks/delineation.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from .conversion import convert_as_bin_mat 3 | 4 | 5 | def delineate(skill_fun, item_id=0): 6 | """ 7 | Computes knowledge structure delineated by a skill function 8 | :param skill_fun: dataframe representing the skill function. Consists of an item indicator and a 9 | problem-by-skill indicator binary matrix 10 | :param item_id: index of a column in skill_fun that holds the item indicator 11 | :return: dataframe representing the knowledge structure and a dict of equivalence classes of competence states 12 | """ 13 | # extracting skill set 14 | data = skill_fun.drop(skill_fun.columns[item_id], axis=1) 15 | skills = list(data) 16 | # extracting item set with corresponding skills 17 | items = [] 18 | items_skills = {} 19 | for row in data.itertuples(): 20 | item = skill_fun.iloc[row.Index, item_id] 21 | item_skill = "" 22 | for i in range(1, len(row)): 23 | if int(row[i]) == 1: 24 | item_skill += skills[i-1] 25 | if item not in items: 26 | items.append(item) 27 | items_skills[item] = [] 28 | items_skills[item].append(item_skill) 29 | # generating 2 ^ skills mapping 30 | combinations = get_all_combinations(skills) 31 | # empty set 32 | # generating knowledge structure and appropriate classes 33 | values = ['0' * len(items)] 34 | classes = {} 35 | classes['0'] = values[0] 36 | # generating from skill function 37 | for combination in combinations: 38 | value = "" 39 | for item in items: 40 | if contains_string(combination, items_skills[item]): 41 | value += "1" 42 | else: 43 | value += "0" 44 | classes[combination] = value 45 | values.append(value) 46 | return convert_as_bin_mat(values, items), classes 47 | 48 | 49 | def get_all_combinations(input_chars): 50 | """ 51 | Generate all combinations of given characters 52 | :param input_chars: input characters 53 | :return: list of all combinations 54 | """ 55 | ret_val = [] 56 | for i in range(len(input_chars)): 57 | temp = list(itertools.combinations(input_chars, i + 1)) 58 | for t in temp: 59 | ret_val.append(''.join(t)) 60 | return ret_val 61 | 62 | 63 | def contains_string(src, dest): 64 | """ 65 | Checking if destination string contains any subset of source string 66 | :param src: source string 67 | :param dest: list of destination strings 68 | :return: True or False 69 | """ 70 | chars = list(src) 71 | combinations = get_all_combinations(chars) 72 | for combination in combinations: 73 | if combination in dest: 74 | return True 75 | return False 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Knowledge Space Theory 2 | 3 | KST is an open source software library with Python implementations of basic Knowledge 
Space Theory algorithms. 4 | 5 | ## Usage 6 | 7 | ### Installation 8 | You can either clone the project or download a distribution [file](./dist/learning_spaces-0.2.0-py3-none-any.whl) and run the command: 9 | `pip install /path-to-downloaded-file/learning_spaces-0.2.0-py3-none-any.whl` 10 | 11 | ### Setup in Python 12 | KST requires Python 3.9. It is recommended to use the library in a separate virtual environment. A brief and practical introduction to virtual environments can be found at the following [link](https://docs.python-guide.org/dev/virtualenvs/). 13 | First, create a virtual environment: 14 | ``` 15 | mkvirtualenv kst 16 | ``` 17 | After creating the virtual environment, install the requirements: 18 | ``` 19 | pip install -r requirements.txt 20 | ``` 21 | After that, the library can be used. 22 | ```python 23 | >>> import pandas as pd 24 | >>> from learning_spaces.kst import iita 25 | >>> data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 26 | >>> response = iita(data_frame, v=1) 27 | >>> print(response) 28 | {'diff': array([ 0.18518519, 0.16666667, 0.21296296]), 'implications': [(0, 1), (0, 2), (2, 0), (2, 1)], 'error.rate': 0.5, 'selection.set.index': 1, 'v': 1} 29 | ``` 30 | 31 | ### Setup in a browser 32 | KST can be run in a browser environment, without the need for a Python server. We use [Pyodide](https://github.com/pyodide/pyodide), which brings the Python runtime to the browser via WebAssembly. The page below is a minimal sketch: it loads Pyodide from a CDN, installs the distribution wheel with micropip, and runs the same example as above. 33 | 34 | Full example (open the browser console to see the result): 35 | ```html 36 | <!DOCTYPE html> 37 | <html> 38 | <head> 39 |     <script src="https://cdn.jsdelivr.net/pyodide/v0.19.0/full/pyodide.js"></script> 40 | </head> 41 | <body> 42 |     <script type="text/javascript"> 43 |         async function main() { 44 |             const pyodide = await loadPyodide({ indexURL: 'https://cdn.jsdelivr.net/pyodide/v0.19.0/full/' }); 45 |             await pyodide.loadPackage(['numpy', 'pandas', 'micropip']); 46 |             await pyodide.runPythonAsync(` 47 |                 import micropip 48 |                 await micropip.install('dist/learning_spaces-0.2.0-py3-none-any.whl') 49 |                 import pandas as pd 50 |                 from learning_spaces.kst import iita 51 |                 data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 52 |                 print(iita(data_frame, v=1)) 53 |             `); 54 |         } 55 |         main(); 56 |     </script> 57 | </body> 58 | </html> 59 | ``` -------------------------------------------------------------------------------- /learning_spaces/kst/iita.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import numpy as np 4 | from learning_spaces.kst import ind_gen 5 | from learning_spaces.kst import ob_counter 6 | from learning_spaces.kst import orig_iita 7 | from learning_spaces.kst import mini_iita 8 | from learning_spaces.kst import corr_iita 9 | 10 | 11 | def iita(dataset, v): 12 | """ 13 | Inductive Item Tree Analysis 14 | Performs one of the three inductive item tree analysis algorithms (minimized corrected, corrected and original).
15 | 16 | :param dataset: dataframe or matrix consisting of ones and zeros 17 | :param v: algorithm: v=1 (minimized corrected), v=2 (corrected) and v=3 (original) 18 | :return: dictionary 19 | """ 20 | 21 | if (not isinstance(dataset, pd.DataFrame) and not isinstance(dataset, np.ndarray)) or (dataset.shape[1] == 1): 22 | sys.exit('data must be either a numeric matrix or a dataframe, with at least two columns.') 23 | 24 | data = dataset 25 | if isinstance(dataset, pd.DataFrame): 26 | data = dataset.values 27 | 28 | if np.logical_not(np.logical_or(data == 0, data == 1)).sum() != 0: 29 | sys.exit('data must contain only 0 and 1') 30 | 31 | if v not in (1, 2, 3): 32 | sys.exit('IITA version must be specified') 33 | 34 | # inductively generated set of competing quasi orders 35 | i = ind_gen(ob_counter(data)) 36 | 37 | # call chosen algorithm 38 | if v == 1: 39 | ii = mini_iita(data, i) 40 | elif v == 2: 41 | ii = corr_iita(data, i) 42 | elif v == 3: 43 | ii = orig_iita(data, i) 44 | 45 | index = list(ii['diff.value']).index(min(ii['diff.value'])) 46 | return {'diff': ii['diff.value'], 'implications': i[index], 'error.rate': ii['error.rate'][index], 'selection.set.index': index, 'v': v} 47 | 48 | 49 | def iita_exclude_transitive(dataset, v): 50 | """ 51 | Inductive Item Tree Analysis 52 | Performs one of the three inductive item tree analysis algorithms (minimized corrected, corrected and original) 53 | and then performs transitive reduction (removes transitive edges). 54 | The implications list will keep the same vertices with as few edges as possible. 55 | 56 | :param dataset: dataframe or matrix consisting of ones and zeros 57 | :param v: algorithm: v=1 (minimized corrected), v=2 (corrected) and v=3 (original) 58 | :return: dictionary 59 | """ 60 | response = iita(dataset, v) 61 | impl = response['implications'] 62 | 63 | # symmetric reduction 64 | # edges keeps only one direction of each symmetric pair of implications 65 | edges = [] 66 | for x, y in impl: 67 | if (y, x) not in edges: 68 | edges.append((x, y)) 69 | 70 | 71 | # nodes is a list of all nodes extracted from edges 72 | nodes = list(set([node for pair in edges for node in pair])) 73 | 74 | # transitive reduction 75 | # remove transitive edges from the list of edges 76 | for x in nodes: 77 | for y in nodes: 78 | for z in nodes: 79 | if (x, y) in edges and (y, z) in edges: 80 | try: 81 | edges.remove((x, z)) 82 | except ValueError: 83 | pass 84 | 85 | # update a list of implications after transitive reduction 86 | response['implications'] = edges 87 | return response
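88 | 89 | # Minimal usage sketch (doctest-style; data mirrors the README example): 90 | # >>> import pandas as pd 91 | # >>> from learning_spaces.kst import iita, iita_exclude_transitive 92 | # >>> df = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 93 | # >>> iita(df, v=1)['implications'] 94 | # [(0, 1), (0, 2), (2, 0), (2, 1)] 95 | # >>> iita_exclude_transitive(df, v=1)['implications']  # symmetric + transitive pruning 96 | # [(0, 2), (2, 1)]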
-------------------------------------------------------------------------------- /learning_spaces/kst/hasse.py: -------------------------------------------------------------------------------- 1 | import pydot 2 | import matplotlib.pyplot as plt 3 | import matplotlib.image as mpimg 4 | import tempfile 5 | import os 6 | 7 | def hasse(imp, items, dir_path = None, labels = None): 8 | """ 9 | Hasse diagram of Surmise Relation 10 | Plots the Hasse diagram of surmise relation. 11 | 12 | :param imp: list of implications 13 | :param items: number of items of the domain 14 | :param dir_path: path to the png directory 15 | :param labels: string labels for items 16 | :return: produces a plot and returns a list of the equally informative items 17 | """ 18 | 19 | parallel_items = {} 20 | implications = list(imp) 21 | 22 | # generate partially ordered set 23 | for i in implications: 24 | if (i[1], i[0]) in implications: 25 | if i[0] in parallel_items: 26 | parallel_items[i[0]].append(i[1]) 27 | else: 28 | parallel_items[i[0]] = [i[0], i[1]] 29 | implications.remove(i) 30 | implications.remove((i[1], i[0])) 31 | for j in range(len(implications)): 32 | if i[1] == implications[j][0]: 33 | implications[j] = (i[0], implications[j][1]) 34 | elif i[1] == implications[j][1]: 35 | implications[j] = (implications[j][0], i[0]) 36 | 37 | implications = list(set(implications)) 38 | # remove reflexive properties 39 | for i in list(implications): 40 | if i[0] == i[1]: 41 | implications.remove(i) 42 | 43 | # i j k 44 | # (0,1)(1,2),(0,2) 45 | # remove transitive properties 46 | for i in list(implications): 47 | for j in list(implications): 48 | for k in list(implications): 49 | if i[1]==j[0] and j[1]==k[1] and i[0]==k[0]: 50 | implications.remove(k) 51 | 52 | for i in list(implications): 53 | for j in range(items): 54 | if (i[0] != j) and (i[1] != j) and ((i[0], j) in implications) and ((i[1], j) in implications): 55 | implications.remove((i[0], j)) 56 | 57 | # bottom-up approach 58 | for i in range(len(implications)): 59 | implications[i] = (implications[i][1], implications[i][0]) 60 | 61 | graph = pydot.Dot(graph_type='graph') 62 | print(implications) 63 | if labels: 64 | for i in implications: 65 | graph.add_edge(pydot.Edge(str(labels[int(i[0])]), str(labels[int(i[1])]))) 66 | else: 67 | for i in implications: 68 | graph.add_edge(pydot.Edge(i[0], i[1])) 69 | 70 | # standalone nodes 71 | for i in range(items): 72 | found = False 73 | for implication in implications: 74 | if i in implication: 75 | found = True 76 | break 77 | if not found: 78 | parallel = False 79 | for key, value in parallel_items.items(): 80 | if i in value: 81 | parallel = True 82 | break 83 | if not parallel: 84 | graph.add_node(pydot.Node(i)) 85 | 86 | fout = tempfile.NamedTemporaryFile(mode = 'w+t', dir = dir_path, suffix=".png", delete = False) 87 | graph.write(fout.name, format="png") 88 | img = mpimg.imread(fout.name) 89 | plt.axis('off') 90 | plt.imshow(img) 91 | plt.show() 92 | os.remove(fout.name) 93 | 94 | return [list(set(value)) for key, value in parallel_items.items()] 95 | -------------------------------------------------------------------------------- /learning_spaces/kst/stochastic_markov.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import random 3 | from typing import Tuple 4 | 5 | import numpy as np 6 | 7 | def _array2dict_vals(array: np.ndarray, mapping: dict): 8 | for i, key in enumerate(mapping): 9 | mapping[key] = array[i] 10 | 11 | def _scale_probabilities(states: dict[Tuple[str], float]): 12 | probabilities = np.array(list(states.values())) 13 | probabilities /= sum(probabilities) 14 | _array2dict_vals(probabilities, states) 15 | 16 | def _likeliest_state(states: dict[Tuple[str], float]) -> Tuple[Tuple[str], float]: 17 | """ 18 | Returns likeliest state and its probability.
19 | :return: (state, probability) 20 | """ 21 | return max(states.items(), key=operator.itemgetter(1)) 22 | 23 | def _take_answer(question: str) -> bool: 24 | print(f'{question}: correct/incorrect? [1/0]') 25 | return int(input()) == 1 26 | 27 | def questioning_rule(states: dict[Tuple[str], float]) -> str: 28 | """ 29 | :param states: dictionary mapping states (sets of problems/questions) to probabilities 30 | :return: question to be asked 31 | """ 32 | if not np.isclose(1, sum(states.values()), atol=0.01): 33 | raise ValueError('Probabilities do not add up to 1!') 34 | 35 | state, _ = _likeliest_state(states) 36 | return random.choice(state) 37 | 38 | def response_rule(question: str, states: dict[Tuple[str], float]) -> float: 39 | """ 40 | :param question: question the answer is given to 41 | :param states: dictionary mapping states (sets of problems/questions) to probabilities 42 | :return: probability of giving correct answer according to given states 43 | """ 44 | ret_val = 0 45 | for state, probability in states.items(): 46 | if question in state: 47 | ret_val += probability 48 | return ret_val 49 | 50 | def updating_rule(question: str, answer_correct: bool, r: float, states: dict[Tuple[str], float]): 51 | """ 52 | Updates probabilities on passed states. 53 | :param question: question the answer is given to 54 | :param answer_correct: whether answer is correct 55 | :param r: response rule output 56 | :param states: dictionary mapping states (sets of problems/questions) to probabilities 57 | """ 58 | theta = 0.1 * r 59 | theta_compl = 1 - theta 60 | if not answer_correct: 61 | theta, theta_compl = theta_compl, theta 62 | 63 | for state in states: 64 | if question in state: 65 | states[state] *= theta_compl 66 | else: 67 | states[state] *= theta 68 | _scale_probabilities(states) 69 | 70 | def final_state(states: dict[Tuple[str], float]): 71 | state, probability = _likeliest_state(states) 72 | return state if probability > 0.75 else None 73 | 74 | def stochastic_markov(states: dict[Tuple[str], float]) -> None: 75 | max_iter = 100 76 | for _ in range(max_iter): 77 | question = questioning_rule(states) 78 | r = response_rule(question, states) 79 | answer_correct = _take_answer(question) 80 | updating_rule(question, answer_correct, r, states) 81 | print(states) 82 | final = final_state(states) 83 | if final is not None: 84 | print(final) 85 | return 86 | print('Non-conclusive.') 87 | 88 | def demo(): 89 | states = {('a'): 0.125, ('a', 'b'): 0.25, ('b'): 0.125, ('a', 'b', 'c'): 0.5} 90 | print(states) 91 | stochastic_markov(states) 92 | 93 | if __name__ == '__main__': 94 | demo() 95 | -------------------------------------------------------------------------------- /learning_spaces/kst/simu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from learning_spaces.kst import imp2state 3 | 4 | 5 | def simu(items, size, ce, lg, delta, imp=None): 6 | """ 7 | Data and Quasi Order Simulation Tool 8 | 9 | :param items: number of items of the domain taken as a basis for the simulation 10 | :param size: number of response patterns to be simulated (the sample size) 11 | :param ce: probability for a careless error 12 | :param lg: probability for a lucky guess 13 | :param delta: probability for adding an item pair to the randomly generated quasi order 14 | :param imp: list of implications (assumed to be a quasi order) used for simulating the data 15 | :return: dictionary 16 | """ 17 | 18 | R = set() 19 | 20 | if imp is None: 21 | #
--------------------------------------------------------------------------------
/learning_spaces/kst/simu.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from learning_spaces.kst import imp2state
3 | 
4 | 
5 | def simu(items, size, ce, lg, delta, imp=None):
6 |     """
7 |     Data and Quasi Order Simulation Tool
8 | 
9 |     :param items: number of items of the domain taken as a basis for the simulation
10 |     :param size: number of response patterns to be simulated (the sample size)
11 |     :param ce: probability for a careless error
12 |     :param lg: probability for a lucky guess
13 |     :param delta: probability for adding an item pair to the randomly generated quasi order
14 |     :param imp: list of implications (assumed to be a quasi order) used for simulating the data
15 |     :return: dictionary
16 |     """
17 | 
18 |     R = set()
19 | 
20 |     if imp is None:
21 |         # randomly generate a reflexive relation on the items; transitivity is enforced by the base/span construction below
22 |         for i in range(items):
23 |             for j in range(items):
24 |                 if (i != j) and (delta > np.random.uniform(0, 1)):
25 |                     R.update({(i, j)})
26 |                 if i == j:
27 |                     R.update({(i, j)})
28 | 
29 |         R_2 = np.zeros((items, items), dtype=np.int8)
30 |         for t in R:
31 |             R_2[t[0], t[1]] = 1
32 | 
33 |         # base
34 |         base = []
35 | 
36 |         for i in range(items):
37 |             tmp = []
38 |             for j in range(items):
39 |                 if R_2[i, j] == 1:
40 |                     tmp.append(j)
41 |             base.insert(i, tmp)
42 | 
43 |         base_list = []
44 |         for i in range(items):
45 |             base_list.insert(i, set())
46 |             for j in range(len(base[i])):
47 |                 base_list[i].update(frozenset([base[i][j]]))
48 | 
49 |         # span of base
50 |         G = []
51 |         G.insert(0, {frozenset()})
52 |         G.insert(1, set())
53 |         for i in range(len(base[0])):
54 |             G[1].update(frozenset([base[0][i]]))
55 |         G[1] = {frozenset(), frozenset(G[1])}
56 | 
57 |         for i in range(1, items):
58 |             H = {frozenset()}
59 |             for j in G[i]:
60 |                 if not base_list[i].issubset(j):
61 |                     for d in range(i):
62 |                         if base_list[d].issubset(j.union(base_list[i])):
63 |                             if base_list[d].issubset(j):
64 |                                 H.update(frozenset([j.union(base_list[i])]))
65 |                         if not base_list[d].issubset(j.union(base_list[i])):
66 |                             H.update(frozenset([j.union(base_list[i])]))
67 |             G.insert(i + 1, G[i].union(H))
68 | 
69 |         # patterns
70 |         P = np.zeros((len(G[items]), items), dtype=np.int8)
71 |         i = 0
72 |         sorted_g = [list(i) for i in G[items]]
73 |         sorted_g.sort(key=lambda x: (len(x), x))
74 | 
75 |         for k in sorted_g:
76 |             for j in range(items):
77 |                 if j in k:
78 |                     P[i, j] = 1
79 |             i += 1
80 | 
81 |         # implications
82 |         imp = set()
83 |         for i in range(items):
84 |             for j in range(items):
85 |                 if (i != j) and (base_list[i].issubset(base_list[j])):
86 |                     imp.update({(i, j)})
87 |     else:
88 |         # patterns
89 |         P = imp2state(imp, items)
90 | 
91 |     # simulating the dataset
92 |     sim = np.zeros((size, items), dtype=np.int8)
93 | 
94 |     for i in range(size):
95 |         sim[i, :] = P[np.random.randint(0, P.shape[0]), :]
96 |         for j in range(items):
97 |             if (sim[i, j] == 1) and (np.random.uniform(0, 1) < ce):
98 |                 sim[i, j] = 0
99 |             if (sim[i, j] == 0) and (np.random.uniform(0, 1) < lg):
100 |                 sim[i, j] = 1
101 | 
102 |     return {'dataset': sim, 'implications': imp, 'states': P}
103 | 
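104 | # Illustrative usage (an editorial sketch, not part of the original module):
105 | # simulate 100 response patterns over 5 items with 5% careless errors and
106 | # lucky guesses, letting simu() generate a random quasi order (delta=0.15):
107 | #
108 | #     result = simu(items=5, size=100, ce=0.05, lg=0.05, delta=0.15)
109 | #     result['dataset']       # 100 x 5 binary response matrix
110 | #     result['implications']  # the generated quasi order as item pairs
111 | #     result['states']        # the knowledge states it delineates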
--------------------------------------------------------------------------------
/tests/test_iita.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pandas as pd
3 | import numpy as np
4 | import sys
5 | sys.path.append('../learning_spaces/')
6 | from learning_spaces.kst import iita
7 | 
8 | 
9 | class TestIita(unittest.TestCase):
10 | 
11 |     def setUp(self):
12 |         self.dataframe = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]})
13 |         self.matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]])
14 | 
15 |     def test_iita_with_invalid_first_argument(self):
16 |         self.assertRaises(SystemExit, lambda: iita(pd.DataFrame({'a': [1, 0, 1]}), v=1))
17 |         self.assertRaises(SystemExit, lambda: iita('Invalid dataset', v=1))
18 | 
19 |     def test_iita_when_dataset_has_nan_values(self):
20 |         dataset = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, float('nan')], 'c': [0, 1, 1]})
21 |         self.assertRaises(SystemExit, lambda: iita(dataset, v=1))
22 | 
23 |     def test_iita_when_dataset_has_invalid_values(self):
24 |         dataset = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 5], 'c': [0, 1, 1]})
25 |         self.assertRaises(SystemExit, lambda: iita(dataset, v=1))
26 | 
27 |     def test_iita_with_invalid_second_argument(self):
28 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, -100))
29 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, -1))
30 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, 0))
31 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, 4))
32 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, 100))
33 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, (1, 2)))
34 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, [1, 2]))
35 | 
36 |     def test_mini_iita_with_dataframe(self):
37 |         response = iita(self.dataframe, v=1)
38 | 
39 |         self.assertEqual([0.18518518518518515, 0.16666666666666666, 0.21296296296296294], response['diff'].tolist())
40 |         self.assertEqual(0.5, response['error.rate'])
41 |         self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], response['implications'])
42 |         self.assertEqual(1, response['selection.set.index'])
43 |         self.assertEqual(1, response['v'])
44 | 
45 |     def test_corr_iita_with_dataframe(self):
46 |         response = iita(self.dataframe, v=2)
47 | 
48 |         self.assertEqual([0.18518518518518515, 0.16666666666666666, 0.215277777777777776], response['diff'].tolist())
49 |         self.assertEqual(0.5, response['error.rate'])
50 |         self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], response['implications'])
51 |         self.assertEqual(1, response['selection.set.index'])
52 |         self.assertEqual(2, response['v'])
53 | 
54 |     def test_orig_iita_with_dataframe(self):
55 |         response = iita(self.dataframe, v=3)
56 | 
57 |         self.assertEqual([0.20370370370370369, 0.39814814814814814, 0.215277777777777776], response['diff'].tolist())
58 |         self.assertEqual(0, response['error.rate'])
59 |         self.assertEqual([(2, 1)], response['implications'])
60 |         self.assertEqual(0, response['selection.set.index'])
61 |         self.assertEqual(3, response['v'])
62 | 
63 |     def test_mini_iita_with_matrix(self):
64 |         response = iita(self.matrix, v=1)
65 | 
66 |         self.assertEqual([0.18518518518518515, 0.16666666666666666, 0.21296296296296294], response['diff'].tolist())
67 |         self.assertEqual(0.5, response['error.rate'])
68 |         self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], response['implications'])
69 |         self.assertEqual(1, response['selection.set.index'])
70 |         self.assertEqual(1, response['v'])
71 | 
72 |     def test_corr_iita_with_matrix(self):
73 |         response = iita(self.matrix, v=2)
74 | 
75 |         self.assertEqual([0.18518518518518515, 0.16666666666666666, 0.215277777777777776], response['diff'].tolist())
76 |         self.assertEqual(0.5, response['error.rate'])
77 |         self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], response['implications'])
78 |         self.assertEqual(1, response['selection.set.index'])
79 |         self.assertEqual(2, response['v'])
80 | 
81 |     def test_orig_iita_with_matrix(self):
82 |         response = iita(self.matrix, v=3)
83 | 
84 |         self.assertEqual([0.20370370370370369, 0.39814814814814814, 0.215277777777777776], response['diff'].tolist())
85 |         self.assertEqual(0, response['error.rate'])
86 |         self.assertEqual([(2, 1)], response['implications'])
87 |         self.assertEqual(0, response['selection.set.index'])
88 |         self.assertEqual(3, response['v'])
89 | 
90 | if __name__ == '__main__':
91 |     unittest.main()
92 | 
--------------------------------------------------------------------------------
/tests/test_blim.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pandas as pd
3 | from collections import OrderedDict
4 | import sys
5 | sys.path.append('../learning_spaces/')
6 | from learning_spaces.pks.blim import BLIM
7 | 
8 | 
9 | class TestBlim(unittest.TestCase):
10 | 
11 |     def setUp(self):
12 |         self.k = pd.read_csv("data/test_data.csv")
13 |         self.n_r = OrderedDict()
14 |         self.n_r["00000"] = 80
15 |         self.n_r["10000"] = 92
16 |         self.n_r["01000"] = 89
17 |         self.n_r["00100"] = 3
18 |         self.n_r["00010"] = 2
19 |         self.n_r["00001"] = 1
20 |         self.n_r["11000"] = 89
21 |         self.n_r["10100"] = 16
22 |         self.n_r["10010"] = 18
23 |         self.n_r["10001"] = 10
24 |         self.n_r["01100"] = 18
25 |         self.n_r["01010"] = 20
26 |         self.n_r["01001"] = 4
27 |         self.n_r["00110"] = 2
28 |         self.n_r["00101"] = 2
29 |         self.n_r["00011"] = 3
30 |         self.n_r["11100"] = 89
31 |         self.n_r["11010"] = 89
32 |         self.n_r["11001"] = 19
33 |         self.n_r["10110"] = 16
34 |         self.n_r["10101"] = 16
35 |         self.n_r["10011"] = 3
36 |         self.n_r["01110"] = 18
37 |         self.n_r["01101"] = 16
38 |         self.n_r["01011"] = 2
39 |         self.n_r["00111"] = 2
40 |         self.n_r["11110"] = 73
41 |         self.n_r["11101"] = 82
42 |         self.n_r["11011"] = 19
43 |         self.n_r["10111"] = 15
44 |         self.n_r["01111"] = 15
45 |         self.n_r["11111"] = 77
46 | 
47 |     def test_blim_md(self):
48 |         blim_md = BLIM(self.k, self.n_r)
49 |         self.assertEqual(9, blim_md.n_states)
50 |         self.assertEqual(32, blim_md.n_patterns)
51 |         self.assertEqual(1000, blim_md.n_total)
52 |         self.assertEqual("MD", blim_md.method)
53 |         self.assertEqual(1, blim_md.iteration)
54 |         self.assertEqual(91.28362323477515, blim_md.goodness_of_fit['g2'])
55 |         self.assertEqual(13, blim_md.goodness_of_fit['df'])
56 |         self.assertEqual(7.938094626069869e-14, blim_md.goodness_of_fit['pval'])
57 |         self.assertEqual(0.254, blim_md.discrepancy)
58 |         self.assertEqual(0.090000000000000011, blim_md.n_errors['lucky'])
59 |         self.assertEqual(0.16399999999999998, blim_md.n_errors['careless'])
60 |         self.assertListEqual([0.09208874005860192, 0.08871989860583017, 0.04505813953488372, 0.0, 0.0],
61 |                              blim_md.beta.values.tolist()[0])
62 |         self.assertListEqual([0.0, 0.0, 0.04064039408866995, 0.04085801838610828, 0.05472197705207414],
63 |                              blim_md.eta.values.tolist()[0])
64 | 
65 |     def test_log_likelihood_md(self):
66 |         blim_md = BLIM(self.k, self.n_r)
67 |         self.assertEqual(blim_md.log_lik, blim_md.log_likelihood())
68 | 
69 |     def test_number_of_obs_md(self):
70 |         blim_md = BLIM(self.k, self.n_r)
71 |         self.assertEqual(blim_md.n_patterns, blim_md.number_of_observations())
72 | 
73 |     def test_deviance_md(self):
74 |         blim_md = BLIM(self.k, self.n_r)
75 |         self.assertEqual(blim_md.goodness_of_fit['g2'], blim_md.deviance())
76 | 
77 |     def test_coef_md(self):
78 |         blim_md = BLIM(self.k, self.n_r)
79 |         beta, eta, p_k = blim_md.coef()
80 |         self.assertListEqual(list(blim_md.beta), list(beta))
81 |         self.assertListEqual(blim_md.beta.values.tolist(), beta.values.tolist())
82 |         self.assertListEqual(list(blim_md.eta), list(eta))
83 |         self.assertListEqual(blim_md.eta.values.tolist(), eta.values.tolist())
84 |         self.assertListEqual(list(blim_md.p_k), list(p_k))
85 |         self.assertListEqual(blim_md.p_k.values.tolist(), p_k.values.tolist())
86 | 
87 |     def test_blim_ml(self):
88 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
89 |         self.assertEqual(9, blim_ml.n_states)
90 |         self.assertEqual(32, blim_ml.n_patterns)
91 |         self.assertEqual(1000, blim_ml.n_total)
92 |         self.assertEqual("ML", blim_ml.method)
93 |         self.assertEqual(300, blim_ml.iteration)
94 |         self.assertEqual(12.622816435940905, blim_ml.goodness_of_fit['g2'])
95 |         self.assertEqual(13, blim_ml.goodness_of_fit['df'])
96 |         self.assertEqual(0.477349992130788, blim_ml.goodness_of_fit['pval'])
97 |         self.assertEqual(0.254, blim_ml.discrepancy)
98 |         self.assertEqual(0.044865390859123146, blim_ml.n_errors['lucky'])
99 |         self.assertEqual(0.44280715825096656, blim_ml.n_errors['careless'])
100 |         self.assertListEqual([0.1648712647718087, 0.16311278151263192, 0.18883863747163213,
101 |                               0.07983530446636058, 0.08864829052919883], blim_ml.beta.values.tolist()[0])
102 |         self.assertListEqual([0.10306473120044671, 0.09507429143942243, 3.5426760020042067e-06,
103 |                               3.157133824028973e-06, 0.019909716488346413], blim_ml.eta.values.tolist()[0])
104 | 
105 |     def test_log_likelihood_ml(self):
106 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
107 |         self.assertEqual(blim_ml.log_lik, blim_ml.log_likelihood())
108 | 
109 |     def test_number_of_obs_ml(self):
110 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
111 |         self.assertEqual(blim_ml.n_patterns, blim_ml.number_of_observations())
112 | 
113 |     def test_deviance_ml(self):
114 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
115 |         self.assertEqual(blim_ml.goodness_of_fit['g2'], blim_ml.deviance())
116 | 
117 |     def test_coef_ml(self):
118 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
119 |         beta, eta, p_k = blim_ml.coef()
120 |         self.assertListEqual(list(blim_ml.beta), list(beta))
121 |         self.assertListEqual(blim_ml.beta.values.tolist(), beta.values.tolist())
122 |         self.assertListEqual(list(blim_ml.eta), list(eta))
123 |         self.assertListEqual(blim_ml.eta.values.tolist(), eta.values.tolist())
124 |         self.assertListEqual(list(blim_ml.p_k), list(p_k))
125 |         self.assertListEqual(blim_ml.p_k.values.tolist(), p_k.values.tolist())
126 | 
--------------------------------------------------------------------------------
/learning_spaces/pks/blim.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from scipy.stats import chi2
4 | from .conversion import convert_as_pattern, convert_as_bin_mat
5 | 
6 | 
7 | class BLIM:
8 |     """
9 |     Fits a basic local independence model (BLIM) for probabilistic knowledge structures by
10 |     Minimum Discrepancy and Maximum Likelihood estimation
11 |     """
12 | 
13 |     def __init__(self, k, n_r, method="MD", r=None, p_k=None, beta=None, eta=None, rand_init=False, inc_radius=0,
14 |                  tol=1e-07, max_iter=10000):
15 |         """
16 |         Fit a Basic Local Independence Model (BLIM) for probabilistic knowledge structures by
17 |         Minimum Discrepancy Maximum Likelihood estimation
18 |         :param k: a dataframe with binary matrix representing the knowledge structure
19 |         :param n_r: dictionary of absolute frequencies of response patterns
20 |         :param method: MD for Minimum Discrepancy estimation, ML for Maximum Likelihood estimation
21 |         :param r: dataframe with binary matrix of unique response patterns. Per default inferred from the names of n_r
22 |         :param p_k: list of initial parameter values for probabilities of knowledge states
23 |         :param beta: list of initial parameter values for probabilities of a careless error
24 |         :param eta: list of initial parameter values for probabilities of a lucky guess
25 |         :param rand_init: if True then initial parameter values are sampled uniformly with constraints
26 |         :param inc_radius: include knowledge states of distance from the minimum discrepant states less than or equal
27 |                            to inc_radius
28 |         :param tol: tolerance, stopping criterion for iteration
29 |         :param max_iter: the maximum number of iterations
30 |         """
31 |         # setting initial BLIM object values
32 |         self.k = k
33 |         self.n_r = n_r
34 |         self.n_total = sum(n_r.values())
35 |         self.method = method
36 |         self.n_items = len(list(k))
37 |         if r is None:
38 |             r = convert_as_bin_mat(list(self.n_r), col_names=list(self.k))
39 |         self.n_patterns = len(r.index)
40 |         self.n_states = len(k.index)
41 |         if p_k is None:
42 |             self.p_k = [1 / self.n_states] * self.n_states
43 |         else:
44 |             self.p_k = p_k
45 |         if beta is None:
46 |             self.beta = [0.1] * self.n_items
47 |         else:
48 |             self.beta = beta
49 |         if eta is None:
50 |             self.eta = [0.1] * self.n_items
51 |         else:
52 |             self.eta = eta
53 | 
54 |         # uniformly random initial values
55 |         if rand_init:
56 |             self.beta = np.random.uniform(0, 1, self.n_items)
57 |             self.eta = np.random.uniform(0, 1, self.n_items)
58 |             # constraint: beta + eta < 1
59 |             for i in range(len(self.beta)):
60 |                 if self.beta[i] + self.eta[i] >= 1:
61 |                     self.beta[i] = 1 - self.beta[i]
62 |                     self.eta[i] = 1 - self.eta[i]
63 |             # constraint: sum(p_k) == 1
64 |             x = []
65 |             x.append(0)
66 |             x += list(np.random.uniform(0, 1, self.n_states - 1))
67 |             x.append(1)
68 |             x.sort()
69 |             a = x[1:]
70 |             b = x[:-1]
71 |             for i in range(len(self.p_k)):
72 |                 self.p_k[i] = a[i] - b[i]
73 | 
74 |         # converting to dataframes
75 |         self.p_k = pd.DataFrame([self.p_k], columns=convert_as_pattern(self.k))
76 |         self.beta = pd.DataFrame([self.beta], columns=list(self.k))
77 |         self.eta = pd.DataFrame([self.eta], columns=list(self.k))
78 |         # assigning state K given response R
79 |         d_rk_header = convert_as_pattern(self.k)
80 |         d_rk = pd.DataFrame(columns=d_rk_header)
81 |         for i in range(len(self.k.index)):
82 |             rk_matrix = np.logical_xor(r, list(self.k.iloc[i]))
83 |             d_rk[d_rk_header[i]] = list(rk_matrix.sum(axis=1))
84 |         # minimum discrepancy
85 |         d_min = d_rk.apply(min, axis=1)
86 |         i_rk = np.logical_and(d_rk.le(d_min + inc_radius, axis=0), d_rk.notnull())  # row-wise comparison against the minimum discrepancy
87 |         # minimum discrepancy distribution
88 |         frequencies = list(self.n_r.values())
89 |         values = pd.unique(d_min)
90 |         sums = {}
91 |         disc_sum = 0
92 |         disc_count = 0
93 |         for value in values:
94 |             sums[value] = 0
95 |         for i in range(len(d_min)):
96 |             sums[d_min[i]] += frequencies[i]
97 |             disc_sum += d_min[i] * frequencies[i]
98 |             disc_count += frequencies[i]
99 |         self.discrepancy = disc_sum / disc_count
100 |         self.disc_tab = pd.DataFrame(sums, columns=sums.keys(), index=[0])
101 | 
102 |         # selected method: the md/em flags act as exponents that switch between the MD and ML update rules below
103 |         em = 1
104 |         if method == "MD":
105 |             em = 0
106 |         md = 1
107 |         if method == "ML":
108 |             md = 0
109 | 
110 |         self.iteration = 0
111 |         max_diff = 2 * tol
112 |         beta_num = self.beta.copy(deep=True)
113 |         beta_denom = self.beta.copy(deep=True)
114 |         eta_num = self.beta.copy(deep=True)
115 |         eta_denom = self.beta.copy(deep=True)
116 | 
117 |         while (max_diff > tol) and (self.iteration < max_iter) and ((md * (1 - em) != 1) or (self.iteration == 0)):
118 |             pi_old = self.p_k.copy(deep=True)
119 |             beta_old = self.beta.copy(deep=True)
120 |             eta_old = self.eta.copy(deep=True)
121 | 
122 |             p_r_k = pd.DataFrame(columns=d_rk_header)
123 |             for i in range(len(self.k.index)):
124 |                 p_r_k[d_rk_header[i]] = calculate_p_r_k(self.k.iloc[i], self.beta, self.eta, r)
125 | 
126 |             p_r = numpy_list_to_list(np.inner(np.asmatrix(p_r_k), np.asarray(self.p_k)).tolist())
127 |             # prediction of P(K|R)
128 |             p_k_r = pd.DataFrame(np.multiply(np.asmatrix(p_r_k), np.outer((1 / np.asarray(p_r)), np.asarray(self.p_k))),
129 |                                  columns=d_rk_header)
130 | 
131 |             mat_rk = pd.DataFrame(np.multiply(np.asmatrix(i_rk ** md), np.asmatrix(p_k_r ** em)), columns=d_rk_header)
132 | 
133 |             # m_r_k = E(M_RK) = P(K|R) * N(R)
134 |             np_mat_rk = np.asmatrix(mat_rk)
135 |             mat_rk_row_sum = np_mat_rk / np_mat_rk.sum(axis=1)
136 |             list_n_r = np.array(list(self.n_r.values()))
137 |             m_r_k = pd.DataFrame(np.multiply(mat_rk_row_sum, list_n_r[:, np.newaxis]), columns=d_rk_header)
138 | 
139 |             # distribution of knowledge states
140 |             self.p_k = m_r_k.sum(axis=0) / self.n_total
141 | 
142 |             # careless error and guessing parameters
143 |             k_header = list(self.k)
144 |             for i in range(self.n_items):
145 |                 current_header = k_header[i]
146 |                 # filter by columns first
147 |                 del_col_0 = np.where(np.array(self.k[current_header]) == 0)[0]
148 |                 m_r_k_0 = m_r_k.drop(m_r_k.columns[del_col_0], axis=1)
149 |                 del_col_1 = np.where(np.array(self.k[current_header]) == 1)[0]
150 |                 m_r_k_1 = m_r_k.drop(m_r_k.columns[del_col_1], axis=1)
151 |                 # calculate errors
152 |                 beta_num[current_header] = m_r_k_0.loc[r[current_header] == 0].values.sum()
153 |                 beta_denom[current_header] = m_r_k_0.values.sum()
154 |                 eta_num[current_header] = m_r_k_1.loc[r[current_header] == 1].values.sum()
155 |                 eta_denom[current_header] = m_r_k_1.values.sum()
156 | 
157 |             # updating error values
158 |             for header in k_header:
159 |                 self.beta[header] = beta_num[header] / beta_denom[header]
160 |                 self.beta = self.beta.fillna(0)  # fillna returns a copy, so reassign
161 |                 self.eta[header] = eta_num[header] / eta_denom[header]
162 |                 self.eta = self.eta.fillna(0)
163 | 
164 |             # updating max_diff
165 |             p_max = np.amax(abs(self.p_k - pi_old).values)
166 |             beta_max = np.amax(abs(self.beta - beta_old).values)
167 |             eta_max = np.amax(abs(self.eta - eta_old).values)
168 |             max_diff = max(p_max, beta_max, eta_max)
169 |             # updating iterations
170 |             self.iteration += 1
171 | 
172 |         if self.iteration >= max_iter:
173 |             print("Iteration maximum has been exceeded")
174 | 
175 |         # mean number of errors
176 |         p_kq = [0] * self.n_items
177 |         for i in range(self.n_items):
178 |             current_header = k_header[i]
179 |             selected_headers = np.where(np.array(self.k[current_header] == 1))[0]
180 |             sums = 0
181 |             for header in selected_headers:
182 |                 sums += self.p_k[d_rk_header[header]]
183 |             p_kq[i] = sums
184 | 
185 |         self.n_errors = {}
186 |         self.n_errors['careless'] = (self.beta * p_kq).values.sum()
187 |         self.n_errors['lucky'] = (self.eta * (1 - np.array(p_kq))).values.sum()
188 | 
189 |         # recompute predictions and likelihood
190 |         for i in range(len(self.k.index)):
191 |             p_r_k[d_rk_header[i]] = calculate_p_r_k(self.k.iloc[i], self.beta, self.eta, r)
192 | 
193 |         p_r = np.inner(np.asmatrix(p_r_k), np.asarray(self.p_k)).tolist()[0]
194 |         if sum(p_r) < 1:
195 |             p_r = p_r / sum(p_r)
196 | 
197 |         self.log_lik = sum(np.log(p_r) * list(self.n_r.values()))
198 | 
199 |         # goodness of fit
200 |         self.goodness_of_fit = {}
201 |         fitted = np.asarray(p_r) * self.n_total
202 |         self.fitted_values = pd.DataFrame([fitted], columns=self.n_r.keys())
203 |         n_r_list = list(self.n_r.values())
204 | 
205 |         self.goodness_of_fit['g2'] = 2 * sum(n_r_list * np.log(n_r_list / fitted))
206 |         self.goodness_of_fit['df'] = min(2 ** self.n_items - 1, self.n_total) - 2 * self.n_states
207 |         self.goodness_of_fit['pval'] = 1 - chi2.cdf(self.goodness_of_fit['g2'], self.goodness_of_fit['df'])
208 | 
209 |     def describe(self):
210 |         """
211 |         Print BLIM object values
212 |         """
213 |         print("\nBasic local independence models (BLIMs)\n")
214 |         print("Number of knowledge states: {0}".format(self.n_states))
215 |         print("Number of response patterns: {0}".format(self.n_patterns))
216 |         print("Number of respondents: {0}".format(self.n_total))
217 |         print("\nMethod: " + self.method)
218 |         print("Number of iterations: {0}".format(self.iteration))
219 |         g2 = self.goodness_of_fit['g2']
220 |         df = self.goodness_of_fit['df']
221 |         pval = self.goodness_of_fit['pval']
222 |         print("Goodness of fit (2 log likelihood ratio):\n")
223 |         print("\tG2({0}) = {1}, p = {2} \n".format(df, g2, pval))
224 |         print("Minimum discrepancy distribution (mean = {0})\n".format(self.discrepancy))
225 |         print("Mean number of errors (total = {0})".format(sum(self.n_errors.values())))
226 |         print(self.n_errors)
227 |         print("\nDistribution of knowledge states:")
228 |         print(self.p_k)
229 |         print("\nError and guessing parameters:")
230 |         print("Beta")
231 |         print(self.beta)
232 |         print("Eta")
233 |         print(self.eta)
234 | 
235 |     def log_likelihood(self):
236 |         """
237 |         Log-Likelihood for BLIM object
238 |         """
239 |         return self.log_lik
240 | 
241 |     def number_of_observations(self):
242 |         """
243 |         Number of observations
244 |         """
245 |         return self.n_patterns
246 | 
247 |     def simulate(self):
248 |         """
249 |         Simulates responses from the distribution corresponding to a fitted BLIM model object.
250 |         :return: dataframe of frequencies of response patterns
251 |         """
252 |         seq_len = list(range(len(self.p_k.values)))
253 |         states_id = np.random.choice(seq_len, size=self.n_total, replace=True, p=self.p_k.values)
254 |         beta_inv = 1 - self.beta
255 |         # P(resp = 1 | K)
256 |         p_1_k = np.multiply(np.asmatrix(self.k), np.asarray(beta_inv.values)) + np.multiply(np.asmatrix(1 - self.k), np.asarray(self.eta.values))
257 |         p_1_k_df = pd.DataFrame(np.transpose(p_1_k), columns=convert_as_pattern(self.k))
258 |         # initialize response matrix
259 |         r_mat = pd.DataFrame(0, index=np.arange(self.n_total), columns=list(self.k))
260 |         # draw a response
261 |         for i in range(self.n_total):
262 |             r_mat.loc[i, :] = np.random.binomial(n=1, size=self.n_items, p=np.array(p_1_k_df.iloc[:, states_id[i]]))
263 | 
264 |         patterns, frequencies = convert_as_pattern(r_mat, freq=True)
265 |         return pd.DataFrame([frequencies], columns=patterns)
266 | 
267 |     def deviance(self):
268 |         """
269 |         Deviance
270 |         """
271 |         return self.goodness_of_fit['g2']
272 | 
273 |     def coef(self):
274 |         """
275 |         BLIM object parameters
276 |         :return: dataframes for beta, eta and p_k
277 |         """
278 |         return self.beta, self.eta, self.p_k
279 | 
280 | 
281 | def calculate_p_r_k(k_row, beta, eta, r):
282 |     """
283 |     Calculating P(R|K) for every row from knowledge structure matrix
284 |     :param k_row: dataframe representing knowledge structure matrix row
285 |     :param beta: dataframe representing beta
286 |     :param eta: dataframe representing eta
287 |     :param r: dataframe with binary matrix of unique response patterns
288 |     :return: list of calculated values
289 |     """
290 |     # converting data into numpy arrays and matrices
291 |     k = np.asarray(k_row)
292 |     k_inv = np.asarray(1 - k_row)
293 |     beta_mat = np.asmatrix(beta)
294 |     beta_inv_mat = np.asmatrix(1 - beta)
295 |     eta_mat = np.asmatrix(eta)
296 |     eta_inv_mat = np.asmatrix(1 - eta)
297 |     r_mat = np.asmatrix(r)
298 |     r_inv_mat = np.asmatrix(1 - r)
299 |     # calculating the beta and eta factors
300 |     beta1 = np.power(beta_mat, np.multiply(r_inv_mat, k))
301 |     beta2 = np.power(beta_inv_mat, np.multiply(r_mat, k))
302 |     eta1 = np.power(eta_mat, np.multiply(r_mat, k_inv))
303 |     eta2 = np.power(eta_inv_mat, np.multiply(r_inv_mat, k_inv))
304 |     # multiply betas and etas
305 |     mul_mat = np.multiply(np.multiply(beta1, beta2), np.multiply(eta1, eta2))
306 |     # multiply by row
307 |     row_prod = np.prod(mul_mat, axis=1).tolist()
308 |     return numpy_list_to_list(row_prod)
309 | 
310 | 
311 | def numpy_list_to_list(numpy_list):
312 |     """
313 |     Flatten a singly nested (n x 1) list into a flat list
314 |     :param numpy_list: nested list
315 |     :return: list
316 |     """
317 |     ret_val = [x[0] for x in numpy_list]
318 |     return ret_val
319 | 
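320 | # Illustrative usage (an editorial sketch, not part of the original module),
321 | # mirroring the setup in tests/test_blim.py; the CSV path and the truncated
322 | # frequency dict are assumptions borrowed from that test:
323 | #
324 | #     import pandas as pd
325 | #     from collections import OrderedDict
326 | #     k = pd.read_csv('tests/data/test_data.csv')              # knowledge states
327 | #     n_r = OrderedDict([('00000', 80), ('10000', 92), ...])   # pattern counts
328 | #     model = BLIM(k, n_r, method='ML')
329 | #     model.describe()                                         # fit summary
330 | #     beta, eta, p_k = model.coef()
--------------------------------------------------------------------------------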