├── tests ├── __init__.py ├── test_pattern.py ├── test_pop_iita.py ├── test_state2imp.py ├── test_variance.py ├── test_pop_variance.py ├── test_summary_iita.py ├── test_summary_popiita.py ├── data │ ├── test_delineate.csv │ └── test_data.csv ├── test_z_test.py ├── test_simu.py ├── test_ind_gen.py ├── test_ob_counter.py ├── test_gradedness.py ├── test_corr_iita.py ├── test_mini_iita.py ├── test_orig_iita.py ├── test_delineation.py ├── test_imp2state.py ├── test_conversion.py ├── test_iita.py └── test_blim.py ├── learning_spaces ├── __init__.py ├── kst │ ├── z_test.py │ ├── pop_iita.py │ ├── variance.py │ ├── pop_variance.py │ ├── state2imp.py │ ├── summary_popiita.py │ ├── print_pat.py │ ├── print_iita.py │ ├── summary_iita.py │ ├── print_sumpopiita.py │ ├── print_popiita.py │ ├── ob_counter.py │ ├── __init__.py │ ├── pattern.py │ ├── print_ztest.py │ ├── orig_iita.py │ ├── corr_iita.py │ ├── imp2state.py │ ├── ind_gen.py │ ├── mini_iita.py │ ├── iita.py │ ├── hasse.py │ ├── stochastic_markov.py │ └── simu.py └── pks │ ├── __init__.py │ ├── gradedness.py │ ├── conversion.py │ ├── delineation.py │ └── blim.py ├── setup.cfg ├── dist ├── learning_spaces-0.2.0.tar.gz └── learning_spaces-0.2.0-py3-none-any.whl ├── .gitignore ├── setup.py ├── requirements.txt └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pattern.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pop_iita.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_state2imp.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_variance.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /learning_spaces/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pop_variance.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_summary_iita.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [easy_install] 2 | 3 | -------------------------------------------------------------------------------- /tests/test_summary_popiita.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /learning_spaces/kst/z_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def z_test(): 4 | 5 | return {} -------------------------------------------------------------------------------- /learning_spaces/kst/pop_iita.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pop_iita(): 5 | 6 | return {} -------------------------------------------------------------------------------- /learning_spaces/kst/variance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def variance(): 5 | 6 | return {} -------------------------------------------------------------------------------- /learning_spaces/kst/pop_variance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pop_variance(): 5 | 6 | return {} -------------------------------------------------------------------------------- /learning_spaces/pks/__init__.py: -------------------------------------------------------------------------------- 1 | from learning_spaces.pks.conversion import convert_as_pattern, convert_as_bin_mat 2 | -------------------------------------------------------------------------------- /tests/data/test_delineate.csv: -------------------------------------------------------------------------------- 1 | item,s,t,u 2 | e,1,1,0 3 | e,1,0,1 4 | f,0,0,1 5 | g,1,0,0 6 | g,0,1,0 7 | h,0,1,0 8 | -------------------------------------------------------------------------------- /dist/learning_spaces-0.2.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/milansegedinac/kst/HEAD/dist/learning_spaces-0.2.0.tar.gz -------------------------------------------------------------------------------- /learning_spaces/kst/state2imp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def state2imp(P): 5 | 6 | return {} -------------------------------------------------------------------------------- /dist/learning_spaces-0.2.0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/milansegedinac/kst/HEAD/dist/learning_spaces-0.2.0-py3-none-any.whl -------------------------------------------------------------------------------- /learning_spaces/kst/summary_popiita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def summary_popiita(obj): 5 | return {'value': obj, 'class_name': "sumpopiita"} -------------------------------------------------------------------------------- /tests/data/test_data.csv: -------------------------------------------------------------------------------- 1 | a,b,c,d,e 2 | 0,0,0,0,0 3 | 1,0,0,0,0 4 | 0,1,0,0,0 5 | 1,1,0,0,0 6 | 1,1,1,0,0 7 | 1,1,0,1,0 8 | 1,1,1,1,0 9 | 1,1,1,0,1 10 | 1,1,1,1,1 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled python modules. 2 | *.pyc 3 | 4 | # Setuptools distribution folder. 5 | # /dist/ 6 | 7 | # Python egg metadata, regenerated from source files by setuptools. 
8 | /*.egg-info 9 | 10 | .idea/ 11 | /env/ 12 | .venv 13 | .vscode -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='learning_spaces', 5 | version='0.2.0', 6 | description='Knowledge Space Theory', 7 | url='https://github.com/milansegedinac/kst', 8 | packages=find_packages(), 9 | install_requires=['numpy', 'pandas', 'pydot', 'matplotlib'] 10 | ) 11 | -------------------------------------------------------------------------------- /tests/test_z_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | sys.path.append('../learning_spaces/') 4 | from learning_spaces.kst import z_test 5 | 6 | 7 | class TestZTest(unittest.TestCase): 8 | 9 | def test_ZTest(self): 10 | result = z_test() 11 | 12 | # z_test is still a stub; for now it is only expected to return an empty dictionary 13 | self.assertEqual({}, result) 14 | 15 | 16 | if __name__ == '__main__': 17 | unittest.main() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.11.0 2 | fonttools==4.28.2 3 | kiwisolver==1.3.2 4 | matplotlib==3.5.0 5 | nose==1.3.7 6 | numpy==1.21.4 7 | packaging==21.3 8 | pandas==1.3.4 9 | patsy==0.5.2 10 | Pillow==8.4.0 11 | pydot==1.4.2 12 | pyparsing==3.0.6 13 | python-dateutil==2.8.2 14 | pytz==2021.3 15 | scipy==1.7.3 16 | setuptools-scm==6.3.2 17 | six==1.16.0 18 | tomli==1.2.2 -------------------------------------------------------------------------------- /learning_spaces/kst/print_pat.py: -------------------------------------------------------------------------------- 1 | 2 | def print_pat(x): 3 | """ 4 | Formatted print of pattern response 5 | :param x: dictionary - response from pattern function 6 | :return: 7 | """ 8 | print('\nthe {} most frequent response patterns in the data:'.format(x['n'])) 9 | print(x['response.patterns']) 10 | if x['states'] is not None: 11 | print("Number of times a state occurs in the data:") 12 | print(x['states']) 13 | -------------------------------------------------------------------------------- /tests/test_simu.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | sys.path.append('../learning_spaces/') 4 | from learning_spaces.kst import simu 5 | 6 | 7 | class TestSimu(unittest.TestCase): 8 | 9 | def test_simu(self): 10 | result = simu(items=3, size=3, ce=0.0, lg=0.0, delta=0.0) 11 | 12 | self.assertTrue('dataset' in result) 13 | self.assertTrue('implications' in result) 14 | self.assertTrue('states' in result) 15 | 16 | 17 | if __name__ == '__main__': 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /learning_spaces/kst/print_iita.py: -------------------------------------------------------------------------------- 1 | def print_iita(obj): 2 | """ 3 | Formatted print of iita response 4 | 5 | :param obj: dictionary - response from iita function 6 | :return: 7 | """ 8 | 9 | print('\n\tInductive Item Tree Analysis\n') 10 | 11 | algorithm = '-' 12 | if obj['v'] == 1: 13 | algorithm = 'minimized corrected' 14 | elif obj['v'] == 2: 15 | algorithm = 'corrected' 16 | elif obj['v'] == 3: 17 | algorithm = 'original' 18 | 19 | print('\nAlgorithm: {} IITA'.format(algorithm)) 20 | print('\nQuasi order: {}'.format(obj['implications'])) 21 |
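22 | 23 | # Minimal usage sketch (doctest-style; the data mirrors the README example): 24 | # >>> import pandas as pd 25 | # >>> from learning_spaces.kst import iita, print_iita 26 | # >>> df = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 27 | # >>> print_iita(iita(df, v=1))  # prints the algorithm name and the selected quasi order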
-------------------------------------------------------------------------------- /tests/test_ind_gen.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import sys 4 | sys.path.append('../learning_spaces/') 5 | from learning_spaces.kst import ind_gen 6 | 7 | 8 | class TestIndGen(unittest.TestCase): 9 | 10 | def test_ind_gen(self): 11 | b = np.array([[0, 1, 1], [2, 0, 1], [1, 0, 0]]) 12 | result = ind_gen(b) 13 | 14 | self.assertEqual(3, len(result)) 15 | self.assertEqual([(2, 1)], result[0]) 16 | self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], result[1]) 17 | self.assertEqual([(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)], result[2]) 18 | 19 | 20 | if __name__ == '__main__': 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /learning_spaces/kst/summary_iita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def summary_iita(obj): 5 | 6 | print('\n\tInductive Item Tree Analysis\n') 7 | 8 | algorithm = '-' 9 | if obj['v'] == 1: 10 | algorithm = 'minimized corrected' 11 | elif obj['v'] == 2: 12 | algorithm = 'corrected' 13 | elif obj['v'] == 3: 14 | algorithm = 'original' 15 | 16 | print('\nAlgorithm: {} IITA'.format(algorithm)) 17 | print("error rate: {}".format(round(obj['error.rate'], 3))) 18 | print("diff values: {}".format(np.round(obj['diff'], 3))) 19 | print('\nQuasi order: {}'.format(obj['implications'])) 20 | print("index in the selection set: ") 21 | print(str(obj['selection.set.index'])) -------------------------------------------------------------------------------- /learning_spaces/kst/print_sumpopiita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def print_sumpopiita(x): 5 | print("\n \t Inductive Item Tree Analysis in population values\n") 6 | print("\nAlgorithm:") 7 | if (x['v'] == 1): 8 | print(" minimized corrected IITA\n") 9 | if (x['v'] == 2): 10 | print(" corrected IITA\n") 11 | if (x['v'] == 3): 12 | print(" original IITA\n") 13 | print("\npopulation diff values:\n") 14 | print(np.round(x['pop.diff'], 3)) 15 | print("\npopulation error rates:\n") 16 | print(np.round(x['error.pop'], 3)) 17 | print("\npopulation matrix:\n") 18 | print(np.round(x['pop.matrix'], 3)) 19 | print("\nobtained selection set:\n") 20 | print(x['selection.set']) -------------------------------------------------------------------------------- /learning_spaces/kst/print_popiita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def print_popiita(obj): 5 | print("\n \t Inductive Item Tree Analysis in population values\n") 6 | print("\nAlgorithm:") 7 | if (obj['v'] == 1): 8 | print(" minimized corrected IITA\n") 9 | if (obj['v'] == 2): 10 | print(" corrected IITA\n") 11 | if (obj['v'] == 3): 12 | print(" original IITA\n") 13 | print("\npopulation diff values:\n") 14 | print(np.round(obj['pop.diff'], 3)) 15 | print("\npopulation error rates:\n") 16 | print(np.round(obj['error.pop'], 3)) 17 | print("\nquasi order:\n") 18 | selection_set = obj['selection.set'] 19 | diff = obj['pop.diff'] 20 | index = np.min(np.where(selection_set == diff)) 21 | print(selection_set[index][0]) 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /learning_spaces/kst/ob_counter.py: -------------------------------------------------------------------------------- 1 |
import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def ob_counter(dataset): 6 | """ 7 | Computation of numbers of counterexamples 8 | Computes, for all item pairs in a dataset, the corresponding numbers of counterexamples. 9 | 10 | :param dataset: dataframe or matrix consisting of ones and zeros 11 | :return: matrix of the numbers of counterexamples for all pairs of items 12 | """ 13 | 14 | (n, m) = dataset.shape 15 | b = np.zeros((m, m), dtype=np.int32) 16 | 17 | data = dataset 18 | if isinstance(dataset, pd.DataFrame): 19 | data = dataset.values 20 | 21 | for i in range(m): 22 | for j in range(m): 23 | if i != j: 24 | b[i, j] = sum(np.logical_and(data[:, i] == 0, data[:, j] == 1)) 25 | 26 | return b 27 |
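28 | 29 | # Minimal usage sketch (doctest-style; data mirrors tests/test_ob_counter.py): 30 | # b[i, j] counts the respondents who failed item i while solving item j. 31 | # >>> import pandas as pd 32 | # >>> from learning_spaces.kst import ob_counter 33 | # >>> ob_counter(pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]})).tolist() 34 | # [[0, 1, 1], [2, 0, 1], [1, 0, 0]]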
-------------------------------------------------------------------------------- /tests/test_ob_counter.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | sys.path.append('../learning_spaces/') 6 | from learning_spaces.kst import ob_counter 7 | 8 | 9 | class TestObCounter(unittest.TestCase): 10 | 11 | def test_ob_counter_with_dataframe(self): 12 | data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 13 | response = ob_counter(data_frame) 14 | 15 | self.assertEqual([[0, 1, 1], [2, 0, 1], [1, 0, 0]], response.tolist()) 16 | 17 | def test_ob_counter_with_matrix(self): 18 | matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]]) 19 | response = ob_counter(matrix) 20 | 21 | self.assertEqual([[0, 1, 1], [2, 0, 1], [1, 0, 0]], response.tolist()) 22 | 23 | 24 | if __name__ == '__main__': 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /tests/test_gradedness.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import sys 4 | sys.path.append('../learning_spaces/') 5 | from learning_spaces.pks import gradedness 6 | 7 | 8 | class TestGradedness(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.df = pd.read_csv("data/test_data.csv") 12 | 13 | def test_is_forward_graded_df(self): 14 | response = gradedness.is_forward_graded(self.df) 15 | self.assertTrue(response['a']) 16 | self.assertTrue(response['b']) 17 | self.assertFalse(response['c']) 18 | self.assertFalse(response['d']) 19 | self.assertFalse(response['e']) 20 | 21 | def test_is_backward_graded_df(self): 22 | response = gradedness.is_backward_graded(self.df) 23 | self.assertFalse(response['a']) 24 | self.assertFalse(response['b']) 25 | self.assertFalse(response['c']) 26 | self.assertTrue(response['d']) 27 | self.assertTrue(response['e']) 28 | -------------------------------------------------------------------------------- /tests/test_corr_iita.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | sys.path.append('../learning_spaces/') 6 | from learning_spaces.kst import corr_iita 7 | 8 | 9 | class TestCorrIita(unittest.TestCase): 10 | 11 | def test_corr_iita_with_dataframe(self): 12 | data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 13 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 14 | response = corr_iita(data_frame, A) 15 | 16 | self.assertEqual([0.18518518518518515, 0.16666666666666666], response['diff.value'].tolist()) 17 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 18 | 19 | def test_corr_iita_with_matrix(self): 20 | matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]]) 21 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 22 | response = corr_iita(matrix, A) 23 | 24 | self.assertEqual([0.18518518518518515, 0.16666666666666666], response['diff.value'].tolist()) 25 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_mini_iita.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | sys.path.append('../learning_spaces/') 6 | from learning_spaces.kst import mini_iita 7 | 8 | 9 | class TestMiniIita(unittest.TestCase): 10 | 11 | def test_mini_iita_with_dataframe(self): 12 | data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 13 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 14 | response = mini_iita(data_frame, A) 15 | 16 | self.assertEqual([0.18518518518518515, 0.16666666666666666], response['diff.value'].tolist()) 17 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 18 | 19 | def test_mini_iita_with_matrix(self): 20 | matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]]) 21 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 22 | response = mini_iita(matrix, A) 23 | 24 | self.assertEqual([0.18518518518518515, 0.16666666666666666], response['diff.value'].tolist()) 25 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_orig_iita.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | sys.path.append('../learning_spaces/') 6 | from learning_spaces.kst import orig_iita 7 | 8 | 9 | class TestOrigIita(unittest.TestCase): 10 | 11 | def test_orig_iita_with_dataframe(self): 12 | data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 13 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 14 | response = orig_iita(data_frame, A) 15 | 16 | self.assertEqual([0.20370370370370369, 0.39814814814814814], response['diff.value'].tolist()) 17 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 18 | 19 | def test_orig_iita_with_matrix(self): 20 | matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]]) 21 | A = [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]] 22 | response = orig_iita(matrix, A) 23 | 24 | self.assertEqual([0.20370370370370369, 0.39814814814814814], response['diff.value'].tolist()) 25 | self.assertEqual([0.0, 0.5], response['error.rate'].tolist()) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_delineation.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import sys 4 | sys.path.append('../learning_spaces/') 5 | from learning_spaces.pks import delineation 6 | 7 | 8 | class TestDelineation(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.df = pd.read_csv("data/test_delineate.csv") 12 | self.columns = ['e', 'f', 'g', 'h'] 13 | self.values = [ 14 | [0, 0, 0, 0], 15 | [0, 0, 1, 0], 16 | [0, 0, 1, 1], 17 | [0, 1, 0, 0], 18 | [1, 0, 1, 1], 19 | [1,
1, 1, 0], 20 | [0, 1, 1, 1], 21 | [1, 1, 1, 1] 22 | ] 23 | self.ddf = pd.DataFrame(self.values, columns=self.columns) 24 | self.classes = {'s': '0010', 'su': '1110', 'st': '1011', 'u': '0100', 'tu': '0111', 'stu': '1111', 25 | '0': '0000', 't': '0011'} 26 | 27 | def test_delineate_df(self): 28 | dataframe, classes = delineation.delineate(self.df) 29 | self.assertDictEqual(self.classes, classes) 30 | self.assertListEqual(list(self.ddf), list(dataframe)) 31 | self.assertListEqual(self.ddf.values.tolist(), dataframe.values.tolist()) 32 | -------------------------------------------------------------------------------- /learning_spaces/kst/__init__.py: -------------------------------------------------------------------------------- 1 | from learning_spaces.kst.ob_counter import ob_counter 2 | from learning_spaces.kst.orig_iita import orig_iita 3 | from learning_spaces.kst.corr_iita import corr_iita 4 | from learning_spaces.kst.mini_iita import mini_iita 5 | from learning_spaces.kst.ind_gen import ind_gen 6 | from learning_spaces.kst.iita import iita, iita_exclude_transitive 7 | from learning_spaces.kst.imp2state import imp2state 8 | from learning_spaces.kst.simu import simu 9 | from learning_spaces.kst.print_iita import print_iita 10 | from learning_spaces.kst.hasse import hasse 11 | from learning_spaces.kst.pattern import pattern 12 | from learning_spaces.kst.pop_iita import pop_iita 13 | from learning_spaces.kst.pop_variance import pop_variance 14 | from learning_spaces.kst.print_pat import print_pat 15 | from learning_spaces.kst.print_popiita import print_popiita 16 | from learning_spaces.kst.print_sumpopiita import print_sumpopiita 17 | from learning_spaces.kst.print_ztest import print_ztest 18 | from learning_spaces.kst.state2imp import state2imp 19 | from learning_spaces.kst.summary_iita import summary_iita 20 | from learning_spaces.kst.summary_popiita import summary_popiita 21 | from learning_spaces.kst.variance import variance 22 | from learning_spaces.kst.z_test import z_test -------------------------------------------------------------------------------- /tests/test_imp2state.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | sys.path.append('../learning_spaces/') 4 | from learning_spaces.kst import imp2state 5 | 6 | 7 | class TestImp2state(unittest.TestCase): 8 | 9 | def setUp(self): 10 | # data-provider alternative 11 | self.tests = [ 12 | { 13 | 'imp': [(1, 0)], 14 | 'items': 2, 15 | 'expected': [[0, 0], [0, 1], [1, 1]] 16 | }, 17 | { 18 | 'imp': [(0, 1), (0, 2), (2, 0), (2, 1)], 19 | 'items': 3, 20 | 'expected': [[0, 0, 0], [1, 0, 1], [1, 1, 1]] 21 | }, 22 | { 23 | 'imp': [(0, 1), (0, 3), (1, 0), (1, 3), (2, 0), (2, 1), (2, 3), (3, 0), (3, 1)], 24 | 'items': 4, 25 | 'expected': [[0, 0, 0, 0], [0, 0, 1, 0], [1, 1, 1, 1]] 26 | }, 27 | { 28 | 'imp': [(0, 3), (0, 4), (2, 0), (2, 3), (2, 4), (3, 0), (3, 4), (4, 0), (4, 3)], 29 | 'items': 5, 30 | 'expected': [[0, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 1, 1, 0, 0], [1, 0, 1, 1, 1], [1, 1, 1, 1, 1]] 31 | } 32 | ] 33 | 34 | def test_imp2state(self): 35 | for test in self.tests: 36 | self.assertEqual(test['expected'], imp2state(test['imp'], test['items']).tolist()) 37 | 38 | 39 | if __name__ == '__main__': 40 | unittest.main() 41 | -------------------------------------------------------------------------------- /learning_spaces/pks/gradedness.py: -------------------------------------------------------------------------------- 1 | from .conversion import 
convert_as_pattern 2 | 3 | 4 | def is_forward_graded(data): 5 | """ 6 | Checks if a knowledge structure is forward-graded in any item 7 | :param data: dataframe with binary matrix representing the knowledge structure 8 | :return: logical dict of items 9 | """ 10 | ret_val = {} 11 | data_pattern = convert_as_pattern(data) 12 | header = list(data) 13 | for item in header: 14 | new_data = data.copy(deep=True) 15 | new_data[item] = 1 16 | new_data_pattern = convert_as_pattern(new_data) 17 | graded = [] 18 | for pattern in new_data_pattern: 19 | graded.append(pattern in data_pattern) 20 | ret_val[item] = all(graded) 21 | return ret_val 22 | 23 | 24 | def is_backward_graded(data): 25 | """ 26 | Checks if a knowledge structure is backward-graded in any item 27 | :param data: dataframe with binary matrix representing the knowledge structure 28 | :return: logical dict of items 29 | """ 30 | ret_val = {} 31 | data_pattern = convert_as_pattern(data) 32 | header = list(data) 33 | for item in header: 34 | new_data = data.copy(deep=True) 35 | new_data[item] = 0 36 | new_data_pattern = convert_as_pattern(new_data) 37 | graded = [] 38 | for pattern in new_data_pattern: 39 | graded.append(pattern in data_pattern) 40 | ret_val[item] = all(graded) 41 | return ret_val 42 | -------------------------------------------------------------------------------- /tests/test_conversion.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import sys 4 | sys.path.append('../learning_spaces/') 5 | from learning_spaces.pks import conversion 6 | 7 | 8 | class TestConversion(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.df = pd.read_csv("data/test_data.csv") 12 | 13 | def test_convert_as_pattern_df(self): 14 | response = conversion.convert_as_pattern(self.df) 15 | self.assertListEqual(['00000', '10000', '01000', '11000', '11100', '11010', '11110', '11101', '11111'], response) 16 | 17 | def test_convert_as_pattern_df_freq(self): 18 | patterns, freq = conversion.convert_as_pattern(self.df, freq=True) 19 | self.assertListEqual(['00000', '10000', '01000', '11000', '11100', '11010', '11110', '11101', '11111'], patterns) 20 | self.assertListEqual([1, 1, 1, 1, 1, 1, 1, 1, 1], freq) 21 | 22 | def test_convert_as_bin_mat_df(self): 23 | pattern = conversion.convert_as_pattern(self.df) 24 | response = conversion.convert_as_bin_mat(pattern) 25 | self.assertListEqual(list(self.df), list(response)) 26 | self.assertListEqual(self.df.values.tolist(), response.values.tolist()) 27 | 28 | def test_convert_as_bin_mat_df_col_names(self): 29 | pattern = conversion.convert_as_pattern(self.df) 30 | col_names = ['i', 'j', 'k', 'l', 'm'] 31 | response = conversion.convert_as_bin_mat(pattern, col_names=col_names) 32 | self.assertListEqual(col_names, list(response)) 33 | self.assertListEqual(self.df.values.tolist(), response.values.tolist()) 34 | -------------------------------------------------------------------------------- /learning_spaces/kst/pattern.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from collections import Counter 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | 8 | def pattern(dataset, n=5, p=None): 9 | """ 10 | pattern 11 | computes the absolute frequencies of the response patterns, 12 | and optionally, the absolute frequencies of a collection of 13 | specified knowledge states in a dataset. 
14 | 15 | :param dataset: dataframe or matrix consisting of ones and zeros 16 | :param n: number of patterns (must be greater than zero) 17 | :param p: dataframe or matrix 18 | :return: dictionary representing pattern data 19 | """ 20 | 21 | if n < 1: 22 | sys.exit('Number of patterns must be greater than zero.') 23 | 24 | data = dataset 25 | if isinstance(dataset, pd.DataFrame): 26 | data = dataset.values 27 | 28 | def ks_to_str(ks): return ''.join((str(is_correct_answer) for is_correct_answer in ks)) 29 | 30 | pattern = Counter(np.apply_along_axis(ks_to_str, axis=1, arr=data)) 31 | if n > len(pattern): 32 | n = len(pattern) 33 | 34 | if p is None: 35 | return {'response.patterns': pattern.most_common(n), 'states': p, 'n': n} 36 | 37 | def getKnowledgeStatesFrequencies(p): 38 | return np.apply_along_axis(lambda row: pattern[ks_to_str(row)], axis=1, arr=p) 39 | 40 | if isinstance(p, pd.DataFrame): 41 | states = p.assign(size=getKnowledgeStatesFrequencies(p.values)) 42 | else: 43 | frequencies = getKnowledgeStatesFrequencies(p) 44 | states = np.hstack((p, np.reshape(frequencies, (-1, 1)))) 45 | 46 | return {'response.patterns': pattern.most_common(n), 'states': states, 'n': n} 47 |
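48 | 49 | # Minimal usage sketch (doctest-style, hypothetical data): rows are response 50 | # patterns, and identical rows are counted together. 51 | # >>> import numpy as np 52 | # >>> from learning_spaces.kst import pattern 53 | # >>> pattern(np.array([[1, 0, 0], [1, 0, 0], [1, 1, 0]]), n=2) 54 | # {'response.patterns': [('100', 2), ('110', 1)], 'states': None, 'n': 2}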
-------------------------------------------------------------------------------- /learning_spaces/kst/print_ztest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def print_ztest(x): 4 | if len(x['diff_value']) == 1: 5 | print("\n One sample Z-test\n") 6 | if len(x['diff_value']) == 2: 7 | print("\n \t Two sample Z-test\n") 8 | print("\nz = {}".format(round(x['Z.value'], 4))) 9 | print("p-value = {}".format(round(x['p.value'], 4))) 10 | if "two.sided" == x['alternative']: 11 | if x['imp_alt'] is None: 12 | print("\nalternative hypothesis: true mean is not equal to {}".format(x['mu'])) 13 | else: 14 | print("\nalternative hypothesis: true difference in means is not equal to {}".format(x['mu'])) 15 | 16 | if "greater" == x['alternative']: 17 | if x['imp_alt'] is None: 18 | print("\nalternative hypothesis: true mean is greater than {}".format(x['mu'])) 19 | else: 20 | print("\nalternative hypothesis: true difference in means is greater than {}".format(x['mu'])) 21 | 22 | if x['alternative'] == "less": 23 | if x['imp_alt'] is None: 24 | print("\nalternative hypothesis: true mean is less than {}".format(x['mu'])) 25 | else: 26 | print("\nalternative hypothesis: true difference in means is less than {}".format(x['mu'])) 27 | 28 | print(str(x['conf.level'] * 100) + " percent confidence interval:\n") 29 | print(x['conf']) 30 | print("sample estimates:\n") 31 | if len(x['diff_value']) == 1: 32 | # one-sample case: a single mean in imp 33 | print("mean in imp: {}".format(round(x['diff_value'][0], 5))) 34 | if len(x['diff_value']) == 2: 35 | # two-sample case: round each of the two means separately 36 | print("means in imp and imp_alt: {}".format([round(value, 5) for value in x['diff_value']])) 37 | -------------------------------------------------------------------------------- /learning_spaces/kst/orig_iita.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pandas as pd 4 | from learning_spaces.kst import ob_counter 5 | 6 | 7 | def orig_iita(dataset, A): 8 | """ 9 | Original Inductive Item Tree Analysis 10 | Performs the original inductive item tree analysis procedure and returns the corresponding diff values. 11 | 12 | :param dataset: dataframe or matrix consisting of ones and zeros 13 | :param A: list of competing quasi orders 14 | :return: dictionary 15 | """ 16 | 17 | data = dataset 18 | if isinstance(dataset, pd.DataFrame): 19 | data = dataset.values 20 | 21 | b = ob_counter(data) 22 | if sum(b.sum(axis=0) == 0): 23 | sys.exit('Each item must be solved at least once') 24 | 25 | n, m = data.shape 26 | 27 | bs = [] 28 | for i in range(len(A)): 29 | bs.insert(i, np.zeros(b.shape)) 30 | 31 | diff_value_alt = np.repeat(0.0, len(A)) 32 | error = np.repeat(0.0, len(A)) 33 | 34 | # computation of error rate 35 | for k in range(len(A)): 36 | for i in A[k]: 37 | error[k] += (b[i[0]][i[1]] / float(data[:, i[1]].sum())) 38 | if not A[k]: 39 | error[k] = None 40 | else: 41 | error[k] /= len(A[k]) 42 | 43 | # computation of diff values 44 | all_imp = set() 45 | for i in range(m-1): 46 | for j in range(i+1, m): 47 | all_imp = all_imp.union(all_imp, {(i, j), (j, i)}) 48 | 49 | for k in range(len(A)): 50 | if not A[k]: 51 | diff_value_alt[k] = None 52 | else: 53 | for i in all_imp: 54 | if i in A[k]: 55 | bs[k][i[0]][i[1]] = error[k] * data[:, i[1]].sum() 56 | else: 57 | bs[k][i[0]][i[1]] = (1.0 - data[:, i[0]].sum() / float(n)) * data[:, i[1]].sum() * (1.0 - error[k]) 58 | diff_value_alt[k] = ((b - bs[k]) ** 2).sum() / (m ** 2 - m) 59 | 60 | return {'diff.value': diff_value_alt, 'error.rate': error} 61 |
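62 | 63 | # Minimal usage sketch (doctest-style; quasi orders as in tests/test_orig_iita.py, 64 | # one diff value and one error rate per competing quasi order): 65 | # >>> import pandas as pd 66 | # >>> from learning_spaces.kst import orig_iita 67 | # >>> df = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 68 | # >>> result = orig_iita(df, [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)]]) 69 | # >>> result['diff.value'].tolist() 70 | # [0.20370370370370369, 0.39814814814814814]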
-------------------------------------------------------------------------------- /learning_spaces/kst/corr_iita.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pandas as pd 4 | from learning_spaces.kst import ob_counter 5 | 6 | 7 | def corr_iita(dataset, A): 8 | """ 9 | Corrected Inductive Item Tree Analysis 10 | Performs the corrected inductive item tree analysis procedure and returns the corresponding diff values. 11 | 12 | :param dataset: dataframe or matrix consisting of ones and zeros 13 | :param A: list of competing quasi orders 14 | :return: dictionary 15 | """ 16 | 17 | data = dataset 18 | if isinstance(dataset, pd.DataFrame): 19 | data = dataset.values 20 | 21 | b = ob_counter(data) 22 | if sum(b.sum(axis=0) == 0): 23 | sys.exit('Each item must be solved at least once') 24 | 25 | n, m = data.shape 26 | 27 | bs = [] 28 | for i in range(len(A)): 29 | bs.insert(i, np.zeros((m, m))) 30 | 31 | diff_value_alt = np.repeat(0.0, len(A)) 32 | error = np.repeat(0.0, len(A)) 33 | 34 | # computation of error rate 35 | for k in range(len(A)): 36 | for i in A[k]: 37 | error[k] += (b[i[0]][i[1]] / float(data[:, i[1]].sum())) 38 | if not A[k]: 39 | error[k] = None 40 | else: 41 | error[k] /= len(A[k]) 42 | 43 | # computation of diff values 44 | all_imp = set() 45 | for i in range(m - 1): 46 | for j in range(i + 1, m): 47 | all_imp = all_imp.union(all_imp, {(i, j), (j, i)}) 48 | 49 | for k in range(len(A)): 50 | if not A[k]: 51 | diff_value_alt[k] = None 52 | else: 53 | for i in all_imp: 54 | if i in A[k]: 55 | bs[k][i[0]][i[1]] = error[k] * data[:, i[1]].sum() 56 | if (i not in A[k]) and ((i[1], i[0]) not in A[k]): 57 | bs[k][i[0]][i[1]] = (1.0 - data[:, i[0]].sum() / float(n)) * data[:, i[1]].sum() 58 | if (i not in A[k]) and ((i[1], i[0]) in A[k]): 59 | bs[k][i[0]][i[1]] = data[:, i[1]].sum() - data[:, i[0]].sum() + data[:, i[0]].sum() * error[k] 60 | diff_value_alt[k] = ((b - bs[k]) ** 2).sum() / (m ** 2 - m) 61 | 62 | return {'diff.value': diff_value_alt, 'error.rate': error} 63 | -------------------------------------------------------------------------------- /learning_spaces/kst/imp2state.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def imp2state(imp, items): 5 | """ 6 | Transformation from Implications to Knowledge States 7 | Transforms a set of implications to the corresponding set of knowledge states (the quasi ordinal knowledge space).
8 | 9 | :param imp: list of implications 10 | :param items: number of items 11 | :return: matrix consisting of ones and zeros 12 | """ 13 | 14 | R_2 = np.ones((items, items)) 15 | for i in range(items): 16 | for j in range(items): 17 | if (i != j) and ((i, j) not in imp): 18 | R_2[j, i] = 0 19 | 20 | # base 21 | base = [] 22 | 23 | for i in range(items): 24 | tmp = [] 25 | for j in range(items): 26 | if R_2[i, j] == 1: 27 | tmp.append(j) 28 | base.insert(i, tmp) 29 | 30 | base_list = [] 31 | for i in range(items): 32 | base_list.insert(i, set()) 33 | for j in range(len(base[i])): 34 | base_list[i].update(frozenset([base[i][j]])) 35 | 36 | # span of base 37 | G = [] 38 | G.insert(0, {frozenset()}) 39 | G.insert(1, set()) 40 | for i in range(len(base[0])): 41 | G[1].update(frozenset([base[0][i]])) 42 | G[1] = {frozenset(), frozenset(G[1])} 43 | 44 | for i in range(1, items): 45 | H = {frozenset()} 46 | for j in G[i]: 47 | if not base_list[i].issubset(j): 48 | for d in range(i): 49 | if base_list[d].issubset(j.union(base_list[i])): 50 | if base_list[d].issubset(j): 51 | H.update(frozenset([j.union(base_list[i])])) 52 | if not base_list[d].issubset(j.union(base_list[i])): 53 | H.update(frozenset([j.union(base_list[i])])) 54 | G.insert(i+1, G[i].union(H)) 55 | 56 | # patterns 57 | P = np.zeros((len(G[items]), items), dtype=np.int8) 58 | i = 0 59 | sorted_g = [list(i) for i in G[items]] 60 | sorted_g.sort(key=lambda x: (len(x), x)) 61 | 62 | for k in sorted_g: 63 | for j in range(items): 64 | if j in k: 65 | P[i, j] = 1 66 | i += 1 67 | 68 | return P 69 |
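70 | 71 | # Minimal usage sketch (doctest-style; the case mirrors tests/test_imp2state.py): 72 | # >>> from learning_spaces.kst import imp2state 73 | # >>> imp2state([(1, 0)], 2).tolist() 74 | # [[0, 0], [0, 1], [1, 1]]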
-------------------------------------------------------------------------------- /learning_spaces/kst/ind_gen.py: -------------------------------------------------------------------------------- 1 | def ind_gen(b): 2 | """ 3 | Inductive Generation Procedure 4 | Generates inductively a list of competing quasi orders. 5 | 6 | :param b: matrix of the numbers of counterexamples for all pairs of items 7 | :return: list of inductively generated quasi orders 8 | """ 9 | 10 | (n, m) = b.shape 11 | 12 | # set of all pairs with a maximum of k-1 counterexamples 13 | S = [] 14 | 15 | # constructed relation for a maximum of k-1 counterexamples 16 | A = [] 17 | 18 | # set of non-transitive triples 19 | M = [] 20 | M.append([]) 21 | S.append([]) 22 | for i in range(m): 23 | for j in range(m): 24 | if (i != j) and (b[i, j] == b.min()): 25 | S[0].append((i, j)) 26 | 27 | A.append(list(S[0])) 28 | 29 | # inductive generation process 30 | elements = list(set(b.flatten().ravel())) 31 | elements.sort() 32 | if 0 in elements: 33 | elements = elements[1:] 34 | 35 | k = 1 36 | 37 | for element in elements: 38 | S.insert(k, []) 39 | A.insert(k, []) 40 | M.insert(k, []) 41 | 42 | # building of S 43 | for i in range(m): 44 | for j in range(m): 45 | if (i != j) and (b[i, j] <= element) and ((i, j) not in A[k-1]): 46 | S[k].append((i, j)) 47 | 48 | # transitivity test: repeatedly drop intransitive pairs until M[k] stabilizes 49 | if S[k]: 50 | M[k] = list(S[k]) 51 | changed = True 52 | while changed: 53 | snapshot = list(M[k]) 54 | for i in list(M[k]): 55 | for h in range(m): 56 | if (h != i[0]) and (h != i[1]) and ((i[1], h) in (A[k-1] + M[k])) and ((i[0], h) not in (A[k-1] + M[k])): 57 | if i in M[k]: 58 | M[k].remove(i) 59 | if (h != i[0]) and (h != i[1]) and ((h, i[0]) in (A[k-1] + M[k])) and ((h, i[1]) not in (A[k-1] + M[k])): 60 | if i in M[k]: 61 | M[k].remove(i) 62 | if snapshot == M[k]: 63 | changed = False 64 | A[k] = A[k-1] + M[k] 65 | 66 | k += 1 67 | 68 | # deletion of empty and duplicated quasi orders 69 | A = {frozenset(x) for x in A} 70 | A.discard(set()) 71 | # sort 72 | A = [sorted(list(x)) for x in A] 73 | A.sort(key=len) 74 | 75 | return A
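76 | 77 | # Minimal usage sketch (doctest-style; b mirrors tests/test_ind_gen.py and is a 78 | # counterexample matrix as produced by ob_counter): 79 | # >>> import numpy as np 80 | # >>> from learning_spaces.kst import ind_gen 81 | # >>> ind_gen(np.array([[0, 1, 1], [2, 0, 1], [1, 0, 0]])) 82 | # [[(2, 1)], [(0, 1), (0, 2), (2, 0), (2, 1)], [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]]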
-------------------------------------------------------------------------------- /learning_spaces/kst/mini_iita.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from learning_spaces.kst import ob_counter 4 | 5 | 6 | def mini_iita(dataset, A): 7 | """ 8 | Minimized Corrected Inductive Item Tree Analysis 9 | Performs the minimized corrected inductive item tree analysis procedure and returns the corresponding diff values. 10 | 11 | :param dataset: dataframe or matrix consisting of ones and zeros 12 | :param A: list of competing quasi orders 13 | :return: dictionary 14 | """ 15 | 16 | data = dataset 17 | if isinstance(dataset, pd.DataFrame): 18 | data = dataset.values 19 | 20 | b = ob_counter(data) 21 | n, m = data.shape 22 | 23 | bs_num = [] 24 | for i in range(len(A)): 25 | bs_num.insert(i, np.zeros((m, m))) 26 | 27 | p = [] 28 | for i in range(m): 29 | p.insert(i, data[:, i].sum()) 30 | 31 | diff_value_alt = np.repeat(0.0, len(A)) 32 | error = np.repeat(0.0, len(A)) 33 | 34 | # computation of error rate 35 | for k in range(len(A)): 36 | x = np.repeat(0.0, 4) 37 | for i in range(m): 38 | for j in range(m): 39 | if (i != j) and ((i, j) in A[k]): 40 | x[1] += -2 * b[i, j] * p[j] 41 | x[3] += 2 * p[j] ** 2 42 | if (i != j) and ((i, j) not in A[k]) and ((j, i) in A[k]): 43 | x[0] += -2 * b[i, j] * p[i] + 2 * p[i] * p[j] - 2 * p[i] ** 2 44 | x[2] += 2 * p[i] ** 2 45 | 46 | error[k] = -(x[0] + x[1]) / (x[2] + x[3]) 47 | 48 | # computation of diff values 49 | all_imp = set() 50 | for i in range(m - 1): 51 | for j in range(i + 1, m): 52 | all_imp = all_imp.union(all_imp, {(i, j), (j, i)}) 53 | 54 | for k in range(len(A)): 55 | if not A[k]: 56 | diff_value_alt[k] = None 57 | else: 58 | for i in all_imp: 59 | if i in A[k]: 60 | bs_num[k][i[0]][i[1]] = error[k] * data[:, i[1]].sum() 61 | if (i not in A[k]) and ((i[1], i[0]) not in A[k]): 62 | bs_num[k][i[0]][i[1]] = (1.0 - data[:, i[0]].sum() / float(n)) * data[:, i[1]].sum() 63 | if (i not in A[k]) and ((i[1], i[0]) in A[k]): 64 | bs_num[k][i[0]][i[1]] = data[:, i[1]].sum() - data[:, i[0]].sum() + data[:, i[0]].sum() * error[k] 65 | diff_value_alt[k] = ((b - bs_num[k]) ** 2).sum() / (m ** 2 - m) 66 | 67 | return {'diff.value': diff_value_alt, 'error.rate': error} 68 | -------------------------------------------------------------------------------- /learning_spaces/pks/conversion.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import string 3 | 4 | 5 | def convert_as_pattern(data, freq=False, as_letters=False): 6 | """ 7 | Convert binary matrix of response patterns or knowledge spaces into pattern representation 8 | :param data: dataframe with binary matrix 9 | :param freq: displaying frequencies of response patterns 10 | :param as_letters: return response patterns as combination of header letters 11 | :return: list of patterns or list of patterns with list of frequencies of patterns 12 | """ 13 | ret_val = [] 14 | for row in data.itertuples(): 15 | pattern = "" 16 | for i in range(1, len(row)): 17 | if as_letters: 18 | if row[i] == 1: 19 | pattern += list(data)[i-1] 20 | else: 21 | pattern += str(row[i]) 22 | if pattern == "": 23 | ret_val.append(str(0)) 24 | else: 25 | ret_val.append(pattern) 26 | 27 | if freq: 28 | ret_pat = [] 29 | counts = [] 30 | for pattern in ret_val: 31 | if pattern not in ret_pat: 32 | ret_pat.append(pattern) 33 | counts.append(ret_val.count(pattern)) 34 | return ret_pat, counts 35 | else: 36 | return ret_val 37 | 38 | 39 | def convert_as_bin_mat(data, col_names=None): 40 | """ 41 | Convert pattern representation of response patterns or knowledge spaces into binary matrix 42 | :param data: list of response patterns 43 | :param col_names: list of names of matrix columns 44 | :return: dataframe with binary matrix 45 | """ 46 | header = [] 47 | if col_names is None: 48 | num_of_letters = 0 49 | for pattern in data: 50 | if len(pattern) > num_of_letters: 51 | num_of_letters =
len(pattern) 52 | header = list(string.ascii_lowercase[:num_of_letters]) 53 | else: 54 | header = col_names 55 | 56 | values = [] 57 | for pattern in data: 58 | value = [] 59 | if "0" in pattern or "1" in pattern: 60 | if pattern == "0": # empty set 61 | value = [int(0)] * len(header) 62 | else: 63 | for p in pattern: 64 | value.append(int(p)) 65 | else: # pattern is combination of header letters 66 | for h in header: 67 | if h in pattern: 68 | value.append(int(1)) 69 | else: 70 | value.append(int(0)) 71 | values.append(value) 72 | 73 | return pd.DataFrame(values, columns=header) 74 | -------------------------------------------------------------------------------- /learning_spaces/pks/delineation.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from .conversion import convert_as_bin_mat 3 | 4 | 5 | def delineate(skill_fun, item_id=0): 6 | """ 7 | Computes knowledge structure delineated by a skill function 8 | :param skill_fun: dataframe representing the skill function. Consists of an item indicator and a 9 | problem-by-skill indicator binary matrix 10 | :param item_id: index of a column in skill_fun that holds the item indicator 11 | :return: dataframe representing the knowledge structure and a dict of equivalence classes of competence states 12 | """ 13 | # extracting skill set 14 | data = skill_fun.drop(skill_fun.columns[item_id], axis=1) 15 | skills = list(data) 16 | # extracting item set with corresponding skills 17 | items = [] 18 | items_skills = {} 19 | for row in data.itertuples(): 20 | item = skill_fun.iloc[row.Index, item_id] 21 | item_skill = "" 22 | for i in range(1, len(row)): 23 | if int(row[i]) == 1: 24 | item_skill += skills[i-1] 25 | if item not in items: 26 | items.append(item) 27 | items_skills[item] = [] 28 | items_skills[item].append(item_skill) 29 | # generating 2 ^ skills mapping 30 | combinations = get_all_combinations(skills) 31 | # empty set 32 | # generating knowledge structure and appropriate classes 33 | values = ['0' * len(items)] 34 | classes = {} 35 | classes['0'] = values[0] 36 | # generating from skill function 37 | for combination in combinations: 38 | value = "" 39 | for item in items: 40 | if contains_string(combination, items_skills[item]): 41 | value += "1" 42 | else: 43 | value += "0" 44 | classes[combination] = value 45 | values.append(value) 46 | return convert_as_bin_mat(values, items), classes 47 | 48 | 49 | def get_all_combinations(input_chars): 50 | """ 51 | Generate all combinations of given characters 52 | :param input_chars: input characters 53 | :return: list of all combinations 54 | """ 55 | ret_val = [] 56 | for i in range(len(input_chars)): 57 | temp = list(itertools.combinations(input_chars, i + 1)) 58 | for t in temp: 59 | ret_val.append(''.join(t)) 60 | return ret_val 61 | 62 | 63 | def contains_string(src, dest): 64 | """ 65 | Checking if destination string contains any subset of source string 66 | :param src: source string 67 | :param dest: list of destination strings 68 | :return: True or False 69 | """ 70 | chars = list(src) 71 | combinations = get_all_combinations(chars) 72 | for combination in combinations: 73 | if combination in dest: 74 | return True 75 | return False 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Knowledge Space Theory 2 | 3 | KST is an open source software library with Python implementations of basic Knowledge 
Space Theory algorithms. 4 | 5 | ## Usage 6 | 7 | ### Installation 8 | You can either clone the project or download a distribution [file](./dist/learning_spaces-0.2.0-py3-none-any.whl) and run the command: 9 | `pip install /path-to-downloaded-file/learning_spaces-0.2.0-py3-none-any.whl` 10 | 11 | ### Setup in Python 12 | KST requires Python 3.9. It is recommended to use the library in a separate virtual environment. A brief and practical introduction to virtual environments can be found at the following [link](https://docs.python-guide.org/dev/virtualenvs/). 13 | First, create a virtual environment: 14 | ``` 15 | mkvirtualenv kst 16 | ``` 17 | After creating the virtual environment, install the requirements: 18 | ``` 19 | pip install -r requirements.txt 20 | ``` 21 | After that, the library can be used. 22 | ```python 23 | >>> import pandas as pd 24 | >>> from learning_spaces.kst import iita 25 | >>> data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 26 | >>> response = iita(data_frame, v=1) 27 | >>> print(response) 28 | {'diff': array([ 0.18518519, 0.16666667, 0.21296296]), 'implications': [(0, 1), (0, 2), (2, 0), (2, 1)], 'error.rate': 0.5, 'selection.set.index': 1, 'v': 1} 29 | ``` 30 | 31 | ### Setup in a browser 32 | KST can be run in a browser environment, without the need for a Python server. We use [Pyodide](https://github.com/pyodide/pyodide), which brings the Python runtime to the browser via WebAssembly. The page below is a minimal sketch: it loads Pyodide from a CDN, installs the distribution wheel with micropip, and runs the same example as above. 33 | 34 | Full example (open the browser console to see the result): 35 | ```html 36 | <!DOCTYPE html> 37 | <html> 38 | <head> 39 |     <script src="https://cdn.jsdelivr.net/pyodide/v0.19.0/full/pyodide.js"></script> 40 | </head> 41 | <body> 42 |     <script type="text/javascript"> 43 |         async function main() { 44 |             const pyodide = await loadPyodide({ indexURL: 'https://cdn.jsdelivr.net/pyodide/v0.19.0/full/' }); 45 |             await pyodide.loadPackage(['numpy', 'pandas', 'micropip']); 46 |             await pyodide.runPythonAsync(` 47 |                 import micropip 48 |                 await micropip.install('dist/learning_spaces-0.2.0-py3-none-any.whl') 49 |                 import pandas as pd 50 |                 from learning_spaces.kst import iita 51 |                 data_frame = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 52 |                 print(iita(data_frame, v=1)) 53 |             `); 54 |         } 55 |         main(); 56 |     </script> 57 | </body> 58 | </html> 59 | ``` -------------------------------------------------------------------------------- /learning_spaces/kst/iita.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import numpy as np 4 | from learning_spaces.kst import ind_gen 5 | from learning_spaces.kst import ob_counter 6 | from learning_spaces.kst import orig_iita 7 | from learning_spaces.kst import mini_iita 8 | from learning_spaces.kst import corr_iita 9 | 10 | 11 | def iita(dataset, v): 12 | """ 13 | Inductive Item Tree Analysis 14 | Performs one of the three inductive item tree analysis algorithms (minimized corrected, corrected and original).
15 | 16 | :param dataset: dataframe or matrix consisting of ones and zeros 17 | :param v: algorithm: v=1 (minimized corrected), v=2 (corrected) and v=3 (original) 18 | :return: dictionary 19 | """ 20 | 21 | if (not isinstance(dataset, pd.DataFrame) and not isinstance(dataset, np.ndarray)) or (dataset.shape[1] == 1): 22 | sys.exit('data must be either a numeric matrix or a dataframe, with at least two columns.') 23 | 24 | data = dataset 25 | if isinstance(dataset, pd.DataFrame): 26 | data = dataset.values 27 | 28 | if np.logical_not(np.logical_or(data == 0, data == 1)).sum() != 0: 29 | sys.exit('data must contain only 0 and 1') 30 | 31 | if v not in (1, 2, 3): 32 | sys.exit('IITA version must be specified') 33 | 34 | # inductively generated set of competing quasi orders 35 | i = ind_gen(ob_counter(data)) 36 | 37 | # call chosen algorithm 38 | if v == 1: 39 | ii = mini_iita(data, i) 40 | elif v == 2: 41 | ii = corr_iita(data, i) 42 | elif v == 3: 43 | ii = orig_iita(data, i) 44 | 45 | index = list(ii['diff.value']).index(min(ii['diff.value'])) 46 | return {'diff': ii['diff.value'], 'implications': i[index], 'error.rate': ii['error.rate'][index], 'selection.set.index': index, 'v': v} 47 | 48 | 49 | def iita_exclude_transitive(dataset, v): 50 | """ 51 | Inductive Item Tree Analysis 52 | Performs one of the three inductive item tree analysis algorithms (minimized corrected, corrected and original) 53 | and then performs transitive reduction (removes transitive edges). 54 | The implications list will keep the same vertices with as few edges as possible. 55 | 56 | :param dataset: dataframe or matrix consisting of ones and zeros 57 | :param v: algorithm: v=1 (minimized corrected), v=2 (corrected) and v=3 (original) 58 | :return: dictionary 59 | """ 60 | response = iita(dataset, v) 61 | impl = response['implications'] 62 | 63 | # symmetric reduction 64 | # edges keeps only one direction of each symmetric pair of implications 65 | edges = [] 66 | for x, y in impl: 67 | if (y, x) not in edges: 68 | edges.append((x, y)) 69 | 70 | 71 | # nodes is a list of all nodes extracted from edges 72 | nodes = list(set([node for pair in edges for node in pair])) 73 | 74 | # transitive reduction 75 | # remove transitive edges from the list of edges 76 | for x in nodes: 77 | for y in nodes: 78 | for z in nodes: 79 | if (x, y) in edges and (y, z) in edges: 80 | try: 81 | edges.remove((x, z)) 82 | except ValueError: 83 | pass 84 | 85 | # update a list of implications after transitive reduction 86 | response['implications'] = edges 87 | return response
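88 | 89 | # Minimal usage sketch (doctest-style; data mirrors the README example): 90 | # >>> import pandas as pd 91 | # >>> from learning_spaces.kst import iita, iita_exclude_transitive 92 | # >>> df = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]}) 93 | # >>> iita(df, v=1)['implications'] 94 | # [(0, 1), (0, 2), (2, 0), (2, 1)] 95 | # >>> iita_exclude_transitive(df, v=1)['implications']  # symmetric + transitive pruning 96 | # [(0, 2), (2, 1)]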
-------------------------------------------------------------------------------- /learning_spaces/kst/hasse.py: -------------------------------------------------------------------------------- 1 | import pydot 2 | import matplotlib.pyplot as plt 3 | import matplotlib.image as mpimg 4 | import tempfile 5 | import os 6 | 7 | def hasse(imp, items, dir_path = None, labels = None): 8 | """ 9 | Hasse diagram of Surmise Relation 10 | Plots the Hasse diagram of surmise relation. 11 | 12 | :param imp: list of implications 13 | :param items: number of items of the domain 14 | :param dir_path: path to the png directory 15 | :param labels: string labels for items 16 | :return: produces a plot and returns a list of the equally informative items 17 | """ 18 | 19 | parallel_items = {} 20 | implications = list(imp) 21 | 22 | # generate partially ordered set 23 | for i in implications: 24 | if (i[1], i[0]) in implications: 25 | if i[0] in parallel_items: 26 | parallel_items[i[0]].append(i[1]) 27 | else: 28 | parallel_items[i[0]] = [i[0], i[1]] 29 | implications.remove(i) 30 | implications.remove((i[1], i[0])) 31 | for j in range(len(implications)): 32 | if i[1] == implications[j][0]: 33 | implications[j] = (i[0], implications[j][1]) 34 | elif i[1] == implications[j][1]: 35 | implications[j] = (implications[j][0], i[0]) 36 | 37 | implications = list(set(implications)) 38 | # remove reflexive properties 39 | for i in list(implications): 40 | if i[0] == i[1]: 41 | implications.remove(i) 42 | 43 | # i j k 44 | # (0,1)(1,2),(0,2) 45 | # remove transitive properties 46 | for i in list(implications): 47 | for j in list(implications): 48 | for k in list(implications): 49 | if i[1]==j[0] and j[1]==k[1] and i[0]==k[0]: 50 | implications.remove(k) 51 | 52 | for i in list(implications): 53 | for j in range(items): 54 | if (i[0] != j) and (i[1] != j) and ((i[0], j) in implications) and ((i[1], j) in implications): 55 | implications.remove((i[0], j)) 56 | 57 | # bottom-up approach 58 | for i in range(len(implications)): 59 | implications[i] = (implications[i][1], implications[i][0]) 60 | 61 | graph = pydot.Dot(graph_type='graph') 62 | print(implications) 63 | if labels: 64 | for i in implications: 65 | graph.add_edge(pydot.Edge(str(labels[int(i[0])]), str(labels[int(i[1])]))) 66 | else: 67 | for i in implications: 68 | graph.add_edge(pydot.Edge(i[0], i[1])) 69 | 70 | # standalone nodes 71 | for i in range(items): 72 | found = False 73 | for implication in implications: 74 | if i in implication: 75 | found = True 76 | break 77 | if not found: 78 | parallel = False 79 | for key, value in parallel_items.items(): 80 | if i in value: 81 | parallel = True 82 | break 83 | if not parallel: 84 | graph.add_node(pydot.Node(i)) 85 | 86 | fout = tempfile.NamedTemporaryFile(mode = 'w+t', dir = dir_path, suffix=".png", delete = False) 87 | graph.write(fout.name, format="png") 88 | img = mpimg.imread(fout.name) 89 | plt.axis('off') 90 | plt.imshow(img) 91 | plt.show() 92 | os.remove(fout.name) 93 | 94 | return [list(set(value)) for key, value in parallel_items.items()] 95 | -------------------------------------------------------------------------------- /learning_spaces/kst/stochastic_markov.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import random 3 | from typing import Tuple 4 | 5 | import numpy as np 6 | 7 | def _array2dict_vals(array: np.ndarray, mapping: dict): 8 | for i, key in enumerate(mapping): 9 | mapping[key] = array[i] 10 | 11 | def _scale_probabilities(states: dict[Tuple[str], float]): 12 | probabilities = np.array(list(states.values())) 13 | probabilities /= sum(probabilities) 14 | _array2dict_vals(probabilities, states) 15 | 16 | def _likeliest_state(states: dict[Tuple[str], float]) -> Tuple[Tuple[str], float]: 17 | """ 18 | Returns likeliest state and its probability.
19 | :return: (state, probability) 20 | """ 21 | return max(states.items(), key=operator.itemgetter(1)) 22 | 23 | def _take_answer(question: str) -> bool: 24 | print(f'{question}: correct/incorrect? [1/0]') 25 | return int(input()) == 1 26 | 27 | def questioning_rule(states: dict[Tuple[str], float]) -> str: 28 | """ 29 | :param states: dictionary mapping states (sets of problems/questions) to probabilities 30 | :return: question to be asked 31 | """ 32 | if not np.isclose(1, sum(states.values()), atol=0.01): 33 | raise ValueError('Probabilities do not add up to 1!') 34 | 35 | state, _ = _likeliest_state(states) 36 | return random.choice(state) 37 | 38 | def response_rule(question: str, states: dict[Tuple[str], float]) -> float: 39 | """ 40 | :param question: question the answer is given to 41 | :param states: dictionary mapping states (sets of problems/questions) to probabilities 42 | :return: probability of giving correct answer according to given states 43 | """ 44 | ret_val = 0 45 | for state, probability in states.items(): 46 | if question in state: 47 | ret_val += probability 48 | return ret_val 49 | 50 | def updating_rule(question: str, answer_correct: bool, r: float, states: dict[Tuple[str], float]): 51 | """ 52 | Updates probabilities on passed states. 53 | :param question: question the answer is given to 54 | :param answer_correct: whether answer is correct 55 | :param r: response rule output 56 | :param states: dictionary mapping states (sets of problems/questions) to probabilities 57 | """ 58 | theta = 0.1 * r 59 | theta_compl = 1 - theta 60 | if not answer_correct: 61 | theta, theta_compl = theta_compl, theta 62 | 63 | for state in states: 64 | if question in state: 65 | states[state] *= theta_compl 66 | else: 67 | states[state] *= theta 68 | _scale_probabilities(states) 69 | 70 | def final_state(states: dict[Tuple[str], float]): 71 | state, probability = _likeliest_state(states) 72 | return state if probability > 0.75 else None 73 | 74 | def stochastic_markov(states: dict[Tuple[str], float]) -> None: 75 | max_iter = 100 76 | for _ in range(max_iter): 77 | question = questioning_rule(states) 78 | r = response_rule(question, states) 79 | answer_correct = _take_answer(question) 80 | updating_rule(question, answer_correct, r, states) 81 | print(states) 82 | final = final_state(states) 83 | if final is not None: 84 | print(final) 85 | return 86 | print('Non-conclusive.') 87 | 88 | def demo(): 89 | states = {('a'): 0.125, ('a', 'b'): 0.25, ('b'): 0.125, ('a', 'b', 'c'): 0.5} 90 | print(states) 91 | stochastic_markov(states) 92 | 93 | if __name__ == '__main__': 94 | demo() 95 | -------------------------------------------------------------------------------- /learning_spaces/kst/simu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from learning_spaces.kst import imp2state 3 | 4 | 5 | def simu(items, size, ce, lg, delta, imp=None): 6 | """ 7 | Data and Quasi Order Simulation Tool 8 | 9 | :param items: number of items of the domain taken as a basis for the simulation 10 | :param size: number of response patterns to be simulated (the sample size) 11 | :param ce: probability for a careless error 12 | :param lg: probability for a lucky guess 13 | :param delta: probability for adding an item pair to the randomly generated quasi order 14 | :param imp: list of implications (assumed to be a quasi order) used for simulating the data 15 | :return: dictionary 16 | """ 17 | 18 | R = set() 19 | 20 | if imp is None: 21 | #
--------------------------------------------------------------------------------
/learning_spaces/kst/simu.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from learning_spaces.kst import imp2state
3 | 
4 | 
5 | def simu(items, size, ce, lg, delta, imp=None):
6 |     """
7 |     Data and Quasi Order Simulation Tool
8 | 
9 |     :param items: number of items of the domain taken as a basis for the simulation
10 |     :param size: number of response patterns to be simulated (the sample size)
11 |     :param ce: probability for a careless error
12 |     :param lg: probability for a lucky guess
13 |     :param delta: probability for adding an item pair to the randomly generated quasi order
14 |     :param imp: list of implications (assumed to be a quasi order) used for simulating the data
15 |     :return: dictionary
16 |     """
17 | 
18 |     R = set()
19 | 
20 |     if imp is None:
21 |         # randomly generate a reflexive relation on the items; transitivity is enforced by the base/span construction below
22 |         for i in range(items):
23 |             for j in range(items):
24 |                 if (i != j) and (delta > np.random.uniform(0, 1)):
25 |                     R.update({(i, j)})
26 |                 if i == j:
27 |                     R.update({(i, j)})
28 | 
29 |         R_2 = np.zeros((items, items), dtype=np.int8)
30 |         for t in R:
31 |             R_2[t[0], t[1]] = 1
32 | 
33 |         # base
34 |         base = []
35 | 
36 |         for i in range(items):
37 |             tmp = []
38 |             for j in range(items):
39 |                 if R_2[i, j] == 1:
40 |                     tmp.append(j)
41 |             base.insert(i, tmp)
42 | 
43 |         base_list = []
44 |         for i in range(items):
45 |             base_list.insert(i, set())
46 |             for j in range(len(base[i])):
47 |                 base_list[i].update(frozenset([base[i][j]]))
48 | 
49 |         # span of base
50 |         G = []
51 |         G.insert(0, {frozenset()})
52 |         G.insert(1, set())
53 |         for i in range(len(base[0])):
54 |             G[1].update(frozenset([base[0][i]]))
55 |         G[1] = {frozenset(), frozenset(G[1])}
56 | 
57 |         for i in range(1, items):
58 |             H = {frozenset()}
59 |             for j in G[i]:
60 |                 if not base_list[i].issubset(j):
61 |                     for d in range(i):
62 |                         if base_list[d].issubset(j.union(base_list[i])):
63 |                             if base_list[d].issubset(j):
64 |                                 H.update(frozenset([j.union(base_list[i])]))
65 |                         if not base_list[d].issubset(j.union(base_list[i])):
66 |                             H.update(frozenset([j.union(base_list[i])]))
67 |             G.insert(i + 1, G[i].union(H))
68 | 
69 |         # patterns
70 |         P = np.zeros((len(G[items]), items), dtype=np.int8)
71 |         i = 0
72 |         sorted_g = [list(i) for i in G[items]]
73 |         sorted_g.sort(key=lambda x: (len(x), x))
74 | 
75 |         for k in sorted_g:
76 |             for j in range(items):
77 |                 if j in k:
78 |                     P[i, j] = 1
79 |             i += 1
80 | 
81 |         # implications
82 |         imp = set()
83 |         for i in range(items):
84 |             for j in range(items):
85 |                 if (i != j) and (base_list[i].issubset(base_list[j])):
86 |                     imp.update({(i, j)})
87 |     else:
88 |         # patterns
89 |         P = imp2state(imp, items)
90 | 
91 |     # simulating the dataset
92 |     sim = np.zeros((size, items), dtype=np.int8)
93 | 
94 |     for i in range(size):
95 |         sim[i, :] = P[np.random.randint(0, P.shape[0]), :]
96 |         for j in range(items):
97 |             if (sim[i, j] == 1) and (np.random.uniform(0, 1) < ce):
98 |                 sim[i, j] = 0
99 |             if (sim[i, j] == 0) and (np.random.uniform(0, 1) < lg):
100 |                 sim[i, j] = 1
101 | 
102 |     return {'dataset': sim, 'implications': imp, 'states': P}
103 | 
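104 | # Illustrative usage (an editorial sketch, not part of the original module):
105 | # simulate 100 response patterns over 5 items with 5% careless errors and
106 | # lucky guesses, letting simu() generate a random quasi order (delta=0.15):
107 | #
108 | #     result = simu(items=5, size=100, ce=0.05, lg=0.05, delta=0.15)
109 | #     result['dataset']       # 100 x 5 binary response matrix
110 | #     result['implications']  # the generated quasi order as item pairs
111 | #     result['states']        # the knowledge states it delineates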
--------------------------------------------------------------------------------
/tests/test_iita.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pandas as pd
3 | import numpy as np
4 | import sys
5 | sys.path.append('../learning_spaces/')
6 | from learning_spaces.kst import iita
7 | 
8 | 
9 | class TestIita(unittest.TestCase):
10 | 
11 |     def setUp(self):
12 |         self.dataframe = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 0], 'c': [0, 1, 1]})
13 |         self.matrix = np.matrix([[1, 0, 0], [0, 1, 1], [1, 0, 1]])
14 | 
15 |     def test_iita_with_invalid_first_argument(self):
16 |         self.assertRaises(SystemExit, lambda: iita(pd.DataFrame({'a': [1, 0, 1]}), v=1))
17 |         self.assertRaises(SystemExit, lambda: iita('Invalid dataset', v=1))
18 | 
19 |     def test_iita_when_dataset_has_nan_values(self):
20 |         dataset = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, float('nan')], 'c': [0, 1, 1]})
21 |         self.assertRaises(SystemExit, lambda: iita(dataset, v=1))
22 | 
23 |     def test_iita_when_dataset_has_invalid_values(self):
24 |         dataset = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 5], 'c': [0, 1, 1]})
25 |         self.assertRaises(SystemExit, lambda: iita(dataset, v=1))
26 | 
27 |     def test_iita_with_invalid_second_argument(self):
28 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, -100))
29 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, -1))
30 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, 0))
31 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, 4))
32 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, 100))
33 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, (1, 2)))
34 |         self.assertRaises(SystemExit, lambda: iita(self.dataframe, [1, 2]))
35 | 
36 |     def test_mini_iita_with_dataframe(self):
37 |         response = iita(self.dataframe, v=1)
38 | 
39 |         self.assertEqual([0.18518518518518515, 0.16666666666666666, 0.21296296296296294], response['diff'].tolist())
40 |         self.assertEqual(0.5, response['error.rate'])
41 |         self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], response['implications'])
42 |         self.assertEqual(1, response['selection.set.index'])
43 |         self.assertEqual(1, response['v'])
44 | 
45 |     def test_corr_iita_with_dataframe(self):
46 |         response = iita(self.dataframe, v=2)
47 | 
48 |         self.assertEqual([0.18518518518518515, 0.16666666666666666, 0.215277777777777776], response['diff'].tolist())
49 |         self.assertEqual(0.5, response['error.rate'])
50 |         self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], response['implications'])
51 |         self.assertEqual(1, response['selection.set.index'])
52 |         self.assertEqual(2, response['v'])
53 | 
54 |     def test_orig_iita_with_dataframe(self):
55 |         response = iita(self.dataframe, v=3)
56 | 
57 |         self.assertEqual([0.20370370370370369, 0.39814814814814814, 0.215277777777777776], response['diff'].tolist())
58 |         self.assertEqual(0, response['error.rate'])
59 |         self.assertEqual([(2, 1)], response['implications'])
60 |         self.assertEqual(0, response['selection.set.index'])
61 |         self.assertEqual(3, response['v'])
62 | 
63 |     def test_mini_iita_with_matrix(self):
64 |         response = iita(self.matrix, v=1)
65 | 
66 |         self.assertEqual([0.18518518518518515, 0.16666666666666666, 0.21296296296296294], response['diff'].tolist())
67 |         self.assertEqual(0.5, response['error.rate'])
68 |         self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], response['implications'])
69 |         self.assertEqual(1, response['selection.set.index'])
70 |         self.assertEqual(1, response['v'])
71 | 
72 |     def test_corr_iita_with_matrix(self):
73 |         response = iita(self.matrix, v=2)
74 | 
75 |         self.assertEqual([0.18518518518518515, 0.16666666666666666, 0.215277777777777776], response['diff'].tolist())
76 |         self.assertEqual(0.5, response['error.rate'])
77 |         self.assertEqual([(0, 1), (0, 2), (2, 0), (2, 1)], response['implications'])
78 |         self.assertEqual(1, response['selection.set.index'])
79 |         self.assertEqual(2, response['v'])
80 | 
81 |     def test_orig_iita_with_matrix(self):
82 |         response = iita(self.matrix, v=3)
83 | 
84 |         self.assertEqual([0.20370370370370369, 0.39814814814814814, 0.215277777777777776], response['diff'].tolist())
85 |         self.assertEqual(0, response['error.rate'])
86 |         self.assertEqual([(2, 1)], response['implications'])
87 |         self.assertEqual(0, response['selection.set.index'])
88 |         self.assertEqual(3, response['v'])
89 | 
90 | if __name__ == '__main__':
91 |     unittest.main()
92 | 
--------------------------------------------------------------------------------
/tests/test_blim.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pandas as pd
3 | from collections import OrderedDict
4 | import sys
5 | sys.path.append('../learning_spaces/')
6 | from learning_spaces.pks.blim import BLIM
7 | 
8 | 
9 | class TestBlim(unittest.TestCase):
10 | 
11 |     def setUp(self):
12 |         self.k = pd.read_csv("data/test_data.csv")
13 |         self.n_r = OrderedDict()
14 |         self.n_r["00000"] = 80
15 |         self.n_r["10000"] = 92
16 |         self.n_r["01000"] = 89
17 |         self.n_r["00100"] = 3
18 |         self.n_r["00010"] = 2
19 |         self.n_r["00001"] = 1
20 |         self.n_r["11000"] = 89
21 |         self.n_r["10100"] = 16
22 |         self.n_r["10010"] = 18
23 |         self.n_r["10001"] = 10
24 |         self.n_r["01100"] = 18
25 |         self.n_r["01010"] = 20
26 |         self.n_r["01001"] = 4
27 |         self.n_r["00110"] = 2
28 |         self.n_r["00101"] = 2
29 |         self.n_r["00011"] = 3
30 |         self.n_r["11100"] = 89
31 |         self.n_r["11010"] = 89
32 |         self.n_r["11001"] = 19
33 |         self.n_r["10110"] = 16
34 |         self.n_r["10101"] = 16
35 |         self.n_r["10011"] = 3
36 |         self.n_r["01110"] = 18
37 |         self.n_r["01101"] = 16
38 |         self.n_r["01011"] = 2
39 |         self.n_r["00111"] = 2
40 |         self.n_r["11110"] = 73
41 |         self.n_r["11101"] = 82
42 |         self.n_r["11011"] = 19
43 |         self.n_r["10111"] = 15
44 |         self.n_r["01111"] = 15
45 |         self.n_r["11111"] = 77
46 | 
47 |     def test_blim_md(self):
48 |         blim_md = BLIM(self.k, self.n_r)
49 |         self.assertEqual(9, blim_md.n_states)
50 |         self.assertEqual(32, blim_md.n_patterns)
51 |         self.assertEqual(1000, blim_md.n_total)
52 |         self.assertEqual("MD", blim_md.method)
53 |         self.assertEqual(1, blim_md.iteration)
54 |         self.assertEqual(91.28362323477515, blim_md.goodness_of_fit['g2'])
55 |         self.assertEqual(13, blim_md.goodness_of_fit['df'])
56 |         self.assertEqual(7.938094626069869e-14, blim_md.goodness_of_fit['pval'])
57 |         self.assertEqual(0.254, blim_md.discrepancy)
58 |         self.assertEqual(0.090000000000000011, blim_md.n_errors['lucky'])
59 |         self.assertEqual(0.16399999999999998, blim_md.n_errors['careless'])
60 |         self.assertListEqual([0.09208874005860192, 0.08871989860583017, 0.04505813953488372, 0.0, 0.0],
61 |                              blim_md.beta.values.tolist()[0])
62 |         self.assertListEqual([0.0, 0.0, 0.04064039408866995, 0.04085801838610828, 0.05472197705207414],
63 |                              blim_md.eta.values.tolist()[0])
64 | 
65 |     def test_log_likelihood_md(self):
66 |         blim_md = BLIM(self.k, self.n_r)
67 |         self.assertEqual(blim_md.log_lik, blim_md.log_likelihood())
68 | 
69 |     def test_number_of_obs_md(self):
70 |         blim_md = BLIM(self.k, self.n_r)
71 |         self.assertEqual(blim_md.n_patterns, blim_md.number_of_observations())
72 | 
73 |     def test_deviance_md(self):
74 |         blim_md = BLIM(self.k, self.n_r)
75 |         self.assertEqual(blim_md.goodness_of_fit['g2'], blim_md.deviance())
76 | 
77 |     def test_coef_md(self):
78 |         blim_md = BLIM(self.k, self.n_r)
79 |         beta, eta, p_k = blim_md.coef()
80 |         self.assertListEqual(list(blim_md.beta), list(beta))
81 |         self.assertListEqual(blim_md.beta.values.tolist(), beta.values.tolist())
82 |         self.assertListEqual(list(blim_md.eta), list(eta))
83 |         self.assertListEqual(blim_md.eta.values.tolist(), eta.values.tolist())
84 |         self.assertListEqual(list(blim_md.p_k), list(p_k))
85 |         self.assertListEqual(blim_md.p_k.values.tolist(), p_k.values.tolist())
86 | 
87 |     def test_blim_ml(self):
88 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
89 |         self.assertEqual(9, blim_ml.n_states)
90 |         self.assertEqual(32, blim_ml.n_patterns)
91 |         self.assertEqual(1000, blim_ml.n_total)
92 |         self.assertEqual("ML", blim_ml.method)
93 |         self.assertEqual(300, blim_ml.iteration)
94 |         self.assertEqual(12.622816435940905, blim_ml.goodness_of_fit['g2'])
95 |         self.assertEqual(13, blim_ml.goodness_of_fit['df'])
96 |         self.assertEqual(0.477349992130788, blim_ml.goodness_of_fit['pval'])
97 |         self.assertEqual(0.254, blim_ml.discrepancy)
98 |         self.assertEqual(0.044865390859123146, blim_ml.n_errors['lucky'])
99 |         self.assertEqual(0.44280715825096656, blim_ml.n_errors['careless'])
100 |         self.assertListEqual([0.1648712647718087, 0.16311278151263192, 0.18883863747163213,
101 |                               0.07983530446636058, 0.08864829052919883], blim_ml.beta.values.tolist()[0])
102 |         self.assertListEqual([0.10306473120044671, 0.09507429143942243, 3.5426760020042067e-06,
103 |                               3.157133824028973e-06, 0.019909716488346413], blim_ml.eta.values.tolist()[0])
104 | 
105 |     def test_log_likelihood_ml(self):
106 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
107 |         self.assertEqual(blim_ml.log_lik, blim_ml.log_likelihood())
108 | 
109 |     def test_number_of_obs_ml(self):
110 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
111 |         self.assertEqual(blim_ml.n_patterns, blim_ml.number_of_observations())
112 | 
113 |     def test_deviance_ml(self):
114 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
115 |         self.assertEqual(blim_ml.goodness_of_fit['g2'], blim_ml.deviance())
116 | 
117 |     def test_coef_ml(self):
118 |         blim_ml = BLIM(self.k, self.n_r, method="ML")
119 |         beta, eta, p_k = blim_ml.coef()
120 |         self.assertListEqual(list(blim_ml.beta), list(beta))
121 |         self.assertListEqual(blim_ml.beta.values.tolist(), beta.values.tolist())
122 |         self.assertListEqual(list(blim_ml.eta), list(eta))
123 |         self.assertListEqual(blim_ml.eta.values.tolist(), eta.values.tolist())
124 |         self.assertListEqual(list(blim_ml.p_k), list(p_k))
125 |         self.assertListEqual(blim_ml.p_k.values.tolist(), p_k.values.tolist())
126 | 
--------------------------------------------------------------------------------
/learning_spaces/pks/blim.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from scipy.stats import chi2
4 | from .conversion import convert_as_pattern, convert_as_bin_mat
5 | 
6 | 
7 | class BLIM:
8 |     """
9 |     Fits a basic local independence model (BLIM) for probabilistic knowledge structures by
10 |     Minimum Discrepancy and Maximum Likelihood estimation
11 |     """
12 | 
13 |     def __init__(self, k, n_r, method="MD", r=None, p_k=None, beta=None, eta=None, rand_init=False, inc_radius=0,
14 |                  tol=1e-07, max_iter=10000):
15 |         """
16 |         Fit a Basic Local Independence Model (BLIM) for probabilistic knowledge structures by
17 |         Minimum Discrepancy Maximum Likelihood estimation
18 |         :param k: a dataframe with binary matrix representing the knowledge structure
19 |         :param n_r: dictionary of absolute frequencies of response patterns
20 |         :param method: MD for Minimum Discrepancy estimation, ML for Maximum Likelihood estimation
21 |         :param r: dataframe with binary matrix of unique response patterns. Per default inferred from the names of n_r
22 |         :param p_k: list of initial parameter values for probabilities of knowledge states
23 |         :param beta: list of initial parameter values for probabilities of a careless error
24 |         :param eta: list of initial parameter values for probabilities of a lucky guess
25 |         :param rand_init: if True then initial parameter values are sampled uniformly with constraints
26 |         :param inc_radius: include knowledge states of distance from the minimum discrepant states less than or equal
27 |                            to inc_radius
28 |         :param tol: tolerance, stopping criterion for iteration
29 |         :param max_iter: the maximum number of iterations
30 |         """
31 |         # setting initial BLIM object values
32 |         self.k = k
33 |         self.n_r = n_r
34 |         self.n_total = sum(n_r.values())
35 |         self.method = method
36 |         self.n_items = len(list(k))
37 |         if r is None:
38 |             r = convert_as_bin_mat(list(self.n_r), col_names=list(self.k))
39 |         self.n_patterns = len(r.index)
40 |         self.n_states = len(k.index)
41 |         if p_k is None:
42 |             self.p_k = [1 / self.n_states] * self.n_states
43 |         else:
44 |             self.p_k = p_k
45 |         if beta is None:
46 |             self.beta = [0.1] * self.n_items
47 |         else:
48 |             self.beta = beta
49 |         if eta is None:
50 |             self.eta = [0.1] * self.n_items
51 |         else:
52 |             self.eta = eta
53 | 
54 |         # uniformly random initial values
55 |         if rand_init:
56 |             self.beta = np.random.uniform(0, 1, self.n_items)
57 |             self.eta = np.random.uniform(0, 1, self.n_items)
58 |             # constraint: beta + eta < 1
59 |             for i in range(len(self.beta)):
60 |                 if self.beta[i] + self.eta[i] >= 1:
61 |                     self.beta[i] = 1 - self.beta[i]
62 |                     self.eta[i] = 1 - self.eta[i]
63 |             # constraint: sum(p_k) == 1
64 |             x = []
65 |             x.append(0)
66 |             x += list(np.random.uniform(0, 1, self.n_states - 1))
67 |             x.append(1)
68 |             x.sort()
69 |             a = x[1:]
70 |             b = x[:-1]
71 |             for i in range(len(self.p_k)):
72 |                 self.p_k[i] = a[i] - b[i]
73 | 
74 |         # converting to dataframes
75 |         self.p_k = pd.DataFrame([self.p_k], columns=convert_as_pattern(self.k))
76 |         self.beta = pd.DataFrame([self.beta], columns=list(self.k))
77 |         self.eta = pd.DataFrame([self.eta], columns=list(self.k))
78 |         # assigning state K given response R
79 |         d_rk_header = convert_as_pattern(self.k)
80 |         d_rk = pd.DataFrame(columns=d_rk_header)
81 |         for i in range(len(self.k.index)):
82 |             rk_matrix = np.logical_xor(r, list(self.k.iloc[i]))
83 |             d_rk[d_rk_header[i]] = list(rk_matrix.sum(axis=1))
84 |         # minimum discrepancy
85 |         d_min = d_rk.apply(min, axis=1)
86 |         i_rk = np.logical_and(d_rk.le(d_min + inc_radius, axis=0), d_rk.notnull())  # row-wise comparison against the minimum discrepancy
87 |         # minimum discrepancy distribution
88 |         frequencies = list(self.n_r.values())
89 |         values = pd.unique(d_min)
90 |         sums = {}
91 |         disc_sum = 0
92 |         disc_count = 0
93 |         for value in values:
94 |             sums[value] = 0
95 |         for i in range(len(d_min)):
96 |             sums[d_min[i]] += frequencies[i]
97 |             disc_sum += d_min[i] * frequencies[i]
98 |             disc_count += frequencies[i]
99 |         self.discrepancy = disc_sum / disc_count
100 |         self.disc_tab = pd.DataFrame(sums, columns=sums.keys(), index=[0])
101 | 
102 |         # selected method: the md/em flags act as exponents that switch between the MD and ML update rules below
103 |         em = 1
104 |         if method == "MD":
105 |             em = 0
106 |         md = 1
107 |         if method == "ML":
108 |             md = 0
109 | 
110 |         self.iteration = 0
111 |         max_diff = 2 * tol
112 |         beta_num = self.beta.copy(deep=True)
113 |         beta_denom = self.beta.copy(deep=True)
114 |         eta_num = self.beta.copy(deep=True)
115 |         eta_denom = self.beta.copy(deep=True)
116 | 
117 |         while (max_diff > tol) and (self.iteration < max_iter) and ((md * (1 - em) != 1) or (self.iteration == 0)):
118 |             pi_old = self.p_k.copy(deep=True)
119 |             beta_old = self.beta.copy(deep=True)
120 |             eta_old = self.eta.copy(deep=True)
121 | 
122 |             p_r_k = pd.DataFrame(columns=d_rk_header)
123 |             for i in range(len(self.k.index)):
124 |                 p_r_k[d_rk_header[i]] = calculate_p_r_k(self.k.iloc[i], self.beta, self.eta, r)
125 | 
126 |             p_r = numpy_list_to_list(np.inner(np.asmatrix(p_r_k), np.asarray(self.p_k)).tolist())
127 |             # prediction of P(K|R)
128 |             p_k_r = pd.DataFrame(np.multiply(np.asmatrix(p_r_k), np.outer((1 / np.asarray(p_r)), np.asarray(self.p_k))),
129 |                                  columns=d_rk_header)
130 | 
131 |             mat_rk = pd.DataFrame(np.multiply(np.asmatrix(i_rk ** md), np.asmatrix(p_k_r ** em)), columns=d_rk_header)
132 | 
133 |             # m_r_k = E(M_RK) = P(K|R) * N(R)
134 |             np_mat_rk = np.asmatrix(mat_rk)
135 |             mat_rk_row_sum = np_mat_rk / np_mat_rk.sum(axis=1)
136 |             list_n_r = np.array(list(self.n_r.values()))
137 |             m_r_k = pd.DataFrame(np.multiply(mat_rk_row_sum, list_n_r[:, np.newaxis]), columns=d_rk_header)
138 | 
139 |             # distribution of knowledge states
140 |             self.p_k = m_r_k.sum(axis=0) / self.n_total
141 | 
142 |             # careless error and guessing parameters
143 |             k_header = list(self.k)
144 |             for i in range(self.n_items):
145 |                 current_header = k_header[i]
146 |                 # filter by columns first
147 |                 del_col_0 = np.where(np.array(self.k[current_header]) == 0)[0]
148 |                 m_r_k_0 = m_r_k.drop(m_r_k.columns[del_col_0], axis=1)
149 |                 del_col_1 = np.where(np.array(self.k[current_header]) == 1)[0]
150 |                 m_r_k_1 = m_r_k.drop(m_r_k.columns[del_col_1], axis=1)
151 |                 # calculate errors
152 |                 beta_num[current_header] = m_r_k_0.loc[r[current_header] == 0].values.sum()
153 |                 beta_denom[current_header] = m_r_k_0.values.sum()
154 |                 eta_num[current_header] = m_r_k_1.loc[r[current_header] == 1].values.sum()
155 |                 eta_denom[current_header] = m_r_k_1.values.sum()
156 | 
157 |             # updating error values
158 |             for header in k_header:
159 |                 self.beta[header] = beta_num[header] / beta_denom[header]
160 |                 self.beta = self.beta.fillna(0)  # fillna returns a copy, so reassign
161 |                 self.eta[header] = eta_num[header] / eta_denom[header]
162 |                 self.eta = self.eta.fillna(0)
163 | 
164 |             # updating max_diff
165 |             p_max = np.amax(abs(self.p_k - pi_old).values)
166 |             beta_max = np.amax(abs(self.beta - beta_old).values)
167 |             eta_max = np.amax(abs(self.eta - eta_old).values)
168 |             max_diff = max(p_max, beta_max, eta_max)
169 |             # updating iterations
170 |             self.iteration += 1
171 | 
172 |         if self.iteration >= max_iter:
173 |             print("Iteration maximum has been exceeded")
174 | 
175 |         # mean number of errors
176 |         p_kq = [0] * self.n_items
177 |         for i in range(self.n_items):
178 |             current_header = k_header[i]
179 |             selected_headers = np.where(np.array(self.k[current_header] == 1))[0]
180 |             sums = 0
181 |             for header in selected_headers:
182 |                 sums += self.p_k[d_rk_header[header]]
183 |             p_kq[i] = sums
184 | 
185 |         self.n_errors = {}
186 |         self.n_errors['careless'] = (self.beta * p_kq).values.sum()
187 |         self.n_errors['lucky'] = (self.eta * (1 - np.array(p_kq))).values.sum()
188 | 
189 |         # recompute predictions and likelihood
190 |         for i in range(len(self.k.index)):
191 |             p_r_k[d_rk_header[i]] = calculate_p_r_k(self.k.iloc[i], self.beta, self.eta, r)
192 | 
193 |         p_r = np.inner(np.asmatrix(p_r_k), np.asarray(self.p_k)).tolist()[0]
194 |         if sum(p_r) < 1:
195 |             p_r = p_r / sum(p_r)
196 | 
197 |         self.log_lik = sum(np.log(p_r) * list(self.n_r.values()))
198 | 
199 |         # goodness of fit
200 |         self.goodness_of_fit = {}
201 |         fitted = np.asarray(p_r) * self.n_total
202 |         self.fitted_values = pd.DataFrame([fitted], columns=self.n_r.keys())
203 |         n_r_list = list(self.n_r.values())
204 | 
205 |         self.goodness_of_fit['g2'] = 2 * sum(n_r_list * np.log(n_r_list / fitted))
206 |         self.goodness_of_fit['df'] = min(2 ** self.n_items - 1, self.n_total) - 2 * self.n_states
207 |         self.goodness_of_fit['pval'] = 1 - chi2.cdf(self.goodness_of_fit['g2'], self.goodness_of_fit['df'])
208 | 
209 |     def describe(self):
210 |         """
211 |         Print BLIM object values
212 |         """
213 |         print("\nBasic local independence models (BLIMs)\n")
214 |         print("Number of knowledge states: {0}".format(self.n_states))
215 |         print("Number of response patterns: {0}".format(self.n_patterns))
216 |         print("Number of respondents: {0}".format(self.n_total))
217 |         print("\nMethod: " + self.method)
218 |         print("Number of iterations: {0}".format(self.iteration))
219 |         g2 = self.goodness_of_fit['g2']
220 |         df = self.goodness_of_fit['df']
221 |         pval = self.goodness_of_fit['pval']
222 |         print("Goodness of fit (2 log likelihood ratio):\n")
223 |         print("\tG2({0}) = {1}, p = {2} \n".format(df, g2, pval))
224 |         print("Minimum discrepancy distribution (mean = {0})\n".format(self.discrepancy))
225 |         print("Mean number of errors (total = {0})".format(sum(self.n_errors.values())))
226 |         print(self.n_errors)
227 |         print("\nDistribution of knowledge states:")
228 |         print(self.p_k)
229 |         print("\nError and guessing parameters:")
230 |         print("Beta")
231 |         print(self.beta)
232 |         print("Eta")
233 |         print(self.eta)
234 | 
235 |     def log_likelihood(self):
236 |         """
237 |         Log-Likelihood for BLIM object
238 |         """
239 |         return self.log_lik
240 | 
241 |     def number_of_observations(self):
242 |         """
243 |         Number of observations
244 |         """
245 |         return self.n_patterns
246 | 
247 |     def simulate(self):
248 |         """
249 |         Simulates responses from the distribution corresponding to a fitted BLIM model object.
250 |         :return: dataframe of frequencies of response patterns
251 |         """
252 |         seq_len = list(range(len(self.p_k.values)))
253 |         states_id = np.random.choice(seq_len, size=self.n_total, replace=True, p=self.p_k.values)
254 |         beta_inv = 1 - self.beta
255 |         # P(resp = 1 | K)
256 |         p_1_k = np.multiply(np.asmatrix(self.k), np.asarray(beta_inv.values)) + np.multiply(np.asmatrix(1 - self.k), np.asarray(self.eta.values))
257 |         p_1_k_df = pd.DataFrame(np.transpose(p_1_k), columns=convert_as_pattern(self.k))
258 |         # initialize response matrix
259 |         r_mat = pd.DataFrame(0, index=np.arange(self.n_total), columns=list(self.k))
260 |         # draw a response
261 |         for i in range(self.n_total):
262 |             r_mat.loc[i, :] = np.random.binomial(n=1, size=self.n_items, p=np.array(p_1_k_df.iloc[:, states_id[i]]))
263 | 
264 |         patterns, frequencies = convert_as_pattern(r_mat, freq=True)
265 |         return pd.DataFrame([frequencies], columns=patterns)
266 | 
267 |     def deviance(self):
268 |         """
269 |         Deviance
270 |         """
271 |         return self.goodness_of_fit['g2']
272 | 
273 |     def coef(self):
274 |         """
275 |         BLIM object parameters
276 |         :return: dataframes for beta, eta and p_k
277 |         """
278 |         return self.beta, self.eta, self.p_k
279 | 
280 | 
281 | def calculate_p_r_k(k_row, beta, eta, r):
282 |     """
283 |     Calculating P(R|K) for every row from knowledge structure matrix
284 |     :param k_row: dataframe representing knowledge structure matrix row
285 |     :param beta: dataframe representing beta
286 |     :param eta: dataframe representing eta
287 |     :param r: dataframe with binary matrix of unique response patterns
288 |     :return: list of calculated values
289 |     """
290 |     # converting data into numpy arrays and matrices
291 |     k = np.asarray(k_row)
292 |     k_inv = np.asarray(1 - k_row)
293 |     beta_mat = np.asmatrix(beta)
294 |     beta_inv_mat = np.asmatrix(1 - beta)
295 |     eta_mat = np.asmatrix(eta)
296 |     eta_inv_mat = np.asmatrix(1 - eta)
297 |     r_mat = np.asmatrix(r)
298 |     r_inv_mat = np.asmatrix(1 - r)
299 |     # calculating the beta and eta factors
300 |     beta1 = np.power(beta_mat, np.multiply(r_inv_mat, k))
301 |     beta2 = np.power(beta_inv_mat, np.multiply(r_mat, k))
302 |     eta1 = np.power(eta_mat, np.multiply(r_mat, k_inv))
303 |     eta2 = np.power(eta_inv_mat, np.multiply(r_inv_mat, k_inv))
304 |     # multiply betas and etas
305 |     mul_mat = np.multiply(np.multiply(beta1, beta2), np.multiply(eta1, eta2))
306 |     # multiply by row
307 |     row_prod = np.prod(mul_mat, axis=1).tolist()
308 |     return numpy_list_to_list(row_prod)
309 | 
310 | 
311 | def numpy_list_to_list(numpy_list):
312 |     """
313 |     Flatten a singly nested (n x 1) list into a flat list
314 |     :param numpy_list: nested list
315 |     :return: list
316 |     """
317 |     ret_val = [x[0] for x in numpy_list]
318 |     return ret_val
319 | 
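320 | # Illustrative usage (an editorial sketch, not part of the original module),
321 | # mirroring the setup in tests/test_blim.py; the CSV path and the truncated
322 | # frequency dict are assumptions borrowed from that test:
323 | #
324 | #     import pandas as pd
325 | #     from collections import OrderedDict
326 | #     k = pd.read_csv('tests/data/test_data.csv')              # knowledge states
327 | #     n_r = OrderedDict([('00000', 80), ('10000', 92), ...])   # pattern counts
328 | #     model = BLIM(k, n_r, method='ML')
329 | #     model.describe()                                         # fit summary
330 | #     beta, eta, p_k = model.coef()
--------------------------------------------------------------------------------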