├── tensorlog ├── __init__.py ├── MANIFEST.in ├── test-data │ ├── textcattoy_labels.cfacts │ ├── argmax.exam │ ├── testgrad.ppr │ ├── testgrad.tlog │ ├── matchtoy-train.exam │ ├── fam.exam │ ├── toytest.exam │ ├── textcat2.ppr │ ├── textcat2.tlog │ ├── textcattoy-train.examples │ ├── textcat.ppr │ ├── textcat.tlog │ ├── testgrad.cfacts │ ├── matchtoy-train.examples │ ├── textcat3.ppr │ ├── toytrain.exam │ ├── textcat3.tlog │ ├── textcat4.ppr │ ├── argmax.cfacts │ ├── toytest.examples │ ├── toytrain.examples │ ├── matchtoy.ppr │ ├── matchtoy.tlog │ ├── fam.cfacts │ ├── matchtoy.cfacts │ ├── textcattoy_corpus.cfacts │ ├── textcattoy2.cfacts │ ├── textcattoy_pairs.cfacts │ └── textcattoy3.cfacts ├── README.rst ├── xctargets.py ├── Makefile.config ├── Makefile.config.template ├── putil.py ├── trywam.py ├── config.py ├── symtab.py ├── util.py ├── Cleanup-notes.txt ├── masterconfig.py ├── list.py ├── trytheano.py ├── learnxcomp.py ├── trylearn.py ├── declare.py ├── version.py ├── Makefile ├── try.py ├── Notes.txt ├── testtf.py ├── opfunutil.py └── helper │ ├── countmin_embeddings.py │ └── minerules.py ├── datasets ├── grid │ ├── inputs │ │ └── README.txt │ ├── proppr │ │ ├── README.txt │ │ ├── grid.ppr │ │ ├── exam2proppr.py │ │ ├── facts2proppr.py │ │ ├── proppr-expt.sh │ │ └── average-time-in-solutions.py │ ├── grid.ppr │ ├── grid_embedded.ppr │ ├── Makefile │ ├── README.txt │ ├── testexpt.py │ ├── tfexpt.py │ ├── Notes.txt │ ├── demo.py │ ├── expected.txt │ ├── bigtfexpt.py │ └── bigexpt.py ├── fb15k-speed │ ├── tmp-cache │ │ └── README.txt │ ├── Makefile │ ├── README.txt │ ├── testexpt.py │ ├── tfexpt.py │ └── expt.py ├── wikimovies │ ├── tmp-cache │ │ └── README.txt │ ├── Makefile │ ├── testexpt.py │ ├── theory.ppr │ ├── tfexpt.py │ └── expt.py ├── socialgraphs │ ├── Makefile │ ├── README.txt │ ├── social.tlog │ ├── inputs │ │ ├── karate-train.exam │ │ ├── karate-test.exam │ │ ├── dolphins-test.exam │ │ ├── dolphins-train.exam │ │ ├── data2tlog.py │ │ ├── football-test.exam │ │ ├── football-train.exam │ │ ├── karate-ghirl.txt │ │ └── karate.cfacts │ ├── testexpt.py │ └── demo.py ├── textcat-ssl │ └── README.txt ├── README.txt ├── smokers │ ├── raw │ │ ├── extras.cfacts │ │ ├── generate.py │ │ ├── cancer-smokes.cfacts │ │ └── labels.txt │ ├── Notes.txt │ ├── Makefile │ ├── scaleup │ │ ├── queryent2proppr.py │ │ ├── smokers-for-proppr.ppr │ │ ├── smokers.ppr │ │ ├── average-time-in-solutions.py │ │ ├── proppr-expt.sh │ │ ├── tfexpt.py │ │ ├── Results.txt │ │ ├── expt.py │ │ └── gen.py │ ├── testexpt.py │ ├── smokers.ppr │ ├── README.txt │ ├── tfexpt.py │ ├── expected.txt │ ├── expt.py │ └── query-entities.txt ├── fb15k-237 │ └── README.txt ├── cora │ └── README.txt ├── amie │ └── README.txt ├── amie-qa │ └── README.txt ├── family │ ├── Makefile │ ├── README.txt │ ├── testexpt.py │ ├── expected.txt │ ├── tfexpt.py │ ├── inputs │ │ ├── kinship.cfacts │ │ ├── kinship-test.examples │ │ ├── kinship-train.examples │ │ └── kinship-rule.cfacts │ └── expt.py ├── top-1000-near-google │ ├── expected.txt │ ├── Makefile │ ├── tfexpt.py │ ├── testexpt.py │ ├── expt.py │ └── top-1000-near-google-rule.cfacts └── Makefile ├── .vscode └── settings.json ├── doc ├── tensorlog-paper.pdf └── QUICKSTART.txt ├── .gitignore ├── README.md └── setup.py /tensorlog/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorlog/MANIFEST.in: 
-------------------------------------------------------------------------------- 1 | include README.rst 2 | -------------------------------------------------------------------------------- /datasets/grid/inputs/README.txt: -------------------------------------------------------------------------------- 1 | place holder for git 2 | -------------------------------------------------------------------------------- /datasets/grid/proppr/README.txt: -------------------------------------------------------------------------------- 1 | place holder for git 2 | -------------------------------------------------------------------------------- /datasets/fb15k-speed/tmp-cache/README.txt: -------------------------------------------------------------------------------- 1 | place holder for git 2 | -------------------------------------------------------------------------------- /datasets/wikimovies/tmp-cache/README.txt: -------------------------------------------------------------------------------- 1 | place holder for git 2 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcattoy_labels.cfacts: -------------------------------------------------------------------------------- 1 | label pos 2 | label neg 3 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/usr/local/bin/python3" 3 | } -------------------------------------------------------------------------------- /datasets/grid/grid.ppr: -------------------------------------------------------------------------------- 1 | path(X,Y) :- edge(X,Y). 2 | path(X,Y) :- edge(X,Z), path(Z,Y). 3 | -------------------------------------------------------------------------------- /tensorlog/test-data/argmax.exam: -------------------------------------------------------------------------------- 1 | predict 1800 1896 2 | predict 1900 1900 3 | predict 2000 2008 4 | -------------------------------------------------------------------------------- /tensorlog/test-data/testgrad.ppr: -------------------------------------------------------------------------------- 1 | after(GX,GW) :- like(X,GX),before(W,X),like(W,GW). 2 | 3 | 4 | -------------------------------------------------------------------------------- /doc/tensorlog-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamCohen/TensorLog/HEAD/doc/tensorlog-paper.pdf -------------------------------------------------------------------------------- /tensorlog/test-data/testgrad.tlog: -------------------------------------------------------------------------------- 1 | after(GX,GW) <= like(X,GX) & before(W,X) & like(W,GW) 2 | 3 | 4 | -------------------------------------------------------------------------------- /datasets/grid/proppr/grid.ppr: -------------------------------------------------------------------------------- 1 | path(X,Y) :- edge(X,Y) {r1}. 2 | path(X,Y) :- edge(X,Z), path(Z,Y) {r2}. 
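# note: the {r1},{r2} annotations attach a named feature to each rule, so that learning can assign each rule its own weight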
3 | 4 | -------------------------------------------------------------------------------- /datasets/fb15k-speed/Makefile: -------------------------------------------------------------------------------- 1 | unittest: 2 | python testexpt.py 3 | 4 | clean: 5 | rm -rf tmp-cache/* 6 | 7 | 8 | -------------------------------------------------------------------------------- /datasets/grid/grid_embedded.ppr: -------------------------------------------------------------------------------- 1 | path(X,Y) :- embedded_edge(X,Y). 2 | path(X,Y) :- embedded_edge(X,Z), path(Z,Y). 3 | -------------------------------------------------------------------------------- /datasets/socialgraphs/Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | echo socialgraphs is clean 3 | 4 | unittest: 5 | python testexpt.py 6 | 7 | -------------------------------------------------------------------------------- /tensorlog/test-data/matchtoy-train.exam: -------------------------------------------------------------------------------- 1 | match r1 r2 r1 2 | match r3 r4 r3 3 | 4 | amatch a2 a1 a2 5 | amatch a4 a3 a4 6 | -------------------------------------------------------------------------------- /datasets/textcat-ssl/README.txt: -------------------------------------------------------------------------------- 1 | Preliminary experiments using TensorLog for semi-supervised (SSL) text 2 | classification 3 | 4 | -------------------------------------------------------------------------------- /datasets/wikimovies/Makefile: -------------------------------------------------------------------------------- 1 | unittest: 2 | python testexpt.py 3 | 4 | clean: 5 | rm -rf *~ learned-model.db tmp-cache/* 6 | -------------------------------------------------------------------------------- /tensorlog/test-data/fam.exam: -------------------------------------------------------------------------------- 1 | child lottie charlotte lucas 2 | child sarah poppy 3 | child rachel caroline elizabeth 4 | child william josh charlie 5 | -------------------------------------------------------------------------------- /datasets/socialgraphs/README.txt: -------------------------------------------------------------------------------- 1 | Preliminary experiments using TensorLog to learn to predict 2 | community membership. See demo.py 3 | 4 | 5 | -------------------------------------------------------------------------------- /tensorlog/test-data/toytest.exam: -------------------------------------------------------------------------------- 1 | predict pb pos 2 | predict yc pos 3 | predict rb2 pos 4 | predict rp pos 5 | predict bp neg 6 | predict he neg 7 | predict wt neg 8 | -------------------------------------------------------------------------------- /datasets/README.txt: -------------------------------------------------------------------------------- 1 | These are various sample datasets for tensorlog - check the individual 2 | directories for more information. Some of these are used for testing.
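By convention (see the Makefile in this directory), 'make' in each subdirectory runs a small experiment, 'make check' diffs actual against expected results, and 'make clean' removes the generated files.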
3 | -------------------------------------------------------------------------------- /datasets/smokers/raw/extras.cfacts: -------------------------------------------------------------------------------- 1 | const yes 2 | const no 3 | 4 | rule r1 5 | rule r2 6 | rule r3 7 | rule r4 8 | rule r5 9 | rule r6 10 | rule r7 11 | rule r8 12 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcat2.ppr: -------------------------------------------------------------------------------- 1 | predict(X,Pos) :- assign(Pos,pos) {posWeighted(W): hasWord(X,W)}. 2 | predict(X,Neg) :- assign(Neg,neg) {negWeighted(W): hasWord(X,W)}. 3 | 4 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcat2.tlog: -------------------------------------------------------------------------------- 1 | predict(X,Pos) <= assign(Pos,pos) // posWeighted(W)| hasWord(X,W) 2 | predict(X,Neg) <= assign(Neg,neg) // negWeighted(W)| hasWord(X,W) 3 | 4 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcattoy-train.examples: -------------------------------------------------------------------------------- 1 | dh pos 2 | ft pos 3 | rw pos 4 | sc pos 5 | bk pos 6 | rb pos 7 | mv neg 8 | hs neg 9 | ji neg 10 | tf neg 11 | jm neg 12 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcat.ppr: -------------------------------------------------------------------------------- 1 | predict(X,Pos) :- assign(Pos,pos) {weighted(F): hasWord(X,W),posPair(W,F)}. 2 | predict(X,Neg) :- assign(Neg,neg) {weighted(F): hasWord(X,W),negPair(W,F)}. 3 | 4 | -------------------------------------------------------------------------------- /datasets/socialgraphs/social.tlog: -------------------------------------------------------------------------------- 1 | inferred_label(X,Y) <= influenced_by(X,Z) & label(Z,Y) 2 | 3 | influenced_by(A,B) <= friend(A,Z) & influenced_by(Z,B) 4 | influenced_by(A,B) <= friend(A,B) 5 | 6 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcat.tlog: -------------------------------------------------------------------------------- 1 | predict(X,Pos) <= assign(Pos,pos) // weighted(F) | hasWord(X,W) & posPair(W,F) 2 | predict(X,Neg) <= assign(Neg,neg) // weighted(F) | hasWord(X,W) & negPair(W,F) 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /tensorlog/test-data/testgrad.cfacts: -------------------------------------------------------------------------------- 1 | before a b 2 | before a d 3 | before b d 4 | like a alpha 5 | like b beta 6 | like d delta 7 | like a beta 8 | like b alpha 9 | like b delta 10 | like d beta 11 | like d delta 12 | -------------------------------------------------------------------------------- /tensorlog/test-data/matchtoy-train.examples: -------------------------------------------------------------------------------- 1 | match(r1,Y) +match(r1,r2) +match(r1,r1) 2 | match(r3,Y) +match(r3,r4) +match(r3,r3) 3 | 4 | amatch(a2,Y) +amatch(a2,a1) +amatch(a2,a2) 5 | amatch(a4,Y) +amatch(a4,a3) +amatch(a4,a4) 6 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcat3.ppr: -------------------------------------------------------------------------------- 1 | # typed version 2 | 3 | predict(X,Pos) :- assign(Pos,pos,label) {weighted(F): 
hasWord(X,W),posPair(W,F)}. 4 | predict(X,Neg) :- assign(Neg,neg,label) {weighted(F): hasWord(X,W),negPair(W,F)}. 5 | 6 | -------------------------------------------------------------------------------- /tensorlog/test-data/toytrain.exam: -------------------------------------------------------------------------------- 1 | predict dh pos 2 | predict ft pos 3 | predict sc pos 4 | predict bk pos 5 | predict rb pos 6 | predict mv neg 7 | predict hs neg 8 | predict ji neg 9 | predict tf neg 10 | predict jm neg 11 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcat3.tlog: -------------------------------------------------------------------------------- 1 | # typed version 2 | 3 | predict(X,Pos) <= assign(Pos,pos,label) // weighted(F) | hasWord(X,W) & posPair(W,F) 4 | predict(X,Neg) <= assign(Neg,neg,label) // weighted(F) | hasWord(X,W) & negPair(W,F) 5 | 6 | -------------------------------------------------------------------------------- /tensorlog/README.rst: -------------------------------------------------------------------------------- 1 | TensorLog: a differentiable deductive database. 2 | 3 | 4 | TensorLog is outlined in a technical paper: http://arxiv.org/abs/1605.06523 5 | There is documentation on the GitHub wiki page: https://github.com/TeamCohen/TensorLog 6 | 7 | -------------------------------------------------------------------------------- /datasets/fb15k-237/README.txt: -------------------------------------------------------------------------------- 1 | FB15K-237 Knowledge Base Completion Dataset from 2 | https://www.microsoft.com/en-us/download/details.aspx?id=52312 3 | 4 | Using a set of rules learned by ISG using PROPPR. 5 | 6 | Experiment does rule weight learning. 7 | 8 | 9 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcat4.ppr: -------------------------------------------------------------------------------- 1 | # test user-defined predicates 2 | 3 | predict(X,Y) :- assign(Pos,pos,label),udp1(Pos,Y) {weighted(F): hasWord(X,W),posPair(W,F)}. 4 | predict(X,Y) :- assign(Neg,neg,label),udp1(Neg,Y) {weighted(F): hasWord(X,W),negPair(W,F)}. 
5 | 6 | -------------------------------------------------------------------------------- /datasets/grid/proppr/exam2proppr.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if __name__ == "__main__": 4 | for line in sys.stdin: 5 | rel,x,y = line.strip().split("\t") 6 | i,j = x.split(",") 7 | x = "%s_%s" % (i,j) 8 | print("%s(%s,Y)" % (rel,x)) 9 | 10 | -------------------------------------------------------------------------------- /datasets/smokers/Notes.txt: -------------------------------------------------------------------------------- 1 | runtime results: 2 | 3 | python smokers-expt.py 4 | eval t_stress/io time 0.00578594207764 sec 5 | eval t_influences/io time 0.004075050354 sec 6 | eval t_cancer_spont/io time 0.00533986091614 sec 7 | eval t_cancer_smoke/io time 0.00534009933472 sec 8 | total time 0.0206727981567 sec 9 | -------------------------------------------------------------------------------- /datasets/smokers/Makefile: -------------------------------------------------------------------------------- 1 | expt: actual.txt 2 | 3 | actual.txt: 4 | echo \# actual result on `date` > actual.txt 5 | python expt.py >> actual.txt 6 | 7 | check: actual.txt 8 | diff -y actual.txt expected.txt || true 9 | 10 | unittest: 11 | python testexpt.py 12 | 13 | clean: 14 | rm -f *~ *.pyc actual.txt 15 | 16 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/karate-train.exam: -------------------------------------------------------------------------------- 1 | inferred_label node007 b 2 | inferred_label node008 b 3 | inferred_label node012 b 4 | inferred_label node015 r 5 | inferred_label node017 b 6 | inferred_label node019 r 7 | inferred_label node024 r 8 | inferred_label node026 r 9 | inferred_label node027 r 10 | inferred_label node029 r 11 | -------------------------------------------------------------------------------- /datasets/grid/proppr/facts2proppr.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if __name__ == "__main__": 4 | for line in sys.stdin: 5 | rel,x,y,w = line.strip().split("\t") 6 | i,j = x.split(",") 7 | x = "%s_%s" % (i,j) 8 | i,j = y.split(",") 9 | y = "%s_%s" % (i,j) 10 | print("\t".join([rel,x,y])) 11 | 12 | 13 | -------------------------------------------------------------------------------- /tensorlog/xctargets.py: -------------------------------------------------------------------------------- 1 | # collect available target languages: 2 | try: 3 | import tensorflow as private1 4 | tf=True 5 | except: 6 | tf=False 7 | try: 8 | import theano as private2 9 | theano=True 10 | except: 11 | theano=False 12 | # disable theano tests for now, some of them fail and it's not a 13 | # priority... 
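# (to re-enable the theano tests, remove the assignment below)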
14 | theano=False 15 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/queryent2proppr.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if __name__ == "__main__": 4 | xs = [] 5 | for line in sys.stdin: 6 | xs.append(line.strip()) 7 | for rel in ["t_stress", "t_influences","t_cancer_spont", "t_cancer_smoke"]: 8 | for x in xs: 9 | print('%s(%s,Y)' % (rel,x)) 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /tensorlog/test-data/argmax.cfacts: -------------------------------------------------------------------------------- 1 | # :- predict(century,year) 2 | # :- nations(year) 3 | nations 1896 14 4 | nations 1900 24 5 | nations 1904 12 6 | nations 2004 201 7 | nations 2008 204 8 | nations 2012 203 9 | 10 | # :- olympics(century,year) 11 | olympics 1800 1896 12 | olympics 1900 1900 13 | olympics 1900 1904 14 | olympics 2000 2004 15 | olympics 2000 2008 16 | olympics 2000 2012 17 | -------------------------------------------------------------------------------- /tensorlog/test-data/toytest.examples: -------------------------------------------------------------------------------- 1 | predict(pb,Y) -predict(pb,neg) +predict(pb,pos) 2 | predict(yc,Y) -predict(yc,neg) +predict(yc,pos) 3 | predict(rb2,Y) -predict(rb2,neg) +predict(rb2,pos) 4 | predict(rp,Y) -predict(rp,neg) +predict(rp,pos) 5 | predict(bp,Y) +predict(bp,neg) -predict(bp,pos) 6 | predict(he,Y) +predict(he,neg) -predict(he,pos) 7 | predict(wt,Y) +predict(wt,neg) -predict(wt,pos) 8 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/karate-test.exam: -------------------------------------------------------------------------------- 1 | inferred_label node002 b 2 | inferred_label node003 b 3 | inferred_label node006 b 4 | inferred_label node009 r 5 | inferred_label node013 b 6 | inferred_label node014 b 7 | inferred_label node018 b 8 | inferred_label node020 b 9 | inferred_label node023 r 10 | inferred_label node028 r 11 | inferred_label node032 r 12 | inferred_label node033 r 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #ignore these files 2 | .classpath 3 | .project 4 | .settings 5 | *~ 6 | tmp-cache 7 | *.pyc 8 | *.db 9 | *.mat 10 | 11 | tensorlog/fb15k-valid.db 12 | tensorlog/fb15k-valid.db/* 13 | tensorlog/tlog-cache 14 | tensorlog/toy-trained.db 15 | tensorlog/tlog-cache/* 16 | 17 | datasets/*/actual.txt 18 | datasets/*/eval.log 19 | datasets/*/expt.log 20 | datasets/*/tmp-cache/* 21 | datasets/*/inputs/* 22 | -------------------------------------------------------------------------------- /datasets/smokers/testexpt.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tfexpt 3 | import expt 4 | 5 | class TestTimeNative(unittest.TestCase): 6 | 7 | def testIt(self): 8 | time = expt.runMain() 9 | self.assertTrue(time <= 0.1) 10 | 11 | class TestTimeTF(unittest.TestCase): 12 | 13 | def testIt(self): 14 | time = tfexpt.runMain() 15 | self.assertTrue(time < 0.5) 16 | 17 | if __name__ == "__main__": 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /datasets/cora/README.txt: -------------------------------------------------------------------------------- 1 
| This is the CORA bibliography-matching problem described originally in 2 | 3 | Poon, H., & Domingos, P. (2007, July). Joint inference in information 4 | extraction. In AAAI (Vol. 7, pp. 913-918). 5 | 6 | and later adapted for PROPPR 7 | 8 | William Yang Wang, Kathryn Mazaitis, William W. Cohen (2013): 9 | Programming with Personalized PageRank: A Locally Groundable 10 | First-Order Probabilistic Logic in CIKM-2013 11 | -------------------------------------------------------------------------------- /datasets/wikimovies/testexpt.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tfexpt 3 | import expt 4 | 5 | class TestAccNative(unittest.TestCase): 6 | 7 | def testIt(self): 8 | acc,loss = expt.runMain(250) 9 | self.assertTrue(acc >= 0.205) 10 | 11 | class TestAccTF(unittest.TestCase): 12 | 13 | def testIt(self): 14 | acc = tfexpt.runMain(250) 15 | self.assertTrue(acc >= 0.27) 16 | 17 | if __name__ == "__main__": 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /datasets/amie/README.txt: -------------------------------------------------------------------------------- 1 | TODO (Katie): docs 2 | 3 | AMIE is a system that mines rules from KBs. Part of what was 4 | distributed with this system was a largish set of rules and some small 5 | KBs, used here to .... 6 | 7 | Luis Galárraga, Christina Teflioudi, Katja Hose, and Fabian 8 | M. Suchanek. 2015. Fast rule mining in ontological knowledge bases 9 | with AMIE+. The VLDB Journal 24, 6 (December 2015), 10 | 707-730. DOI=http://dx.doi.org/10.1007/s00778-015-0394-1 11 | -------------------------------------------------------------------------------- /datasets/fb15k-speed/README.txt: -------------------------------------------------------------------------------- 1 | FB15K-237 Knowledge Base Completion Dataset from 2 | https://www.microsoft.com/en-us/download/details.aspx?id=52312 3 | 4 | Using a set of rules learned by ISG using PROPPR. 5 | 6 | Experiment does some performance tests for inference and such. The 7 | speed numbers don't seem very stable - they vary a lot, maybe due to 8 | system load - so the tests are kind of unreliable. 9 | 10 | TODO: decide if the system got slower sometime around v1.3.6 or not. 11 | -------------------------------------------------------------------------------- /tensorlog/Makefile.config: -------------------------------------------------------------------------------- 1 | # Long datasets are not stored in the primary github repository. 2 | # If you want to run them, clone http://curtis.ml.cmu.edu/git/tensorlog-datasets.git 3 | # and place the path here: 4 | #DATASETS:=/home/krivard/projects/tensorlog/tensorlog-datasets 5 | 6 | # By default, the cross-compile tests will run all available target 7 | # language configurations. If you want to limit the ones you run, add 8 | # those preferences here: e.g. "tensorflow sparse" 9 | XCOMP_FILTER:= -------------------------------------------------------------------------------- /datasets/smokers/smokers.ppr: -------------------------------------------------------------------------------- 1 | t_stress(P,Yes) :- assign(Yes,yes),person(P) {r1}. 2 | 3 | t_influences(P1,P2) :- friends(P1,P2) {r2}. 4 | t_cancer_spont(P,Yes) :- assign(Yes,yes),person(P) {r3}. 5 | t_cancer_smoke(P,Yes) :- assign(Yes,yes),person(P) {r4}. 6 | i_smokes(X,Yes) :- i_stress(X,Yes) {r5}. 7 | i_smokes(X,Yes) :- assign(Yes,yes), smokes(Y), i_influences(Y,X) {r6}.
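# cancer is inferred either spontaneously (r7) or as a consequence of smoking (r8):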
8 | i_cancer(P,Yes) :- i_cancer_spont(P,Yes) {r7}. 9 | i_cancer(P,Yes) :- i_smokes(P,Yes), cancer_smoke(P,Yes) {r8}. 10 | 11 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/smokers-for-proppr.ppr: -------------------------------------------------------------------------------- 1 | t_stress(P,Yes) :- const(Yes),person(P) {r1}. 2 | 3 | t_influences(P1,P2) :- friends(P1,P2) {r2}. 4 | t_cancer_spont(P,Yes) :- const(Yes),person(P) {r3}. 5 | t_cancer_smoke(P,Yes) :- const(Yes),person(P) {r4}. 6 | t_smokes(X,Yes) :- t_stress(X,Yes) {r5}. 7 | t_smokes(X,Yes) :- const(Yes), smokes(Y), t_influences(Y,X) {r6}. 8 | t_cancer(P,Yes) :- t_cancer_spont(P,Yes) {r7}. 9 | t_cancer(P,Yes) :- t_smokes(P,Yes), t_cancer_smoke(P,Yes) {r8}. 10 | 11 | 12 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/smokers.ppr: -------------------------------------------------------------------------------- 1 | t_stress(P,Yes) :- assign(Yes,yes),person(P) {r1}. 2 | 3 | t_influences(P1,P2) :- friends(P1,P2) {r2}. 4 | t_cancer_spont(P,Yes) :- assign(Yes,yes),person(P) {r3}. 5 | t_cancer_smoke(P,Yes) :- assign(Yes,yes),person(P) {r4}. 6 | t_smokes(X,Yes) :- t_stress(X,Yes) {r5}. 7 | t_smokes(X,Yes) :- assign(Yes,yes), smokes(Y), t_influences(Y,X) {r6}. 8 | t_cancer(P,Yes) :- t_cancer_spont(P,Yes) {r7}. 9 | t_cancer(P,Yes) :- t_smokes(P,Yes), t_cancer_smoke(P,Yes) {r8}. 10 | 11 | -------------------------------------------------------------------------------- /tensorlog/Makefile.config.template: -------------------------------------------------------------------------------- 1 | # Long datasets are not stored in the primary github repository. 2 | # If you want to run them, clone http://curtis.ml.cmu.edu/git/tensorlog-datasets.git 3 | # and place the path here: 4 | #DATASETS:=/home/krivard/projects/tensorlog/tensorlog-datasets 5 | 6 | 7 | # By default, the cross-compile tests will run all available target 8 | # language configurations. If you want to limit the ones you run, add 9 | # those preferences here: e.g. "tensorflow sparse" 10 | XCOMP_FILTER:= -------------------------------------------------------------------------------- /datasets/smokers/README.txt: -------------------------------------------------------------------------------- 1 | Various artificial learning tasks inspired by experiments in: 2 | 3 | Dries, Anton, et al. "ProbLog2: Probabilistic logic programming." 4 | Joint European Conference on Machine Learning and Knowledge Discovery 5 | in Databases. Springer, Cham, 2015. 6 | 7 | expt.py, testexpt.py, tfexpt.py are for automated tests (using a small 8 | graph based on the cora citation data).
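tfexpt.py compiles the query modes t_stress/io, t_influences/io, t_cancer_spont/io, and t_cancer_smoke/io with simple.Compiler and times inference over them.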
9 | 10 | scaleup/* is support for scalability experiments on different-sized 11 | artificially generated graphs in JAIR submission 12 | -------------------------------------------------------------------------------- /datasets/smokers/raw/generate.py: -------------------------------------------------------------------------------- 1 | if __name__=="__main__": 2 | fp = open("cancer-smokes.cfacts",'w') 3 | fp2 = open("query-entities.txt",'w') 4 | for line in open('labels.txt'): 5 | id,lab = line.strip().split("\t") 6 | fp2.write(id + '\n') 7 | if lab=="yAgents": 8 | fp.write("cancer\t%s\n" % id) 9 | elif lab=="yAI": 10 | fp.write("smokes\t%s\n" % id) 11 | elif lab=="yDB": 12 | fp.write("smokes\t%s\n" % id) 13 | fp.write("cancer\t%s\n" % id) 14 | 15 | 16 | -------------------------------------------------------------------------------- /datasets/amie-qa/README.txt: -------------------------------------------------------------------------------- 1 | AMIE is a system that mines rules from KBs. Part of what was 2 | distributed with this system was a small sample KB. 3 | /afs/cs.cmu.edu/user/wcohen/shared-home/data/dialog-toy 4 | contains code to generate questions (using templates) from 5 | this KB. 6 | 7 | Luis Galárraga, Christina Teflioudi, Katja Hose, and Fabian 8 | M. Suchanek. 2015. Fast rule mining in ontological knowledge bases 9 | with AMIE+. The VLDB Journal 24, 6 (December 2015), 10 | 707-730. DOI=http://dx.doi.org/10.1007/s00778-015-0394-1 11 | -------------------------------------------------------------------------------- /datasets/family/Makefile: -------------------------------------------------------------------------------- 1 | #TODO: fix for new API 2 | 3 | VPATH= raw 4 | 5 | expt.log: 6 | python expt.py > expt.log 7 | 8 | setup: 9 | mkdir -p tmp-cache 10 | 11 | actual.txt: expt.log 12 | echo \# actual result on `date` > actual.txt 13 | for f in tmp-cache/*.solutions.txt; do \ 14 | echo file $$f >> actual.txt ; \ 15 | proppr eval tmp-cache/kinship-test.examples $$f --metric auc --defaultNeg >> actual.txt ; \ 16 | done 17 | 18 | check: actual.txt 19 | diff -y $< expected.txt || true 20 | 21 | unittest: 22 | python testexpt.py 23 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/dolphins-test.exam: -------------------------------------------------------------------------------- 1 | inferred_label Web b 2 | inferred_label Jet b 3 | inferred_label Quasi b 4 | inferred_label Wave b 5 | inferred_label TSN103 nb 6 | inferred_label Grin nb 7 | inferred_label Double nb 8 | inferred_label Number1 b 9 | inferred_label Thumper nb 10 | inferred_label Ripplefluke b 11 | inferred_label Notch b 12 | inferred_label Kringel nb 13 | inferred_label TSN83 nb 14 | inferred_label SN4 nb 15 | inferred_label Hook nb 16 | inferred_label Patchback nb 17 | inferred_label Scabs nb 18 | inferred_label Gallatin b 19 | -------------------------------------------------------------------------------- /tensorlog/test-data/toytrain.examples: -------------------------------------------------------------------------------- 1 | predict(dh,Y) -predict(dh,neg) +predict(dh,pos) 2 | predict(ft,Y) -predict(ft,neg) +predict(ft,pos) 3 | predict(rw,Y) -predict(rw,neg) +predict(rw,pos) 4 | predict(sc,Y) -predict(sc,neg) +predict(sc,pos) 5 | predict(bk,Y) -predict(bk,neg) +predict(bk,pos) 6 | predict(rb,Y) -predict(rb,neg) +predict(rb,pos) 7 | predict(mv,Y) +predict(mv,neg) -predict(mv,pos) 8 | predict(hs,Y) +predict(hs,neg) -predict(hs,pos) 9 |
predict(ji,Y) +predict(ji,neg) -predict(ji,pos) 10 | predict(tf,Y) +predict(tf,neg) -predict(tf,pos) 11 | predict(jm,Y) +predict(jm,neg) -predict(jm,pos) 12 | -------------------------------------------------------------------------------- /tensorlog/test-data/matchtoy.ppr: -------------------------------------------------------------------------------- 1 | match(R,S) :- fname(R,FR),fmatch(FR,FS),fname(S,FS) {f}. 2 | match(R,S) :- lname(R,LR),lmatch(LR,LS),lname(S,LS) {l}. 3 | match(R,S) :- addr(R,AR),amatch(AR,AS),addr(S,AS) {a}. 4 | 5 | fmatch(X,Y) :- same(X,Y) {f1}. 6 | fmatch(X,Y) :- dnick(X,Y) {f2}. 7 | fmatch(X,Y) :- dnick(Y,X) {f2}. 8 | 9 | lmatch(X,Y) :- same(X,Y) {l1}. 10 | lmatch(X,Y) :- dvar(X,Y) {l2}. 11 | lmatch(X,Y) :- dvar(Y,X) {l2}. 12 | 13 | amatch(X,Y) :- hasword(X,A),wmatch(A,B),hasword(Y,B) {a1}. 14 | wmatch(A,B) :- same(A,B) {w1}. 15 | wmatch(A,B) :- dabbrev(A,B) {w2}. 16 | wmatch(A,B) :- dabbrev(B,A) {w2}. 17 | 18 | -------------------------------------------------------------------------------- /tensorlog/test-data/matchtoy.tlog: -------------------------------------------------------------------------------- 1 | match(R,S) <= fname(R,FR) & fmatch(FR,FS) & fname(S,FS) // f 2 | match(R,S) <= lname(R,LR) & lmatch(LR,LS) & lname(S,LS) // l 3 | match(R,S) <= addr(R,AR) & amatch(AR,AS) & addr(S,AS) // a 4 | 5 | fmatch(X,Y) <= same(X,Y) // f1 6 | fmatch(X,Y) <= dnick(X,Y) // f2 7 | fmatch(X,Y) <= dnick(Y,X) // f2 8 | 9 | lmatch(X,Y) <= same(X,Y) // l1 10 | lmatch(X,Y) <= dvar(X,Y) // l2 11 | lmatch(X,Y) <= dvar(Y,X) // l2 12 | 13 | amatch(X,Y) <= hasword(X,A) & wmatch(A,B) & hasword(Y,B) // a1 14 | wmatch(A,B) <= same(A,B) // w1 15 | wmatch(A,B) <= dabbrev(A,B) // w2 16 | wmatch(A,B) <= dabbrev(B,A) // w2 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is the GitHub repository for TensorLog. Package documentation can be found in the [tensorlog](tensorlog) directory. Tutorials and research documentation are on the [wiki](https://github.com/TeamCohen/TensorLog/wiki).
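The package can be installed with pip; the optional cross-compiler backends are declared as extras in setup.py, e.g. $ pip install -e .[xc-theano,xc-tensorflow]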
2 | 3 | Additional (larger) datasets are available publicly: 4 | 5 | $ git clone http://curtis.ml.cmu.edu/git/tensorlog-datasets.git 6 | 7 | Branch conventions: 8 | 9 | working -- stable, all/most sample datasets & tests should run 10 | 11 | master -- in limbo; currently contains 1.3.4 12 | 13 | 0.0.0 -- wc's versioning of tensorflow cross-compiler mechanics 14 | 15 | 0.0.0-dev -- wc's bleeding-edge branch 16 | -------------------------------------------------------------------------------- /tensorlog/test-data/fam.cfacts: -------------------------------------------------------------------------------- 1 | sister william rachel 2 | sister william sarah 3 | sister william lottie 4 | 5 | spouse william susan 6 | spouse susan william 7 | 8 | child lottie charlotte 9 | child lottie lucas 10 | child sarah poppy 11 | child rachel caroline 12 | child rachel elizabeth 13 | child william josh 14 | child william charlie 15 | 16 | parent charlotte lottie 17 | parent lucas lottie 18 | parent poppy sarah 19 | parent caroline rachel 20 | parent elizabeth rachel 21 | 22 | feat r1 23 | feat r2 24 | 25 | active r1 26 | 27 | class pos 28 | class neg 29 | 30 | young lucas 31 | young charlotte 32 | young poppy 33 | 34 | xq a b 35 | xs a c 36 | xs b c 37 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/dolphins-train.exam: -------------------------------------------------------------------------------- 1 | inferred_label DN16 b 2 | inferred_label Cross nb 3 | inferred_label Topless nb 4 | inferred_label MN60 nb 5 | inferred_label Feather b 6 | inferred_label SN9 nb 7 | inferred_label Fork nb 8 | inferred_label Mus b 9 | inferred_label DN21 b 10 | inferred_label MN23 b 11 | inferred_label Stripes nb 12 | inferred_label Jonah nb 13 | inferred_label Beak nb 14 | inferred_label TR82 b 15 | inferred_label Trigger nb 16 | inferred_label Whitetip nb 17 | inferred_label Bumper nb 18 | inferred_label SN100 nb 19 | inferred_label Zipfel nb 20 | inferred_label PL nb 21 | inferred_label MN83 nb 22 | inferred_label TR88 nb 23 | inferred_label Upbang b 24 | -------------------------------------------------------------------------------- /datasets/top-1000-near-google/expected.txt: -------------------------------------------------------------------------------- 1 | # actual output of expt on Mon May 1 15:17:24 EDT 2017 2 | training tensorlog.learn.FixedRateGDLearner ... done in 121.314 sec 3 | training tensorlog.theanoxcomp.FixedRateGDLearner ... done in 393.066 sec 4 | training tensorlog.theanoxcomp.FixedRateGDLearner ... done in 430.367 sec 5 | file tmp-cache/top-1000-near-google-test.solutions.txt 6 | . micro: 0.804093567251 7 | file tmp-cache/top-1000-near-google-test.tensorlog.theanoxcomp.DenseMatDenseMsgCrossCompiler.solutions.txt 8 | . micro: 0.890372168285 9 | file tmp-cache/top-1000-near-google-test.tensorlog.theanoxcomp.SparseMatDenseMsgCrossCompiler.solutions.txt 10 | . micro: 0.890372168285 11 | -------------------------------------------------------------------------------- /tensorlog/putil.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | def multithreaded_map(func, data): 4 | """ 5 | Similar to the builtin function map(), but spawns a thread for each argument 6 | and applies `func` concurrently. 7 | 8 | Note: unlike map(), we cannot take an iterable argument. `data` should be an 9 | indexable sequence.
10 | """ 11 | 12 | N = len(data) 13 | result = [None] * N 14 | # wrapper to dispose the result in the right slot 15 | def task_wrapper(i): result[i] = func(data[i]) 16 | 17 | threads = [threading.Thread(target=task_wrapper, args=(i,)) for i in range(N)] 18 | for t in threads: t.start() 19 | for t in threads: t.join() 20 | return result 21 | -------------------------------------------------------------------------------- /tensorlog/trywam.py: -------------------------------------------------------------------------------- 1 | # (C) William W. Cohen and Carnegie Mellon University, 2016 2 | 3 | import sys 4 | from . import parser 5 | import wamcompiler 6 | import waminterpreter 7 | import factplugin 8 | 9 | if __name__ == "__main__": 10 | 11 | rules = parser.RuleCollection() 12 | rules.add(parser.Parser().parseRule('p(X,Y) :- spouse(X,Y) {r}.')) 13 | rules.listing() 14 | wp = wamcompiler.Compiler().compileRules(rules) 15 | wp.listing() 16 | fp = factplugin.FactPlugin.load('../test/fam.cfacts') 17 | wi = waminterpreter.Interpreter(wp,plugins=[fp]) 18 | print(wi.plugins) 19 | query = parser.Parser().parseQuery('p(X,Y).') 20 | print(query) 21 | answers = waminterpreter.Util.answer(wi,query) 22 | print(answers) 23 | -------------------------------------------------------------------------------- /datasets/grid/Makefile: -------------------------------------------------------------------------------- 1 | expt.log: 2 | python expt.py acc 16 > expt.log 3 | 4 | actual.txt: expt.log 5 | echo \# actual result on `date` > actual.txt 6 | for f in tmp-cache/*.solutions.txt; do \ 7 | echo file $$f >> actual.txt ; \ 8 | proppr eval tmp-cache/test.examples $$f --metric auc --defaultNeg >> actual.txt ; \ 9 | done 10 | # tail -13 expt.log >> actual.txt 11 | 12 | check: actual.txt 13 | diff -y actual.txt expected.txt || true 14 | 15 | setup: 16 | mkdir -p inputs tmp-cache 17 | 18 | unittest: 19 | python testexpt.py 20 | 21 | test: 22 | python expt.py time 16 23 | python expt.py time 64 24 | python expt.py acc 25 | 26 | clean: 27 | rm -f *.pyc *.cfacts *.exam *~ visualize.png expt.log actual.txt inputs/* tmp-cache/* 28 | 29 | 30 | -------------------------------------------------------------------------------- /datasets/fb15k-speed/testexpt.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tfexpt 3 | import expt 4 | 5 | class TestTimeNative(unittest.TestCase): 6 | 7 | # these tests are GPU-dependent 8 | def testIt(self): 9 | fps,qps1,qps2 = expt.runMain() 10 | print('fps,qps1,qps2 are',fps,qps1,qps2) 11 | self.assertTrue(fps >= 650.0) # compilation 12 | self.assertTrue(qps1 >= 100.0) # minibatches size = 1 13 | self.assertTrue(qps2 >= 750.0) # minibatches size = as large as possible 14 | 15 | class TestTimeTF(unittest.TestCase): 16 | 17 | def testIt(self): 18 | fps,qps = tfexpt.runMain() 19 | print('fps,qps are',fps,qps) 20 | self.assertTrue(fps >= 1.5) 21 | self.assertTrue(qps >= 8.0) 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /datasets/grid/proppr/proppr-expt.sh: -------------------------------------------------------------------------------- 1 | args="--duplicateCheck -1 --countFeatures false" 2 | date > proppr-expt.log 3 | echo args $args >> proppr-expt.log 4 | #args="--duplicateCheck -1 --countFeatures false --apr eps=1e-5:alph=0.1:depth=10" 5 | #args="--duplicateCheck -1 --countFeatures false --prover ppr --apr eps=1e-4:alph=0.1:depth=10" 6 | 
proppr compile grid.ppr 7 | #for n in 10 8 | for n in 10 25 50 100 200; 9 | do 10 | echo executing for size $n args $args 11 | python exam2proppr.py < g$n-test.exam > g$n.examples 12 | python facts2proppr.py < g$n.cfacts > p$n.cfacts 13 | echo run proppr answer g$n.examples --programFiles grid.wam:p$n.cfacts $args --threads 1 14 | proppr answer g$n.examples --programFiles grid.wam:p$n.cfacts $args --threads 1 > g$n.log 15 | python average-time-in-solutions.py g$n 1 < g$n.log >> proppr-expt.log 16 | done 17 | -------------------------------------------------------------------------------- /datasets/family/README.txt: -------------------------------------------------------------------------------- 1 | taken from regression benchmark at /afs/cs/project/proppr-1/regression/structureLearning/kinship 2 | 3 | 4 | RULES 5 | 6 | kinship-train-isg.ppr from 5/30/2016 regression run at duck:~krivard/ProPPR-nighlies/check/structureLearning 7 | 8 | learnedPred( and rel( were lowered to their predicate argument, and anonymous predicate rules were removed. 9 | 10 | rule features {} had their arguments merged using a $ delimiter. 11 | 12 | 13 | FACTS 14 | 15 | kinship-train.cfacts and kinship-test.cfacts were combined and duplicates removed, then lowered. 16 | 17 | kinship-rules.cfacts was generated from the rule features in the .ppr file, and the "rule" facts added for each feature predicate lr_if, lr_ifInv, lr_chain 18 | 19 | 20 | EXAMPLES 21 | 22 | interp( was lowered to its predicate argument 23 | 24 | examples with no + labels were removed 25 | 26 | 27 | -------------------------------------------------------------------------------- /datasets/wikimovies/theory.ppr: -------------------------------------------------------------------------------- 1 | answer(Q,A) :- mentions_entity(Q,E), directed_by(A,E) {w_directed_by(F) : has_feature(Q,F)}. 2 | answer(Q,A) :- mentions_entity(Q,E), has_genre(A,E) {w_has_genre(F) : has_feature(Q,F)}. 3 | answer(Q,A) :- mentions_entity(Q,E), has_imdb_rating(A,E) {w_has_imdb_rating(F) : has_feature(Q,F)}. 4 | answer(Q,A) :- mentions_entity(Q,E), has_imdb_votes(A,E) {w_has_imdb_votes(F) : has_feature(Q,F)}. 5 | answer(Q,A) :- mentions_entity(Q,E), has_tags(A,E) {w_has_tags(F) : has_feature(Q,F)}. 6 | answer(Q,A) :- mentions_entity(Q,E), release_year(A,E) {w_release_year(F) : has_feature(Q,F)}. 7 | answer(Q,A) :- mentions_entity(Q,E), starred_actors(A,E) {w_starred_actors(F) : has_feature(Q,F)}. 8 | answer(Q,A) :- mentions_entity(Q,E), written_by(A,E) {w_written_by(F) : has_feature(Q,F)}. 9 | answer(Q,A) :- mentions_entity(Q,E), in_language(A,E) {w_in_language(F) : has_feature(Q,F)}. 
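# one rule per KB relation; each w_<relation>(F) feature lets learning weight the question features F given by has_feature(Q,F)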
10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | def readme(): 4 | with open('README.rst') as f: 5 | return f.read() 6 | 7 | setup(name='tensorlog', 8 | version='1.2.5', 9 | description='Differentiable deductive database platform', 10 | url='https://github.com/TeamCohen/TensorLog', 11 | author='William Cohen', 12 | author_email='wcohen@cs.cmu.edu', 13 | license='Apache 2.0', 14 | install_requires=['numpy','scipy','pyparsing'], 15 | # specify extras with pip like so: 16 | # $ pip install tensorlog[xc-theano,debug] 17 | # or, for development, 18 | # $ pip install -e .[xc-theano,xc-tensorflow,debug] 19 | extras_require={ 20 | 'xc-theano': ['theano'], 21 | 'xc-tensorflow': ['tensorflow'], 22 | #'debug':['ttk', 'Tkinter', 'tkfont'], 23 | 'debug':['pyttk'], 24 | }, 25 | packages=['tensorlog'], 26 | zip_safe=False) 27 | -------------------------------------------------------------------------------- /tensorlog/config.py: -------------------------------------------------------------------------------- 1 | # (C) William W. Cohen and Carnegie Mellon University, 2016 2 | 3 | # A class to contain configuration information 4 | 5 | class Config(object): 6 | """A container for configuration options""" 7 | 8 | def __init__(self): 9 | self.help = ConfigHelp() 10 | 11 | def pprint(self,depth=0): 12 | for key,val in sorted(self.__dict__.items()): 13 | if key!='help': 14 | self._explain(depth,key,val) 15 | if type(val)==type(Config()): 16 | val.pprint(depth+1) 17 | 18 | def _explain(self,depth,key,val): 19 | tmp = '| '*depth + key + ':' 20 | if type(val)!=type(Config()): 21 | tmp += ' '+repr(val) 22 | if key in self.help.__dict__: 23 | print(('%-40s %s' % (tmp,self.help.__dict__[key]))) 24 | 25 | class ConfigHelp(object): 26 | """A parallel object that stores help about configurations.""" 27 | pass 28 | -------------------------------------------------------------------------------- /datasets/top-1000-near-google/Makefile: -------------------------------------------------------------------------------- 1 | # default experiment and its output 2 | 3 | expt: expt.log eval.log 4 | 5 | expt.log: 6 | python expt.py > expt.log 7 | 8 | eval.log: expt.log 9 | rm -f $@ 10 | for f in tmp-cache/*.solutions.txt; do \ 11 | echo file $$f >> eval.log; \ 12 | proppr eval tmp-cache/top-1000-near-google-test.examples $$f --metric auc --defaultNeg >> $@; \ 13 | done 14 | 15 | # a quickly-viewable check on the default experiment (like a unit test) 16 | 17 | check: actual.txt 18 | diff -y actual.txt expected.txt || true 19 | 20 | unittest: 21 | python testexpt.py 22 | 23 | actual.txt: expt.log eval.log 24 | echo \# actual output of expt on `date` > actual.txt 25 | grep training.*done expt.log >> actual.txt 26 | grep -e file -e micro eval.log >> actual.txt 27 | 28 | # prepare for running expt 29 | setup: 30 | mkdir -p tmp-cache 31 | 32 | # clean up the directory 33 | 34 | clean: 35 | rm -rf *~ *.log actual.txt 36 | rm -rf tmp-cache/*.solutions.txt tmp-cache/*.examples 37 | -------------------------------------------------------------------------------- /datasets/grid/proppr/average-time-in-solutions.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # doesn't seem to work - maybe the times are bad 4 | def scan_solutions(): 5 | n = tot = 0 6 | for line in sys.stdin: 7 | if line[0]=='#': 8 |
_rank,_goal,timeInMsec = line.strip().split("\t") 9 | (count,units) = timeInMsec.split(" ") 10 | assert units=="msec" 11 | n += 1 12 | tot += float(count) 13 | qps = 1000.0*n/tot 14 | print('==',sys.argv[1],'threads',sys.argv[2],'total',tot,'n',n,'average','%.2f' % (tot/n),'qps','%.2f' % qps) 15 | 16 | if __name__ == "__main__": 17 | for line in sys.stdin: 18 | if line.find("Total items:")>=0: 19 | _,n = line.strip().split(": ") 20 | elif line.find("Query-answering")>=0: 21 | _,t = line.strip().split(": ") 22 | qps = float(n)/(float(t)/1000) 23 | avg = float(t)/float(n) 24 | print('==',sys.argv[1],'threads',sys.argv[2],'total',t,'n',n,'average','%.2f' % avg,'qps','%.2f' % qps) 25 | 26 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/average-time-in-solutions.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # doesn't seem to work - maybe the times are bad 4 | def scan_solutions(): 5 | n = tot = 0 6 | for line in sys.stdin: 7 | if line[0]=='#': 8 | _rank,_goal,timeInMsec = line.strip().split("\t") 9 | (count,units) = timeInMsec.split(" ") 10 | assert units=="msec" 11 | n += 1 12 | tot += float(count) 13 | qps = 1000.0*n/tot 14 | print('==',sys.argv[1],'threads',sys.argv[2],'total',tot,'n',n,'average','%.2f' % (tot/n),'qps','%.2f' % qps) 15 | 16 | if __name__ == "__main__": 17 | for line in sys.stdin: 18 | if line.find("Total items:")>=0: 19 | _,n = line.strip().split(": ") 20 | elif line.find("Query-answering")>=0: 21 | _,t = line.strip().split(": ") 22 | qps = float(n)/(float(t)/1000) 23 | avg = float(t)/float(n) 24 | print('==',sys.argv[1],'threads',sys.argv[2],'total',t,'n',n,'average','%.2f' % avg,'qps','%.2f' % qps) 25 | 26 | -------------------------------------------------------------------------------- /datasets/grid/README.txt: -------------------------------------------------------------------------------- 1 | Various artificial learning tasks inspired by experiments in: 2 | 3 | Dries, Anton, et al. "ProbLog2: Probabilistic logic programming." 4 | Joint European Conference on Machine Learning and Knowledge Discovery 5 | in Databases. Springer, Cham, 2015. 
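The automated versions below are usually run via the Makefile in this directory; for example 'make test' runs python expt.py time 16, python expt.py time 64, and python expt.py acc.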
6 | 7 | code for running scalability experiments in JAIR submission 8 | - bigexpt.py 9 | - bigtfexpt.py 10 | well-documented code to demo building a TF model 11 | - demo.py 12 | 13 | learning an approximation of the problog2 semantics, by learning 14 | probabilities defined by a biased logistic on top of the proof-count 15 | function, in JAIR submission 16 | - distlearning.py 17 | 18 | learning to process queries with >1 target output, by learning 19 | probabilities defined by a biased logistic on top of the proof-count 20 | function, in JAIR submission 21 | - multiclass.py 22 | 23 | demo of integration with TF - embedding learning is in JAIR submission 24 | - tfintegration.py 25 | 26 | automated tests 27 | - expt.py 28 | - testexpt.py 29 | - tfexpt.py 30 | 31 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/data2tlog.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | 4 | if __name__ == "__main__": 5 | stem = sys.argv[1] 6 | trainFrac = 2.0/3.0 7 | factFrac = 1.0/2.0 8 | seen = set() 9 | with open(stem+'.cfacts','w') as factFP, open(stem+'-train.exam','w') as trainFP, open(stem+'-test.exam','w') as testFP: 10 | for line in open(stem+'-ghirl.txt'): 11 | try: 12 | (_,rel,src,dst) = line.strip().split(" ") 13 | except ValueError: 14 | print('bad line %r' % line) 15 | continue # skip malformed lines instead of falling through with stale src/dst 16 | if src.isdigit(): src = 'node%03d' % int(src) 17 | if dst.isdigit(): dst = 'node%03d' % int(dst) 18 | if rel=='e': 19 | factFP.write('\t'.join(['friend',src,dst]) + '\n') 20 | elif rel=='isa': 21 | if src not in seen: 22 | seen.add(src) 23 | r = random.uniform(0.0,1.0) 24 | if r > trainFrac: 25 | examFP = testFP 26 | examRel = 'inferred_label' 27 | elif r>factFrac*trainFrac: 28 | examFP = trainFP 29 | examRel = 'inferred_label' 30 | else: 31 | examFP = factFP 32 | examRel = 'label' 33 | examFP.write('\t'.join([examRel,src,dst]) + '\n') 34 | -------------------------------------------------------------------------------- /datasets/smokers/tfexpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | from tensorlog import simple 5 | import tensorflow as tf 6 | 7 | import expt 8 | 9 | def runMain(): 10 | (ti,sparseX) = expt.setExptParams() 11 | X = sparseX.todense() 12 | 13 | # compile all the functions we'll need before I set up the session 14 | tlog = simple.Compiler(db=ti.db, prog=ti.prog, autoset_db_params=False) 15 | for modeString in ["t_stress/io", "t_influences/io","t_cancer_spont/io", "t_cancer_smoke/io"]: 16 | _ = tlog.inference(modeString) 17 | 18 | session = tf.Session() 19 | session.run(tf.global_variables_initializer()) 20 | start0 = time.time() 21 | for modeString in ["t_stress/io", "t_influences/io","t_cancer_spont/io", "t_cancer_smoke/io"]: 22 | session.run(tf.global_variables_initializer()) 23 | print('eval',modeString, end=' ') 24 | fd = {tlog.input_placeholder_name(modeString):X} 25 | session.run(tlog.inference(modeString), feed_dict=fd) 26 | print('time',time.time() - start0,'sec') 27 | tot = time.time() - start0 28 | print('total time',tot,'sec') 29 | return tot 30 | 31 | if __name__=="__main__": 32 | t = runMain() 33 | print('time',t) 34 | -------------------------------------------------------------------------------- /datasets/Makefile: -------------------------------------------------------------------------------- 1 | # conventions for the subdirectories 2 | # - (cd foo; make) should run some sort of
experiment, which 3 | # should take < 5min to run 4 | # - (cd foo; make check) should display actual-vs-expected results 5 | # - (cd foo; make clean) should cleanup so that 'git status .' sees nothing unexpected 6 | # 7 | # hence 'make clean test check' should redo all the experiments and the tail 8 | # will be inspectable for obvious errors 9 | # 10 | # - foo/inputs holds inputs directly used for the experiments 11 | # - foo/raw, if present, holds stuff used to populate foo/inputs 12 | # - foo/tmp-cache holds outputs and/or intermediate results 13 | 14 | 15 | test: 16 | (cd cora; make) 17 | (cd wordnet; make) 18 | (cd grid; make) 19 | (cd fb15k-237; make) 20 | (cd smokers; make) 21 | # (cd textcat-ssl; make) 22 | 23 | check: 24 | (cd cora; make check) 25 | (cd wordnet; make check) 26 | (cd grid; make check) 27 | (cd fb15k-237; make check) 28 | (cd smokers; make check) 29 | # (cd textcat-ssl; make check) 30 | 31 | clean: 32 | (cd cora; make clean) 33 | (cd wordnet; make clean) 34 | (cd grid; make clean) 35 | (cd fb15k-237; make clean) 36 | (cd smokers; make clean) 37 | # (cd textcat-ssl; make clean) 38 | -------------------------------------------------------------------------------- /datasets/socialgraphs/testexpt.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import demo 3 | 4 | class TestAccTF(unittest.TestCase): 5 | 6 | def testCiteseer(self): 7 | init_acc,final_acc = demo.runMain("--stem citeseer".split()) 8 | self.assertTrue( 0.62 <= init_acc < final_acc < 0.65 ) 9 | 10 | def testCora(self): 11 | init_acc,final_acc = demo.runMain("--stem cora".split()) 12 | self.assertTrue( 0.75 <= init_acc < final_acc < 0.80 ) 13 | 14 | def testDolphins(self): 15 | init_acc,final_acc = demo.runMain("--stem dolphins".split()) 16 | self.assertTrue( init_acc == final_acc == 1.0 ) 17 | 18 | def testFootball(self): 19 | init_acc,final_acc = demo.runMain("--stem football --regularizer_scale 1.0".split()) 20 | self.assertTrue( 0.43 < init_acc < 0.45 ) 21 | self.assertTrue( 0.70 < final_acc < 0.75 ) 22 | 23 | def testKarate(self): 24 | init_acc,final_acc = demo.runMain("--stem karate".split()) 25 | self.assertTrue( 0.90 < init_acc < 1.0 ) 26 | self.assertTrue( 0.90 < final_acc < 1.0 ) 27 | 28 | def testUMBC(self): 29 | init_acc,final_acc = demo.runMain("--stem umbc --link_scale 0.1".split()) 30 | self.assertTrue( 0.94 < init_acc < final_acc < 0.95) 31 | 32 | if __name__ == "__main__": 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/proppr-expt.sh: -------------------------------------------------------------------------------- 1 | t=16 2 | args="--duplicateCheck -1 --countFeatures false --prover dpr --apr eps=1e-4:alph=0.1:depth=10" 3 | date > proppr-expt.log 4 | echo args $args >> proppr-expt.log 5 | #args="--duplicateCheck -1 --countFeatures false --prover ppr --apr eps=1e-4:alph=0.1:depth=10" 6 | proppr compile smokers-for-proppr.ppr 7 | #for n in 100 1000 10000 100000; 8 | for n in 100 1000 10000 100000 500000; 9 | do 10 | echo executing for size $n args $args 11 | python queryent2proppr.py < query-entities-$n.txt > p$n.examples 12 | echo run proppr answer p$n.examples --programFiles smokers-for-proppr.wam:const-for-proppr.cfacts:smoker-$n.cfacts $args --threads 1 13 | proppr answer p$n.examples --programFiles smokers-for-proppr.wam:const-for-proppr.cfacts:smoker-$n.cfacts $args --threads 1 > p$n.01.log 14 | python average-time-in-solutions.py p$n 1 < 
p$n.01.log >> proppr-expt.log 15 | # echo run proppr answer p$n.examples --programFiles smokers-for-proppr.wam:const-for-proppr.cfacts:smoker-$n.cfacts $args --threads $t 16 | # proppr answer p$n.examples --programFiles smokers-for-proppr.wam:const-for-proppr.cfacts:smoker-$n.cfacts $args --threads 10 > p$n.$t.log 17 | # python average-time-in-solutions.py p$n $t < p$n.$t.log >> proppr-expt.log 18 | done 19 | -------------------------------------------------------------------------------- /tensorlog/symtab.py: -------------------------------------------------------------------------------- 1 | # (C) William W. Cohen and Carnegie Mellon University, 2016 2 | # 3 | # a symbol table 4 | # 5 | 6 | import logging 7 | 8 | class SymbolTable(object): 9 | """A symbol table mapping strings to/from integers in the range 10 | 1..N inclusive.""" 11 | 12 | def __init__(self,initSymbols=[]): 13 | self.reservedSymbols = set() 14 | self._symbolList = [None] 15 | self._nextId = 0 16 | self._idDict = {} 17 | for s in initSymbols: 18 | self.insert(s) 19 | 20 | def insert(self,symbol): 21 | """Insert a symbol.""" 22 | if symbol not in self._idDict: 23 | self._nextId += 1 24 | self._idDict[symbol] = self._nextId 25 | self._symbolList += [symbol] 26 | 27 | def getSymbolList(self): 28 | """Get an array of all defined symbols.""" 29 | return self._symbolList[1:] 30 | 31 | def getSymbol(self,id): 32 | return self._symbolList[id] 33 | 34 | def hasId(self,symbol): 35 | return symbol in self._idDict 36 | 37 | def getId(self,symbol): 38 | """Get the numeric id, between 1 and N, of a symbol. 39 | """ 40 | self.insert(symbol) 41 | return self._idDict[symbol] 42 | 43 | def getMaxId(self): 44 | return self._nextId 45 | 46 | -------------------------------------------------------------------------------- /tensorlog/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import inspect 3 | 4 | # misc utilities 5 | 6 | def memusage(): 7 | """ Memory used by the current process in Gb 8 | """ 9 | proc_status = '/proc/%d/status' % os.getpid() 10 | try: 11 | t = open(proc_status) 12 | v = t.read() 13 | t.close() 14 | i = v.index('VmSize:') 15 | v = v[i:].split(None,3) 16 | scale = {'kB': 1024.0, 'mB': 1024.0*1024.0, 'KB': 1024.0, 'MB': 1024.0*1024.0} 17 | return (float(v[1]) * scale[v[2]]) / (1024.0*1024.0*1024.0) 18 | except IOError: 19 | return 0.0 20 | 21 | def linesIn(fileLike): 22 | """ If fileLike is a string, open it as a file and return lines in the file. 23 | Otherwise, just call fileLike's iterator method and iterate over that. 24 | Thus, you can use open file handles or strings as arguments to a function f if 25 | it accesses its arguments thru linesIn: 26 | 27 | def f(fileLikeInput,....): 28 | ... 29 | for line in linesIn(fileLikeInput): 30 | ... 
31 | 32 | """ 33 | if isinstance(fileLike,str): 34 | with open(fileLike) as fp: 35 | for line in fp: 36 | yield line 37 | else: 38 | d = dict(inspect.getmembers(fileLike)) 39 | assert '__iter__' in d, 'cannot enumerate lines in the object %r' % fileLike 40 | for line in fileLike: 41 | yield line 42 | -------------------------------------------------------------------------------- /datasets/family/testexpt.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tfexpt 3 | import expt 4 | 5 | class TestNative(unittest.TestCase): 6 | def setUp(self): 7 | (self.prog, self.trainData, self.testData) = expt.setExptParams() 8 | def testIt(self): 9 | acc,loss = expt.accExpt(self.prog, self.trainData, self.testData) 10 | print("acc",acc) 11 | self.assertTrue(acc >= 0.71) 12 | 13 | TF_EXPECTED = {'i_husband/io':1.0, 14 | 'i_brother/io':1.0, 15 | 'i_uncle/io':0.5, 16 | 'i_daughter/io':0.0, # not sure about this 17 | 'i_wife/io':1.0, 18 | 'i_aunt/io':0.5, 19 | 'i_mother/io':1.0, 20 | 'i_sister/io':1.0, 21 | 'i_son/io':0.666666, 22 | 'i_niece/io':0.0, # not sure about this 23 | 'i_father/io':0.666666, 24 | 'i_nephew/io':0.0 # not sure about this 25 | } 26 | class TestAccTF(unittest.TestCase): 27 | def setUp(self): 28 | self.params = tfexpt.setup_tlog() 29 | def testIt(self): 30 | accs = tfexpt.trainAndTest(*self.params) 31 | for mode,acc in list(TF_EXPECTED.items()): 32 | self.assertTrue(accs[mode] >= acc,msg="%s:%g<%g" % (mode,accs[mode],acc)) 33 | 34 | 35 | 36 | if __name__=='__main__': 37 | unittest.main() 38 | 39 | -------------------------------------------------------------------------------- /tensorlog/test-data/matchtoy.cfacts: -------------------------------------------------------------------------------- 1 | dabbrev cmu carnegie-mellon-univ 2 | dabbrev cmu central-michigan-univ 3 | dabbrev ml machine-learning 4 | dabbrev cs computer-science 5 | dabbrev dept department 6 | dnick bill william 7 | dvar cohn cohen 8 | dvar kahn cohen 9 | 10 | record r1 11 | record r2 12 | record r3 13 | record r4 14 | 15 | fname r1 william 16 | lname r1 cohen 17 | addr r1 a1 18 | hasword a1 carnegie-mellon-univ 19 | hasword a1 machine-learning 20 | hasword a1 dept 21 | 22 | fname r2 bill 23 | lname r2 cohn 24 | addr r2 a2 25 | hasword a2 cmu 26 | hasword a2 ml 27 | hasword a2 department 28 | 29 | fname r3 pablo 30 | lname r3 cohn 31 | addr r3 a3 32 | hasword a3 central-michigan-univ 33 | hasword a3 cs 34 | hasword a3 department 35 | 36 | fname r4 pablo 37 | lname r4 cohen 38 | addr r4 a4 39 | hasword a4 cmu 40 | hasword a4 computer-science 41 | hasword a4 dept 42 | 43 | same bill bill 44 | same carnegie-mellon-univ carnegie-mellon-univ 45 | same central-michigan-univ central-michigan-univ 46 | same cmu cmu 47 | same cohen cohen 48 | same cohn cohn 49 | same computer-science computer-science 50 | same department department 51 | same dept dept 52 | same machine-learning machine-learning 53 | same pablo pablo 54 | same william william 55 | 56 | rule f 57 | rule l 58 | rule a 59 | rule f1 60 | rule f2 61 | rule l1 62 | rule l2 63 | rule a1 64 | rule w1 65 | rule w2 66 | -------------------------------------------------------------------------------- /tensorlog/Cleanup-notes.txt: -------------------------------------------------------------------------------- 1 | Cleanup: 2 | - clean up mutil: densify 3 | - clean up dataset: matrix examples (done?) 
and proppr nonsense (move proppr to extras) 4 | - clean up funs, ops: refactor bprop and eval into 'native' 5 | - clean up program: autoweighting, ProPPRProgram vs program 6 | 7 | __init__.py 8 | test (2k lines) 9 | benchmark.py 10 | testtensorlog.py 11 | testxcomp.py 12 | core: (maybe 2k lines) 13 | bpcompiler.py 14 | comline.py - should add my super simple option holder 15 | config.py 16 | dataset.py 17 | declare.py - do I really need something so complicated as a Goal? 18 | funs.py - a lot is backprop or eval 19 | masterconfig.py 20 | matrixdb.py 21 | ops.py - a lot is backprop or eval 22 | parser.py - 200 lines and do I need it? 23 | program.py - autoweighting 24 | symtab.py - should start at 0 or 1? do I need reserved words? 25 | simple.py 26 | tensorflowxcomp.py - *Function, *Grad, runExpt and support is test, this is a few hundred lines 27 | theanoxcomp.py - *Function, *Grad, runExpt and support is test 28 | xcomp.py - *Function, ... 29 | opfunutil.py - scratchpad crap 30 | extra (200) 31 | interp.py 32 | list.py 33 | native (1500) 34 | learn.py 35 | debug.py 36 | expt.py 37 | plearn.py 38 | putil.py 39 | mutil.py - shuffleRows, selectRows used in dataset only; stack, numRows and checkCSR a lot; mapData a lot, 40 | mostly for clipping; but it's only 300 lines or so 41 | 42 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/football-test.exam: -------------------------------------------------------------------------------- 1 | inferred_label OregonState node008 2 | inferred_label NotreDame node005 3 | inferred_label Illinois node002 4 | inferred_label MississippiState node009 5 | inferred_label Cincinnati node004 6 | inferred_label Missouri node003 7 | inferred_label OklahomaState node003 8 | inferred_label UCLA node008 9 | inferred_label Maryland node000 10 | inferred_label SanJoseState node011 11 | inferred_label Pittsburgh node001 12 | inferred_label Stanford node008 13 | inferred_label Oregon node008 14 | inferred_label Syracuse node001 15 | inferred_label SanDiegoState node007 16 | inferred_label Baylor node003 17 | inferred_label Ohio node006 18 | inferred_label BoiseState node011 19 | inferred_label WakeForest node000 20 | inferred_label KansasState node003 21 | inferred_label WesternMichigan node006 22 | inferred_label UtahState node005 23 | inferred_label Oklahoma node003 24 | inferred_label GeorgiaTech node000 25 | inferred_label Arkansas node009 26 | inferred_label Vanderbilt node009 27 | inferred_label Akron node006 28 | inferred_label Georgia node009 29 | inferred_label FloridaState node000 30 | inferred_label Rutgers node001 31 | inferred_label LouisianaMonroe node010 32 | inferred_label Idaho node010 33 | inferred_label Utah node007 34 | inferred_label CentralMichigan node006 35 | inferred_label NorthCarolina node000 36 | inferred_label Kentucky node009 37 | inferred_label Wisconsin node002 38 | inferred_label Houston node004 39 | inferred_label AirForce node007 40 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/tfexpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | from tensorlog import simple 5 | import tensorflow as tf 6 | 7 | import expt 8 | 9 | def runMain(n): 10 | (ti,sparseX,_) = expt.setExptParams(n) 11 | X = sparseX.todense() 12 | nQueries = X.shape[0] 13 | 14 | # compile all the functions we'll need before I set up the session 15 | tlog = simple.Compiler(db=ti.db,
prog=ti.prog, autoset_db_params=False) 16 | for modeString in ["t_stress/io", "t_influences/io","t_cancer_spont/io", "t_cancer_smoke/io"]: 17 | _ = tlog.inference(modeString) 18 | 19 | session = tf.Session() 20 | session.run(tf.global_variables_initializer()) 21 | start0 = time.time() 22 | for modeString in ["t_stress/io", "t_influences/io","t_cancer_spont/io", "t_cancer_smoke/io"]: 23 | session.run(tf.global_variables_initializer()) 24 | print('eval',modeString, end=' ') 25 | fd = {tlog.input_placeholder_name(modeString):X} 26 | session.run(tlog.inference(modeString), feed_dict=fd) 27 | print('time',time.time() - start0,'sec') 28 | tot = time.time() - start0 29 | print('n',n) 30 | print('batch size',nQueries) 31 | print('total query time',tot,'sec') 32 | print('queries/sec',nQueries/tot) 33 | print('%.2f\t%.2f' % (tot,nQueries/tot)) 34 | return tot 35 | 36 | if __name__=="__main__": 37 | n = 100 38 | if len(sys.argv) > 1: 39 | n = int(sys.argv[1]) 40 | 41 | t = runMain(n) 42 | print('time',t) 43 | -------------------------------------------------------------------------------- /tensorlog/masterconfig.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from tensorlog import config 4 | 5 | def masterConfig(): 6 | from tensorlog import bpcompiler 7 | from tensorlog import dataset 8 | from tensorlog import funs 9 | from tensorlog import learn 10 | from tensorlog import matrixdb 11 | from tensorlog import mutil 12 | from tensorlog import ops 13 | from tensorlog import program 14 | from tensorlog import xcomp 15 | 16 | master = config.Config() 17 | master.bpcompiler = bpcompiler.conf 18 | master.help.bpcompiler = 'config for tensorlog.bpcompiler' 19 | master.dataset = dataset.conf 20 | master.help.dataset = 'config for tensorlog.dataset' 21 | master.funs = funs.conf 22 | master.help.funs = 'config for tensorlog.funs' 23 | master.learn = learn.conf 24 | master.help.learn = 'config for tensorlog.learn' 25 | master.matrixdb = matrixdb.conf 26 | master.help.matrixdb = 'config for tensorlog.matrixdb' 27 | master.mutil = mutil.conf 28 | master.help.mutil = 'config for tensorlog.mutil' 29 | master.ops = ops.conf 30 | master.help.ops = 'config for tensorlog.ops' 31 | master.program = program.conf 32 | master.help.program = 'config for tensorlog.program' 33 | master.xcomp = xcomp.conf 34 | master.help.xcomp = 'config for tensorlog.xcomp' 35 | try: 36 | from tensorlog import debug 37 | master.debug = debug.conf 38 | master.help.debug = 'config for tensorlog.debug' 39 | except ImportError: 40 | logging.warn('debug module not imported') 41 | return master 42 | 43 | if __name__ == "__main__": 44 | masterConfig().pprint() 45 |
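46 | # a minimal usage sketch (not part of this module; the option flip below 47 | # mirrors datasets/wikimovies/expt.py): 48 | # 49 | # from tensorlog import masterconfig 50 | # conf = masterconfig.masterConfig() 51 | # conf.pprint() # print every registered option with its help text 52 | # conf.matrixdb.allow_weighted_tuples = False # options are plain attributes 53 | -------------------------------------------------------------------------------- /datasets/fb15k-speed/tfexpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import tensorflow as tf 4 | 5 | import expt 6 | 7 | from tensorlog import simple 8 | from tensorlog import declare 9 | 10 | def tfCompileAll(tlog,modeSet,queries): 11 | t0 = time.time() 12 | k = 0 13 | print('compiling',len(modeSet),'modes') 14 | for mode in modeSet: 15 | if tlog.prog.findPredDef(mode): 16 | k += 1 17 | _ = tlog.inference(mode) 18 | if k%20 == 0: 19 | sys.stderr.write('compiled %d functions in %.3f sec\n' % (k,time.time()-t0)) 20 | t1 = time.time() 21 | fps = k/(t1-t0) 22 | print('tlog compiled',k,'functions at',fps,'fps') 23 | return fps 24 | 25 | def runTF(tlog): 26 | dset =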
tlog.load_small_dataset('inputs/fb15k-valid.examples') 27 | session = tf.Session() 28 | session.run(tf.global_variables_initializer()) 29 | t0 = time.time() 30 | k = 0 31 | for mode in dset: 32 | if tlog.prog.findPredDef(declare.asMode(mode)): 33 | (X,Y) = dset[mode] 34 | f = tlog.inference(mode) 35 | session.run(f, feed_dict={tlog.input_placeholder_name(mode):X}) 36 | k += X.shape[0] 37 | t1 = time.time() 38 | qps = k/(t1-t0) 39 | print('tlog executes on',k,'inputs at',qps,'qps') 40 | return qps 41 | 42 | def runMain(): 43 | (db,prog,modeSet,queries) = expt.setExptParams() 44 | tlog = simple.Compiler(db=db, prog=prog, autoset_db_params=False) 45 | fps1 = expt.compileAll(db,prog,modeSet,queries) 46 | fps2 = tfCompileAll(tlog,modeSet,queries) # expect <= 2.5 fps 47 | qps = runTF(tlog) # expect less than 23 qps 48 | return fps2,qps 49 | 50 | if __name__ == "__main__": 51 | fps,qps = runMain() 52 | -------------------------------------------------------------------------------- /tensorlog/list.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import getopt 3 | 4 | from tensorlog import declare 5 | from tensorlog import comline 6 | from tensorlog import matrixdb 7 | 8 | if __name__ == "__main__": 9 | def usage(): 10 | print("usage: python -m list --db dbspec [--mode mode]") 11 | print(" without mode specified: lists the relations in the database") 12 | print(" with mode specified: lists the facts in one relation in .cfacts format") 13 | print("usage: python -m list --prog progspec [--ruleIds]") 14 | print(" list the all rule ids") 15 | argspec = ["db=","mode=","prog=","ruleIds"] 16 | try: 17 | optlist,args = getopt.getopt(sys.argv[1:], 'x', argspec) 18 | except getopt.GetoptError: 19 | usage() 20 | raise 21 | optdict = dict(optlist) 22 | 23 | db = comline.parseDBSpec(optdict['--db']) if '--db' in optdict else None 24 | if db and (not '--mode' in optdict): 25 | db.listing() 26 | elif db and ('--mode' in optdict): 27 | functor,rest = optdict['--mode'].split("/") 28 | arity = int(rest) 29 | m = db.matEncoding.get((functor,arity)) 30 | assert m is not None,'mode should be of the form functor/arity for something in the database' 31 | for goal,weight in list(db.matrixAsPredicateFacts(functor,arity,m).items()): 32 | print(('\t'.join([goal.functor] + goal.args + ['%g' % (weight)]))) 33 | elif '--prog' in optdict: 34 | prog = comline.parseProgSpec(optdict['--prog'],db,proppr=True) 35 | for rid in prog.ruleIds: 36 | print(('\t'.join(['ruleid',rid]))) 37 | else: 38 | usage() 39 | -------------------------------------------------------------------------------- /datasets/grid/testexpt.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tfexpt 3 | import expt 4 | 5 | from tensorlog import matrixdb 6 | from tensorlog import program 7 | from tensorlog import dataset 8 | 9 | class TestNative(unittest.TestCase): 10 | 11 | def setUp(self): 12 | (self.n,self.maxD,self.epochs) = (16,8,20) 13 | (self.factFile,trainFile,testFile) = expt.genInputs(self.n) 14 | 15 | # (self.factFile,self.trainFile,self.testFile) = ('inputs/g16.cfacts','inputs/g16-train.exam','inputs/g16-test.exam') 16 | self.db = matrixdb.MatrixDB.loadFile(self.factFile) 17 | self.prog = program.Program.loadRules("grid.ppr",self.db) 18 | self.trainData = dataset.Dataset.loadExamples(self.prog.db,trainFile) 19 | self.testData = dataset.Dataset.loadExamples(self.prog.db,testFile) 20 | 21 | def testIt(self): 22 | acc,loss = 
expt.accExpt(self.prog,self.trainData,self.testData,self.n,self.maxD,self.epochs) 23 | print('acc',acc) 24 | self.assertTrue(acc >= 0.85) 25 | times = expt.timingExpt(self.prog) 26 | for t in times: 27 | print('time',t) 28 | self.assertTrue(t < 0.05) 29 | 30 | class TestAccTF(unittest.TestCase): 31 | 32 | def setUp(self): 33 | (self.n,self.maxD,self.epochs) = (16,8,20) 34 | (self.factFile,self.trainFile,self.testFile) = expt.genInputs(self.n) 35 | (self.tlog,self.trainData,self.testData) = tfexpt.setup_tlog(self.maxD,self.factFile,self.trainFile,self.testFile) 36 | 37 | def testIt(self): 38 | acc = tfexpt.trainAndTest(self.tlog,self.trainData,self.testData,self.epochs) 39 | print('acc',acc) 40 | self.assertTrue(acc >= 0.85) 41 | 42 | if __name__ == "__main__": 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/football-train.exam: -------------------------------------------------------------------------------- 1 | inferred_label Wyoming node007 2 | inferred_label Minnesota node002 3 | inferred_label Indiana node002 4 | inferred_label TexasChristian node004 5 | inferred_label BostonCollege node001 6 | inferred_label Tulsa node011 7 | inferred_label Connecticut node005 8 | inferred_label PennState node002 9 | inferred_label Clemson node000 10 | inferred_label LouisianaState node009 11 | inferred_label Washington node008 12 | inferred_label Marshall node006 13 | inferred_label LouisianaTech node011 14 | inferred_label Rice node011 15 | inferred_label NorthCarolinaState node000 16 | inferred_label Arizona node008 17 | inferred_label Michigan node002 18 | inferred_label SouthernCalifornia node008 19 | inferred_label Northwestern node002 20 | inferred_label Florida node009 21 | inferred_label NewMexicoState node010 22 | inferred_label MichiganState node002 23 | inferred_label WashingtonState node008 24 | inferred_label NorthernIllinois node006 25 | inferred_label VirginiaTech node001 26 | inferred_label BallState node006 27 | inferred_label LouisianaLafayette node010 28 | inferred_label NevadaLasVegas node007 29 | inferred_label Duke node000 30 | inferred_label Colorado node003 31 | inferred_label ArizonaState node008 32 | inferred_label MiamiOhio node006 33 | inferred_label Auburn node009 34 | inferred_label ArkansasState node010 35 | inferred_label EasternMichigan node006 36 | inferred_label SouthCarolina node009 37 | inferred_label MiamiFlorida node001 38 | inferred_label OhioState node002 39 | inferred_label Virginia node000 40 | inferred_label Purdue node002 41 | inferred_label EastCarolina node004 42 | inferred_label Nebraska node003 43 | inferred_label Alabama node009 44 | -------------------------------------------------------------------------------- /datasets/wikimovies/tfexpt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | import tensorflow as tf 5 | from tensorlog import simple 6 | import expt 7 | 8 | def runMain(num=250): 9 | params = expt.setExptParams(num) 10 | prog = params['prog'] 11 | tlog = simple.Compiler(db=prog.db, prog=prog, autoset_db_params=False) 12 | train_data = tlog.load_big_dataset('inputs/train-%d.exam' % num) 13 | mode = params['targetMode'] 14 | 15 | loss = tlog.loss(mode) 16 | optimizer = tf.train.AdagradOptimizer(0.1) 17 | train_step = optimizer.minimize(loss) 18 | 19 | session = tf.Session() 20 | session.run(tf.global_variables_initializer()) 21 | t0 = time.time() 22 | epochs = 10 23 | for i in 
range(epochs): 24 | b = 0 25 | for (_,(TX,TY)) in tlog.minibatches(train_data,batch_size=125): 26 | print('epoch',i+1,'of',epochs,'minibatch',b+1) 27 | train_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY} 28 | session.run(train_step, feed_dict=train_fd) 29 | b += 1 30 | print('learning time',time.time()-t0,'sec') 31 | 32 | predicted_y = tlog.inference(mode) 33 | actual_y = tlog.target_output_placeholder(mode) 34 | correct_predictions = tf.equal(tf.argmax(actual_y,1), tf.argmax(predicted_y,1)) 35 | accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32)) 36 | 37 | test_data = tlog.load_small_dataset('inputs/test-%d.exam' % num) 38 | UX,UY = test_data[mode] 39 | test_fd = {tlog.input_placeholder_name(mode):UX, tlog.target_output_placeholder_name(mode):UY} 40 | acc = session.run(accuracy, feed_dict=test_fd) 41 | print('test acc',acc) 42 | return acc #expect 27.2 43 | 44 | if __name__== "__main__": 45 | runMain() 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /datasets/smokers/expected.txt: -------------------------------------------------------------------------------- 1 | # actual result on Thu May 11 15:11:45 EDT 2017 2 | Tensorlog v1.3.3 (C) William W. Cohen and Carnegie Mellon University, 2016-2017 3 | eval t_stress/io time 0.00632214546204 sec 4 | eval t_influences/io time 0.00614285469055 sec 5 | eval t_cancer_spont/io time 0.00518012046814 sec 6 | eval t_cancer_smoke/io time 0.00516486167908 sec 7 | total time 0.0228428840637 sec 8 | time 0.0228428840637 9 | Tensorlog v1.3.3 (C) William W. Cohen and Carnegie Mellon University, 2016-2017 10 | tensorlog.theanoxcomp.DenseMatDenseMsgCrossCompiler 11 | eval t_stress/io time 7.65080595016 sec 12 | eval t_influences/io time 0.176002025604 sec 13 | eval t_cancer_spont/io time 0.143238782883 sec 14 | eval t_cancer_smoke/io time 0.144819974899 sec 15 | total time tensorlog.theanoxcomp.DenseMatDenseMsgCrossCompiler 8.62610697746 sec 16 | tensorlog.theanoxcomp.SparseMatDenseMsgCrossCompiler 17 | eval t_stress/io time 0.145111083984 sec 18 | eval t_influences/io time 0.14995598793 sec 19 | eval t_cancer_spont/io time 0.14336514473 sec 20 | eval t_cancer_smoke/io time 0.152543067932 sec 21 | total time tensorlog.theanoxcomp.SparseMatDenseMsgCrossCompiler 0.775099039078 sec 22 | tensorlog.tensorflowxcomp.DenseMatDenseMsgCrossCompiler 23 | eval t_stress/io time 0.158535003662 sec 24 | eval t_influences/io time 0.351592063904 sec 25 | eval t_cancer_spont/io time 0.233716011047 sec 26 | eval t_cancer_smoke/io time 0.23553109169 sec 27 | total time tensorlog.tensorflowxcomp.DenseMatDenseMsgCrossCompiler 1.92545294762 sec 28 | tensorlog.tensorflowxcomp.SparseMatDenseMsgCrossCompiler 29 | eval t_stress/io time 0.789707183838 sec 30 | eval t_influences/io time 0.33563709259 sec 31 | eval t_cancer_spont/io time 0.299407958984 sec 32 | eval t_cancer_smoke/io time 0.322700977325 sec 33 | total time tensorlog.tensorflowxcomp.SparseMatDenseMsgCrossCompiler 2.70987987518 sec 34 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/Results.txt: -------------------------------------------------------------------------------- 1 | expt.py - local infrastructure 2 | x-mb: minibatch of 25, local infrastructure 3 | x-tf: tensorflow 4 | x-tg: tensorflow + gpu (saskia) 5 | xxx failure in cpu version of tf 6 | 7 | size nodes edges facts 8 | 100 400 4250 5060 9 | 1000 4000 40250 48260 10 | 10000 40000 400250 
480260 11 | 100000 400000 4000250 4800260 12 | 500000 2000000 20000250 24000260 13 | 14 | -------------------- 15 | version 2 - after smokers.ppr fixes 16 | -------------------- 17 | 18 | mb 25 mb 250 mb 25 mb 250 mb 25 mb 250 19 | size time qps time-mb qps-mb time-mb qps-mb time-tf qps-tf time-tf qps-tf time-tg qps-tg time-tg qps-tg 20 | 100 0.34 73.08 0.02 1247.64 -- -- 0.12 202.29 0.17 1486.47 0.06 452.53 0.11 2225.09 21 | 1000 0.35 71.40 0.02 1183.65 0.07 3635.62 0.17 143.34 0.27 926.22 0.13 198.99 0.16 1552.55 22 | 10000 0.37 68.39 0.05 551.53 0.28 907.64 0.56 44.34 1.05 237.26 0.37 67.95 0.80 314.10 23 | 100000 0.75 33.19 0.25 99.44 2.68 93.33 4.70 5.32 10.07 24.81 2.26 11.06 6.63 37.72 24 | 500000 2.06 12.16 1.50 16.72 14.62 17.10 -- -- -- -- 25 | 26 | -------------------- 27 | version 1 28 | -------------------- 29 | 30 | minibatch 25 31 | 32 | size time-mb qps-mb time-tf qps-tf time-tg qps-tg 33 | 100 0.02 1246.75 0.12 208.28 0.10 255.04 34 | 1000 0.02 1144.85 0.18 140.14 0.12 209.09 35 | 10000 0.05 536.48 0.55 45.12 0.31 81.50 36 | 100000 0.32 78.36 4.59 5.44 1.70 14.67 37 | 500000 1.61 15.56 -OOM- -OOM- -OOM- -OOM- 38 | 39 | minibatch 250 40 | 41 | size time qps time-mb qps-mb time-tf qps-tf time-tg qps-tg 42 | 100 3.37 74.11 0.04 6080.78 0.17 1431.98 0.10 2442.12 43 | 1000 3.44 72.73 0.07 3525.55 0.27 916.10 0.10 2423.78 44 | 10000 3.68 67.94 0.29 875.64 1.08 232.03 0.32 780.81 45 | 100000 6.58 38.01 2.61 95.63 10.07 24.83 2.74 91.14 46 | 500000 17.83 14.02 18.09 13.82 -OOM- -OOM- -OOM- -OOM- 47 | -------------------------------------------------------------------------------- /datasets/grid/tfexpt.py: -------------------------------------------------------------------------------- 1 | import time 2 | import tensorflow as tf 3 | 4 | from tensorlog import simple 5 | import expt 6 | 7 | def setup_tlog(maxD,factFile,trainFile,testFile): 8 | tlog = simple.Compiler(db=factFile,prog="grid.ppr") 9 | tlog.prog.db.markAsParameter('edge',2) 10 | tlog.prog.maxDepth = maxD 11 | trainData = tlog.load_small_dataset(trainFile) 12 | testData = tlog.load_small_dataset(testFile) 13 | return (tlog,trainData,testData) 14 | 15 | def trainAndTest(tlog,trainData,testData,epochs): 16 | mode = 'path/io' 17 | predicted_y = tlog.inference(mode) 18 | actual_y = tlog.target_output_placeholder(mode) 19 | correct_predictions = tf.equal(tf.argmax(actual_y,1), tf.argmax(predicted_y,1)) 20 | accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32)) 21 | 22 | unregularized_loss = tlog.loss(mode) 23 | optimizer = tf.train.AdagradOptimizer(1.0) 24 | train_step = optimizer.minimize(unregularized_loss) 25 | 26 | session = tf.Session() 27 | session.run(tf.global_variables_initializer()) 28 | (tx,ty) = trainData[mode] 29 | train_fd = {tlog.input_placeholder_name(mode):tx, tlog.target_output_placeholder_name(mode):ty} 30 | t0 = time.time() 31 | for i in range(epochs): 32 | print('epoch',i+1) 33 | session.run(train_step, feed_dict=train_fd) 34 | print('learning takes',time.time()-t0,'sec') 35 | (ux,uy) = testData[mode] 36 | test_fd = {tlog.input_placeholder_name(mode):ux, tlog.target_output_placeholder_name(mode):uy} 37 | acc = session.run(accuracy, feed_dict=test_fd) 38 | print('test acc',acc) 39 | return acc 40 | 41 | 42 | def runMain(): 43 | (goal,n,maxD,epochs) = expt.getargs() 44 | assert goal=="acc" 45 | (factFile,trainFile,testFile) = expt.genInputs(n) 46 | (tlog,trainData,testData) = setup_tlog(maxD,factFile,trainFile,testFile) 47 | trainAndTest(tlog,trainData,testData,epochs) 48 | 49 | 50 | if 
__name__=="__main__": 51 | runMain() 52 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/expt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | from tensorlog import comline 5 | from tensorlog import declare 6 | from tensorlog import interp 7 | from tensorlog import mutil 8 | 9 | BATCHSIZE=250 10 | 11 | def setExptParams(n): 12 | factFile = 'smoker-%d.cfacts' % n 13 | queryFile = 'query-entities-%d.txt' % n 14 | optdict,args = comline.parseCommandLine(['--prog','smokers.ppr','--proppr','--db',factFile]) 15 | ti = interp.Interp(optdict['prog']) 16 | ti.prog.setRuleWeights() 17 | ti.prog.maxDepth = 99 18 | rows = [] 19 | for line in open(queryFile): 20 | sym = line.strip() 21 | rows.append(ti.db.onehot(sym)) 22 | if len(rows)==BATCHSIZE: 23 | break 24 | X = mutil.stack(rows) 25 | print(len(rows),'queries') 26 | return ti,X,rows 27 | 28 | def runMain(n,minibatch): 29 | 30 | (ti,X,rows) = setExptParams(n) 31 | nQueries = X.shape[0] 32 | start0 = time.time() 33 | for modeString in ["t_stress/io", "t_influences/io","t_cancer_spont/io", "t_cancer_smoke/io"]: 34 | print('eval',modeString, end=' ') 35 | start = time.time() 36 | if minibatch: 37 | ti.prog.eval(declare.asMode(modeString), [X]) 38 | else: 39 | for Xi in rows: 40 | ti.prog.eval(declare.asMode(modeString), [Xi]) 41 | print('time',time.time() - start,'sec') 42 | tot = time.time() - start0 43 | print('batch size',len(rows)) 44 | print('minibatch',minibatch) 45 | print('total query time',tot,'sec') 46 | print('queries/sec',nQueries/tot) 47 | print('%.2f\t%.2f' % (tot,nQueries/tot)) 48 | return tot 49 | 50 | # usage n [no-minibatch] 51 | if __name__=="__main__": 52 | n = 100 53 | if len(sys.argv) > 1: 54 | n = int(sys.argv[1]) 55 | minibatch = True 56 | if len(sys.argv) > 2: 57 | minibatch = False 58 | 59 | t = runMain(n,minibatch) 60 | print('time',t) 61 | -------------------------------------------------------------------------------- /datasets/top-1000-near-google/tfexpt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | import tensorflow as tf 5 | from tensorlog import simple 6 | import expt 7 | 8 | def runMain(): 9 | params = expt.setExptParams() 10 | prog = params['prog'] 11 | tlog = simple.Compiler(db=prog.db, prog=prog, autoset_db_params=False) 12 | train_data = tlog.annotate_big_dataset(params['trainData']) 13 | test_data = tlog.annotate_small_dataset(params['testData']) 14 | result={} 15 | for mode in params['trainData'].modesToLearn(): 16 | print(mode) 17 | loss = tlog.loss(mode) 18 | optimizer = tf.train.AdagradOptimizer(0.1) 19 | train_step = optimizer.minimize(loss) 20 | 21 | session = tf.Session() 22 | session.run(tf.global_variables_initializer()) 23 | t0 = time.time() 24 | epochs = 10 25 | for i in range(epochs): 26 | b = 0 27 | print('epoch',i+1,'of',epochs) 28 | for (_,(TX,TY)) in tlog.minibatches(train_data,batch_size=125): 29 | #print 'epoch',i+1,'of',epochs,'minibatch',b+1 30 | train_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY} 31 | session.run(train_step, feed_dict=train_fd) 32 | b += 1 33 | print('learning time',time.time()-t0,'sec') 34 | 35 | predicted_y = tlog.inference(mode) 36 | actual_y = tlog.target_output_placeholder(mode) 37 | correct_predictions = tf.equal(tf.argmax(actual_y,1), tf.argmax(predicted_y,1)) 38 | accuracy = 
tf.reduce_mean(tf.cast(correct_predictions, tf.float32)) 39 | modestr = str(mode) 40 | if modestr in test_data: 41 | UX,UY = test_data[modestr] 42 | test_fd = {tlog.input_placeholder_name(mode):UX, tlog.target_output_placeholder_name(mode):UY} 43 | acc = session.run(accuracy, feed_dict=test_fd) 44 | print(modestr,'test acc',acc) 45 | result[modestr]= acc 46 | else: print(mode,'not in test set') 47 | return result 48 | 49 | if __name__== "__main__": 50 | accs = runMain() 51 | for mode,acc in list(accs.items()): 52 | print(mode,'acc',acc) 53 | 54 | -------------------------------------------------------------------------------- /datasets/family/expected.txt: -------------------------------------------------------------------------------- 1 | # actual result on Wed May 10 12:50:16 EDT 2017 2 | file tmp-cache/kinship-test.solutions.txt 3 | queries 33 answers 117 labeled answers 117 4 | ============================================================================== 5 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 6 | . micro: 0.987878787879 7 | . macro: 0.72381431205 8 | file tmp-cache/kinship-test.tensorlog.tensorflowxcomp.DenseMatDenseMsgCrossCompiler.solutions.txt 9 | queries 33 answers 5973 labeled answers 5973 10 | ============================================================================== 11 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 12 | . micro: 0.999776773257 13 | . macro: 0.521047327352 14 | file tmp-cache/kinship-test.tensorlog.tensorflowxcomp.SparseMatDenseMsgCrossCompiler.solutions.txt 15 | queries 33 answers 5973 labeled answers 5973 16 | ============================================================================== 17 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 18 | . micro: 0.999776773257 19 | . macro: 0.521047327352 20 | file tmp-cache/kinship-test.tensorlog.theanoxcomp.DenseMatDenseMsgCrossCompiler.solutions.txt 21 | queries 33 answers 5973 labeled answers 5973 22 | ============================================================================== 23 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 24 | . micro: 0.999776773257 25 | . macro: 0.521047327352 26 | file tmp-cache/kinship-test.tensorlog.theanoxcomp.SparseMatDenseMsgCrossCompiler.solutions.txt 27 | queries 33 answers 5973 labeled answers 5973 28 | ============================================================================== 29 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 30 | . micro: 0.999776773257 31 | . 
macro: 0.521047327352 32 | -------------------------------------------------------------------------------- /datasets/smokers/raw/cancer-smokes.cfacts: -------------------------------------------------------------------------------- 1 | cancer mataric00sensorymotor 2 | smokes franconi00general 3 | cancer franconi00general 4 | smokes bernstein00data 5 | cancer bernstein00data 6 | smokes 537391 7 | cancer 537391 8 | smokes 530815 9 | cancer 530815 10 | smokes casati01improving 11 | cancer casati01improving 12 | cancer weiss99achieving 13 | smokes calvanese00containment 14 | cancer calvanese00containment 15 | smokes chen01detection 16 | cancer chen01detection 17 | smokes li99multifaceted 18 | cancer li99multifaceted 19 | smokes 249151 20 | cancer 249151 21 | smokes chen00algebraic 22 | cancer chen00algebraic 23 | cancer kotz99mobile 24 | smokes weinstein99agentbased 25 | cancer weinstein99agentbased 26 | smokes florescu99performance 27 | cancer florescu99performance 28 | smokes fegaras98new 29 | cancer fegaras98new 30 | smokes 529138 31 | cancer 529138 32 | smokes 257648 33 | cancer 257648 34 | smokes dzeroski98detecting 35 | cancer dzeroski98detecting 36 | cancer 30237 37 | cancer sandholm01side 38 | cancer heinze99plan 39 | cancer puliafito00map 40 | cancer 267501 41 | smokes abiteboul01representing 42 | cancer abiteboul01representing 43 | smokes 457806 44 | cancer nguyen00active 45 | cancer deloach01analysis 46 | smokes 23799 47 | cancer 23799 48 | smokes 448928 49 | cancer 448928 50 | cancer kowalski98from 51 | cancer 200188 52 | smokes wendler98cbr 53 | smokes mazumdar99achieving 54 | cancer mazumdar99achieving 55 | cancer hindriks00architecture 56 | cancer wermter01emergent 57 | cancer brandt01vicious 58 | cancer 487862 59 | cancer ceska01generating 60 | cancer 451495 61 | cancer jennings95controlling 62 | smokes chaudhri98okbc 63 | cancer murray01specifying 64 | smokes stader98intelligent 65 | smokes breazeal98regulating 66 | smokes layzell98new 67 | smokes muscettola98reformulating 68 | smokes goldberg99coordinating 69 | smokes roumeliotis99circumventing 70 | smokes 44892 71 | smokes degaris99building 72 | smokes tresp98description 73 | smokes hekanaho98dogma 74 | smokes kervrann00level 75 | smokes 89384 76 | smokes olson98probabilistic 77 | smokes 64225 78 | smokes abraham00optimal 79 | smokes jakobi98running 80 | smokes abraham00hybrid 81 | -------------------------------------------------------------------------------- /tensorlog/trytheano.py: -------------------------------------------------------------------------------- 1 | # (C) William W. Cohen and Carnegie Mellon University, 2016 2 | 3 | import theano 4 | import theano.tensor as T 5 | import theano.sparse as S 6 | import theano.sparse.basic as B 7 | from . 
import matrixdb 8 | import numpy 9 | 10 | def debugVar(v,depth=0,maxdepth=10): 11 | if depth>maxdepth: 12 | print('...') 13 | else: 14 | print('| '*(depth+1), end=' ') 15 | print('var: name',v.name,'type',type(v),'def',theano.pp(v)) 16 | for a in v.get_parents(): 17 | debugApply(a,depth=depth+1,maxdepth=maxdepth) 18 | 19 | def debugApply(a,depth=0,maxdepth=10): 20 | if depth>maxdepth: 21 | print('...') 22 | else: 23 | print('| '*(depth+1), end=' ') 24 | print('apply: ',a,'op',type(a.op),'output types',list(map(type,a.outputs))) 25 | for v in a.inputs: 26 | debugVar(v,depth=depth+1,maxdepth=maxdepth) 27 | 28 | if __name__=="__main__": 29 | 30 | db = matrixdb.MatrixDB.loadFile("test/fam.cfacts") 31 | va = db.onehot('william') 32 | vb = db.onehot('sarah') 33 | 34 | print('a',va) 35 | print('b',vb) 36 | print('shape',va.shape) 37 | 38 | print('f1: s = x*((x+x)+x)') 39 | tx = S.csr_matrix('x') 40 | r1 = B.sp_sum(tx+tx+tx,sparse_grad=True) 41 | s = tx*r1 42 | s.name = 's' 43 | f1 = theano.function(inputs=[tx],outputs=[s]) 44 | w = f1(va) 45 | print(w[0]) 46 | 47 | debugVar(s) 48 | 49 | #print db.rowAsSymbolDict(w[0]) 50 | # 51 | # print 'f2(w=a,c=b)' 52 | # tw = S.csr_matrix('w') #weighter 53 | # tc = S.csr_matrix('c') #constant 54 | # r2 = B.sp_sum(tw*1.7,sparse_grad=True) 55 | # s2 = tc*r2 56 | # f2 = theano.function(inputs=[tw,tc],outputs=[s2]) 57 | # w = f2(va,vb) 58 | # print w[0] 59 | # 60 | print('f3(w=a), b constant') 61 | tw3 = S.csr_matrix('w') #weighter 62 | #y = sparse.CSR(data, indices, indptr, shape) 63 | # tc3 = S.CSR(vb.data, vb.indices, vb.indptr, vb.shape) 64 | # r3 = B.sp_sum(tw3*1.7,sparse_grad=True) 65 | # s3 = tc3*r3 66 | # f3 = theano.function(inputs=[tw3],outputs=[s3]) 67 | # w = f3(va) 68 | # print w[0] 69 | 70 | # debugVar(tw3,maxdepth=5) 71 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcattoy_corpus.cfacts: -------------------------------------------------------------------------------- 1 | hasWord dh a 2 | hasWord dh pricy 3 | hasWord dh doll 4 | hasWord dh house 5 | hasWord ft a 6 | hasWord ft little 7 | hasWord ft red 8 | hasWord ft fire 9 | hasWord ft truck 10 | hasWord rw a 11 | hasWord rw red 12 | hasWord rw wagon 13 | hasWord sc a 14 | hasWord sc pricy 15 | hasWord sc red 16 | hasWord sc sports 17 | hasWord sc car 18 | hasWord bk punk 19 | hasWord bk queen 20 | hasWord bk barbie 21 | hasWord bk and 22 | hasWord bk ken 23 | hasWord rb a 24 | hasWord rb little 25 | hasWord rb red 26 | hasWord rb bike 27 | hasWord mv a 28 | hasWord mv big 29 | hasWord mv 7-seater 30 | hasWord mv minivan 31 | hasWord mv with 32 | hasWord mv an 33 | hasWord mv automatic 34 | hasWord mv transmission 35 | hasWord hs a 36 | hasWord hs big 37 | hasWord hs house 38 | hasWord hs in 39 | hasWord hs the 40 | hasWord hs suburbs 41 | hasWord hs with 42 | hasWord hs crushing 43 | hasWord hs mortgage 44 | hasWord ji a 45 | hasWord ji job 46 | hasWord ji for 47 | hasWord ji life 48 | hasWord ji at 49 | hasWord ji IBM 50 | hasWord tf a 51 | hasWord tf huge 52 | hasWord tf pile 53 | hasWord tf of 54 | hasWord tf tax 55 | hasWord tf forms 56 | hasWord tf due 57 | hasWord tf yesterday 58 | hasWord jm huge 59 | hasWord jm pile 60 | hasWord jm of 61 | hasWord jm junk 62 | hasWord jm mail 63 | hasWord jm bills 64 | hasWord jm and 65 | hasWord jm catalogs 66 | hasWord pb a 67 | hasWord pb pricy 68 | hasWord pb barbie 69 | hasWord pb doll 70 | hasWord yc a 71 | hasWord yc little 72 | hasWord yc yellow 73 | hasWord yc toy 74 | hasWord yc car 75 
| hasWord rb2 a 76 | hasWord rb2 red 77 | hasWord rb2 ten 78 | hasWord rb2 speed 79 | hasWord rb2 bike 80 | hasWord rp a 81 | hasWord rp red 82 | hasWord rp convertible 83 | hasWord rp porshe 84 | hasWord bp a 85 | hasWord bp big 86 | hasWord bp pile 87 | hasWord bp of 88 | hasWord bp paperwork 89 | hasWord he a 90 | hasWord he huge 91 | hasWord he backlog 92 | hasWord he of 93 | hasWord he email 94 | hasWord wt a 95 | hasWord wt life 96 | hasWord wt of 97 | hasWord wt woe 98 | hasWord wt and 99 | hasWord wt trouble 100 | -------------------------------------------------------------------------------- /datasets/smokers/expt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import logging 4 | from tensorlog import comline 5 | from tensorlog import declare 6 | from tensorlog import interp 7 | from tensorlog import mutil 8 | from tensorlog import expt 9 | from tensorlog import xctargets 10 | 11 | CROSSCOMPILERS = [] 12 | if xctargets.theano: 13 | from tensorlog import theanoxcomp 14 | for c in [ 15 | theanoxcomp.DenseMatDenseMsgCrossCompiler, 16 | theanoxcomp.SparseMatDenseMsgCrossCompiler 17 | ]: 18 | CROSSCOMPILERS.append(c) 19 | 20 | 21 | modes = ["t_stress/io", "t_influences/io","t_cancer_spont/io", "t_cancer_smoke/io"] 22 | def setExptParams(): 23 | optdict,args = comline.parseCommandLine('--prog smokers.ppr --proppr --db smokers.cfacts'.split()) 24 | ti = interp.Interp(optdict['prog']) 25 | ti.prog.setRuleWeights() 26 | ti.prog.maxDepth = 99 27 | rows = [] 28 | for line in open('query-entities.txt'): 29 | sym = line.strip() 30 | rows.append(ti.db.onehot(sym)) 31 | X = mutil.stack(rows) 32 | return ti,X 33 | 34 | def runMain(): 35 | 36 | (ti,X) = setExptParams() 37 | start0 = time.time() 38 | 39 | for modeString in modes: 40 | print('eval',modeString, end=' ') 41 | start = time.time() 42 | ti.prog.eval(declare.asMode(modeString), [X]) 43 | print('time',time.time() - start,'sec') 44 | tot = time.time() - start0 45 | print('total time',tot,'sec') 46 | return tot 47 | 48 | if __name__=="__main__": 49 | t = runMain() 50 | print('time',t) 51 | 52 | (ti,X) = setExptParams() 53 | for compilerClass in CROSSCOMPILERS: 54 | start0=time.time() 55 | xc = compilerClass(ti.prog) 56 | print(expt.fulltype(xc)) 57 | # compile everything 58 | for modeString in modes: 59 | mode = declare.asMode(modeString) 60 | xc.ensureCompiled(mode) 61 | print('eval',modeString, end=' ') 62 | start = time.time() 63 | xc.inferenceFunction(mode)(X) 64 | print('time',time.time() - start,'sec') 65 | print('total time',expt.fulltype(xc),time.time()-start0,'sec') 66 | 67 | -------------------------------------------------------------------------------- /datasets/top-1000-near-google/testexpt.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tfexpt 3 | import expt 4 | 5 | TF_REFERENCE = { 6 | 'concept_worksfor/io':1.0, 7 | 'concept_atdate/io':0.733333, 8 | 'concept_languageofcountry/io':1.0, 9 | 'concept_personleadsorganization/io':1.0, 10 | 'concept_agriculturalproductcamefromcountry/io':1.0, 11 | 'concept_mutualproxyfor/io':1.0, 12 | 'concept_subpartof/io':1.0, 13 | 'concept_politicianholdsoffice/io':1.0, 14 | 'concept_agentcontrols/io':1.0, 15 | 'concept_agentcollaborateswithagent/io':1.0, 16 | 'concept_teamalsoknownas/io':1.0, 17 | 'concept_citylocatedingeopoliticallocation/io':1.0, 18 | 'concept_automobilemakerdealersincountry/io':1.0, 19 | 'concept_agentactsinlocation/io':1.0, 20 | 
'concept_istallerthan/io':1.0, 21 | 'concept_personbelongstoorganization/io':1.0, 22 | 'concept_competeswith/io':1.0, 23 | 'concept_personhasresidenceingeopoliticallocation/io':1.0, 24 | 'concept_atlocation/io':1.0, 25 | 'concept_weaponmadeincountry/io':0.0, 26 | 'concept_automobilemakercardealersinstateorprovince/io':1.0, 27 | 'concept_countrylocatedingeopoliticallocation/io':1.0, 28 | 'concept_productproducedincountry/io':0.0, 29 | 'concept_agentinvolvedwithitem/io':1.0, 30 | 'concept_agentcreated/io':1.0, 31 | 'concept_agentparticipatedinevent/io':1.0, 32 | 'concept_journalistwritesforpublication/io':1.0, 33 | 'concept_hasofficeincity/io':1.0, 34 | 'concept_locationlocatedwithinlocation/io':1.0, 35 | 'concept_subpartoforganization/io':1.0, 36 | 'concept_proxyfor/io':1.0, 37 | 'concept_acquired/io':1.0 38 | } 39 | 40 | class TestNative(unittest.TestCase): 41 | def testIt(self): 42 | acc,loss = expt.runMain() 43 | print("acc",acc) 44 | self.assertTrue(acc >= 0.69) 45 | 46 | class TestAccTF(unittest.TestCase): 47 | def setUp(self): 48 | self.params = tfexpt.setup_tlog() 49 | def testIt(self): 50 | accs = tfexpt.trainAndTest(*self.params) 51 | for mode,acc in list(accs.items()): 52 | self.assertTrue(acc >= TF_REFERENCE[mode],"%s:%g<%g"%(mode,acc,TF_REFERENCE[mode])) 53 | 54 | if __name__=='__main__': 55 | unittest.main() 56 | 57 | -------------------------------------------------------------------------------- /datasets/family/tfexpt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | import tensorflow as tf 5 | from tensorlog import simple 6 | import expt 7 | 8 | def setup_tlog(): 9 | (prog, native_trainData, native_testData) = expt.setExptParams() 10 | tlog = simple.Compiler(db=prog.db, prog=prog, autoset_db_params=False) 11 | train_data = tlog.annotate_big_dataset(native_trainData) 12 | test_data = tlog.annotate_small_dataset(native_testData) 13 | return (tlog,train_data,test_data,native_trainData.modesToLearn()) 14 | 15 | def trainAndTest(tlog,train_data,test_data,modes): 16 | result={} 17 | for mode in modes: 18 | print(mode) 19 | loss = tlog.loss(mode) 20 | optimizer = tf.train.AdagradOptimizer(0.1) 21 | train_step = optimizer.minimize(loss) 22 | session = tf.Session() 23 | session.run(tf.global_variables_initializer()) 24 | t0 = time.time() 25 | epochs = 10 26 | for i in range(epochs): 27 | b = 0 28 | print('epoch',i+1,'of',epochs) 29 | for (_,(TX,TY)) in tlog.minibatches(train_data,batch_size=125): 30 | train_fd = {tlog.input_placeholder_name(mode):TX, tlog.target_output_placeholder_name(mode):TY} 31 | session.run(train_step, feed_dict=train_fd) 32 | b += 1 33 | print('learning time',time.time()-t0,'sec') 34 | 35 | predicted_y = tlog.inference(mode) 36 | actual_y = tlog.target_output_placeholder(mode) 37 | correct_predictions = tf.equal(tf.argmax(actual_y,1),tf.argmax(predicted_y,1)) 38 | accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32)) 39 | mode_str = str(mode) 40 | if mode_str in test_data: 41 | UX,UY = test_data[mode_str] 42 | test_fd = {tlog.input_placeholder_name(mode):UX, tlog.target_output_placeholder_name(mode):UY} 43 | acc = session.run(accuracy, feed_dict=test_fd) 44 | print(mode_str, 'test acc',acc) 45 | result[mode_str] = acc 46 | return result 47 | 48 | def runMain(): 49 | params = setup_tlog() 50 | return trainAndTest(*params) 51 | 52 | if __name__=='__main__': 53 | #params = setup_tlog() 54 | accs = runMain() 55 | for mode,acc in list(accs.items()): 56 | 
print(mode,'acc',acc) 57 | -------------------------------------------------------------------------------- /datasets/wikimovies/expt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from tensorlog import masterconfig 5 | from tensorlog import expt 6 | from tensorlog import learn 7 | from tensorlog import plearn 8 | from tensorlog import comline 9 | from tensorlog import xctargets 10 | from tensorlog import declare 11 | 12 | CROSSCOMPILERS = [] 13 | CROSSLEARNERS = {} 14 | if xctargets.theano: 15 | from tensorlog import theanoxcomp 16 | for c in [ 17 | theanoxcomp.DenseMatDenseMsgCrossCompiler, 18 | theanoxcomp.SparseMatDenseMsgCrossCompiler 19 | ]: 20 | CROSSCOMPILERS.append(c) 21 | CROSSLEARNERS[c]=theanoxcomp.FixedRateGDLearner 22 | 23 | modeString = 'answer/io' 24 | def setExptParams(num): 25 | db = comline.parseDBSpec('tmp-cache/train-%d.db|inputs/train-%d.cfacts' % (num,num)) 26 | trainData = comline.parseDatasetSpec('tmp-cache/train-%d.dset|inputs/train-%d.exam' % (num,num), db) 27 | testData = comline.parseDatasetSpec('tmp-cache/test-%d.dset|inputs/test-%d.exam' % (num,num), db) 28 | prog = comline.parseProgSpec("theory.ppr",db,proppr=True) 29 | prog.setFeatureWeights() 30 | learner = plearn.ParallelFixedRateGDLearner(prog,regularizer=learn.L2Regularizer(),parallel=5,epochs=10) 31 | return {'prog':prog, 32 | 'trainData':trainData, 33 | 'testData':testData, 34 | 'targetMode':modeString, 35 | 'savedModel':'learned-model.db', 36 | 'learner':learner 37 | } 38 | 39 | def runMain(num=250): 40 | if not os.path.exists("tmp-cache"): os.mkdir("tmp-cache") 41 | logging.basicConfig(level=logging.INFO) 42 | masterconfig.masterConfig().matrixdb.allow_weighted_tuples=False 43 | params = setExptParams(num) 44 | return expt.Expt(params).run() 45 | 46 | if __name__=="__main__": 47 | acc,loss = runMain() # expect 0.21,0.22 48 | print('acc,loss',acc,loss) 49 | params = setExptParams(250) 50 | for compilerClass in CROSSCOMPILERS: 51 | xc = compilerClass(params['prog']) 52 | print(expt.fulltype(xc)) 53 | # compile everything 54 | mode = declare.asMode(modeString) 55 | xc.ensureCompiled(mode) 56 | learner = CROSSLEARNERS[compilerClass](params['prog'],xc) 57 | params.update({ 58 | 'savedModel':'learned-model.%s.db' % (expt.fulltype(xc)), 59 | 'learner':learner, 60 | }) 61 | print('acc,loss',expt.Expt(params).run()) 62 | -------------------------------------------------------------------------------- /datasets/grid/Notes.txt: -------------------------------------------------------------------------------- 1 | depth n/2 2 | 3 | python tfintegration.py --repeat 10 --corner soft 4 | accs [0.8125, 0.84615386, 0.87878788, 0.69565219, 0.9375, 0.81481481, 0.93103451, 0.8974359, 0.89655173, 0.86842108] average 0.857885193825 5 | 6 | python tfintegration.py --repeat 10 --corner hard 7 | accs [1.0, 1.0, 1.0, 0.0, 0.96774191, 1.0, 0.97297299, 1.0, 1.0, 1.0] average 0.894071489573 8 | 9 | python tfintegration.py --repeat 10 --corner hard --edge learned_embedding 10 | accs [1.0, 1.0, 0.97222221, 0.93103451, 1.0, 0.93023258, 1.0, 1.0, 0.94444442, 1.0] average 0.977793371677 11 | 12 | python tfintegration.py --repeat 10 --corner soft --edge learned_embedding 13 | accs [0.0, 0.74074072, 0.0, 0.0, 0.0, 0.0, 0.7368421, 0.2, 0.0, 0.0] average 0.167758281529 14 | 15 | python tfintegration.py --repeat 10 --corner hard --edge fixed_embedding --epochs 0 16 | accs [0.61764705, 0.55172414, 0.64102566, 0.53571427, 0.70833331, 0.5, 0.56410259, 0.58064514, 
0.69230771, 0.5] average 0.589149987698 17 | 18 | depth n, 100 epochs 19 | 20 | python tfintegration.py --repeat 10 --corner hard --edge learned_embedding 21 | accs [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] average 1.0 22 | 23 | python tfintegration.py --repeat 10 --corner hard 24 | accs [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] average 1.0 25 | 26 | python tfintegration.py --repeat 10 --corner soft 27 | accs [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average 0.0 28 | -------------------- 29 | 30 | Multiclass STOP_AT 1.0 31 | python multiclass.py --repeat 10 --multiclass True 32 | accs [0.98998785, 0.98160452, 0.99004853, 0.98560429, 0.99500692, 0.9974829, 0.98486578, 0.98105609, 0.98978025, 0.99244875] average 0.988788586855 33 | stopped_at [575, 572, 548, 608, 557, 566, 560, 590, 632, 608] average 581 34 | 35 | 36 | Timing results: 37 | python generate.py 16 g16.cfacts 38 | python grid-expt.py g16.cfacts n_1_1 39 | depth 4 time 0.00571799278259 sec 40 | depth 8 time 0.00844502449036 sec 41 | depth 16 time 0.014888048172 sec 42 | depth 32 time 0.0299441814423 sec 43 | depth 64 time 0.0547881126404 sec 44 | depth 99 time 0.0845830440521 sec 45 | 46 | python generate.py 64 g64.cfacts 47 | python grid-expt.py g64.cfacts n_1_1 48 | depth 4 time 0.00711989402771 sec 49 | depth 8 time 0.00954079627991 sec 50 | depth 16 time 0.0163309574127 sec 51 | depth 32 time 0.0326619148254 sec 52 | depth 64 time 0.0765111446381 sec 53 | depth 99 time 0.130023002625 sec 54 | -------------------------------------------------------------------------------- /datasets/smokers/query-entities.txt: -------------------------------------------------------------------------------- 1 | mataric00sensorymotor 2 | ng01stable 3 | 524726 4 | heinonen96www 5 | bongard00legion 6 | franconi00general 7 | 373513 8 | bernstein00data 9 | kokku02halfpipe 10 | 240151 11 | 384644 12 | schnattinger98qualitybased 13 | 537391 14 | 530815 15 | wu01towards 16 | casati01improving 17 | weiss99achieving 18 | calvanese00containment 19 | chen01detection 20 | mcgowan02who 21 | li99multifaceted 22 | billinghurst98wearable 23 | wills01open 24 | 306088 25 | 249151 26 | kubiatowicz00oceanstore 27 | kargupta01mobimine 28 | chen00algebraic 29 | 266462 30 | kotz99mobile 31 | 382301 32 | faensen01hermes 33 | weinstein99agentbased 34 | florescu99performance 35 | fegaras98new 36 | pant02myspiders 37 | 529138 38 | 257648 39 | dzeroski98detecting 40 | 30237 41 | sandholm01side 42 | heinze99plan 43 | puliafito00map 44 | 267501 45 | abiteboul01representing 46 | he00comparative 47 | inza99feature 48 | meng99estimating 49 | 510049 50 | 457806 51 | feiner99wearing 52 | santini99similarity 53 | 208788 54 | smola01regularized 55 | pagonis99evolving 56 | cruz99preserving 57 | 7785 58 | nguyen00active 59 | deloach01analysis 60 | 23799 61 | 448928 62 | harel99workload 63 | kowalski98from 64 | glover99recommending 65 | 200188 66 | 366169 67 | wendler98cbr 68 | 316064 69 | wache01ontologybased 70 | 535112 71 | mazumdar99achieving 72 | driessens01speeding 73 | 206655 74 | morse00collablogger 75 | hindriks00architecture 76 | 311589 77 | wermter01emergent 78 | brandt01vicious 79 | 457393 80 | 487862 81 | dam01antitonic 82 | ceska01generating 83 | gellersen00adding 84 | 451495 85 | castillo99gpropii 86 | 539420 87 | jennings95controlling 88 | 442160 89 | tsoumakas01fuzzy 90 | wisneski98ambient 91 | wess94casebased 92 | chaudhri98okbc 93 | murray01specifying 94 | tsap01feedback 95 | fjeld99designing 96 | rizzo97personalitydriven 97 | 
knight00system 98 | sreerupa98dynamic 99 | mannila99prediction 100 | 508859 101 | stader98intelligent 102 | breazeal98regulating 103 | layzell98new 104 | reitmayr01mobile 105 | aha91casebased 106 | aha97casebased 107 | muscettola98reformulating 108 | goldberg99coordinating 109 | roumeliotis99circumventing 110 | 44892 111 | degaris99building 112 | tresp98description 113 | hekanaho98dogma 114 | kervrann00level 115 | 89384 116 | olson98probabilistic 117 | 64225 118 | abraham00optimal 119 | jakobi98running 120 | abraham00hybrid 121 | -------------------------------------------------------------------------------- /datasets/smokers/scaleup/gen.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import random as r 3 | 4 | BATCHSIZE = 250 5 | 6 | # generates a graph with four weakly-connected subcommunities, each a 7 | # barabasi_albert graph, with each cluster having different labels 8 | 9 | if __name__ == "__main__": 10 | seeds = [8347342984,901891212,1929282,73454129] 11 | for n in [100,1000,10000,100000,500000]: 12 | print('generate for n',n,'...') 13 | with open('query-entities-%d.txt' % n,'w') as fp: 14 | for tag in 'a b c d'.split(): 15 | for m in range(BATCHSIZE): 16 | fp.write('%s%d\n' % (tag,r.randint(0,n-1))) 17 | with open('smoker-%d.cfacts' % n,'w') as fp: 18 | fp.write('\t'.join(['const','yes']) + '\n') 19 | fp.write('\t'.join(['const','no']) + '\n') 20 | for k in range(1,9): 21 | fp.write('\t'.join(['rule','r%d' % k]) + '\n') 22 | g1 = nx.barabasi_albert_graph(n,5,seeds[0]) 23 | g2 = nx.barabasi_albert_graph(n,5,seeds[1]) 24 | g3 = nx.barabasi_albert_graph(n,5,seeds[2]) 25 | g4 = nx.barabasi_albert_graph(n,5,seeds[3]) 26 | for tag,g in [('a',g1),('b',g2),('c',g3),('d',g4)]: 27 | for (i,j) in g.edges(): 28 | fp.write('\t'.join(['friends','%s%d' % (tag,i),'%s%d' % (tag,j)]) + '\n') 29 | fp.write('\t'.join(['friends','%s%d' % (tag,j),'%s%d' % (tag,i)]) + '\n') 30 | # insert cross-cluster edges 31 | for m in range(25): 32 | for tag1 in 'a b c'.split(): 33 | for tag2 in 'a b c'.split(): 34 | i = r.randint(0,n) 35 | j = r.randint(0,n) 36 | fp.write('\t'.join(['friends','%s%d' % (tag1,i),'%s%d' % (tag2,j)]) + '\n') 37 | fp.write('\t'.join(['friends','%s%d' % (tag2,j),'%s%d' % (tag1,i)]) + '\n') 38 | # g1 -C -S 39 | # g2 +C -S 40 | # g3 -C +S 41 | # g4 +C +S 42 | for tag,g in [('a',g1),('b',g2),('c',g3),('d',g4)]: 43 | for i in g.nodes(): 44 | fp.write('\t'.join(['person','%s%d' % (tag,i)]) + '\n') 45 | for tag,g in [('b',g2),('d',g4)]: 46 | for i in g.nodes(): 47 | fp.write('\t'.join(['cancer','%s%d' % (tag,i)]) + '\n') 48 | for tag,g in [('c',g3),('d',g4)]: 49 | for i in g.nodes(): 50 | fp.write('\t'.join(['smoker','%s%d' % (tag,i)]) + '\n') 51 | 52 | -------------------------------------------------------------------------------- /tensorlog/learnxcomp.py: -------------------------------------------------------------------------------- 1 | from . 
import learn as L 2 | import time 3 | import sys 4 | import logging 5 | 6 | class XLearner(L.Learner): 7 | def __init__(self,prog,xc,compilerClass=None,regularizer=None,tracer=None,epochTracer=None): 8 | super(XLearner,self).__init__(prog,regularizer=regularizer,tracer=tracer,epochTracer=epochTracer) 9 | if xc: self.xc = xc 10 | else: self.xc = compilerClass(prog.db) 11 | def predict(self,mode,X,pad=None): 12 | """Make predictions on a data matrix associated with the given mode.""" 13 | logging.debug("XLearner predict %s"%mode) 14 | try: 15 | inferenceFun = self.xc.inferenceFunction(mode) 16 | result = inferenceFun(X) 17 | except: 18 | print("tlogfun:","\n".join(self.xc.ws.tensorlogFun.pprint())) 19 | raise 20 | return result 21 | def crossEntropyGrad(self,mode,X,Y,tracerArgs={},pad=None): 22 | """Compute the parameter gradient associated with softmax 23 | normalization followed by a cross-entropy cost function. If a 24 | scratchpad is passed in, then intermediate results of the 25 | gradient computation will be saved on that scratchpad. 26 | """ 27 | gradFun = self.xc.dataLossGradFunction(mode) 28 | paramsWithUpdates = gradFun(X,Y) 29 | return paramsWithUpdates 30 | def applyUpdate(self, paramGrads, rate): 31 | assert False, "Cross-compilers don't apply updates" 32 | def meanUpdate(self, functor, arity, delta, n, totalN=0): 33 | assert False, "Cross-compilers don't do mean updates" 34 | def train(self,dset): 35 | assert False, 'abstract method called' 36 | 37 | class BatchEpochsLearner(XLearner): 38 | def __init__(self,prog,xc,epochs=10,compilerClass=None,regularizer=None,tracer=None,epochTracer=None): 39 | super(BatchEpochsLearner,self).__init__(prog,xc,compilerClass=compilerClass,regularizer=regularizer,tracer=tracer,epochTracer=epochTracer) 40 | self.epochs=epochs 41 | def trainMode(self,mode,X,Y,epochs=-1): 42 | assert False, 'abstract method called' 43 | def train(self,dset): 44 | trainStartTime = time.time() 45 | modes = dset.modesToLearn() 46 | numModes = len(modes) 47 | for i in range(self.epochs): 48 | startTime = time.time() 49 | for j,mode in enumerate(dset.modesToLearn()): 50 | args = {'i':i,'startTime':startTime,'mode':str(mode)} 51 | try: 52 | self.trainMode(mode,dset.getX(mode),dset.getY(mode),epochs=1) 53 | except: 54 | print("Unexpected error at %s:" % str(args), sys.exc_info()[:2]) 55 | raise 56 | #self.epochTracer(self,) 57 | 58 | 59 | 60 |
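61 | # a minimal usage sketch (not part of this module; construction mirrors 62 | # datasets/wikimovies/expt.py, and assumes a program prog, a cross-compiler 63 | # xc built for it, and X,Y matrices for some mode already exist): 64 | # 65 | # learner = XLearner(prog, xc) 66 | # P = learner.predict(mode, X) # inference via xc.inferenceFunction 67 | # updates = learner.crossEntropyGrad(mode, X, Y) # via xc.dataLossGradFunction 68 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcattoy2.cfacts: -------------------------------------------------------------------------------- 1 | # training and test corpus for a toy text categorization problem 2 | 3 | hasWord dh a 4 | hasWord dh pricy 5 | hasWord dh doll 6 | hasWord dh house 7 | hasWord ft a 8 | hasWord ft little 9 | hasWord ft red 10 | hasWord ft fire 11 | hasWord ft truck 12 | hasWord rw a 13 | hasWord rw red 14 | hasWord rw wagon 15 | hasWord sc a 16 | hasWord sc pricy 17 | hasWord sc red 18 | hasWord sc sports 19 | hasWord sc car 20 | hasWord bk punk 21 | hasWord bk queen 22 | hasWord bk barbie 23 | hasWord bk and 24 | hasWord bk ken 25 | hasWord rb a 26 | hasWord rb little 27 | hasWord rb red 28 | hasWord rb bike 29 | hasWord mv a 30 | hasWord mv big 31 | hasWord mv 7-seater 32 | hasWord mv minivan 33 | hasWord mv with 34 | hasWord mv an 35 | hasWord mv automatic 36 | hasWord mv transmission 37 | hasWord hs a 38 | hasWord hs big 39 | hasWord hs house 40 | hasWord hs in 41 | hasWord hs the 42 | hasWord hs suburbs 43 | hasWord hs with 44 | hasWord hs crushing 45 |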
hasWord hs mortgage 46 | hasWord ji a 47 | hasWord ji job 48 | hasWord ji for 49 | hasWord ji life 50 | hasWord ji at 51 | hasWord ji IBM 52 | hasWord tf a 53 | hasWord tf huge 54 | hasWord tf pile 55 | hasWord tf of 56 | hasWord tf tax 57 | hasWord tf forms 58 | hasWord tf due 59 | hasWord tf yesterday 60 | hasWord jm huge 61 | hasWord jm pile 62 | hasWord jm of 63 | hasWord jm junk 64 | hasWord jm mail 65 | hasWord jm bills 66 | hasWord jm and 67 | hasWord jm catalogs 68 | hasWord pb a 69 | hasWord pb pricy 70 | hasWord pb barbie 71 | hasWord pb doll 72 | hasWord yc a 73 | hasWord yc little 74 | hasWord yc yellow 75 | hasWord yc toy 76 | hasWord yc car 77 | hasWord rb2 a 78 | hasWord rb2 red 79 | hasWord rb2 10 80 | hasWord rb2 speed 81 | hasWord rb2 bike 82 | hasWord rp a 83 | hasWord rp red 84 | hasWord rp convertible 85 | hasWord rp porshe 86 | hasWord bp a 87 | hasWord bp big 88 | hasWord bp pile 89 | hasWord bp of 90 | hasWord bp paperwork 91 | hasWord he a 92 | hasWord he huge 93 | hasWord he backlog 94 | hasWord he of 95 | hasWord he email 96 | hasWord wt a 97 | hasWord wt life 98 | hasWord wt of 99 | hasWord wt woe 100 | hasWord wt and 101 | hasWord wt trouble 102 | 103 | label pos 104 | label neg 105 | 106 | train dh pos 107 | train ft pos 108 | train rw pos 109 | train sc pos 110 | train bk pos 111 | train rb pos 112 | train mv neg 113 | train hs neg 114 | train ji neg 115 | train tf neg 116 | train jm neg 117 | 118 | test pb pos 119 | test yc pos 120 | test rb2 pos 121 | test rp pos 122 | test bp neg 123 | test he neg 124 | test wt neg 125 | -------------------------------------------------------------------------------- /tensorlog/trylearn.py: -------------------------------------------------------------------------------- 1 | # working on a script now to compute the gradient and do learning. 2 | # still some problems with type coercion in the interface between 3 | # weights and theano.... 
4 | 5 | # d = scipy.sparse.spdiags(x,0,n,n) 6 | # returns a dia_matrix 7 | # m.getnnz() 8 | # d = scipy.sparse.spdiags(x,0,15,15,format='coo') 9 | # matrices are often coerced to csr 10 | 11 | #native mode seems to work for rows and matrices 12 | #theano does not 13 | 14 | #basic.py 15 | # def sp_sum(x, axis=None, sparse_grad=False): 16 | # def mul(x, y): 17 | 18 | NATIVE=False 19 | 20 | import tensorlog 21 | import util 22 | import theano 23 | import theano.tensor as T 24 | import theano.sparse as S 25 | import theano.sparse.basic as B 26 | import scipy.sparse 27 | 28 | def loadExamples(file,db): 29 | xs = [] 30 | ys = [] 31 | for line in util.linesIn(file): 32 | sx,sy = line.strip().split("\t") 33 | xs.append(db.onehot(sx)) 34 | ys.append(db.onehot(sy)) 35 | return xs,ys 36 | 37 | # 38 | # set up the program 39 | # 40 | 41 | p = tensorlog.ProPPRProgram.load(["test-data/textcat.ppr","test-data/textcattoy.cfacts"]) 42 | p.setWeights(p.db.ones()) 43 | p.listing() 44 | 45 | # 46 | # load the data 47 | # 48 | 49 | xs,ys = loadExamples("test-data/textcattoy-train.examples",p.db) 50 | 51 | #returns inputs and outputs that are used to build the prediction 52 | #function 53 | mode = tensorlog.ModeDeclaration('predict(i,o)') 54 | ins,outs = p.theanoPredictExpr(mode,['x']) 55 | scorex = outs[0] #the actual score vector for x 56 | 57 | # something simple to try differentiating 58 | toyLoss = B.sp_sum(scorex,sparse_grad=True) 59 | print('gradToyLoss...') 60 | gradToyLoss = T.grad(toyLoss, p.getParamList()) 61 | 62 | 63 | # 64 | # now define a theano function that computes loss for ONE example 65 | # 66 | y = S.csr_matrix('y') 67 | prob = scorex * (1.0/B.sp_sum(scorex, sparse_grad=True)) #scale x to 0-1 68 | loss = B.sp_sum(-y * B.structured_log(prob),sparse_grad=True) #cross-entropy loss 69 | print('loss...') 70 | theano.printing.debugprint(loss) 71 | lossFun = theano.function(inputs=[ins[0],y], outputs=[loss]) 72 | 73 | # 74 | # test on one example 75 | # 76 | lossFunResult = lossFun(xs[0],ys[0]) 77 | print('loss on example 0:',lossFunResult[0]) 78 | 79 | # 80 | # compute gradient 81 | # 82 | 83 | #this is where things fail now 84 | # File "/Library/Python/2.7/site-packages/theano/gradient.py", line 1262, in access_grad_cache 85 | # str(node.op), term.ndim, var.ndim)) 86 | #ValueError: MulSD.grad returned a term with 2 dimensions, but 0 are required. 87 | # 88 | 89 | print('gradLoss...') 90 | gradLoss = T.grad(loss, p.getParamList()) 91 | -------------------------------------------------------------------------------- /datasets/grid/demo.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from tensorlog import simple 4 | import expt 5 | 6 | def runMain(): 7 | # generate the data for a 16-by-16 grid 8 | (factFile,trainFile,testFile) = expt.genInputs(16) 9 | 10 | # generate the rules - for transitive closure 11 | b = simple.Builder() 12 | path,edge = b.predicates("path,edge") 13 | X,Y,Z = b.variables("X,Y,Z") 14 | b.rules += path(X,Y) <= edge(X,Y) 15 | b.rules += path(X,Y) <= edge(X,Z) & path(Z,Y) 16 | 17 | # construct a Compiler object 18 | tlog = simple.Compiler(db=factFile,prog=b.rules) 19 | 20 | # configure the database so that edge weights are a parameter 21 | tlog.prog.db.markAsParameter('edge',2) 22 | # configure the program so that maximum recursive depth is 16 23 | tlog.prog.maxDepth = 16 24 | 25 | # compile the rules, plus a query mode, into the inference function 26 | # we want to optimize - queries of the form {Y:path(x,Y)} where x is 27 | # a given starting point in the grid (an input) and Y is an output 28 | mode = 'path/io' 29 | predicted_y = tlog.inference(mode) 30 | 31 | # when we ask for an inference function, Tensorlog also compiles a 32 | # loss function. ask for the placeholder used to hold the desired 33 | # output when we're computing loss, and use that to define an 34 | # accuracy metric, for testing 35 | actual_y = tlog.target_output_placeholder(mode) 36 | correct_predictions = tf.equal(tf.argmax(actual_y,1), tf.argmax(predicted_y,1)) 37 | accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32)) 38 | 39 | # now get the loss used in learning from the compiler and set up 40 | # a learner for it 41 | unregularized_loss = tlog.loss(mode) 42 | optimizer = tf.train.AdagradOptimizer(1.0) 43 | train_step = optimizer.minimize(unregularized_loss) 44 | 45 | # set up the session 46 | session = tf.Session() 47 | session.run(tf.global_variables_initializer()) 48 | 49 | # load the training and test data 50 | trainData = tlog.load_small_dataset(trainFile) 51 | testData = tlog.load_small_dataset(testFile) 52 | 53 | # run the optimizer for 20 epochs 54 | (tx,ty) = trainData[mode] 55 | train_fd = {tlog.input_placeholder_name(mode):tx, tlog.target_output_placeholder_name(mode):ty} 56 | for i in range(20): 57 | session.run(train_step, feed_dict=train_fd) 58 | print('epoch',i+1,'train loss and accuracy',session.run([unregularized_loss,accuracy], feed_dict=train_fd)) 59 | 60 | # test performance 61 | (ux,uy) = testData[mode] 62 | test_fd = {tlog.input_placeholder_name(mode):ux, tlog.target_output_placeholder_name(mode):uy} 63 | acc = session.run(accuracy, feed_dict=test_fd) 64 | 65 | print('test acc',acc) 66 | return acc 67 | 68 | if __name__=="__main__": 69 | runMain() 70 | -------------------------------------------------------------------------------- /datasets/family/inputs/kinship.cfacts: -------------------------------------------------------------------------------- 1 | son alfonso marco 2 | aunt angela alfonso 3 | daughter sophia lucia 4 | mother francesca angela 5 | father marco alfonso 6 | husband marco lucia 7 | uncle tomaso sophia 8 | aunt gina sophia 9 | brother marco angela 10 | sister sophia alfonso 11 | niece sophia angela 12 | wife francesca pierro 13 | wife gina emilio 14 | son alfonso lucia 15 | sister lucia emilio 16 | son marco 
francesca 17 | niece sophia gina 18 | daughter sophia marco 19 | niece sophia emilio 20 | mother maria emilio 21 | husband roberto maria 22 | uncle emilio alfonso 23 | mother maria lucia 24 | daughter angela pierro 25 | father roberto emilio 26 | aunt gina alfonso 27 | wife lucia marco 28 | father roberto lucia 29 | wife angela tomaso 30 | daughter lucia maria 31 | sister angela marco 32 | husband tomaso angela 33 | niece sophia tomaso 34 | mother francesca marco 35 | mother lucia sophia 36 | daughter lucia roberto 37 | father marco sophia 38 | brother alfonso sophia 39 | husband emilio gina 40 | son emilio roberto 41 | father pierro angela 42 | wife maria roberto 43 | son marco pierro 44 | father pierro marco 45 | nephew alfonso tomaso 46 | nephew alfonso gina 47 | uncle emilio sophia 48 | brother emilio lucia 49 | nephew alfonso angela 50 | husband pierro francesca 51 | uncle tomaso alfonso 52 | sister victoria arthur 53 | daughter jennifer andrew 54 | niece charlotte charles 55 | mother christine jennifer 56 | mother penelope victoria 57 | nephew colin margaret 58 | wife penelope christopher 59 | daughter charlotte james 60 | aunt jennifer charlotte 61 | husband christopher penelope 62 | daughter victoria christopher 63 | nephew colin jennifer 64 | brother colin charlotte 65 | nephew colin arthur 66 | mother victoria charlotte 67 | niece charlotte arthur 68 | uncle charles colin 69 | mother victoria colin 70 | daughter victoria penelope 71 | niece charlotte margaret 72 | husband andrew christine 73 | son james andrew 74 | aunt margaret charlotte 75 | son colin victoria 76 | father james charlotte 77 | son colin james 78 | son james christine 79 | husband charles jennifer 80 | mother penelope arthur 81 | daughter jennifer christine 82 | father james colin 83 | wife victoria james 84 | uncle charles charlotte 85 | son arthur penelope 86 | father andrew james 87 | father christopher arthur 88 | father christopher victoria 89 | father andrew jennifer 90 | daughter charlotte victoria 91 | wife christine andrew 92 | husband arthur margaret 93 | wife jennifer charles 94 | sister charlotte colin 95 | niece charlotte jennifer 96 | aunt jennifer colin 97 | aunt margaret colin 98 | uncle arthur charlotte 99 | son arthur christopher 100 | husband james victoria 101 | brother arthur victoria 102 | brother james jennifer 103 | nephew colin charles 104 | -------------------------------------------------------------------------------- /tensorlog/declare.py: -------------------------------------------------------------------------------- 1 | # (C) William W. Cohen and Carnegie Mellon University, 2016 2 | 3 | # mode declarations for Tensorlog (and eventually type declarations) 4 | 5 | from tensorlog import parser 6 | 7 | def asMode(spec): 8 | """Convert strings like "foo(i,o)" or "foo/io" to ModeDeclarations. 9 | Or, if given a ModeDeclaration object, return that object. 
10 | """ 11 | if type(spec)==type("") and spec.find("/")>=0: 12 | functor,rest = spec.split("/") 13 | return ModeDeclaration(parser.Goal(functor,list(rest))) 14 | elif type(spec)==type(""): 15 | return ModeDeclaration(spec) 16 | else: 17 | return spec 18 | 19 | class AbstractDeclaration(object): 20 | """Mode - or later - type - declaration for a predicate.""" 21 | def __init__(self,goal): 22 | if type(goal)==type(""): 23 | goal = parser.Parser().parseGoal(goal) 24 | self.prototype = goal 25 | self._key = str(goal) 26 | def args(self): 27 | return self.prototype.args 28 | def arg(self,i): 29 | return self.prototype.args[i] 30 | def getArity(self): 31 | return self.prototype.arity 32 | def getFunctor(self): 33 | return self.prototype.functor 34 | arity = property(getArity) 35 | functor = property(getFunctor) 36 | def __str__(self): 37 | return str(self.prototype) 38 | def __repr__(self): 39 | return repr(self.prototype) 40 | def __hash__(self): 41 | return hash(self._key) 42 | def __eq__(self,other): 43 | return other and isinstance(other,AbstractDeclaration) and self._key == other._key 44 | 45 | class ModeDeclaration(AbstractDeclaration): 46 | """Declare a mode with a goal, eg hasWord(i1,o). Arguments starting 47 | with 'i' (respectively 'o') are inputs (outputs), and arguments 48 | ending with '1' are one-hot encodings, aka singleton sets. 49 | """ 50 | def __init__(self,goal,strict=True): 51 | super(ModeDeclaration,self).__init__(goal) 52 | if strict: 53 | for a in self.prototype.args: 54 | assert a=='i' or a=='o','arguments to a ModeDeclaration should be "i" or "o" (not %s for mode %r)' % (a,self.prototype) 55 | def isInput(self,i): 56 | return self.arg(i)=='i' 57 | def isOutput(self,i): 58 | return self.arg(i)=='o' 59 | def isConst(self,i): 60 | return not self.isInput(i) and not self.isOutput(i) 61 | def __str__(self): 62 | return self.functor + "/" + "".join(self.prototype.args) 63 | 64 | class TypeDeclaration(AbstractDeclaration): 65 | """Declare allowed types for a goal, eg hasWord(doc,word). 66 | """ 67 | def __init__(self,goal): 68 | super(TypeDeclaration,self).__init__(goal) 69 | def getType(self,i): 70 | return self.arg(i) 71 | def typeSet(self): 72 | return set(self.prototype.args) 73 | -------------------------------------------------------------------------------- /tensorlog/version.py: -------------------------------------------------------------------------------- 1 | # (C) William W. Cohen and Carnegie Mellon University, 2016 2 | # 3 | # version number tracking for Tensorlog 4 | 5 | VERSION = '1.3.6' 6 | 7 | # externally visible changes: 8 | # 9 | # version 1.0: refactored cleaned-up version of nips-submission codebase 10 | # version 1.1: thread-safe gradient and eval computation, and a parallel learner 11 | # version 1.1.1: cleaned up trace function api, bug fix in parallel learner 12 | # version 1.1.2: tracer output is not per example, no parallel option in funs 13 | # version 1.2.0: not sure, really. 14 | # version 1.2.1: plearn replaces epoch-level status monitoring with merged results minibatches 15 | # version 1.2.2: add learner option to expt command-line 16 | # version 1.2.3: 17 | # add p(X,Y) :- ... 
{foo(F): q(X,F)} templates, propprProg.setRuleWeights(), propprProg.setFeatureWeights() 18 | # list --prog xxx --ruleids 19 | # more options for expt 20 | # version 1.2.4: 21 | # added --params and --weightEpsilon to expt.py 22 | # made conf.ops.long_trace a number 23 | # added interp.set() 24 | # version 1.2.5: 25 | # cross-compilation 26 | # version 1.3.0: 27 | # tensorlog is a module 28 | # new api for cross compilers + "simple" api 29 | # type declaration in cfacts: # :- actedIn(actor_t,movie_t) 30 | # parameter declarations: # :- trainable(posWeight,1) 31 | # OOV marker for test/train .exam files 32 | # interp.Interp split off from program 33 | # version 1.3.1: 34 | # simple.Compiler() fleshed out and tested for tensorflow 35 | # version 1.3.1a: 36 | # AbstractCrossCompiler.possibleOps() added 37 | # version 1.3.2: 38 | # binary user-defined plugins, eg 39 | # plugins = program.Plugins() 40 | # plugins.define('double/io', lambda x:2*x, lambda inputType:inputType) 41 | # prog = program.ProPPRProgram(rules=rules,db=db,plugins=plugins) 42 | # simple.RuleBuilder 43 | # version 1.3.3: 44 | # split of version.py into different file 45 | # refactored schema 46 | # simple.RuleBuilder -> simple.Builder 47 | # version 1.3.4: 48 | # bug fix in type inference 49 | # new serialization and use of file-like objects for load* methods 50 | # db.schema.serializeTo(filelike) 51 | # db.serializeDataTo(filelike,filter=None|params|noparams) 52 | # db.importSerializeDataFrom(filelike) 53 | # pythonic syntax for parser (automatic for files with extension .tlog) 54 | # version 1.3.5: 55 | # api typo fix: db.importSerializeDataFrom(filelike) -> importSerializedDataFrom(filelike) 56 | # simple.compiler.load_xxx_examples can be passed file-like objects 57 | # builder rules will print in pythonic syntax 58 | # now working 59 | # builder.db = "dbspec" 60 | # builder.db += "file" 61 | # bug fixes for db += file, remove matrixdb.addFile which doesn't work well 62 | # version 1.3.6: 63 | # program.serialize, Program.deserialize 64 | # datasets/socialgraphs 65 | 66 | -------------------------------------------------------------------------------- /datasets/top-1000-near-google/expt.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | import scipy.sparse as SS 4 | import scipy.io 5 | 6 | from tensorlog import expt 7 | from tensorlog import comline 8 | from tensorlog import learn 9 | from tensorlog import ops 10 | import logging 11 | 12 | 13 | 14 | from tensorlog import xctargets 15 | 16 | CROSSCOMPILERS = [] 17 | CROSSLEARNERS = {} 18 | if xctargets.theano: 19 | from tensorlog import theanoxcomp 20 | for c in [ 21 | theanoxcomp.DenseMatDenseMsgCrossCompiler, 22 | theanoxcomp.SparseMatDenseMsgCrossCompiler 23 | ]: 24 | CROSSCOMPILERS.append(c) 25 | CROSSLEARNERS[c]=theanoxcomp.FixedRateGDLearner 26 | 27 | stem = "top-1000-near-google" 28 | 29 | def setExptParams(): 30 | if not os.path.exists("tmp-cache"): os.mkdir("tmp-cache") 31 | db = comline.parseDBSpec('tmp-cache/{stem}.db|{stem}.cfacts:{stem}-fact.cfacts:{stem}-rule.cfacts'.format(stem=stem)) 32 | trainData = comline.parseDatasetSpec('tmp-cache/{stem}-train.dset|raw/{stem}.train.examples'.format(stem=stem), db) 33 | testData = comline.parseDatasetSpec('tmp-cache/{stem}-test.dset|raw/{stem}.test.examples'.format(stem=stem), db) 34 | prog = comline.parseProgSpec("{stem}-recursive.ppr".format(stem=stem),db,proppr=True) 35 | prog.setRuleWeights() 36 | prog.maxDepth=4 37 | learner = 
learn.FixedRateGDLearner(prog,epochs=5) 38 | return {'prog':prog, 39 | 'trainData':trainData, 40 | 'testData':testData, 41 | 'savedModel':'%s-trained.db' % stem, 42 | 'savedTestPredictions':'tmp-cache/%s-test.solutions.txt' % stem, 43 | 'savedTrainExamples':'tmp-cache/%s-train.examples' % stem, 44 | 'savedTestExamples':'tmp-cache/%s-test.examples' % stem, 45 | 'learner':learner 46 | } 47 | 48 | def runMain(cross=False): 49 | logging.basicConfig(level=logging.INFO) 50 | params = setExptParams() 51 | result=[] 52 | result.append(expt.Expt(params).run()) 53 | if cross: 54 | for compilerClass in CROSSCOMPILERS: 55 | xc = compilerClass(params['prog']) 56 | print(expt.fulltype(xc)) 57 | 58 | # compile everything 59 | #problem = declare.asMode('concept_politicianusendorsedbypoliticianus/io') 60 | for mode in params['trainData'].modesToLearn(): 61 | xc.ensureCompiled(mode) 62 | learner = CROSSLEARNERS[compilerClass](params['prog'],xc,epochs=5) 63 | params.update({ 64 | #'targetMode':problem,'testData':None, 65 | 'savedTestPredictions':'tmp-cache/%s-test.%s.solutions.txt' % (stem,expt.fulltype(xc)), 66 | 'savedTestExamples':None, 67 | 'savedTrainExamples':None, 68 | 'learner':learner, 69 | }) 70 | result.append( (expt.fulltype(xc),expt.Expt(params).run()) ) 71 | return result 72 | 73 | 74 | if __name__=="__main__": 75 | acc,loss = runMain('cross' in sys.argv[1:])[0] 76 | print('acc,loss',acc,loss) 77 | -------------------------------------------------------------------------------- /datasets/smokers/raw/labels.txt: -------------------------------------------------------------------------------- 1 | mataric00sensorymotor yAgents 2 | ng01stable yIR 3 | 524726 yIR 4 | heinonen96www yIR 5 | bongard00legion yML 6 | franconi00general yDB 7 | 373513 yIR 8 | bernstein00data yDB 9 | kokku02halfpipe yIR 10 | 240151 yHCI 11 | 384644 yML 12 | schnattinger98qualitybased yML 13 | 537391 yDB 14 | 530815 yDB 15 | wu01towards yIR 16 | casati01improving yDB 17 | weiss99achieving yAgents 18 | calvanese00containment yDB 19 | chen01detection yDB 20 | mcgowan02who yIR 21 | li99multifaceted yDB 22 | billinghurst98wearable yHCI 23 | wills01open yML 24 | 306088 yHCI 25 | 249151 yDB 26 | kubiatowicz00oceanstore yHCI 27 | kargupta01mobimine yHCI 28 | chen00algebraic yDB 29 | 266462 yML 30 | kotz99mobile yAgents 31 | 382301 yML 32 | faensen01hermes yIR 33 | weinstein99agentbased yDB 34 | florescu99performance yDB 35 | fegaras98new yDB 36 | pant02myspiders yIR 37 | 529138 yDB 38 | 257648 yDB 39 | dzeroski98detecting yDB 40 | 30237 yAgents 41 | sandholm01side yAgents 42 | heinze99plan yAgents 43 | puliafito00map yAgents 44 | 267501 yAgents 45 | abiteboul01representing yDB 46 | he00comparative yIR 47 | inza99feature yML 48 | meng99estimating yIR 49 | 510049 yHCI 50 | 457806 yAI 51 | feiner99wearing yHCI 52 | santini99similarity yIR 53 | 208788 yIR 54 | smola01regularized yIR 55 | pagonis99evolving yIR 56 | cruz99preserving yIR 57 | 7785 yML 58 | nguyen00active yAgents 59 | deloach01analysis yAgents 60 | 23799 yDB 61 | 448928 yDB 62 | harel99workload yHCI 63 | kowalski98from yAgents 64 | glover99recommending yIR 65 | 200188 yAgents 66 | 366169 yHCI 67 | wendler98cbr yAI 68 | 316064 yML 69 | wache01ontologybased yIR 70 | 535112 yHCI 71 | mazumdar99achieving yDB 72 | driessens01speeding yML 73 | 206655 yIR 74 | morse00collablogger yHCI 75 | hindriks00architecture yAgents 76 | 311589 yIR 77 | wermter01emergent yAgents 78 | brandt01vicious yAgents 79 | 457393 yHCI 80 | 487862 yAgents 81 | dam01antitonic yML 82 | ceska01generating yAgents 83 | 
gellersen00adding yHCI 84 | 451495 yAgents 85 | castillo99gpropii yML 86 | 539420 yML 87 | jennings95controlling yAgents 88 | 442160 yML 89 | tsoumakas01fuzzy yML 90 | wisneski98ambient yHCI 91 | wess94casebased yML 92 | chaudhri98okbc yAI 93 | murray01specifying yAgents 94 | tsap01feedback yHCI 95 | fjeld99designing yHCI 96 | rizzo97personalitydriven yHCI 97 | knight00system yHCI 98 | sreerupa98dynamic yML 99 | mannila99prediction yML 100 | 508859 yHCI 101 | stader98intelligent yAI 102 | breazeal98regulating yAI 103 | layzell98new yAI 104 | reitmayr01mobile yHCI 105 | aha91casebased yML 106 | aha97casebased yML 107 | muscettola98reformulating yAI 108 | goldberg99coordinating yAI 109 | roumeliotis99circumventing yAI 110 | 44892 yAI 111 | degaris99building yAI 112 | tresp98description yAI 113 | hekanaho98dogma yAI 114 | kervrann00level yAI 115 | 89384 yAI 116 | olson98probabilistic yAI 117 | 64225 yAI 118 | abraham00optimal yAI 119 | jakobi98running yAI 120 | abraham00hybrid yAI 121 | -------------------------------------------------------------------------------- /tensorlog/Makefile: -------------------------------------------------------------------------------- 1 | include Makefile.config 2 | NOTIFY:="DATASETS" variable not set. For long tests, clone a copy of tensorlog-datasets.git and put the path to it in your Makefile.config. For more details, see Makefile.config.template 3 | 4 | Makefile.config: Makefile.config.template 5 | cp $< $@ 6 | 7 | # unit tests that take a few secs to run - all of these are about a minute all told 8 | short-tests: 9 | python -m unittest testtensorlog 10 | python -m unittest testxcomp $(XCOMP_FILTER) 11 | python -m unittest testtf 12 | 13 | all-automatic-tests: short-tests grid-test cora-test wikimovies-test simple-comline-test smokers-test fb-benchmark-test socialgraphs-test 14 | 15 | # tests that take a few minutes to run 16 | # run 'make prepare-for-long-tests' in a fresh install before you run these 17 | long-tests: benchmark-test wnet-test cora-test grid-test 18 | 19 | prepare-for-long-tests: 20 | ifneq ($(DATASETS),) 21 | (cd ../; PYTHONPATH=`pwd`; cd $(DATASETS)/wordnet; make setup) 22 | (cd ../; PYTHONPATH=`pwd`; cd $(DATASETS)/cora; make setup) 23 | else 24 | $(error $(NOTIFY)) 25 | endif 26 | # 27 | # individual longer tests, which should take about 1min each 28 | # 29 | 30 | cora-test: 31 | ifneq ($(DATASETS),) 32 | (cd ../; PYTHONPATH=`pwd`; cd $(DATASETS)/cora/; make clean; make; make check; make clean; make unittest) 33 | else 34 | $(error $(NOTIFY)) 35 | endif 36 | 37 | grid-test: 38 | (cd ../; PYTHONPATH=`pwd`; cd datasets/grid/; make clean; make; make check; make clean; make unittest) 39 | 40 | wikimovies-test: 41 | (cd ../; PYTHONPATH=`pwd`; cd datasets/wikimovies/; make clean; make unittest) 42 | 43 | smokers-test: 44 | (cd ../; PYTHONPATH=`pwd`; cd datasets/smokers/; make clean; make unittest) 45 | 46 | simple-comline-test: 47 | python simple.py experiment --db test-data/textcattoy.cfacts --prog test-data/textcat.ppr --train test-data/toytrain.exam --test test-data/toytest.exam 48 | 49 | fb-benchmark-test: 50 | (cd ../; PYTHONPATH=`pwd`; cd datasets/fb15k-speed/; make clean; make unittest) 51 | 52 | # not converted yet.... 
53 | wnet-test: 54 | ifneq ($(DATASETS),) 55 | (cd ../; PYTHONPATH=`pwd`; cd $(DATASETS)/wordnet/; make clean; make; make check) 56 | else 57 | $(error $(NOTIFY)) 58 | endif 59 | 60 | 61 | # debug-test needs to be interactive 62 | debug-test: 63 | PYTHONPATH=../; python expt.py --prog test-data/textcat.ppr --db test-data/textcattoy.cfacts \ 64 | --trainData test-data/toytrain.exam --testData test-data/toytest.exam --proppr +++ --savedModel expt-model.db 65 | PYTHONPATH=../; python debug.py --prog test-data/textcat.ppr --db expt-model.db --trainData test-data/toytrain.exam --testData test-data/toytest.exam --proppr predict/io 66 | 67 | # word count 68 | 69 | wc: 70 | wc `ls *.py | grep -v test | grep -v try` 71 | 72 | # cleanup 73 | 74 | clean: 75 | rm -f *.pyc *.py~ *.prof 76 | rm -f toy-test.examples toy-test.solutions.txt toy-train.examples 77 | rm -rf toy-trained.db 78 | 79 | dist: 80 | tar -cvf ../tensorlog-dist.tar ../LICENSE Makefile Makefile.config.template *.py test-data/*.* 81 | -------------------------------------------------------------------------------- /datasets/grid/expected.txt: -------------------------------------------------------------------------------- 1 | | experiment results from Tue Jul 26 13:27:34 EDT 2016 - 16x16, EDGE_WEIGHT = 0.2, rate=0.01, L2Regularizer 2 | | epoch=20 maxEpoch=20 avg.crossEnt=0.526599 tot.crossEnt=3.2426 tot.loss=8.93986 avg.reg=0.813893 min.time=0 avg.time=0.478221 max.time=0 tot.time=3.0782| 7 tot.n=153 minibatches=7 3 | | training ParallelFixedRateGDLearner ... done in 13.064 sec 4 | | running trained theory on train data ... 5 | | running trained theory on train data ... done in 0.692 sec 6 | | running trained theory on test data ... 7 | | running trained theory on test data ... done in 0.465 sec 8 | | eval ..trained theory on train : acc 1.0 xent/ex 0.0194269172601 9 | | eval ..trained theory on test : acc 1.0 xent/ex 0.0736157306738 10 | | saving test predictions ... 11 | | saving test predictions ... done in 0.443 sec 12 | | saving test examples ... 13 | | saving test examples ... done in 0.073 sec 14 | | ready for commands like: proppr eval tmp-cache/test.examples tmp-cache/test.solutions.txt --metric auc --defaultNeg 15 | # actual result on Wed May 10 13:06:31 EDT 2017 16 | file tmp-cache/test.solutions.txt 17 | queries 78 answers 20046 labeled answers 20046 18 | ============================================================================== 19 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 20 | . micro: 0.999900229472 21 | . macro: 0.508329559961 22 | file tmp-cache/test.tensorlog.tensorflowxcomp.DenseMatDenseMsgCrossCompiler.solutions.txt 23 | queries 78 answers 78 labeled answers 78 24 | ============================================================================== 25 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 26 | . micro: 0.333333333333 27 | . macro: 0.240959894806 28 | file tmp-cache/test.tensorlog.tensorflowxcomp.SparseMatDenseMsgCrossCompiler.solutions.txt 29 | queries 78 answers 78 labeled answers 78 30 | ============================================================================== 31 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 32 | . micro: 0.333333333333 33 | . 
macro: 0.240959894806 34 | file tmp-cache/test.tensorlog.theanoxcomp.DenseMatDenseMsgCrossCompiler.solutions.txt 35 | queries 78 answers 20202 labeled answers 20202 36 | ============================================================================== 37 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 38 | . micro: 0.999900999901 39 | . macro: 0.508314739084 40 | file tmp-cache/test.tensorlog.theanoxcomp.SparseMatDenseMsgCrossCompiler.solutions.txt 41 | queries 78 answers 20202 labeled answers 20202 42 | ============================================================================== 43 | metric auc (AUC): The probability of a positive example scoring higher than a negative example; or the area under the ROC curve 44 | . micro: 0.999900999901 45 | . macro: 0.508314739084 46 | -------------------------------------------------------------------------------- /datasets/family/expt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.sparse as SS 3 | import scipy.io 4 | 5 | from tensorlog import expt 6 | from tensorlog import dataset 7 | from tensorlog import comline 8 | from tensorlog import matrixdb 9 | from tensorlog import mutil 10 | from tensorlog import program 11 | from tensorlog import funs 12 | from tensorlog import xctargets 13 | 14 | CROSSCOMPILERS = [] 15 | CROSSLEARNERS = {} 16 | if xctargets.theano: 17 | from tensorlog import theanoxcomp 18 | for c in [ 19 | theanoxcomp.DenseMatDenseMsgCrossCompiler, 20 | theanoxcomp.SparseMatDenseMsgCrossCompiler 21 | ]: 22 | CROSSCOMPILERS.append(c) 23 | CROSSLEARNERS[c]=theanoxcomp.FixedRateGDLearner 24 | 25 | stem = "kinship" 26 | def setExptParams(): 27 | db = comline.parseDBSpec('tmp-cache/{stem}.db|inputs/{stem}.cfacts:inputs/{stem}-rule.cfacts'.format(stem=stem)) 28 | trainData = comline.parseDatasetSpec('tmp-cache/{stem}-train.dset|inputs/{stem}-train.examples'.format(stem=stem),db) 29 | testData = comline.parseDatasetSpec('tmp-cache/{stem}-test.dset|inputs/{stem}-test.examples'.format(stem=stem),db) 30 | #print 'train:','\n '.join(trainData.pprint()) 31 | #print 'test: ','\n '.join(testData.pprint()) 32 | prog = program.ProPPRProgram.loadRules("%s-train-isg.ppr" % stem,db=db) 33 | prog.setRuleWeights() 34 | prog.maxDepth=4 35 | return (prog, trainData, testData) 36 | 37 | def runMain(): 38 | if not os.path.exists("tmp-cache"): os.mkdir("tmp-cache") 39 | (prog, trainData, testData) = setExptParams() 40 | print(accExpt(prog,trainData,testData)) 41 | print("\n".join(["%s: %s" % i for i in list(xc_accExpt(prog,trainData,testData).items())])) 42 | 43 | def accExpt(prog,trainData,testData): 44 | params = {'prog':prog, 45 | 'trainData':trainData, 46 | 'testData':testData, 47 | 'savedModel':'tmp-cache/%s-trained.db' % stem, 48 | 'savedTestPredictions':'tmp-cache/%s-test.solutions.txt' % stem, 49 | 'savedTrainExamples':'tmp-cache/%s-train.examples' % stem, 50 | 'savedTestExamples':'tmp-cache/%s-test.examples' % stem, 51 | } 52 | return expt.Expt(params).run() 53 | 54 | def xc_accExpt(prog,trainData,testData): 55 | results = {} 56 | for compilerClass in CROSSCOMPILERS: 57 | xc = compilerClass(prog) 58 | print(expt.fulltype(xc)) 59 | 60 | # compile everything 61 | for mode in trainData.modesToLearn(): 62 | xc.ensureCompiled(mode) 63 | learner = CROSSLEARNERS[compilerClass](prog,xc) 64 | 65 | params = {'prog':prog, 66 | 'trainData':trainData, 'testData':testData, 67 | 
'savedTestPredictions':'tmp-cache/%s-test.%s.solutions.txt' % (stem,expt.fulltype(xc)), 68 | 'savedTestExamples':'tmp-cache/%s-test.%s.examples' % (stem,expt.fulltype(xc)), 69 | 'learner':learner, 70 | } 71 | 72 | results[expt.fulltype(xc)] = expt.Expt(params).run() 73 | return results 74 | 75 | if __name__=="__main__": 76 | runMain() 77 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcattoy_pairs.cfacts: -------------------------------------------------------------------------------- 1 | posPair ten ten_pos 2 | posPair 7-seater 7-seater_pos 3 | posPair IBM IBM_pos 4 | posPair a a_pos 5 | posPair an an_pos 6 | posPair and and_pos 7 | posPair at at_pos 8 | posPair automatic automatic_pos 9 | posPair backlog backlog_pos 10 | posPair barbie barbie_pos 11 | posPair big big_pos 12 | posPair bike bike_pos 13 | posPair bills bills_pos 14 | posPair car car_pos 15 | posPair catalogs catalogs_pos 16 | posPair convertible convertible_pos 17 | posPair crushing crushing_pos 18 | posPair doll doll_pos 19 | posPair due due_pos 20 | posPair email email_pos 21 | posPair fire fire_pos 22 | posPair for for_pos 23 | posPair forms forms_pos 24 | posPair house house_pos 25 | posPair huge huge_pos 26 | posPair in in_pos 27 | posPair job job_pos 28 | posPair junk junk_pos 29 | posPair ken ken_pos 30 | posPair life life_pos 31 | posPair little little_pos 32 | posPair mail mail_pos 33 | posPair minivan minivan_pos 34 | posPair mortgage mortgage_pos 35 | posPair of of_pos 36 | posPair paperwork paperwork_pos 37 | posPair pile pile_pos 38 | posPair porshe porshe_pos 39 | posPair pricy pricy_pos 40 | posPair punk punk_pos 41 | posPair queen queen_pos 42 | posPair red red_pos 43 | posPair speed speed_pos 44 | posPair sports sports_pos 45 | posPair suburbs suburbs_pos 46 | posPair tax tax_pos 47 | posPair the the_pos 48 | posPair toy toy_pos 49 | posPair transmission transmission_pos 50 | posPair trouble trouble_pos 51 | posPair truck truck_pos 52 | posPair wagon wagon_pos 53 | posPair with with_pos 54 | posPair woe woe_pos 55 | posPair yellow yellow_pos 56 | posPair yesterday yesterday_pos 57 | 58 | negPair ten ten_neg 59 | negPair 7-seater 7-seater_neg 60 | negPair IBM IBM_neg 61 | negPair a a_neg 62 | negPair an an_neg 63 | negPair and and_neg 64 | negPair at at_neg 65 | negPair automatic automatic_neg 66 | negPair backlog backlog_neg 67 | negPair barbie barbie_neg 68 | negPair big big_neg 69 | negPair bike bike_neg 70 | negPair bills bills_neg 71 | negPair car car_neg 72 | negPair catalogs catalogs_neg 73 | negPair convertible convertible_neg 74 | negPair crushing crushing_neg 75 | negPair doll doll_neg 76 | negPair due due_neg 77 | negPair email email_neg 78 | negPair fire fire_neg 79 | negPair for for_neg 80 | negPair forms forms_neg 81 | negPair house house_neg 82 | negPair huge huge_neg 83 | negPair in in_neg 84 | negPair job job_neg 85 | negPair junk junk_neg 86 | negPair ken ken_neg 87 | negPair life life_neg 88 | negPair little little_neg 89 | negPair mail mail_neg 90 | negPair minivan minivan_neg 91 | negPair mortgage mortgage_neg 92 | negPair of of_neg 93 | negPair paperwork paperwork_neg 94 | negPair pile pile_neg 95 | negPair porshe porshe_neg 96 | negPair pricy pricy_neg 97 | negPair punk punk_neg 98 | negPair queen queen_neg 99 | negPair red red_neg 100 | negPair speed speed_neg 101 | negPair sports sports_neg 102 | negPair suburbs suburbs_neg 103 | negPair tax tax_neg 104 | negPair the the_neg 105 | negPair toy toy_neg 106 | negPair 
transmission transmission_neg 107 | negPair trouble trouble_neg 108 | negPair truck truck_neg 109 | negPair wagon wagon_neg 110 | negPair with with_neg 111 | negPair woe woe_neg 112 | negPair yellow yellow_neg 113 | negPair yesterday yesterday_neg 114 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/karate-ghirl.txt: -------------------------------------------------------------------------------- 1 | edge isa 1 b 2 | edge e 1 32 3 | edge e 1 22 4 | edge e 1 20 5 | edge e 1 18 6 | edge e 1 14 7 | edge e 1 13 8 | edge e 1 12 9 | edge e 1 11 10 | edge e 1 9 11 | edge e 1 8 12 | edge e 1 7 13 | edge e 1 6 14 | edge e 1 5 15 | edge e 1 4 16 | edge e 1 3 17 | edge e 1 2 18 | edge isa 2 b 19 | edge e 2 31 20 | edge e 2 22 21 | edge e 2 20 22 | edge e 2 18 23 | edge e 2 14 24 | edge e 2 8 25 | edge e 2 4 26 | edge e 2 3 27 | edge e 2 1 28 | edge isa 3 b 29 | edge e 3 33 30 | edge e 3 29 31 | edge e 3 28 32 | edge e 3 14 33 | edge e 3 10 34 | edge e 3 9 35 | edge e 3 8 36 | edge e 3 4 37 | edge e 3 2 38 | edge e 3 1 39 | edge isa 4 b 40 | edge e 4 14 41 | edge e 4 13 42 | edge e 4 8 43 | edge e 4 3 44 | edge e 4 2 45 | edge e 4 1 46 | edge isa 5 b 47 | edge e 5 11 48 | edge e 5 7 49 | edge e 5 1 50 | edge isa 6 b 51 | edge e 6 17 52 | edge e 6 11 53 | edge e 6 7 54 | edge e 6 1 55 | edge isa 7 b 56 | edge e 7 17 57 | edge e 7 6 58 | edge e 7 5 59 | edge e 7 1 60 | edge isa 8 b 61 | edge e 8 4 62 | edge e 8 3 63 | edge e 8 2 64 | edge e 8 1 65 | edge isa 9 r 66 | edge e 9 34 67 | edge e 9 33 68 | edge e 9 31 69 | edge e 9 3 70 | edge e 9 1 71 | edge isa 10 r 72 | edge e 10 34 73 | edge e 10 3 74 | edge isa 11 b 75 | edge e 11 6 76 | edge e 11 5 77 | edge e 11 1 78 | edge isa 12 b 79 | edge e 12 1 80 | edge isa 13 b 81 | edge e 13 4 82 | edge e 13 1 83 | edge isa 14 b 84 | edge e 14 34 85 | edge e 14 4 86 | edge e 14 3 87 | edge e 14 2 88 | edge e 14 1 89 | edge isa 15 r 90 | edge e 15 34 91 | edge e 15 33 92 | edge isa 16 r 93 | edge e 16 34 94 | edge e 16 33 95 | edge isa 17 b 96 | edge e 17 7 97 | edge e 17 6 98 | edge isa 18 b 99 | edge e 18 2 100 | edge e 18 1 101 | edge isa 19 r 102 | edge e 19 34 103 | edge e 19 33 104 | edge isa 20 b 105 | edge e 20 34 106 | edge e 20 2 107 | edge e 20 1 108 | edge isa 21 r 109 | edge e 21 34 110 | edge e 21 33 111 | edge isa 22 b 112 | edge e 22 2 113 | edge e 22 1 114 | edge isa 23 r 115 | edge e 23 34 116 | edge e 23 33 117 | edge isa 24 r 118 | edge e 24 34 119 | edge e 24 33 120 | edge e 24 30 121 | edge e 24 28 122 | edge e 24 26 123 | edge isa 25 r 124 | edge e 25 32 125 | edge e 25 28 126 | edge e 25 26 127 | edge isa 26 r 128 | edge e 26 32 129 | edge e 26 25 130 | edge e 26 24 131 | edge isa 27 r 132 | edge e 27 34 133 | edge e 27 30 134 | edge isa 28 r 135 | edge e 28 34 136 | edge e 28 25 137 | edge e 28 24 138 | edge e 28 3 139 | edge isa 29 r 140 | edge e 29 34 141 | edge e 29 32 142 | edge e 29 3 143 | edge isa 30 r 144 | edge e 30 34 145 | edge e 30 33 146 | edge e 30 27 147 | edge e 30 24 148 | edge isa 31 r 149 | edge e 31 34 150 | edge e 31 33 151 | edge e 31 9 152 | edge e 31 2 153 | edge isa 32 r 154 | edge e 32 34 155 | edge e 32 33 156 | edge e 32 29 157 | edge e 32 26 158 | edge e 32 25 159 | edge e 32 1 160 | edge isa 33 r 161 | edge e 33 34 162 | edge e 33 32 163 | edge e 33 31 164 | edge e 33 30 165 | edge e 33 24 166 | edge e 33 23 167 | edge e 33 21 168 | edge e 33 19 169 | edge e 33 16 170 | edge e 33 15 171 | edge e 33 9 172 | edge e 33 3 173 | edge isa 34 r 174 | 
edge e 34 33 175 | edge e 34 32 176 | edge e 34 31 177 | edge e 34 30 178 | edge e 34 29 179 | edge e 34 28 180 | edge e 34 27 181 | edge e 34 24 182 | edge e 34 23 183 | edge e 34 21 184 | edge e 34 20 185 | edge e 34 19 186 | edge e 34 16 187 | edge e 34 15 188 | edge e 34 14 189 | edge e 34 10 190 | edge e 34 9 191 | -------------------------------------------------------------------------------- /tensorlog/try.py: -------------------------------------------------------------------------------- 1 | # random test/experimental code that william unaccountably wanted to 2 | # check into git 3 | 4 | import math 5 | import numpy as np 6 | import scipy.sparse as sp 7 | 8 | import tensorflow as tf 9 | 10 | from tensorlog import matrixdb,mutil 11 | 12 | def go(): 13 | db = matrixdb.MatrixDB.loadFile("test-data/textcattoy_corpus.cfacts") 14 | m = db.matEncoding[('hasWord',2)] 15 | print('m',m.shape,m.nnz) 16 | def tfidf_transform(tf_matrix): 17 | # implements Salton's TFIDF transformation, ie l2-normalized 18 | # vector after scaling by: log(tf+1.0) * log(#docs/idf) 19 | df = tf_matrix.sum(axis=0) 20 | # count docs by summing word counts w/in each row of m, 21 | # clipping the sum down to 1, and then adding up 22 | ndoc = np.clip(tf_matrix.sum(axis=1),0.0,1.0).sum() 23 | # this ensures idfs will be zero for any terms with df==0, after I 24 | # take the log 25 | df[df==0] = ndoc 26 | idf = np.log( np.reciprocal(df) * ndoc ) 27 | # first compute log(tf+1.0) 28 | scaled_tf_matrix = mutil.mapData(lambda v:np.log(v+1.0),tf_matrix) 29 | # now multiply by idf factor 30 | unnormalized_tfidf_matrix = mutil.broadcastAndComponentwiseMultiply(scaled_tf_matrix, sp.csr_matrix(idf)) 31 | # compute normalizer needed for l2 normalization 32 | normalizer = mutil.mapData(lambda v:v*v, unnormalized_tfidf_matrix).sum(axis=1) 33 | normalizer = np.sqrt(normalizer) 34 | # finally, multiply unnormalized_tfidf_matrix and normalizer, 35 | # which is complicated since they are sparse, scipy.sparse doesn't 36 | # support broadcasting, and 37 | # mutil.broadcastAndComponentwiseMultiply can only broadcast along 38 | # rows. 
so we need to transpose everything before and after, and 39 | # convert into csr matrices 40 | tmp1 = sp.csr_matrix(unnormalized_tfidf_matrix.transpose()) 41 | tmp2 = sp.csr_matrix(normalizer.transpose()) 42 | tmp2.eliminate_zeros() 43 | tmp3 = mutil.mapData(np.reciprocal,tmp2) 44 | tmp4 = mutil.broadcastAndComponentwiseMultiply(tmp1,tmp3) 45 | result = sp.csr_matrix(tmp4.transpose()) 46 | return result 47 | m = tfidf_transform(m) 48 | 49 | def go1(): 50 | dense = tf.Variable([[0,0,10,1,0,0],[0,0,-2,3,0,0]], dtype=tf.float32) 51 | sm1 = tf.nn.softmax(dense) 52 | 53 | denseReplacing0WithNeg10 = tf.where( 54 | dense>0.0, 55 | dense, 56 | tf.ones(tf.shape(dense), tf.float32)*(-10.0)) 57 | sm2 = tf.nn.softmax(denseReplacing0WithNeg10) 58 | 59 | nz_indices = tf.where(tf.not_equal(dense, tf.constant(0, dtype=tf.float32))) 60 | nz_values = tf.gather_nd(dense,nz_indices) 61 | sparse = tf.SparseTensor(nz_indices, nz_values, dense.get_shape()) 62 | sm3 = tf.sparse_softmax(sparse) 63 | dm3a = tf.sparse_to_dense(sm3.indices,sm3.get_shape(),sm3.values) 64 | dm3b = tf.scatter_nd(sm3.indices,sm3.values,dense.get_shape()) 65 | 66 | session = tf.Session() 67 | session.run(tf.global_variables_initializer()) 68 | from tensorflow.python.framework import ops 69 | for v in nz_indices,nz_values,sparse,sm3,dm3a,dm3b: 70 | print('gradient of op',v,ops.get_gradient_function(v.op)) 71 | 72 | print('dense sm - direct',session.run(sm1)) 73 | print('dense sm - with -10 trick',session.run(sm2)) 74 | print('sparse sm',session.run(sm3)) 75 | print('densified sparse sm - old',session.run(dm3a)) 76 | print('densified sparse sm - new',session.run(dm3b)) 77 | 78 | if __name__ == "__main__": 79 | print('trying') 80 | go() 81 | 
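A note on the go1() experiment above, inferred from the code rather than from any TensorLog documentation: softmax is invariant to shifting a whole row by a constant, and exp(-10) is about 4.5e-5 while an untouched zero entry would contribute exp(0) = 1 to the normalizer, so replacing structural zeros with -10 approximates masking them out of the softmax; tf.sparse_softmax sidesteps the issue entirely by normalizing over only the explicitly stored values.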
-------------------------------------------------------------------------------- /datasets/grid/bigtfexpt.py: -------------------------------------------------------------------------------- 1 | # todo: use minibatch size 2 | 3 | BATCH_SIZE=250 4 | 5 | import time 6 | import tensorflow as tf 7 | 8 | from tensorlog import simple 9 | import bigexpt 10 | 11 | def setup_tlog(maxD,factFile,trainFile,testFile): 12 | tlog = simple.Compiler(db=factFile,prog="grid.ppr") 13 | tlog.prog.db.markAsParameter('edge',2) 14 | tlog.prog.maxDepth = maxD 15 | trainData = tlog.load_small_dataset(trainFile) 16 | testData = tlog.load_small_dataset(testFile) 17 | return (tlog,trainData,testData) 18 | 19 | # run timing experiment 20 | def timingExpt(tlog,maxD,trainFile,minibatch): 21 | print('depth',maxD,'minibatch',minibatch) 22 | tlog.prog.maxDepth = maxD 23 | dset = tlog.load_dataset(trainFile) 24 | predicted_y = tlog.inference('path/io') 25 | session = tf.Session() 26 | session.run(tf.global_variables_initializer()) 27 | t0 = time.time() 28 | for mode,(tx,ty) in tlog.minibatches(dset,batch_size=minibatch): 29 | train_fd = {tlog.input_placeholder_name('path/io'):tx, 30 | tlog.target_output_placeholder_name('path/io'):ty} 31 | session.run(tlog.inference(mode), feed_dict=train_fd) 32 | break 33 | elapsed = time.time() - t0 34 | print('inference takes',elapsed,'sec') 35 | print(tx.shape[0],'examples','time',elapsed,'qps',tx.shape[0]/elapsed) 36 | return elapsed 37 | 38 | def trainAndTest(tlog,trainDataFile,testDataFile,epochs): 39 | mode = 'path/io' 40 | trainData = tlog.load_dataset(trainDataFile) 41 | testData = tlog.load_dataset(testDataFile) 42 | 43 | predicted_y = tlog.inference(mode) 44 | actual_y = tlog.target_output_placeholder(mode) 45 | correct_predictions = tf.equal(tf.argmax(actual_y,1), tf.argmax(predicted_y,1)) 46 | accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32)) 47 | 48 | unregularized_loss = tlog.loss(mode) 49 | optimizer = tf.train.AdagradOptimizer(1.0) 50 | train_step = optimizer.minimize(unregularized_loss) 51 | 52 | session = tf.Session() 53 | session.run(tf.global_variables_initializer()) 54 | t0 = time.time() 55 | for i in range(epochs): 56 | print('epoch',i+1,'elapsed',time.time()-t0) 57 | for (mode,(tx,ty)) in tlog.minibatches(trainData): 58 | train_fd = {tlog.input_placeholder_name(mode):tx, tlog.target_output_placeholder_name(mode):ty} 59 | session.run(train_step,feed_dict=train_fd) 60 | print('learning takes',time.time()-t0,'sec') 61 | tot_test = 0 62 | tot_acc = 0 63 | i = 0 64 | for (mode,(ux,uy)) in tlog.minibatches(testData): 65 | i += 1 66 | m = ux.shape[0] #examples 67 | test_fd = {tlog.input_placeholder_name(mode):ux, tlog.target_output_placeholder_name(mode):uy} 68 | acc = session.run(accuracy, feed_dict=test_fd) 69 | print('minibatch acc for batch',i,acc) 70 | tot_test += m 71 | tot_acc += acc*m 72 | acc = tot_acc/tot_test 73 | print('weighted acc',acc) 74 | return acc 75 | 76 | def runMain(): 77 | (goal,n,maxD,epochsOrMinibatch) = bigexpt.getargs() 78 | (factFile,trainFile,testFile) = bigexpt.genInputs(n) 79 | (tlog,trainData,testData) = setup_tlog(maxD,factFile,trainFile,testFile) 80 | print('tlog.prog.maxDepth',tlog.prog.maxDepth) 81 | if goal=='time': 82 | print(timingExpt(tlog,maxD,trainFile,epochsOrMinibatch)) 83 | elif goal=='acc': 84 | print(trainAndTest(tlog,trainFile,testFile,epochsOrMinibatch)) 85 | else: 86 | assert False,'bad goal %s' % goal 87 | 88 | if __name__=="__main__": 89 | runMain() 90 | -------------------------------------------------------------------------------- /datasets/socialgraphs/demo.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tensorflow as tf 3 | 4 | from tensorlog import simple 5 | 6 | def runMain(argv): 7 | 8 | # option parsing - options should be passed in as something like sys.argv[1:], eg 9 | # runMain(["--epochs","20","--stem","cora"]) 10 | opts = simple.Options() 11 | opts.stem = 'karate' 12 | opts.regularizer_scale = 0.1 13 | opts.link_scale = 0.9 14 | opts.epochs = 20 # 0 for no learning 15 | opts.max_depth = 4 16 | opts.learn_friend = True 17 | opts.learn_label = False 18 | # override the option defaults, set above 19 | opts.set_from_command_line(argv) 20 | # define the input file names from the stems 21 | factFile = 'inputs/%s.cfacts' % opts.stem 22 | trainFile = 'inputs/%s-train.exam' % opts.stem 23 | testFile = 'inputs/%s-test.exam' % opts.stem 24 | 25 | # construct a Compiler object 26 | tlog = simple.Compiler(db=factFile,prog='social.tlog') 27 | 28 | # tweak the program and database 29 | tlog.prog.maxDepth = opts.max_depth 30 | # scale down the friend links, according to the option link_scale. 
31 | # smaller weights are like a higher reset in RWR/PPR 32 | tlog.db.matEncoding[('friend',2)] = opts.link_scale * tlog.db.matEncoding[('friend',2)] 33 | # specify which relations will be treated as parameters 34 | if opts.learn_friend: tlog.mark_db_predicate_trainable('friend/2') 35 | if opts.learn_label: tlog.mark_db_predicate_trainable('label/2') 36 | 37 | # compile the rules, plus a query mode, into the inference function, 38 | # which we will use for testing 39 | mode = 'inferred_label/io' 40 | predicted_y = tlog.inference(mode) 41 | actual_y = tlog.target_output_placeholder(mode) 42 | correct_predictions = tf.equal(tf.argmax(actual_y,1), tf.argmax(predicted_y,1)) 43 | accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32)) 44 | 45 | # also get the corresponding loss function from tensorlog 46 | unregularized_loss = tlog.loss(mode) 47 | # L1 regularize the basic loss function 48 | weight_vectors = tlog.trainable_db_variables(mode,for_optimization=True) 49 | regularized_loss = unregularized_loss 50 | for v in weight_vectors: 51 | regularized_loss = regularized_loss + opts.regularizer_scale*tf.reduce_sum(tf.abs(v)) 52 | 53 | # how to optimize 54 | optimizer = tf.train.AdagradOptimizer(1.0) 55 | train_step = optimizer.minimize(regularized_loss) 56 | 57 | # set up the session 58 | session = tf.Session() 59 | session.run(tf.global_variables_initializer()) 60 | 61 | # load the training and test data 62 | trainData = tlog.load_small_dataset(trainFile) 63 | testData = tlog.load_small_dataset(testFile) 64 | 65 | # compute initial test-set performance 66 | (ux,uy) = testData[mode] 67 | test_fd = {tlog.input_placeholder_name(mode):ux, tlog.target_output_placeholder_name(mode):uy} 68 | initial_accuracy = session.run(accuracy, feed_dict=test_fd) 69 | print('initial test acc',initial_accuracy) 70 | 71 | # run the optimizer for fixed number of epochs 72 | (tx,ty) = trainData[mode] 73 | train_fd = {tlog.input_placeholder_name(mode):tx, tlog.target_output_placeholder_name(mode):ty} 74 | for i in range(opts.epochs): 75 | session.run(train_step, feed_dict=train_fd) 76 | print('epoch',i+1,'train loss and accuracy',session.run([unregularized_loss,accuracy], feed_dict=train_fd)) 77 | 78 | # save the learned model 79 | tlog.set_all_db_params_to_learned_values(session) 80 | direc = '/tmp/%s-learned-model.prog' % opts.stem 81 | tlog.serialize_program(direc) 82 | print('learned parameters serialized in',direc) 83 | 84 | # compute final test performance 85 | final_accuracy = session.run(accuracy, feed_dict=test_fd) 86 | print('initial test acc',initial_accuracy) 87 | print('final test acc',final_accuracy) 88 | 89 | # return summary of statistics 90 | return initial_accuracy,final_accuracy 91 | 92 | if __name__=="__main__": 93 | runMain(sys.argv[1:]) 94 | -------------------------------------------------------------------------------- /doc/QUICKSTART.txt: -------------------------------------------------------------------------------- 1 | BASICS 2 | 3 | A Tensorlog DATABASE holds a bunch of unary and binary relations, 4 | which are encoded as scipy sparse matrices. The human-readable format 5 | for this is a set of files with the .cfacts extension. Some examples, 6 | from tensorlog/test-data/textcattoy.cfacts: 7 | 8 | hasWord dh a 9 | hasWord dh pricy 10 | hasWord dh doll 11 | hasWord dh house 12 | hasWord ft a 13 | hasWord ft little 14 | hasWord ft red 15 | hasWord ft fire 16 | hasWord ft truck 17 | ... 18 | label pos 19 | label neg 20 | 21 | The columns are: predicate name, argument 1, and (optionally, for 22 | binary predicates) argument 2. An optional extra column holds a 23 | numeric weight for the fact (so don't use a constant that parses to a 24 | number in a cfacts file, since it will be read as a weight - or, if you 25 | must, set matrixdb.conf.allow_weighted_tuples = False.) You need to group 26 | facts with the same predicate together. 
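For example (this particular weighted fact is illustrative, not part of the distributed textcattoy data), a fact with an extra weight column like "hasWord dh pricy 0.75" stores hasWord(dh,pricy) with weight 0.75 in the underlying sparse matrix, instead of the default weight of 1.0.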
27 | 28 | Typing: You can optionally add type declarations in a cfacts file, 29 | like this: 30 | 31 | # :- predict(doc,label) 32 | # :- hasWord(doc,word) 33 | # :- posPair(word,labelWordPair) 34 | # :- label(label) 35 | 36 | This will basically put the constants of type 'doc', 'label', etc in 37 | different namespaces. Types are all disjoint. You should either type 38 | everything or nothing (in the latter case, everything is a default 39 | type __THING__). 40 | 41 | You can also mark a DB predicate as a parameter with a declaration 42 | like 43 | 44 | # :- trainable(posWeight,1) 45 | 46 | A database can be SERIALIZED and should be stored in a directory with 47 | extension .db. 48 | 49 | A Tensorlog PROGRAM usually has extension .ppr. Some examples: 50 | 51 | ------------------------------------------------------------------------------ 52 | predict(X,Pos) :- assign(Pos,pos) {pos_weight(F): hasWord(X,W),posPair(W,F)}. 53 | predict(X,Neg) :- assign(Neg,neg) {neg_weight(F): hasWord(X,W),negPair(W,F)}. 54 | predict(X,Y) :- classify(X,Y) {weight(Y): true}. 55 | 56 | match(R,S) :- fname(R,FR),fmatch(FR,FS),fname(S,FS) {f}. 57 | match(R,S) :- lname(R,LR),lmatch(LR,LS),lname(S,LS) {l}. 58 | match(R,S) :- addr(R,AR),amatch(AR,AS),addr(S,AS) {a}. 59 | ------------------------------------------------------------------------------ 60 | 61 | Semantics: The first clause above is converted to 62 | 63 | predict(X,Pos) :- assign(Pos,pos), hasWord(X,W),posPair(W,F),pos_weight(F). 64 | 65 | but the {} syntax makes it more obvious what is used for 'control'. 66 | The third clause is converted to the following ("true" is a special 67 | dummy literal here): 68 | 69 | predict(X,Y) :- classify(X,Y), weight(Y). 70 | 71 | The last clause is converted to the following ("weighted" is a special 72 | predicate.) 73 | 74 | match(R,S) :- addr(R,AR),amatch(AR,AS),addr(S,AS), assign(RuleID,a), weighted(RuleID). 75 | 76 | Typing: you can use assign(Var,const,type) if you're using types: eg, 77 | 78 | predict(X,Pos) :- assign(Pos,pos,label) {all(F): hasWord(X,W),posPair(W,F)}. 79 | 80 | If you use the ProPPR-style rule features (in the curly braces) you 81 | should 82 | 1) make sure any constants appearing there are in the database. 83 | Pro tip: If you make these all appear in the database as arguments to the 84 | unary predicate 'weighted' then program.setRuleWeights() will 85 | operate properly by default (see the example after this list). If you 86 | use another unary predicate you need to specify it as an argument to program.setRuleWeights. 87 | 2) Load the rule file as 'proppr' format, which is NOT the default. 88 | 
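For example, a minimal sketch of what this means for the match rules shown earlier, whose rule features are f, l, and a: add the facts "weighted f", "weighted l", and "weighted a" to a .cfacts file, and then prog.setRuleWeights() will by default find one trainable weight per rule id. (The exact fact file is illustrative; the point is just that each rule id must be a database constant appearing as an argument of 'weighted'.)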
89 | There's no serialized form of a program. 90 | 91 | There's a more Pythonic syntax for rules, which can be used to create 92 | rules programmatically, described in the docs for 93 | tensorlog.simple.Builder class. Briefly, some examples are: 94 | 95 | from tensorlog import simple 96 | b = simple.Builder() 97 | X,Y,Z = b.variables("X Y Z") 98 | aunt,parent,sister,wife = b.predicates("aunt parent sister wife") 99 | uncle = b.predicate("uncle") 100 | b.rules += aunt(X,Y) <= parent(X,Z) & sister(Z,Y) 101 | b.rules += aunt(X,Y) <= uncle(X,Z) & wife(Z,Y) 102 | 103 | Or, with 'control' on the rules: 104 | 105 | r1,r2 = b.rule_ids("r1 r2") 106 | ... 107 | b.rules += aunt(X,Y) <= uncle(X,Z) & wife(Z,Y) // r1 108 | b.rules += aunt(X,Y) <= parent(X,Z) & sister(Z,Y) // r2 109 | b.rules += aunt(X,Y) <= uncle(X,Z) & wife(Z,Y) // (weight(F) | description(X,D) & feature(X,F)) 110 | 111 | -------------------------------------------------------------------------------- /datasets/socialgraphs/inputs/karate.cfacts: -------------------------------------------------------------------------------- 1 | label node001 b 2 | friend node001 node032 3 | friend node001 node022 4 | friend node001 node020 5 | friend node001 node018 6 | friend node001 node014 7 | friend node001 node013 8 | friend node001 node012 9 | friend node001 node011 10 | friend node001 node009 11 | friend node001 node008 12 | friend node001 node007 13 | friend node001 node006 14 | friend node001 node005 15 | friend node001 node004 16 | friend node001 node003 17 | friend node001 node002 18 | friend node002 node031 19 | friend node002 node022 20 | friend node002 node020 21 | friend node002 node018 22 | friend node002 node014 23 | friend node002 node008 24 | friend node002 node004 25 | friend node002 node003 26 | friend node002 node001 27 | friend node003 node033 28 | friend node003 node029 29 | friend node003 node028 30 | friend node003 node014 31 | friend node003 node010 32 | friend node003 node009 33 | friend node003 node008 34 | friend node003 node004 35 | friend node003 node002 36 | friend node003 node001 37 | label node004 b 38 | friend node004 node014 39 | friend node004 node013 40 | friend node004 node008 41 | friend node004 node003 42 | friend node004 node002 43 | friend node004 node001 44 | label node005 b 45 | friend node005 node011 46 | friend node005 node007 47 | friend node005 node001 48 | friend node006 node017 49 | friend node006 node011 50 | friend node006 node007 51 | friend node006 node001 52 | friend node007 node017 53 | friend node007 node006 54 | friend node007 node005 55 | friend node007 node001 56 | friend node008 node004 57 | friend node008 node003 58 | friend node008 node002 59 | friend node008 node001 60 | friend node009 node034 61 | friend node009 node033 62 | friend node009 node031 63 | friend node009 node003 64 | friend node009 node001 65 | label node010 r 66 | friend node010 node034 67 | friend node010 node003 68 | label node011 b 69 | friend node011 node006 70 | friend node011 node005 71 | friend node011 node001 72 | friend node012 node001 73 | friend node013 node004 74 | friend node013 node001 75 | friend node014 node034 76 | friend node014 node004 77 | friend node014 node003 78 | friend node014 node002 79 | friend node014 node001 80 | friend node015 node034 81 | friend node015 node033 82 | label node016 r 83 | friend node016 node034 84 | friend node016 node033 85 | friend node017 node007 86 | friend node017 node006 87 | friend node018 node002 88 | friend node018 node001 89 | friend node019 node034 90 | friend node019 node033 91 | friend node020 node034 92 | friend node020 node002 93 | friend node020 node001 94 | label node021 r 95 | friend node021 node034 96 | friend node021 node033 97 | label node022 b 98 | friend node022 node002 99 
| friend node022 node001 100 | friend node023 node034 101 | friend node023 node033 102 | friend node024 node034 103 | friend node024 node033 104 | friend node024 node030 105 | friend node024 node028 106 | friend node024 node026 107 | label node025 r 108 | friend node025 node032 109 | friend node025 node028 110 | friend node025 node026 111 | friend node026 node032 112 | friend node026 node025 113 | friend node026 node024 114 | friend node027 node034 115 | friend node027 node030 116 | friend node028 node034 117 | friend node028 node025 118 | friend node028 node024 119 | friend node028 node003 120 | friend node029 node034 121 | friend node029 node032 122 | friend node029 node003 123 | label node030 r 124 | friend node030 node034 125 | friend node030 node033 126 | friend node030 node027 127 | friend node030 node024 128 | label node031 r 129 | friend node031 node034 130 | friend node031 node033 131 | friend node031 node009 132 | friend node031 node002 133 | friend node032 node034 134 | friend node032 node033 135 | friend node032 node029 136 | friend node032 node026 137 | friend node032 node025 138 | friend node032 node001 139 | friend node033 node034 140 | friend node033 node032 141 | friend node033 node031 142 | friend node033 node030 143 | friend node033 node024 144 | friend node033 node023 145 | friend node033 node021 146 | friend node033 node019 147 | friend node033 node016 148 | friend node033 node015 149 | friend node033 node009 150 | friend node033 node003 151 | label node034 r 152 | friend node034 node033 153 | friend node034 node032 154 | friend node034 node031 155 | friend node034 node030 156 | friend node034 node029 157 | friend node034 node028 158 | friend node034 node027 159 | friend node034 node024 160 | friend node034 node023 161 | friend node034 node021 162 | friend node034 node020 163 | friend node034 node019 164 | friend node034 node016 165 | friend node034 node015 166 | friend node034 node014 167 | friend node034 node010 168 | friend node034 node009 169 | -------------------------------------------------------------------------------- /tensorlog/Notes.txt: -------------------------------------------------------------------------------- 1 | Random howto: 2 | - export PYTHONPATH=/usr/local/google/home/cohenw/code/TensorLog:$PYTHONPATH 3 | - live theano add /usr/local/google/home/cohenw/code/Theano 4 | 5 | Next actions (KR): 6 | 7 | - revive debug.py - how can testing this be automated? should I bother? w/ Katie? 8 | - look at sparse messages in theano-based learner? 9 | 10 | Medium jobs: 11 | - simple.regularized_loss(regularization_scale=0.1, regularizer='l2') 12 | - simple.train? 13 | - demo/... 14 | - move native stuff into a subdirectory? 15 | - move dataset examples to demo/...? 16 | - allow you to write some extra information into a serialized database/model 17 | -- just serialize the parameters? 18 | -- serialize the rules as well? 19 | -- have a readme file that includes time, process, .... 20 | 21 | Little jobs (WC): 22 | - output types as typename/id 23 | - benchmark tests on tensorflow 24 | - have --proppr as the default option for programs 25 | - test document plugins, simple.Compiler(plugins=....) 26 | - docs/tutorial 27 | -- plugins, simple 28 | - ?clean up autoweighting for rule weights and types 29 | - ?get rid of typeless option 30 | - ?tests and etc for {id} features. Should I have these? they are just {weighted(R1): assign(R1,r1,ruleid_t)} 31 | - ?cleanup sessions in tensorflowxcomp 32 | - ?cleanup modes? 
should they always be written pred/io, and i,o no longer be reserved words? 33 | - ? lazy DB/program compilation - should be able to load a program w/o DB, and have 34 | declarations be embedded in the program, before loading the DB. 35 | maybe. 36 | - ? automatically move program constants into the database and introduce assign clauses when needed? 37 | - ? move testability code to subclass "testabletensorflowxcompiler" 38 | - ? move "native" code to tensorlog.native 39 | - ?? snake_case fixes in testing???? 40 | - ?? repackage (see below)??? 41 | - ?? optimize compilation??? 42 | 43 | Overall package structure: 44 | tensorlog: config, matrixdb, parser, bpcomp(iler), program, funs, ops (-eval and bprop), xcomp 45 | .ui: comline, expt, list, debug 46 | .native: mutil, autodiff (eval and bprop), learn, plearn, putil, dataset 47 | .th: theanoxcomp 48 | .tf: tensorflowxcomp 49 | or maybe just stick a bunch of stuff in tlg.native: native.learn, ... 50 | 51 | Question->query idea 52 | 53 | for each property pi(X,t) where t is a tag and X the set of things 54 | which have that property, use the rules 55 | 56 | q1(Q,X) :- pi_query_tag(Q,T), pi(X,T), {pi_relevant(F): query_feature(Q,F)} 57 | q2(Q,X) :- anything(X) {pi_irrelevant(F): query_feature(Q,F)} 58 | ... 59 | qn(Q,X) :- anything(X) {pn_irrelevant(F): query_feature(Q,F)} 60 | 61 | q(Q,X) :- q1(Q,X),q2(Q,X),... qn(Q,X) 62 | 63 | pi_query_tag(Q,T) : tag T for property pi is in query, eg "T=red" for pi=color in "a red sweater vest" 64 | query_feature(Q,F) : words/ngrams etc in query 65 | 66 | -------------------- 67 | 68 | movie app idea: 69 | - train inference using provenance features 70 | 71 | triple Trip has: head(Trip,H),tail(Trip,T),rel(Trip,R),creator(Trip,C) 72 | 73 | | head rxy x 74 | | tail rxy y 75 | | creator rxy nyt 76 | | creator rxy fox 77 | | rel rxy r 78 | 79 | 80 | for predicate p(Slot,Filler):-r(Slot,Filler) inference rule is: 81 | 82 | | p(Slot,Filler) :- 83 | | head(Trip,Slot),assign(R,r),rel(Trip,R),tail(Trip,Filler) 84 | | creator(Trip,C), weighted(C). 85 | 86 | for predicate p(Slot,Filler):-r1(Slot,Z),r2(Z,Filler) inference rule is: 87 | 88 | | p(Slot,Filler) :- 89 | | head(Trip1,Slot),assign(R1,r1), rel(Trip1,R1), tail(Trip1,Z) 90 | | head(Trip2,Z), assign(R2,r2), rel(Trip2,R2), tail(Trip2,Filler) 91 | | creator(Trip1,C1), weighted(C1), creator(Trip2,C2), weighted(C2). 92 | 93 | Then train high-confidence results against low-confidence ones. 94 | 95 | - might be better to include relation name 'rel' in the 96 | head/tail/creator triple, eg r1_head(Trip,H), r1_tail(Trip,T), 97 | r1_creator(Trip,C) 98 | 99 | - if I get multi-mode training working then you could do a bit more, 100 | eg train against several preds at once, or include ssl-like 101 | constraints... except, will they work in Tensorlog? not sure...but 102 | you could introduce an explicit entropy penalty for answers to 103 | p_conflict 104 | 105 | p_conflict(Slot,Filler) :- p(Slot,Filler) 106 | -------------------------------------------------------------------------------- /tensorlog/testtf.py: -------------------------------------------------------------------------------- 1 | # (C) William W.
Cohen and Carnegie Mellon University, 2017 2 | 3 | # tensorflowxcomp specific tests 4 | 5 | import os 6 | import unittest 7 | import sys 8 | from tensorlog import xctargets 9 | if xctargets.tf: 10 | import tensorflow as tf 11 | 12 | from tensorlog import simple 13 | from tensorlog import matrixdb 14 | from tensorlog import dbschema 15 | from tensorlog import program 16 | from tensorlog import declare 17 | from tensorlog import testtensorlog 18 | 19 | @unittest.skipUnless(xctargets.tf,"Tensorflow not available") 20 | class TestReuse(unittest.TestCase): 21 | 22 | def setUp(self): 23 | b = simple.Builder() 24 | p,q,sister,child = b.predicates("p q sister child") 25 | X,Y,Z = b.variables("X Y Z") 26 | b += p(X,Y) <= sister(X,Z) & child(Z,Y) 27 | b += q(X,Y) <= sister(X,Y) 28 | factFile = os.path.join(testtensorlog.TEST_DATA_DIR,"fam.cfacts") 29 | self.tlog = simple.Compiler(db=factFile, prog=b.rules) 30 | 31 | def testCombinePC(self): 32 | """ Check that we can reuse the inputs from one tensorlog function in another. 33 | """ 34 | self.f1 = self.tlog.proof_count("p/io") 35 | self.f2 = self.tlog.proof_count("q/io", inputs=[self.tlog.input_placeholder("p/io")]) 36 | self.g = (2*self.f1 + self.f2) 37 | self.checkBehavior() 38 | 39 | def testCombineInf(self): 40 | _1 = self.tlog.inference("p/io") 41 | _2 = self.tlog.inference("q/io", inputs=[self.tlog.input_placeholder("p/io")]) 42 | self.f1 = self.tlog.proof_count("p/io") 43 | self.f2 = self.tlog.proof_count("q/io") 44 | self.g = (2*self.f1 + self.f2) 45 | self.checkBehavior() 46 | 47 | def testCombineLoss(self): 48 | 49 | _1 = self.tlog.loss("p/io") 50 | _2 = self.tlog.loss("q/io", inputs=[self.tlog.input_placeholder("p/io")]) 51 | self.f1 = self.tlog.proof_count("p/io") 52 | self.f2 = self.tlog.proof_count("q/io") 53 | self.g = (2*self.f1 + self.f2) 54 | self.checkBehavior() 55 | 56 | def checkBehavior(self): 57 | tlog = self.tlog 58 | self.assertTrue(tlog.input_placeholder("p/io") is tlog.input_placeholder("q/io")) 59 | 60 | session = tf.Session() 61 | session.run(tf.global_variables_initializer()) 62 | 63 | x = tlog.db.onehot("william").todense() 64 | input_name = tlog.input_placeholder_name("p/io") 65 | y1 = session.run(self.f1, feed_dict={input_name:x}) 66 | dy1 = tlog.db.matrixAsSymbolDict(tlog.xc.unwrapOutput(y1)) 67 | y2 = session.run(self.f2, feed_dict={input_name:x}) 68 | dy2 = tlog.db.matrixAsSymbolDict(tlog.xc.unwrapOutput(y2)) 69 | s = session.run(self.g, feed_dict={input_name:x}) 70 | ds = tlog.db.matrixAsSymbolDict(tlog.xc.unwrapOutput(s)) 71 | self.check_dicts(dy1, {'charlotte': 1.0, 'elizabeth': 1.0, 'caroline': 1.0, 'lucas': 1.0, 'poppy': 1.0}) 72 | self.check_dicts(dy2, {'sarah': 1.0, 'rachel': 1.0, 'lottie': 1.0}) 73 | self.check_dicts(ds, {'sarah': 1.0, 'charlotte': 2.0, 'caroline': 2.0, 'lucas': 2.0, 'rachel': 1.0, 74 | 'poppy': 2.0, 'lottie': 1.0, 'elizabeth': 2.0}) 75 | 76 | def check_dicts(self,actualMat, expected): 77 | actual = actualMat[0] 78 | print('actual: ',actual) 79 | print('expected:',expected) 80 | self.assertEqual(len(list(actual.keys())), len(list(expected.keys()))) 81 | for k in list(actual.keys()): 82 | self.assertAlmostEqual(actual[k], expected[k], delta=0.05) 83 | 84 | # stuck in here because I use Builder, lazy me 85 | class TestTypeInference(unittest.TestCase): 86 | 87 | def testNest(self): 88 | b = simple.Builder() 89 | answer,about,actor,mention = b.predicates("answer,about,actor,mention") 90 | Q,M,A = b.variables("Q,M,A") 91 | b.rules += answer(Q,M) <= about(Q,A) & actor(M,A) 92 | b.rules += 
about(Q,A) <= mention(Q,A) 93 | b.rules.listing() 94 | db = matrixdb.MatrixDB(initSchema=dbschema.TypedSchema()) 95 | db.addLines([ "# :- answer(query_t,movie_t)\n", 96 | "# :- mention(query_t,actor_t)\n", 97 | "# :- actor(actor_t,movie_t)\n", 98 | '\t'.join(['mention','what_was_mel_brooks_in','mel_brooks']) + '\n', 99 | '\t'.join(['actor','young_frankenstein','mel_brooks']) + '\n' 100 | ]) 101 | prog = program.Program(db=db, rules=b.rules) 102 | afun = prog.compile(declare.asMode("answer/io")) 103 | for t in afun.inputTypes: 104 | self.assertTrue(t is not None) 105 | bfun = prog.compile(declare.asMode("about/io")) 106 | for t in bfun.inputTypes: 107 | self.assertTrue(t is not None) 108 | 109 | if __name__=="__main__": 110 | if len(sys.argv)==1: 111 | unittest.main() 112 | -------------------------------------------------------------------------------- /tensorlog/opfunutil.py: -------------------------------------------------------------------------------- 1 | # (C) William W. Cohen and Carnegie Mellon University, 2016 2 | # 3 | # utility classes used by funs.py or ops.py 4 | # 5 | 6 | 7 | class OperatorOrFunction(object): 8 | """TensorLog programs are composed of functions and operators. 9 | Operators are lower-level things that side-effect an environment 10 | (Envir) object. Functions are higher-level. Operators and Functions 11 | have some common operations, mostly needed for visualization and 12 | working with scratchpads. 13 | """ 14 | 15 | #needed for visualizations 16 | 17 | def pprint(self,depth=0): 18 | """Return a list of strings that can be joined to form a textual view 19 | of self. This should include all the substructures. 20 | """ 21 | assert False, 'abstract method called' 22 | 23 | def pprintSummary(self): 24 | """A short summary string used in pprint() describing self. 25 | """ 26 | assert False, 'abstract method called' 27 | 28 | def pprintComment(self): 29 | """A comment/provenance info for self.""" 30 | assert False, 'abstract method called' 31 | 32 | # A TensorLog program is compiled to a tree of functions and 33 | # operators, which may need local memory. For instance, when a 34 | # function is evaluated the output needs to be recorded, because 35 | # it's used in backpropagation. When backpropagation is done the 36 | # deltas need to be stored. A function that is implemented by a 37 | # sequence of operators also stores the environment in which the 38 | # operators were evaluated, which also contains deltas. 39 | # 40 | # Storing this info is a bit messy because we also want 41 | # functions/operators to be thread safe. What's done is to assign 42 | # every function/operator in a tree a unique numeric id, and store 43 | # info in an auxiliary 'scratchpad' object. Info in a scratchpad 44 | # is always indexed by node id. Before using a function/operator 45 | # tree, you should call 'install' on the root of the 46 | # tree. 47 | # 48 | 49 | def install(self,nextId=1): 50 | """Traverse all substructures and assign numeric ids to them.""" 51 | assert False, 'abstract method called' 52 | 53 | def children(self): 54 | """List of substructures.""" 55 | assert False, 'abstract method called' 56 | 57 | 58 | class MutableObject(object): 59 | """An object that one can attach properties to, to put into a 60 | scratchpad dictionary.
Scratchpad's currently have only a few: 61 | attributes .output, .delta, and sometimes .opEnv 62 | """ 63 | pass 64 | 65 | class Scratchpad(object): 66 | """ Space for data, like function outputs and gradients, generated 67 | during eval and backprop. Typically a Scratchpad 'pad' will be 68 | indexed by the numeric id of an OperatorOrFunction object, 69 | eg "pad[id].output = foo" or "pad[id].delta = bar". 70 | """ 71 | def __init__(self): 72 | self.d = dict() 73 | #override pad[id] to access d 74 | def __getitem__(self,key): 75 | if key not in self.d: 76 | self.d[key] = MutableObject() 77 | return self.d[key] 78 | def __setitem__(self,key,val): 79 | if key not in self.d: 80 | self.d[key] = MutableObject() 81 | self.d[key] = val 82 | 83 | # Arguably the environment and scratchpad should be combined, since 84 | # they perform similar tasks. But the environment is indexed by 85 | # variable names and the scratchpad by function/op ids. 86 | 87 | class Envir(object): 88 | """Holds a DB object, and a group of variable bindings for the 89 | variables used in message-passing. The value to which variable 90 | 'foo' is bound is stored in env.register[foo], which is also 91 | written env[foo]. The backprop-ed delta is stored in 92 | env.delta[foo]. 93 | """ 94 | 95 | def __init__(self,db): 96 | self.register = {} 97 | self.delta = {} 98 | self.db = db 99 | def bindList(self,vars,vals): 100 | """Bind each variable in a list to the corresponding value.""" 101 | assert len(vars)==len(vals),"Number of variables (%d) must match number of values (%d)" % (len(vars),len(vals)) 102 | for i in range(len(vars)): 103 | self[vars[i]] = vals[i] 104 | def __repr__(self): 105 | return 'Envir(%r)' % self.register 106 | # 107 | # override env[var] to access 'register' 108 | # 109 | def __getitem__(self,key): 110 | return self.register[key] 111 | def __setitem__(self,key,val): 112 | self.register[key] = val 113 | 114 | -------------------------------------------------------------------------------- /datasets/family/inputs/kinship-test.examples: -------------------------------------------------------------------------------- 1 | i_aunt(angela,Y) +i_aunt(angela,alfonso) +i_aunt(angela,sophia) -i_aunt(angela,marco) -i_aunt(angela,tomaso) -i_aunt(angela,pierro) -i_aunt(angela,francesca) 2 | i_aunt(gina,Y) +i_aunt(gina,alfonso) +i_aunt(gina,sophia) -i_aunt(gina,emilio) 3 | i_brother(alfonso,Y) +i_brother(alfonso,sophia) -i_brother(alfonso,marco) -i_brother(alfonso,tomaso) -i_brother(alfonso,gina) -i_brother(alfonso,angela) -i_brother(alfonso,lucia) -i_brother(alfonso,emilio) 4 | i_brother(emilio,Y) +i_brother(emilio,lucia) -i_brother(emilio,gina) -i_brother(emilio,roberto) -i_brother(emilio,alfonso) -i_brother(emilio,sophia) -i_brother(emilio,maria) 5 | i_daughter(angela,Y) +i_daughter(angela,francesca) +i_daughter(angela,pierro) -i_daughter(angela,marco) -i_daughter(angela,tomaso) -i_daughter(angela,alfonso) -i_daughter(angela,sophia) 6 | i_daughter(lucia,Y) +i_daughter(lucia,roberto) +i_daughter(lucia,maria) -i_daughter(lucia,marco) -i_daughter(lucia,alfonso) -i_daughter(lucia,emilio) -i_daughter(lucia,sophia) 7 | i_daughter(sophia,Y) +i_daughter(sophia,marco) +i_daughter(sophia,lucia) -i_daughter(sophia,tomaso) -i_daughter(sophia,gina) -i_daughter(sophia,angela) -i_daughter(sophia,alfonso) -i_daughter(sophia,emilio) 8 | i_father(marco,Y) +i_father(marco,sophia) -i_father(marco,pierro) -i_father(marco,angela) -i_father(marco,lucia) -i_father(marco,alfonso) -i_father(marco,francesca) 9 | i_father(pierro,Y) 
+i_father(pierro,marco) +i_father(pierro,angela) -i_father(pierro,francesca) 10 | i_father(roberto,Y) +i_father(roberto,lucia) +i_father(roberto,emilio) -i_father(roberto,maria) 11 | i_husband(emilio,Y) +i_husband(emilio,gina) -i_husband(emilio,roberto) -i_husband(emilio,lucia) -i_husband(emilio,alfonso) -i_husband(emilio,sophia) -i_husband(emilio,maria) 12 | i_husband(marco,Y) +i_husband(marco,lucia) -i_husband(marco,pierro) -i_husband(marco,angela) -i_husband(marco,alfonso) -i_husband(marco,francesca) -i_husband(marco,sophia) 13 | i_husband(pierro,Y) +i_husband(pierro,francesca) -i_husband(pierro,marco) -i_husband(pierro,angela) 14 | i_husband(roberto,Y) +i_husband(roberto,maria) -i_husband(roberto,lucia) -i_husband(roberto,emilio) 15 | i_husband(tomaso,Y) +i_husband(tomaso,angela) -i_husband(tomaso,alfonso) -i_husband(tomaso,sophia) 16 | i_mother(francesca,Y) +i_mother(francesca,marco) -i_mother(francesca,pierro) -i_mother(francesca,angela) 17 | i_mother(lucia,Y) +i_mother(lucia,alfonso) +i_mother(lucia,sophia) -i_mother(lucia,marco) -i_mother(lucia,roberto) -i_mother(lucia,emilio) -i_mother(lucia,maria) 18 | i_mother(maria,Y) +i_mother(maria,lucia) +i_mother(maria,emilio) -i_mother(maria,roberto) 19 | i_nephew(alfonso,Y) +i_nephew(alfonso,gina) +i_nephew(alfonso,tomaso) +i_nephew(alfonso,angela) -i_nephew(alfonso,marco) -i_nephew(alfonso,lucia) -i_nephew(alfonso,emilio) -i_nephew(alfonso,sophia) 20 | i_niece(sophia,Y) +i_niece(sophia,tomaso) +i_niece(sophia,angela) +i_niece(sophia,emilio) -i_niece(sophia,marco) -i_niece(sophia,gina) -i_niece(sophia,lucia) -i_niece(sophia,alfonso) 21 | i_sister(angela,Y) +i_sister(angela,marco) -i_sister(angela,tomaso) -i_sister(angela,pierro) -i_sister(angela,alfonso) -i_sister(angela,francesca) -i_sister(angela,sophia) 22 | i_sister(lucia,Y) +i_sister(lucia,emilio) -i_sister(lucia,marco) -i_sister(lucia,roberto) -i_sister(lucia,alfonso) -i_sister(lucia,sophia) -i_sister(lucia,maria) 23 | i_sister(sophia,Y) +i_sister(sophia,alfonso) -i_sister(sophia,marco) -i_sister(sophia,tomaso) -i_sister(sophia,gina) -i_sister(sophia,angela) -i_sister(sophia,lucia) -i_sister(sophia,emilio) 24 | i_son(alfonso,Y) +i_son(alfonso,marco) +i_son(alfonso,lucia) -i_son(alfonso,tomaso) -i_son(alfonso,gina) -i_son(alfonso,angela) -i_son(alfonso,emilio) -i_son(alfonso,sophia) 25 | i_son(emilio,Y) +i_son(emilio,roberto) +i_son(emilio,maria) -i_son(emilio,gina) -i_son(emilio,lucia) -i_son(emilio,alfonso) -i_son(emilio,sophia) 26 | i_son(marco,Y) +i_son(marco,francesca) +i_son(marco,pierro) -i_son(marco,angela) -i_son(marco,lucia) -i_son(marco,alfonso) -i_son(marco,sophia) 27 | i_uncle(emilio,Y) +i_uncle(emilio,alfonso) +i_uncle(emilio,sophia) -i_uncle(emilio,gina) -i_uncle(emilio,roberto) -i_uncle(emilio,lucia) -i_uncle(emilio,maria) 28 | i_uncle(tomaso,Y) +i_uncle(tomaso,alfonso) +i_uncle(tomaso,sophia) -i_uncle(tomaso,angela) 29 | i_wife(angela,Y) +i_wife(angela,tomaso) -i_wife(angela,marco) -i_wife(angela,pierro) -i_wife(angela,alfonso) -i_wife(angela,francesca) -i_wife(angela,sophia) 30 | i_wife(francesca,Y) +i_wife(francesca,pierro) -i_wife(francesca,marco) -i_wife(francesca,angela) 31 | i_wife(gina,Y) +i_wife(gina,emilio) -i_wife(gina,alfonso) -i_wife(gina,sophia) 32 | i_wife(lucia,Y) +i_wife(lucia,marco) -i_wife(lucia,roberto) -i_wife(lucia,alfonso) -i_wife(lucia,emilio) -i_wife(lucia,sophia) -i_wife(lucia,maria) 33 | i_wife(maria,Y) +i_wife(maria,roberto) -i_wife(maria,lucia) -i_wife(maria,emilio) 34 | 
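
A note on the .examples format above: each line pairs a query containing a free variable with whitespace-separated answer atoms, marked + for positive and - for negative examples. The sketch below is a hypothetical helper, not part of the TensorLog API (the real loader is dataset.Dataset.loadProPPRExamples, referenced in datasets/fb15k-speed/expt.py below); it just shows how such a line decomposes:

    def parse_example_line(line):
        #split into the query atom and its +/- labeled answer atoms
        parts = line.split()
        pos = [a[1:] for a in parts[1:] if a.startswith('+')]
        neg = [a[1:] for a in parts[1:] if a.startswith('-')]
        return parts[0], pos, neg

    q, pos, neg = parse_example_line('i_wife(maria,Y) +i_wife(maria,roberto) -i_wife(maria,lucia)')
    #q == 'i_wife(maria,Y)'; pos == ['i_wife(maria,roberto)']; neg == ['i_wife(maria,lucia)']
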
-------------------------------------------------------------------------------- /tensorlog/helper/countmin_embeddings.py: -------------------------------------------------------------------------------- 1 | # first draft at code for doing count-min embeddings 2 | 3 | import collections 4 | import numpy as np 5 | 6 | def embedder_matrix(original_dim,embedded_dim,hash_salt): 7 | num_hashes = len(hash_salt) 8 | def hash_function(d,x): offset = hash("pittsburgh"); return (hash(x+offset)^hash_salt[d]) % embedded_dim 9 | h_rows = [] 10 | for i in range(original_dim): 11 | row = [0.0] * embedded_dim 12 | for d in range(num_hashes): 13 | j = hash_function(d,i) 14 | row[j] = 1.0 15 | h_rows.append(row) 16 | return np.array(h_rows) 17 | 18 | def sample_matrix(original_dim): 19 | m = np.zeros(shape=(original_dim,original_dim)) 20 | m[0,1] = m[0,3] = 1.0 21 | m[1,0] = m[1,2] = 1.0 22 | m[2,1] = m[2,5] = 1.0 23 | m[3,0] = m[3,4] = 1.0 24 | m[4,3] = m[4,7] = 1.0 25 | m[5,2] = m[5,8] = 1.0 26 | m[6,7] = 1.0 27 | m[7,6] = m[7,4] = m[7,8] = 1.0 28 | m[8,7] = m[8,5] = 1.0 29 | return m 30 | 31 | def show(label,mat,code=None,h=None): 32 | print('=' * 10, label, 'shape', mat.shape, '=' * 10) 33 | print(mat) 34 | if code=='onehot': 35 | return pp_decode_onehot(mat) 36 | elif code=='embedded': 37 | return pp_decode_embedded(mat,h) 38 | 39 | def pp_decode_embedded(mat,h): 40 | n_rows_m,n_cols_m = mat.shape 41 | n_rows_h,n_cols_h = h.shape 42 | assert n_cols_m==n_cols_h 43 | result = collections.defaultdict(set) 44 | for r1 in range(n_rows_m): 45 | print('row',r1,'contains embedding:', end=' ') 46 | for r2 in range(n_rows_h): 47 | if np.all(mat[r1,:]>=h[r2,:]): 48 | print(r2, end=' ') 49 | result[r1].add(r2) 50 | print() 51 | return result 52 | 53 | def pp_decode_onehot(mat): 54 | n_rows,n_cols = mat.shape 55 | result = collections.defaultdict(set) 56 | for r in range(n_rows): 57 | print('row',r,'contains:', end=' ') 58 | for c in range(n_cols): 59 | if mat[r,c]!=0: 60 | print(c, end=' ') 61 | result[r].add(c) 62 | print() 63 | return result 64 | 65 | def onehot(i,original_dim): 66 | v = np.zeros(shape=(1,original_dim)) 67 | v[0,i] = 1.0 68 | return v 69 | 70 | # summary: 71 | # 72 | # let N be original space, M be embedding space 73 | # H (h in code) maps k-hot vectors to CM embeddings: 74 | # for all i, H[i,j_k]=1 for D distinct hashes of i, { j_1, ..., j_K } 75 | # i.e., the J-th column of H indicates which indices [in the original N space] get hashed to index J in the c-m space 76 | # 77 | # m is a N-by-N matrix, intended to encode a relation p(X,Y) 78 | # 79 | # 1) to embed a one-hot vector v, compute ev = vH 80 | # 2) to embed a matrix M mapping i to i' in the original space, 81 | # let H1 be a row-normalized version of H, then compute eM = H1^T M H 82 | # Then, absent collisions, ev eM ~= (vM) H 83 | # 3) to see if a row v of an embedded matrix contains i, 84 | # test if np.all( v>= u_iH ), where u_i is one-hot for i 85 | # 4) to estimate (vM)[i,i1] from w = (ev eM), look at 86 | # min{ w[ w >= (u_i1)H ] } ---I think, not tested 87 | 88 | def run_main1(): 89 | original_dim = 10 90 | embedded_dim = 5 91 | 92 | #hash_salt = [hash("william"),hash("cohen"),hash("rubber duckie")] 93 | hash_salt = [hash("william"),hash("cohen")] 94 | h = embedder_matrix(original_dim,embedded_dim,hash_salt) 95 | show('h',h) 96 | m = sample_matrix(original_dim) 97 | show('m',m,code='onehot') 98 | mh = np.dot(m,h) 99 | show('mh',mh,code='embedded',h=h) 100 | #this isn't quite right since you need to allow for possibility 101 | #of 
hash collisions in h 102 | oneByD = np.reciprocal(h.sum(1)) 103 | hTbyD = h.transpose()*oneByD 104 | show('h^T/D',hTbyD) 105 | E_m = np.dot(hTbyD,mh) 106 | show('E_m',E_m) 107 | 108 | def check_results(i): 109 | ui = onehot(i,original_dim) 110 | ui_m = np.dot(ui,m) 111 | baseline = pp_decode_onehot(ui_m) 112 | E_ui = np.dot(ui,h) 113 | E_ui_dot_E_m = np.dot(E_ui,E_m) 114 | proposed = pp_decode_embedded(E_ui_dot_E_m,h) 115 | n = collisions = 0 116 | for i in baseline: 117 | assert i in proposed 118 | for j in baseline[i]: 119 | assert j in proposed[i] 120 | n += 1 121 | for j in proposed[i]: 122 | if j not in baseline[i]: 123 | collisions += 1 124 | print('row',i,'collisions',collisions,'baseline',baseline,'proposed',proposed) 125 | return collisions,n 126 | 127 | tot = tot_collisions = 0 128 | for i in range(original_dim): 129 | c,n = check_results(i) 130 | tot_collisions += c 131 | tot += n 132 | print('tot_collisions',tot_collisions,'tot',tot) 133 | 134 | if __name__ == "__main__": 135 | original_dim = 10 136 | embedded_dim = 5 137 | hash_salt = [hash("william"),hash("cohen")] 138 | H = embedder_matrix(original_dim,embedded_dim,hash_salt) 139 | x = onehot(7,original_dim) 140 | ex = np.dot(x,H) 141 | print('x',x) 142 | print('ex',ex) 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /tensorlog/helper/minerules.py: -------------------------------------------------------------------------------- 1 | from guineapig import * 2 | import gpextras 3 | import sys 4 | import math 5 | 6 | # 7 | # given a 'triples' file, which has tab-separated triples of the form 8 | # (head,relation,tail) generate a file of all plausible rules and 9 | # store in rules.ppr. Uses GuineaPig package. Invocation for a small 10 | # rule set: 11 | # 12 | # python bin/minerules.py --params input:foo.triples --store allRules 13 | # 14 | # for a large one: 15 | # 16 | # python -m mrs_gp --serve 17 | # python bin/minerules.py --opts viewdir:gpfs%3A,target:mrs,parallel:20,input:foo.triples --store allRules 18 | # python -m mrs_gp --shutdown 19 | # 20 | 21 | def Count(v=None, by=lambda x:x): 22 | return Group(inner=v, by=by, retaining=lambda row:1, reducingTo=ReduceToSum(), combiningTo=ReduceToSum()) 23 | 24 | class MineRules(Planner): 25 | 26 | D = GPig.getArgvParams(required=['input']) 27 | #read the triples (head,relation,tail) 28 | triples = ReadLines(D['input']) | Map(by=lambda line:line.strip().split("\t")) 29 | 30 | htCount = Count(triples, by=lambda h_r_t7:(h_r_t7[0],h_r_t7[2])) 31 | rCount = Count(triples, by=lambda h_r_t8:h_r_t8[1]) 32 | 33 | # 34 | #look for entailments: p(X,Y) :- q(X,Y) 35 | # 36 | candidateEntailments = Join( Jin(triples,by=lambda h_r_t:(h_r_t[0],h_r_t[2])), Jin(triples,by=lambda h_r_t1:(h_r_t1[0],h_r_t1[2]))) \ 37 | | Map( by=lambda h1_p_t1_h2_q_t2:((h1_p_t1_h2_q_t2[0][1],h1_p_t1_h2_q_t2[1][1]),(h1_p_t1_h2_q_t2[0][0],h1_p_t1_h2_q_t2[0][2]))) \ 38 | | Filter( by=lambda p_q_h_t:p_q_h_t[0][0]!=p_q_h_t[0][1]) \ 39 | | Count(by=lambda pq_ht:pq_ht[0]) 40 | 41 | scoredEntailments = Join( Jin(candidateEntailments,by=lambda p_q_npq:p_q_npq[0][1]),Jin(rCount,lambda r_nr:r_nr[0])) \ 42 | | Map( by=lambda p__q_npq_q_nq : ((p__q_npq_q_nq[0][0],p__q_npq_q_nq[1][0]),math.log(p__q_npq_q_nq[0][1]/float(p__q_npq_q_nq[1][1]))) ) 43 | 44 | 45 | entailmentsAsRules = Map(scoredEntailments, 46 | by=lambda p_q_score:'%s(X,Y):-%s(X,Y) {if_%s_%s}.\t#score %.3f' % (p_q_score[0][0],p_q_score[0][1],p_q_score[0][0],p_q_score[0][1],p_q_score[1])) 47 | 48 | # 49 | #look for 
inversions: p(X,Y) :- q(Y,X) 50 | # 51 | candidateInversions = Join( Jin(triples,by=lambda h_r_t2:(h_r_t2[0],h_r_t2[2])), Jin(triples,by=lambda h_r_t3:(h_r_t3[2],h_r_t3[0]))) \ 52 | | Map( by=lambda h1_p_t1_h2_q_t29:((h1_p_t1_h2_q_t29[0][1],h1_p_t1_h2_q_t29[1][1]),(h1_p_t1_h2_q_t29[0][0],h1_p_t1_h2_q_t29[0][2]))) \ 53 | | Count(by=lambda pq_ht10:pq_ht10[0]) 54 | 55 | scoredInversions = Join( Jin(candidateInversions,by=lambda p_q_npq4:p_q_npq4[0][1]),Jin(rCount,lambda r_nr5:r_nr5[0])) \ 56 | | Map( by=lambda p__q_npq_q_nq11 : ((p__q_npq_q_nq11[0][0],p__q_npq_q_nq11[1][0]),math.log(p__q_npq_q_nq11[0][1]/float(p__q_npq_q_nq11[1][1]))) ) 57 | 58 | inversionsAsRules = Map(scoredInversions, 59 | by=lambda p_q_score12:'%s(X,Y):-%s(Y,X) {ifInv_%s_%s}.\t#score %.3f' % (p_q_score12[0][0],p_q_score12[0][1],p_q_score12[0][0],p_q_score12[0][1],p_q_score12[1])) 60 | 61 | # 62 | #look for chains: p(X,Y):-q(X,Z),r(Z,Y) 63 | # 64 | 65 | headToTail = Join( Jin(triples,by=lambda h1_r1_t1:h1_r1_t1[2]), Jin(triples,by=lambda h2_r2_t2:h2_r2_t2[0])) \ 66 | | Map(by=lambda h1_r1__mid_mid_r2_t2:(h1_r1__mid_mid_r2_t2[0][0],h1_r1__mid_mid_r2_t2[0][1],h1_r1__mid_mid_r2_t2[1][0],h1_r1__mid_mid_r2_t2[1][1],h1_r1__mid_mid_r2_t2[1][2])) 67 | 68 | candidateChains = Join( Jin(headToTail, by=lambda x_q_z_r_y:(x_q_z_r_y[0],x_q_z_r_y[4])), Jin(triples, by=lambda x_p_y:(x_p_y[0],x_p_y[2])) ) \ 69 | | Map(by=lambda x_q_z_r_y__x_p__y:((x_q_z_r_y__x_p__y[1][1],x_q_z_r_y__x_p__y[0][1],x_q_z_r_y__x_p__y[0][3]),(x_q_z_r_y__x_p__y[0][0],x_q_z_r_y__x_p__y[0][4]))) | Distinct() | Count(by=lambda pqr_xy:pqr_xy[0]) 70 | 71 | qrCount = Map(headToTail, by=lambda x_q_z_r_y6:((x_q_z_r_y6[1],x_q_z_r_y6[3]),(x_q_z_r_y6[0],x_q_z_r_y6[4]))) | Count(by=lambda qr_xy:qr_xy[0]) 72 | 73 | scoredChains = Join( Jin(candidateChains, by=lambda p_q_r_npqr:(p_q_r_npqr[0][1],p_q_r_npqr[0][2])), Jin(qrCount, by=lambda q_r_nqr:(q_r_nqr[0][0],q_r_nqr[0][1])) ) \ 74 | | Map(by=lambda p_q_r_npqr__q__r_nqr: ((p_q_r_npqr__q__r_nqr[0][0],p_q_r_npqr__q__r_nqr[0][1],p_q_r_npqr__q__r_nqr[0][2]),math.log(p_q_r_npqr__q__r_nqr[0][1]/float(p_q_r_npqr__q__r_nqr[1][1]))) ) 75 | 76 | 77 | chainsAsRules = Map(scoredChains, 78 | by=lambda p_q_r_score:'%s(X,Y):-%s(X,Z),%s(Z,Y) {chain_%s_%s_%s}.\t#score %.3f' % (p_q_r_score[0][0],p_q_r_score[0][1],p_q_r_score[0][2],p_q_r_score[0][0],p_q_r_score[0][1],p_q_r_score[0][2],p_q_r_score[1])) 79 | 80 | allRules = Union(entailmentsAsRules, inversionsAsRules, chainsAsRules) | Format() 81 | allRules.opts(storedAt='rules.ppr') 82 | 83 | # always end like this 84 | if __name__ == "__main__": 85 | planner = MineRules() 86 | planner.registerCompiler('mrs',gpextras.MRSCompiler) 87 | planner.main(sys.argv) 88 | -------------------------------------------------------------------------------- /datasets/top-1000-near-google/top-1000-near-google-rule.cfacts: -------------------------------------------------------------------------------- 1 | rule f1 2 | rule f2 3 | rule f3 4 | rule f4 5 | rule f5 6 | rule f6 7 | rule f7 8 | rule f8 9 | rule f9 10 | rule f10 11 | rule f11 12 | rule f12 13 | rule f13 14 | rule f14 15 | rule f15 16 | rule f16 17 | rule f17 18 | rule f18 19 | rule f19 20 | rule f20 21 | rule f21 22 | rule f22 23 | rule f23 24 | rule f24 25 | rule f25 26 | rule f26 27 | rule f27 28 | rule f28 29 | rule f29 30 | rule f30 31 | rule f31 32 | rule f32 33 | rule f33 34 | rule f34 35 | rule f35 36 | rule f36 37 | rule f37 38 | rule f38 39 | rule f39 40 | rule f40 41 | rule f41 42 | rule f42 43 | rule f43 44 | rule f44 45 | rule f45 46 | rule 
f46 47 | rule f47 48 | rule f48 49 | rule f49 50 | rule f50 51 | rule f51 52 | rule f52 53 | rule f53 54 | rule f54 55 | rule f55 56 | rule f56 57 | rule f57 58 | rule f58 59 | rule f59 60 | rule f60 61 | rule f61 62 | rule f62 63 | rule f63 64 | rule f64 65 | rule f65 66 | rule f66 67 | rule f67 68 | rule f68 69 | rule f69 70 | rule f70 71 | rule f71 72 | rule f72 73 | rule f73 74 | rule f74 75 | rule f75 76 | rule f76 77 | rule f77 78 | rule f78 79 | rule f79 80 | rule f80 81 | rule f81 82 | rule f82 83 | rule f83 84 | rule f84 85 | rule f85 86 | rule f86 87 | rule f87 88 | rule f88 89 | rule f89 90 | rule f90 91 | rule f91 92 | rule f92 93 | rule f93 94 | rule f94 95 | rule f95 96 | rule f96 97 | rule f97 98 | rule f98 99 | rule f99 100 | rule f100 101 | rule f101 102 | rule f102 103 | rule f103 104 | rule f104 105 | rule f105 106 | rule f106 107 | rule f107 108 | rule f108 109 | rule f109 110 | rule f110 111 | rule f111 112 | rule f112 113 | rule f113 114 | rule f114 115 | rule f115 116 | rule f116 117 | rule f117 118 | rule f118 119 | rule f119 120 | rule f120 121 | rule f121 122 | rule f122 123 | rule f123 124 | rule f124 125 | rule f125 126 | rule f126 127 | rule f127 128 | rule f128 129 | rule f129 130 | rule f130 131 | rule f131 132 | rule f132 133 | rule f133 134 | rule f134 135 | rule f135 136 | rule f136 137 | rule f137 138 | rule f138 139 | rule f139 140 | rule f140 141 | rule f141 142 | rule f142 143 | rule f143 144 | rule f144 145 | rule f145 146 | rule f146 147 | rule f147 148 | rule f148 149 | rule f149 150 | rule f150 151 | rule f151 152 | rule f152 153 | rule f153 154 | rule f154 155 | rule f155 156 | rule f156 157 | rule f157 158 | rule f158 159 | rule f159 160 | rule f160 161 | rule f161 162 | rule f162 163 | rule f163 164 | rule f164 165 | rule f165 166 | rule f166 167 | rule f167 168 | rule f168 169 | rule f169 170 | rule f170 171 | rule f171 172 | rule f172 173 | rule f173 174 | rule f174 175 | rule f175 176 | rule f176 177 | rule f177 178 | rule f178 179 | rule f179 180 | rule f180 181 | rule f181 182 | rule f182 183 | rule f183 184 | rule f184 185 | rule f185 186 | rule f186 187 | rule f187 188 | rule f188 189 | rule f189 190 | rule f190 191 | rule f191 192 | rule f192 193 | rule f193 194 | rule f194 195 | rule f195 196 | rule f196 197 | rule f197 198 | rule f198 199 | rule f199 200 | rule f200 201 | rule f201 202 | rule f202 203 | rule f203 204 | rule f204 205 | rule f205 206 | rule f206 207 | rule f207 208 | rule f208 209 | rule f209 210 | rule f210 211 | rule f211 212 | rule f212 213 | rule f213 214 | rule f214 215 | rule f215 216 | rule f216 217 | rule f217 218 | rule f218 219 | rule f219 220 | rule f220 221 | rule f221 222 | rule f222 223 | rule f223 224 | rule f224 225 | rule f225 226 | rule f226 227 | rule f227 228 | rule f228 229 | rule f229 230 | rule f230 231 | rule f231 232 | rule f232 233 | rule f233 234 | rule f234 235 | rule f235 236 | rule f236 237 | rule f237 238 | rule f238 239 | rule f239 240 | rule f240 241 | rule f241 242 | rule f242 243 | rule f243 244 | rule f244 245 | rule f245 246 | rule f246 247 | rule f247 248 | rule f248 249 | rule f249 250 | rule f250 251 | rule f251 252 | rule f252 253 | rule f253 254 | rule f254 255 | rule f255 256 | rule f256 257 | rule f257 258 | rule f258 259 | rule f259 260 | rule f260 261 | rule f261 262 | rule f262 263 | rule f263 264 | rule f264 265 | rule f265 266 | rule f266 267 | rule f267 268 | rule f268 269 | rule f269 270 | rule f270 271 | rule f271 272 | rule f272 273 | rule f273 274 | rule f274 275 | 
rule f275 276 | rule f276 277 | rule f277 278 | rule f278 279 | rule f279 280 | rule f280 281 | rule f281 282 | rule f282 283 | rule f283 284 | rule f284 285 | rule f285 286 | rule f286 287 | rule f287 288 | rule f288 289 | rule f289 290 | rule f290 291 | rule f291 292 | rule f292 293 | rule f293 294 | rule f294 295 | rule f295 296 | rule f296 297 | rule f297 298 | rule f298 299 | rule f299 300 | rule f300 301 | rule f301 302 | rule f302 303 | rule f303 304 | rule f304 305 | rule f305 306 | rule f306 307 | rule f307 308 | rule f308 309 | rule f309 310 | rule f310 311 | rule f311 312 | rule f312 313 | rule f313 314 | rule f314 315 | rule f315 316 | rule f316 317 | rule f317 318 | rule f318 319 | rule f319 320 | rule f320 321 | rule f321 322 | rule f322 323 | rule f323 324 | rule f324 325 | rule f325 326 | rule f326 327 | rule f327 328 | rule f328 329 | rule f329 330 | rule f330 331 | rule f331 332 | rule f332 333 | rule f333 334 | rule f334 335 | -------------------------------------------------------------------------------- /datasets/family/inputs/kinship-train.examples: -------------------------------------------------------------------------------- 1 | i_aunt(jennifer,Y) +i_aunt(jennifer,charlotte) +i_aunt(jennifer,colin) -i_aunt(jennifer,christine) -i_aunt(jennifer,james) -i_aunt(jennifer,charles) -i_aunt(jennifer,andrew) 2 | i_aunt(margaret,Y) +i_aunt(margaret,charlotte) +i_aunt(margaret,colin) -i_aunt(margaret,arthur) 3 | i_brother(arthur,Y) +i_brother(arthur,victoria) -i_brother(arthur,charlotte) -i_brother(arthur,penelope) -i_brother(arthur,margaret) -i_brother(arthur,colin) -i_brother(arthur,christopher) 4 | i_brother(colin,Y) +i_brother(colin,charlotte) -i_brother(colin,james) -i_brother(colin,charles) -i_brother(colin,arthur) -i_brother(colin,jennifer) -i_brother(colin,margaret) -i_brother(colin,victoria) 5 | i_brother(james,Y) +i_brother(james,jennifer) -i_brother(james,charlotte) -i_brother(james,christine) -i_brother(james,colin) -i_brother(james,andrew) -i_brother(james,victoria) 6 | i_daughter(charlotte,Y) +i_daughter(charlotte,james) +i_daughter(charlotte,victoria) -i_daughter(charlotte,charles) -i_daughter(charlotte,arthur) -i_daughter(charlotte,jennifer) -i_daughter(charlotte,margaret) -i_daughter(charlotte,colin) 7 | i_daughter(jennifer,Y) +i_daughter(jennifer,andrew) +i_daughter(jennifer,christine) -i_daughter(jennifer,charlotte) -i_daughter(jennifer,james) -i_daughter(jennifer,charles) -i_daughter(jennifer,colin) 8 | i_daughter(victoria,Y) +i_daughter(victoria,penelope) +i_daughter(victoria,christopher) -i_daughter(victoria,charlotte) -i_daughter(victoria,james) -i_daughter(victoria,arthur) -i_daughter(victoria,colin) 9 | i_father(andrew,Y) +i_father(andrew,james) +i_father(andrew,jennifer) -i_father(andrew,christine) 10 | i_father(christopher,Y) +i_father(christopher,arthur) +i_father(christopher,victoria) -i_father(christopher,penelope) 11 | i_father(james,Y) +i_father(james,charlotte) +i_father(james,colin) -i_father(james,christine) -i_father(james,jennifer) -i_father(james,andrew) -i_father(james,victoria) 12 | i_husband(andrew,Y) +i_husband(andrew,christine) -i_husband(andrew,james) -i_husband(andrew,jennifer) 13 | i_husband(arthur,Y) +i_husband(arthur,margaret) -i_husband(arthur,charlotte) -i_husband(arthur,penelope) -i_husband(arthur,colin) -i_husband(arthur,christopher) -i_husband(arthur,victoria) 14 | i_husband(charles,Y) +i_husband(charles,jennifer) -i_husband(charles,charlotte) -i_husband(charles,colin) 15 | i_husband(christopher,Y) 
+i_husband(christopher,penelope) -i_husband(christopher,arthur) -i_husband(christopher,victoria) 16 | i_husband(james,Y) +i_husband(james,victoria) -i_husband(james,charlotte) -i_husband(james,christine) -i_husband(james,jennifer) -i_husband(james,colin) -i_husband(james,andrew) 17 | i_mother(christine,Y) +i_mother(christine,james) +i_mother(christine,jennifer) -i_mother(christine,andrew) 18 | i_mother(penelope,Y) +i_mother(penelope,victoria) -i_mother(penelope,arthur) -i_mother(penelope,christopher) 19 | i_mother(victoria,Y) +i_mother(victoria,charlotte) +i_mother(victoria,colin) -i_mother(victoria,james) -i_mother(victoria,arthur) -i_mother(victoria,penelope) -i_mother(victoria,christopher) 20 | i_nephew(colin,Y) +i_nephew(colin,margaret) +i_nephew(colin,arthur) +i_nephew(colin,charles) +i_nephew(colin,jennifer) -i_nephew(colin,charlotte) -i_nephew(colin,james) -i_nephew(colin,victoria) 21 | i_niece(charlotte,Y) +i_niece(charlotte,margaret) +i_niece(charlotte,arthur) +i_niece(charlotte,charles) +i_niece(charlotte,jennifer) -i_niece(charlotte,james) -i_niece(charlotte,colin) -i_niece(charlotte,victoria) 22 | i_sister(charlotte,Y) +i_sister(charlotte,colin) -i_sister(charlotte,james) -i_sister(charlotte,charles) -i_sister(charlotte,arthur) -i_sister(charlotte,jennifer) -i_sister(charlotte,margaret) -i_sister(charlotte,victoria) 23 | i_sister(victoria,Y) +i_sister(victoria,arthur) -i_sister(victoria,charlotte) -i_sister(victoria,james) -i_sister(victoria,penelope) -i_sister(victoria,colin) -i_sister(victoria,christopher) 24 | i_son(arthur,Y) +i_son(arthur,penelope) +i_son(arthur,christopher) -i_son(arthur,charlotte) -i_son(arthur,margaret) -i_son(arthur,colin) -i_son(arthur,victoria) 25 | i_son(colin,Y) +i_son(colin,james) +i_son(colin,victoria) -i_son(colin,charlotte) -i_son(colin,charles) -i_son(colin,arthur) -i_son(colin,jennifer) -i_son(colin,margaret) 26 | i_son(james,Y) +i_son(james,andrew) +i_son(james,christine) -i_son(james,charlotte) -i_son(james,jennifer) -i_son(james,colin) -i_son(james,victoria) 27 | i_uncle(arthur,Y) +i_uncle(arthur,charlotte) +i_uncle(arthur,colin) -i_uncle(arthur,penelope) -i_uncle(arthur,margaret) -i_uncle(arthur,christopher) -i_uncle(arthur,victoria) 28 | i_uncle(charles,Y) +i_uncle(charles,charlotte) +i_uncle(charles,colin) -i_uncle(charles,jennifer) 29 | i_wife(christine,Y) +i_wife(christine,andrew) -i_wife(christine,james) -i_wife(christine,jennifer) 30 | i_wife(jennifer,Y) +i_wife(jennifer,charles) -i_wife(jennifer,charlotte) -i_wife(jennifer,christine) -i_wife(jennifer,james) -i_wife(jennifer,colin) -i_wife(jennifer,andrew) 31 | i_wife(margaret,Y) +i_wife(margaret,arthur) -i_wife(margaret,charlotte) -i_wife(margaret,colin) 32 | i_wife(victoria,Y) +i_wife(victoria,james) -i_wife(victoria,charlotte) -i_wife(victoria,arthur) -i_wife(victoria,penelope) -i_wife(victoria,colin) -i_wife(victoria,christopher) 33 | -------------------------------------------------------------------------------- /datasets/family/inputs/kinship-rule.cfacts: -------------------------------------------------------------------------------- 1 | 2 | fixedWeight 3 | lr_chain i_aunt_aunt_brother 4 | lr_chain i_aunt_aunt_sister 5 | lr_chain i_aunt_father_mother 6 | lr_chain i_aunt_father_nephew 7 | lr_chain i_aunt_husband_daughter 8 | lr_chain i_aunt_husband_sister 9 | lr_chain i_aunt_husband_wife 10 | lr_chain i_aunt_mother_nephew 11 | lr_chain i_aunt_nephew_son 12 | lr_chain i_aunt_uncle_son 13 | lr_chain i_aunt_wife_son 14 | lr_chain i_aunt_wife_uncle 15 | lr_chain 
i_brother_father_aunt 16 | lr_chain i_brother_father_father 17 | lr_chain i_brother_father_nephew 18 | lr_chain i_brother_father_niece 19 | lr_chain i_brother_nephew_aunt 20 | lr_chain i_brother_nephew_uncle 21 | lr_chain i_brother_son_father 22 | lr_chain i_brother_son_mother 23 | lr_chain i_brother_uncle_daughter 24 | lr_chain i_brother_wife_husband 25 | lr_chain i_daughter_daughter_husband 26 | lr_chain i_daughter_daughter_wife 27 | lr_chain i_daughter_father_aunt 28 | lr_chain i_daughter_father_mother 29 | lr_chain i_daughter_father_nephew 30 | lr_chain i_daughter_father_niece 31 | lr_chain i_daughter_father_uncle 32 | lr_chain i_daughter_father_wife 33 | lr_chain i_daughter_husband_wife 34 | lr_chain i_daughter_mother_mother 35 | lr_chain i_daughter_nephew_daughter 36 | lr_chain i_daughter_niece_brother 37 | lr_chain i_daughter_sister_son 38 | lr_chain i_father_brother_aunt 39 | lr_chain i_father_father_brother 40 | lr_chain i_father_father_sister 41 | lr_chain i_father_husband_mother 42 | lr_chain i_father_mother_aunt 43 | lr_chain i_father_uncle_son 44 | lr_chain i_husband_father_daughter 45 | lr_chain i_husband_father_son 46 | lr_chain i_husband_mother_aunt 47 | lr_chain i_husband_nephew_daughter 48 | lr_chain i_husband_uncle_nephew 49 | lr_chain i_husband_uncle_niece 50 | lr_chain i_husband_wife_son 51 | lr_chain i_mother_father_aunt 52 | lr_chain i_mother_father_father 53 | lr_chain i_mother_father_husband 54 | lr_chain i_mother_father_nephew 55 | lr_chain i_mother_father_niece 56 | lr_chain i_mother_father_uncle 57 | lr_chain i_mother_husband_sister 58 | lr_chain i_mother_mother_brother 59 | lr_chain i_mother_mother_sister 60 | lr_chain i_mother_sister_uncle 61 | lr_chain i_mother_uncle_son 62 | lr_chain i_mother_wife_father 63 | lr_chain i_nephew_father_aunt 64 | lr_chain i_nephew_father_mother 65 | lr_chain i_nephew_father_uncle 66 | lr_chain i_nephew_nephew_husband 67 | lr_chain i_nephew_nephew_wife 68 | lr_chain i_nephew_uncle_son 69 | lr_chain i_nephew_wife_son 70 | lr_chain i_niece_brother_wife 71 | lr_chain i_niece_father_aunt 72 | lr_chain i_niece_father_father 73 | lr_chain i_niece_father_mother 74 | lr_chain i_niece_mother_nephew 75 | lr_chain i_niece_mother_niece 76 | lr_chain i_niece_niece_husband 77 | lr_chain i_niece_niece_wife 78 | lr_chain i_niece_sister_husband 79 | lr_chain i_niece_wife_brother 80 | lr_chain i_niece_wife_son 81 | lr_chain i_sister_brother_sister 82 | lr_chain i_sister_daughter_father 83 | lr_chain i_sister_daughter_mother 84 | lr_chain i_sister_father_wife 85 | lr_chain i_sister_husband_daughter 86 | lr_chain i_sister_mother_husband 87 | lr_chain i_sister_mother_nephew 88 | lr_chain i_sister_mother_niece 89 | lr_chain i_sister_mother_uncle 90 | lr_chain i_sister_mother_wife 91 | lr_chain i_sister_niece_aunt 92 | lr_chain i_sister_niece_uncle 93 | lr_chain i_sister_uncle_son 94 | lr_chain i_sister_wife_husband 95 | lr_chain i_son_brother_daughter 96 | lr_chain i_son_father_aunt 97 | lr_chain i_son_father_mother 98 | lr_chain i_son_father_uncle 99 | lr_chain i_son_nephew_brother 100 | lr_chain i_son_niece_son 101 | lr_chain i_son_son_husband 102 | lr_chain i_son_son_wife 103 | lr_chain i_son_uncle_son 104 | lr_chain i_son_wife_husband 105 | lr_chain i_uncle_brother_mother 106 | lr_chain i_uncle_father_father 107 | lr_chain i_uncle_husband_aunt 108 | lr_chain i_uncle_mother_husband 109 | lr_chain i_uncle_mother_uncle 110 | lr_chain i_uncle_niece_daughter 111 | lr_chain i_uncle_uncle_brother 112 | lr_chain i_uncle_uncle_sister 113 | lr_chain 
i_wife_aunt_nephew 114 | lr_chain i_wife_aunt_niece 115 | lr_chain i_wife_father_aunt 116 | lr_chain i_wife_father_mother 117 | lr_chain i_wife_father_nephew 118 | lr_chain i_wife_father_niece 119 | lr_chain i_wife_husband_sister 120 | lr_chain i_wife_mother_daughter 121 | lr_chain i_wife_mother_son 122 | lr_if i_aunt_aunt 123 | lr_if i_brother_brother 124 | lr_if i_daughter_daughter 125 | lr_if i_father_father 126 | lr_if i_husband_husband 127 | lr_if i_mother_mother 128 | lr_if i_nephew_nephew 129 | lr_if i_niece_niece 130 | lr_ifInv i_aunt_nephew 131 | lr_ifInv i_aunt_niece 132 | lr_ifInv i_brother_sister 133 | lr_ifInv i_daughter_father 134 | lr_ifInv i_daughter_mother 135 | lr_ifInv i_father_daughter 136 | lr_ifInv i_father_son 137 | lr_ifInv i_husband_wife 138 | lr_ifInv i_mother_daughter 139 | lr_ifInv i_mother_son 140 | lr_ifInv i_nephew_aunt 141 | lr_ifInv i_nephew_uncle 142 | lr_ifInv i_niece_aunt 143 | lr_ifInv i_niece_uncle 144 | lr_ifInv i_sister_brother 145 | lr_ifInv i_son_father 146 | lr_ifInv i_son_mother 147 | lr_ifInv i_uncle_nephew 148 | lr_ifInv i_uncle_niece 149 | lr_ifInv i_wife_husband 150 | lr_if i_sister_sister 151 | lr_if i_son_son 152 | lr_if i_uncle_uncle 153 | lr_if i_wife_wife 154 | rule lr_if 155 | rule lr_chain 156 | rule lr_ifInv 157 | -------------------------------------------------------------------------------- /datasets/fb15k-speed/expt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | from tensorlog import comline 5 | from tensorlog import dataset 6 | from tensorlog import declare 7 | from tensorlog import matrixdb 8 | from tensorlog import mutil 9 | from tensorlog import program 10 | from tensorlog import opfunutil 11 | from tensorlog import expt 12 | 13 | def setExptParams(): 14 | print('loading db....') 15 | db = comline.parseDBSpec("tmp-cache/fb15k.db|inputs/fb15k-valid.cfacts") 16 | print('loading program....') 17 | prog = comline.parseProgSpec("inputs/fb15k.ppr",db) 18 | print('loading queries....') 19 | queries = fbQueries(prog,db) 20 | modeSet = set(mode for (mode,_) in queries) 21 | return (db,prog,modeSet,queries) 22 | 23 | def compileAll(db,prog,modeSet,queries): 24 | start = time.time() 25 | k = 0 26 | for mode in modeSet: 27 | if prog.findPredDef(mode): 28 | k += 1 29 | fun = prog.compile(mode) 30 | fps = k/(time.time() - start) 31 | print("compiled",k,"of",len(modeSet),"functions at",fps,"fps") 32 | return fps 33 | 34 | def runNative(db,prog,modeSet,queries): 35 | dset = comline.parseDatasetSpec('tmp-cache/fb15k-valid.dset|inputs/fb15k-valid.examples',db) 36 | #dataset.Dataset.loadProPPRExamples(db,'inputs/fb15k-valid.examples') 37 | start = time.time() 38 | for mode in dset.modesToLearn(): 39 | if prog.findPredDef(mode): 40 | X = dset.getX(mode) 41 | fun = prog.function[(mode, 0)] 42 | fun.eval(db, [X], opfunutil.Scratchpad()) 43 | qps = len(queries)/(time.time() - start) 44 | print("answered",len(queries),"queries at",qps,"qps") 45 | return qps 46 | 47 | def runSequential(db,prog,modeSet,queries): 48 | start = time.time() 49 | k = 0 50 | for (mode,vx) in queries: 51 | fun = prog.function[(mode,0)] 52 | fun.eval(db, [vx], opfunutil.Scratchpad()) 53 | k += 1 54 | if not k%100: print("answered",k,"queries") 55 | qps = len(queries)/(time.time() - start) 56 | print("answered",len(queries),"queries at",qps,"qps") 57 | return qps 58 | 59 | def fbQueries(prog,db): 60 | queries = [] 61 | ignored = 0 62 | for line in open("inputs/fb15k-valid.examples"): 63 | k1 = 
line.find("(") 64 | k2 = line.find(",") 65 | pred = line[:k1] 66 | x = line[k1+1:k2] 67 | mode = declare.asMode("%s/io" % pred) 68 | if prog.findPredDef(mode): 69 | vx = db.onehot(x) 70 | queries.append((mode, vx)) 71 | else: 72 | ignored += 1 73 | print(len(queries), "queries loaded", "ignored", ignored) 74 | return queries 75 | 76 | 77 | def runMain(): 78 | (db,prog,modeSet,queries) = setExptParams() 79 | fps = compileAll(db,prog,modeSet,queries) 80 | qps1 = runSequential(db,prog,modeSet,queries) 81 | qps2 = runNative(db,prog,modeSet,queries) 82 | return (fps,qps1,qps2) 83 | 84 | def runCross(): 85 | (db,prog,modeSet,queries) = setExptParams() 86 | from tensorlog import xctargets 87 | CROSSCOMPILERS = [] 88 | CROSSLEARNERS = {} 89 | if xctargets.theano: 90 | from tensorlog import theanoxcomp 91 | for c in [ 92 | #theanoxcomp.DenseMatDenseMsgCrossCompiler, 93 | theanoxcomp.SparseMatDenseMsgCrossCompiler 94 | ]: 95 | CROSSCOMPILERS.append(c) 96 | CROSSLEARNERS[c]=theanoxcomp.FixedRateGDLearner 97 | if xctargets.tf: 98 | from tensorlog import tensorflowxcomp 99 | for c in [ 100 | #tensorflowxcomp.DenseMatDenseMsgCrossCompiler, 101 | tensorflowxcomp.SparseMatDenseMsgCrossCompiler, 102 | ]: 103 | CROSSCOMPILERS.append(c) 104 | CROSSLEARNERS[c]=tensorflowxcomp.FixedRateGDLearner 105 | results = {} 106 | for compilerClass in CROSSCOMPILERS: 107 | xc = compilerClass(prog) 108 | print(expt.fulltype(xc)) 109 | 110 | # compileAll 111 | start = time.time() 112 | k = 0 113 | # compile 114 | for mode in modeSet: 115 | if not prog.findPredDef(mode):continue 116 | k += 1 117 | xc.ensureCompiled(mode) 118 | fps = k / (time.time() - start) 119 | print("compiled",k,"of",len(modeSet),"functions at",fps,"fps") 120 | 121 | # runSequential 122 | start = time.time() 123 | k = 0 124 | for (mode,vx) in queries: 125 | xc.inferenceFunction(mode)(vx) 126 | k += 1 127 | if not k%100: print("answered",k,"queries") 128 | qps1 = len(queries) / (time.time() - start) 129 | print("answered",len(queries),"queries at",qps1,"qps") 130 | 131 | # runNative 132 | dset = comline.parseDatasetSpec('tmp-cache/fb15k-valid.dset|inputs/fb15k-valid.examples',db) 133 | start = time.time() 134 | for mode in dset.modesToLearn(): 135 | if not prog.findPredDef(mode):continue 136 | X = dset.getX(mode) 137 | xc.inferenceFunction(mode)(X) 138 | qps2 = len(queries) / (time.time() - start) 139 | print("answered",len(queries),"queries at",qps2,"qps") 140 | results[expt.fulltype(xc)] = (fps,qps1,qps2) 141 | return results 142 | 143 | if __name__ == "__main__": 144 | fps,qps1,qps2 = runMain() 145 | if "cross" in sys.argv[1:]: runCross() 146 | -------------------------------------------------------------------------------- /tensorlog/test-data/textcattoy3.cfacts: -------------------------------------------------------------------------------- 1 | # typed variant of the textcat problem 2 | 3 | # :- predict(doc,label) 4 | # :- hasWord(doc,word) 5 | 6 | hasWord dh a 7 | hasWord dh pricy 8 | hasWord dh doll 9 | hasWord dh house 10 | hasWord ft a 11 | hasWord ft little 12 | hasWord ft red 13 | hasWord ft fire 14 | hasWord ft truck 15 | hasWord rw a 16 | hasWord rw red 17 | hasWord rw wagon 18 | hasWord sc a 19 | hasWord sc pricy 20 | hasWord sc red 21 | hasWord sc sports 22 | hasWord sc car 23 | hasWord bk punk 24 | hasWord bk queen 25 | hasWord bk barbie 26 | hasWord bk and 27 | hasWord bk ken 28 | hasWord rb a 29 | hasWord rb little 30 | hasWord rb red 31 | hasWord rb bike 32 | hasWord mv a 33 | hasWord mv big 34 | hasWord mv 7-seater 35 | hasWord mv 
minivan 36 | hasWord mv with 37 | hasWord mv an 38 | hasWord mv automatic 39 | hasWord mv transmission 40 | hasWord hs a 41 | hasWord hs big 42 | hasWord hs house 43 | hasWord hs in 44 | hasWord hs the 45 | hasWord hs suburbs 46 | hasWord hs with 47 | hasWord hs crushing 48 | hasWord hs mortgage 49 | hasWord ji a 50 | hasWord ji job 51 | hasWord ji for 52 | hasWord ji life 53 | hasWord ji at 54 | hasWord ji IBM 55 | hasWord tf a 56 | hasWord tf huge 57 | hasWord tf pile 58 | hasWord tf of 59 | hasWord tf tax 60 | hasWord tf forms 61 | hasWord tf due 62 | hasWord tf yesterday 63 | hasWord jm huge 64 | hasWord jm pile 65 | hasWord jm of 66 | hasWord jm junk 67 | hasWord jm mail 68 | hasWord jm bills 69 | hasWord jm and 70 | hasWord jm catalogs 71 | hasWord pb a 72 | hasWord pb pricy 73 | hasWord pb barbie 74 | hasWord pb doll 75 | hasWord yc a 76 | hasWord yc little 77 | hasWord yc yellow 78 | hasWord yc toy 79 | hasWord yc car 80 | hasWord rb2 a 81 | hasWord rb2 red 82 | hasWord rb2 ten 83 | hasWord rb2 speed 84 | hasWord rb2 bike 85 | hasWord rp a 86 | hasWord rp red 87 | hasWord rp convertible 88 | hasWord rp porshe 89 | hasWord bp a 90 | hasWord bp big 91 | hasWord bp pile 92 | hasWord bp of 93 | hasWord bp paperwork 94 | hasWord he a 95 | hasWord he huge 96 | hasWord he backlog 97 | hasWord he of 98 | hasWord he email 99 | hasWord wt a 100 | hasWord wt life 101 | hasWord wt of 102 | hasWord wt woe 103 | hasWord wt and 104 | hasWord wt trouble 105 | 106 | # :- posPair(word,labelWordPair) 107 | 108 | posPair ten ten_pos 109 | posPair 7-seater 7-seater_pos 110 | posPair IBM IBM_pos 111 | posPair a a_pos 112 | posPair an an_pos 113 | posPair and and_pos 114 | posPair at at_pos 115 | posPair automatic automatic_pos 116 | posPair backlog backlog_pos 117 | posPair barbie barbie_pos 118 | posPair big big_pos 119 | posPair bike bike_pos 120 | posPair bills bills_pos 121 | posPair car car_pos 122 | posPair catalogs catalogs_pos 123 | posPair convertible convertible_pos 124 | posPair crushing crushing_pos 125 | posPair doll doll_pos 126 | posPair due due_pos 127 | posPair email email_pos 128 | posPair fire fire_pos 129 | posPair for for_pos 130 | posPair forms forms_pos 131 | posPair house house_pos 132 | posPair huge huge_pos 133 | posPair in in_pos 134 | posPair job job_pos 135 | posPair junk junk_pos 136 | posPair ken ken_pos 137 | posPair life life_pos 138 | posPair little little_pos 139 | posPair mail mail_pos 140 | posPair minivan minivan_pos 141 | posPair mortgage mortgage_pos 142 | posPair of of_pos 143 | posPair paperwork paperwork_pos 144 | posPair pile pile_pos 145 | posPair porshe porshe_pos 146 | posPair pricy pricy_pos 147 | posPair punk punk_pos 148 | posPair queen queen_pos 149 | posPair red red_pos 150 | posPair speed speed_pos 151 | posPair sports sports_pos 152 | posPair suburbs suburbs_pos 153 | posPair tax tax_pos 154 | posPair the the_pos 155 | posPair toy toy_pos 156 | posPair transmission transmission_pos 157 | posPair trouble trouble_pos 158 | posPair truck truck_pos 159 | posPair wagon wagon_pos 160 | posPair with with_pos 161 | posPair woe woe_pos 162 | posPair yellow yellow_pos 163 | posPair yesterday yesterday_pos 164 | 165 | # :- negPair(word,labelWordPair) 166 | 167 | negPair ten ten_neg 168 | negPair 7-seater 7-seater_neg 169 | negPair IBM IBM_neg 170 | negPair a a_neg 171 | negPair an an_neg 172 | negPair and and_neg 173 | negPair at at_neg 174 | negPair automatic automatic_neg 175 | negPair backlog backlog_neg 176 | negPair barbie barbie_neg 177 | negPair big 
big_neg 178 | negPair bike bike_neg 179 | negPair bills bills_neg 180 | negPair car car_neg 181 | negPair catalogs catalogs_neg 182 | negPair convertible convertible_neg 183 | negPair crushing crushing_neg 184 | negPair doll doll_neg 185 | negPair due due_neg 186 | negPair email email_neg 187 | negPair fire fire_neg 188 | negPair for for_neg 189 | negPair forms forms_neg 190 | negPair house house_neg 191 | negPair huge huge_neg 192 | negPair in in_neg 193 | negPair job job_neg 194 | negPair junk junk_neg 195 | negPair ken ken_neg 196 | negPair life life_neg 197 | negPair little little_neg 198 | negPair mail mail_neg 199 | negPair minivan minivan_neg 200 | negPair mortgage mortgage_neg 201 | negPair of of_neg 202 | negPair paperwork paperwork_neg 203 | negPair pile pile_neg 204 | negPair porshe porshe_neg 205 | negPair pricy pricy_neg 206 | negPair punk punk_neg 207 | negPair queen queen_neg 208 | negPair red red_neg 209 | negPair speed speed_neg 210 | negPair sports sports_neg 211 | negPair suburbs suburbs_neg 212 | negPair tax tax_neg 213 | negPair the the_neg 214 | negPair toy toy_neg 215 | negPair transmission transmission_neg 216 | negPair trouble trouble_neg 217 | negPair truck truck_neg 218 | negPair wagon wagon_neg 219 | negPair with with_neg 220 | negPair woe woe_neg 221 | negPair yellow yellow_neg 222 | negPair yesterday yesterday_neg 223 | 224 | # :- label(label) 225 | 226 | label pos 227 | label neg 228 | 229 | 230 | -------------------------------------------------------------------------------- /datasets/grid/bigexpt.py: -------------------------------------------------------------------------------- 1 | # code for running scalability experiments in JAIR submission 2 | 3 | import sys 4 | import numpy as NP 5 | import random 6 | import math 7 | import time 8 | import scipy 9 | 10 | from tensorlog import comline 11 | from tensorlog import dataset 12 | from tensorlog import declare 13 | from tensorlog import expt 14 | from tensorlog import funs 15 | from tensorlog import interp 16 | from tensorlog import learn 17 | from tensorlog import matrixdb 18 | from tensorlog import ops 19 | from tensorlog import plearn 20 | from tensorlog import program 21 | from tensorlog import simple 22 | 23 | EDGE_WEIGHT = 0.2 24 | SUBGRID = 10 25 | 26 | def nodeName(i,j): 27 | return '%d,%d' % (i,j) 28 | 29 | def generateGrid(n,outf): 30 | fp = open(outf,'w') 31 | for i in range(1,n+1): 32 | for j in range(1,n+1): 33 | for di in [-1,0,+1]: 34 | for dj in [-1,0,+1]: 35 | if (1 <= i+di <= n) and (1 <= j+dj <= n): 36 | fp.write('edge\t%s\t%s\t%f\n' % (nodeName(i,j),nodeName(i+di,j+dj),EDGE_WEIGHT)) 37 | 38 | def generateData(n,trainFile,testFile): 39 | fpTrain = open(trainFile,'w') 40 | fpTest = open(testFile,'w') 41 | r = random.Random() 42 | for i in range(1,n+1): 43 | for j in range(1,n+1): 44 | #target - note early version used i,j < n/2 which is a bug 45 | ti = (i//SUBGRID)*SUBGRID + SUBGRID//2 #integer division, so node names stay ints under python 3 46 | tj = (j//SUBGRID)*SUBGRID + SUBGRID//2 47 | x = nodeName(i,j) 48 | y = nodeName(ti,tj) 49 | fp = fpTrain if r.random()<0.67 else fpTest 50 | fp.write('\t'.join(['path',x,y]) + '\n') 51 | 52 | # parse command line args 53 | def getargs(): 54 | goal = 'acc' 55 | if len(sys.argv)>1: 56 | goal = sys.argv[1] 57 | n = 6 58 | if len(sys.argv)>2: 59 | n = int(sys.argv[2]) 60 | maxD = round(n/2.0) 61 | if len(sys.argv)>3: 62 | maxD = int(sys.argv[3]) 63 | epochs = 30 64 | if len(sys.argv)>4: 65 | epochs = int(sys.argv[4]) 66 | return (goal,n,maxD,epochs) 67 | 68 | # generate all inputs for an accuracy (or timing)
experiment 69 | def genInputs(n): 70 | #generate grid 71 | stem = 'inputs/g%d' % n 72 | 73 | factFile = stem+'.cfacts' 74 | trainFile = stem+'-train.exam' 75 | testFile = stem+'-test.exam' 76 | 77 | generateGrid(n,factFile) 78 | generateData(n,trainFile,testFile) 79 | return (factFile,trainFile,testFile) 80 | 81 | # run timing experiment 82 | def timingExpt(prog,maxD,trainFile,minibatch): 83 | times = [] 84 | print('depth',maxD,'minibatch',minibatch) 85 | ti = interp.Interp(prog) 86 | ti.prog.maxDepth = maxD 87 | tlog = simple.Compiler(db=prog.db,prog=prog) 88 | dset = tlog.load_dataset(trainFile) 89 | if minibatch: 90 | batchSize = minibatch 91 | quitAfter = 1 92 | else: 93 | batchSize = 1 94 | quitAfter = 25 95 | start = time.time() 96 | for k,(mode,(X0,Y0)) in enumerate(tlog.minibatches(dset,batch_size=batchSize)): 97 | print('batch',k) 98 | X = scipy.sparse.csr_matrix(X0) 99 | Y = scipy.sparse.csr_matrix(Y0) 100 | ti.prog.eval(declare.asMode(mode), [X]) 101 | if k>=quitAfter: 102 | break 103 | elapsed = time.time() - start 104 | print(k*batchSize,'examples','miniBatchSize',batchSize,'time',elapsed,'qps',k*batchSize/elapsed) 105 | return elapsed 106 | 107 | # run accuracy experiment 108 | def accExpt(prog,trainFile,testFile,n,maxD,epochs): 109 | print('grid-acc-expt: %d x %d grid, %d epochs, maxPath %d' % (n,n,epochs,maxD)) 110 | trainData = dataset.Dataset.loadExamples(prog.db,trainFile) 111 | testData = dataset.Dataset.loadExamples(prog.db,testFile) 112 | prog.db.markAsParameter('edge',2) 113 | prog.maxDepth = maxD 114 | # 20 epochs and rate=0.01 is ok for grid size 16 depth 10 115 | # then it gets sort of chancy 116 | #learner = learn.FixedRateGDLearner(prog,epochs=epochs,epochTracer=learn.EpochTracer.cheap) 117 | learner = learn.FixedRateGDLearner(prog,epochs=epochs,epochTracer=learn.EpochTracer.cheap,rate=0.005) 118 | plearner = plearn.ParallelFixedRateGDLearner( #alternative parallel learner; not passed to params below 119 | prog, 120 | epochs=epochs, 121 | parallel=40, 122 | miniBatchSize=100, #was the undefined name BATCHSIZE; 100 is an assumed value 123 | regularizer=learn.L2Regularizer(), 124 | epochTracer=learn.EpochTracer.cheap, 125 | rate=0.01) 126 | params = {'prog':prog, 127 | 'trainData':trainData, 'testData':testData, 128 | 'savedTestPredictions':'tmp-cache/test.solutions.txt', 129 | 'savedTestExamples':'tmp-cache/test.examples', 130 | 'learner':learner, 131 | } 132 | NP.seterr(divide='raise') 133 | t0 = time.time() 134 | result = expt.Expt(params).run() 135 | print('elapsed time',time.time()-t0) 136 | return result 137 | 138 | def runMain(): 139 | 140 | # usage: acc [grid-size] [maxDepth] [epochs] 141 | # time [grid-size] [maxDepth] [no-minibatch] 142 | (goal,n,maxD,epochsOrMinibatch) = getargs() 143 | print('args',(goal,n,maxD,epochsOrMinibatch)) 144 | (factFile,trainFile,testFile) = genInputs(n) 145 | 146 | db = matrixdb.MatrixDB.loadFile(factFile) 147 | prog = program.Program.loadRules("grid.ppr",db) 148 | 149 | if goal=='time': 150 | print(timingExpt(prog,maxD,trainFile,epochsOrMinibatch)) 151 | elif goal=='acc': 152 | print(accExpt(prog,trainFile,testFile,n,maxD,epochsOrMinibatch)) 153 | print('prog.maxDepth',prog.maxDepth) 154 | else: 155 | assert False,'bad goal %s' % goal 156 | 157 | if __name__=="__main__": 158 | runMain() 159 | --------------------------------------------------------------------------------
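
For concreteness, a minimal self-contained sketch of the grid facts that generateGrid() in bigexpt.py emits, mirroring its loop with EDGE_WEIGHT=0.2; the 2x2 size here is only for illustration (the script itself is driven as 'python bigexpt.py acc|time [grid-size] [maxDepth] [epochs-or-minibatch]', per getargs() and runMain()):

    #print the weighted edge facts for a tiny 2x2 grid, including self-edges,
    #in the same tab-separated .cfacts format generateGrid() writes to disk
    n, w = 2, 0.2
    for i in range(1, n+1):
        for j in range(1, n+1):
            for di in (-1, 0, 1):
                for dj in (-1, 0, 1):
                    if 1 <= i+di <= n and 1 <= j+dj <= n:
                        print('edge\t%d,%d\t%d,%d\t%f' % (i, j, i+di, j+dj, w))

Each node thus links to itself and its (up to 8) neighbors, and the path/2 rules in grid.ppr are trained to walk from any node to the center of its SUBGRID block.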