├── maint ├── __init__.py ├── remove_blacklisted.py ├── update_best_raw.py ├── update_sentences.py ├── assign_scores_pt.py ├── set_visible.py ├── conceptnet_fixes │ ├── 003_bedume_is_silly.py │ ├── 002_are_for.py │ ├── 001_is_like.py │ ├── 004_bedume_is_still_silly.py │ └── 000_is_for.py ├── fix_stray_spaces.py ├── fix_concept_counts.py ├── undo_globalmind.py ├── nerf_a_user.py ├── fix_stray_spaces2.py ├── update_scores.py ├── fix_people_person.py ├── ratings_to_votes_to_events.py ├── fix_abnormal_concepts.py ├── fix_dup_frames.py ├── check_best_frame.py ├── generalize_dependencies.py ├── count_surfaceforms.py ├── count_assertions.py ├── simple_update_rawassertion_assertion_fkey.py ├── import_conceptnet_zh.py ├── update_rawassertion_assertion_fkey.py ├── fix_raw_duplicates.py ├── extract_concepts.py ├── reconcile_assertions.py ├── compare_sentences.py └── dump_csv.py ├── tools ├── __init__.py ├── make_sqlite.sh ├── make_sqlite.py ├── stats.py ├── load_autocorrector.py ├── cnet_rdf.py ├── dump_to_sqlite.py ├── create_placeholder_users.py └── cnet_n3.py ├── serialize ├── __init__.py └── pyyaml.py ├── conceptnet ├── corpus │ ├── parse │ │ ├── admin.py │ │ ├── __init__.py │ │ ├── to-be-fixed.txt │ │ ├── patterns.pcfg │ │ ├── offline_parser.py │ │ ├── adverbs.py │ │ ├── migrate_templated_qs4e.py │ │ ├── models.py │ │ ├── try_patterns.py │ │ ├── migrate_templated.py │ │ ├── build.py │ │ └── run_parser.py │ ├── migrations │ │ ├── __init__.py │ │ └── 0002_rename_tables.py │ ├── views.py │ ├── __init__.py │ └── admin.py ├── lib │ ├── events │ │ ├── __init__.py │ │ ├── migrations │ │ │ ├── __init__.py │ │ │ └── 0001_initial.py │ │ └── models.py │ ├── voting │ │ ├── templatetags │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── admin.py │ │ ├── models.py │ │ └── views.py │ └── __init__.py ├── migrations │ ├── __init__.py │ └── 0002_rename_tables.py ├── webapi │ ├── __init__.py │ ├── templatetags │ │ ├── __init__.py │ │ └── rst.py │ ├── api.wsgi │ ├── docs.py │ ├── templates │ 
│ └── documentation.txt │ └── urls.py ├── concepttools │ ├── __init__.py │ ├── models.py │ ├── test.txt │ ├── lightning.txt │ ├── context │ │ ├── pink.txt │ │ ├── red.txt │ │ ├── black.txt │ │ ├── grey.txt │ │ ├── purple.txt │ │ ├── brown.txt │ │ ├── yellow.txt │ │ ├── white.txt │ │ ├── blue.txt │ │ ├── orange.txt │ │ └── green.txt │ ├── urls.py │ ├── ocean.txt │ ├── test.txt.html │ ├── amsterdam.txt │ ├── lightning.txt.html │ ├── testwords.html │ └── ConceptNetGUI.py ├── pseudo_auth │ ├── __init__.py │ ├── models.py │ └── backends.py ├── __init__.py ├── analogyspace.py ├── django_settings │ ├── default_db_config.py │ ├── db_downloader.py │ └── __init__.py ├── admin.py ├── network.py └── analogyspace2.py ├── setup.cfg ├── models.pdf ├── doc ├── source │ ├── others.rst │ ├── corpus.rst │ ├── _static │ │ └── graph │ │ │ ├── corpus.pdf │ │ │ ├── others.pdf │ │ │ ├── conceptnet4.pdf │ │ │ ├── conceptnet4.png │ │ │ ├── conceptnet_all.pdf │ │ │ ├── conceptnet_all.png │ │ │ └── others.dot │ ├── install.rst │ ├── index.rst │ └── conf.py ├── Makefile ├── bzr-howto.txt └── zero-to-conceptnet-on-xvm.txt ├── MANIFEST.in ├── test ├── test_normalize.py ├── test_users.py ├── test_analogyspace.py ├── test_conceptnet_queries.py ├── test_denormalized.py └── test_ja_harness.py ├── .gitignore ├── urls.py ├── README.rst ├── manage.py ├── setup.py └── conf └── db_config.py.orig /maint/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /serialize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/corpus/parse/admin.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/lib/events/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/webapi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/concepttools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/corpus/parse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/pseudo_auth/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/corpus/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/webapi/templatetags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conceptnet/lib/events/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /conceptnet/lib/voting/templatetags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [egg_info] 2 | tag_svn_revision = 1 3 | -------------------------------------------------------------------------------- /conceptnet/corpus/views.py: -------------------------------------------------------------------------------- 1 | # Create your views here. 2 | -------------------------------------------------------------------------------- /conceptnet/lib/voting/__init__.py: -------------------------------------------------------------------------------- 1 | VERSION = (0, 1, None) -------------------------------------------------------------------------------- /models.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/models.pdf -------------------------------------------------------------------------------- /doc/source/others.rst: -------------------------------------------------------------------------------- 1 | .. 
_others: 2 | 3 | Other modules 4 | ============= 5 | 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include manage.py 2 | include tools 3 | include test 4 | include serialize 5 | -------------------------------------------------------------------------------- /tools/make_sqlite.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python make_sqlite.py $1 && python dump_to_sqlite.py $1 3 | -------------------------------------------------------------------------------- /conceptnet/concepttools/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 4 | -------------------------------------------------------------------------------- /doc/source/corpus.rst: -------------------------------------------------------------------------------- 1 | .. 
_corpus: 2 | 3 | The :mod:`corpus` module 4 | ======================== 5 | 6 | -------------------------------------------------------------------------------- /conceptnet/corpus/__init__.py: -------------------------------------------------------------------------------- 1 | __import__('os').environ.setdefault('DJANGO_SETTINGS_MODULE', 'conceptnet.django_settings') 2 | -------------------------------------------------------------------------------- /doc/source/_static/graph/corpus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/corpus.pdf -------------------------------------------------------------------------------- /doc/source/_static/graph/others.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/others.pdf -------------------------------------------------------------------------------- /conceptnet/lib/voting/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from voting.models import Vote 3 | 4 | admin.site.register(Vote) 5 | -------------------------------------------------------------------------------- /doc/source/_static/graph/conceptnet4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/conceptnet4.pdf -------------------------------------------------------------------------------- /doc/source/_static/graph/conceptnet4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/conceptnet4.png -------------------------------------------------------------------------------- /conceptnet/__init__.py: 
-------------------------------------------------------------------------------- 1 | __import__('os').environ.setdefault('DJANGO_SETTINGS_MODULE', 'conceptnet.django_settings') 2 | import conceptnet.lib 3 | -------------------------------------------------------------------------------- /doc/source/_static/graph/conceptnet_all.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/conceptnet_all.pdf -------------------------------------------------------------------------------- /doc/source/_static/graph/conceptnet_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/conceptnet_all.png -------------------------------------------------------------------------------- /conceptnet/analogyspace.py: -------------------------------------------------------------------------------- 1 | raise ImportError("conceptnet.analogyspace is deprecated. 
def test_normalize():
    """Check ``en.nl.normalize`` against known phrase / expected-output pairs."""
    expectations = (
        ('they are running', 'run'),
        ('went', 'go'),
    )
    for phrase, expected in expectations:
        assert en.nl.normalize(phrase) == expected
# Walk every concept and flag as not useful those whose text is blacklisted
# by the concept's own language; other concepts are neither changed nor saved.
for concept in Concept.objects.all():
    blacklisted = concept.language.nl.is_blacklisted(concept.text)
    if not blacklisted:
        continue
    concept.useful = False
    concept.save()
# Select the sentences with id below 1367900, in descending id order, and call
# update_consistency() on each of them with batch_size=100.
# NOTE(review): the reason for the 1367900 cutoff is not visible in this
# script -- confirm before re-running it.
sentences_by_descending_id = Sentence.objects.filter(id__lt=1367900).order_by('-id')
queryset_foreach(sentences_by_descending_id,
                 lambda sentence: sentence.update_consistency(),
                 batch_size=100)
@stringfilter
def indent(value, spaces):
    """Prefix every line of ``value`` with ``spaces`` blanks and strip the result.

    ``spaces`` is converted with ``int()``.  ``value`` is split on newline
    characters only.  The joined text is finally passed through ``strip()``,
    so whitespace at the very beginning of the result (including the padding
    added to the first line) and at the very end is removed.
    """
    pad = ' ' * int(spaces)
    padded_lines = []
    for line in value.split('\n'):
        padded_lines.append(pad + line)
    joined = '\n'.join(padded_lines)
    return joined.strip()
def test_basic_analogyspace():
    """Inspect the first entry of the matrix built for 'en' with cutoff=15.

    The entry must unpack as ``((concept1, feature), value)`` where ``feature``
    is ``(filled_side, relation, concept2)``; ``filled_side`` must be 'left'
    or 'right' and the first character of ``relation`` must equal its own
    upper-cased form.
    """
    matrix = conceptnet_2d_from_db('en', cutoff=15)
    first_entry = next(matrix.iteritems())
    (concept1, feature), value = first_entry
    filled_side, relation, concept2 = feature
    assert filled_side in ('left', 'right')
    initial = relation[0]
    assert initial == initial.upper()
def set_visible(concept):
    """Mark ``concept`` as visible and save it, unless its text is blacklisted.

    The check is ``concept.language.nl.is_blacklisted(concept.text)``.  When it
    is true the concept is left untouched and is not saved.
    """
    if concept.language.nl.is_blacklisted(concept.text):
        return
    concept.visible = True
    concept.save()
-------------------------------------------------------------------------------- /maint/conceptnet_fixes/003_bedume_is_silly.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import * 2 | from csc.util import foreach 3 | 4 | bedume = User.objects.get(username='bedume') 5 | activity = Activity.objects.get(name='administrative fiat') 6 | braw = [r for r in bedume.vote_set.all() if isinstance(r.object, RawAssertion)] 7 | for b in braw: 8 | if b.object.assertion.relation.name == 'HasProperty': 9 | print b.object 10 | b.object.set_rating(bedume, 0, activity) 11 | b.object.assertion.set_rating(bedume, 0, activity) 12 | 13 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ConceptNet 2 | ========== 3 | 4 | ConceptNet aims to give computers access to common-sense knowledge, the kind of information that ordinary 5 | people know but usually leave unstated. 6 | 7 | The new version of ConceptNet, **ConceptNet 5**, is now maintained in a separate repository: 8 | 9 | http://github.com/commonsense/conceptnet5/ 10 | 11 | See http://conceptnet5.media.mit.edu for more information. 12 | 13 | If you're interested in ConceptNet, please join the conceptnet-users Google group: 14 | http://groups.google.com/group/conceptnet-users?hl=en 15 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from django.core.management import execute_manager 3 | try: 4 | from csc import django_settings as settings 5 | except ImportError: 6 | import sys 7 | sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. 
It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__) 8 | sys.exit(1) 9 | 10 | if __name__ == "__main__": 11 | execute_manager(settings) 12 | -------------------------------------------------------------------------------- /tools/make_sqlite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | db_name = sys.argv[1] 4 | 5 | from django.conf import settings 6 | settings.configure( 7 | DATABASE_ENGINE = 'sqlite3', 8 | DATABASE_NAME = db_name, 9 | INSTALLED_APPS=( 10 | 'django.contrib.auth', 11 | 'django.contrib.contenttypes', 12 | 'conceptnet.corpus', 13 | 'conceptnet', 14 | 'simplenlp', 15 | 'voting', 16 | 'events', 17 | 'south')) 18 | 19 | from django.core.management import call_command 20 | call_command('syncdb') 21 | call_command('migrate') 22 | 23 | -------------------------------------------------------------------------------- /maint/fix_stray_spaces.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import * 2 | from csc.util import foreach 3 | 4 | def fix_spaces(s): 5 | if (s.surface1.text.startswith(' ') or s.surface2.text.startswith(' ')): 6 | print s 7 | newsurf1 = SurfaceForm.get(s.surface1.text.strip(), s.language, 8 | auto_create=True) 9 | newsurf2 = SurfaceForm.get(s.surface2.text.strip(), s.language, 10 | auto_create=True) 11 | print "=>", 12 | print s.correct_assertion(s.frame, newsurf1, newsurf2) 13 | s.save() 14 | 15 | foreach(RawAssertion.objects.filter(language__id='zh-Hant'), fix_spaces) 16 | 17 | -------------------------------------------------------------------------------- /maint/fix_concept_counts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Concepts keep track of their 
def nuke_it(event):
    """Delete the object an event refers to, together with all of its votes.

    Does nothing when ``event.object`` is ``None``.  Otherwise every vote
    returned by ``event.object.votes.all()`` is deleted first, and then the
    object itself is deleted.  Always returns ``None``.
    """
    # Named ``target`` rather than ``object`` so the builtin is not shadowed.
    target = event.object
    if target is None:
        return
    for vote in target.votes.all():
        vote.delete()
    target.delete()
vote.vote == 1] 16 | 17 | for luser in lusers: 18 | print 19 | print luser 20 | nerf(luser) 21 | 22 | if __name__ == '__main__': nerf_bobman() -------------------------------------------------------------------------------- /conceptnet/concepttools/context/red.txt: -------------------------------------------------------------------------------- 1 | aggression, ambition, anger, arrogance, attention, autumn, blood, bravery, career goals, cheerfulness, christmas, communism, courage, danger, debt, december, desire, determination, devil, drama, driving forces, dynamic, emergency, emotional intensity, energy, eroticism, excitement, fame, fast action, fire, force, gaudiness, gemini, generosity, good fortune, good-tasting, happiness, heat, intense passion, leadership, love, lust, mars, masculinity, passion, power, provoking, radicalism, rage, respect, revolution, risk, romance, sex, shame, socialism, speed, stimulating, stop, strength, summer, survival, urgency, vibrancy, vigor, violence, war, warmth, heart, kill, bleed 2 | -------------------------------------------------------------------------------- /maint/conceptnet_fixes/002_are_for.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import * 2 | from csc.util import foreach 3 | 4 | target_frame = Frame.objects.get(language=en, relation__name='UsedFor', text='{1} is for {2}') 5 | 6 | def queryset(): 7 | frame = Frame.objects.get(text='{1} are {2}', language=en, relation__name='IsA') 8 | got = RawAssertion.objects.filter(language=en, frame=frame) 9 | return got 10 | 11 | def fix(s): 12 | if s.surface2.text.startswith('for '): 13 | print s 14 | newsurf = SurfaceForm.get(s.surface2.text[4:], 'en', auto_create=True) 15 | print "=>", 16 | print s.correct_assertion(target_frame, s.surface1, newsurf) 17 | 18 | foreach(queryset(), fix) 19 | 20 | -------------------------------------------------------------------------------- /maint/fix_stray_spaces2.py: 
-------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import * 2 | from csc.util import foreach 3 | 4 | def fix_spaces(s): 5 | if (s.surface1.text.startswith(' ') or s.surface2.text.startswith(' ')): 6 | print s 7 | newsurf1 = SurfaceForm.get(s.surface1.text.strip(), s.language, 8 | auto_create=True) 9 | newsurf2 = SurfaceForm.get(s.surface2.text.strip(), s.language, 10 | auto_create=True) 11 | #print s.correct_assertion(s.frame, newsurf1, newsurf2) 12 | s.surface1=newsurf1 13 | s.surface2=newsurf2 14 | s.save() 15 | print "=>", 16 | print s 17 | 18 | foreach(RawAssertion.objects.filter(language__id='zh-Hant'), fix_spaces) 19 | 20 | -------------------------------------------------------------------------------- /maint/update_scores.py: -------------------------------------------------------------------------------- 1 | from csc_utils.batch import queryset_foreach 2 | from conceptnet.models import Sentence, Assertion, RawAssertion 3 | 4 | 5 | def update_scores(): 6 | queryset_foreach(Assertion, lambda x: x.update_score(), 7 | batch_size=100) 8 | queryset_foreach(RawAssertion, lambda x: x.update_score(), 9 | batch_size=100) 10 | # queryset_foreach(Sentence.objects.exclude(language__id='en'), lambda x: x.update_score(), batch_size=100) 11 | 12 | def fix_raw_assertion_vote(raw): 13 | for vote in raw.votes.all(): 14 | raw.assertion.set_rating(vote.user, vote.vote) 15 | 16 | def update_votes(): 17 | queryset_foreach(RawAssertion, lambda x: fix_raw_assertion_vote(x), batch_size=100) 18 | 19 | -------------------------------------------------------------------------------- /maint/fix_people_person.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet4.models import RawAssertion, Concept, Assertion,\ 2 | SurfaceForm 3 | from django.db import transaction 4 | 5 | people = Concept.get('people', 'en') 6 | person = Concept.get('person', 'en') 7 | 8 | 
@transaction.commit_on_success 9 | def fix_all(): 10 | for peopleform in people.surfaceform_set.all(): 11 | print peopleform 12 | peopleform.concept = person 13 | peopleform.save() 14 | for raw in RawAssertion.objects.filter(surface1=peopleform): 15 | print raw.update_assertion() 16 | for raw in RawAssertion.objects.filter(surface2=peopleform): 17 | print raw.update_assertion() 18 | 19 | if __name__ == '__main__': fix_all() 20 | 21 | -------------------------------------------------------------------------------- /maint/conceptnet_fixes/001_is_like.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import * 2 | from csc.util import foreach 3 | 4 | target_frame = Frame.objects.get(language=en, relation__name='ConceptuallyRelatedTo', text='{1} is like {2}') 5 | 6 | def queryset(): 7 | frame = Frame.objects.get(text='{1} is {2}', language=en, relation__name='HasProperty') 8 | got = RawAssertion.objects.filter(language=en, frame=frame) 9 | return got 10 | 11 | def fix(s): 12 | if s.surface2.text.startswith('like '): 13 | print s 14 | newsurf = SurfaceForm.get(s.surface2.text[4:], 'en', auto_create=True) 15 | print "=>", 16 | print s.correct_assertion(target_frame, s.surface1, newsurf) 17 | 18 | foreach(queryset(), fix) 19 | 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages 3 | 4 | packages = find_packages(exclude=['maint']) 5 | version_str = '4.0.1' 6 | 7 | setup( name='ConceptNet', 8 | version=version_str, 9 | description='A Python API to a Semantic Network Representation of the Open Mind Common Sense Project', 10 | author='Catherine Havasi, Robert Speer, Jason Alonso, and Kenneth Arnold', 11 | author_email='conceptnet@media.mit.edu', 12 | url='http://conceptnet.media.mit.edu/', 13 | packages=packages, 14 
| include_package_data=False, 15 | install_requires=['csc-utils >= 0.6', 'django', 'simplenlp'], 16 | # Metadata 17 | license = "GPL v3", 18 | ) 19 | -------------------------------------------------------------------------------- /conceptnet/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from conceptnet.models import Frequency, Frame, RawAssertion, Concept,\ 3 | Assertion, Relation 4 | 5 | for model in (RawAssertion, Concept, Assertion, Relation): 6 | admin.site.register(model) 7 | 8 | class FrequencyAdmin(admin.ModelAdmin): 9 | list_display = ('language', 'text', 'value') 10 | list_filter = ('language',) 11 | admin.site.register(Frequency, FrequencyAdmin) 12 | 13 | class FrameAdmin(admin.ModelAdmin): 14 | list_display = ('id', 'language','relation','text','preferred') 15 | list_filter = ('language','relation') 16 | list_per_page = 100 17 | fields = ('relation', 'text', 'language', 'goodness', 'frequency') 18 | admin.site.register(Frame, FrameAdmin) 19 | -------------------------------------------------------------------------------- /maint/ratings_to_votes_to_events.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | import settings 4 | from util import queryset_foreach 5 | from events.models import Event 6 | from voting.models import Vote 7 | from datetime import datetime 8 | from conceptnet4.models import Rating 9 | 10 | def rating_to_vote(r): 11 | obj = r.sentence or r.raw_assertion or r.assertion 12 | score = 0 13 | if r.score > 0: score=1 14 | if r.score < 0: score=-1 15 | Vote.objects.record_vote(obj, r.user, score) 16 | ev = Event.record_event(obj, r.user, r.activity) 17 | ev.timestamp = r.updated 18 | ev.save() 19 | 20 | def progress_callback(num, den): 21 | print num, '/', den 22 | 23 | queryset_foreach(Rating.objects.all(), rating_to_vote) 24 | 25 | 
-------------------------------------------------------------------------------- /conf/db_config.py.orig: -------------------------------------------------------------------------------- 1 | try: 2 | # You may create a db_password.py file in your project with a PASSWORD variable. 3 | # This lets you keep sensitive information out of your source code repositories 4 | # and your installation directories. 5 | from db_password import PASSWORD 6 | except: 7 | PASSWORD = "" 8 | 9 | DB_ENGINE = "postgresql_psycopg2" # 'postgresql', 'postgresql_psycopg2', 'mysql', 'sqlite3' or 'ado_mssql'. 10 | DB_NAME = "" # Or path to database file if using sqlite3. 11 | DB_HOST = "" # Not used with sqlite3. 12 | DB_PORT = "5432" # Not used with sqlite3. 13 | DB_USER = "" # Set to empty string for localhost. Not used with sqlite3. 14 | DB_PASSWORD = PASSWORD # Set to empty string for default. Not used with sqlite3. 15 | DB_SCHEMAS = "public" 16 | -------------------------------------------------------------------------------- /tools/stats.py: -------------------------------------------------------------------------------- 1 | from conceptnet4.models import * 2 | from operator import itemgetter 3 | 4 | def relations_distribution(lang): 5 | return sorted( 6 | ((relation.name, relation.assertion_set.filter(language=lang).count()) 7 | for relation in Relation.objects.filter(description__isnull=False)), 8 | key=itemgetter(1)) 9 | 10 | def sample_assertions(relation, n=10): 11 | return [assertion.nl_repr() for assertion in 12 | Relation.get(relation).assertion_set 13 | .filter(score__gt=0).order_by('?')[:n]] 14 | 15 | def oldest_assertion(lang): 16 | return Assertion.objects.filter(language=lang).order_by('-rawassertion__created')[0] 17 | 18 | 19 | if __name__ == '__main__': 20 | print relations_distribution('en') 21 | 22 | 23 | -------------------------------------------------------------------------------- /conceptnet/webapi/api.wsgi: 
-------------------------------------------------------------------------------- 1 | # commons2.wsgi is configured to live in projects/commons2/deploy. 2 | 3 | import os 4 | import sys 5 | 6 | # redirect sys.stdout to sys.stderr for bad libraries like geopy that uses 7 | # print statements for optional import exceptions. 8 | sys.stdout = sys.stderr 9 | 10 | from os.path import abspath, dirname, join 11 | from site import addsitedir 12 | 13 | addsitedir('/srv/conceptnet/lib/python2.6/site-packages') 14 | addsitedir('/usr/lib/pymodules/python2.6') 15 | sys.path.insert(0, '/srv/conceptnet') 16 | sys.path.insert(0, '/srv/conceptnet/conceptnet') 17 | from django.conf import settings 18 | os.environ["DJANGO_SETTINGS_MODULE"] = "conceptnet.django_settings" 19 | 20 | from django.core.handlers.wsgi import WSGIHandler 21 | application = WSGIHandler() 22 | 23 | -------------------------------------------------------------------------------- /maint/fix_abnormal_concepts.py: -------------------------------------------------------------------------------- 1 | from csc.util import queryset_foreach 2 | from csc.conceptnet.models import Concept, SurfaceForm, Language, Assertion 3 | from django.db import connection 4 | 5 | en = Language.get('en') 6 | 7 | def fix_surface(surface): 8 | norm, residue = en.nl.lemma_split(surface.text) 9 | if norm != surface.concept.text: 10 | print 11 | print "surface:", surface.text.encode('utf-8') 12 | print "concept:", surface.concept.text.encode('utf-8') 13 | print "normal:", norm.encode('utf-8') 14 | surface.update(norm, residue) 15 | 16 | queryset_foreach(SurfaceForm.objects.filter(language=en), 17 | fix_surface, 18 | batch_size=100) 19 | 20 | 21 | # plan: 22 | # fix surface form -> concept mapping 23 | # remove obsolete concepts 24 | -------------------------------------------------------------------------------- /maint/fix_dup_frames.py: -------------------------------------------------------------------------------- 1 | from csc.util import 
queryset_foreach 2 | from csc.conceptnet4.models import Frame 3 | from django.db import connection 4 | def fix_dups(frame): 5 | dups = Frame.objects.filter(language=frame.language, text=frame.text, 6 | relation=frame.relation) 7 | for dup in dups: 8 | if dup.id == frame.id: 9 | continue 10 | print dup 11 | cursor = connection.cursor() 12 | print("UPDATE raw_assertions SET frame_id=%s WHERE frame_id=%s" % (frame.id, dup.id)) 13 | cursor.execute("UPDATE raw_assertions SET frame_id=%s WHERE frame_id=%s" % (frame.id, dup.id)) 14 | dup.delete() 15 | print 16 | 17 | queryset_foreach(Frame.objects.all().order_by('-goodness', 'id'), 18 | fix_dups, 19 | batch_size=100) 20 | 21 | -------------------------------------------------------------------------------- /conceptnet/concepttools/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls.defaults import * 2 | from csamoa.representation.presentation.models import Predicate 3 | 4 | urlpatterns = patterns('csamoa.realm.views', 5 | url(r'^concept/', 'get_stemid'), 6 | url(r'^concept/(?P\d+)/all', 'get_stem_allforms'), 7 | ) 8 | 9 | # URLs: 10 | # GET /concept/?text={text,...}&language={language} 11 | # -> gets concept id(s) for text(s) 12 | # GET /concept/{id}/canonical/ -> gets canonical form for concept 13 | # GET /concept/{id}/all/ -> gets all forms for concept 14 | # GET /concept/{id,...}/context -> gets context for the concept(s) 15 | 16 | 17 | # # Programmatically define the API 18 | # api = { 19 | # 'concept': { 20 | # '__required': { 21 | # 'language': TextField, 22 | # }, 23 | # 'id': Function(get_stemid, 24 | -------------------------------------------------------------------------------- /conceptnet/concepttools/context/black.txt: -------------------------------------------------------------------------------- 1 | anger, anonymity, anti-establishment, authority, bad luck, binding, classic, classy, conventionality, dark, death, depth, devil, dignity, dirt, 
dirtiness, drama, dramatic, elegance, emptiness, evil, fear, fear of the unknown, formality, gloomy, grief, high quality, january, modern music, modernism, modernity, mourning, mysterious, mystery, night, power, prestige, protection, rebellion, reliability, remorse, repelling negativity, reservedness, sadness, secretiveness, seduction, serious, seriousness, sex, sexuality, shapeshifting, sophistication, sorrow, space, strength, strength of character, strong, style, underground, unhappiness, wealth, wicked, 2 | 3 | bear, beetle, berry, boots, cape, cat, clay, goth, hole, ink, leather, oak, panther, pearl, raven, robes, screen, spider, suit, text, universe, void, person, deep, die, sleep 4 | -------------------------------------------------------------------------------- /conceptnet/concepttools/context/grey.txt: -------------------------------------------------------------------------------- 1 | ambivalence, anachronism, astral energies, balance, boredom, clairaudience, clairvoyance, communication, confusion, decay, decrepitude, depression, distinguishment, dreams, dullness, dust, elegance, fear, female power, formality, fright, glamous, goddess, graceful aging, high tech, humility, ill health, industrial, intuition, march, modernity, mourning, neutrality, ornate riches, pollution, psychometry, respect, reverence, sleekness, stability, strong emotions, subtlety, telepathy, urban sprawl, wisdom, 2 | 3 | alien, ash, cardboard, castle, confederate, concrete, battleship, beard, brain, cloud, chrome, computer, dolphin, duality, dust, engine, fighter plane, fish, hair, garbage can, goblin, grave, gui, metal, motor, mountain, mouse, office, pigeon, screw, rabbit, rock, seal, silver, sky, steel, stone, suit, wallflower, wehrmacht, wolf -------------------------------------------------------------------------------- /conceptnet/concepttools/context/purple.txt: -------------------------------------------------------------------------------- 1 | ambition, arrogance, big 
profits, ceremony, compassion, conceit, confusion, contemplation, creativity, cruelty, deeper truth, deja vu, delicacy, devotion, dignity, dreams, ego, elegance, empathy, energy, enlightenment, envy, exaggeration, extravagance, fame, fantasy, flamboyance, gaudiness, hidden knowledge, high aspirations, homosexuality, imagination, independence, influence, intuition, invisible, lesbianism, libra, luxury, magic, majesty, may, meditation, mourning, mystery, nausea, new age spirituality, nobility, november, paganism, pride, profanity, psychic ability, religion, riches, richness, romanticism, royalty, self assurance, self-confidence, sensuality, shadow, sophistication, spiritual connection, spiritual power, spirituality, telepathy, third eye, transformation, unconscious, universal spirit, war, wealth, wicca, wisdom 2 | -------------------------------------------------------------------------------- /conceptnet/concepttools/context/brown.txt: -------------------------------------------------------------------------------- 1 | anachronism, appetite, autumn, basic, boorishness, calm, capricorn, coffee, comfort, conservatism, contentment, credibility, dependability, depth, dirt, down-to-earth, dullness, earth, endurance, fertility, filth, friendliness, friendships, generosity, ground, hard work, hearth, heaviness, home, inexpensive, intimacy, longevity, masculine, materialistic thoughts, natural organisms, nature, nurturing, october, outdoors, passivity, poverty, practicality, productivity, reliability, richness, roughness, rustic, rusticism, scorpio, sensuality, simplicity, special events, stability, steadfastness, strength, substance, tradition, tranquility, wholesomeness, 2 | 3 | bark, bear, beer, bread, branch, chocolate, cocoa, dog, faeces, fox, hair, horse, monk, mud, paper bag, ppop, puddle, rabbit, shoes, soil, squirrel, sugar, tree, trunk, wolf, person 4 | -------------------------------------------------------------------------------- /tools/load_autocorrector.py: 
-------------------------------------------------------------------------------- 1 | from csc.corpus.models import AutocorrectRule, Language 2 | from django.db import transaction 3 | 4 | print "Loading table..." 5 | autocorrect_file = './autocorrect.txt' 6 | autocorrect_kb = {} 7 | items = filter(lambda line:line.strip()!='',open(autocorrect_file,'r').read().split('\n')) 8 | lang_en = Language.objects.get(pk='EN') 9 | 10 | def bulk_commit(lst): 11 | for obj in lst: obj.save() 12 | bulk_commit_wrapped = transaction.commit_on_success(bulk_commit) 13 | 14 | print "Building entries..." 15 | ars = [] 16 | for entry in items: 17 | match = entry.split()[0] 18 | replace_with = ' '.join(entry.split()[1:]) 19 | ar = AutocorrectRule() 20 | ar.language = lang_en 21 | ar.match = match 22 | ar.replace_with = replace_with 23 | ars.append(ar) 24 | 25 | print "Bulk committing..." 26 | bulk_commit_wrapped(ars) 27 | -------------------------------------------------------------------------------- /maint/check_best_frame.py: -------------------------------------------------------------------------------- 1 | from csc.util import queryset_foreach 2 | from csc.conceptnet4.models import Frame, Assertion, RawAssertion, SurfaceForm 3 | from django.db import connection 4 | 5 | def check_frame(assertion): 6 | try: 7 | assertion.best_frame 8 | except Frame.DoesNotExist: 9 | print "No frame for:", assertion 10 | assertion.best_frame = None 11 | assertion.save() 12 | 13 | try: 14 | assertion.best_raw 15 | assertion.best_surface1 16 | assertion.best_surface2 17 | except (RawAssertion.DoesNotExist, SurfaceForm.DoesNotExist): 18 | print "No raw assertion for:", assertion 19 | assertion.best_raw = None 20 | assertion.best_surface1 = None 21 | assertion.best_surface2 = None 22 | assertion.save() 23 | 24 | queryset_foreach(Assertion.objects.all(), check_frame, 25 | batch_size=100) 26 | 27 | -------------------------------------------------------------------------------- 
/maint/generalize_dependencies.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | import settings 4 | 5 | from util import queryset_foreach 6 | from corpus.models import DependencyParse 7 | 8 | def generalize_dep(dep): 9 | if dep.linktype.startswith('prep_') or dep.linktype.startswith('prepc_'): 10 | newlt = 'prep' 11 | elif dep.linktype.startswith('conj_'): 12 | newlt = 'conj' 13 | else: return 14 | 15 | newdep = DependencyParse(sentence_id=dep.sentence_id, 16 | linktype=newlt, 17 | word1=dep.word1, 18 | word2=dep.word2, 19 | index1=dep.index1, 20 | index2=dep.index2) 21 | newdep.save() 22 | 23 | def progress_callback(num, den): 24 | print num, '/', den 25 | 26 | queryset_foreach(DependencyParse.objects.all(), generalize_dep) 27 | 28 | -------------------------------------------------------------------------------- /maint/conceptnet_fixes/004_bedume_is_still_silly.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import * 2 | from csc.conceptnet.analogyspace import * 3 | from csc.util import foreach 4 | 5 | cnet = conceptnet_2d_from_db('en') 6 | aspace = cnet.svd() 7 | 8 | bedume = User.objects.get(username='bedume') 9 | activity = Activity.objects.get(name='administrative fiat') 10 | braw = [r for r in bedume.vote_set.all() if isinstance(r.object, RawAssertion)] 11 | for b in braw: 12 | if b.object.assertion.relation.name == 'IsA': 13 | print b.object 14 | concept = b.object.assertion.concept1.text 15 | if concept in aspace.u.label_list(0): 16 | sim = aspace.u[concept,:].hat() * aspace.u['debbie',:].hat() 17 | if sim > 0.9: 18 | print sim, b.object 19 | #b.object.set_rating(bedume, 0, activity) 20 | #b.object.assertion.set_rating(bedume, 0, activity) 21 | 22 | -------------------------------------------------------------------------------- /conceptnet/concepttools/context/yellow.txt: 
-------------------------------------------------------------------------------- 1 | accelerated learning, action, agility, air, april, avarice, betrayal, breaking mental blocks, brightness, caution, cheerful, cheerfulness, clarity, confidence, cooperation, courage, covetousness, cowardice, cowardness, creativity, curiosity, deceit, desire to improve, devil, disease, dishonesty, dreams, earth, energy, enlightenment, enthusiasm, femininity, follower, friendship, gladness, glory, greed, happiness, harmony, hazard signs, hazards, heat, honour, hope, humour, idealism, illness, imagination, intellect, intelligence, jaundiced, jealousy, joy, leo, liberalism, light, logical imagination, loyalty, mellowness, memory, mental force, mourning, optimism, organisation, perception, philosophy, purity, quarantine, radiance, restlessness, september, sickness, sociability, social energy, summer, sun, sunlight, sunshine, taurus, uncertainty, understanding, warmth, weakness, wealth, wisdom, 2 | person, gold, golden 3 | -------------------------------------------------------------------------------- /maint/count_surfaceforms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from csc.conceptnet4.models import SurfaceForm, RawAssertion 4 | from csc.util import queryset_foreach 5 | from django.db.models import Q 6 | 7 | fixed = 0 8 | 9 | def update_count(surface): 10 | global fixed # module-level tally of surface forms whose stored use_count had to be corrected 11 | num_raws = RawAssertion.objects.filter(Q(surface1=surface) | Q(surface2=surface)).count() 12 | if num_raws != surface.use_count: 13 | fixed += 1 14 | surface.use_count = num_raws 15 | surface.save() 16 | 17 | def update_surfaceform_usecounts(lang): 18 | '''Recompute use_count for every SurfaceForm in language `lang`, print how many were corrected, and return the queryset_foreach status.''' 19 | status = queryset_foreach(SurfaceForm.objects.filter(language=lang), update_count) 20 | print 'Updated counts on %d of %d surface forms' % (fixed, status.total) 21 | return status 22 | 23 | if __name__=='__main__': 24 | import sys 25
| lang = sys.argv[1] 26 | status = update_surfaceform_usecounts(lang) 27 | -------------------------------------------------------------------------------- /maint/conceptnet_fixes/000_is_for.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import * 2 | from csc.util import foreach 3 | 4 | target_frame = Frame.objects.get(language=en, relation__name='UsedFor', text='{1} is for {2}') 5 | 6 | def queryset1(): 7 | frame = Frame.objects.get(text='{1} is {2}', language=en, relation__name='HasProperty') 8 | got = RawAssertion.objects.filter(language=en, frame=frame) 9 | return got 10 | 11 | def queryset2(): 12 | frame = Frame.objects.get(text='{1} is {2}', language=en, relation__name='ReceivesAction') 13 | got = RawAssertion.objects.filter(language=en, frame=frame) 14 | return got 15 | 16 | def fix_is_for(s): 17 | if s.surface2.text.startswith('for '): 18 | print s 19 | newsurf = SurfaceForm.get(s.surface2.text[4:], 'en', auto_create=True) 20 | print "=>", 21 | print s.correct_assertion(target_frame, s.surface1, newsurf) 22 | 23 | foreach(queryset1(), fix_is_for) 24 | 25 | -------------------------------------------------------------------------------- /conceptnet/concepttools/context/white.txt: -------------------------------------------------------------------------------- 1 | air, aries, awareness, birth, charity, chastity, clarity, clean, cleanliness, clinical, clinicism, cold, coldness, coolness, cowardice, dignity, emptiness, fairness, faith, fearfulness, feminine divinity, fire, glory, goddess, good, goodness, harsh, higher self, holiness, hope, humility, innocence, january, light, pale, peace, perfection, pisces, positivity, precision, pristine, purity, reverence, safety, security, self-sacrifice, simplicity, snow, spirituality, sterility, successful innovations, surrender, unimaginative, union, virgin, virginity, void, weak, winter, youth, 2 | 3 | airplane, background, black, bread, bride, cat, 
chocolate, cloud, cocaine, cream, doctor, dove, feather, flag, flour, flower, flutter, fog, ghost, glacier, hair, horse, ice, light, macbook, marble, milk, mist, noise, nurse, paper, panty, polar bear, porcelain, rabbit, racism, rose, salt, sand, silk, socks, snow, swan, wedding, white house, wolf, person 4 | -------------------------------------------------------------------------------- /conceptnet/concepttools/context/blue.txt: -------------------------------------------------------------------------------- 1 | acceptance, affection, air, aquarius, artistry, calm, cleanliness, cold, coldness, comfort, communication, compassion, confidence, consciousness, conservatism, contemplation, coolness, cooperation, creativity, dependability, depression, depth, devotion, distance, earth, empathy, eternity, faith, february, fluidity, formality, freedom. betterment of humanity, friendliness, friendship, gentleness, good fortune, harmony, heaven, heavy, higher thoughts, ice, idealism, ideas, infinity, inner strength, inspiration, intellect, intelligence, july, krishna, light, love, loyalty, masculinity, melancholy, messages, mourning, mystery, non-threatening, obscenity, openness, order, patience, peace, piety, pisces, precision, progress, protection, quiet wisdom, reassurance, relaxation, reliability, religion, rest, sadness, sea, security, serenity, sharing, sincerity, sky, speech, spiritual inspiration, stability, steadfastness, strength, tackiness, technology, tranquility, travel, trust, truth, truthfulness, understanding, unity, virgo, water, winter, wisdom 2 | -------------------------------------------------------------------------------- /conceptnet/concepttools/context/orange.txt: -------------------------------------------------------------------------------- 1 | action, aggression, ambition, amiability, anxiety, appetite, arrogance, assurance, attention-grabbing, attraction, attractiveness, autumn, balance, beginnings, buddhism, business goals, career, charity,
cheap, cheer, cheerfulness, constructiveness, courage, courtesy, creativity, danger, desire, determination, earth, emotional lift, encouragement, endurance, energy, enthusiasm, excitement, expansiveness, fascination, fire, flamboyance, friendliness, friendly, fun kids colour, gaudiness, general success, goals, growing things, happiness, health, heat, hinduism, independence, intense, inviting, joy, justice, knowledge, legal matters, life, lively, low-budget, mental and appetite stimulatant, mind, mood-lightening, overemotion, playfulness, property deals, quick movement, sagittarius, self-assuredness, selling, september, sociability, social force, steadfastness, strength, success, tension, the sun, tropics, uninhibited, vibrancy, vibrant, vitality, warmth, warning, youth, 2 | 3 | bee, brick, bronze, cake, carrot, copper, dragonfly, fire, flame, flower, goldfish, honey, leaf, marmalade, mellon, netherlands, orange, pumpkin, rust, smiley, -------------------------------------------------------------------------------- /maint/count_assertions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from csc.conceptnet4.models import Concept 4 | from csc.util import queryset_foreach 5 | 6 | concepts_fixed = 0 7 | significant = 0 8 | 9 | def fix_concept(concept): 10 | global concepts_fixed, significant 11 | rels = concept.get_assertions(useful_only=True).count() 12 | if rels != concept.num_assertions: 13 | # print '%s: %d->%d' % (concept.canonical_name, concept.num_assertions, rels) 14 | concepts_fixed += 1 15 | if rels > 2: 16 | significant += 1 17 | concept.num_assertions = rels 18 | concept.save() 19 | if not concept.words: 20 | concept.words = len(concept.text.split()) 21 | concept.save() 22 | 23 | def update_assertion_counts(lang): 24 | '''Fix the num_assertions count for each concept''' 25 | status = queryset_foreach(Concept.objects.filter(language=lang), fix_concept) 26 | print 'Fixed %s of %s concepts 
(%s with >2 rels).' % (concepts_fixed, status.total, significant) 27 | return status 28 | 29 | if __name__=='__main__': 30 | import sys 31 | lang = sys.argv[1] 32 | status = update_assertion_counts(lang) 33 | -------------------------------------------------------------------------------- /conceptnet/lib/voting/models.py: -------------------------------------------------------------------------------- 1 | from django.contrib.contenttypes import generic 2 | from django.contrib.contenttypes.models import ContentType 3 | from django.contrib.auth.models import User 4 | from django.db import models 5 | 6 | from voting.managers import VoteManager 7 | 8 | SCORES = ( 9 | (u'+1', +1), 10 | (u'-1', -1), 11 | ) 12 | 13 | class Vote(models.Model): 14 | """ 15 | A vote on an object by a User. 16 | """ 17 | user = models.ForeignKey(User) 18 | content_type = models.ForeignKey(ContentType) 19 | object_id = models.PositiveIntegerField() 20 | object = generic.GenericForeignKey('content_type', 'object_id') 21 | vote = models.SmallIntegerField(choices=SCORES) 22 | 23 | objects = VoteManager() 24 | 25 | class Meta: 26 | db_table = 'votes' 27 | # One vote per user per object 28 | unique_together = (('user', 'content_type', 'object_id'),) 29 | 30 | def __unicode__(self): 31 | return u'%s: %s on %s' % (self.user, self.vote, self.object) 32 | 33 | def is_upvote(self): 34 | return self.vote == 1 35 | 36 | def is_downvote(self): 37 | return self.vote == -1 38 | -------------------------------------------------------------------------------- /maint/simple_update_rawassertion_assertion_fkey.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import RawAssertion, Concept, Assertion 2 | from django.db import transaction 3 | import sys 4 | 5 | no_assertion = set() 6 | nonunique = set() 7 | 8 | @transaction.commit_on_success 9 | def main(): 10 | updated_count = 0 11 | 12 | for raw in 
RawAssertion.objects.filter(predicate__id__isnull=True).iterator(): 13 | assertions = list(Assertion.objects.filter(sentence__id=raw.sentence_id)) 14 | if len(assertions) == 0: 15 | no_assertion.add(raw.id) 16 | elif len(assertions)==1: 17 | updated_count += 1 18 | if updated_count % 1000 == 1: 19 | sys.stderr.write('\r'+str(updated_count)) 20 | sys.stderr.flush() 21 | transaction.commit_if_managed() 22 | raw.predicate = assertions[0] 23 | raw.save() 24 | else: 25 | nonunique.add(raw.id) 26 | 27 | print 'Updated', updated_count, 'assertions' 28 | print 'No assertion for', len(no_assertion), 'assertions' 29 | print 'Non-unique assertion for', len(nonunique), 'assertions' 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /conceptnet/concepttools/ocean.txt: -------------------------------------------------------------------------------- 1 | All day the city's selling something 2 | Always, the busy people spinning 'round 3 | Busier 4 | Dizzier 5 | 'Til they go back home to somewhere 6 | 7 | And taxies stop to say "hello" 8 | "Want a ride? I'll take you there" 9 | "To anywhere, just tell my driver" 10 | 11 | The sun is casting shadows 12 | An afternoon is fading 13 | I ask, but no one knows 14 | The answer to the question 15 | My life is like an island 16 | Where does this ocean go? 17 | 18 | Shyly, a wino sips his wine 19 | Slowly, cause to him that is all that matters 20 | He sees a cat he knows so well 21 | Now sleeping on a bench together 22 | 23 | A woman waiting by herself, selling flowers 24 | "Please buy some, so I can help my daughter, will you?" 25 | 26 | The man with spider eyebrows 27 | Is standing on a corner 28 | "Who wants to see a show?" 
29 | His head looks like a melon 30 | He turns into an alley 31 | Then stops to blow his nose 32 | Sky is filled with neon 33 | The buildings stand electric 34 | And almost seem to glow 35 | Want answers to the question 36 | My life is like an island 37 | Where does the ocean go? 38 | I really want to know 39 | My life is like an island 40 | It's time for me now to fly 41 | Where does the ocean go? 42 | -------------------------------------------------------------------------------- /maint/import_conceptnet_zh.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import * 2 | import codecs 3 | activity, _ = Activity.objects.get_or_create(name='Pet game') 4 | zh = Language.get('zh-Hant') 5 | def run(filename): 6 | f = codecs.open(filename, encoding='utf-8') 7 | count = 0 8 | for line in f: 9 | if filename.endswith('1.txt') and count < 77600: 10 | count += 1 11 | continue 12 | line = line.strip() 13 | if not line: continue 14 | username, frame_id, text1, text2 = line.split(', ') 15 | user, _ = User.objects.get_or_create(username=username, 16 | defaults=dict( 17 | first_name='', 18 | last_name='', 19 | email='', 20 | password='-' 21 | ) 22 | ) 23 | frame = Frame.objects.get(id=int(frame_id)) 24 | assert frame.language == zh 25 | try: 26 | got = RawAssertion.make(user, frame, text1, text2, activity) 27 | print got 28 | except RawAssertion.MultipleObjectsReturned: 29 | print "got multiple" 30 | f.close() 31 | 32 | run('conceptnet_zh_part9.txt') 33 | run('conceptnet_zh_part10.txt') 34 | run('conceptnet_zh_api.txt') 35 | 36 | -------------------------------------------------------------------------------- /conceptnet/network.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tools for working with ConceptNet as a generalized semantic network. 3 | 4 | Requires the NetworkX library. 
5 | """ 6 | import networkx as nx 7 | import codecs 8 | from conceptnet.models import Assertion 9 | 10 | def make_network(lang): 11 | """ 12 | Get the ConceptNet network for a particular language. It takes one 13 | parameter, which is `lang`, the language ID as a string. 14 | """ 15 | assertions = Assertion.useful.filter(language__id=lang) 16 | graph = nx.MultiDiGraph() 17 | for text1, text2, rel, score, freq in assertions.values_list( 18 | 'concept1__text', 'concept2__text', 'relation__name', 'score', 19 | 'frequency__value').iterator(): 20 | if text1 and text2 and text1 != text2: 21 | graph.add_edge(text1, text2, rel=rel, score=score, freq=freq) 22 | return graph 23 | 24 | def export_gml(lang, filename): 25 | f = codecs.open(filename, 'w', encoding='utf-7') 26 | graph = make_network(lang) 27 | nx.write_gml(graph, f) 28 | f.close() 29 | 30 | def export_edgelist(lang, filename): 31 | f = codecs.open(filename, 'w', encoding='utf-8') 32 | graph = make_network(lang) 33 | nx.write_edgelist(graph, f, data=True, delimiter='\t') 34 | f.close() 35 | 36 | -------------------------------------------------------------------------------- /conceptnet/concepttools/context/green.txt: -------------------------------------------------------------------------------- 1 | abundance, aggression, air, august, bad spirits, balance, calm, cancer, change, christmas, coldness, compassion, contemplation, corruption, dependability, devil, diplomacy, disgrace, earth, earth mother, endurance, environment, envy, fauna, fertility, food, fresh, freshness, friendliness, generosity, gentle, go, good luck, good omens, grass, greed, growth, harmony, healing, health, hope, illness, immortality, inexperience, islam, jealousy, joy, life eternal, luck, misfortune, moderation, mold, monetary success, money, natural abundance, nature, nausea, nurturing, pastoral, peace, personal goals, physical healing, prosperity, quiet, quietude, refreshing, renewal, responsiveness, resurrection, safety, self-control, 
class LegacyBackend(object):
    """
    Authentication backend for accounts that still carry a legacy salted
    password.  On a successful legacy login the account is migrated to a
    Django-managed password.
    """

    def authenticate(self, username=None, password=None):
        """
        Return the Django user for `username` if `password` matches the
        legacy hash, otherwise None.

        None is also returned when credentials are missing, when the
        stored password already has Django's 'sha1$' form (so another
        backend should handle it), or when the legacy record has no salt.
        """
        # Without both credentials there is nothing to check; this also
        # avoids a TypeError from `password + '--'` below.
        if username is None or password is None:
            return None

        try:
            # Load user object
            u = LegacyUser.objects.get(username=username)
        except LegacyUser.DoesNotExist:
            return None

        # Abort if Django should handle this
        if u.password.startswith('sha1$'):
            return None

        # A legacy hash cannot be rebuilt without its salt.
        if u.salt is None:
            return None

        # Build Django-compatible password string
        enc_password = 'sha1$--' + u.salt + '--$' + u.password

        # Check password
        if not check_password(password + '--', enc_password):
            return None

        # Migrate them to new passwords.
        u.salt = None
        u.save()
        user = self.get_user(u.id)
        if user is None:
            # No Django user with this id; nothing to log in as.
            return None
        user.set_password(password)
        user.save()
        return user

    def get_user(self, user_id):
        """Return the Django user with primary key `user_id`, or None."""
        try:
            return DjangoUser.objects.get(pk=user_id)
        except DjangoUser.DoesNotExist:
            return None
def setup():
    """
    Module-level fixture: bind the English Language object to the global
    name `en`, which the tests below use in their queries.
    """
    # Without the `global` declaration this assignment would only create
    # a local variable and the tests would not see it.
    global en
    en = Language.get('en')

def test_assertions_exist():
    """At least one assertion exists for each supported language."""
    Assertion.objects.filter(language=en)[0]
    Assertion.objects.filter(language=Language.get('pt'))[0]
    Assertion.objects.filter(language=Language.get('ja'))[0]
    Assertion.objects.filter(language=Language.get('ko'))[0]
    Assertion.objects.filter(language=Language.get('zh-Hant'))[0]

def test_relations():
    """'dog' and 'bark' are linked by a CapableOf assertion."""
    relations = [a.relation.name for a in Assertion.objects.filter(concept1__text='dog', concept2__text='bark', language=en)]
    assert u'CapableOf' in relations

def test_get():
    """Concept.get accepts several surface texts; get_raw accepts 'dog'."""
    Concept.get('dog', 'en')
    Concept.get('the dog', 'en')
    Concept.get('dogs', 'en')
    Concept.get_raw('dog', 'en')

@raises(Concept.DoesNotExist)
def test_normalize():
    """Concept.get_raw raises DoesNotExist for the text 'the dog'."""
    Concept.get_raw('the dog', 'en')

def test_surface_forms():
    """The concept 'run' has the expected surface forms."""
    surfaces = [s.text for s in SurfaceForm.objects.filter(concept__text='run', language=en)]
    assert u'run' in surfaces
    assert u'to run' in surfaces
    assert u'running' in surfaces

@attr('postgres')
def test_raw_assertion_search():
    """Raw assertions can be searched through their surface forms' concepts."""
    raw = RawAssertion.objects.filter(surface1__concept__text='couch',
        surface2__concept__text='sit', language=en)
    assert len(raw) > 0
4 | Also because I watched a movie last night which was hilariously bad , called `` Giant Octopus Versus Mega Shark '' . 5 | Colorless green ideas sleep furiously . . 6 | -------------------------------------------------------------------------------- /conceptnet/concepttools/amsterdam.txt: -------------------------------------------------------------------------------- 1 | I threw away your greatest hits 2 | You left them here the day you split 3 | Your bass guitar and Shagg's CD 4 | Well they don't mean that much to me right now 5 | I'm going through your things 6 | These days, I'm changing all my strings 7 | 8 | Chorus 1: 9 | I'm gonna write you a letter 10 | I'm gonna write you a book 11 | I wanna see your reaction 12 | I wanna see how it looks 13 | 14 | Chorus 2: 15 | From way up on your cloud 16 | Where you've been hiding out 17 | Are you getting somewhere? 18 | Or did you get lost in Amsterdam? 19 | 20 | You won't get too far from me 21 | believing everything you read 22 | You're wasted in the great unknown 23 | and I am finally ready to dispose 24 | of all your vintage clothes 25 | Your drugs and every secret code 26 | 27 | Chorus 1: 28 | I'm gonna write you a letter 29 | I'm gonna write you a book 30 | I wanna see your reaction 31 | I wanna see how it looks 32 | 33 | Chorus 2: 34 | From way up on your cloud 35 | Where you've been hiding out 36 | Are you getting somewhere? 37 | Or did you get lost in Amsterdam? 38 | 39 | From your red balloon you were 40 | a super high tech jet fighter 41 | Floating over planet earth 42 | Come back down here, I'll show you where it hurts 43 | Take this bitter pill 44 | Is it easy to swallow? 45 | 46 | Chorus 1: 47 | I'm gonna write you a letter 48 | I'm gonna write you a book 49 | I wanna see your reaction 50 | I wanna see how it looks 51 | 52 | From way up on your cloud 53 | You're never coming down 54 | Are you getting somewhere? 55 | Or did you get lost in Amsterdam? 
def sort_and_check():
    """
    Walk the zh-Hant raw assertions in sorted order and merge each one
    that is equivalent to the assertion kept just before it.
    """
    candidates = RawAssertion.objects.filter(language__id='zh-Hant').order_by(
        'language', 'surface1__text', 'surface2__text', 'frame__id')
    print("Checking for duplicates.")
    kept = None
    for current in candidates:
        print(current.id)
        if not equivalent(kept, current):
            kept = current
            continue
        message = u"%s[%s] == %s[%s]" % (
            kept, kept.creator.username, current, current.creator.username)
        print(message.encode('utf-8'))
        kept = switch_raw(current, kept)


def equivalent(raw1, raw2):
    """
    Return True when both raw assertions share language id, both surface
    texts and frame id.  A `raw1` of None is never equivalent.
    """
    if raw1 is None:
        return False
    if raw1.language.id != raw2.language.id:
        return False
    if raw1.surface1.text != raw2.surface1.text:
        return False
    if raw1.surface2.text != raw2.surface2.text:
        return False
    return raw1.frame.id == raw2.frame.id


def switch_raw(oldraw, newraw):
    """
    Fold `oldraw` into `newraw`: move over votes from users who have not
    voted on the survivor, drop the rest, delete the loser, and return the
    rescored survivor.
    """
    # avoid the generic username when possible
    survivor, loser = newraw, oldraw
    if survivor.creator.username == 'openmind':
        survivor, loser = loser, survivor
    for vote in loser.votes.all():
        already_voted = Vote.objects.filter(
            user=vote.user, object_id=survivor.id).count()
        if already_voted != 0:
            vote.delete()
        else:
            vote.object = survivor
            vote.save()
    loser.delete()
    survivor.update_score()
    survivor.save()
    return survivor
def in_wordnet(word):
    """
    Return True if `word`, or the base form wordnet.morphy gives for it,
    is present in any of the dictionaries in wordnet.Dictionaries.
    """
    base = wordnet.morphy(word)
    if base is None:
        base = word
    for d in wordnet.Dictionaries.values():
        if base in d or word in d:
            return True
    return False


if __name__=='__main__':
    # Usage: extract_concepts.py LANG OUTFILE
    # Writes the canonical name of every concept that passes both filters
    # below to OUTFILE, one per line.
    import sys
    lang = sys.argv[1]
    outfile = open(sys.argv[2], 'w')
    try:
        # Stopword detector
        from csc.representation.parsing.tools.models import FunctionFamily
        is_stopword = FunctionFamily.build_function_detector(lang, 'stop')

        import cPickle as pickle
        try:
            # NOTE(review): the cache file is trusted local data;
            # unpickling it executes whatever it contains.
            cache = open('concepts_dict.pickle', 'rb')
            try:
                concepts = pickle.load(cache)
            finally:
                cache.close()
        except Exception:
            # Any failure to read the cache means "rebuild it", but
            # Ctrl-C and SystemExit are no longer swallowed.
            concepts_qs = Concept.objects.filter(language=lang, num_predicates__gt=0)
            sys.stderr.write("Constructing concepts dictionary\n")
            concepts = dict(((c.text, c) for c in concepts_qs.iterator()))
            cache = open('concepts_dict.pickle', 'wb')
            try:
                pickle.dump(concepts, cache, -1)
            finally:
                cache.close()

        sys.stderr.write("Filtering concepts\n")
        skipped1 = skipped2 = 0
        for stem_text, concept in concepts.iteritems():
            # Filter 1: every word of the stem must itself be a concept.
            stem_words = stem_text.split(' ')
            if any(((word not in concepts) for word in stem_words)):
                sys.stderr.write("Skipped-1: " + stem_text + "\n")
                skipped1 += 1
                continue
            # Filter 2: every word of the canonical name must be a
            # stopword or be known to WordNet.
            cname = concept.canonical_name
            if any(((not is_stopword(word) and not in_wordnet(word)) for word in cname.split(' '))):
                sys.stderr.write("Skipped-2: " + stem_text + "\n")
                skipped2 += 1
                continue
            outfile.write('%s\n' % cname)
    finally:
        outfile.close()

    print("Skipped1: %d, Skipped2: %d, total: %d" % (skipped1, skipped2, len(concepts)))
[0.05] | V RP [0.05] 14 | VG -> VBG [0.8] | VB VBG [0.1] | VBG RP [0.1] 15 | VP -> V [0.2] | V NP [0.15] | V PP [0.15] | V NP PP [0.1] 16 | VP -> STATEVB NP [0.1] | STATEVB AP [0.1] | VP ADV [0.1] 17 | VP -> ADVP V [0.02] | ADVP V NP [0.02] | ADVP V PP [0.02] | ADVP V NP PP [0.02] 18 | VP -> ADVP STATEVB NP [0.01] | ADVP STATEVB AP [0.01] 19 | STATEVB -> BE [0.5] | CHANGE [0.5] 20 | POST -> [0.9] | VBN PP [0.03] | WDT VP [0.04] | WDT S [0.03] 21 | S -> NP VP [1.0] 22 | XP -> NP [0.4] | VP [0.3] | S [0.3] 23 | PASV -> VBN [0.4] | PASV PP [0.3] | PASV ADV [0.3] 24 | BEWORD -> 'be' [0.1] | 'is' [0.15] | 'are' [0.15] | 'was' [0.1] | 'being' [0.1] 25 | BEWORD -> 'were' [0.1] | 'been' [0.1] | "'re" [0.1] | "'m" [0.1] 26 | BE -> BEWORD [0.8] | MD BEWORD [0.1] | MD RB BEWORD [0.1] 27 | MD -> 'can' [1.0] 28 | HAVE -> 'has' [0.25] | 'have' [0.25] | 'contain' [0.25] | 'contains' [0.25] 29 | DO -> 'do' [0.4] | 'does' [0.3] | 'did' [0.3] 30 | CHANGE -> 'get' [0.25] | 'gets' [0.25] | 'become' [0.25] | 'becomes' [0.25] 31 | ADV -> RB [0.5] | RP [0.3] | RBR [0.2] 32 | ADVP -> [0.9] | RB [0.025] | RB ADVP [0.025] | MD ADVP [0.025] | DO ADVP [0.025] 33 | ADVP1 -> RB [0.25] | RB ADVP [0.25] | MD ADVP [0.25] | DO ADVP [0.25] 34 | ADVP2 -> [0.9] | RB [0.075] | RB ADVP2 [0.025] 35 | -------------------------------------------------------------------------------- /conceptnet/corpus/parse/offline_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys, traceback 3 | from pcfgpattern import pattern_parse 4 | import yaml 5 | from conceptnet.models import Sentence, Language 6 | from django.core.paginator import Paginator 7 | #from django.db import transaction 8 | 9 | def process_sentence(sentence): 10 | print sentence.text.encode('utf-8') 11 | _, frametext, reltext, matches = pattern_parse(sentence.text) 12 | if reltext is None or reltext == 'junk': return [] 13 | else: 14 | return [dict(id=sentence.id, 
frametext=frametext, reltext=reltext, 15 | matches=matches)] 16 | 17 | def run(file, start_page=1, end_page=1000000): 18 | all_sentences = Sentence.objects.filter(language=Language.get('en')).order_by('id') 19 | paginator = Paginator(all_sentences,100) 20 | #pages = ((i,paginator.page(i)) for i in range(start_page,paginator.num_pages)) 21 | 22 | def do_batch(sentences): 23 | preds = [] 24 | for sentence in sentences: 25 | try: 26 | preds.extend(process_sentence(sentence)) 27 | # changed to an improbable exception for now 28 | except Exception, e: 29 | # Add sentence 30 | e.sentence = sentence 31 | 32 | # Extract traceback 33 | e_type, e_value, e_tb = sys.exc_info() 34 | e.tb = "\n".join(traceback.format_exception( e_type, e_value, e_tb )) 35 | 36 | # Raise again 37 | raise e 38 | file.write('\n--- ') 39 | yaml.dump_all(preds, file) 40 | 41 | # Process sentences 42 | page_range = [p for p in paginator.page_range if p >= start_page and p < 43 | end_page] 44 | for i in page_range: 45 | sentences = paginator.page(i).object_list 46 | do_batch(sentences) 47 | 48 | 49 | if __name__ == '__main__': 50 | start_page = int(sys.argv[1]) 51 | end_page = int(sys.argv[2]) 52 | out = open(sys.argv[3], 'w+') 53 | run(out, start_page, end_page) 54 | 55 | -------------------------------------------------------------------------------- /conceptnet/webapi/docs.py: -------------------------------------------------------------------------------- 1 | from conceptnet.models import * 2 | from piston.handler import BaseHandler 3 | from piston.doc import generate_doc 4 | from conceptnet.webapi import handlers 5 | 6 | from django.test.client import Client 7 | from django.shortcuts import render_to_response 8 | from django.template import RequestContext, Context, loader 9 | from django.http import HttpResponse 10 | 11 | from docutils.core import publish_string 12 | 13 | API_BASE = 'http://openmind.media.mit.edu' 14 | 15 | client = Client() 16 | def documentation_view(request): 17 | docs = [] 18 
| for klass in handlers.__dict__.values(): 19 | if isinstance(klass, type) and issubclass(klass, BaseHandler): 20 | doc = generate_doc(klass) 21 | if doc.get_resource_uri_template(): 22 | doc.useful_methods = [m for m in doc.get_all_methods() if m.get_doc()] 23 | if hasattr(klass, 'example_args'): 24 | args = klass.example_args 25 | example_url = doc.get_resource_uri_template() 26 | for arg, value in args.items(): 27 | example_url = example_url.replace('{%s}' % arg, str(value)) 28 | doc.example_url = example_url+'query.yaml' 29 | doc.example_result = client.get(doc.example_url).content 30 | doc.uri_template = doc.get_resource_uri_template() 31 | docs.append(doc) 32 | elif hasattr(klass, 'example_uri'): 33 | doc = generate_doc(klass) 34 | example_url = klass.example_uri 35 | doc.example_url = example_url+'query.yaml' 36 | doc.example_result = client.get(doc.example_url).content 37 | doc.uri_template = klass.example_uri_template 38 | docs.append(doc) 39 | docs.sort(key=lambda doc: doc.uri_template) 40 | t = loader.get_template('documentation.txt') 41 | rst = t.render(Context({'docs': docs, 'API_BASE': API_BASE})) 42 | return HttpResponse(rst, mimetype='text/plain') 43 | -------------------------------------------------------------------------------- /maint/reconcile_assertions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from csc.conceptnet.models import * 3 | from csc.corpus.models import * 4 | from django.contrib.auth import * 5 | from django.db import transaction 6 | 7 | den = Assertion.objects.filter(raw__isnull=True).count() 8 | 9 | # Add raw assertions to predicates created on Ruby Commons. 
@transaction.commit_on_success
def switch_raw():
    """
    Swap text1 and text2 on raw assertions whose texts are reversed
    relative to their assertion.

    A raw assertion is swapped when its normalized text2 equals the
    assertion's normalized text1, its normalized text1 equals the
    assertion's normalized text2, and the assertion's two stems differ.
    A progress counter is printed every 1000 assertions.
    """
    queryset = Assertion.objects.all().select_related('raw')
    for seen, assertion in enumerate(queryset):
        if seen % 1000 == 0:
            print(seen)

        if assertion.language.nl.normalize(assertion.text1) != \
                assertion.language.nl.normalize(assertion.raw.text2):
            continue
        if assertion.language.nl.normalize(assertion.text2) != \
                assertion.language.nl.normalize(assertion.raw.text1):
            continue
        if assertion.stem1.text == assertion.stem2.text:
            continue

        swapped_first, swapped_second = assertion.raw.text2, assertion.raw.text1
        assertion.raw.text1 = swapped_first
        assertion.raw.text2 = swapped_second
        assertion.raw.save()
        print(assertion)
        print(assertion.raw)
        print('')
def check_polarity():
    """
    Print every assertion whose polarity differs from that of its raw
    assertion, together with both sentences and the assertion's ratings.
    """
    for a in Assertion.objects.all().select_related('raw'):
        if a.polarity != a.raw.polarity:
            print a.sentence
            print a.raw.sentence
            print a
            print a.raw
            print a.rating_set.all()
            print

#check_polarity()

# conclusion: not worth fixing. The cases where they conflict are all generally
# ugly, but the raw assertions (which we're keeping) are closer to correct.
#
# other conclusion: do not use the old csamoa ratings.

def basically_the_same(s1, s2):
    """
    Return True if the two strings are equal after canonicalization
    (leading/trailing periods and spaces are stripped).
    """
    def canonical(s):
        # NOTE(review): as written, the replace() swaps a space for a
        # space and is a no-op; presumably it was meant to collapse double
        # spaces -- confirm against the repository copy of this file.
        return s.replace(' ', ' ').strip('. ')
    return canonical(s1) == canonical(s2)

def check_raw_mistakes():
    """
    Print diagnostics for every raw assertion whose generated sentence
    (nl_repr) does not match the text of the sentence it is attached to.
    """
    for ra in RawAssertion.objects.all().select_related('sentence'):
        rawsent = ra.nl_repr()
        sent = ra.sentence.text
        if not basically_the_same(rawsent, sent):
            print ra
            print repr(rawsent)
            print repr(sent)
            print "batch:", ra.batch
            print "predicate:", ra.predicate
            print "frame:", ra.frame.id, ra.frame
            betterone = False
            # NOTE(review): this compares `rawsent` (ra's own rendering)
            # with each sibling's rendering; if `ra` is itself in the
            # set, `betterone` is always True.  Possibly `sent` was
            # intended -- confirm.
            for r2 in ra.sentence.rawassertion_set.all():
                if basically_the_same(rawsent, r2.nl_repr()):
                    betterone = True
                    break
            if ra.predicate is None and betterone:
                print "This raw predicate should be deleted."
            print

@transaction.commit_on_success
def unswitch_raw():
    """
    Swap text1 and text2 on the raw assertions of batch 136 that have no
    predicate and use frame 1384, 1387 or 1420, printing each one changed.
    """
    evilbatch = Batch.objects.get(id=136)
    for ra in RawAssertion.objects.filter(batch=evilbatch).select_related('frame'):
        if ra.predicate is None and ra.frame.id in [1384, 1387, 1420]:
            text1 = ra.text2
            text2 = ra.text1
            ra.text1 = text1
            ra.text2 = text2
            ra.save()
            print ra

# Runs the fix as soon as this script is executed or imported.
unswitch_raw()
def map_adverb(adv):
    """
    Map an adverb phrase to a Frequency object from `dbfreqs`.

    The phrase is split on whitespace and lower-cased; the lowest value
    that `frequencies` gives for any of its words selects the result.
    A phrase with no known word (including an empty one) maps to the
    entry for 5.
    """
    words = [w.lower() for w in adv.split()]
    known = [frequencies[word] for word in words if word in frequencies]
    # The most negative/rarest word wins; 5 is the fallback key.
    minfreq = min(known) if known else 5
    return dbfreqs[minfreq]


def demo():
    """
    Print each distinct adverb found under matches['a'] in
    delayed_sentences.yaml, followed by the frequency it maps to.
    """
    adverbs = set()
    stream = open('delayed_sentences.yaml')
    try:
        # NOTE(review): yaml.load_all can construct arbitrary objects; it
        # is kept because the file is locally produced.  Switch to
        # yaml.safe_load_all if the input may ever be untrusted.
        for entry in yaml.load_all(stream):
            if entry is None:
                continue
            matches = entry.get('matches', {})
            adv = matches.get('a')
            if adv and adv not in adverbs:
                print("%s %s" % (adv, map_adverb(adv)))
                adverbs.add(adv)
    finally:
        # The original never closed this handle.
        stream.close()
def _mkdir(newdir):
    """
    http://code.activestate.com/recipes/82465/

    works the way a good mkdir should :)
        - already exists, silently complete
        - regular file in the way, raise an exception
        - parent directory(ies) does not exist, make them as well
    """
    if os.path.isdir(newdir):
        return
    if os.path.isfile(newdir):
        raise OSError("A file with the same name as the desired " \
                      "directory, '%s', already exists." % newdir)
    head, tail = os.path.split(newdir)
    if head and not os.path.isdir(head):
        _mkdir(head)
    if tail:
        os.mkdir(newdir)


def _percent_done(count, block_size, total_size):
    """
    Return download progress as an int from 0 to 100, or None when the
    total size is unknown (reported as 0 or a negative number).
    """
    if total_size <= 0:
        return None
    # The last block usually overshoots the total, so cap at 100.
    return min(100, int(count * block_size * 100 / total_size))


def download(rem_filename, dest_filename):
    """
    Download the tarball at URL `rem_filename` into the directory of
    `dest_filename` and extract the member named like `dest_filename`'s
    base name there.  Progress is written to stdout.  Returns True.
    """
    dir = os.path.dirname(dest_filename)
    member = os.path.basename(dest_filename)
    _mkdir(dir)
    # os.path.join keeps the tarball relative when `dir` is empty; plain
    # concatenation would have pointed at the filesystem root.
    tar_filename = os.path.join(dir, 'ConceptNet-sqlite.tar.gz')

    def dlProgress(count, blockSize, totalSize):
        percent = _percent_done(count, blockSize, totalSize)
        if percent is None:
            sys.stdout.write("\r" + rem_filename + "...")
        else:
            sys.stdout.write("\r" + rem_filename + "... %2d%%" % percent)
        sys.stdout.flush()

    urllib.urlretrieve(rem_filename, tar_filename, reporthook=dlProgress)
    tar_obj = tarfile.open(tar_filename)
    try:
        sys.stdout.write("\nExtracting.\n")
        # Only the single expected member is extracted, never the whole
        # archive.
        tar_obj.extract(member, path=dir)
    finally:
        tar_obj.close()
    return True
44 | for stem1, predtype, stem2, text1, text2, frame_id, language_id, creator_id, score, sentence in Assertion.useful.filter(language='en').values_list('stem1__text', 'predtype__name', 'stem2__text', 45 | 'text1', 'text2', 'frame_id', 'language_id', 'creator_id', 'score', 'sentence__text').iterator(): 46 | stmt = add(concept[stem1], reltype[predtype], concept[stem2]) 47 | stmt.say(b('LeftText'), Literal(text1)) 48 | stmt.say(b('RightText'), Literal(text2)) 49 | stmt.say(b('FrameId'), frame[str(frame_id)]) 50 | stmt.say(b('Language'), language[str(language_id)]) 51 | stmt.say(b('Creator'), user[str(creator_id)]) 52 | stmt.say(b('Score'), Literal(score)) 53 | stmt.say(b('Sentence'), Literal(sentence)) 54 | 55 | g.commit() 56 | print 'Dumping frames.' 57 | for id, predtype, text, goodness in Frame.objects.filter(language='en').values_list('id', 'predtype__name', 'text', 'goodness').iterator(): 58 | ff = frame[str(id)] 59 | g.add((ff, b('RelationType'), reltype[predtype])) 60 | g.add((ff, b('FrameText'), Literal(text))) 61 | g.add((ff, b('FrameGoodness'), Literal(str(goodness)))) 62 | 63 | 64 | g.commit() 65 | -------------------------------------------------------------------------------- /tools/dump_to_sqlite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This one should run in the ConceptNet Django environment. 3 | from conceptnet.models import Concept # just for the environment setup. 
def dump_to_sqlite(conn):
    """
    Copy every model listed in `models_to_dump` from the Django database
    into the SQLite database behind the DB-API connection `conn`.

    For each model the target table is emptied and refilled, and the
    connection is committed.  User rows are written with only their id
    and username; all other user columns get fixed placeholder values.
    """
    cursor = conn.cursor()
    try:
        for idx, model_name in enumerate(models_to_dump):
            model = models[model_name]
            sys.stderr.write('(%2d/%2d) dumping %s\n' % (
                idx + 1, len(models_to_dump), model_name))
            meta = model._meta
            db_table = meta.db_table

            truncate = 'DELETE FROM %s' % db_table
            print(truncate)
            cursor.execute(truncate)

            if model_name == 'User':
                # User is special because we don't want to dump private info.
                placeholder_timestamp = '1969-12-31 19:00:00'
                sql = 'INSERT INTO %s (id, username, last_login, date_joined, first_name, last_name, email, password, is_staff, is_active, is_superuser) VALUES (?, ?, %r, %r, "", "", "", "X", 0, 1, 0)' % (db_table, placeholder_timestamp, placeholder_timestamp)
                queryset = QuerySet(model).values_list('id', 'username')
            else:
                # Okay, so a field has a .serialize parameter on it. But the
                # auto id field has this set to False. Fail. Just serialize
                # all the local fields.
                fields = meta.local_fields
                # SQL needs the database column names, while the ORM query
                # needs the model attribute names; the two differ when a
                # field sets db_column.
                columns = [f.column for f in fields]
                attnames = [f.attname for f in fields]

                sql = 'INSERT INTO %s (%s) VALUES (%s)' % (
                    db_table,
                    ', '.join(columns),
                    ', '.join('?' * len(fields)))
                queryset = QuerySet(model).values_list(*attnames)

            print(sql)
            cursor.executemany(sql, Status.reporter(queryset, report_interval=1000))
            conn.commit()
    finally:
        # Release the cursor even if a model fails to dump.
        cursor.close()


if __name__ == '__main__':
    # Usage: dump_to_sqlite.py SQLITE_FILE
    db_name = sys.argv[1]
    conn = sqlite3.connect(db_name)
    try:
        dump_to_sqlite(conn)
    finally:
        conn.close()
49 | 50 | print "Getting all known uids... ", 51 | # All Assertions have Sentences, which have the same creator. So the Sentences 52 | # is the most complete list of users. 53 | print "(users...) ", 54 | uids = set(Sentence.objects.all().values_list('creator__id', flat=True).iterator()) 55 | # But some users may have been raters only. 56 | print "(ratings...) ", 57 | for uid in Vote.objects.all().values_list('user__id', flat=True).iterator(): 58 | uids.add(uid) 59 | print 60 | 61 | @transaction.commit_on_success 62 | def make_users(uids): 63 | for uid in uids: 64 | User.objects.create(id=uid, username='user_%d' % uid) 65 | 66 | print "Creating %d placeholder users..." % len(uids) 67 | make_users(uids) 68 | 69 | if settings.DATABASE_ENGINE in ('postgresql_psycopg2', 'postgresql'): 70 | print "Resetting id sequence for PostgreSQL..." 71 | seq = 'auth_user_id_seq' 72 | cursor.execute('ALTER SEQUENCE %s RESTART WITH %d;' % (seq, max(uids)+1)) 73 | -------------------------------------------------------------------------------- /conceptnet/concepttools/lightning.txt.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
Standing on a building I am a lightning rod
4 |
And all these clouds are so familiar
5 |
Descending from the mountain tops the gods are threatening .
6 |
I will return an honest soldier
7 |
8 |
Steady on this high rise like every lightning rod
9 |
And all these clouds are boiling over
10 |
Swimming in adrenaline the sky is caving in
11 |
but I will remain the honest soldier .
12 |
13 | -------------------------------------------------------------------------------- /conceptnet/corpus/parse/migrate_templated_qs4e.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import csamoa 4 | from conceptnet4.models import Assertion, Batch, RawAssertion, Frame,\ 5 | Frequency, Relation, SurfaceForm, Concept 6 | import conceptnet.models as cn3 7 | from corpus.models import Sentence, Language, Activity 8 | from django.contrib.auth.models import User 9 | from itertools import islice 10 | import yaml 11 | from csc_utils import queryset_foreach 12 | 13 | csamoa4_activity = Activity.objects.get(name='csamoa4 self-rating') 14 | def process_predicate(pred): 15 | frametext = pred.frame.text 16 | relation = Relation.objects.get(id=pred.relation.id) 17 | sentence = pred.sentence 18 | lang = pred.language 19 | if pred.polarity < 0: 20 | freq, c = Frequency.objects.get_or_create(value=-5, language=lang, 21 | defaults=dict(text='[negative]')) 22 | else: 23 | freq, c = Frequency.objects.get_or_create(value=5, language=lang, 24 | defaults=dict(text='')) 25 | if c: freq.save() 26 | 27 | frame, c = Frame.objects.get_or_create(relation=relation, language=lang, 28 | text=frametext, 29 | defaults=dict(frequency=freq, 30 | goodness=1)) 31 | if c: frame.save() 32 | raw_assertion = RawAssertion.make(sentence.creator, frame, pred.text1, 33 | pred.text2, csamoa4_activity, 1) 34 | assertion = raw_assertion.assertion 35 | 36 | for rating in pred.rating_set.all(): 37 | score = rating.rating_value.deltascore 38 | if score < -1: score = -1 39 | if score > 1: score = 1 40 | if rating.activity_id is None: 41 | rating_activity = Activity.objects.get(name='unknown') 42 | else: 43 | rating_activity = rating.activity 44 | sentence.set_rating(rating.user, score, rating_activity) 45 | raw_assertion.set_rating(rating.user, score, rating_activity) 46 | assertion.set_rating(rating.user, score, 
rating_activity) 47 | return raw_assertion 48 | 49 | def run(): 50 | #generator = yaml.load_all(open('delayed_test.yaml')) 51 | #all_entries = list(generator) 52 | 53 | #activity_filter = Q() 54 | #for actid in good_acts: 55 | # activity_filter |= Q(sentence__activity__id=actid) 56 | for lang in ['it', 'fr', 'nl', 'es', 'pt']: 57 | queryset_foreach(cn3.Predicate.objects.filter(language__id=lang), 58 | process_predicate, batch_size=10) 59 | 60 | if __name__ == '__main__': 61 | run() 62 | 63 | -------------------------------------------------------------------------------- /conceptnet/webapi/templates/documentation.txt: -------------------------------------------------------------------------------- 1 | {% load rst %} 2 | .. _webapi: 3 | 4 | The ConceptNet Web API 5 | ====================== 6 | 7 | You can look up information in ConceptNet using a Web-based API. The API 8 | follows the `Representational State Transfer`_ (REST) standard, using simple 9 | HTTP requests to interact with the server. (A prominent example of a REST API 10 | is the `Twitter API`_.) 11 | 12 | .. _`Representational State Transfer`: http://en.wikipedia.org/wiki/Representational_State_Transfer 13 | .. _`Twitter API`: http://apiwiki.twitter.com/Twitter-API-Documentation 14 | 15 | The URLs listed below are relative to the base URL of {{API_BASE}}. As an 16 | example, you can use the command line utility cURL to see the results of the 17 | `/api/en/concept/duck` call:: 18 | 19 | curl {{API_BASE}}/api/en/concept/duck/ 20 | 21 | By the way, the excellent `django-piston`_ library made it much easier to write 22 | this API, its documentation, and its examples, all at the same time. 23 | 24 | .. _`django-piston`: http://bitbucket.org/jespern/django-piston/wiki/Home 25 | 26 | If you want to quickly get started using this Web API in Python, go to the next 27 | section, `webapi-client`_. 
28 | 29 | Output formats 30 | -------------- 31 | 32 | When the API returns an object, it will represent it as a structure of key-value 33 | mappings. This structure will, by default, be represented in JSON format. 34 | 35 | You can request the results in a different format by adding "query.format" to 36 | the end of a URL: 37 | 38 | - Adding `query.xml` will request the results in XML format. 39 | - Adding `query.json` will request the results in their default JSON format. 40 | - Adding `query.yaml` will request the results in YAML_ format. 41 | 42 | .. _YAML: http://yaml.org 43 | 44 | For example, adding "query.xml" will request the results in XML format. 45 | 46 | The examples shown below all use YAML format, because it is fairly readable and 47 | the most compact of all these formats. 48 | 49 | REST requests 50 | ------------- 51 | 52 | {% for doc in docs %} 53 | 54 | {{ doc.name }} 55 | ....................................... 56 | 57 | .. function:: {{ doc.uri_template }} 58 | 59 | {{ doc.get_doc|default:""|safe }} 60 | Implemented by: :class:`conceptnet.webapi.{{ doc.name }}` 61 | 62 | {% for method in doc.useful_methods %} 63 | .. method:: {{ method.name }}({{ method.signature }}) 64 | 65 | {{ method.get_doc|indent:"8"|safe }} 66 | {% endfor %} 67 | {% if doc.example_url %} 68 | **Example:** `GET {{ doc.example_url }} <{{API_BASE}}{{ doc.example_url }}>`_ :: 69 | 70 | {{ doc.example_result|indent:"8"|safe }} 71 | 72 | {% endif %} 73 | {% endfor %} 74 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | PYTHONPATH = ..:../.. 9 | export DJANGO_SETTINGS_MODULE = settings 10 | # Internal variables. 
11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | 15 | .PHONY: help clean html web pickle htmlhelp latex changes linkcheck 16 | 17 | help: 18 | @echo "Please use \`make <target>' where <target> is one of" 19 | @echo " html to make standalone HTML files" 20 | @echo " pickle to make pickle files" 21 | @echo " json to make JSON files" 22 | @echo " htmlhelp to make HTML files and a HTML help project" 23 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 24 | @echo " changes to make an overview over all changed/added/deprecated items" 25 | @echo " linkcheck to check all external links for integrity" 26 | 27 | clean: 28 | -rm -rf build/* 29 | 30 | html: 31 | mkdir -p build/html build/doctrees 32 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html 33 | @echo 34 | @echo "Build finished. The HTML pages are in build/html." 35 | 36 | pickle: 37 | mkdir -p build/pickle build/doctrees 38 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle 39 | @echo 40 | @echo "Build finished; now you can process the pickle files." 41 | 42 | web: pickle 43 | 44 | json: 45 | mkdir -p build/json build/doctrees 46 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) build/json 47 | @echo 48 | @echo "Build finished; now you can process the JSON files." 49 | 50 | htmlhelp: 51 | mkdir -p build/htmlhelp build/doctrees 52 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp 53 | @echo 54 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 55 | ".hhp project file in build/htmlhelp." 56 | 57 | latex: 58 | mkdir -p build/latex build/doctrees 59 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex 60 | @echo 61 | @echo "Build finished; the LaTeX files are in build/latex." 62 | @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ 63 | "run these through (pdf)latex." 
64 | 65 | changes: 66 | mkdir -p build/changes build/doctrees 67 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes 68 | @echo 69 | @echo "The overview file is in build/changes." 70 | 71 | linkcheck: 72 | mkdir -p build/linkcheck build/doctrees 73 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck 74 | @echo 75 | @echo "Link check complete; look for any errors in the above output " \ 76 | "or in build/linkcheck/output.txt." 77 | -------------------------------------------------------------------------------- /conceptnet/concepttools/testwords.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
Stars
4 |
5 |
Dog
6 |
7 |
Ocean
8 |
9 |
Tree
10 |
11 |
Sky
12 |
13 |
Teeth
14 |
15 |
Computer
16 |
17 |
Person
18 |
19 |
Flower
20 |
21 |
Calm
22 |
23 |
Birth
24 |
25 |
Death
26 |
27 |
Love
28 |
29 |
Fire
30 |
31 |
Happiness
32 |
33 |
Sadness
34 |
35 |
Anger
36 |
37 |
38 |
39 | -------------------------------------------------------------------------------- /tools/cnet_n3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | PREFIX = 'http://conceptnet.media.mit.edu' 4 | 5 | from conceptnet.models import Assertion, Frame, RelationType, Concept 6 | 7 | import codecs 8 | ofile_raw = open('conceptnet_en_20080604.n3','w') 9 | ofile = codecs.getwriter('utf-8')(ofile_raw) 10 | 11 | print >>ofile, '@prefix conceptnet: <%s>.' % (PREFIX+'/') 12 | 13 | def prefixed(type, rest): 14 | return '<%s/%s/%s>' % (PREFIX, type, rest) 15 | 16 | def concept(id): return prefixed('concept', id) 17 | def reltype(x): return prefixed('reltype', reltype_id2name[x]) 18 | def literal(x): return '"'+x.replace('"','_')+'"' 19 | def _frame(id): return prefixed('frame', id) 20 | def language(x): return prefixed('language', x) 21 | def user(x): return prefixed('user', x) 22 | 23 | def proplist(p): 24 | return u'; '.join(u'conceptnet:%s %s' % (prop, val) 25 | for prop, val in p) 26 | 27 | reltype_id2name = dict((x.id, x.name) for x in RelationType.objects.all()) 28 | frames = set() 29 | concepts = set() 30 | 31 | print 'Dumping assertions.' 
32 | for (id, stem1_id, reltype_id, stem2_id, 33 | text1, text2, frame_id, language_id, creator_id, 34 | score, sentence) in Assertion.useful.filter(language='en').values_list( 35 | 'id', 'stem1_id', 'predtype_id', 'stem2_id', 36 | 'text1', 'text2', 'frame_id', 'language_id', 'creator_id', 37 | 'score', 'sentence__text').iterator(): 38 | 39 | ofile.write('<%s/assertion/%s> ' % (PREFIX, id)) 40 | ofile.write(proplist(( 41 | ('LeftConcept', concept(stem1_id)), 42 | ('RelationType', reltype(reltype_id)), 43 | ('RightConcept', concept(stem2_id)), 44 | ('LeftText', literal(text1)), 45 | ('RightText', literal(text2)), 46 | ('FrameId', _frame(frame_id)), 47 | ('Language', language(language_id)), 48 | ('Creator', user(creator_id)), 49 | ('Score', score), 50 | ('Sentence', literal(sentence)) 51 | ))) 52 | ofile.write('.\n') 53 | 54 | frames.add(frame_id) 55 | concepts.add(stem1_id) 56 | concepts.add(stem2_id) 57 | 58 | ofile.flush() 59 | 60 | print 'Dumping frames.' 61 | for id, frame in Frame.objects.in_bulk(list(frames)).iteritems(): 62 | ofile.write(_frame(id)+' ') 63 | ofile.write(proplist(( 64 | ('RelationType', reltype(frame.predtype_id)), 65 | ('FrameText', literal(frame.text)), 66 | ('FrameGoodness', literal(str(frame.goodness))))) 67 | ) 68 | ofile.write('.\n') 69 | 70 | ofile.flush() 71 | 72 | print 'Dumping concepts.' 73 | for id, c in Concept.objects.in_bulk(list(concepts)).iteritems(): 74 | ofile.write(concept(id)+' ') 75 | ofile.write(proplist(( 76 | ('NormalizedText', literal(c.text)), 77 | ('CanonicalName', literal(c.canonical_name)) 78 | ))) 79 | ofile.write('.\n') 80 | 81 | 82 | print 'Done.' 83 | 84 | ofile.close() 85 | -------------------------------------------------------------------------------- /serialize/pyyaml.py: -------------------------------------------------------------------------------- 1 | """ 2 | Improved YAML serializer by rspeer@mit.edu. Uses a stream of documents so that 3 | it doesn't have to keep all database entries in memory. 
4 | 5 | Requires PyYaml (http://pyyaml.org/), but that's checked for in __init__. 6 | 7 | To use it, add a line like this to your settings.py:: 8 | 9 | SERIALIZATION_MODULES = { 10 | 'yaml': 'path.to.import.this.module' 11 | } 12 | """ 13 | 14 | from StringIO import StringIO 15 | import yaml 16 | from django.utils.encoding import smart_unicode 17 | 18 | try: 19 | import decimal 20 | except ImportError: 21 | from django.utils import _decimal as decimal # Python 2.3 fallback 22 | 23 | from django.db import models 24 | from django.core.serializers.python import Serializer as PythonSerializer 25 | from django.core.serializers.python import Deserializer as PythonDeserializer 26 | 27 | class DjangoSafeDumper(yaml.SafeDumper): 28 | def represent_decimal(self, data): 29 | return self.represent_scalar('tag:yaml.org,2002:str', str(data)) 30 | 31 | DjangoSafeDumper.add_representer(decimal.Decimal, DjangoSafeDumper.represent_decimal) 32 | 33 | class Serializer(PythonSerializer): 34 | """ 35 | Convert a queryset to YAML. 36 | """ 37 | 38 | internal_use_only = False 39 | 40 | def handle_field(self, obj, field): 41 | # A nasty special case: base YAML doesn't support serialization of time 42 | # types (as opposed to dates or datetimes, which it does support). Since 43 | # we want to use the "safe" serializer for better interoperability, we 44 | # need to do something with those pesky times. Converting 'em to strings 45 | # isn't perfect, but it's better than a "!!python/time" type which would 46 | # halt deserialization under any other language. 
47 | if isinstance(field, models.TimeField) and getattr(obj, field.name) is not None: 48 | self._current[field.name] = str(getattr(obj, field.name)) 49 | else: 50 | super(Serializer, self).handle_field(obj, field) 51 | 52 | def end_object(self, obj): 53 | the_object = { 54 | "model" : smart_unicode(obj._meta), 55 | "pk" : smart_unicode(obj._get_pk_val(), strings_only=True), 56 | "fields" : self._current 57 | } 58 | self._current = None 59 | dumpstr = yaml.dump(the_object, Dumper=DjangoSafeDumper, 60 | explicit_start=True, **self.options) 61 | self.stream.write(dumpstr) 62 | 63 | def start_serialization(self): 64 | self.options.pop('stream', None) 65 | self.options.pop('fields', None) 66 | PythonSerializer.start_serialization(self) 67 | 68 | def end_serialization(self): 69 | self.stream.close() 70 | 71 | def getvalue(self): 72 | return self.stream.getvalue() 73 | 74 | def Deserializer(stream_or_string, **options): 75 | """ 76 | Deserialize a stream or string of YAML data. 77 | """ 78 | if isinstance(stream_or_string, basestring): 79 | stream = StringIO(stream_or_string) 80 | else: 81 | stream = stream_or_string 82 | for obj in PythonDeserializer(yaml.load_all(stream)): 83 | yield obj 84 | 85 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. ConceptNet documentation master file, created by sphinx-quickstart on Fri Feb 27 14:59:14 2009. 2 | You can adapt this file completely to your liking, but it should at least 3 | contain the root `toctree` directive. 4 | 5 | .. _root: 6 | 7 | ConceptNet API 8 | ============== 9 | 10 | Contents: 11 | 12 | .. 
toctree:: 13 | :maxdepth: 2 14 | 15 | install 16 | conceptnet4 17 | corpus 18 | others 19 | 20 | Overview and apology 21 | -------------------- 22 | 23 | The current ConceptNet API has the ability to access two versions of the 24 | database: ConceptNet 3 and the experimental ConceptNet 4. We call this the 25 | "ConceptNet 3.5" API, pronounced "ConceptNet three and a half". 26 | 27 | Except this isn't quite a release of ConceptNet 3.5 yet. We're working on it. 28 | 29 | Most of our released code, as well as our released database, takes the form of 30 | ConceptNet 3. However, ConceptNet 3 has become a bit of a mess as a result of 31 | years of research and paper deadlines. 32 | 33 | This documentation, then, will mostly document ConceptNet 4. Much of what we 34 | say will work about the same in ConceptNet 3. But some of it won't. We're sorry 35 | about that. 36 | 37 | How does this code work? 38 | ------------------------ 39 | The answer from 30,000 feet up is simple: It's Django. 40 | 41 | Django is a Python framework for working with databases and web applications. 42 | All of ConceptNet is represented as Django models that interact with each other 43 | and with a database. We don't use the web application part -- not here, at 44 | least -- but we provide the appropriate hooks so that ConceptNet can power a 45 | Django web application. (Because it does. It's at 46 | http://openmind.media.mit.edu.) 47 | 48 | The code is divided into a few main modules, or *apps*: 49 | 50 | - :mod:`corpus`, representing the sentences of glorious, ambiguous natural 51 | language that our contributors have provided us with. 52 | - :mod:`conceptnet` (or :mod:`conceptnet4`), representing the structured 53 | assertions that we have parsed from the corpus. 54 | - :mod:`events`, which lets us keep track of how, when, and why various objects 55 | came into being. 56 | - (:mod:`voting`, which actually isn't by us at all; it's the `django-voting`_ 57 | package by Jonathan Buchanan.) 
58 | 59 | .. _`django-voting`: http://code.google.com/p/django-voting/ 60 | 61 | :mod:`conceptnet` and :mod:`conceptnet4` are two conflicting implementations of the 62 | same idea. In :file:`settings.py`, we refer to :mod:`conceptnet`. If you have a 63 | database of ConceptNet 4 and a desire to live on the edge, you can change it to 64 | :mod:`conceptnet4`. 65 | 66 | 67 | Model diagram 68 | ------------- 69 | Each app contains several *models*, representing objects that are stored in a 70 | database. The information in ConceptNet is represented by these models and 71 | their relationships to each other. 72 | 73 | .. image:: _static/graph/conceptnet_all.png 74 | :width: 600 75 | :alt: ConceptNet 4 model diagram 76 | :target: _static/graph/conceptnet_all.png 77 | 78 | (`PDF version`_) 79 | 80 | .. _`PDF version`: _static/graph/conceptnet_all.pdf 81 | 82 | Components 83 | ---------- 84 | - :ref:`conceptnet4` 85 | - :ref:`corpus` 86 | - :ref:`others` 87 | 88 | Indices and tables 89 | ================== 90 | 91 | * :ref:`genindex` 92 | * :ref:`modindex` 93 | * :ref:`search` 94 | 95 | -------------------------------------------------------------------------------- /maint/dump_csv.py: -------------------------------------------------------------------------------- 1 | from csc.conceptnet.models import Concept, Assertion, Sentence, Frame 2 | from csc.corpus.models import TaggedSentence 3 | import csv 4 | 5 | def dump_assertion_sentences(lang, f): 6 | writer = csv.writer(f) 7 | writer.writerow(('id', 'creator', 'score', 'text')) 8 | for id, username, score, text in Assertion.objects.filter(language=lang).values_list('id','creator__username', 'score','sentence__text').iterator(): 9 | writer.writerow((id, username.encode('utf-8'), score, text.encode('utf-8'))) 10 | 11 | def dump_all_sentences(lang, f): 12 | writer = csv.writer(f) 13 | writer.writerow(('id', 'creator', 'created_on', 'activity', 'text')) 14 | for id, username, created_on, activity, text in 
Sentence.objects.filter(language=lang).values_list('id','creator__username','created_on', 'activity__name', 'text').iterator(): 15 | writer.writerow((id, username.encode('utf-8'), created_on, 16 | activity, text.encode('utf-8'))) 17 | 18 | def dump_concepts(lang, f): 19 | writer = csv.writer(f) 20 | writer.writerow(('id', 'num_assertions', 'normalized_text', 'canonical_name')) 21 | for c in Concept.objects.filter(language=lang).iterator(): 22 | writer.writerow((c.id, c.num_predicates, c.text.encode('utf-8'), 23 | c.canonical_name.encode('utf-8'))) 24 | 25 | def dump_assertions(lang, f): 26 | writer = csv.writer(f) 27 | writer.writerow(('id', 'sentence', 'relation_type', 'text1', 'text2', 'stem1_id', 'stem2_id', 'frame_id', 'score', 'creator')) 28 | for id, sentence, relation_type, text1, text2, stem1_id, stem2_id, frame_id, score, creator in Assertion.objects.filter(language=lang).values_list( 29 | 'id', 'sentence__text', 'predtype__name', 'text1', 'text2', 30 | 'stem1_id', 'stem2_id', 'frame_id', 'score', 'creator__username' 31 | ).iterator(): 32 | writer.writerow(( 33 | id, sentence.encode('utf-8'), relation_type, 34 | text1.encode('utf-8'), text2.encode('utf-8'), 35 | stem1_id, stem2_id, frame_id, score, 36 | creator.encode('utf-8') 37 | )) 38 | 39 | def dump_frames(lang, f): 40 | writer = csv.writer(f) 41 | writer.writerow(('id', 'relation_type', 'text', 'goodness')) 42 | for id, relation_type, text, goodness in Frame.objects.filter(language=lang).values_list( 43 | 'id', 'predtype__name', 'text', 'goodness' 44 | ).iterator(): 45 | writer.writerow(( 46 | id, relation_type, 47 | text.encode('utf-8'), 48 | goodness 49 | )) 50 | 51 | def dump_tagged_sentences(lang, f): 52 | writer = csv.writer(f) 53 | writer.writerow(('id', 'text')) 54 | for id, text in TaggedSentence.objects.filter(language=lang).values_list( 55 | 'id', 'text' 56 | ).iterator(): 57 | writer.writerow(( 58 | id, text.encode('utf-8') 59 | )) 60 | 61 | if __name__=='__main__': 62 | import sys 63 | 
name, lang = sys.argv 64 | 65 | dump_assertion_sentences(lang, open(lang+'_assertion_sentences.csv','w')) 66 | dump_all_sentences(lang, open(lang+'_all_sentences.csv','w')) 67 | dump_concepts(lang, open(lang+'_concepts.csv','w')) 68 | dump_assertions(lang, open(lang+'_assertions.csv','w')) 69 | dump_frames(lang, open(lang+'_frames.csv','w')) 70 | -------------------------------------------------------------------------------- /conceptnet/corpus/parse/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from conceptnet.corpus.models import Language 3 | from conceptnet.models import Relation 4 | 5 | class FunctionFamilyDetector(object): 6 | def __init__(self,kb,language,family): 7 | self.language = language 8 | self.kb = kb 9 | self.family = family 10 | 11 | def __str__(self): 12 | return '<' + self.language.id + ': ' + \ 13 | 'function words (family=' + self.family + ')>' 14 | 15 | def __call__(self,word): 16 | return (word in self.kb) 17 | 18 | 19 | class FunctionWord(models.Model): 20 | """ a word of particular significance to a parser """ 21 | language = models.ForeignKey(Language) 22 | word = models.TextField() 23 | unique_together = (('language', 'word'),) 24 | 25 | def __str__(self): 26 | return "<" + self.language.id + ":" + self.word + ">" 27 | 28 | class Meta: 29 | db_table = 'functionwords' 30 | 31 | class FunctionFamily(models.Model): 32 | """ defines a family of function words """ 33 | family = models.TextField() 34 | f_word = models.ForeignKey(FunctionWord) 35 | unique_together = (('family', 'f_word'),) 36 | 37 | def __str__(self): 38 | return self.family + ": " + str(self.f_word) 39 | 40 | class Meta: 41 | db_table = 'functionfamilies' 42 | 43 | @staticmethod 44 | def build_function_detector(language, family): 45 | # Prepare the kb 46 | words = list(FunctionFamily.objects.filter(family=family,f_word__language=language).values_list('f_word__word', flat=True)) 47 | 48 | return 
FunctionFamilyDetector(words,language,family) 49 | 50 | class ParsingPattern(models.Model): 51 | pattern = models.TextField(blank=False) 52 | predtype = models.ForeignKey(Relation) 53 | polarity = models.IntegerField() 54 | sort_order = models.IntegerField() 55 | language = models.ForeignKey(Language) 56 | 57 | class Meta: 58 | db_table = 'parsing_patterns' 59 | 60 | 61 | class SecondOrderPattern(models.Model): 62 | regex = models.TextField() 63 | language = models.ForeignKey(Language) 64 | use_group = models.IntegerField(default=0) 65 | abort = models.BooleanField(default=False) 66 | 67 | def __str__(self): 68 | return "(" + self.language.id + ") /" + self.regex + "/" 69 | 70 | def compile(self): 71 | self._compiled_regex = re.compile( self.regex ) 72 | 73 | def __call__(self, text): 74 | if not hasattr( self, '_compiled_regex' ): self.compile() 75 | return self._compiled_regex.search(text) 76 | 77 | class Meta: 78 | db_table = 'secondorderpatterns' 79 | 80 | class SecondOrderSplitter: 81 | def __init__(self,patterns,language): 82 | self.language = language 83 | self.patterns = patterns 84 | 85 | def __call__(self,text): 86 | # FIXME: THIS IS A HIDEOUSLY USELESS ROUTINE 87 | for pattern in self.patterns: 88 | m = pattern(text) 89 | if m: 90 | if pattern.abort: text = '' 91 | else: text = m.groups()[pattern.use_group] 92 | return [text] 93 | 94 | def __str__(self): 95 | return "Second order splitter (" + self.language.id + ")" 96 | 97 | @staticmethod 98 | def build_splitter(language): 99 | return SecondOrderPattern.SecondOrderSplitter(language.secondorderpattern_set.all(), language) 100 | -------------------------------------------------------------------------------- /doc/bzr-howto.txt: -------------------------------------------------------------------------------- 1 | Common Sense Computing and Bazaar 2 | ================================= 3 | 4 | 5 | First-time setup 6 | ---------------- 7 | * Install Bazaar (bazaar-vcs.org) 8 | * Sign up for Launchpad 
(launchpad.net) 9 | * Join the Commonsense Computing team (http://launchpad.net/~commonsense) 10 | 11 | 12 | Working on a project 13 | -------------------- 14 | 15 | Start by making a branch of the project you're working on: 16 | bzr branch lp:conceptnet my_csamoa_branch 17 | (This gives you a local working directory called my_csamoa_branch.) 18 | 19 | Hack on the code. 20 | 21 | If you create new files, add them: 22 | bzr add filename 23 | 24 | From time to time, commit: 25 | bzr commit -m "this is my highly informative commit message" 26 | This commits to _your_ version-controlled repository. It can't mess with anyone else. It's safe. 27 | 28 | To incorporate new things that happen on the trunk, you need to _merge_: 29 | bzr merge lp:conceptnet # get your branch up to date with what's changed 30 | bzr commit -m "Merged" 31 | 32 | If for some reason your working copy is out of date: 33 | bzr update 34 | 35 | When it's ready for prime time, push it back into the trunk: 36 | bzr push lp:conceptnet 37 | 38 | If the trunk has changes you haven't merged, you'll need to merge before you can push. 39 | 40 | 41 | I don't want my own branch, I just want to use this like SVN 42 | ------------------------------------------------------------ 43 | 44 | Okay. This makes perfect sense for a quick change, but if you make a habit of this you're probably going to get in someone's way. 45 | 46 | Instead of branching, get a _checkout_: 47 | bzr checkout lp:conceptnet 48 | 49 | A checkout is a working copy whose repository is somewhere else. When you commit, it commits to that repository. This is how everything worked in Subversion. 
50 | 51 | To pull in new stuff from the repository: 52 | bzr update 53 | 54 | To commit your changes to the repository: 55 | bzr commit -m "extremely informative message" 56 | 57 | 58 | Checking out the same branch somewhere else 59 | ------------------------------------------- 60 | You've made a branch on one computer, and you want to work with the same branch on another computer. No problem: make a checkout of it. 61 | bzr checkout bzr+ssh://your.host.name/path/to/your/branch 62 | 63 | Now you have multiple checkouts, and you can update, commit, etc. just like above. 64 | 65 | This also makes sense if you want to work on some minor branch that's on Launchpad (like ~commonsense/conceptnet/new-caledonia) without re-branching it. Check out that branch and commit to it. 66 | 67 | 68 | Sharing a branch 69 | ---------------- 70 | If you want someone else to be able to work with your branch, you probably want it hosted on Launchpad instead of your own computer. Here's how to do that: 71 | 72 | bzr push lp:~username/project/branch-name 73 | 74 | For example, Rob might do this: 75 | bzr push lp:~rspeer/conceptnet/speed-up-the-lemmatizer 76 | 77 | That's right, you can just make up a URL like that and suddenly Launchpad is hosting a branch for you. Now make your branch into a checkout of that new hosted branch: 78 | bzr bind lp:~username/project/branch-name 79 | 80 | 81 | I screwed up! Shit shit shit. 82 | ----------------------------- 83 | If you committed something you didn't mean to, you can fix it: 84 | bzr uncommit 85 | 86 | If you added something you meant to be unversioned: 87 | bzr remove --keep filename 88 | 89 | If you want to go back to a previous revision, look up how to use bzr merge -r. 90 | 91 | If you pushed to somewhere you didn't mean to, check out that branch and bzr merge -r it back to something sane. 
92 | 93 | 94 | -------------------------------------------------------------------------------- /conceptnet/webapi/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls.defaults import * 2 | from piston.resource import Resource 3 | from conceptnet.webapi.docs import documentation_view 4 | from conceptnet.webapi.handlers import * 5 | 6 | # This gives a way to accept "query.foo" on the end of the URL to set the 7 | # format to 'foo'. "?format=foo" works as well. 8 | Q = r'(query\.(?P.+))?$' 9 | 10 | urlpatterns = patterns('', 11 | url(r'^(?P[^/]+)/'+Q, 12 | Resource(LanguageHandler), name='language_handler'), 13 | url(r'^(?P.+)/concept/(?P[^/]*)/'+Q, 14 | Resource(ConceptHandler), name='concept_handler'), 15 | url(r'^(?P.+)/concept/(?P[^/]*)/assertions/'+Q, 16 | Resource(ConceptAssertionHandler), name='concept_assertion_handler_default'), 17 | url(r'^(?P.+)/concept/(?P[^/]*)/assertions/limit:(?P[0-9]+)/'+Q, 18 | Resource(ConceptAssertionHandler), name='concept_assertion_handler'), 19 | url(r'^(?P.+)/concept/(?P[^/]*)/surfaceforms/'+Q, 20 | Resource(ConceptSurfaceHandler), name='concept_surface_handler_default'), 21 | url(r'^(?P.+)/concept/(?P[^/]*)/surfaceforms/limit:(?P[0-9]+)/'+Q, 22 | Resource(ConceptSurfaceHandler), name='concept_surface_handler'), 23 | url(r'^(?P.+)/concept/(?P[^/]*)/features/'+Q, 24 | Resource(ConceptFeatureHandler), name='concept_feature_handler'), 25 | url(r'^(?P.+)/(?Pleft|right)feature/(?P[^/]+)/(?P[^/]+)/'+Q, 26 | Resource(FeatureQueryHandler), name='feature_query_handler_default'), 27 | url(r'^(?P.+)/(?Pleft|right)feature/(?P[^/]+)/(?P[^/]+)/limit:(?P[0-9]+)/'+Q, 28 | Resource(FeatureQueryHandler), name='feature_query_handler'), 29 | url(r'^(?P.+)/(?P.+)/(?P[0-9]+)/votes/'+Q, 30 | Resource(RatedObjectHandler), name='rated_object_handler'), 31 | url(r'^(?P.+)/surface/(?P.+)/'+Q, 32 | Resource(SurfaceFormHandler), name='surface_form_handler'), 33 | 
def textrepr(rel, matchdict):
    """Render a parse result as ``Relation(slot1, slot2)`` text.

    Returns the literal string ``'None'`` when ``rel`` is ``None``.
    Otherwise the relation is formatted together with the values stored
    under keys ``1`` and ``2`` of ``matchdict``; a missing key is rendered
    as ``None``.
    """
    if rel is not None:
        first_slot = matchdict.get(1)
        second_slot = matchdict.get(2)
        return "%s(%s, %s)" % (rel, first_slot, second_slot)
    return 'None'
10 | # This test suite does not vouch for the correctness or usefulness of the 11 | # sentences it contains. 12 | 13 | tests = [ 14 | ("If you want to impanel a jury then you should ask questions.", 15 | "HasPrerequisite(impanel a jury, ask questions)"), 16 | ('"Lucy in the Sky with Diamonds" was a famous Beatles song', 17 | 'IsA("Lucy in the Sky with Diamonds", a famous Beatles song)'), 18 | ("sound can be recorded", 19 | "ReceivesAction(sound, recorded)"), 20 | ("sounds can be soothing", 21 | "HasProperty(sounds, soothing)"), 22 | ("music can be recorded with a recording device", 23 | "ReceivesAction(music, recorded with a recording device)"), 24 | ("The first thing you do when you buy a shirt is try it on", 25 | "HasFirstSubevent(buy a shirt, try it on)"), 26 | ("One of the things you do when you water a plant is pour", 27 | "HasSubevent(water a plant, pour)"), 28 | ("A small sister can bug an older brother", 29 | "CapableOf(A small sister, bug an older brother)"), 30 | ("McDonald's hamburgers contain mayonnaise", 31 | "HasA(McDonald's hamburgers, mayonnaise)"), 32 | ("If you want to stab to death then you should get a knife.", 33 | "HasPrerequisite(stab to death, get a knife)"), 34 | ("carbon can cake hard", 35 | "CapableOf(carbon, cake hard)"), 36 | ("You would take a walk because your housemates were having sex in your bed.", 37 | "MotivatedByGoal(take a walk, your housemates were having sex in your bed)"), 38 | ("police can tail a suspect", 39 | "CapableOf(police, tail a suspect)"), 40 | ("people can race horses", 41 | "CapableOf(people, race horses)"), 42 | ("computer can mine data", 43 | "CapableOf(computer, mine data)"), 44 | ("to use a phone you must dial numbers", 45 | "HasSubevent(use a phone, dial numbers)"), 46 | ("People who are depressed are more likely to kill themselves", 47 | "HasProperty(People who are depressed, more likely to kill themselves)"), 48 | ("Bird eggs are good with toast and jam", 49 | "HasProperty(Bird eggs, good with toast and 
jam)"), 50 | ("housewife can can fruit", 51 | "CapableOf(housewife, can fruit)"), 52 | ("pictures can be showing nudity", 53 | "CapableOf(pictures, be showing nudity)"), 54 | ("a large house where the president of the US resides", 55 | "junk(a large house where the president of the US resides, None)"), 56 | ("girls are cute when they eat", 57 | "HasProperty(girls, cute when they eat)"), 58 | ("When books are on a bookshelf, you see only their spines.", 59 | "HasSubevent(books are on a bookshelf, you see only their spines)"), 60 | ("The effect of taking a phone call is finding out who is calling", 61 | "Causes(taking a phone call, finding out who is calling)"), 62 | ("There are 60 seconds in a minute", 63 | "AtLocation(60 seconds, a minute)"), 64 | ("Two wrongs don't make a right.", 65 | "CapableOf(Two wrongs, make a right)"), 66 | ("Somewhere someone can be is an art gallery", 67 | "AtLocation(someone, an art gallery)"), 68 | ("A person doesn't want war", 69 | "Desires(A person, war)"), 70 | ("That's weird", 71 | "junk(That's weird, None)"), 72 | ] 73 | 74 | def run_tests(): 75 | success = 0 76 | ntests = 0 77 | for testin, testout in tests: 78 | ntests += 1 79 | prob, frame, rel, matches = pattern_parse(testin) 80 | if textrepr(rel, matches) == testout: 81 | success += 1 82 | print "Success:", testin 83 | else: 84 | print "Failed:", testin 85 | print "Got:", textrepr(rel, matches) 86 | print "Expected:", testout 87 | pattern_parse(testin, 1) 88 | 89 | print "Tests complete: %d/%d" % (success, ntests) 90 | 91 | run_tests.__test__ = False 92 | 93 | if __name__ == '__main__': 94 | run_tests() 95 | 96 | -------------------------------------------------------------------------------- /doc/zero-to-conceptnet-on-xvm.txt: -------------------------------------------------------------------------------- 1 | Zero to ConceptNet on XVM 2 | by Ken Arnold (kcarnold@mit.edu) 3 | 4 | These instructions tell you how to: 5 | * Use your MIT Athena account to conjure up a new Ubuntu 
virtual machine that you can use 6 | * Install ConceptNet and Divisi on that fresh Ubuntu machine 7 | 8 | If you don't have an MIT account, or you have your own Ubuntu Linux machine already, you can skip to the "Getting dependencies" section. 9 | 10 | Creating a new VM 11 | ================= 12 | http://xvm.mit.edu/ 13 | Log in 14 | Create VM: autoinstall Ubuntu Jaunty i386 (our stuff works on AMD64, but 64-bit pointers waste the precious little RAM you get) 15 | go, wait 5 minutes, power on the new VM 16 | 17 | At a terminal with Kerberos tickets (e.g., Athena; ssh linux.mit.edu first) 18 | ssh MACHINE-NAME@xvm-console.mit.edu 19 | Hit Enter, type 'root' 20 | 21 | Making a user account to log in with ssh 22 | ---------------------------------------- 23 | 24 | Now add yourself as an admin user. But first we have to fix the configuration (this should not be necessary...): 25 | 26 | addgroup --gid 114 admin 27 | cat >> /etc/sudoers <> ~/.bashrc 62 | easy_install ipython 63 | 64 | Installing ConceptNet 65 | ===================== 66 | 67 | All of this will get installed inside your virtual environment. 68 | 69 | easy_install django 70 | easy_install conceptnet 71 | wget http://conceptnet.media.mit.edu/dist/ConceptNet-sqlite.tar.gz 72 | tar -xvf ConceptNet-sqlite.tar.gz 73 | 74 | If you want to develop ConceptNet itself, replace `easy_install conceptnet` with: 75 | bzr branch lp:conceptnet 76 | cd conceptnet; ./setup.py develop; cd .. 77 | 78 | Try it out 79 | ========== 80 | 81 | ipython 82 | from csc.conceptnet4.models import Concept 83 | dog = Concept.get('dog', 'en') 84 | for fwd in dog.get_assertions_forward()[:15]: 85 | print fwd 86 | 87 | Documentation: http://conceptnet.media.mit.edu/doc/conceptnet/overview.html 88 | 89 | Installing Divisi 90 | ================= 91 | 92 | easy_install divisi 93 | 94 | If you want to develop Divisi itself, do this instead: 95 | bzr branch lp:divisi 96 | cd divisi; ./setup.py develop; cd .. 
97 | 98 | Try out Divisi 99 | ============== 100 | 101 | You can make an AnalogySpace tensor like this: 102 | ipython 103 | from csc.conceptnet4.analogyspace import * 104 | tensor = conceptnet_2d_from_db(lang='en') 105 | [or alternatively, follow directions at http://csc.media.mit.edu/pages/ubuntu-install/ to get it online] 106 | tensor['baseball', :].top_items() 107 | svd = tensor.svd(k=50) 108 | concept_similarity(svd, 'teach').top_items(10) 109 | 110 | 111 | Also, if you checked out the source, you can run our test suite: 112 | python divisi/test/tests.py 113 | 114 | Documentation: http://divisi.media.mit.edu/doc/intro.html 115 | 116 | Using our database server 117 | ========================= 118 | sudo aptitude install python-psycopg2 119 | Then see: http://conceptnet.media.mit.edu/doc/conceptnet/install.html#optional-using-a-postgresql-database 120 | 121 | 122 | Some basic ConceptNet queries 123 | ============================= 124 | 125 | http://conceptnet.media.mit.edu/doc/ 126 | 127 | from csc.conceptnet4.models import * 128 | 129 | All assertions about "dog": 130 | >>> dog = Concept.get('dog','en') 131 | >>> Assertion.objects.filter(concept1=dog) 132 | (same as dog.get_assertions_forward() if you replace `objects` by `useful`) 133 | 134 | All sentences where "a dog" is the first item: 135 | >>> Sentence.objects.filter(rawassertion__text1__iexact='a dog') 136 | 137 | All assertions above some score 138 | >>> Assertion.objects.filter(language='en', score__gte=3).count() 139 | 140 | A useful reference: http://docs.djangoproject.com/en/dev/topics/db/queries/ 141 | 142 | -------------------------------------------------------------------------------- /conceptnet/lib/events/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | 2 | from south.db import db 3 | from django.db import models 4 | from events.models import * 5 | 6 | class Migration: 7 | 8 | def forwards(self, orm): 9 | 10 | # Adding model 
'Event' 11 | db.create_table('events_event', ( 12 | ('id', orm['events.Event:id']), 13 | ('user', orm['events.Event:user']), 14 | ('content_type', orm['events.Event:content_type']), 15 | ('object_id', orm['events.Event:object_id']), 16 | ('activity', orm['events.Event:activity']), 17 | ('timestamp', orm['events.Event:timestamp']), 18 | )) 19 | db.send_create_signal('events', ['Event']) 20 | 21 | # Adding model 'Activity' 22 | db.create_table('events_activity', ( 23 | ('id', orm['events.Activity:id']), 24 | ('name', orm['events.Activity:name']), 25 | )) 26 | db.send_create_signal('events', ['Activity']) 27 | 28 | 29 | 30 | def backwards(self, orm): 31 | 32 | # Deleting model 'Event' 33 | db.delete_table('events_event') 34 | 35 | # Deleting model 'Activity' 36 | db.delete_table('events_activity') 37 | 38 | 39 | 40 | models = { 41 | 'auth.group': { 42 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 43 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '80', 'unique': 'True'}), 44 | 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'}) 45 | }, 46 | 'auth.permission': { 47 | 'Meta': {'unique_together': "(('content_type', 'codename'),)"}, 48 | 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), 49 | 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), 50 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 51 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) 52 | }, 53 | 'auth.user': { 54 | 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), 55 | 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), 56 | 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), 57 | 'groups': 
('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'blank': 'True'}), 58 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 59 | 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}), 60 | 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}), 61 | 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}), 62 | 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), 63 | 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), 64 | 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), 65 | 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'}), 66 | 'username': ('django.db.models.fields.CharField', [], {'max_length': '30', 'unique': 'True'}) 67 | }, 68 | 'contenttypes.contenttype': { 69 | 'Meta': {'unique_together': "(('app_label', 'model'),)", 'db_table': "'django_content_type'"}, 70 | 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), 71 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 72 | 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), 73 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) 74 | }, 75 | 'events.activity': { 76 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 77 | 'name': ('django.db.models.fields.TextField', [], {}) 78 | }, 79 | 'events.event': { 80 | 'activity': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['events.Activity']"}), 81 | 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), 82 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 83 | 'object_id': 
def process_predicate(pred, batch):
    """Migrate one old-style predicate into the new assertion tables.

    ``pred`` supplies the frame text, the two slot texts, the polarity,
    the relation, the source sentence and the language (``run`` passes
    ``cn3.Predicate`` rows).  ``batch`` is only used as the default batch
    when a new ``RawAssertion`` has to be created.

    The function get-or-creates the matching ``Frame``, ``RawAssertion``
    and ``Assertion``, links the raw assertion to the assertion and the
    sentence, records a +1 rating from the sentence's creator on all three
    objects, and then copies every rating attached to ``pred`` onto them
    (clamped to the range -1..1).

    Prints the resulting assertion and returns it wrapped in a
    one-element list.
    """
    frametext = pred.frame.text
    # Slot texts keyed by slot number; key 'a' holds the adverb, if any.
    matches = {1: pred.text1, 2: pred.text2}
    # Negative polarity is expressed as the adverb 'not'.
    if pred.polarity < 0: matches['a'] = 'not'
    relation = pred.relation
    sentence = pred.sentence
    lang = pred.language

    # One surface form per slot (auto_create=True is passed so missing
    # forms are presumably created on demand -- confirm in SurfaceForm.get).
    surface_forms = [SurfaceForm.get(matches[i], lang, auto_create=True)
                     for i in (1, 2)]
    concepts = [s.concept for s in surface_forms]

    # FIXME: english only so far
    freq = map_adverb(matches.get('a', ''))
    # Re-fetch the relation through this module's Relation model, by id.
    relation = Relation.objects.get(id=relation.id)
    # frequency/goodness are only applied when the frame is newly created.
    frame, _ = Frame.objects.get_or_create(relation=relation, language=lang,
                                           text=frametext,
                                           defaults=dict(frequency=freq,
                                                         goodness=1))
    frame.save()

    raw_assertion, _ = RawAssertion.objects.get_or_create(
        surface1=surface_forms[0],
        surface2=surface_forms[1],
        frame=frame,
        language=lang,
        creator=sentence.creator,
        defaults=dict(batch=batch))
    # still need to set assertion_id

    assertion, _ = Assertion.objects.get_or_create(
        relation=relation,
        concept1=concepts[0],
        concept2=concepts[1],
        frequency=freq,
        language=lang,
        defaults=dict(score=0)
    )
    #assertion.save()

    # Link the raw assertion to its assertion and source sentence.
    raw_assertion.assertion = assertion
    raw_assertion.sentence = sentence
    raw_assertion.save()

    # The sentence's creator implicitly endorses everything derived from it.
    sentence.set_rating(sentence.creator, 1, csamoa4_activity)
    raw_assertion.set_rating(sentence.creator, 1, csamoa4_activity)
    assertion.set_rating(sentence.creator, 1, csamoa4_activity)

    # Carry over the existing ratings of the predicate.
    for rating in pred.rating_set.all():
        score = rating.rating_value.deltascore
        # Clamp the old score delta to a -1/0/+1 style vote.
        if score < -1: score = -1
        if score > 1: score = 1
        # Ratings without an activity are filed under the 'unknown' activity.
        if rating.activity_id is None:
            rating_activity = Activity.objects.get(name='unknown')
        else:
            rating_activity = rating.activity
        sentence.set_rating(rating.user, score, rating_activity)
        raw_assertion.set_rating(rating.user, score, rating_activity)
        assertion.set_rating(rating.user, score, rating_activity)

    print '=>', unicode(assertion).encode('utf-8')
    return [assertion]
101 | e.entry = entry 102 | 103 | # Extract traceback 104 | e_type, e_value, e_tb = sys.exc_info() 105 | e.tb = "\n".join(traceback.format_exception( e_type, e_value, e_tb )) 106 | 107 | # Raise again 108 | raise e 109 | 110 | # Process entries 111 | page_range = [p for p in paginator.page_range if p >= start_page] 112 | for i in page_range: 113 | entries = paginator.page(i).object_list 114 | 115 | # Update progress 116 | batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages) 117 | batch.progress_num = i 118 | batch.progress_den = paginator.num_pages 119 | batch.save() 120 | 121 | try: do_batch(entries) 122 | 123 | except ZeroDivisionError, e: 124 | batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages) + " ERROR!" 125 | batch.remarks = str(e.entry) + "\n" + str(e) + "\n" + e.tb 126 | print "***TRACEBACK***" 127 | print batch.remarks 128 | batch.save() 129 | raise e 130 | 131 | if __name__ == '__main__': 132 | user = User.objects.get(username='rspeer') 133 | run(user, start_page=164) 134 | 135 | -------------------------------------------------------------------------------- /test/test_ja_harness.py: -------------------------------------------------------------------------------- 1 | #python-encoding: UTF-8 2 | 3 | from csc.conceptnet4.models import Concept 4 | from csc.nl.ja.system import * 5 | from csc.corpus.models import * 6 | import MeCab 7 | 8 | def GetConcept(concept, lang): 9 | strings = [] 10 | 11 | if not Concept.exists(concept, lang): 12 | print '{' 13 | print '\tword = "%s",' % concept 14 | print '\terror = "Word not found!",' 15 | print '}' 16 | return None 17 | 18 | result = Concept.get(concept, lang) 19 | 20 | lang = result.language.name 21 | word = result.text 22 | assertions = str(result.num_assertions) 23 | 24 | relations = {} 25 | 26 | for item in result.get_assertions(): 27 | if not (item.relation.name in relations): 28 | relations[item.relation.name] = [] 29 | 30 | 
relations[item.relation.name].append( 31 | { 32 | '-- comment': item.__str__(), 33 | 'first': item.concept1.text, 34 | 'second': item.concept2.text, 35 | 'score': item.score, 36 | 'frequency': item.frequency.value, 37 | 'mods': '', 38 | }) 39 | 40 | print '{' 41 | print '\tword = "%s",' % word 42 | print '\tlang = "%s",' % lang 43 | print '\tassertions = %s,' % assertions 44 | 45 | for item.relation.name in relations: 46 | print '\t', item.relation.name, ' =' 47 | print '\t{' 48 | 49 | for v in relations[item.relation.name]: 50 | print '\t\t{' 51 | if v['first'] != word: 52 | print '\t\t\tfirst = "%s",' % v['first'] 53 | else: 54 | print '\t\t\tsecond = "%s",' % v['second'] 55 | 56 | if v['mods'] != '': 57 | print '\t\t\tmods = "%s",' % v['mods'] 58 | 59 | print '\t\t\tscore = %d,' % v['score'] 60 | print '\t\t\tfrequency = %d,' % v['frequency'] 61 | 62 | print '\t\t},' 63 | 64 | print '\t},' 65 | 66 | print '}' 67 | 68 | return result 69 | 70 | #################################################################################################### 71 | ## Main ############################################################################################ 72 | #################################################################################################### 73 | 74 | j = Language.get('ja') 75 | j_s = Sentence.objects.filter(language=j) 76 | e = Language.get('en') 77 | e_s = Sentence.objects.filter(language=e) 78 | parser = JaParser() 79 | 80 | u = \ 81 | [ 82 | parser.parse_string(v) for v in \ 83 | [ 84 | '赤いappleが9月に生える。', 85 | 'が', 86 | 'は', 87 | 'を', 88 | '1月', 89 | '1月', 90 | '私の彼って、最近車買ったんだよぉ?明日は軽井沢へ連れて行ってくれるんだぁ', 91 | '外国人はよく社会問題の原因だとせめられ、差別されるものです。', 92 | 'すてきな人に会いたい。', 93 | '大きな人に会いたい。', 94 | '大きい人に会いたい。', 95 | '赤い花は素敵。', 96 | 'アメリカには白人がいっぱい住んでいます。', 97 | 'テストには問題ない。', 98 | '夏休みに見に行った畑のいちごがとても赤かった。', 99 | '今すぐ行かなければならない。', 100 | '今日は寝てしまいました。', 101 | '君に今すぐ会いたい', 102 | 'この毛布は暖かくなかった。', 103 | 'この毛布は暖かくなるんだろう。', 104 | '彼女のかみが細かくて更々です。', 105 | '素敵な人に会いたい。', 106 | 
def objMethods(obj):
    """Return a sorted list of the names defined directly on ``obj``'s class.

    The names are the keys of ``obj.__class__.__dict__``, so they include
    every class-level attribute (methods and others alike) and exclude
    inherited and per-instance attributes.
    """
    # sorted() always yields a new list, so this does not depend on
    # filter() returning a list as the previous implementation did.
    return sorted(obj.__class__.__dict__)
-------------------------------------------------------------------------------- 1 | from Tkinter import * 2 | import concepttools,sys 3 | 4 | __version__ = "2.0" 5 | __author__ = "hugo@media.mit.edu" 6 | __url__ = 'www.conceptnet.org' 7 | config_filename = 'ConceptNet.ini' 8 | welcome_text = """ 9 | *************************************************** 10 | Welcome to the ConceptNet v2 mini-browser! 11 | (for more info, please visit www.conceptnet.org) 12 | *************************************************** 13 | The purpose of this browser is to allow you to 14 | explore the ConceptNet API interactively! 15 | Instructions for browsing: 16 | - First, click on one of the light-green or yellow 17 | buttons to select a mode of browsing 18 | - In the red box, enter some input text 19 | - Light-green buttons signify "node-level" modes, 20 | so you may only input concepts like "apple" or 21 | "eat food". You'll notice that the query 22 | automatically executes when you press the space 23 | bar or the return key. In this mode, concepts 24 | must be given in normalized form (verbs in 25 | infinitive form, no plurals, no "the" or "a") 26 | - Yellow buttons signify "document-level" modes, so 27 | you can paste any amount of text into the red 28 | box (e.g. a sentence to a document) and the text 29 | doesn't have to be normalized. In this mode, you 30 | must press the return key to execute your query. 31 | - Results are displayed in the deep-green box and 32 | you may have to scroll to see all of the results 33 | - Most modes are self-explanatory, but for 34 | additional information, please consult the api's 35 | html documentation and www.conceptnet.org 36 | That's all! So enjoy! 
37 | """ 38 | 39 | c = concepttools.ConceptTools() 40 | root = Tk() 41 | mode_var = StringVar() 42 | 43 | root.title("conceptnet 2.0 mini-browser"),root.option_add('*Font',('Courier', 14, 'bold')) 44 | 45 | frame1,win2,frame3 = Frame(root),Frame(root,height="1",bg="#CCFF99"),Frame(root) 46 | 47 | frame1.pack(fill=BOTH,expand=NO),win2.pack(fill=BOTH,expand=NO),frame3.pack(fill=BOTH,expand=YES) 48 | 49 | win,win3,win_scroll,win3_scroll = Text(frame1,bg="#FF3300",fg="white",height="3",wrap=WORD),Text(frame3,wrap=WORD,height="30",width="20",bg="#669933",fg="white"),Scrollbar(frame1),Scrollbar(frame3) 50 | 51 | win_scroll.pack(side=RIGHT,fill=Y),win3_scroll.pack(side=RIGHT,fill=Y),win.pack(fill=BOTH,expand=NO),win2.pack(fill=BOTH,expand=NO),win3.pack(fill=BOTH,expand=1) 52 | 53 | win.config(yscrollcommand=win_scroll.set),win3.config(yscrollcommand=win3_scroll.set),win_scroll.config(command=win.yview),win3_scroll.config(command=win3.yview) 54 | 55 | Radiobutton(win2,text="BROWSE",variable=mode_var,value='browse',fg="#FF3399",bg='#CCFF99',indicatoron=0).pack(side=LEFT),Radiobutton(win2,text="CONTEXT",variable=mode_var,value='context',indicatoron=0,fg="#FF3399",bg='#CCFF99').pack(side=LEFT),Radiobutton(win2,text="PROJECTION",variable=mode_var,value='projection',indicatoron=0,fg="#FF3399",bg='#CCFF99').pack(side=LEFT),Radiobutton(win2,text="ANALOGY",variable=mode_var,value='analogy',indicatoron=0,fg="#FF3399",bg='#CCFF99').pack(side=LEFT),Radiobutton(win2,text="GUESS CONCEPT",variable=mode_var,value='guessconcept',indicatoron=0,fg="#FF3399",bg='#FFFF66').pack(side=LEFT),Radiobutton(win2,text="GUESS TOPIC",variable=mode_var,value='guesstopic',indicatoron=0,fg="#FF3399",bg='#FFFF66').pack(side=LEFT),Radiobutton(win2,text="GUESS MOOD",variable=mode_var,value='guessmood',indicatoron=0,fg="#FF3399",bg='#FFFF66').pack(side=LEFT),Radiobutton(win2,text="SUMMARIZE",variable=mode_var,value='summarize',indicatoron=0,fg="#FF3399",bg='#FFFF66').pack(side=LEFT) 56 | 57 | 
def execution2(x):
    """Run the selected query on the input box and display the result.

    Registered with ``win.bind`` as an event callback; ``x`` is whatever Tk
    passes to the callback and is not used.

    The result pane (``win3``) is cleared first.  If the input box
    (``win``) contains only whitespace, the welcome text is shown again and
    ``None`` is returned.  Otherwise the text is reduced to ASCII, the mode
    selected in ``mode_var`` decides which ``ConceptTools`` query is run,
    the formatted result is inserted into the result pane and ``True`` is
    returned.  Any mode without its own branch (including no selection)
    falls back to ``c.display_node``.
    """
    win3.delete(0.0, END)

    # Read the widget once; the raw text decides whether there is a query.
    raw_text = win.get(0.0, END)
    if raw_text.strip() == '':
        win3.insert(0.0, welcome_text)
        return

    mode = mode_var.get()
    # Named ``query`` rather than ``input`` so the builtin is not shadowed.
    query = raw_text.encode('ascii', 'ignore').strip()
    # Node-level modes accept several comma-separated concepts.
    concepts = [tok.strip() for tok in query.split(',')]

    if mode == 'context':
        result = '\n'.join(['%s (%d%%)' % (concept, weight*100) for concept, weight in c.spreading_activation(concepts)]) + '\n\n'

    elif mode == 'projection':
        # One upper-cased heading per projection, followed by at most ten
        # weighted entries.
        result = '\n\n'.join([v[0].upper() + '\n' + '\n'.join([z[0] + ' (' + str(int(z[1]*100)) + '%)' for z in v[1]][:10]) for v in c.get_all_projections(concepts)]) + '\n\n'

    elif mode == 'analogy':
        result = '\n\n'.join(['[~' + match[0] + '] (' + str(match[2]) + ')\n ' + '\n '.join(['==' + struct[0] + '==> ' + struct[1] + ' (' + str(struct[2]) + ') ' for struct in match[1]]) for match in c.get_analogous_concepts(query)])

    elif mode == 'guessconcept':
        result = '\n\n'.join(['[is it: ' + match[0] + '?] (' + str(match[2]) + ')\n ' + '\n '.join(['==' + struct[0] + '==> ' + struct[1] + ' (' + str(struct[2]) + ') ' for struct in match[1]]) for match in c.nltools.guess_concept(query)])

    elif mode == 'guesstopic':
        result = '\n'.join([z[0] + ' (' + str(int(z[1]*100)) + '%)' for z in c.nltools.guess_topic(query)[1]]) + '\n\n'

    elif mode == 'guessmood':
        result = '\n'.join([z[0] + ' (' + str(int(z[1]*100)) + '%)' for z in c.nltools.guess_mood(query)]) + '\n\n'

    elif mode == 'summarize':
        result = c.nltools.summarize_document(query) + '\n\n'

    elif mode == 'foo':
        result = ''

    else:
        # Default ("browse" or nothing selected): show the node itself.
        result = c.display_node(query) + '\n\n'

    win3.insert(0.0, result)
    return True
'auth.permission': { 25 | 'Meta': {'unique_together': "(('content_type', 'codename'),)"}, 26 | 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), 27 | 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), 28 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 29 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) 30 | }, 31 | 'auth.user': { 32 | 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), 33 | 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), 34 | 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), 35 | 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'blank': 'True'}), 36 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 37 | 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}), 38 | 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}), 39 | 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}), 40 | 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), 41 | 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), 42 | 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), 43 | 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'}), 44 | 'username': ('django.db.models.fields.CharField', [], {'max_length': '30', 'unique': 'True'}) 45 | }, 46 | 'contenttypes.contenttype': { 47 | 'Meta': {'unique_together': "(('app_label', 'model'),)", 'db_table': "'django_content_type'"}, 48 | 'app_label': 
('django.db.models.fields.CharField', [], {'max_length': '100'}), 49 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 50 | 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), 51 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) 52 | }, 53 | 'corpus.dependencyparse': { 54 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 55 | 'index1': ('django.db.models.fields.IntegerField', [], {}), 56 | 'index2': ('django.db.models.fields.IntegerField', [], {}), 57 | 'linktype': ('django.db.models.fields.CharField', [], {'max_length': '20'}), 58 | 'sentence': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Sentence']"}), 59 | 'word1': ('django.db.models.fields.CharField', [], {'max_length': '100'}), 60 | 'word2': ('django.db.models.fields.CharField', [], {'max_length': '100'}) 61 | }, 62 | 'corpus.language': { 63 | 'id': ('django.db.models.fields.CharField', [], {'max_length': '16', 'primary_key': 'True'}), 64 | 'name': ('django.db.models.fields.TextField', [], {'blank': 'True'}), 65 | 'sentence_count': ('django.db.models.fields.IntegerField', [], {'default': '0'}) 66 | }, 67 | 'corpus.sentence': { 68 | 'activity': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['events.Activity']"}), 69 | 'created_on': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), 70 | 'creator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}), 71 | 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 72 | 'language': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Language']"}), 73 | 'score': ('django.db.models.fields.IntegerField', [], {'default': '0'}), 74 | 'text': ('django.db.models.fields.TextField', [], {}), 75 | 'votes': ('django.contrib.contenttypes.generic.GenericRelation', [], {'to': "orm['voting.Vote']"}) 76 | }, 77 | 'corpus.taggedsentence': { 78 | 
def process_yaml(entry, lang, batch):
    """
    Turn one parsed entry into an Assertion and RawAssertion, or skip it.

    `entry` is either None (skipped) or a mapping with the keys
    'frametext', 'id', 'matches' and 'reltext'.  `matches` is read with the
    keys 1, 2 and 'a'.  `lang` is passed to the SurfaceForm, Frame,
    RawAssertion and Assertion lookups; `batch` is only used as the default
    batch of a newly created RawAssertion.

    Returns a one-element list holding the assertion, or an empty list when
    the entry is skipped.
    """
    if entry is None: return []
    frametext, id, matches, reltext = (entry['frametext'], entry['id'],
                                       entry['matches'], entry['reltext'])
    sentence = Sentence.objects.get(id=id)
    print sentence.text.encode('utf-8')
    # Sentences from the activities listed in good_acts are left alone.
    if sentence.activity.id in good_acts:
        print "(we have a better parse)"
        return []
    # Skip sentences that begin with one of these fixed openings.
    if (sentence.text.startswith('Situation:')
        or sentence.text.startswith('The statement')
        or sentence.text.startswith('To understand')
        or sentence.text.startswith('In the event')):
        print "* skipped *"
        return []
    # NOTE(review): matches.get(2) is None when key 2 is absent, which makes
    # this line raise AttributeError -- confirm key 2 is always present.
    if matches.get(2).startswith('do the following'):
        print "** skipped **"
        return []

    if reltext is None or reltext == 'junk': return []

    # quick fixes
    if reltext == 'AtLocation' and matches.get('a') == 'of': return []
    if reltext == 'AtLocation' and matches.get('a') == 'near':
        reltext = 'LocatedNear'
    # A preposition in slot 'a' turns IsA/CapableOf into AtLocation, and the
    # slot is blanked so it is not mapped to a frequency below.
    if reltext in ['IsA', 'CapableOf'] and matches.get('a') in ['in', 'on', 'at', 'by']:
        reltext = 'AtLocation'
        matches['a'] = ''
    for val in matches.values():
        if len(val.split()) > 6:
            # we'd rather wait to parse this better.
            return []

    relation = Relation.objects.get(name=reltext)

    # Slots 1 and 2 become the two surface forms (created on demand).
    surface_forms = [SurfaceForm.get(matches[i], lang, auto_create=True)
                     for i in (1, 2)]
    concepts = [s.concept for s in surface_forms]

    # FIXME: english only so far
    freq = map_adverb(matches.get('a', ''))

    # frequency and goodness are only applied when the frame is created.
    frame, _ = Frame.objects.get_or_create(relation=relation, language=lang,
                                           text=frametext,
                                           defaults=dict(frequency=freq,
                                                         goodness=1))
    frame.save()

    raw_assertion, _ = RawAssertion.objects.get_or_create(
        surface1=surface_forms[0],
        surface2=surface_forms[1],
        frame=frame,
        language=lang,
        creator=sentence.creator,
        defaults=dict(batch=batch))
    # still need to set assertion_id

    assertion, _ = Assertion.objects.get_or_create(
        relation=relation,
        concept1=concepts[0],
        concept2=concepts[1],
        frequency=freq,
        language=lang,
        defaults=dict(score=0)
        )
    # NOTE(review): the save() below is commented out, so this increment is
    # not written to the database by this function itself -- confirm whether
    # set_rating is expected to persist the score.
    assertion.score += 1
    #assertion.save()

    raw_assertion.assertion = assertion
    raw_assertion.sentence = sentence
    raw_assertion.save()

    # The sentence's creator rates all three objects +1.
    sentence.set_rating(sentence.creator, 1, csamoa4_activity)
    raw_assertion.set_rating(sentence.creator, 1, csamoa4_activity)
    assertion.set_rating(sentence.creator, 1, csamoa4_activity)

    # Copy the ratings attached to the old (cn3) predicates of this sentence
    # onto the new objects, clamping each score to -1, 0 or +1.
    for old_raw in cn3.RawAssertion.objects.filter(sentence=sentence):
        pred = old_raw.predicate
        if not pred: continue
        for rating in pred.rating_set.all():
            score = rating.rating_value.deltascore
            if score > 0: score = 1
            if score < 0: score = -1
            # Ratings without an activity are filed under 'unknown'.
            if rating.activity_id is None:
                rating_activity = Activity.objects.get(name='unknown')
            else:
                rating_activity = rating.activity
            sentence.set_rating(rating.user, score, rating_activity)
            raw_assertion.set_rating(rating.user, score, rating_activity)
            assertion.set_rating(rating.user, score, rating_activity)

    print '=>', unicode(assertion).encode('utf-8')
    return [assertion]
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ObjectDoesNotExist
from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.contrib.auth.views import redirect_to_login
from django.template import loader, RequestContext
from django.utils import simplejson

from voting.models import Vote

VOTE_DIRECTIONS = (('up', 1), ('down', -1), ('clear', 0))


def _build_lookup_kwargs(model, object_id, slug, slug_field):
    """Return the ORM filter identifying the voted-on object, or None.

    ``object_id`` takes precedence; otherwise both ``slug`` and
    ``slug_field`` are required.  None means the caller supplied neither.
    """
    if object_id:
        return {'%s__exact' % model._meta.pk.name: object_id}
    if slug and slug_field:
        return {'%s__exact' % slug_field: slug}
    return None


def vote_on_object(request, model, direction, post_vote_redirect=None,
        object_id=None, slug=None, slug_field=None, template_name=None,
        template_loader=loader, extra_context=None, context_processors=None,
        template_object_name='object', allow_xmlhttprequest=False):
    """
    Generic object vote function.

    The given template will be used to confirm the vote if this view is
    fetched using GET; vote registration will only be performed if this
    view is POSTed.

    If ``allow_xmlhttprequest`` is ``True`` and an XMLHttpRequest is
    detected by examining the ``HTTP_X_REQUESTED_WITH`` header, the
    ``xmlhttprequest_vote_on_object`` view will be used to process the
    request - this makes it trivial to implement voting via
    XMLHttpRequest with a fallback for users who don't have JavaScript
    enabled.

    Templates: ``<app_label>/<model_name>_confirm_vote.html``
    Context:
        object
            The object being voted on.
        direction
            The type of vote which will be registered for the object.

    Raises ``AttributeError`` when ``direction`` is not one of
    ``VOTE_DIRECTIONS``, when neither ``object_id`` nor ``slug`` plus
    ``slug_field`` is given, or when no redirect target can be determined
    after a POST.  Raises ``Http404`` when the object does not exist.
    Unauthenticated users are redirected to the login page.
    """
    if allow_xmlhttprequest and request.is_ajax():
        return xmlhttprequest_vote_on_object(request, model, direction,
                                             object_id=object_id, slug=slug,
                                             slug_field=slug_field)

    if extra_context is None:
        extra_context = {}
    if not request.user.is_authenticated():
        return redirect_to_login(request.path)

    try:
        vote = dict(VOTE_DIRECTIONS)[direction]
    except KeyError:
        # The message used to reference an undefined name (vote_type), so
        # this path raised NameError instead of the intended AttributeError.
        raise AttributeError("'%s' is not a valid vote type." % direction)

    # Look up the object to be voted on
    lookup_kwargs = _build_lookup_kwargs(model, object_id, slug, slug_field)
    if lookup_kwargs is None:
        raise AttributeError('Generic vote view must be called with either '
                             'object_id or slug and slug_field.')
    try:
        obj = model._default_manager.get(**lookup_kwargs)
    except ObjectDoesNotExist:
        # Name the model (not the app), matching the XMLHttpRequest view.
        raise Http404('No %s found for %s.'
                      % (model._meta.verbose_name, lookup_kwargs))

    if request.method == 'POST':
        # Redirect target, in order of precedence: explicit argument,
        # "next" request parameter, the object's own URL.
        if post_vote_redirect is not None:
            next_url = post_vote_redirect
        elif 'next' in request.REQUEST:
            next_url = request.REQUEST['next']
        elif hasattr(obj, 'get_absolute_url'):
            if callable(getattr(obj, 'get_absolute_url')):
                next_url = obj.get_absolute_url()
            else:
                next_url = obj.get_absolute_url
        else:
            raise AttributeError('Generic vote view must be called with either '
                                 'post_vote_redirect, a "next" parameter in '
                                 'the request, or the object being voted on '
                                 'must define a get_absolute_url method or '
                                 'property.')
        Vote.objects.record_vote(obj, request.user, vote)
        return HttpResponseRedirect(next_url)

    # Any other method: render the confirmation page.
    if not template_name:
        template_name = '%s/%s_confirm_vote.html' % (
            model._meta.app_label, model._meta.object_name.lower())
    t = template_loader.get_template(template_name)
    c = RequestContext(request, {
        template_object_name: obj,
        'direction': direction,
    }, context_processors)
    for key, value in extra_context.items():
        # Callables in extra_context are evaluated at render time.
        if callable(value):
            c[key] = value()
        else:
            c[key] = value
    return HttpResponse(t.render(c))


def json_error_response(error_message):
    """Return an HttpResponse whose body is a JSON failure object.

    The object has ``success`` set to false and ``error_message`` set to
    the given text.
    """
    return HttpResponse(simplejson.dumps(dict(success=False,
                                              error_message=error_message)))


def xmlhttprequest_vote_on_object(request, model, direction,
    object_id=None, slug=None, slug_field=None):
    """
    Generic object vote function for use via XMLHttpRequest.

    Properties of the resulting JSON object:
        success
            ``true`` if the vote was successfully processed, ``false``
            otherwise.
        score
            The object's updated score and number of votes if the vote
            was successfully processed.
        error_message
            Contains an error message if the vote was not successfully
            processed.

    Every failure is reported through the JSON body; this view does not
    raise for bad input.
    """
    if request.method == 'GET':
        return json_error_response(
            'XMLHttpRequest votes can only be made using POST.')
    if not request.user.is_authenticated():
        return json_error_response('Not authenticated.')

    try:
        vote = dict(VOTE_DIRECTIONS)[direction]
    except KeyError:
        return json_error_response(
            '\'%s\' is not a valid vote type.' % direction)

    # Look up the object to be voted on
    lookup_kwargs = _build_lookup_kwargs(model, object_id, slug, slug_field)
    if lookup_kwargs is None:
        return json_error_response('Generic XMLHttpRequest vote view must be '
                                   'called with either object_id or slug and '
                                   'slug_field.')
    try:
        obj = model._default_manager.get(**lookup_kwargs)
    except ObjectDoesNotExist:
        return json_error_response(
            'No %s found for %s.' % (model._meta.verbose_name, lookup_kwargs))

    # Vote and respond
    Vote.objects.record_vote(obj, request.user, vote)
    return HttpResponse(simplejson.dumps({
        'success': True,
        'score': Vote.objects.get_score(obj),
    }))
Vote
idAutoField
userForeignKey
content_typeForeignKey
object_idPositiveIntegerField
voteSmallIntegerField
66 | >] 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | events_models_Activity [label=< 76 | 77 | 80 | 81 | 82 | 83 | 86 | 89 | 90 | 93 | 96 | 97 | 98 |
Activity
idAutoField
nameTextField
99 | >] 100 | 101 | events_models_Event [label=< 102 | 103 | 106 | 107 | 108 | 109 | 112 | 115 | 116 | 119 | 122 | 123 | 126 | 129 | 130 | 133 | 136 | 137 | 140 | 143 | 144 | 147 | 150 | 151 | 152 |
Event
idAutoField
userForeignKey
content_typeForeignKey
object_idPositiveIntegerField
activityForeignKey
timestampDateTimeField
153 | >] 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | django_contrib_auth_models_User [label=< 163 | 164 | 167 |
User
168 | >] 169 | 170 | voting_models_Vote -> django_contrib_auth_models_User 171 | [label="user"] ; 172 | 173 | 174 | django_contrib_contenttypes_models_ContentType [label=< 175 | 176 | 179 |
ContentType
180 | >] 181 | 182 | voting_models_Vote -> django_contrib_contenttypes_models_ContentType 183 | [label="content_type"] ; 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | django_contrib_auth_models_User [label=< 194 | 195 | 198 |
User
199 | >] 200 | 201 | events_models_Event -> django_contrib_auth_models_User 202 | [label="user"] ; 203 | 204 | 205 | django_contrib_contenttypes_models_ContentType [label=< 206 | 207 | 210 |
ContentType
211 | >] 212 | 213 | events_models_Event -> django_contrib_contenttypes_models_ContentType 214 | [label="content_type"] ; 215 | 216 | 217 | events_models_Event -> events_models_Activity 218 | [label="activity"] ; 219 | 220 | 221 | 222 | 223 | } 224 | 225 | -------------------------------------------------------------------------------- /conceptnet/django_settings/__init__.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | 3 | ### 4 | ### Database configuration 5 | ### 6 | 7 | # ConceptNet uses a database configuration file to determine how to 8 | # connect to the database. It's just a normal Python file (e.g., 9 | # db_config.py) that contains the Django database settings (see 10 | # http://docs.djangoproject.com/en/dev/intro/tutorial01/#database-setup 11 | # or 12 | # http://docs.djangoproject.com/en/dev/ref/settings/#setting-DATABASE_ENGINE 13 | # 14 | # You just have to tell ConceptNet how to find this file. You can put 15 | # the full path to this file in the CONCEPTNET_DB_CONFIG environment 16 | # variable, or you can put the file on the Python path. 17 | # 18 | # Added bonuses: 19 | # 1. You can use either DATABASE_ or DB_ in your configuration variables. 20 | # 2. If DATABASE_ENGINE is sqlite3, DATABASE_NAME will be treated as relative 21 | # to the database config file. 22 | # 3. 
#    You can use '~' in the environment variable to mean your home directory,
#    like ~/commonsense/db_config.py

# Load the database configuration into the dict `db_config`, and remember the
# directory it came from in `db_config_dir` so that relative sqlite paths can
# be resolved against it.  Sources, in order of precedence: the file named by
# CONCEPTNET_DB_CONFIG, a `db_config` module on the Python path, and finally
# the bundled default configuration.
if 'CONCEPTNET_DB_CONFIG' in os.environ:
    db_config = {}
    db_config_path = os.path.expanduser(os.environ['CONCEPTNET_DB_CONFIG'])
    # Use an absolute directory so later lookups do not depend on the
    # working directory at the time they run.
    db_config_dir = os.path.dirname(os.path.abspath(db_config_path))
    execfile(db_config_path, db_config)
else:
    try:
        import db_config
        db_config_dir = os.path.abspath(os.path.dirname(db_config.__file__))
        db_config = db_config.__dict__
    except ImportError:
        from conceptnet.django_settings import default_db_config
        # This branch used to leave db_config_dir undefined, so
        # relative_to_db_config() raised NameError for a relative path.
        db_config_dir = os.path.abspath(
            os.path.dirname(default_db_config.__file__))
        db_config = default_db_config.__dict__
        if not os.path.exists(db_config['DB_NAME']):
            # Offer to download the default database; give up if declined.
            from conceptnet.django_settings import db_downloader
            if not db_downloader.prompt_for_download(db_config['DB_NAME']):
                raise SystemExit


def get_db_config(param, default=''):
    """Return a database setting, accepting either spelling of its name.

    Looks up ``DATABASE_<param>`` first, then ``DB_<param>``, in the loaded
    configuration, and returns `default` when neither is present.
    """
    long_param = 'DATABASE_'+param
    short_param = 'DB_'+param
    if long_param in db_config: return db_config[long_param]
    if short_param in db_config: return db_config[short_param]
    return default


def relative_to_db_config(path):
    """Return `path` normalized, resolving a relative path against the
    directory of the database configuration file."""
    if not os.path.isabs(path):
        path = os.path.join(db_config_dir, path)
    return os.path.normpath(path)


# This sets the Python path to include the distributed libraries.
import conceptnet.lib

DEBUG = db_config.get('DEBUG', False)
TEMPLATE_DEBUG = DEBUG

ADMINS = ()

MANAGERS = ADMINS

DATABASE_ENGINE = get_db_config('ENGINE')           # 'postgresql', 'mysql', 'sqlite3' or 'ado_mssql'.
DATABASE_NAME = get_db_config('NAME')               # Or path to database file if using sqlite3.
if DATABASE_ENGINE == 'sqlite3':
    # normalize the path name
    DATABASE_NAME = relative_to_db_config(DATABASE_NAME)
DATABASE_USER = get_db_config('USER', '')           # Not used with sqlite3.
DATABASE_PASSWORD = get_db_config('PASSWORD', '')   # Not used with sqlite3.
DATABASE_HOST = get_db_config('HOST', '')           # Set to empty string for localhost. Not used with sqlite3.
DATABASE_PORT = get_db_config('PORT', '')           # Set to empty string for default. Not used with sqlite3.
DATABASE_OPTIONS = get_db_config('OPTIONS', {})

# The same connection settings again, in the dictionary form of the
# DATABASES setting.
# NOTE(review): 'ENGINE' becomes the bare prefix 'django.db.backends.' when
# no engine is configured (DATABASE_ENGINE defaults to '') -- confirm that
# case cannot occur.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.'+DATABASE_ENGINE,
        'NAME': DATABASE_NAME,
        'USER': DATABASE_USER,
        'PASSWORD': DATABASE_PASSWORD,
        'HOST': DATABASE_HOST,
        'PORT': DATABASE_PORT,
        'OPTIONS': DATABASE_OPTIONS
    }
}

# Local time zone for this installation. All choices can be found here:
# http://www.postgresql.org/docs/current/static/datetime-keywords.html#DATETIME-TIMEZONE-SET-TABLE
TIME_ZONE = 'America/New_York'

# Language code for this installation. All choices can be found here:
# http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes
# http://blogs.law.harvard.edu/tech/stories/storyReader$15
LANGUAGE_CODE = 'en-us'

SITE_ID = 1

# If you set this to False, Django will make some optimizations so as not
# to load the internationalization machinery.
USE_I18N = True

# Absolute path to the directory that holds media.
# Example: "/home/media/media.lawrence.com/"
MEDIA_ROOT = ''

# URL that handles the media served from MEDIA_ROOT.
# Example: "http://media.lawrence.com"
MEDIA_URL = ''

# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
# trailing slash.
# Examples: "http://foo.com/media/", "/media/".
ADMIN_MEDIA_PREFIX = '/media/'

# Make this unique, and don't share it with anybody.
# NOTE(review): this key is committed in the source file, so every
# installation that does not override it shares the same value.
SECRET_KEY = 'rebo=05i#a6^%d3m#a=0dzy)cs7(ek%!^nvhwe93n1g4rajas1'

# List of callables that know how to import templates from various sources.
TEMPLATE_LOADERS = (
    'django.template.loaders.filesystem.load_template_source',
    'django.template.loaders.app_directories.load_template_source',
#     'django.template.loaders.eggs.load_template_source',
)

# Middleware necessary for the admin site.
MIDDLEWARE_CLASSES = (
    # URL normalization, etc.
    'django.middleware.common.CommonMiddleware',
    # Handle sessions.
    'django.contrib.sessions.middleware.SessionMiddleware',
    # Keep track of users.
    'django.contrib.auth.middleware.AuthenticationMiddleware',
)

# The legacy backend is tried before Django's own model backend.
AUTHENTICATION_BACKENDS = (
    'conceptnet.pseudo_auth.backends.LegacyBackend',
    'django.contrib.auth.backends.ModelBackend',
)

ROOT_URLCONF = 'urls'

INSTALLED_APPS = (
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.sites',
    'django.contrib.admin',
    'conceptnet.pseudo_auth',
    'conceptnet.corpus',
    'conceptnet.webapi',
    'conceptnet',
    'simplenlp',
    'voting',
    'events',
#    'south',
#    'django.contrib.markup',
)

# Serve the API if we can.
SERVE_API = db_config.get('SERVE_API', False)
if SERVE_API:
    try:
        import conceptnet.webapi.handlers
    except ImportError:
        # The API handlers (or their dependencies) are unavailable; carry on
        # without them.
        pass
    else:
        # 'conceptnet.webapi' is already listed above; only add it when it
        # is missing, so the app never appears twice.
        if 'conceptnet.webapi' not in INSTALLED_APPS:
            INSTALLED_APPS += ('conceptnet.webapi',)

# Install command extensions, if available.
try:
    import django_extensions
except ImportError:
    pass
else:
    INSTALLED_APPS += ('django_extensions',)

# Use memcache if available.
179 | memcache = False 180 | try: 181 | import cmemcache 182 | memcache = True 183 | except ImportError: 184 | try: 185 | import memcache 186 | memcache = True 187 | except ImportError: 188 | pass 189 | 190 | if memcache: 191 | CACHE_BACKEND="memcached://127.0.0.1:11211" 192 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # ConceptNet documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Feb 27 17:56:32 2009. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # The contents of this file are pickled, so don't put values in the namespace 9 | # that aren't pickleable (module imports are okay, they're removed automatically). 10 | # 11 | # Note that not all possible configuration values are present in this 12 | # autogenerated file. 13 | # 14 | # All configuration values have a default; values that are commented out 15 | # serve to show the default. 16 | 17 | import sys, os 18 | 19 | # If your extensions are in another directory, add it here. If the directory 20 | # is relative to the documentation root, use os.path.abspath to make it 21 | # absolute, like shown here. 22 | sys.path.append(os.path.abspath('..')) 23 | 24 | # General configuration 25 | # --------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx'] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = '.rst' 36 | 37 | # The encoding of source files. 38 | #source_encoding = 'utf-8' 39 | 40 | # The master toctree document. 
41 | master_doc = 'index' 42 | 43 | # General information about the project. 44 | project = u'ConceptNet' 45 | copyright = u'2009, Commonsense Computing Initiative' 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The short X.Y version. 52 | version = '3.5' 53 | # The full version, including alpha/beta/rc tags. 54 | release = '3.5pre' 55 | 56 | # The language for content autogenerated by Sphinx. Refer to documentation 57 | # for a list of supported languages. 58 | #language = None 59 | 60 | # There are two options for replacing |today|: either, you set today to some 61 | # non-false value, then it is used: 62 | #today = '' 63 | # Else, today_fmt is used as the format for a strftime call. 64 | #today_fmt = '%B %d, %Y' 65 | 66 | # List of documents that shouldn't be included in the build. 67 | #unused_docs = [] 68 | 69 | # List of directories, relative to source directory, that shouldn't be searched 70 | # for source files. 71 | exclude_trees = [] 72 | 73 | # The reST default role (used for this markup: `text`) to use for all documents. 74 | #default_role = None 75 | 76 | # If true, '()' will be appended to :func: etc. cross-reference text. 77 | #add_function_parentheses = True 78 | 79 | # If true, the current module name will be prepended to all description 80 | # unit titles (such as .. function::). 81 | #add_module_names = True 82 | 83 | # If true, sectionauthor and moduleauthor directives will be shown in the 84 | # output. They are ignored by default. 85 | #show_authors = False 86 | 87 | # The name of the Pygments (syntax highlighting) style to use. 88 | pygments_style = 'sphinx' 89 | 90 | 91 | # Options for HTML output 92 | # ----------------------- 93 | 94 | # The style sheet to use for HTML and HTML Help pages. 
A file of that name 95 | # must exist either in Sphinx' static/ path, or in one of the custom paths 96 | # given in html_static_path. 97 | html_style = 'default.css' 98 | 99 | # The name for this set of Sphinx documents. If None, it defaults to 100 | # " v documentation". 101 | #html_title = None 102 | 103 | # A shorter title for the navigation bar. Default is the same as html_title. 104 | #html_short_title = None 105 | 106 | # The name of an image file (relative to this directory) to place at the top 107 | # of the sidebar. 108 | #html_logo = None 109 | 110 | # The name of an image file (within the static path) to use as favicon of the 111 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 112 | # pixels large. 113 | #html_favicon = None 114 | 115 | # Add any paths that contain custom static files (such as style sheets) here, 116 | # relative to this directory. They are copied after the builtin static files, 117 | # so a file named "default.css" will overwrite the builtin "default.css". 118 | html_static_path = ['_static'] 119 | 120 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 121 | # using the given strftime format. 122 | #html_last_updated_fmt = '%b %d, %Y' 123 | 124 | # If true, SmartyPants will be used to convert quotes and dashes to 125 | # typographically correct entities. 126 | #html_use_smartypants = True 127 | 128 | # Custom sidebar templates, maps document names to template names. 129 | #html_sidebars = {} 130 | 131 | # Additional templates that should be rendered to pages, maps page names to 132 | # template names. 133 | #html_additional_pages = {} 134 | 135 | # If false, no module index is generated. 136 | #html_use_modindex = True 137 | 138 | # If false, no index is generated. 139 | #html_use_index = True 140 | 141 | # If true, the index is split into individual pages for each letter. 
142 | #html_split_index = False 143 | 144 | # If true, the reST sources are included in the HTML build as _sources/. 145 | #html_copy_source = True 146 | 147 | # If true, an OpenSearch description file will be output, and all pages will 148 | # contain a tag referring to it. The value of this option must be the 149 | # base URL from which the finished HTML is served. 150 | #html_use_opensearch = '' 151 | 152 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 153 | #html_file_suffix = '' 154 | 155 | # Output file base name for HTML help builder. 156 | htmlhelp_basename = 'ConceptNetdoc' 157 | 158 | 159 | # Options for LaTeX output 160 | # ------------------------ 161 | 162 | # The paper size ('letter' or 'a4'). 163 | #latex_paper_size = 'letter' 164 | 165 | # The font size ('10pt', '11pt' or '12pt'). 166 | #latex_font_size = '10pt' 167 | 168 | # Grouping the document tree into LaTeX files. List of tuples 169 | # (source start file, target name, title, author, document class [howto/manual]). 170 | latex_documents = [ 171 | ('index', 'ConceptNet.tex', ur'ConceptNet Documentation', 172 | ur'Commonsense Computing Initiative', 'manual'), 173 | ] 174 | 175 | # The name of an image file (relative to this directory) to place at the top of 176 | # the title page. 177 | #latex_logo = None 178 | 179 | # For "manual" documents, if this is true, then toplevel headings are parts, 180 | # not chapters. 181 | #latex_use_parts = False 182 | 183 | # Additional stuff for the LaTeX preamble. 184 | #latex_preamble = '' 185 | 186 | # Documents to append as an appendix to all manuals. 187 | #latex_appendices = [] 188 | 189 | # If false, no module index is generated. 190 | #latex_use_modindex = True 191 | 192 | 193 | # Example configuration for intersphinx: refer to the Python standard library. 
class Migration:
    """South schema migration that renames the legacy ConceptNet tables.

    ``forwards`` renames each old table to its ``conceptnet_*`` name and
    ``backwards`` applies the exact inverse renames.
    """

    def forwards(self, orm):
        # Move every legacy table to its new ``conceptnet_*`` name.
        db.rename_table('parsing_batch', 'conceptnet_batch')
        db.rename_table('predicatetypes', 'conceptnet_relation')
        db.rename_table('conceptnet_frames', 'conceptnet_frame')
        db.rename_table('concepts', 'conceptnet_concept')
        db.rename_table('surface_forms', 'conceptnet_surfaceform')
        db.rename_table('assertions', 'conceptnet_assertion')
        db.rename_table('raw_assertions', 'conceptnet_rawassertion')

    def backwards(self, orm):
        # Undo forwards(): restore each table's legacy name.
        db.rename_table('conceptnet_batch', 'parsing_batch')
        db.rename_table('conceptnet_relation', 'predicatetypes')
        db.rename_table('conceptnet_frame', 'conceptnet_frames')
        db.rename_table('conceptnet_concept', 'concepts')
        db.rename_table('conceptnet_surfaceform', 'surface_forms')
        db.rename_table('conceptnet_assertion', 'assertions')
        db.rename_table('conceptnet_rawassertion', 'raw_assertions')

    # Frozen model definitions recorded alongside this migration.
    # NOTE(review): only auth/contenttypes/corpus/events/voting models are
    # frozen here and complete_apps below names 'corpus', although this file
    # lives under conceptnet/migrations and renames conceptnet tables --
    # confirm this is intended and not a copy of the corpus migration.
    models = {
        'auth.group': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '80', 'unique': 'True'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'})
        },
        'auth.permission': {
            'Meta': {'unique_together': "(('content_type', 'codename'),)"},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        'auth.user': {
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'}),
            'username': ('django.db.models.fields.CharField', [], {'max_length': '30', 'unique': 'True'})
        },
        'contenttypes.contenttype': {
            'Meta': {'unique_together': "(('app_label', 'model'),)", 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'corpus.dependencyparse': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'index1': ('django.db.models.fields.IntegerField', [], {}),
            'index2': ('django.db.models.fields.IntegerField', [], {}),
            'linktype': ('django.db.models.fields.CharField', [], {'max_length': '20'}),
            'sentence': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Sentence']"}),
            'word1': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'word2': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'corpus.language': {
            'id': ('django.db.models.fields.CharField', [], {'max_length': '16', 'primary_key': 'True'}),
            'name': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
            'sentence_count': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        'corpus.sentence': {
            'activity': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['events.Activity']"}),
            'created_on': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'creator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'language': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Language']"}),
            'score': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'text': ('django.db.models.fields.TextField', [], {}),
            'votes': ('django.contrib.contenttypes.generic.GenericRelation', [], {'to': "orm['voting.Vote']"})
        },
        'corpus.taggedsentence': {
            'language': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Language']"}),
            'sentence': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Sentence']", 'primary_key': 'True'}),
            'text': ('django.db.models.fields.TextField', [], {})
        },
        'events.activity': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.TextField', [], {})
        },
        'voting.vote': {
            'Meta': {'unique_together': "(('user', 'content_type', 'object_id'),)", 'db_table': "'votes'"},
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'object_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}),
            'vote': ('django.db.models.fields.SmallIntegerField', [], {})
        }
    }

    complete_apps = ['corpus']
DEFAULT_IDENTITY_WEIGHT = 0
DEFAULT_CUTOFF = 5

log_2 = log(2)

def get_value(score, freq):
    """
    Turn an assertion's score and frequency into a single matrix value.

    The score contributes on a base-2 logarithmic scale (diminishing
    returns); any score below 1 is clamped so it contributes 0. The result
    is then scaled by `freq` / 10, so a negative frequency yields a
    negative (or zero) value.
    """
    # Never take the log of anything below 1: scores <= 0 contribute nothing.
    clamped = max(score + 1, 1)
    # Multiply before dividing, matching the left-to-right float evaluation
    # of the original one-line expression.
    scaled = (freq / 10.0) * log(clamped)
    return scaled / log_2
def conceptnet_quads(query, cutoff=DEFAULT_CUTOFF):
    '''
    Generate (concept1, relation, concept2, value) quads for ConceptNet.

    `query` may be a language identifier (a string or a Language), in which
    case the default queryset for that language is built with
    conceptnet_queryset(). Anything else is assumed to be a Django QuerySet
    of Assertions and is used as given (`cutoff` is then ignored).
    '''
    if not isinstance(query, (basestring, Language)):
        assertions = query
    else:
        assertions = conceptnet_queryset(query, cutoff=cutoff)

    columns = ('relation__name', 'concept1__text', 'concept2__text',
               'score', 'frequency__value')
    for row in assertions.values_list(*columns).iterator():
        relation, concept1, concept2, score, freq = row
        # Note the reordering: the relation moves between the two concepts.
        yield (concept1, relation, concept2, get_value(score, freq))

def conceptnet_queryset(lang=None, cutoff=DEFAULT_CUTOFF):
    """
    Build the usual queryset for pulling assertions out of ConceptNet.

    - Starts from Assertion.useful, keeping only assertions with score > 0
    - Restricts to language `lang`, unless lang is None
    - When `cutoff` is truthy, keeps only assertions whose two concepts each
      appear in at least `cutoff` assertions
    """
    assertions = Assertion.useful.filter(score__gt=0)
    if lang is not None:
        assertions = assertions.filter(language=lang)
    if not cutoff:
        return assertions
    return assertions.filter(
        concept1__num_assertions__gte=cutoff,
        concept2__num_assertions__gte=cutoff)
def rating_quads(lang, cutoff=DEFAULT_CUTOFF, filter=None):
    '''
    Generate one (concept1, relation, concept2, vote) quad per rating
    (AssertionVote) on an Assertion.

    Only votes whose assertion's two concepts each appear in at least
    `cutoff` assertions are included. If `filter` (e.g. a
    django.db.models.Q object) is given, it is applied to the vote queryset.

    NOTE(review): `lang` is accepted but never used in the query, so votes
    from every language are returned -- confirm whether that is intended.
    '''
    from conceptnet.models import AssertionVote
    votes = AssertionVote.objects.filter(
        assertion__concept1__num_assertions__gte=cutoff,
        assertion__concept2__num_assertions__gte=cutoff)
    if filter is not None:
        votes = votes.filter(filter)

    columns = ('assertion__concept1__text', 'assertion__relation__name',
               'assertion__concept2__text', 'vote')
    for row in votes.values_list(*columns).iterator():
        concept1, rel, concept2, vote = row
        yield (concept1, rel, concept2, vote)

def rawassertion_quads(lang, cutoff=DEFAULT_CUTOFF):
    '''
    Experimental data source built from RawAssertions only.

    For every RawAssertion in language `lang` with a positive score whose
    two surface forms belong to concepts with at least `cutoff` assertions,
    yields, in order:

    - the raw quad (surface text 1, frame id, surface text 2, value)
    - the normalized quad (concept1, relation, concept2, value)
    - four 'NormalizesTo' quads of weight 1, linking each concept to its
      surface text and to itself
    '''
    from conceptnet.models import RawAssertion
    raws = RawAssertion.objects.filter(
        score__gt=0,
        surface1__concept__num_assertions__gte=cutoff,
        surface2__concept__num_assertions__gte=cutoff,
        language=lang)

    columns = ('frame__relation__name', 'surface1__concept__text',
               'surface2__concept__text', 'surface1__text', 'surface2__text',
               'frame__id', 'score', 'frame__frequency__value')
    for row in raws.values_list(*columns).iterator():
        rel, concept1, concept2, text1, text2, frame_id, score, freq = row
        value = get_value(score, freq)

        # Raw
        yield (text1, frame_id, text2, value)

        # Assertion
        yield (concept1, rel, concept2, value)

        # NormalizesTo: concept -> surface text, then concept -> itself
        normalizations = ((concept1, text1), (concept2, text2),
                          (concept1, concept1), (concept2, concept2))
        for source, target in normalizations:
            yield (source, 'NormalizesTo', target, 1)
def to_value_concept_feature(quads):
    """
    Convert a stream of (concept1, rel, concept2, value) quads into a stream
    of twice as many (value, concept, feature) triples.

    Each quad produces a 'right' feature for concept1 and a 'left' feature
    for concept2; a feature is a (direction, rel, other_concept) tuple.
    """
    for concept1, rel, concept2, value in quads:
        yield value, concept1, ('right', rel, concept2)
        yield value, concept2, ('left', rel, concept1)

def to_value_concept_concept(quads):
    """
    Convert a stream of (concept1, rel, concept2, value) quads into a stream
    of twice as many (value, concept1, concept2) triples, ignoring the
    relation and simply treating all kinds of edges equally.

    Each quad is emitted once in each direction.
    """
    for concept1, rel, concept2, value in quads:
        yield value, concept1, concept2
        yield value, concept2, concept1

def to_value_pair_relation(quads):
    """
    Convert a stream of (concept1, rel, concept2, value) quads into a stream
    of (value, (concept1, concept2), relation) triples, one per quad.
    """
    # The loop header already unpacks each quad. A stray re-unpacking from an
    # undefined name `triple` used to raise NameError on the first item.
    for concept1, rel, concept2, value in quads:
        yield value, (concept1, concept2), rel
def build_matrix(query, cutoff=DEFAULT_CUTOFF, identity_weight=DEFAULT_IDENTITY_WEIGHT, data_source=conceptnet_quads, transform=to_value_concept_feature):
    """
    Build a Divisi2 sparse matrix from relational data.

    The one required argument is `query`, which is handed to `data_source`
    (for the default source: a QuerySet or a language identifier).

    Optional arguments:

    - `cutoff`: passed to `data_source` and also used to squish underused
      rows out of the finished matrix. Defaults to DEFAULT_CUTOFF=5.
    - `identity_weight`: when greater than 0, an
      (concept, 'InheritsFrom', concept) quad with this weight is added for
      every concept that appears first in some quad, plus one such
      self-quad (carrying the quad's own value) for the first concept of
      every 'IsA' quad.
    - `data_source`: a function producing (concept1, rel, concept2, value)
      quads given `query` and `cutoff`. Defaults to
      :meth:`conceptnet_quads`.
    - `transform`: the function turning quads into
      (value, row_name, column_name) triples. Defaults to
      :meth:`to_value_concept_feature`.
    """
    logger.info("Performing ConceptNet query")
    quads = list(data_source(query, cutoff))
    # todo: separate this out into a customizable function

    if identity_weight > 0:
        logger.info("Adding identities")
        identities = [(concept, 'InheritsFrom', concept, identity_weight)
                      for concept in set(quad[0] for quad in quads)]
        # NOTE(review): the IsA-derived quad uses c1 on both sides (c2 is
        # ignored) -- confirm this is intended rather than (c1, ..., c2).
        identities += [(c1, 'InheritsFrom', c1, val)
                       for c1, rel, c2, val in quads if rel == 'IsA']
        quads += identities

    logger.info("Creating triples")
    triples = transform(quads)
    logger.info("Building matrix")
    sparse = divisi2.make_sparse(triples)
    logger.info("Squishing underused rows")
    return sparse.squish(cutoff)
[Concept.objects.get_or_create(language=lang, text=stem)[0] 23 | for stem, residue in text_factors] 24 | for c in concepts: c.save() 25 | 26 | surface_forms = [SurfaceForm.objects.get_or_create(concept=concepts[i], 27 | text=matches[i+1], 28 | residue=text_factors[i][1], 29 | language=lang)[0] 30 | for i in (0, 1)] 31 | for s in surface_forms: s.save() 32 | 33 | freq, _ = Frequency.objects.get_or_create(text=matches.get('a', ''), 34 | language=lang, 35 | defaults=dict(value=50)) 36 | freq.save() 37 | 38 | frame, _ = Frame.objects.get_or_create(relation=relation, language=lang, 39 | text=frametext, frequency=freq, 40 | defaults=dict(goodness=1)) 41 | frame.save() 42 | 43 | raw_assertion, _ = RawAssertion.objects.get_or_create( 44 | surface1=surface_forms[0], 45 | surface2=surface_forms[1], 46 | frame=frame, 47 | language=lang, 48 | defaults=dict(batch=batch)) 49 | # still need to set assertion_id 50 | 51 | assertion, _ = Assertion.objects.get_or_create( 52 | relation=relation, 53 | concept1=concepts[0], 54 | concept2=concepts[1], 55 | frequency=freq, 56 | language=lang, 57 | defaults=dict(score=0) 58 | ) 59 | assertion.score += 1 60 | assertion.save() 61 | raw_assertion.assertion = assertion 62 | raw_assertion.save() 63 | 64 | rating1, _ = Rating.objects.get_or_create( 65 | user=sentence.creator, activity=csamoa4_activity, 66 | sentence=sentence, score=1 67 | ) 68 | rating2, _ = Rating.objects.get_or_create( 69 | user=sentence.creator, activity=csamoa4_activity, 70 | raw_assertion=raw_assertion, score=1 71 | ) 72 | rating1.save() 73 | rating2.save() 74 | 75 | print '=>', str(assertion).encode('utf-8') 76 | return [assertion] 77 | 78 | def process_sentence(sentence, lang, batch): 79 | print sentence.text.encode('utf-8') 80 | _, frametext, reltext, matches = pattern_parse(sentence.text) 81 | 82 | if reltext is None or reltext == 'junk': return [] 83 | relation = Relation.objects.get(name=reltext) 84 | text_factors = [lang.nl.lemma_factor(matches[i]) for i in (1, 2)] 
85 | concepts = [Concept.objects.get_or_create(language=lang, text=stem)[0] 86 | for stem, residue in text_factors] 87 | for c in concepts: c.save() 88 | 89 | surface_forms = [SurfaceForm.objects.get_or_create(concept=concepts[i], 90 | text=matches[i+1], 91 | residue=text_factors[i][1], 92 | language=lang)[0] 93 | for i in (0, 1)] 94 | for s in surface_forms: s.save() 95 | 96 | freq, _ = Frequency.objects.get_or_create(text=matches.get('a', ''), 97 | language=lang, 98 | defaults=dict(value=50)) 99 | freq.save() 100 | 101 | frame, _ = Frame.objects.get_or_create(relation=relation, language=lang, 102 | text=frametext, frequency=freq, 103 | defaults=dict(goodness=1)) 104 | frame.save() 105 | 106 | raw_assertion, _ = RawAssertion.objects.get_or_create( 107 | surface1=surface_forms[0], 108 | surface2=surface_forms[1], 109 | frame=frame, 110 | language=lang, 111 | defaults=dict(batch=batch)) 112 | # still need to set assertion_id 113 | 114 | assertion, _ = Assertion.objects.get_or_create( 115 | relation=relation, 116 | concept1=concepts[0], 117 | concept2=concepts[1], 118 | frequency=freq, 119 | language=lang, 120 | defaults=dict(score=0) 121 | ) 122 | assertion.score += 1 123 | assertion.save() 124 | raw_assertion.assertion = assertion 125 | raw_assertion.save() 126 | 127 | rating1, _ = Rating.objects.get_or_create( 128 | user=sentence.creator, activity=csamoa4_activity, 129 | sentence=sentence, score=1 130 | ) 131 | rating2, _ = Rating.objects.get_or_create( 132 | user=sentence.creator, activity=csamoa4_activity, 133 | raw_assertion=raw_assertion, score=1 134 | ) 135 | rating1.save() 136 | rating2.save() 137 | 138 | print '=>', str(assertion).encode('utf-8') 139 | return [assertion] 140 | 141 | def run(user, lang, start_page=1): 142 | batch = Batch() 143 | batch.owner = user 144 | 145 | all_sentences = Sentence.objects.filter(language=lang).order_by('id') 146 | paginator = Paginator(all_sentences,10) 147 | #pages = ((i,paginator.page(i)) for i in 
range(start_page,paginator.num_pages)) 148 | 149 | @transaction.commit_on_success 150 | def do_batch(sentences): 151 | for sentence in sentences: 152 | try: 153 | preds = process_sentence(sentence, lang, batch) 154 | # changed to an improbable exception for now 155 | except Exception, e: 156 | # Add sentence 157 | e.sentence = sentence 158 | 159 | # Extract traceback 160 | e_type, e_value, e_tb = sys.exc_info() 161 | e.tb = "\n".join(traceback.format_exception( e_type, e_value, e_tb )) 162 | 163 | # Raise again 164 | raise e 165 | 166 | # Process sentences 167 | page_range = [p for p in paginator.page_range if p >= start_page] 168 | for i in page_range: 169 | sentences = paginator.page(i).object_list 170 | 171 | # Update progress 172 | batch.status = "process_sentence_batch " + str(i) + "/" + str(paginator.num_pages) 173 | batch.progress_num = i 174 | batch.progress_den = paginator.num_pages 175 | batch.save() 176 | 177 | try: do_batch(sentences) 178 | 179 | except Exception, e: #improbable exception for now 180 | batch.status = "process_sentence_batch " + str(i) + "/" + str(paginator.num_pages) + " ERROR!" 181 | batch.remarks = str(e.sentence) + "\n" + str(e) + "\n" + e.tb 182 | print "***TRACEBACK***" 183 | print batch.remarks 184 | batch.save() 185 | raise e 186 | 187 | 188 | if __name__ == '__main__': 189 | user = User.objects.get(username='rspeer') 190 | lang = Language.get('en') 191 | run(user, lang, start_page=50000) 192 | 193 | --------------------------------------------------------------------------------